| { |
| "best_global_step": 6000, |
| "best_metric": 2.9490778941081053, |
| "best_model_checkpoint": "/gpfs/scratch/guoh/DNAFM/output/gencode_human_12.8k_12800/HNet_BPT128_12.8K-100B/checkpoint-6000", |
| "epoch": 1.702127659574468, |
| "eval_steps": 500, |
| "global_step": 6000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.0028368794326241137, |
| "grad_norm": 1633.828125, |
| "loss": 88.481, |
| "loss_ce": 60.09074783325195, |
| "loss_region": 0.06510699540376663, |
| "loss_total": 60.155853271484375, |
| "lr": 2.20454076850486e-05, |
| "router/selected_tokens_s0": 919.375, |
| "router/selected_tokens_s1": 208.0625, |
| "step": 10, |
| "tokens_trained": 0.03276544 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.005673758865248227, |
| "grad_norm": 466.1059875488281, |
| "loss": 44.1432, |
| "loss_ce": 25.237300872802734, |
| "loss_region": 0.36207184195518494, |
| "loss_total": 25.59937286376953, |
| "lr": 4.654030511288038e-05, |
| "router/selected_tokens_s0": 1.0, |
| "router/selected_tokens_s1": 1.0, |
| "step": 20, |
| "tokens_trained": 0.06553088 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.00851063829787234, |
| "grad_norm": 339.109375, |
| "loss": 14.1273, |
| "loss_ce": 7.768259525299072, |
| "loss_region": 0.3637296259403229, |
| "loss_total": 8.131989479064941, |
| "lr": 7.103520254071216e-05, |
| "router/selected_tokens_s0": 1.0, |
| "router/selected_tokens_s1": 1.0, |
| "step": 30, |
| "tokens_trained": 0.09829632 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.011347517730496455, |
| "grad_norm": 476.25433349609375, |
| "loss": 5.6328, |
| "loss_ce": 5.754528999328613, |
| "loss_region": 0.36484554409980774, |
| "loss_total": 6.119374752044678, |
| "lr": 9.553009996854394e-05, |
| "router/selected_tokens_s0": 1.0, |
| "router/selected_tokens_s1": 1.0, |
| "step": 40, |
| "tokens_trained": 0.13106176 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.014184397163120567, |
| "grad_norm": 555.9451904296875, |
| "loss": 4.3424, |
| "loss_ce": 5.022282123565674, |
| "loss_region": 0.36592456698417664, |
| "loss_total": 5.388206481933594, |
| "lr": 0.00012002499739637572, |
| "router/selected_tokens_s0": 1.0, |
| "router/selected_tokens_s1": 1.0, |
| "step": 50, |
| "tokens_trained": 0.1638272 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.01702127659574468, |
| "grad_norm": 365.4439392089844, |
| "loss": 5.3311, |
| "loss_ce": 4.102235317230225, |
| "loss_region": 0.3665260076522827, |
| "loss_total": 4.468761444091797, |
| "lr": 0.00014451989482420748, |
| "router/selected_tokens_s0": 1.0, |
| "router/selected_tokens_s1": 1.0, |
| "step": 60, |
| "tokens_trained": 0.19659264 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.019858156028368795, |
| "grad_norm": 675.146728515625, |
| "loss": 18.0367, |
| "loss_ce": 17.55718994140625, |
| "loss_region": 0.36727550625801086, |
| "loss_total": 17.92446517944336, |
| "lr": 0.00016901479225203927, |
| "router/selected_tokens_s0": 1.0, |
| "router/selected_tokens_s1": 1.0, |
| "step": 70, |
| "tokens_trained": 0.22935808 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.02269503546099291, |
| "grad_norm": 669.8828735351562, |
| "loss": 10.9644, |
| "loss_ce": 12.981254577636719, |
| "loss_region": 0.36880001425743103, |
| "loss_total": 13.350054740905762, |
| "lr": 0.00019350968967987104, |
| "router/selected_tokens_s0": 1.0, |
| "router/selected_tokens_s1": 1.0, |
| "step": 80, |
| "tokens_trained": 0.26212272 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.02553191489361702, |
| "grad_norm": 582.4469604492188, |
| "loss": 9.771, |
| "loss_ce": 9.486519813537598, |
| "loss_region": 0.3685120940208435, |
| "loss_total": 9.855031967163086, |
| "lr": 0.0002180045871077028, |
| "router/selected_tokens_s0": 1.0, |
| "router/selected_tokens_s1": 1.0, |
| "step": 90, |
| "tokens_trained": 0.294888136 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.028368794326241134, |
| "grad_norm": 399.1022033691406, |
| "loss": 12.1846, |
| "loss_ce": 15.043132781982422, |
| "loss_region": 0.3677713871002197, |
| "loss_total": 15.410903930664062, |
| "lr": 0.00024249948453553463, |
| "router/selected_tokens_s0": 1.0, |
| "router/selected_tokens_s1": 1.0, |
| "step": 100, |
| "tokens_trained": 0.327653576 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.031205673758865248, |
| "grad_norm": 572.5389404296875, |
| "loss": 8.1413, |
| "loss_ce": 7.673736572265625, |
| "loss_region": 0.3677681088447571, |
| "loss_total": 8.041504859924316, |
| "lr": 0.00026699438196336637, |
| "router/selected_tokens_s0": 1.0, |
| "router/selected_tokens_s1": 1.0, |
| "step": 110, |
| "tokens_trained": 0.360419016 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.03404255319148936, |
| "grad_norm": 305.3782043457031, |
| "loss": 9.9432, |
| "loss_ce": 13.94490909576416, |
| "loss_region": 0.3664073050022125, |
| "loss_total": 14.31131649017334, |
| "lr": 0.00029148927939119814, |
| "router/selected_tokens_s0": 1.0, |
| "router/selected_tokens_s1": 1.0, |
| "step": 120, |
| "tokens_trained": 0.393182856 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.03687943262411347, |
| "grad_norm": 385.6461181640625, |
| "loss": 13.2327, |
| "loss_ce": 11.810239791870117, |
| "loss_region": 0.36761465668678284, |
| "loss_total": 12.177854537963867, |
| "lr": 0.00031598417681902996, |
| "router/selected_tokens_s0": 1.0, |
| "router/selected_tokens_s1": 1.0, |
| "step": 130, |
| "tokens_trained": 0.425948296 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.03971631205673759, |
| "grad_norm": 325.9845275878906, |
| "loss": 10.9926, |
| "loss_ce": 11.57892894744873, |
| "loss_region": 0.3681558072566986, |
| "loss_total": 11.947084426879883, |
| "lr": 0.00034047907424686173, |
| "router/selected_tokens_s0": 1.0, |
| "router/selected_tokens_s1": 1.0, |
| "step": 140, |
| "tokens_trained": 0.458713736 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.0425531914893617, |
| "grad_norm": 375.1774597167969, |
| "loss": 10.6799, |
| "loss_ce": 7.312507152557373, |
| "loss_region": 0.36791399121284485, |
| "loss_total": 7.680421352386475, |
| "lr": 0.0003649739716746935, |
| "router/selected_tokens_s0": 1.0, |
| "router/selected_tokens_s1": 1.0, |
| "step": 150, |
| "tokens_trained": 0.491474616 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.04539007092198582, |
| "grad_norm": 355.0230407714844, |
| "loss": 8.5176, |
| "loss_ce": 21.199607849121094, |
| "loss_region": 0.06011432409286499, |
| "loss_total": 21.259721755981445, |
| "lr": 0.00038946886910252526, |
| "router/selected_tokens_s0": 160.875, |
| "router/selected_tokens_s1": 1.0, |
| "step": 160, |
| "tokens_trained": 0.524240056 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.04822695035460993, |
| "grad_norm": 273.1413269042969, |
| "loss": 13.482, |
| "loss_ce": 17.121381759643555, |
| "loss_region": 0.0692189484834671, |
| "loss_total": 17.190601348876953, |
| "lr": 0.0004139637665303571, |
| "router/selected_tokens_s0": 2280.5, |
| "router/selected_tokens_s1": 1.0, |
| "step": 170, |
| "tokens_trained": 0.557004696 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.05106382978723404, |
| "grad_norm": 249.40753173828125, |
| "loss": 10.4567, |
| "loss_ce": 9.94619083404541, |
| "loss_region": 0.08772184699773788, |
| "loss_total": 10.033912658691406, |
| "lr": 0.0004384586639581888, |
| "router/selected_tokens_s0": 3865.125, |
| "router/selected_tokens_s1": 1.0, |
| "step": 180, |
| "tokens_trained": 0.589768376 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.05390070921985816, |
| "grad_norm": 222.1225128173828, |
| "loss": 9.2011, |
| "loss_ce": 6.419787406921387, |
| "loss_region": 0.11581941694021225, |
| "loss_total": 6.535606861114502, |
| "lr": 0.0004629535613860206, |
| "router/selected_tokens_s0": 5891.0625, |
| "router/selected_tokens_s1": 1.0, |
| "step": 190, |
| "tokens_trained": 0.622533016 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.05673758865248227, |
| "grad_norm": 206.31980895996094, |
| "loss": 3.1707, |
| "loss_ce": 3.6852493286132812, |
| "loss_region": 0.12171252071857452, |
| "loss_total": 3.80696177482605, |
| "lr": 0.00048744845881385244, |
| "router/selected_tokens_s0": 6350.1875, |
| "router/selected_tokens_s1": 1.0, |
| "step": 200, |
| "tokens_trained": 0.655298456 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.059574468085106386, |
| "grad_norm": 196.80050659179688, |
| "loss": 2.8133, |
| "loss_ce": 3.34086275100708, |
| "loss_region": 0.10963311791419983, |
| "loss_total": 3.450495958328247, |
| "lr": 0.0005119433562416841, |
| "router/selected_tokens_s0": 5418.75, |
| "router/selected_tokens_s1": 1.0, |
| "step": 210, |
| "tokens_trained": 0.688063096 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.062411347517730496, |
| "grad_norm": 197.86050415039062, |
| "loss": 3.5837, |
| "loss_ce": 6.36979341506958, |
| "loss_region": 0.11656106263399124, |
| "loss_total": 6.486354351043701, |
| "lr": 0.0005364382536695159, |
| "router/selected_tokens_s0": 5989.125, |
| "router/selected_tokens_s1": 1.0, |
| "step": 220, |
| "tokens_trained": 0.720828536 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.06524822695035461, |
| "grad_norm": 207.3871307373047, |
| "loss": 5.6426, |
| "loss_ce": 7.008437633514404, |
| "loss_region": 0.12679478526115417, |
| "loss_total": 7.135232448577881, |
| "lr": 0.0005609331510973477, |
| "router/selected_tokens_s0": 6642.625, |
| "router/selected_tokens_s1": 1.0, |
| "step": 230, |
| "tokens_trained": 0.753593976 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.06808510638297872, |
| "grad_norm": 93.20205688476562, |
| "loss": 6.2345, |
| "loss_ce": 4.154336929321289, |
| "loss_region": 0.3657049834728241, |
| "loss_total": 4.5200419425964355, |
| "lr": 0.0005854280485251795, |
| "router/selected_tokens_s0": 1.0, |
| "router/selected_tokens_s1": 1.0, |
| "step": 240, |
| "tokens_trained": 0.786359416 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.07092198581560284, |
| "grad_norm": 164.30181884765625, |
| "loss": 4.8424, |
| "loss_ce": 5.935614585876465, |
| "loss_region": 0.3678229749202728, |
| "loss_total": 6.30343770980835, |
| "lr": 0.0006099229459530113, |
| "router/selected_tokens_s0": 1.0, |
| "router/selected_tokens_s1": 1.0, |
| "step": 250, |
| "tokens_trained": 0.819124856 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.07375886524822695, |
| "grad_norm": 87.75625610351562, |
| "loss": 6.6223, |
| "loss_ce": 2.8206207752227783, |
| "loss_region": 0.36699220538139343, |
| "loss_total": 3.187613010406494, |
| "lr": 0.0006344178433808431, |
| "router/selected_tokens_s0": 1.0, |
| "router/selected_tokens_s1": 1.0, |
| "step": 260, |
| "tokens_trained": 0.851889496 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.07659574468085106, |
| "grad_norm": 91.14336395263672, |
| "loss": 3.8191, |
| "loss_ce": 4.011717796325684, |
| "loss_region": 0.3666604161262512, |
| "loss_total": 4.378378391265869, |
| "lr": 0.0006589127408086749, |
| "router/selected_tokens_s0": 1.0, |
| "router/selected_tokens_s1": 1.0, |
| "step": 270, |
| "tokens_trained": 0.884654936 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.07943262411347518, |
| "grad_norm": 139.20018005371094, |
| "loss": 3.5321, |
| "loss_ce": 3.950164556503296, |
| "loss_region": 0.3653871715068817, |
| "loss_total": 4.3155517578125, |
| "lr": 0.0006834076382365066, |
| "router/selected_tokens_s0": 1.0, |
| "router/selected_tokens_s1": 1.0, |
| "step": 280, |
| "tokens_trained": 0.91742036 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.08226950354609928, |
| "grad_norm": 73.47037506103516, |
| "loss": 4.4399, |
| "loss_ce": 4.244175910949707, |
| "loss_region": 0.36372801661491394, |
| "loss_total": 4.607903957366943, |
| "lr": 0.0007079025356643384, |
| "router/selected_tokens_s0": 1.0, |
| "router/selected_tokens_s1": 1.0, |
| "step": 290, |
| "tokens_trained": 0.9501858 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.0851063829787234, |
| "grad_norm": 96.47210693359375, |
| "loss": 5.3423, |
| "loss_ce": 3.720698833465576, |
| "loss_region": 0.3621433675289154, |
| "loss_total": 4.0828423500061035, |
| "lr": 0.0007323974330921702, |
| "router/selected_tokens_s0": 1.0, |
| "router/selected_tokens_s1": 1.0, |
| "step": 300, |
| "tokens_trained": 0.98295124 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.08794326241134752, |
| "grad_norm": 51.55535888671875, |
| "loss": 4.7622, |
| "loss_ce": 3.124469757080078, |
| "loss_region": 0.36035263538360596, |
| "loss_total": 3.4848222732543945, |
| "lr": 0.000756892330520002, |
| "router/selected_tokens_s0": 1.0, |
| "router/selected_tokens_s1": 1.0, |
| "step": 310, |
| "tokens_trained": 1.01571668 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.09078014184397164, |
| "grad_norm": 100.0770034790039, |
| "loss": 4.7539, |
| "loss_ce": 2.9495983123779297, |
| "loss_region": 0.0525033101439476, |
| "loss_total": 3.0021016597747803, |
| "lr": 0.0007813872279478337, |
| "router/selected_tokens_s0": 182.5625, |
| "router/selected_tokens_s1": 1.0, |
| "step": 320, |
| "tokens_trained": 1.04848196 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.09361702127659574, |
| "grad_norm": 55.15224075317383, |
| "loss": 2.6889, |
| "loss_ce": 2.26753830909729, |
| "loss_region": 0.17705734074115753, |
| "loss_total": 2.4445955753326416, |
| "lr": 0.0008058821253756655, |
| "router/selected_tokens_s0": 10099.1875, |
| "router/selected_tokens_s1": 1.0, |
| "step": 330, |
| "tokens_trained": 1.0812466 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.09645390070921986, |
| "grad_norm": 45.57613754272461, |
| "loss": 1.9699, |
| "loss_ce": 1.9211128950119019, |
| "loss_region": 0.06836946308612823, |
| "loss_total": 1.9894824028015137, |
| "lr": 0.0008303770228034974, |
| "router/selected_tokens_s0": 26.25, |
| "router/selected_tokens_s1": 1.0, |
| "step": 340, |
| "tokens_trained": 1.11401204 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.09929078014184398, |
| "grad_norm": 63.2139778137207, |
| "loss": 2.5665, |
| "loss_ce": 1.9517995119094849, |
| "loss_region": 0.12128585577011108, |
| "loss_total": 2.073085308074951, |
| "lr": 0.0008548719202313291, |
| "router/selected_tokens_s0": 6079.3125, |
| "router/selected_tokens_s1": 1.0, |
| "step": 350, |
| "tokens_trained": 1.14677748 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.10212765957446808, |
| "grad_norm": 19.581270217895508, |
| "loss": 2.433, |
| "loss_ce": 1.3946959972381592, |
| "loss_region": 0.10749296098947525, |
| "loss_total": 1.5021889209747314, |
| "lr": 0.0008793668176591608, |
| "router/selected_tokens_s0": 4936.9375, |
| "router/selected_tokens_s1": 1.0, |
| "step": 360, |
| "tokens_trained": 1.17954212 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.1049645390070922, |
| "grad_norm": 74.17733764648438, |
| "loss": 3.503, |
| "loss_ce": 3.7360174655914307, |
| "loss_region": 0.08607521653175354, |
| "loss_total": 3.8220927715301514, |
| "lr": 0.0009038617150869926, |
| "router/selected_tokens_s0": 3165.4375, |
| "router/selected_tokens_s1": 1.0, |
| "step": 370, |
| "tokens_trained": 1.21230756 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.10780141843971631, |
| "grad_norm": 67.1612548828125, |
| "loss": 2.989, |
| "loss_ce": 3.997297525405884, |
| "loss_region": 0.18156586587429047, |
| "loss_total": 4.178863525390625, |
| "lr": 0.0009283566125148244, |
| "router/selected_tokens_s0": 10333.6875, |
| "router/selected_tokens_s1": 1.0625, |
| "step": 380, |
| "tokens_trained": 1.2450722 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.11063829787234042, |
| "grad_norm": 84.84760284423828, |
| "loss": 2.8334, |
| "loss_ce": 3.0769035816192627, |
| "loss_region": 0.050393715500831604, |
| "loss_total": 3.1272974014282227, |
| "lr": 0.0009528515099426562, |
| "router/selected_tokens_s0": 52.9375, |
| "router/selected_tokens_s1": 1.0, |
| "step": 390, |
| "tokens_trained": 1.27783604 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.11347517730496454, |
| "grad_norm": 63.6200065612793, |
| "loss": 3.1662, |
| "loss_ce": 2.779733180999756, |
| "loss_region": 0.10030720382928848, |
| "loss_total": 2.880040407180786, |
| "lr": 0.000977346407370488, |
| "router/selected_tokens_s0": 12.625, |
| "router/selected_tokens_s1": 1.0, |
| "step": 400, |
| "tokens_trained": 1.31060148 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.11631205673758865, |
| "grad_norm": 72.42300415039062, |
| "loss": 2.8148, |
| "loss_ce": 2.7900760173797607, |
| "loss_region": 0.19263741374015808, |
| "loss_total": 2.982713460922241, |
| "lr": 0.0010018413047983197, |
| "router/selected_tokens_s0": 10859.0625, |
| "router/selected_tokens_s1": 1.0, |
| "step": 410, |
| "tokens_trained": 1.34336692 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.11914893617021277, |
| "grad_norm": 40.36540603637695, |
| "loss": 2.6625, |
| "loss_ce": 1.7843694686889648, |
| "loss_region": 0.10816018283367157, |
| "loss_total": 1.8925296068191528, |
| "lr": 0.0010263362022261515, |
| "router/selected_tokens_s0": 5132.5625, |
| "router/selected_tokens_s1": 1.0, |
| "step": 420, |
| "tokens_trained": 1.37613236 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.12198581560283688, |
| "grad_norm": 57.87046813964844, |
| "loss": 2.1447, |
| "loss_ce": 2.605336904525757, |
| "loss_region": 0.1274668127298355, |
| "loss_total": 2.7328038215637207, |
| "lr": 0.0010508310996539833, |
| "router/selected_tokens_s0": 6691.0625, |
| "router/selected_tokens_s1": 1.0, |
| "step": 430, |
| "tokens_trained": 1.4088978 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.12482269503546099, |
| "grad_norm": 46.76475143432617, |
| "loss": 2.2982, |
| "loss_ce": 2.855947971343994, |
| "loss_region": 0.06147913262248039, |
| "loss_total": 2.9174270629882812, |
| "lr": 0.0010753259970818151, |
| "router/selected_tokens_s0": 1345.9375, |
| "router/selected_tokens_s1": 1.0, |
| "step": 440, |
| "tokens_trained": 1.44166324 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.1276595744680851, |
| "grad_norm": 26.56728172302246, |
| "loss": 2.1202, |
| "loss_ce": 2.314136505126953, |
| "loss_region": 0.1346735656261444, |
| "loss_total": 2.44881010055542, |
| "lr": 0.001099820894509647, |
| "router/selected_tokens_s0": 7680.125, |
| "router/selected_tokens_s1": 1.0, |
| "step": 450, |
| "tokens_trained": 1.47442868 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.13049645390070921, |
| "grad_norm": 31.932329177856445, |
| "loss": 2.6495, |
| "loss_ce": 1.6046358346939087, |
| "loss_region": 0.07332747429609299, |
| "loss_total": 1.6779632568359375, |
| "lr": 0.0011243157919374788, |
| "router/selected_tokens_s0": 2986.0, |
| "router/selected_tokens_s1": 1.0, |
| "step": 460, |
| "tokens_trained": 1.50719412 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.13333333333333333, |
| "grad_norm": 40.14197540283203, |
| "loss": 2.5686, |
| "loss_ce": 2.384312629699707, |
| "loss_region": 0.09212882071733475, |
| "loss_total": 2.4764413833618164, |
| "lr": 0.0011488106893653104, |
| "router/selected_tokens_s0": 4881.3125, |
| "router/selected_tokens_s1": 1.0, |
| "step": 470, |
| "tokens_trained": 1.53995956 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.13617021276595745, |
| "grad_norm": 40.96647644042969, |
| "loss": 2.1554, |
| "loss_ce": 2.062847852706909, |
| "loss_region": 0.08483830839395523, |
| "loss_total": 2.147686243057251, |
| "lr": 0.0011733055867931422, |
| "router/selected_tokens_s0": 4314.5, |
| "router/selected_tokens_s1": 1.0, |
| "step": 480, |
| "tokens_trained": 1.5727242 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.13900709219858157, |
| "grad_norm": 58.40409469604492, |
| "loss": 2.0697, |
| "loss_ce": 2.608842134475708, |
| "loss_region": 0.12910506129264832, |
| "loss_total": 2.7379472255706787, |
| "lr": 0.001197800484220974, |
| "router/selected_tokens_s0": 7422.8125, |
| "router/selected_tokens_s1": 1.0, |
| "step": 490, |
| "tokens_trained": 1.60548964 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.14184397163120568, |
| "grad_norm": 25.093971252441406, |
| "loss": 2.0358, |
| "loss_ce": 1.524597406387329, |
| "loss_region": 0.08822071552276611, |
| "loss_total": 1.6128181219100952, |
| "lr": 0.0012222953816488059, |
| "router/selected_tokens_s0": 4142.625, |
| "router/selected_tokens_s1": 1.0, |
| "step": 500, |
| "tokens_trained": 1.638252944 |
| }, |
| { |
| "epoch": 0.14184397163120568, |
| "eval_ppl": 5.238461214674061, |
| "eval_runtime": 2.1242, |
| "step": 500, |
| "tokens_trained": 1.638252944 |
| }, |
| { |
| "epoch": 0.14184397163120568, |
| "eval_F": 7.813110399249941e-05, |
| "eval_F_cds": 2.695175635612254e-05, |
| "eval_F_dig": 7.813110399249941e-05, |
| "eval_F_exon": 8.154278957883149e-05, |
| "eval_F_intron": 9.008829419540036e-05, |
| "eval_F_nig": 8.194492915137831e-05, |
| "eval_F_promoter": 3.947950224243573e-05, |
| "eval_F_utr": 7.461016190405133e-05, |
| "eval_G": 0.09749281352746117, |
| "eval_G_cds": 0.09038638220397988, |
| "eval_G_dig": 0.11551821065122275, |
| "eval_G_exon": 0.09913527481618896, |
| "eval_G_intron": 0.09883524461080323, |
| "eval_G_nig": 0.10110245416326033, |
| "eval_G_promoter": 0.08840385852915167, |
| "eval_G_utr": 0.09609994947160018, |
| "eval_avg_bp_per_token": 12799.0, |
| "eval_bp_per_token/cds": 37103.333333333336, |
| "eval_bp_per_token/dig": 12799.0, |
| "eval_bp_per_token/exon": 12263.5, |
| "eval_bp_per_token/intron": 11100.221276595745, |
| "eval_bp_per_token/nig": 12203.317647058824, |
| "eval_bp_per_token/promoter": 25329.6, |
| "eval_bp_per_token/utr": 13403.0, |
| "eval_ppl_cds": 5.197959664020222, |
| "eval_ppl_dig": 5.7282999910686785, |
| "eval_ppl_exon": 5.2613736013319095, |
| "eval_ppl_intron": 5.244331873829972, |
| "eval_ppl_nig": 5.31566409531509, |
| "eval_ppl_promoter": 5.143870486889258, |
| "eval_ppl_utr": 5.205834202019033, |
| "step": 500, |
| "tokens_trained": 1.638252944 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.14468085106382977, |
| "grad_norm": 36.171993255615234, |
| "loss": 1.7069, |
| "loss_ce": 1.6758953332901, |
| "loss_region": 0.07858321815729141, |
| "loss_total": 1.7544785737991333, |
| "lr": 0.0012243786686061229, |
| "router/selected_tokens_s0": 3400.3125, |
| "router/selected_tokens_s1": 1.0, |
| "step": 510, |
| "tokens_trained": 1.671014152 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.1475177304964539, |
| "grad_norm": 33.956119537353516, |
| "loss": 1.6551, |
| "loss_ce": 1.5848710536956787, |
| "loss_region": 0.06811508536338806, |
| "loss_total": 1.6529861688613892, |
| "lr": 0.0012239717766222718, |
| "router/selected_tokens_s0": 2687.4375, |
| "router/selected_tokens_s1": 1.0, |
| "step": 520, |
| "tokens_trained": 1.703779592 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.150354609929078, |
| "grad_norm": 28.901918411254883, |
| "loss": 1.6154, |
| "loss_ce": 1.5558669567108154, |
| "loss_region": 0.07830837368965149, |
| "loss_total": 1.6341753005981445, |
| "lr": 0.001223564884638421, |
| "router/selected_tokens_s0": 3778.375, |
| "router/selected_tokens_s1": 1.5, |
| "step": 530, |
| "tokens_trained": 1.736545032 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.15319148936170213, |
| "grad_norm": 16.806047439575195, |
| "loss": 1.6273, |
| "loss_ce": 1.399499535560608, |
| "loss_region": 0.08511029928922653, |
| "loss_total": 1.484609842300415, |
| "lr": 0.00122315799265457, |
| "router/selected_tokens_s0": 4515.25, |
| "router/selected_tokens_s1": 1.1875, |
| "step": 540, |
| "tokens_trained": 1.769310472 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.15602836879432624, |
| "grad_norm": 22.434467315673828, |
| "loss": 1.5519, |
| "loss_ce": 1.454548716545105, |
| "loss_region": 0.06522653251886368, |
| "loss_total": 1.5197752714157104, |
| "lr": 0.001222751100670719, |
| "router/selected_tokens_s0": 2753.5, |
| "router/selected_tokens_s1": 1.25, |
| "step": 550, |
| "tokens_trained": 1.802075912 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.15886524822695036, |
| "grad_norm": 20.20318603515625, |
| "loss": 1.5147, |
| "loss_ce": 1.388825535774231, |
| "loss_region": 0.05828768387436867, |
| "loss_total": 1.447113275527954, |
| "lr": 0.001222344208686868, |
| "router/selected_tokens_s0": 2056.0, |
| "router/selected_tokens_s1": 1.0, |
| "step": 560, |
| "tokens_trained": 1.834841352 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.16170212765957448, |
| "grad_norm": 28.31165885925293, |
| "loss": 1.5272, |
| "loss_ce": 1.5384010076522827, |
| "loss_region": 0.05947807803750038, |
| "loss_total": 1.5978790521621704, |
| "lr": 0.0012219373167030169, |
| "router/selected_tokens_s0": 2200.5, |
| "router/selected_tokens_s1": 1.0, |
| "step": 570, |
| "tokens_trained": 1.867606792 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.16453900709219857, |
| "grad_norm": 17.549209594726562, |
| "loss": 1.494, |
| "loss_ce": 1.359383225440979, |
| "loss_region": 0.21121427416801453, |
| "loss_total": 1.570597529411316, |
| "lr": 0.0012215304247191658, |
| "router/selected_tokens_s0": 1947.6875, |
| "router/selected_tokens_s1": 1947.5625, |
| "step": 580, |
| "tokens_trained": 1.900370632 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.1673758865248227, |
| "grad_norm": 16.347366333007812, |
| "loss": 1.4343, |
| "loss_ce": 1.331540584564209, |
| "loss_region": 0.05151217430830002, |
| "loss_total": 1.3830527067184448, |
| "lr": 0.0012211235327353148, |
| "router/selected_tokens_s0": 1371.4375, |
| "router/selected_tokens_s1": 1.375, |
| "step": 590, |
| "tokens_trained": 1.933136072 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.1702127659574468, |
| "grad_norm": 20.583803176879883, |
| "loss": 1.3997, |
| "loss_ce": 1.466564416885376, |
| "loss_region": 0.054751571267843246, |
| "loss_total": 1.5213159322738647, |
| "lr": 0.0012207166407514638, |
| "router/selected_tokens_s0": 1821.1875, |
| "router/selected_tokens_s1": 2.0625, |
| "step": 600, |
| "tokens_trained": 1.965901512 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.17304964539007092, |
| "grad_norm": 28.91676139831543, |
| "loss": 1.8869, |
| "loss_ce": 1.7341006994247437, |
| "loss_region": 0.06411188840866089, |
| "loss_total": 1.7982125282287598, |
| "lr": 0.0012203097487676127, |
| "router/selected_tokens_s0": 2751.625, |
| "router/selected_tokens_s1": 1.0, |
| "step": 610, |
| "tokens_trained": 1.998666952 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.17588652482269504, |
| "grad_norm": 16.068174362182617, |
| "loss": 1.7234, |
| "loss_ce": 1.4155008792877197, |
| "loss_region": 0.10453462600708008, |
| "loss_total": 1.5200355052947998, |
| "lr": 0.0012199028567837617, |
| "router/selected_tokens_s0": 6356.5, |
| "router/selected_tokens_s1": 1.0, |
| "step": 620, |
| "tokens_trained": 2.031432392 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.17872340425531916, |
| "grad_norm": 12.171530723571777, |
| "loss": 1.4577, |
| "loss_ce": 1.3392314910888672, |
| "loss_region": 0.044366203248500824, |
| "loss_total": 1.383597731590271, |
| "lr": 0.0012194959647999107, |
| "router/selected_tokens_s0": 746.0, |
| "router/selected_tokens_s1": 1.0, |
| "step": 630, |
| "tokens_trained": 2.064197832 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.18156028368794327, |
| "grad_norm": 14.937308311462402, |
| "loss": 1.5005, |
| "loss_ce": 1.3874304294586182, |
| "loss_region": 0.056757938116788864, |
| "loss_total": 1.4441883563995361, |
| "lr": 0.0012190890728160596, |
| "router/selected_tokens_s0": 1590.25, |
| "router/selected_tokens_s1": 62.5, |
| "step": 640, |
| "tokens_trained": 2.096963272 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.18439716312056736, |
| "grad_norm": 15.287280082702637, |
| "loss": 1.468, |
| "loss_ce": 1.3241463899612427, |
| "loss_region": 0.052737943828105927, |
| "loss_total": 1.3768843412399292, |
| "lr": 0.0012186821808322086, |
| "router/selected_tokens_s0": 1661.375, |
| "router/selected_tokens_s1": 1.0, |
| "step": 650, |
| "tokens_trained": 2.129728712 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.18723404255319148, |
| "grad_norm": 6.567054748535156, |
| "loss": 1.4058, |
| "loss_ce": 1.2686595916748047, |
| "loss_region": 0.04463791474699974, |
| "loss_total": 1.3132975101470947, |
| "lr": 0.0012182752888483576, |
| "router/selected_tokens_s0": 795.3125, |
| "router/selected_tokens_s1": 1.3125, |
| "step": 660, |
| "tokens_trained": 2.162494152 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.1900709219858156, |
| "grad_norm": 35.38864517211914, |
| "loss": 1.483, |
| "loss_ce": 1.6116914749145508, |
| "loss_region": 0.0765811875462532, |
| "loss_total": 1.6882727146148682, |
| "lr": 0.0012178683968645065, |
| "router/selected_tokens_s0": 4140.0, |
| "router/selected_tokens_s1": 7.625, |
| "step": 670, |
| "tokens_trained": 2.195259592 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.19290780141843972, |
| "grad_norm": 19.182458877563477, |
| "loss": 1.6566, |
| "loss_ce": 1.384118676185608, |
| "loss_region": 0.050435055047273636, |
| "loss_total": 1.4345537424087524, |
| "lr": 0.0012174615048806555, |
| "router/selected_tokens_s0": 1329.1875, |
| "router/selected_tokens_s1": 1.0, |
| "step": 680, |
| "tokens_trained": 2.228025032 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.19574468085106383, |
| "grad_norm": 19.877857208251953, |
| "loss": 1.4028, |
| "loss_ce": 1.4199568033218384, |
| "loss_region": 0.04767146706581116, |
| "loss_total": 1.4676282405853271, |
| "lr": 0.0012170546128968045, |
| "router/selected_tokens_s0": 1076.875, |
| "router/selected_tokens_s1": 1.625, |
| "step": 690, |
| "tokens_trained": 2.260790472 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.19858156028368795, |
| "grad_norm": 16.101842880249023, |
| "loss": 1.3997, |
| "loss_ce": 1.364323616027832, |
| "loss_region": 0.06121417507529259, |
| "loss_total": 1.4255378246307373, |
| "lr": 0.0012166477209129534, |
| "router/selected_tokens_s0": 2551.3125, |
| "router/selected_tokens_s1": 1.5625, |
| "step": 700, |
| "tokens_trained": 2.293555912 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.20141843971631207, |
| "grad_norm": 22.22663688659668, |
| "loss": 1.3765, |
| "loss_ce": 1.3531076908111572, |
| "loss_region": 0.047868210822343826, |
| "loss_total": 1.4009759426116943, |
| "lr": 0.0012162408289291026, |
| "router/selected_tokens_s0": 1133.3125, |
| "router/selected_tokens_s1": 1.125, |
| "step": 710, |
| "tokens_trained": 2.326321352 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.20425531914893616, |
| "grad_norm": 31.300945281982422, |
| "loss": 1.4667, |
| "loss_ce": 1.442090630531311, |
| "loss_region": 0.07350843399763107, |
| "loss_total": 1.515599012374878, |
| "lr": 0.0012158339369452516, |
| "router/selected_tokens_s0": 3763.0625, |
| "router/selected_tokens_s1": 2.4375, |
| "step": 720, |
| "tokens_trained": 2.359086792 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.20709219858156028, |
| "grad_norm": 17.472919464111328, |
| "loss": 1.452, |
| "loss_ce": 1.3612605333328247, |
| "loss_region": 0.044522251933813095, |
| "loss_total": 1.4057828187942505, |
| "lr": 0.0012154270449614005, |
| "router/selected_tokens_s0": 111.875, |
| "router/selected_tokens_s1": 1.0, |
| "step": 730, |
| "tokens_trained": 2.391852232 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.2099290780141844, |
| "grad_norm": 23.76650047302246, |
| "loss": 1.4316, |
| "loss_ce": 1.5030287504196167, |
| "loss_region": 0.057180847972631454, |
| "loss_total": 1.5602096319198608, |
| "lr": 0.0012150201529775495, |
| "router/selected_tokens_s0": 2120.125, |
| "router/selected_tokens_s1": 1.8125, |
| "step": 740, |
| "tokens_trained": 2.424616064 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.2127659574468085, |
| "grad_norm": 8.664741516113281, |
| "loss": 1.3639, |
| "loss_ce": 1.2497303485870361, |
| "loss_region": 0.043838031589984894, |
| "loss_total": 1.2935683727264404, |
| "lr": 0.0012146132609936982, |
| "router/selected_tokens_s0": 625.625, |
| "router/selected_tokens_s1": 5.75, |
| "step": 750, |
| "tokens_trained": 2.457381496 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.21560283687943263, |
| "grad_norm": 11.039902687072754, |
| "loss": 1.3213, |
| "loss_ce": 1.2822058200836182, |
| "loss_region": 0.06590178608894348, |
| "loss_total": 1.3481075763702393, |
| "lr": 0.0012142063690098472, |
| "router/selected_tokens_s0": 2828.875, |
| "router/selected_tokens_s1": 13.3125, |
| "step": 760, |
| "tokens_trained": 2.490146936 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.21843971631205675, |
| "grad_norm": 12.05501937866211, |
| "loss": 1.3965, |
| "loss_ce": 1.312122106552124, |
| "loss_region": 0.17912708222866058, |
| "loss_total": 1.4912492036819458, |
| "lr": 0.0012137994770259962, |
| "router/selected_tokens_s0": 321.5, |
| "router/selected_tokens_s1": 284.0625, |
| "step": 770, |
| "tokens_trained": 2.522912376 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.22127659574468084, |
| "grad_norm": 19.33881378173828, |
| "loss": 1.3326, |
| "loss_ce": 1.362036943435669, |
| "loss_region": 0.06399328261613846, |
| "loss_total": 1.4260302782058716, |
| "lr": 0.0012133925850421454, |
| "router/selected_tokens_s0": 228.875, |
| "router/selected_tokens_s1": 2.5, |
| "step": 780, |
| "tokens_trained": 2.555677816 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.22411347517730495, |
| "grad_norm": 16.22661781311035, |
| "loss": 1.3875, |
| "loss_ce": 1.3392794132232666, |
| "loss_region": 0.047626323997974396, |
| "loss_total": 1.3869057893753052, |
| "lr": 0.0012129856930582943, |
| "router/selected_tokens_s0": 192.6875, |
| "router/selected_tokens_s1": 1.8125, |
| "step": 790, |
| "tokens_trained": 2.588443256 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.22695035460992907, |
| "grad_norm": 18.713428497314453, |
| "loss": 1.369, |
| "loss_ce": 1.3001604080200195, |
| "loss_region": 0.0622205026447773, |
| "loss_total": 1.362380862236023, |
| "lr": 0.0012125788010744433, |
| "router/selected_tokens_s0": 2593.3125, |
| "router/selected_tokens_s1": 1.1875, |
| "step": 800, |
| "tokens_trained": 2.621208696 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.2297872340425532, |
| "grad_norm": 21.459232330322266, |
| "loss": 1.4114, |
| "loss_ce": 1.3201086521148682, |
| "loss_region": 0.05014314129948616, |
| "loss_total": 1.3702517747879028, |
| "lr": 0.0012121719090905923, |
| "router/selected_tokens_s0": 1457.6875, |
| "router/selected_tokens_s1": 1.8125, |
| "step": 810, |
| "tokens_trained": 2.653974128 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.2326241134751773, |
| "grad_norm": 17.591365814208984, |
| "loss": 1.4042, |
| "loss_ce": 1.3406790494918823, |
| "loss_region": 0.07056135684251785, |
| "loss_total": 1.4112404584884644, |
| "lr": 0.0012117650171067412, |
| "router/selected_tokens_s0": 3305.5625, |
| "router/selected_tokens_s1": 1.875, |
| "step": 820, |
| "tokens_trained": 2.686739568 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.23546099290780143, |
| "grad_norm": 12.04948616027832, |
| "loss": 1.3367, |
| "loss_ce": 1.2736438512802124, |
| "loss_region": 0.15874218940734863, |
| "loss_total": 1.432386040687561, |
| "lr": 0.0012113581251228902, |
| "router/selected_tokens_s0": 676.0, |
| "router/selected_tokens_s1": 474.0625, |
| "step": 830, |
| "tokens_trained": 2.719505008 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.23829787234042554, |
| "grad_norm": 22.205650329589844, |
| "loss": 1.3664, |
| "loss_ce": 1.3812791109085083, |
| "loss_region": 0.045553792268037796, |
| "loss_total": 1.426832914352417, |
| "lr": 0.0012109512331390391, |
| "router/selected_tokens_s0": 1017.8125, |
| "router/selected_tokens_s1": 2.25, |
| "step": 840, |
| "tokens_trained": 2.752270448 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.24113475177304963, |
| "grad_norm": 13.030964851379395, |
| "loss": 1.3782, |
| "loss_ce": 1.2519162893295288, |
| "loss_region": 0.1624305546283722, |
| "loss_total": 1.4143468141555786, |
| "lr": 0.0012105443411551881, |
| "router/selected_tokens_s0": 917.4375, |
| "router/selected_tokens_s1": 681.625, |
| "step": 850, |
| "tokens_trained": 2.785035888 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.24397163120567375, |
| "grad_norm": 7.984014511108398, |
| "loss": 1.3127, |
| "loss_ce": 1.2660592794418335, |
| "loss_region": 0.04773706942796707, |
| "loss_total": 1.3137964010238647, |
| "lr": 0.001210137449171337, |
| "router/selected_tokens_s0": 1192.9375, |
| "router/selected_tokens_s1": 1.8125, |
| "step": 860, |
| "tokens_trained": 2.817797816 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.24680851063829787, |
| "grad_norm": 4.871339797973633, |
| "loss": 1.2833, |
| "loss_ce": 1.2215490341186523, |
| "loss_region": 0.05293113738298416, |
| "loss_total": 1.2744802236557007, |
| "lr": 0.001209730557187486, |
| "router/selected_tokens_s0": 1671.6875, |
| "router/selected_tokens_s1": 3.5625, |
| "step": 870, |
| "tokens_trained": 2.850563256 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.24964539007092199, |
| "grad_norm": 3.742021083831787, |
| "loss": 1.2882, |
| "loss_ce": 1.2314670085906982, |
| "loss_region": 0.04821491986513138, |
| "loss_total": 1.279681921005249, |
| "lr": 0.001209323665203635, |
| "router/selected_tokens_s0": 1251.0625, |
| "router/selected_tokens_s1": 3.0, |
| "step": 880, |
| "tokens_trained": 2.883328696 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.2524822695035461, |
| "grad_norm": 9.155281066894531, |
| "loss": 1.2934, |
| "loss_ce": 1.2408264875411987, |
| "loss_region": 0.04964429885149002, |
| "loss_total": 1.290470838546753, |
| "lr": 0.001208916773219784, |
| "router/selected_tokens_s0": 1380.125, |
| "router/selected_tokens_s1": 3.5625, |
| "step": 890, |
| "tokens_trained": 2.916094136 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.2553191489361702, |
| "grad_norm": 9.283857345581055, |
| "loss": 1.2886, |
| "loss_ce": 1.2425544261932373, |
| "loss_region": 0.043535713106393814, |
| "loss_total": 1.2860901355743408, |
| "lr": 0.001208509881235933, |
| "router/selected_tokens_s0": 811.5625, |
| "router/selected_tokens_s1": 1.0625, |
| "step": 900, |
| "tokens_trained": 2.948859576 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.2581560283687943, |
| "grad_norm": 13.81477165222168, |
| "loss": 1.3118, |
| "loss_ce": 1.2899353504180908, |
| "loss_region": 0.18143458664417267, |
| "loss_total": 1.471369981765747, |
| "lr": 0.001208102989252082, |
| "router/selected_tokens_s0": 833.1875, |
| "router/selected_tokens_s1": 739.5, |
| "step": 910, |
| "tokens_trained": 2.981619384 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.26099290780141843, |
| "grad_norm": 10.127344131469727, |
| "loss": 1.3015, |
| "loss_ce": 1.2317105531692505, |
| "loss_region": 0.04436146095395088, |
| "loss_total": 1.2760720252990723, |
| "lr": 0.0012076960972682309, |
| "router/selected_tokens_s0": 756.25, |
| "router/selected_tokens_s1": 7.0625, |
| "step": 920, |
| "tokens_trained": 3.014384824 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.26382978723404255, |
| "grad_norm": 7.185147285461426, |
| "loss": 1.2804, |
| "loss_ce": 1.2676362991333008, |
| "loss_region": 0.04398803040385246, |
| "loss_total": 1.31162428855896, |
| "lr": 0.0012072892052843798, |
| "router/selected_tokens_s0": 752.4375, |
| "router/selected_tokens_s1": 4.0, |
| "step": 930, |
| "tokens_trained": 3.047149464 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.26666666666666666, |
| "grad_norm": 8.473845481872559, |
| "loss": 1.298, |
| "loss_ce": 1.2457917928695679, |
| "loss_region": 0.053872235119342804, |
| "loss_total": 1.29966402053833, |
| "lr": 0.0012068823133005288, |
| "router/selected_tokens_s0": 1438.5, |
| "router/selected_tokens_s1": 41.3125, |
| "step": 940, |
| "tokens_trained": 3.079914904 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.2695035460992908, |
| "grad_norm": 9.972776412963867, |
| "loss": 1.3166, |
| "loss_ce": 1.1854572296142578, |
| "loss_region": 0.04348060116171837, |
| "loss_total": 1.2289378643035889, |
| "lr": 0.0012064754213166778, |
| "router/selected_tokens_s0": 766.75, |
| "router/selected_tokens_s1": 2.1875, |
| "step": 950, |
| "tokens_trained": 3.112675728 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.2723404255319149, |
| "grad_norm": 7.5483622550964355, |
| "loss": 1.2805, |
| "loss_ce": 1.2480460405349731, |
| "loss_region": 0.06218457594513893, |
| "loss_total": 1.3102306127548218, |
| "lr": 0.001206068529332827, |
| "router/selected_tokens_s0": 1183.125, |
| "router/selected_tokens_s1": 123.9375, |
| "step": 960, |
| "tokens_trained": 3.145441168 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.275177304964539, |
| "grad_norm": 2.173365354537964, |
| "loss": 1.2747, |
| "loss_ce": 1.206543207168579, |
| "loss_region": 0.13238129019737244, |
| "loss_total": 1.338924527168274, |
| "lr": 0.001205661637348976, |
| "router/selected_tokens_s0": 1298.875, |
| "router/selected_tokens_s1": 719.6875, |
| "step": 970, |
| "tokens_trained": 3.178206608 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.27801418439716313, |
| "grad_norm": 5.311792373657227, |
| "loss": 1.3034, |
| "loss_ce": 1.224631428718567, |
| "loss_region": 0.05017566680908203, |
| "loss_total": 1.274807095527649, |
| "lr": 0.0012052547453651249, |
| "router/selected_tokens_s0": 1392.0625, |
| "router/selected_tokens_s1": 13.5625, |
| "step": 980, |
| "tokens_trained": 3.210972048 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.28085106382978725, |
| "grad_norm": 9.337197303771973, |
| "loss": 1.3066, |
| "loss_ce": 1.247117042541504, |
| "loss_region": 0.16016621887683868, |
| "loss_total": 1.4072833061218262, |
| "lr": 0.0012048478533812738, |
| "router/selected_tokens_s0": 650.6875, |
| "router/selected_tokens_s1": 484.5625, |
| "step": 990, |
| "tokens_trained": 3.243737488 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.28368794326241137, |
| "grad_norm": 6.30027961730957, |
| "loss": 1.3249, |
| "loss_ce": 1.234486699104309, |
| "loss_region": 0.04393050819635391, |
| "loss_total": 1.2784172296524048, |
| "lr": 0.0012044409613974226, |
| "router/selected_tokens_s0": 834.9375, |
| "router/selected_tokens_s1": 1.5625, |
| "step": 1000, |
| "tokens_trained": 3.276502928 |
| }, |
| { |
| "epoch": 0.28368794326241137, |
| "eval_ppl": 3.482606606572146, |
| "eval_runtime": 2.0593, |
| "step": 1000, |
| "tokens_trained": 3.276502928 |
| }, |
| { |
| "epoch": 0.28368794326241137, |
| "eval_F": 0.0001322708635525194, |
| "eval_F_cds": 2.695175635612254e-05, |
| "eval_F_dig": 0.0008203765919212438, |
| "eval_F_exon": 0.00012231418436824723, |
| "eval_F_intron": 0.00014299120738248654, |
| "eval_F_nig": 0.00015232116242256205, |
| "eval_F_promoter": 6.843113722022193e-05, |
| "eval_F_utr": 7.461016190405133e-05, |
| "eval_G": 0.027253320926880115, |
| "eval_G_cds": 0.01253249651873147, |
| "eval_G_dig": 0.06213467604891007, |
| "eval_G_exon": 0.024896961978975552, |
| "eval_G_intron": 0.028342175683099283, |
| "eval_G_nig": 0.02919129746961289, |
| "eval_G_promoter": 0.022597925718165046, |
| "eval_G_utr": 0.023054706568891827, |
| "eval_avg_bp_per_token": 7560.243980738363, |
| "eval_bp_per_token/cds": 37103.333333333336, |
| "eval_bp_per_token/dig": 1218.952380952381, |
| "eval_bp_per_token/exon": 8175.666666666667, |
| "eval_bp_per_token/intron": 6993.436997319035, |
| "eval_bp_per_token/nig": 6565.0759493670885, |
| "eval_bp_per_token/promoter": 14613.23076923077, |
| "eval_bp_per_token/utr": 13403.0, |
| "eval_ppl_cds": 3.943436136880756, |
| "eval_ppl_dig": 3.176018188461278, |
| "eval_ppl_exon": 3.6071938696456316, |
| "eval_ppl_intron": 3.4512226027500135, |
| "eval_ppl_nig": 3.448745566665228, |
| "eval_ppl_promoter": 3.613322540752091, |
| "eval_ppl_utr": 3.6552658587709104, |
| "step": 1000, |
| "tokens_trained": 3.276502928 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.2865248226950355, |
| "grad_norm": 11.638834953308105, |
| "loss": 1.2968, |
| "loss_ce": 1.2562685012817383, |
| "loss_region": 0.060554198920726776, |
| "loss_total": 1.3168226480484009, |
| "lr": 0.0012040340694135716, |
| "router/selected_tokens_s0": 1682.25, |
| "router/selected_tokens_s1": 97.0625, |
| "step": 1010, |
| "tokens_trained": 3.309267568 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.28936170212765955, |
| "grad_norm": 5.937789440155029, |
| "loss": 1.2905, |
| "loss_ce": 1.187785029411316, |
| "loss_region": 0.04547436535358429, |
| "loss_total": 1.2332594394683838, |
| "lr": 0.0012036271774297205, |
| "router/selected_tokens_s0": 1010.0625, |
| "router/selected_tokens_s1": 1.4375, |
| "step": 1020, |
| "tokens_trained": 3.342033008 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.29219858156028367, |
| "grad_norm": 3.617494583129883, |
| "loss": 1.2825, |
| "loss_ce": 1.1796470880508423, |
| "loss_region": 0.0736464411020279, |
| "loss_total": 1.253293514251709, |
| "lr": 0.0012032202854458697, |
| "router/selected_tokens_s0": 1146.0, |
| "router/selected_tokens_s1": 199.1875, |
| "step": 1030, |
| "tokens_trained": 3.374798448 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.2950354609929078, |
| "grad_norm": 2.3506853580474854, |
| "loss": 1.2695, |
| "loss_ce": 1.18293035030365, |
| "loss_region": 0.04785334691405296, |
| "loss_total": 1.2307837009429932, |
| "lr": 0.0012028133934620187, |
| "router/selected_tokens_s0": 1239.75, |
| "router/selected_tokens_s1": 3.0625, |
| "step": 1040, |
| "tokens_trained": 3.407563888 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.2978723404255319, |
| "grad_norm": 6.109251499176025, |
| "loss": 1.2707, |
| "loss_ce": 1.2379461526870728, |
| "loss_region": 0.045713216066360474, |
| "loss_total": 1.2836593389511108, |
| "lr": 0.0012024065014781676, |
| "router/selected_tokens_s0": 1013.0, |
| "router/selected_tokens_s1": 2.5625, |
| "step": 1050, |
| "tokens_trained": 3.440328528 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.300709219858156, |
| "grad_norm": 3.1301839351654053, |
| "loss": 1.2889, |
| "loss_ce": 1.2027920484542847, |
| "loss_region": 0.16526439785957336, |
| "loss_total": 1.3680564165115356, |
| "lr": 0.0012019996094943166, |
| "router/selected_tokens_s0": 473.25, |
| "router/selected_tokens_s1": 370.125, |
| "step": 1060, |
| "tokens_trained": 3.473093968 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.30354609929078014, |
| "grad_norm": 4.692104816436768, |
| "loss": 1.2901, |
| "loss_ce": 1.2338565587997437, |
| "loss_region": 0.04492371156811714, |
| "loss_total": 1.278780221939087, |
| "lr": 0.0012015927175104656, |
| "router/selected_tokens_s0": 879.5, |
| "router/selected_tokens_s1": 4.0625, |
| "step": 1070, |
| "tokens_trained": 3.505859408 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.30638297872340425, |
| "grad_norm": 3.102351188659668, |
| "loss": 1.2657, |
| "loss_ce": 1.2443020343780518, |
| "loss_region": 0.05263407900929451, |
| "loss_total": 1.2969361543655396, |
| "lr": 0.0012011858255266145, |
| "router/selected_tokens_s0": 1711.9375, |
| "router/selected_tokens_s1": 5.3125, |
| "step": 1080, |
| "tokens_trained": 3.538624848 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.30921985815602837, |
| "grad_norm": 5.832976341247559, |
| "loss": 1.294, |
| "loss_ce": 1.2071852684020996, |
| "loss_region": 0.04505143314599991, |
| "loss_total": 1.2522367238998413, |
| "lr": 0.0012007789335427635, |
| "router/selected_tokens_s0": 877.1875, |
| "router/selected_tokens_s1": 5.125, |
| "step": 1090, |
| "tokens_trained": 3.571390288 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.3120567375886525, |
| "grad_norm": 2.2769346237182617, |
| "loss": 1.256, |
| "loss_ce": 1.1854358911514282, |
| "loss_region": 0.044262707233428955, |
| "loss_total": 1.229698657989502, |
| "lr": 0.0012003720415589125, |
| "router/selected_tokens_s0": 823.875, |
| "router/selected_tokens_s1": 2.1875, |
| "step": 1100, |
| "tokens_trained": 3.604155728 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.3148936170212766, |
| "grad_norm": 3.1605770587921143, |
| "loss": 1.234, |
| "loss_ce": 1.1617177724838257, |
| "loss_region": 0.04650801420211792, |
| "loss_total": 1.2082257270812988, |
| "lr": 0.0011999651495750614, |
| "router/selected_tokens_s0": 1039.4375, |
| "router/selected_tokens_s1": 8.5625, |
| "step": 1110, |
| "tokens_trained": 3.636921168 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.3177304964539007, |
| "grad_norm": 4.28217077255249, |
| "loss": 1.244, |
| "loss_ce": 1.1768839359283447, |
| "loss_region": 0.04629362002015114, |
| "loss_total": 1.2231775522232056, |
| "lr": 0.0011995582575912104, |
| "router/selected_tokens_s0": 1077.6875, |
| "router/selected_tokens_s1": 3.125, |
| "step": 1120, |
| "tokens_trained": 3.669686584 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.32056737588652484, |
| "grad_norm": 3.1309468746185303, |
| "loss": 1.2511, |
| "loss_ce": 1.1429551839828491, |
| "loss_region": 0.04654176905751228, |
| "loss_total": 1.1894969940185547, |
| "lr": 0.0011991513656073594, |
| "router/selected_tokens_s0": 909.5, |
| "router/selected_tokens_s1": 10.25, |
| "step": 1130, |
| "tokens_trained": 3.702452024 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.32340425531914896, |
| "grad_norm": 3.077151298522949, |
| "loss": 1.2317, |
| "loss_ce": 1.1686514616012573, |
| "loss_region": 0.047968540340662, |
| "loss_total": 1.2166199684143066, |
| "lr": 0.0011987444736235083, |
| "router/selected_tokens_s0": 1251.25, |
| "router/selected_tokens_s1": 3.625, |
| "step": 1140, |
| "tokens_trained": 3.735217464 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.3262411347517731, |
| "grad_norm": 2.451178789138794, |
| "loss": 1.249, |
| "loss_ce": 1.2107584476470947, |
| "loss_region": 0.07213801145553589, |
| "loss_total": 1.2828965187072754, |
| "lr": 0.0011983375816396573, |
| "router/selected_tokens_s0": 1354.875, |
| "router/selected_tokens_s1": 211.875, |
| "step": 1150, |
| "tokens_trained": 3.767982904 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.32907801418439714, |
| "grad_norm": 5.528106689453125, |
| "loss": 1.2417, |
| "loss_ce": 1.1774345636367798, |
| "loss_region": 0.049906354397535324, |
| "loss_total": 1.2273409366607666, |
| "lr": 0.0011979306896558062, |
| "router/selected_tokens_s0": 1311.375, |
| "router/selected_tokens_s1": 18.375, |
| "step": 1160, |
| "tokens_trained": 3.800748344 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.33191489361702126, |
| "grad_norm": 2.824122667312622, |
| "loss": 1.2461, |
| "loss_ce": 1.1278353929519653, |
| "loss_region": 0.05486295744776726, |
| "loss_total": 1.182698369026184, |
| "lr": 0.0011975237976719552, |
| "router/selected_tokens_s0": 1045.125, |
| "router/selected_tokens_s1": 62.1875, |
| "step": 1170, |
| "tokens_trained": 3.833513784 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.3347517730496454, |
| "grad_norm": 7.002549171447754, |
| "loss": 1.2696, |
| "loss_ce": 1.245290994644165, |
| "loss_region": 0.05316196009516716, |
| "loss_total": 1.2984529733657837, |
| "lr": 0.0011971169056881042, |
| "router/selected_tokens_s0": 1337.0, |
| "router/selected_tokens_s1": 41.9375, |
| "step": 1180, |
| "tokens_trained": 3.866278424 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.3375886524822695, |
| "grad_norm": 3.7295656204223633, |
| "loss": 1.2499, |
| "loss_ce": 1.124242901802063, |
| "loss_region": 0.058403585106134415, |
| "loss_total": 1.1826465129852295, |
| "lr": 0.0011967100137042531, |
| "router/selected_tokens_s0": 1535.5, |
| "router/selected_tokens_s1": 107.8125, |
| "step": 1190, |
| "tokens_trained": 3.899043864 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.3404255319148936, |
| "grad_norm": 3.8166322708129883, |
| "loss": 1.2437, |
| "loss_ce": 1.1664611101150513, |
| "loss_region": 0.06889785826206207, |
| "loss_total": 1.2353589534759521, |
| "lr": 0.0011963031217204021, |
| "router/selected_tokens_s0": 952.4375, |
| "router/selected_tokens_s1": 147.5, |
| "step": 1200, |
| "tokens_trained": 3.931809304 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.3432624113475177, |
| "grad_norm": 3.671513080596924, |
| "loss": 1.2376, |
| "loss_ce": 1.2160379886627197, |
| "loss_region": 0.06595063954591751, |
| "loss_total": 1.2819886207580566, |
| "lr": 0.0011958962297365513, |
| "router/selected_tokens_s0": 921.4375, |
| "router/selected_tokens_s1": 125.75, |
| "step": 1210, |
| "tokens_trained": 3.964574744 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.34609929078014184, |
| "grad_norm": 5.063389778137207, |
| "loss": 1.2304, |
| "loss_ce": 1.187891960144043, |
| "loss_region": 0.05452219769358635, |
| "loss_total": 1.242414116859436, |
| "lr": 0.0011954893377527003, |
| "router/selected_tokens_s0": 1445.0, |
| "router/selected_tokens_s1": 50.5, |
| "step": 1220, |
| "tokens_trained": 3.997337592 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.34893617021276596, |
| "grad_norm": 3.887139320373535, |
| "loss": 1.2444, |
| "loss_ce": 1.176914930343628, |
| "loss_region": 0.1330278515815735, |
| "loss_total": 1.3099427223205566, |
| "lr": 0.0011950824457688492, |
| "router/selected_tokens_s0": 1880.9375, |
| "router/selected_tokens_s1": 972.0625, |
| "step": 1230, |
| "tokens_trained": 4.030103032 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.3517730496453901, |
| "grad_norm": 5.268446922302246, |
| "loss": 1.2329, |
| "loss_ce": 1.1647382974624634, |
| "loss_region": 0.04794379696249962, |
| "loss_total": 1.2126821279525757, |
| "lr": 0.0011946755537849982, |
| "router/selected_tokens_s0": 1246.9375, |
| "router/selected_tokens_s1": 5.8125, |
| "step": 1240, |
| "tokens_trained": 4.062868472 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.3546099290780142, |
| "grad_norm": 2.72017765045166, |
| "loss": 1.231, |
| "loss_ce": 1.1569784879684448, |
| "loss_region": 0.04461895301938057, |
| "loss_total": 1.2015974521636963, |
| "lr": 0.001194268661801147, |
| "router/selected_tokens_s0": 858.125, |
| "router/selected_tokens_s1": 2.4375, |
| "step": 1250, |
| "tokens_trained": 4.095633912 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.3574468085106383, |
| "grad_norm": 2.106952428817749, |
| "loss": 1.2326, |
| "loss_ce": 1.139892339706421, |
| "loss_region": 0.046058911830186844, |
| "loss_total": 1.1859512329101562, |
| "lr": 0.001193861769817296, |
| "router/selected_tokens_s0": 921.5, |
| "router/selected_tokens_s1": 8.6875, |
| "step": 1260, |
| "tokens_trained": 4.128399352 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.36028368794326243, |
| "grad_norm": 4.2245659828186035, |
| "loss": 1.2321, |
| "loss_ce": 1.185473918914795, |
| "loss_region": 0.04969760775566101, |
| "loss_total": 1.2351715564727783, |
| "lr": 0.0011934548778334449, |
| "router/selected_tokens_s0": 731.8125, |
| "router/selected_tokens_s1": 27.0, |
| "step": 1270, |
| "tokens_trained": 4.161163992 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.36312056737588655, |
| "grad_norm": 3.381363868713379, |
| "loss": 1.2321, |
| "loss_ce": 1.221541404724121, |
| "loss_region": 0.050035104155540466, |
| "loss_total": 1.2715765237808228, |
| "lr": 0.001193047985849594, |
| "router/selected_tokens_s0": 1033.125, |
| "router/selected_tokens_s1": 29.875, |
| "step": 1280, |
| "tokens_trained": 4.193929432 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.3659574468085106, |
| "grad_norm": 5.191988468170166, |
| "loss": 1.2362, |
| "loss_ce": 1.2151143550872803, |
| "loss_region": 0.04746318981051445, |
| "loss_total": 1.2625775337219238, |
| "lr": 0.001192641093865743, |
| "router/selected_tokens_s0": 889.4375, |
| "router/selected_tokens_s1": 17.5, |
| "step": 1290, |
| "tokens_trained": 4.226694872 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.36879432624113473, |
| "grad_norm": 1.7778940200805664, |
| "loss": 1.2405, |
| "loss_ce": 1.1877528429031372, |
| "loss_region": 0.05633685365319252, |
| "loss_total": 1.2440897226333618, |
| "lr": 0.001192234201881892, |
| "router/selected_tokens_s0": 1150.3125, |
| "router/selected_tokens_s1": 73.875, |
| "step": 1300, |
| "tokens_trained": 4.259455304 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.37163120567375885, |
| "grad_norm": 4.316074371337891, |
| "loss": 1.2248, |
| "loss_ce": 1.1520181894302368, |
| "loss_region": 0.052013058215379715, |
| "loss_total": 1.204031229019165, |
| "lr": 0.001191827309898041, |
| "router/selected_tokens_s0": 1502.0, |
| "router/selected_tokens_s1": 21.5, |
| "step": 1310, |
| "tokens_trained": 4.292217888 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.37446808510638296, |
| "grad_norm": 3.6426265239715576, |
| "loss": 1.2266, |
| "loss_ce": 1.172141432762146, |
| "loss_region": 0.05179239809513092, |
| "loss_total": 1.2239338159561157, |
| "lr": 0.00119142041791419, |
| "router/selected_tokens_s0": 1641.5625, |
| "router/selected_tokens_s1": 12.375, |
| "step": 1320, |
| "tokens_trained": 4.324979816 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.3773049645390071, |
| "grad_norm": 2.515777587890625, |
| "loss": 1.2224, |
| "loss_ce": 1.098609209060669, |
| "loss_region": 0.049831029027700424, |
| "loss_total": 1.1484402418136597, |
| "lr": 0.0011910135259303389, |
| "router/selected_tokens_s0": 1349.75, |
| "router/selected_tokens_s1": 16.375, |
| "step": 1330, |
| "tokens_trained": 4.357745256 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.3801418439716312, |
| "grad_norm": 1.4164339303970337, |
| "loss": 1.2221, |
| "loss_ce": 1.1411010026931763, |
| "loss_region": 0.08194732666015625, |
| "loss_total": 1.2230483293533325, |
| "lr": 0.0011906066339464878, |
| "router/selected_tokens_s0": 1443.875, |
| "router/selected_tokens_s1": 315.6875, |
| "step": 1340, |
| "tokens_trained": 4.390510696 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.3829787234042553, |
| "grad_norm": 0.7069838047027588, |
| "loss": 1.211, |
| "loss_ce": 1.1557698249816895, |
| "loss_region": 0.04843400791287422, |
| "loss_total": 1.2042038440704346, |
| "lr": 0.0011901997419626368, |
| "router/selected_tokens_s0": 1291.625, |
| "router/selected_tokens_s1": 6.375, |
| "step": 1350, |
| "tokens_trained": 4.423276136 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.38581560283687943, |
| "grad_norm": 1.344914436340332, |
| "loss": 1.2166, |
| "loss_ce": 1.1191163063049316, |
| "loss_region": 0.05032026395201683, |
| "loss_total": 1.1694365739822388, |
| "lr": 0.0011897928499787858, |
| "router/selected_tokens_s0": 1199.625, |
| "router/selected_tokens_s1": 33.3125, |
| "step": 1360, |
| "tokens_trained": 4.456041576 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.38865248226950355, |
| "grad_norm": 0.5033648610115051, |
| "loss": 1.2192, |
| "loss_ce": 1.1610416173934937, |
| "loss_region": 0.053894009441137314, |
| "loss_total": 1.2149356603622437, |
| "lr": 0.0011893859579949347, |
| "router/selected_tokens_s0": 1109.625, |
| "router/selected_tokens_s1": 58.6875, |
| "step": 1370, |
| "tokens_trained": 4.488807016 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.39148936170212767, |
| "grad_norm": 1.1606615781784058, |
| "loss": 1.2223, |
| "loss_ce": 1.110776424407959, |
| "loss_region": 0.051735769957304, |
| "loss_total": 1.162512183189392, |
| "lr": 0.0011889790660110837, |
| "router/selected_tokens_s0": 1187.5625, |
| "router/selected_tokens_s1": 40.3125, |
| "step": 1380, |
| "tokens_trained": 4.521572456 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.3943262411347518, |
| "grad_norm": 4.230033874511719, |
| "loss": 1.2441, |
| "loss_ce": 1.173520803451538, |
| "loss_region": 0.060160037130117416, |
| "loss_total": 1.2336808443069458, |
| "lr": 0.0011885721740272327, |
| "router/selected_tokens_s0": 1960.1875, |
| "router/selected_tokens_s1": 84.4375, |
| "step": 1390, |
| "tokens_trained": 4.554337096 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.3971631205673759, |
| "grad_norm": 3.069769859313965, |
| "loss": 1.2342, |
| "loss_ce": 1.041337251663208, |
| "loss_region": 0.048631489276885986, |
| "loss_total": 1.0899686813354492, |
| "lr": 0.0011881652820433816, |
| "router/selected_tokens_s0": 1217.0, |
| "router/selected_tokens_s1": 14.1875, |
| "step": 1400, |
| "tokens_trained": 4.587102536 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.4, |
| "grad_norm": 3.1322920322418213, |
| "loss": 1.2287, |
| "loss_ce": 1.1824651956558228, |
| "loss_region": 0.058100152760744095, |
| "loss_total": 1.240565299987793, |
| "lr": 0.0011877583900595306, |
| "router/selected_tokens_s0": 1269.5625, |
| "router/selected_tokens_s1": 87.75, |
| "step": 1410, |
| "tokens_trained": 4.619867976 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.40283687943262414, |
| "grad_norm": 3.767857074737549, |
| "loss": 1.2154, |
| "loss_ce": 1.2003581523895264, |
| "loss_region": 0.07572433352470398, |
| "loss_total": 1.2760825157165527, |
| "lr": 0.0011873514980756796, |
| "router/selected_tokens_s0": 1357.375, |
| "router/selected_tokens_s1": 241.75, |
| "step": 1420, |
| "tokens_trained": 4.652632616 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.4056737588652482, |
| "grad_norm": 2.0261361598968506, |
| "loss": 1.2207, |
| "loss_ce": 1.1732327938079834, |
| "loss_region": 0.0638844221830368, |
| "loss_total": 1.2371171712875366, |
| "lr": 0.0011869446060918285, |
| "router/selected_tokens_s0": 1132.5, |
| "router/selected_tokens_s1": 132.1875, |
| "step": 1430, |
| "tokens_trained": 4.685398056 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.4085106382978723, |
| "grad_norm": 1.6216543912887573, |
| "loss": 1.2201, |
| "loss_ce": 1.154977560043335, |
| "loss_region": 0.04831705614924431, |
| "loss_total": 1.2032946348190308, |
| "lr": 0.0011865377141079775, |
| "router/selected_tokens_s0": 1138.3125, |
| "router/selected_tokens_s1": 15.125, |
| "step": 1440, |
| "tokens_trained": 4.718163496 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.41134751773049644, |
| "grad_norm": 1.2403500080108643, |
| "loss": 1.2158, |
| "loss_ce": 1.1079224348068237, |
| "loss_region": 0.049330513924360275, |
| "loss_total": 1.1572529077529907, |
| "lr": 0.0011861308221241265, |
| "router/selected_tokens_s0": 1109.0, |
| "router/selected_tokens_s1": 22.5625, |
| "step": 1450, |
| "tokens_trained": 4.750928936 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.41418439716312055, |
| "grad_norm": 1.9023810625076294, |
| "loss": 1.2098, |
| "loss_ce": 1.2203904390335083, |
| "loss_region": 0.08725684881210327, |
| "loss_total": 1.3076472282409668, |
| "lr": 0.0011857239301402756, |
| "router/selected_tokens_s0": 1068.0625, |
| "router/selected_tokens_s1": 285.8125, |
| "step": 1460, |
| "tokens_trained": 4.783694376 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.41702127659574467, |
| "grad_norm": 1.0339922904968262, |
| "loss": 1.2123, |
| "loss_ce": 1.1580380201339722, |
| "loss_region": 0.04887384921312332, |
| "loss_total": 1.2069119215011597, |
| "lr": 0.0011853170381564246, |
| "router/selected_tokens_s0": 1223.0625, |
| "router/selected_tokens_s1": 14.75, |
| "step": 1470, |
| "tokens_trained": 4.816459816 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.4198581560283688, |
| "grad_norm": 1.122446060180664, |
| "loss": 1.2059, |
| "loss_ce": 1.1485178470611572, |
| "loss_region": 0.050783149898052216, |
| "loss_total": 1.19930100440979, |
| "lr": 0.0011849101461725736, |
| "router/selected_tokens_s0": 1172.8125, |
| "router/selected_tokens_s1": 31.8125, |
| "step": 1480, |
| "tokens_trained": 4.849225256 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.4226950354609929, |
| "grad_norm": 1.6927247047424316, |
| "loss": 1.212, |
| "loss_ce": 1.1878578662872314, |
| "loss_region": 0.048136983066797256, |
| "loss_total": 1.235994815826416, |
| "lr": 0.0011845032541887225, |
| "router/selected_tokens_s0": 1040.125, |
| "router/selected_tokens_s1": 16.5, |
| "step": 1490, |
| "tokens_trained": 4.881990696 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.425531914893617, |
| "grad_norm": 1.4076164960861206, |
| "loss": 1.2102, |
| "loss_ce": 1.1968106031417847, |
| "loss_region": 0.05341554060578346, |
| "loss_total": 1.2502261400222778, |
| "lr": 0.0011840963622048713, |
| "router/selected_tokens_s0": 1112.4375, |
| "router/selected_tokens_s1": 51.1875, |
| "step": 1500, |
| "tokens_trained": 4.914756056 |
| }, |
| { |
| "epoch": 0.425531914893617, |
| "eval_ppl": 3.1645482791951447, |
| "eval_runtime": 2.0297, |
| "step": 1500, |
| "tokens_trained": 4.914756056 |
| }, |
| { |
| "epoch": 0.425531914893617, |
| "eval_F": 0.05049732146193486, |
| "eval_F_cds": 0.06451352079777199, |
| "eval_F_dig": 0.0033987030236737243, |
| "eval_F_exon": 0.05309794648074911, |
| "eval_F_intron": 0.04964938402608037, |
| "eval_F_nig": 0.04548039973700498, |
| "eval_F_promoter": 0.0584770387214958, |
| "eval_F_utr": 0.05909124822800865, |
| "eval_G": 0.043056454668858934, |
| "eval_G_cds": 0.040746177623304285, |
| "eval_G_dig": 0.0450969069849207, |
| "eval_G_exon": 0.04159826309611177, |
| "eval_G_intron": 0.043054627385231345, |
| "eval_G_nig": 0.042699612123318444, |
| "eval_G_promoter": 0.04411404367732481, |
| "eval_G_utr": 0.04181456160534422, |
| "eval_avg_bp_per_token": 19.803030557844636, |
| "eval_bp_per_token/cds": 15.500626653669405, |
| "eval_bp_per_token/dig": 294.2298850574713, |
| "eval_bp_per_token/exon": 18.83312004095214, |
| "eval_bp_per_token/intron": 20.141236787040683, |
| "eval_bp_per_token/nig": 21.987493640834323, |
| "eval_bp_per_token/promoter": 17.100729138536323, |
| "eval_bp_per_token/utr": 16.9229797979798, |
| "eval_ppl_cds": 3.7596792967573776, |
| "eval_ppl_dig": 1.3696206224046767, |
| "eval_ppl_exon": 3.370696369681706, |
| "eval_ppl_intron": 3.179431822437879, |
| "eval_ppl_nig": 3.065018439879573, |
| "eval_ppl_promoter": 3.3819504463651193, |
| "eval_ppl_utr": 3.457320751431464, |
| "step": 1500, |
| "tokens_trained": 4.914756056 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.42836879432624114, |
| "grad_norm": 1.4229851961135864, |
| "loss": 1.2147, |
| "loss_ce": 1.1474426984786987, |
| "loss_region": 0.11874866485595703, |
| "loss_total": 1.2661913633346558, |
| "lr": 0.0011836894702210202, |
| "router/selected_tokens_s0": 1256.0, |
| "router/selected_tokens_s1": 587.125, |
| "step": 1510, |
| "tokens_trained": 4.947521496 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.43120567375886526, |
| "grad_norm": 2.1784424781799316, |
| "loss": 1.2044, |
| "loss_ce": 1.176586627960205, |
| "loss_region": 0.04810425266623497, |
| "loss_total": 1.2246909141540527, |
| "lr": 0.0011832825782371692, |
| "router/selected_tokens_s0": 1154.0625, |
| "router/selected_tokens_s1": 11.1875, |
| "step": 1520, |
| "tokens_trained": 4.980286136 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.4340425531914894, |
| "grad_norm": 0.44625309109687805, |
| "loss": 1.2082, |
| "loss_ce": 1.1421223878860474, |
| "loss_region": 0.04860352724790573, |
| "loss_total": 1.1907259225845337, |
| "lr": 0.0011828756862533184, |
| "router/selected_tokens_s0": 1218.0625, |
| "router/selected_tokens_s1": 13.375, |
| "step": 1530, |
| "tokens_trained": 5.013051576 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.4368794326241135, |
| "grad_norm": 0.541300356388092, |
| "loss": 1.206, |
| "loss_ce": 1.1447055339813232, |
| "loss_region": 0.049634430557489395, |
| "loss_total": 1.1943399906158447, |
| "lr": 0.0011824687942694674, |
| "router/selected_tokens_s0": 1159.25, |
| "router/selected_tokens_s1": 23.4375, |
| "step": 1540, |
| "tokens_trained": 5.045813688 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.4397163120567376, |
| "grad_norm": 2.8496859073638916, |
| "loss": 1.2067, |
| "loss_ce": 1.0945407152175903, |
| "loss_region": 0.057181403040885925, |
| "loss_total": 1.1517220735549927, |
| "lr": 0.0011820619022856163, |
| "router/selected_tokens_s0": 1242.875, |
| "router/selected_tokens_s1": 81.8125, |
| "step": 1550, |
| "tokens_trained": 5.078578552 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.4425531914893617, |
| "grad_norm": 4.080273628234863, |
| "loss": 1.2214, |
| "loss_ce": 1.145821452140808, |
| "loss_region": 0.06584899872541428, |
| "loss_total": 1.2116703987121582, |
| "lr": 0.0011816550103017653, |
| "router/selected_tokens_s0": 1449.75, |
| "router/selected_tokens_s1": 161.375, |
| "step": 1560, |
| "tokens_trained": 5.111343992 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.4453900709219858, |
| "grad_norm": 2.364082098007202, |
| "loss": 1.2205, |
| "loss_ce": 1.2248941659927368, |
| "loss_region": 0.05984827131032944, |
| "loss_total": 1.2847424745559692, |
| "lr": 0.0011812481183179143, |
| "router/selected_tokens_s0": 1213.0, |
| "router/selected_tokens_s1": 101.8125, |
| "step": 1570, |
| "tokens_trained": 5.144109432 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.4482269503546099, |
| "grad_norm": 1.8271722793579102, |
| "loss": 1.2147, |
| "loss_ce": 1.1531490087509155, |
| "loss_region": 0.1430894285440445, |
| "loss_total": 1.2962384223937988, |
| "lr": 0.0011808412263340632, |
| "router/selected_tokens_s0": 1225.4375, |
| "router/selected_tokens_s1": 764.125, |
| "step": 1580, |
| "tokens_trained": 5.176874072 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.451063829787234, |
| "grad_norm": 0.6056541800498962, |
| "loss": 1.2109, |
| "loss_ce": 1.139554738998413, |
| "loss_region": 0.05257788673043251, |
| "loss_total": 1.192132592201233, |
| "lr": 0.0011804343343502122, |
| "router/selected_tokens_s0": 1221.1875, |
| "router/selected_tokens_s1": 45.25, |
| "step": 1590, |
| "tokens_trained": 5.209639512 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.45390070921985815, |
| "grad_norm": 1.3845616579055786, |
| "loss": 1.2054, |
| "loss_ce": 1.1697837114334106, |
| "loss_region": 0.05317477881908417, |
| "loss_total": 1.2229584455490112, |
| "lr": 0.0011800274423663611, |
| "router/selected_tokens_s0": 1122.25, |
| "router/selected_tokens_s1": 49.6875, |
| "step": 1600, |
| "tokens_trained": 5.242403352 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.45673758865248226, |
| "grad_norm": 0.7946324944496155, |
| "loss": 1.204, |
| "loss_ce": 1.1398544311523438, |
| "loss_region": 0.0863887295126915, |
| "loss_total": 1.2262431383132935, |
| "lr": 0.0011796205503825101, |
| "router/selected_tokens_s0": 1121.625, |
| "router/selected_tokens_s1": 284.3125, |
| "step": 1610, |
| "tokens_trained": 5.275167992 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.4595744680851064, |
| "grad_norm": 1.388248085975647, |
| "loss": 1.2086, |
| "loss_ce": 1.1963084936141968, |
| "loss_region": 0.04745178297162056, |
| "loss_total": 1.2437602281570435, |
| "lr": 0.001179213658398659, |
| "router/selected_tokens_s0": 1122.8125, |
| "router/selected_tokens_s1": 7.625, |
| "step": 1620, |
| "tokens_trained": 5.307932632 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.4624113475177305, |
| "grad_norm": 1.0179280042648315, |
| "loss": 1.2065, |
| "loss_ce": 1.1650291681289673, |
| "loss_region": 0.047749392688274384, |
| "loss_total": 1.2127785682678223, |
| "lr": 0.001178806766414808, |
| "router/selected_tokens_s0": 1127.125, |
| "router/selected_tokens_s1": 9.625, |
| "step": 1630, |
| "tokens_trained": 5.340694016 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.4652482269503546, |
| "grad_norm": 0.6571646928787231, |
| "loss": 1.2057, |
| "loss_ce": 1.1609867811203003, |
| "loss_region": 0.05375194177031517, |
| "loss_total": 1.2147387266159058, |
| "lr": 0.001178399874430957, |
| "router/selected_tokens_s0": 1147.8125, |
| "router/selected_tokens_s1": 55.3125, |
| "step": 1640, |
| "tokens_trained": 5.373459456 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.46808510638297873, |
| "grad_norm": 0.7328802347183228, |
| "loss": 1.2096, |
| "loss_ce": 1.1533145904541016, |
| "loss_region": 0.04775061458349228, |
| "loss_total": 1.201065182685852, |
| "lr": 0.001177992982447106, |
| "router/selected_tokens_s0": 1126.6875, |
| "router/selected_tokens_s1": 9.625, |
| "step": 1650, |
| "tokens_trained": 5.406224096 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.47092198581560285, |
| "grad_norm": 2.1376028060913086, |
| "loss": 1.1969, |
| "loss_ce": 1.1875627040863037, |
| "loss_region": 0.06121131405234337, |
| "loss_total": 1.2487740516662598, |
| "lr": 0.001177586090463255, |
| "router/selected_tokens_s0": 1141.0, |
| "router/selected_tokens_s1": 111.25, |
| "step": 1660, |
| "tokens_trained": 5.438988736 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.47375886524822697, |
| "grad_norm": 4.265078067779541, |
| "loss": 1.2112, |
| "loss_ce": 1.149315595626831, |
| "loss_region": 0.049465492367744446, |
| "loss_total": 1.198781132698059, |
| "lr": 0.001177179198479404, |
| "router/selected_tokens_s0": 1495.0625, |
| "router/selected_tokens_s1": 9.875, |
| "step": 1670, |
| "tokens_trained": 5.471754176 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.4765957446808511, |
| "grad_norm": 2.6948628425598145, |
| "loss": 1.222, |
| "loss_ce": 1.1160846948623657, |
| "loss_region": 0.05118577182292938, |
| "loss_total": 1.1672704219818115, |
| "lr": 0.0011767723064955529, |
| "router/selected_tokens_s0": 1128.25, |
| "router/selected_tokens_s1": 39.1875, |
| "step": 1680, |
| "tokens_trained": 5.504519616 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.4794326241134752, |
| "grad_norm": 1.4104028940200806, |
| "loss": 1.2114, |
| "loss_ce": 1.1637930870056152, |
| "loss_region": 0.05358956381678581, |
| "loss_total": 1.2173826694488525, |
| "lr": 0.0011763654145117018, |
| "router/selected_tokens_s0": 994.9375, |
| "router/selected_tokens_s1": 52.9375, |
| "step": 1690, |
| "tokens_trained": 5.537285056 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.48226950354609927, |
| "grad_norm": 0.7200977206230164, |
| "loss": 1.2053, |
| "loss_ce": 1.1444610357284546, |
| "loss_region": 0.04805755242705345, |
| "loss_total": 1.1925185918807983, |
| "lr": 0.0011759585225278508, |
| "router/selected_tokens_s0": 1050.375, |
| "router/selected_tokens_s1": 14.0625, |
| "step": 1700, |
| "tokens_trained": 5.570050496 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.4851063829787234, |
| "grad_norm": 1.3361515998840332, |
| "loss": 1.2044, |
| "loss_ce": 1.1669089794158936, |
| "loss_region": 0.05295637622475624, |
| "loss_total": 1.219865322113037, |
| "lr": 0.001175551630544, |
| "router/selected_tokens_s0": 1105.625, |
| "router/selected_tokens_s1": 47.625, |
| "step": 1710, |
| "tokens_trained": 5.602815936 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.4879432624113475, |
| "grad_norm": 1.1902843713760376, |
| "loss": 1.2051, |
| "loss_ce": 1.1462538242340088, |
| "loss_region": 0.04917926713824272, |
| "loss_total": 1.1954331398010254, |
| "lr": 0.001175144738560149, |
| "router/selected_tokens_s0": 1159.9375, |
| "router/selected_tokens_s1": 20.625, |
| "step": 1720, |
| "tokens_trained": 5.635581376 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.4907801418439716, |
| "grad_norm": 0.38336893916130066, |
| "loss": 1.1996, |
| "loss_ce": 1.1662453413009644, |
| "loss_region": 0.0531914159655571, |
| "loss_total": 1.219436764717102, |
| "lr": 0.001174737846576298, |
| "router/selected_tokens_s0": 1226.1875, |
| "router/selected_tokens_s1": 49.9375, |
| "step": 1730, |
| "tokens_trained": 5.668346816 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.49361702127659574, |
| "grad_norm": 1.0268887281417847, |
| "loss": 1.2059, |
| "loss_ce": 1.1835399866104126, |
| "loss_region": 0.050249867141246796, |
| "loss_total": 1.2337898015975952, |
| "lr": 0.0011743309545924469, |
| "router/selected_tokens_s0": 1129.75, |
| "router/selected_tokens_s1": 27.0625, |
| "step": 1740, |
| "tokens_trained": 5.701112256 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.49645390070921985, |
| "grad_norm": 1.0396302938461304, |
| "loss": 1.2022, |
| "loss_ce": 1.1819915771484375, |
| "loss_region": 0.04947606101632118, |
| "loss_total": 1.231467604637146, |
| "lr": 0.0011739240626085956, |
| "router/selected_tokens_s0": 1276.6875, |
| "router/selected_tokens_s1": 19.0625, |
| "step": 1750, |
| "tokens_trained": 5.733877696 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.49929078014184397, |
| "grad_norm": 1.0907139778137207, |
| "loss": 1.1996, |
| "loss_ce": 1.0990666151046753, |
| "loss_region": 0.058163322508335114, |
| "loss_total": 1.1572299003601074, |
| "lr": 0.0011735171706247446, |
| "router/selected_tokens_s0": 1144.0, |
| "router/selected_tokens_s1": 87.6875, |
| "step": 1760, |
| "tokens_trained": 5.766643136 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.502127659574468, |
| "grad_norm": 1.3510611057281494, |
| "loss": 1.2023, |
| "loss_ce": 1.1430004835128784, |
| "loss_region": 0.051321737468242645, |
| "loss_total": 1.1943222284317017, |
| "lr": 0.0011731102786408936, |
| "router/selected_tokens_s0": 1124.75, |
| "router/selected_tokens_s1": 37.3125, |
| "step": 1770, |
| "tokens_trained": 5.799407776 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.5049645390070922, |
| "grad_norm": 1.4044612646102905, |
| "loss": 1.2061, |
| "loss_ce": 1.198064923286438, |
| "loss_region": 0.04685606434941292, |
| "loss_total": 1.2449209690093994, |
| "lr": 0.0011727033866570427, |
| "router/selected_tokens_s0": 1031.625, |
| "router/selected_tokens_s1": 5.75, |
| "step": 1780, |
| "tokens_trained": 5.832173216 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.5078014184397163, |
| "grad_norm": 1.5810678005218506, |
| "loss": 1.2003, |
| "loss_ce": 1.1610219478607178, |
| "loss_region": 0.049070414155721664, |
| "loss_total": 1.210092306137085, |
| "lr": 0.0011722964946731917, |
| "router/selected_tokens_s0": 1013.5, |
| "router/selected_tokens_s1": 20.6875, |
| "step": 1790, |
| "tokens_trained": 5.864938656 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.5106382978723404, |
| "grad_norm": 1.0402437448501587, |
| "loss": 1.2, |
| "loss_ce": 1.1509393453598022, |
| "loss_region": 0.08059847354888916, |
| "loss_total": 1.2315378189086914, |
| "lr": 0.0011718896026893407, |
| "router/selected_tokens_s0": 1129.0, |
| "router/selected_tokens_s1": 250.875, |
| "step": 1800, |
| "tokens_trained": 5.897704096 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.5134751773049645, |
| "grad_norm": 1.4817918539047241, |
| "loss": 1.1898, |
| "loss_ce": 1.2002263069152832, |
| "loss_region": 0.047367654740810394, |
| "loss_total": 1.2475939989089966, |
| "lr": 0.0011714827107054896, |
| "router/selected_tokens_s0": 1083.0625, |
| "router/selected_tokens_s1": 8.0, |
| "step": 1810, |
| "tokens_trained": 5.930469536 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.5163120567375886, |
| "grad_norm": 0.6873403787612915, |
| "loss": 1.1971, |
| "loss_ce": 1.1391804218292236, |
| "loss_region": 0.0709296390414238, |
| "loss_total": 1.210110068321228, |
| "lr": 0.0011710758187216386, |
| "router/selected_tokens_s0": 1092.5, |
| "router/selected_tokens_s1": 174.375, |
| "step": 1820, |
| "tokens_trained": 5.963234976 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.5191489361702127, |
| "grad_norm": 1.1967116594314575, |
| "loss": 1.2019, |
| "loss_ce": 1.0687319040298462, |
| "loss_region": 0.04903619363903999, |
| "loss_total": 1.1177680492401123, |
| "lr": 0.0011706689267377876, |
| "router/selected_tokens_s0": 1199.9375, |
| "router/selected_tokens_s1": 18.0, |
| "step": 1830, |
| "tokens_trained": 5.996000416 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.5219858156028369, |
| "grad_norm": 0.7093958854675293, |
| "loss": 1.1966, |
| "loss_ce": 1.1070630550384521, |
| "loss_region": 0.04815437272191048, |
| "loss_total": 1.1552174091339111, |
| "lr": 0.0011702620347539365, |
| "router/selected_tokens_s0": 1127.4375, |
| "router/selected_tokens_s1": 12.8125, |
| "step": 1840, |
| "tokens_trained": 6.02876568 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.524822695035461, |
| "grad_norm": 1.3091318607330322, |
| "loss": 1.1942, |
| "loss_ce": 1.1178659200668335, |
| "loss_region": 0.048088669776916504, |
| "loss_total": 1.16595458984375, |
| "lr": 0.0011698551427700855, |
| "router/selected_tokens_s0": 1102.1875, |
| "router/selected_tokens_s1": 13.5, |
| "step": 1850, |
| "tokens_trained": 6.06153112 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.5276595744680851, |
| "grad_norm": 0.7635153532028198, |
| "loss": 1.2067, |
| "loss_ce": 1.1375563144683838, |
| "loss_region": 0.1209544762969017, |
| "loss_total": 1.2585108280181885, |
| "lr": 0.0011694482507862345, |
| "router/selected_tokens_s0": 1088.3125, |
| "router/selected_tokens_s1": 526.125, |
| "step": 1860, |
| "tokens_trained": 6.09429256 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.5304964539007092, |
| "grad_norm": 1.4109469652175903, |
| "loss": 1.1868, |
| "loss_ce": 1.1450139284133911, |
| "loss_region": 0.04958714172244072, |
| "loss_total": 1.194601058959961, |
| "lr": 0.0011690413588023834, |
| "router/selected_tokens_s0": 1270.3125, |
| "router/selected_tokens_s1": 21.6875, |
| "step": 1870, |
| "tokens_trained": 6.127058 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.5333333333333333, |
| "grad_norm": 0.8178255558013916, |
| "loss": 1.2016, |
| "loss_ce": 1.1585605144500732, |
| "loss_region": 0.050786904990673065, |
| "loss_total": 1.2093473672866821, |
| "lr": 0.0011686344668185324, |
| "router/selected_tokens_s0": 1087.125, |
| "router/selected_tokens_s1": 32.3125, |
| "step": 1880, |
| "tokens_trained": 6.15982344 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.5361702127659574, |
| "grad_norm": 0.5921704769134521, |
| "loss": 1.1935, |
| "loss_ce": 1.1373960971832275, |
| "loss_region": 0.0489933118224144, |
| "loss_total": 1.186389446258545, |
| "lr": 0.0011682275748346814, |
| "router/selected_tokens_s0": 1191.6875, |
| "router/selected_tokens_s1": 18.0, |
| "step": 1890, |
| "tokens_trained": 6.19258888 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.5390070921985816, |
| "grad_norm": 0.452083945274353, |
| "loss": 1.1861, |
| "loss_ce": 1.1708186864852905, |
| "loss_region": 0.08669115602970123, |
| "loss_total": 1.2575098276138306, |
| "lr": 0.0011678206828508303, |
| "router/selected_tokens_s0": 1147.0625, |
| "router/selected_tokens_s1": 297.9375, |
| "step": 1900, |
| "tokens_trained": 6.22535432 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.5418439716312057, |
| "grad_norm": 0.6974602937698364, |
| "loss": 1.1975, |
| "loss_ce": 1.1292380094528198, |
| "loss_region": 0.06466572731733322, |
| "loss_total": 1.1939036846160889, |
| "lr": 0.0011674137908669793, |
| "router/selected_tokens_s0": 1140.625, |
| "router/selected_tokens_s1": 135.0, |
| "step": 1910, |
| "tokens_trained": 6.25811976 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.5446808510638298, |
| "grad_norm": 2.0106568336486816, |
| "loss": 1.1979, |
| "loss_ce": 1.1347987651824951, |
| "loss_region": 0.05081599950790405, |
| "loss_total": 1.185614824295044, |
| "lr": 0.0011670068988831283, |
| "router/selected_tokens_s0": 1291.625, |
| "router/selected_tokens_s1": 32.0625, |
| "step": 1920, |
| "tokens_trained": 6.2908852 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.5475177304964539, |
| "grad_norm": 1.8177540302276611, |
| "loss": 1.1976, |
| "loss_ce": 1.1749643087387085, |
| "loss_region": 0.04952458664774895, |
| "loss_total": 1.2244888544082642, |
| "lr": 0.0011666000068992772, |
| "router/selected_tokens_s0": 1120.1875, |
| "router/selected_tokens_s1": 22.8125, |
| "step": 1930, |
| "tokens_trained": 6.32365064 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.550354609929078, |
| "grad_norm": 0.6110920310020447, |
| "loss": 1.2036, |
| "loss_ce": 1.1350669860839844, |
| "loss_region": 0.07400424778461456, |
| "loss_total": 1.2090712785720825, |
| "lr": 0.0011661931149154262, |
| "router/selected_tokens_s0": 1123.625, |
| "router/selected_tokens_s1": 201.125, |
| "step": 1940, |
| "tokens_trained": 6.35641608 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.5531914893617021, |
| "grad_norm": 0.78255295753479, |
| "loss": 1.1923, |
| "loss_ce": 1.1034512519836426, |
| "loss_region": 0.055763859301805496, |
| "loss_total": 1.1592150926589966, |
| "lr": 0.0011657862229315751, |
| "router/selected_tokens_s0": 1213.375, |
| "router/selected_tokens_s1": 71.25, |
| "step": 1950, |
| "tokens_trained": 6.38918152 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.5560283687943263, |
| "grad_norm": 0.9850241541862488, |
| "loss": 1.1908, |
| "loss_ce": 1.0654675960540771, |
| "loss_region": 0.047422587871551514, |
| "loss_total": 1.1128902435302734, |
| "lr": 0.0011653793309477243, |
| "router/selected_tokens_s0": 1153.6875, |
| "router/selected_tokens_s1": 6.75, |
| "step": 1960, |
| "tokens_trained": 6.42194696 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.5588652482269504, |
| "grad_norm": 0.7804341912269592, |
| "loss": 1.1988, |
| "loss_ce": 1.11430025100708, |
| "loss_region": 0.0569145642220974, |
| "loss_total": 1.1712148189544678, |
| "lr": 0.0011649724389638733, |
| "router/selected_tokens_s0": 1115.1875, |
| "router/selected_tokens_s1": 77.375, |
| "step": 1970, |
| "tokens_trained": 6.4547124 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.5617021276595745, |
| "grad_norm": 0.6811673641204834, |
| "loss": 1.2018, |
| "loss_ce": 1.179452657699585, |
| "loss_region": 0.061086151748895645, |
| "loss_total": 1.2405388355255127, |
| "lr": 0.0011645655469800223, |
| "router/selected_tokens_s0": 1153.8125, |
| "router/selected_tokens_s1": 110.875, |
| "step": 1980, |
| "tokens_trained": 6.48747784 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.5645390070921986, |
| "grad_norm": 1.0210864543914795, |
| "loss": 1.1931, |
| "loss_ce": 1.1308600902557373, |
| "loss_region": 0.049080073833465576, |
| "loss_total": 1.1799402236938477, |
| "lr": 0.0011641586549961712, |
| "router/selected_tokens_s0": 1134.1875, |
| "router/selected_tokens_s1": 19.5, |
| "step": 1990, |
| "tokens_trained": 6.52024328 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.5673758865248227, |
| "grad_norm": 0.5659611225128174, |
| "loss": 1.1891, |
| "loss_ce": 1.1048927307128906, |
| "loss_region": 0.048004768788814545, |
| "loss_total": 1.1528974771499634, |
| "lr": 0.00116375176301232, |
| "router/selected_tokens_s0": 1114.1875, |
| "router/selected_tokens_s1": 11.875, |
| "step": 2000, |
| "tokens_trained": 6.55300872 |
| }, |
| { |
| "epoch": 0.5673758865248227, |
| "eval_ppl": 3.1136783371919927, |
| "eval_runtime": 2.0276, |
| "step": 2000, |
| "tokens_trained": 6.55300872 |
| }, |
| { |
| "epoch": 0.5673758865248227, |
| "eval_F": 0.01069504411010371, |
| "eval_F_cds": 0.010861557811517384, |
| "eval_F_dig": 0.006484881631377452, |
| "eval_F_exon": 0.009934629863687637, |
| "eval_F_intron": 0.010966237207462225, |
| "eval_F_nig": 0.01021226628824177, |
| "eval_F_promoter": 0.010854231149853663, |
| "eval_F_utr": 0.00875070613188945, |
| "eval_G": 0.04274226492450051, |
| "eval_G_cds": 0.04430924894438954, |
| "eval_G_dig": 0.05328854121904055, |
| "eval_G_exon": 0.0431136795674155, |
| "eval_G_intron": 0.04271742073044739, |
| "eval_G_nig": 0.042630800601355275, |
| "eval_G_promoter": 0.04267057437658576, |
| "eval_G_utr": 0.040218208077082956, |
| "eval_avg_bp_per_token": 93.5012506451741, |
| "eval_bp_per_token/cds": 92.06782464846981, |
| "eval_bp_per_token/dig": 154.20481927710844, |
| "eval_bp_per_token/exon": 100.65800273597812, |
| "eval_bp_per_token/intron": 91.18898133258757, |
| "eval_bp_per_token/nig": 97.92145756631739, |
| "eval_bp_per_token/promoter": 92.12997090203686, |
| "eval_bp_per_token/utr": 114.27649208282583, |
| "eval_ppl_cds": 3.7525472911998365, |
| "eval_ppl_dig": 1.205216056365389, |
| "eval_ppl_exon": 3.3402803721577903, |
| "eval_ppl_intron": 3.1339518485780027, |
| "eval_ppl_nig": 2.9991873798849777, |
| "eval_ppl_promoter": 3.3500096344554424, |
| "eval_ppl_utr": 3.431926820268814, |
| "step": 2000, |
| "tokens_trained": 6.55300872 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.5702127659574469, |
| "grad_norm": 0.3684820234775543, |
| "loss": 1.1983, |
| "loss_ce": 1.14522385597229, |
| "loss_region": 0.047928690910339355, |
| "loss_total": 1.1931525468826294, |
| "lr": 0.001163344871028469, |
| "router/selected_tokens_s0": 1213.1875, |
| "router/selected_tokens_s1": 10.0, |
| "step": 2010, |
| "tokens_trained": 6.58577416 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.573049645390071, |
| "grad_norm": 0.34278973937034607, |
| "loss": 1.1998, |
| "loss_ce": 1.0959526300430298, |
| "loss_region": 0.05933360382914543, |
| "loss_total": 1.155286192893982, |
| "lr": 0.001162937979044618, |
| "router/selected_tokens_s0": 1201.0625, |
| "router/selected_tokens_s1": 95.9375, |
| "step": 2020, |
| "tokens_trained": 6.6185396 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.5758865248226951, |
| "grad_norm": 0.885452151298523, |
| "loss": 1.1935, |
| "loss_ce": 1.040181040763855, |
| "loss_region": 0.06413589417934418, |
| "loss_total": 1.1043169498443604, |
| "lr": 0.001162531087060767, |
| "router/selected_tokens_s0": 1029.1875, |
| "router/selected_tokens_s1": 119.9375, |
| "step": 2030, |
| "tokens_trained": 6.65130504 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.5787234042553191, |
| "grad_norm": 0.6951954364776611, |
| "loss": 1.1923, |
| "loss_ce": 1.115369439125061, |
| "loss_region": 0.06800974160432816, |
| "loss_total": 1.1833791732788086, |
| "lr": 0.001162124195076916, |
| "router/selected_tokens_s0": 1207.75, |
| "router/selected_tokens_s1": 168.9375, |
| "step": 2040, |
| "tokens_trained": 6.68407048 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.5815602836879432, |
| "grad_norm": 0.2609427571296692, |
| "loss": 1.1918, |
| "loss_ce": 1.1436803340911865, |
| "loss_region": 0.056535884737968445, |
| "loss_total": 1.2002161741256714, |
| "lr": 0.001161717303093065, |
| "router/selected_tokens_s0": 1019.75, |
| "router/selected_tokens_s1": 69.3125, |
| "step": 2050, |
| "tokens_trained": 6.71683512 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.5843971631205673, |
| "grad_norm": 0.9476459622383118, |
| "loss": 1.1933, |
| "loss_ce": 1.1254041194915771, |
| "loss_region": 0.055064037442207336, |
| "loss_total": 1.180468201637268, |
| "lr": 0.001161310411109214, |
| "router/selected_tokens_s0": 1176.8125, |
| "router/selected_tokens_s1": 65.375, |
| "step": 2060, |
| "tokens_trained": 6.74960056 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.5872340425531914, |
| "grad_norm": 0.971532940864563, |
| "loss": 1.1914, |
| "loss_ce": 1.1687484979629517, |
| "loss_region": 0.06600674241781235, |
| "loss_total": 1.234755277633667, |
| "lr": 0.001160903519125363, |
| "router/selected_tokens_s0": 1138.625, |
| "router/selected_tokens_s1": 144.75, |
| "step": 2070, |
| "tokens_trained": 6.782364224 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.5900709219858156, |
| "grad_norm": 0.7967547178268433, |
| "loss": 1.1971, |
| "loss_ce": 1.1594549417495728, |
| "loss_region": 0.06982410699129105, |
| "loss_total": 1.2292790412902832, |
| "lr": 0.001160496627141512, |
| "router/selected_tokens_s0": 1132.0625, |
| "router/selected_tokens_s1": 172.1875, |
| "step": 2080, |
| "tokens_trained": 6.815129664 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.5929078014184397, |
| "grad_norm": 0.5164719223976135, |
| "loss": 1.1943, |
| "loss_ce": 1.2028053998947144, |
| "loss_region": 0.051641225814819336, |
| "loss_total": 1.2544466257095337, |
| "lr": 0.0011600897351576609, |
| "router/selected_tokens_s0": 1053.375, |
| "router/selected_tokens_s1": 37.375, |
| "step": 2090, |
| "tokens_trained": 6.847895104 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.5957446808510638, |
| "grad_norm": 0.801796555519104, |
| "loss": 1.1852, |
| "loss_ce": 1.0660555362701416, |
| "loss_region": 0.08737988770008087, |
| "loss_total": 1.153435468673706, |
| "lr": 0.0011596828431738098, |
| "router/selected_tokens_s0": 1178.875, |
| "router/selected_tokens_s1": 311.375, |
| "step": 2100, |
| "tokens_trained": 6.880660544 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.5985815602836879, |
| "grad_norm": 0.8828497529029846, |
| "loss": 1.1881, |
| "loss_ce": 1.1242519617080688, |
| "loss_region": 0.0549292154610157, |
| "loss_total": 1.1791812181472778, |
| "lr": 0.0011592759511899588, |
| "router/selected_tokens_s0": 1131.3125, |
| "router/selected_tokens_s1": 62.0625, |
| "step": 2110, |
| "tokens_trained": 6.913423848 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.601418439716312, |
| "grad_norm": 1.086348533630371, |
| "loss": 1.1924, |
| "loss_ce": 1.0381351709365845, |
| "loss_region": 0.06273744255304337, |
| "loss_total": 1.1008726358413696, |
| "lr": 0.0011588690592061078, |
| "router/selected_tokens_s0": 1137.0, |
| "router/selected_tokens_s1": 121.9375, |
| "step": 2120, |
| "tokens_trained": 6.946189128 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.6042553191489362, |
| "grad_norm": 1.3790611028671265, |
| "loss": 1.1905, |
| "loss_ce": 1.1533229351043701, |
| "loss_region": 0.05107864364981651, |
| "loss_total": 1.2044016122817993, |
| "lr": 0.0011584621672222567, |
| "router/selected_tokens_s0": 921.25, |
| "router/selected_tokens_s1": 32.25, |
| "step": 2130, |
| "tokens_trained": 6.978954568 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.6070921985815603, |
| "grad_norm": 0.5741104483604431, |
| "loss": 1.1958, |
| "loss_ce": 1.1531747579574585, |
| "loss_region": 0.06062934547662735, |
| "loss_total": 1.2138041257858276, |
| "lr": 0.0011580552752384057, |
| "router/selected_tokens_s0": 1092.3125, |
| "router/selected_tokens_s1": 102.125, |
| "step": 2140, |
| "tokens_trained": 7.011720008 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.6099290780141844, |
| "grad_norm": 0.3919357657432556, |
| "loss": 1.1936, |
| "loss_ce": 1.0967808961868286, |
| "loss_region": 0.058883871883153915, |
| "loss_total": 1.1556648015975952, |
| "lr": 0.0011576483832545547, |
| "router/selected_tokens_s0": 1085.1875, |
| "router/selected_tokens_s1": 88.75, |
| "step": 2150, |
| "tokens_trained": 7.044485448 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.6127659574468085, |
| "grad_norm": 1.0066328048706055, |
| "loss": 1.1928, |
| "loss_ce": 1.1479171514511108, |
| "loss_region": 0.0568971112370491, |
| "loss_total": 1.2048143148422241, |
| "lr": 0.0011572414912707036, |
| "router/selected_tokens_s0": 1115.6875, |
| "router/selected_tokens_s1": 76.6875, |
| "step": 2160, |
| "tokens_trained": 7.077250888 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.6156028368794326, |
| "grad_norm": 0.6049533486366272, |
| "loss": 1.1869, |
| "loss_ce": 1.1199055910110474, |
| "loss_region": 0.062149304896593094, |
| "loss_total": 1.182054877281189, |
| "lr": 0.0011568345992868526, |
| "router/selected_tokens_s0": 1205.0, |
| "router/selected_tokens_s1": 121.1875, |
| "step": 2170, |
| "tokens_trained": 7.110016328 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.6184397163120567, |
| "grad_norm": 0.3710135221481323, |
| "loss": 1.1891, |
| "loss_ce": 1.1549140214920044, |
| "loss_region": 0.049698520451784134, |
| "loss_total": 1.2046124935150146, |
| "lr": 0.0011564277073030016, |
| "router/selected_tokens_s0": 1194.3125, |
| "router/selected_tokens_s1": 23.625, |
| "step": 2180, |
| "tokens_trained": 7.1427786 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.6212765957446809, |
| "grad_norm": 0.24183577299118042, |
| "loss": 1.1948, |
| "loss_ce": 1.1288636922836304, |
| "loss_region": 0.056807950139045715, |
| "loss_total": 1.1856716871261597, |
| "lr": 0.0011560208153191505, |
| "router/selected_tokens_s0": 1169.4375, |
| "router/selected_tokens_s1": 78.6875, |
| "step": 2190, |
| "tokens_trained": 7.17554404 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.624113475177305, |
| "grad_norm": 0.46073052287101746, |
| "loss": 1.191, |
| "loss_ce": 1.075547218322754, |
| "loss_region": 0.05498499423265457, |
| "loss_total": 1.1305322647094727, |
| "lr": 0.0011556139233352995, |
| "router/selected_tokens_s0": 1175.8125, |
| "router/selected_tokens_s1": 63.0625, |
| "step": 2200, |
| "tokens_trained": 7.20830948 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.6269503546099291, |
| "grad_norm": 1.0049269199371338, |
| "loss": 1.1898, |
| "loss_ce": 1.1013010740280151, |
| "loss_region": 0.06765102595090866, |
| "loss_total": 1.1689521074295044, |
| "lr": 0.0011552070313514487, |
| "router/selected_tokens_s0": 1242.0, |
| "router/selected_tokens_s1": 169.625, |
| "step": 2210, |
| "tokens_trained": 7.24107492 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.6297872340425532, |
| "grad_norm": 1.004379391670227, |
| "loss": 1.1946, |
| "loss_ce": 1.1723252534866333, |
| "loss_region": 0.059653084725141525, |
| "loss_total": 1.2319782972335815, |
| "lr": 0.0011548001393675976, |
| "router/selected_tokens_s0": 1147.875, |
| "router/selected_tokens_s1": 98.0, |
| "step": 2220, |
| "tokens_trained": 7.27384036 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.6326241134751773, |
| "grad_norm": 0.47309768199920654, |
| "loss": 1.1923, |
| "loss_ce": 1.1086316108703613, |
| "loss_region": 0.061595749109983444, |
| "loss_total": 1.1702274084091187, |
| "lr": 0.0011543932473837466, |
| "router/selected_tokens_s0": 1165.5625, |
| "router/selected_tokens_s1": 115.375, |
| "step": 2230, |
| "tokens_trained": 7.3066058 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.6354609929078014, |
| "grad_norm": 1.1624172925949097, |
| "loss": 1.1849, |
| "loss_ce": 1.1424520015716553, |
| "loss_region": 0.05363762006163597, |
| "loss_total": 1.1960896253585815, |
| "lr": 0.0011539863553998956, |
| "router/selected_tokens_s0": 1145.5625, |
| "router/selected_tokens_s1": 53.0625, |
| "step": 2240, |
| "tokens_trained": 7.33937124 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.6382978723404256, |
| "grad_norm": 0.5090921521186829, |
| "loss": 1.1891, |
| "loss_ce": 1.0883148908615112, |
| "loss_region": 0.05045454576611519, |
| "loss_total": 1.1387693881988525, |
| "lr": 0.0011535794634160443, |
| "router/selected_tokens_s0": 1139.625, |
| "router/selected_tokens_s1": 30.5, |
| "step": 2250, |
| "tokens_trained": 7.37213668 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.6411347517730497, |
| "grad_norm": 0.24586611986160278, |
| "loss": 1.1871, |
| "loss_ce": 1.122402548789978, |
| "loss_region": 0.05342685803771019, |
| "loss_total": 1.1758294105529785, |
| "lr": 0.0011531725714321933, |
| "router/selected_tokens_s0": 1129.75, |
| "router/selected_tokens_s1": 52.5, |
| "step": 2260, |
| "tokens_trained": 7.40490212 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.6439716312056738, |
| "grad_norm": 1.1430929899215698, |
| "loss": 1.1878, |
| "loss_ce": 1.0667387247085571, |
| "loss_region": 0.05230208858847618, |
| "loss_total": 1.119040846824646, |
| "lr": 0.0011527656794483422, |
| "router/selected_tokens_s0": 1125.25, |
| "router/selected_tokens_s1": 43.8125, |
| "step": 2270, |
| "tokens_trained": 7.43766756 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.6468085106382979, |
| "grad_norm": 1.836784839630127, |
| "loss": 1.1938, |
| "loss_ce": 1.1883622407913208, |
| "loss_region": 0.0947345495223999, |
| "loss_total": 1.2830967903137207, |
| "lr": 0.0011523587874644914, |
| "router/selected_tokens_s0": 1179.6875, |
| "router/selected_tokens_s1": 368.125, |
| "step": 2280, |
| "tokens_trained": 7.470433 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.649645390070922, |
| "grad_norm": 1.0409865379333496, |
| "loss": 1.1901, |
| "loss_ce": 1.1272953748703003, |
| "loss_region": 0.05049753561615944, |
| "loss_total": 1.1777929067611694, |
| "lr": 0.0011519518954806404, |
| "router/selected_tokens_s0": 1035.9375, |
| "router/selected_tokens_s1": 29.125, |
| "step": 2290, |
| "tokens_trained": 7.503197136 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.6524822695035462, |
| "grad_norm": 0.5576117038726807, |
| "loss": 1.1885, |
| "loss_ce": 1.1093772649765015, |
| "loss_region": 0.060273103415966034, |
| "loss_total": 1.1696503162384033, |
| "lr": 0.0011515450034967894, |
| "router/selected_tokens_s0": 1215.9375, |
| "router/selected_tokens_s1": 109.25, |
| "step": 2300, |
| "tokens_trained": 7.535962576 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.6553191489361702, |
| "grad_norm": 0.5205599069595337, |
| "loss": 1.1924, |
| "loss_ce": 1.1544609069824219, |
| "loss_region": 0.07660765200853348, |
| "loss_total": 1.2310686111450195, |
| "lr": 0.0011511381115129383, |
| "router/selected_tokens_s0": 1092.9375, |
| "router/selected_tokens_s1": 214.5625, |
| "step": 2310, |
| "tokens_trained": 7.568726416 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.6581560283687943, |
| "grad_norm": 0.447685569524765, |
| "loss": 1.1869, |
| "loss_ce": 1.153815746307373, |
| "loss_region": 0.06783945858478546, |
| "loss_total": 1.221655249595642, |
| "lr": 0.0011507312195290873, |
| "router/selected_tokens_s0": 1045.5625, |
| "router/selected_tokens_s1": 147.75, |
| "step": 2320, |
| "tokens_trained": 7.601491056 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.6609929078014184, |
| "grad_norm": 0.5006564855575562, |
| "loss": 1.1865, |
| "loss_ce": 1.137509822845459, |
| "loss_region": 0.053064778447151184, |
| "loss_total": 1.1905746459960938, |
| "lr": 0.0011503243275452363, |
| "router/selected_tokens_s0": 1098.0, |
| "router/selected_tokens_s1": 48.3125, |
| "step": 2330, |
| "tokens_trained": 7.634256496 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.6638297872340425, |
| "grad_norm": 0.992090106010437, |
| "loss": 1.1858, |
| "loss_ce": 1.1077237129211426, |
| "loss_region": 0.048039667308330536, |
| "loss_total": 1.1557633876800537, |
| "lr": 0.0011499174355613852, |
| "router/selected_tokens_s0": 1037.875, |
| "router/selected_tokens_s1": 13.0625, |
| "step": 2340, |
| "tokens_trained": 7.667021936 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.6666666666666666, |
| "grad_norm": 0.38324639201164246, |
| "loss": 1.1885, |
| "loss_ce": 1.105891466140747, |
| "loss_region": 0.05229361355304718, |
| "loss_total": 1.1581851243972778, |
| "lr": 0.0011495105435775342, |
| "router/selected_tokens_s0": 1107.125, |
| "router/selected_tokens_s1": 42.0625, |
| "step": 2350, |
| "tokens_trained": 7.699787376 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.6695035460992907, |
| "grad_norm": 0.492826372385025, |
| "loss": 1.1861, |
| "loss_ce": 1.1159106492996216, |
| "loss_region": 0.06743160635232925, |
| "loss_total": 1.1833422183990479, |
| "lr": 0.0011491036515936831, |
| "router/selected_tokens_s0": 1241.4375, |
| "router/selected_tokens_s1": 166.9375, |
| "step": 2360, |
| "tokens_trained": 7.732552016 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.6723404255319149, |
| "grad_norm": 0.8682832717895508, |
| "loss": 1.1788, |
| "loss_ce": 1.1449956893920898, |
| "loss_region": 0.0556202307343483, |
| "loss_total": 1.2006158828735352, |
| "lr": 0.0011486967596098321, |
| "router/selected_tokens_s0": 1085.375, |
| "router/selected_tokens_s1": 66.125, |
| "step": 2370, |
| "tokens_trained": 7.765317456 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.675177304964539, |
| "grad_norm": 0.831288754940033, |
| "loss": 1.1866, |
| "loss_ce": 1.192410945892334, |
| "loss_region": 0.06446831673383713, |
| "loss_total": 1.256879210472107, |
| "lr": 0.001148289867625981, |
| "router/selected_tokens_s0": 1148.75, |
| "router/selected_tokens_s1": 134.0625, |
| "step": 2380, |
| "tokens_trained": 7.798082896 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.6780141843971631, |
| "grad_norm": 0.7338893413543701, |
| "loss": 1.1885, |
| "loss_ce": 1.0737974643707275, |
| "loss_region": 0.06012767180800438, |
| "loss_total": 1.1339250802993774, |
| "lr": 0.00114788297564213, |
| "router/selected_tokens_s0": 1175.625, |
| "router/selected_tokens_s1": 104.1875, |
| "step": 2390, |
| "tokens_trained": 7.830848336 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.6808510638297872, |
| "grad_norm": 1.1017957925796509, |
| "loss": 1.1932, |
| "loss_ce": 1.0649081468582153, |
| "loss_region": 0.05181884765625, |
| "loss_total": 1.1167269945144653, |
| "lr": 0.001147476083658279, |
| "router/selected_tokens_s0": 1137.1875, |
| "router/selected_tokens_s1": 38.1875, |
| "step": 2400, |
| "tokens_trained": 7.863613776 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.6836879432624113, |
| "grad_norm": 0.42400914430618286, |
| "loss": 1.1884, |
| "loss_ce": 1.120243787765503, |
| "loss_region": 0.05012480542063713, |
| "loss_total": 1.1703685522079468, |
| "lr": 0.001147069191674428, |
| "router/selected_tokens_s0": 1099.8125, |
| "router/selected_tokens_s1": 26.5625, |
| "step": 2410, |
| "tokens_trained": 7.896379216 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.6865248226950355, |
| "grad_norm": 0.6636138558387756, |
| "loss": 1.1873, |
| "loss_ce": 1.103990912437439, |
| "loss_region": 0.05863885208964348, |
| "loss_total": 1.1626297235488892, |
| "lr": 0.001146662299690577, |
| "router/selected_tokens_s0": 1077.1875, |
| "router/selected_tokens_s1": 86.1875, |
| "step": 2420, |
| "tokens_trained": 7.929144656 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.6893617021276596, |
| "grad_norm": 0.20607493817806244, |
| "loss": 1.1821, |
| "loss_ce": 1.1684622764587402, |
| "loss_region": 0.058312416076660156, |
| "loss_total": 1.2267746925354004, |
| "lr": 0.001146255407706726, |
| "router/selected_tokens_s0": 1127.375, |
| "router/selected_tokens_s1": 87.875, |
| "step": 2430, |
| "tokens_trained": 7.961910096 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.6921985815602837, |
| "grad_norm": 1.6605172157287598, |
| "loss": 1.191, |
| "loss_ce": 1.1512646675109863, |
| "loss_region": 0.09868698567152023, |
| "loss_total": 1.2499516010284424, |
| "lr": 0.0011458485157228749, |
| "router/selected_tokens_s0": 1190.125, |
| "router/selected_tokens_s1": 405.4375, |
| "step": 2440, |
| "tokens_trained": 7.994675536 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.6950354609929078, |
| "grad_norm": 0.6036321520805359, |
| "loss": 1.1824, |
| "loss_ce": 1.0932222604751587, |
| "loss_region": 0.06464701145887375, |
| "loss_total": 1.1578692197799683, |
| "lr": 0.0011454416237390238, |
| "router/selected_tokens_s0": 1218.6875, |
| "router/selected_tokens_s1": 147.1875, |
| "step": 2450, |
| "tokens_trained": 8.027440976 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.6978723404255319, |
| "grad_norm": 0.5354553461074829, |
| "loss": 1.1819, |
| "loss_ce": 1.1074717044830322, |
| "loss_region": 0.05404704064130783, |
| "loss_total": 1.1615186929702759, |
| "lr": 0.001145034731755173, |
| "router/selected_tokens_s0": 1221.5625, |
| "router/selected_tokens_s1": 58.6875, |
| "step": 2460, |
| "tokens_trained": 8.060206416 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.700709219858156, |
| "grad_norm": 0.3824005722999573, |
| "loss": 1.1821, |
| "loss_ce": 1.1669650077819824, |
| "loss_region": 0.05842549353837967, |
| "loss_total": 1.2253905534744263, |
| "lr": 0.001144627839771322, |
| "router/selected_tokens_s0": 1060.1875, |
| "router/selected_tokens_s1": 83.875, |
| "step": 2470, |
| "tokens_trained": 8.092971856 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.7035460992907802, |
| "grad_norm": 1.2816966772079468, |
| "loss": 1.1884, |
| "loss_ce": 1.1886110305786133, |
| "loss_region": 0.05507363751530647, |
| "loss_total": 1.2436846494674683, |
| "lr": 0.001144220947787471, |
| "router/selected_tokens_s0": 1096.6875, |
| "router/selected_tokens_s1": 62.4375, |
| "step": 2480, |
| "tokens_trained": 8.125737296 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.7063829787234043, |
| "grad_norm": 0.8357634544372559, |
| "loss": 1.1849, |
| "loss_ce": 1.1635318994522095, |
| "loss_region": 0.05851307883858681, |
| "loss_total": 1.2220449447631836, |
| "lr": 0.00114381405580362, |
| "router/selected_tokens_s0": 1201.75, |
| "router/selected_tokens_s1": 92.6875, |
| "step": 2490, |
| "tokens_trained": 8.158502728 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.7092198581560284, |
| "grad_norm": 0.6387074589729309, |
| "loss": 1.1841, |
| "loss_ce": 1.1415513753890991, |
| "loss_region": 0.06358432024717331, |
| "loss_total": 1.205135703086853, |
| "lr": 0.0011434071638197687, |
| "router/selected_tokens_s0": 1124.5, |
| "router/selected_tokens_s1": 125.5, |
| "step": 2500, |
| "tokens_trained": 8.191268168 |
| }, |
| { |
| "epoch": 0.7092198581560284, |
| "eval_ppl": 3.085261030890577, |
| "eval_runtime": 1.9932, |
| "step": 2500, |
| "tokens_trained": 8.191268168 |
| }, |
| { |
| "epoch": 0.7092198581560284, |
| "eval_F": 0.00680101536465145, |
| "eval_F_cds": 0.005210672895517025, |
| "eval_F_dig": 0.02410344558168607, |
| "eval_F_exon": 0.00631956619235944, |
| "eval_F_intron": 0.006854377447718121, |
| "eval_F_nig": 0.007284422172562524, |
| "eval_F_promoter": 0.005771903227844103, |
| "eval_F_utr": 0.005851568412189169, |
| "eval_G": 0.042142292222616534, |
| "eval_G_cds": 0.04027827688437696, |
| "eval_G_dig": 0.05870552936264552, |
| "eval_G_exon": 0.04071618352563841, |
| "eval_G_intron": 0.04184660939886382, |
| "eval_G_nig": 0.0422879405323239, |
| "eval_G_promoter": 0.0431433928215408, |
| "eval_G_utr": 0.03945561894458597, |
| "eval_avg_bp_per_token": 147.03686822963817, |
| "eval_bp_per_token/cds": 191.91379310344828, |
| "eval_bp_per_token/dig": 41.487844408427875, |
| "eval_bp_per_token/exon": 158.23870967741937, |
| "eval_bp_per_token/intron": 145.89217002237137, |
| "eval_bp_per_token/nig": 137.27924827951296, |
| "eval_bp_per_token/promoter": 173.2530779753762, |
| "eval_bp_per_token/utr": 170.8943533697632, |
| "eval_ppl_cds": 3.733989189183948, |
| "eval_ppl_dig": 1.1368362702896024, |
| "eval_ppl_exon": 3.317643101731427, |
| "eval_ppl_intron": 3.108811822776569, |
| "eval_ppl_nig": 2.961271125541008, |
| "eval_ppl_promoter": 3.3320430827111296, |
| "eval_ppl_utr": 3.4181474720617597, |
| "step": 2500, |
| "tokens_trained": 8.191268168 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.7120567375886525, |
| "grad_norm": 0.5213884711265564, |
| "loss": 1.183, |
| "loss_ce": 1.0932352542877197, |
| "loss_region": 0.05128668248653412, |
| "loss_total": 1.144521951675415, |
| "lr": 0.0011430002718359176, |
| "router/selected_tokens_s0": 1219.3125, |
| "router/selected_tokens_s1": 36.625, |
| "step": 2510, |
| "tokens_trained": 8.224033608 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.7148936170212766, |
| "grad_norm": 1.4375683069229126, |
| "loss": 1.1813, |
| "loss_ce": 1.1448945999145508, |
| "loss_region": 0.049260806292295456, |
| "loss_total": 1.1941554546356201, |
| "lr": 0.0011425933798520666, |
| "router/selected_tokens_s0": 1164.6875, |
| "router/selected_tokens_s1": 21.0625, |
| "step": 2520, |
| "tokens_trained": 8.25679904 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.7177304964539007, |
| "grad_norm": 1.0103474855422974, |
| "loss": 1.1899, |
| "loss_ce": 1.1087768077850342, |
| "loss_region": 0.07257764041423798, |
| "loss_total": 1.1813544034957886, |
| "lr": 0.0011421864878682158, |
| "router/selected_tokens_s0": 1230.375, |
| "router/selected_tokens_s1": 206.625, |
| "step": 2530, |
| "tokens_trained": 8.28956448 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.7205673758865249, |
| "grad_norm": 0.5543859004974365, |
| "loss": 1.1836, |
| "loss_ce": 1.1301989555358887, |
| "loss_region": 0.0570821538567543, |
| "loss_total": 1.1872811317443848, |
| "lr": 0.0011417795958843647, |
| "router/selected_tokens_s0": 1275.3125, |
| "router/selected_tokens_s1": 85.4375, |
| "step": 2540, |
| "tokens_trained": 8.322329544 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.723404255319149, |
| "grad_norm": 0.3695959150791168, |
| "loss": 1.1844, |
| "loss_ce": 1.1506540775299072, |
| "loss_region": 0.06053993105888367, |
| "loss_total": 1.2111940383911133, |
| "lr": 0.0011413727039005137, |
| "router/selected_tokens_s0": 1140.1875, |
| "router/selected_tokens_s1": 104.0625, |
| "step": 2550, |
| "tokens_trained": 8.355094984 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.7262411347517731, |
| "grad_norm": 0.6553955078125, |
| "loss": 1.1802, |
| "loss_ce": 1.0622395277023315, |
| "loss_region": 0.06116200238466263, |
| "loss_total": 1.1234015226364136, |
| "lr": 0.0011409658119166627, |
| "router/selected_tokens_s0": 1214.0625, |
| "router/selected_tokens_s1": 113.875, |
| "step": 2560, |
| "tokens_trained": 8.3878584 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.7290780141843972, |
| "grad_norm": 0.6573439836502075, |
| "loss": 1.1833, |
| "loss_ce": 1.0715194940567017, |
| "loss_region": 0.058225441724061966, |
| "loss_total": 1.1297448873519897, |
| "lr": 0.0011405589199328116, |
| "router/selected_tokens_s0": 1251.125, |
| "router/selected_tokens_s1": 93.75, |
| "step": 2570, |
| "tokens_trained": 8.42062384 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.7319148936170212, |
| "grad_norm": 1.2631888389587402, |
| "loss": 1.1845, |
| "loss_ce": 1.1702756881713867, |
| "loss_region": 0.06478434056043625, |
| "loss_total": 1.2350599765777588, |
| "lr": 0.0011401520279489606, |
| "router/selected_tokens_s0": 1134.0625, |
| "router/selected_tokens_s1": 133.8125, |
| "step": 2580, |
| "tokens_trained": 8.453389256 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.7347517730496453, |
| "grad_norm": 0.5682166218757629, |
| "loss": 1.1859, |
| "loss_ce": 1.0891146659851074, |
| "loss_region": 0.04887212812900543, |
| "loss_total": 1.1379867792129517, |
| "lr": 0.0011397451359651096, |
| "router/selected_tokens_s0": 1188.375, |
| "router/selected_tokens_s1": 17.375, |
| "step": 2590, |
| "tokens_trained": 8.486154696 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.7375886524822695, |
| "grad_norm": 0.6613340973854065, |
| "loss": 1.1837, |
| "loss_ce": 1.175453543663025, |
| "loss_region": 0.060148756951093674, |
| "loss_total": 1.2356022596359253, |
| "lr": 0.0011393382439812585, |
| "router/selected_tokens_s0": 1084.1875, |
| "router/selected_tokens_s1": 97.0625, |
| "step": 2600, |
| "tokens_trained": 8.518920136 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.7404255319148936, |
| "grad_norm": 0.6439635157585144, |
| "loss": 1.1797, |
| "loss_ce": 1.1248722076416016, |
| "loss_region": 0.057590316981077194, |
| "loss_total": 1.1824625730514526, |
| "lr": 0.0011389313519974075, |
| "router/selected_tokens_s0": 1128.875, |
| "router/selected_tokens_s1": 82.875, |
| "step": 2610, |
| "tokens_trained": 8.551685576 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.7432624113475177, |
| "grad_norm": 0.5331895351409912, |
| "loss": 1.1837, |
| "loss_ce": 1.1927634477615356, |
| "loss_region": 0.06320803612470627, |
| "loss_total": 1.2559714317321777, |
| "lr": 0.0011385244600135565, |
| "router/selected_tokens_s0": 1140.3125, |
| "router/selected_tokens_s1": 122.25, |
| "step": 2620, |
| "tokens_trained": 8.584451016 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.7460992907801418, |
| "grad_norm": 0.9306845664978027, |
| "loss": 1.1796, |
| "loss_ce": 1.0927690267562866, |
| "loss_region": 0.054515350610017776, |
| "loss_total": 1.1472843885421753, |
| "lr": 0.0011381175680297054, |
| "router/selected_tokens_s0": 1099.1875, |
| "router/selected_tokens_s1": 58.125, |
| "step": 2630, |
| "tokens_trained": 8.617213968 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.7489361702127659, |
| "grad_norm": 0.803848922252655, |
| "loss": 1.1831, |
| "loss_ce": 1.100754976272583, |
| "loss_region": 0.06052076816558838, |
| "loss_total": 1.1612757444381714, |
| "lr": 0.0011377106760458544, |
| "router/selected_tokens_s0": 1250.6875, |
| "router/selected_tokens_s1": 111.0625, |
| "step": 2640, |
| "tokens_trained": 8.649979408 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.75177304964539, |
| "grad_norm": 0.5596192479133606, |
| "loss": 1.1831, |
| "loss_ce": 1.1097190380096436, |
| "loss_region": 0.06077639386057854, |
| "loss_total": 1.1704953908920288, |
| "lr": 0.0011373037840620034, |
| "router/selected_tokens_s0": 1206.5625, |
| "router/selected_tokens_s1": 113.125, |
| "step": 2650, |
| "tokens_trained": 8.682744848 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.7546099290780142, |
| "grad_norm": 0.3111552894115448, |
| "loss": 1.1797, |
| "loss_ce": 1.1402360200881958, |
| "loss_region": 0.059760384261608124, |
| "loss_total": 1.1999963521957397, |
| "lr": 0.0011368968920781523, |
| "router/selected_tokens_s0": 1139.125, |
| "router/selected_tokens_s1": 98.9375, |
| "step": 2660, |
| "tokens_trained": 8.715510288 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.7574468085106383, |
| "grad_norm": 0.43657973408699036, |
| "loss": 1.1812, |
| "loss_ce": 1.1205332279205322, |
| "loss_region": 0.06205965578556061, |
| "loss_total": 1.1825928688049316, |
| "lr": 0.0011364900000943013, |
| "router/selected_tokens_s0": 1233.3125, |
| "router/selected_tokens_s1": 122.6875, |
| "step": 2670, |
| "tokens_trained": 8.748275728 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.7602836879432624, |
| "grad_norm": 0.826763927936554, |
| "loss": 1.1865, |
| "loss_ce": 1.163713812828064, |
| "loss_region": 0.0501738004386425, |
| "loss_total": 1.2138875722885132, |
| "lr": 0.0011360831081104503, |
| "router/selected_tokens_s0": 1197.4375, |
| "router/selected_tokens_s1": 27.1875, |
| "step": 2680, |
| "tokens_trained": 8.781041168 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.7631205673758865, |
| "grad_norm": 0.6090939044952393, |
| "loss": 1.1884, |
| "loss_ce": 1.1616290807724, |
| "loss_region": 0.05640416964888573, |
| "loss_total": 1.2180331945419312, |
| "lr": 0.0011356762161265992, |
| "router/selected_tokens_s0": 1076.5, |
| "router/selected_tokens_s1": 70.0625, |
| "step": 2690, |
| "tokens_trained": 8.813806448 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.7659574468085106, |
| "grad_norm": 0.6091976761817932, |
| "loss": 1.1743, |
| "loss_ce": 1.102664589881897, |
| "loss_region": 0.06204209849238396, |
| "loss_total": 1.1647067070007324, |
| "lr": 0.0011352693241427482, |
| "router/selected_tokens_s0": 1058.5625, |
| "router/selected_tokens_s1": 106.5625, |
| "step": 2700, |
| "tokens_trained": 8.846571888 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.7687943262411348, |
| "grad_norm": 0.4059768617153168, |
| "loss": 1.1818, |
| "loss_ce": 1.071120262145996, |
| "loss_region": 0.06557711958885193, |
| "loss_total": 1.1366974115371704, |
| "lr": 0.0011348624321588974, |
| "router/selected_tokens_s0": 1169.625, |
| "router/selected_tokens_s1": 145.1875, |
| "step": 2710, |
| "tokens_trained": 8.879337328 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.7716312056737589, |
| "grad_norm": 0.8343297839164734, |
| "loss": 1.177, |
| "loss_ce": 1.1214219331741333, |
| "loss_region": 0.05871148779988289, |
| "loss_total": 1.1801334619522095, |
| "lr": 0.0011344555401750463, |
| "router/selected_tokens_s0": 1201.0625, |
| "router/selected_tokens_s1": 94.125, |
| "step": 2720, |
| "tokens_trained": 8.912102768 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.774468085106383, |
| "grad_norm": 0.7600639462471008, |
| "loss": 1.1865, |
| "loss_ce": 1.1025841236114502, |
| "loss_region": 0.05945834890007973, |
| "loss_total": 1.162042498588562, |
| "lr": 0.0011340486481911953, |
| "router/selected_tokens_s0": 1277.125, |
| "router/selected_tokens_s1": 104.8125, |
| "step": 2730, |
| "tokens_trained": 8.944868208 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.7773049645390071, |
| "grad_norm": 0.7523510456085205, |
| "loss": 1.1837, |
| "loss_ce": 1.153456211090088, |
| "loss_region": 0.057424623519182205, |
| "loss_total": 1.2108808755874634, |
| "lr": 0.001133641756207344, |
| "router/selected_tokens_s0": 1197.8125, |
| "router/selected_tokens_s1": 84.5, |
| "step": 2740, |
| "tokens_trained": 8.977633592 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.7801418439716312, |
| "grad_norm": 0.4456590414047241, |
| "loss": 1.1812, |
| "loss_ce": 1.1360499858856201, |
| "loss_region": 0.05489301681518555, |
| "loss_total": 1.1909430027008057, |
| "lr": 0.001133234864223493, |
| "router/selected_tokens_s0": 1138.875, |
| "router/selected_tokens_s1": 62.375, |
| "step": 2750, |
| "tokens_trained": 9.010399032 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.7829787234042553, |
| "grad_norm": 0.5763502717018127, |
| "loss": 1.1776, |
| "loss_ce": 1.0765600204467773, |
| "loss_region": 0.060738228261470795, |
| "loss_total": 1.1372982263565063, |
| "lr": 0.001132827972239642, |
| "router/selected_tokens_s0": 1198.5625, |
| "router/selected_tokens_s1": 111.3125, |
| "step": 2760, |
| "tokens_trained": 9.043164472 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.7858156028368795, |
| "grad_norm": 0.8224549293518066, |
| "loss": 1.1853, |
| "loss_ce": 1.1184666156768799, |
| "loss_region": 0.06167374178767204, |
| "loss_total": 1.1801403760910034, |
| "lr": 0.001132421080255791, |
| "router/selected_tokens_s0": 1232.4375, |
| "router/selected_tokens_s1": 119.0625, |
| "step": 2770, |
| "tokens_trained": 9.075929912 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.7886524822695036, |
| "grad_norm": 0.3950735330581665, |
| "loss": 1.183, |
| "loss_ce": 1.0782346725463867, |
| "loss_region": 0.07110589742660522, |
| "loss_total": 1.1493406295776367, |
| "lr": 0.0011320141882719401, |
| "router/selected_tokens_s0": 1241.25, |
| "router/selected_tokens_s1": 193.5625, |
| "step": 2780, |
| "tokens_trained": 9.108695352 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.7914893617021277, |
| "grad_norm": 0.29622024297714233, |
| "loss": 1.1782, |
| "loss_ce": 1.0680986642837524, |
| "loss_region": 0.06274113059043884, |
| "loss_total": 1.1308398246765137, |
| "lr": 0.001131607296288089, |
| "router/selected_tokens_s0": 1081.9375, |
| "router/selected_tokens_s1": 116.3125, |
| "step": 2790, |
| "tokens_trained": 9.141456136 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.7943262411347518, |
| "grad_norm": 0.7131189107894897, |
| "loss": 1.1795, |
| "loss_ce": 1.1580817699432373, |
| "loss_region": 0.07048571854829788, |
| "loss_total": 1.2285674810409546, |
| "lr": 0.001131200404304238, |
| "router/selected_tokens_s0": 1201.375, |
| "router/selected_tokens_s1": 186.6875, |
| "step": 2800, |
| "tokens_trained": 9.174221576 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.7971631205673759, |
| "grad_norm": 0.4363972246646881, |
| "loss": 1.1811, |
| "loss_ce": 1.112588882446289, |
| "loss_region": 0.06408771872520447, |
| "loss_total": 1.176676630973816, |
| "lr": 0.001130793512320387, |
| "router/selected_tokens_s0": 1164.625, |
| "router/selected_tokens_s1": 133.25, |
| "step": 2810, |
| "tokens_trained": 9.206987016 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.8, |
| "grad_norm": 1.0044184923171997, |
| "loss": 1.1797, |
| "loss_ce": 1.0698281526565552, |
| "loss_region": 0.06786457449197769, |
| "loss_total": 1.1376926898956299, |
| "lr": 0.001130386620336536, |
| "router/selected_tokens_s0": 1299.0, |
| "router/selected_tokens_s1": 176.875, |
| "step": 2820, |
| "tokens_trained": 9.239752456 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.8028368794326242, |
| "grad_norm": 0.6597614288330078, |
| "loss": 1.1803, |
| "loss_ce": 1.121903896331787, |
| "loss_region": 0.05995731055736542, |
| "loss_total": 1.181861162185669, |
| "lr": 0.001129979728352685, |
| "router/selected_tokens_s0": 1300.3125, |
| "router/selected_tokens_s1": 108.9375, |
| "step": 2830, |
| "tokens_trained": 9.272517896 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.8056737588652483, |
| "grad_norm": 0.5861411690711975, |
| "loss": 1.1824, |
| "loss_ce": 1.1209691762924194, |
| "loss_region": 0.05859311297535896, |
| "loss_total": 1.1795623302459717, |
| "lr": 0.001129572836368834, |
| "router/selected_tokens_s0": 1184.3125, |
| "router/selected_tokens_s1": 92.75, |
| "step": 2840, |
| "tokens_trained": 9.305283336 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.8085106382978723, |
| "grad_norm": 0.8662428855895996, |
| "loss": 1.1801, |
| "loss_ce": 1.1683619022369385, |
| "loss_region": 0.05122409388422966, |
| "loss_total": 1.2195860147476196, |
| "lr": 0.0011291659443849829, |
| "router/selected_tokens_s0": 1079.9375, |
| "router/selected_tokens_s1": 34.8125, |
| "step": 2850, |
| "tokens_trained": 9.338048776 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.8113475177304964, |
| "grad_norm": 0.602667510509491, |
| "loss": 1.1796, |
| "loss_ce": 1.1227927207946777, |
| "loss_region": 0.060959592461586, |
| "loss_total": 1.1837522983551025, |
| "lr": 0.0011287590524011318, |
| "router/selected_tokens_s0": 1168.9375, |
| "router/selected_tokens_s1": 109.5625, |
| "step": 2860, |
| "tokens_trained": 9.370814216 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.8141843971631205, |
| "grad_norm": 0.3715594708919525, |
| "loss": 1.1777, |
| "loss_ce": 1.1824853420257568, |
| "loss_region": 0.05484374985098839, |
| "loss_total": 1.237329125404358, |
| "lr": 0.0011283521604172808, |
| "router/selected_tokens_s0": 933.75, |
| "router/selected_tokens_s1": 54.3125, |
| "step": 2870, |
| "tokens_trained": 9.403579656 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.8170212765957446, |
| "grad_norm": 0.36632469296455383, |
| "loss": 1.1781, |
| "loss_ce": 1.083885908126831, |
| "loss_region": 0.06635577231645584, |
| "loss_total": 1.150241732597351, |
| "lr": 0.0011279452684334298, |
| "router/selected_tokens_s0": 1268.1875, |
| "router/selected_tokens_s1": 160.25, |
| "step": 2880, |
| "tokens_trained": 9.436345096 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.8198581560283688, |
| "grad_norm": 0.25131088495254517, |
| "loss": 1.1792, |
| "loss_ce": 1.1564133167266846, |
| "loss_region": 0.0669500008225441, |
| "loss_total": 1.2233632802963257, |
| "lr": 0.0011275383764495787, |
| "router/selected_tokens_s0": 1166.75, |
| "router/selected_tokens_s1": 155.375, |
| "step": 2890, |
| "tokens_trained": 9.469110536 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.8226950354609929, |
| "grad_norm": 0.860545814037323, |
| "loss": 1.1846, |
| "loss_ce": 1.1339890956878662, |
| "loss_region": 0.0660734623670578, |
| "loss_total": 1.2000625133514404, |
| "lr": 0.0011271314844657277, |
| "router/selected_tokens_s0": 1135.4375, |
| "router/selected_tokens_s1": 145.1875, |
| "step": 2900, |
| "tokens_trained": 9.501875176 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.825531914893617, |
| "grad_norm": 0.1526106297969818, |
| "loss": 1.178, |
| "loss_ce": 1.1420540809631348, |
| "loss_region": 0.062213458120822906, |
| "loss_total": 1.2042675018310547, |
| "lr": 0.0011267245924818767, |
| "router/selected_tokens_s0": 1171.875, |
| "router/selected_tokens_s1": 119.25, |
| "step": 2910, |
| "tokens_trained": 9.534640608 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.8283687943262411, |
| "grad_norm": 0.25495970249176025, |
| "loss": 1.1782, |
| "loss_ce": 1.0953720808029175, |
| "loss_region": 0.06660070270299911, |
| "loss_total": 1.1619727611541748, |
| "lr": 0.0011263177004980256, |
| "router/selected_tokens_s0": 1110.8125, |
| "router/selected_tokens_s1": 146.125, |
| "step": 2920, |
| "tokens_trained": 9.567405248 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.8312056737588652, |
| "grad_norm": 0.32902929186820984, |
| "loss": 1.1811, |
| "loss_ce": 1.1160013675689697, |
| "loss_region": 0.06135803461074829, |
| "loss_total": 1.1773593425750732, |
| "lr": 0.0011259108085141746, |
| "router/selected_tokens_s0": 1077.6875, |
| "router/selected_tokens_s1": 106.3125, |
| "step": 2930, |
| "tokens_trained": 9.600170688 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.8340425531914893, |
| "grad_norm": 0.25194478034973145, |
| "loss": 1.1796, |
| "loss_ce": 1.133124589920044, |
| "loss_region": 0.04943273961544037, |
| "loss_total": 1.1825573444366455, |
| "lr": 0.0011255039165303236, |
| "router/selected_tokens_s0": 1135.625, |
| "router/selected_tokens_s1": 21.9375, |
| "step": 2940, |
| "tokens_trained": 9.632932368 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.8368794326241135, |
| "grad_norm": 0.5080149173736572, |
| "loss": 1.1806, |
| "loss_ce": 1.1138414144515991, |
| "loss_region": 0.060192253440618515, |
| "loss_total": 1.1740336418151855, |
| "lr": 0.0011250970245464725, |
| "router/selected_tokens_s0": 1200.0, |
| "router/selected_tokens_s1": 106.4375, |
| "step": 2950, |
| "tokens_trained": 9.665697808 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.8397163120567376, |
| "grad_norm": 0.8673221468925476, |
| "loss": 1.1768, |
| "loss_ce": 1.1572374105453491, |
| "loss_region": 0.055664122104644775, |
| "loss_total": 1.2129015922546387, |
| "lr": 0.0011246901325626217, |
| "router/selected_tokens_s0": 1227.5, |
| "router/selected_tokens_s1": 72.0, |
| "step": 2960, |
| "tokens_trained": 9.698463248 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.8425531914893617, |
| "grad_norm": 0.33562785387039185, |
| "loss": 1.1762, |
| "loss_ce": 1.088935136795044, |
| "loss_region": 0.053266432136297226, |
| "loss_total": 1.142201542854309, |
| "lr": 0.0011242832405787707, |
| "router/selected_tokens_s0": 1065.8125, |
| "router/selected_tokens_s1": 48.4375, |
| "step": 2970, |
| "tokens_trained": 9.731228528 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.8453900709219858, |
| "grad_norm": 0.5114731788635254, |
| "loss": 1.178, |
| "loss_ce": 1.1897213459014893, |
| "loss_region": 0.05429379269480705, |
| "loss_total": 1.244015097618103, |
| "lr": 0.0011238763485949196, |
| "router/selected_tokens_s0": 1213.8125, |
| "router/selected_tokens_s1": 60.6875, |
| "step": 2980, |
| "tokens_trained": 9.763992368 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.8482269503546099, |
| "grad_norm": 0.26606935262680054, |
| "loss": 1.1757, |
| "loss_ce": 1.068314790725708, |
| "loss_region": 0.05756586790084839, |
| "loss_total": 1.1258807182312012, |
| "lr": 0.0011234694566110684, |
| "router/selected_tokens_s0": 1233.625, |
| "router/selected_tokens_s1": 87.25, |
| "step": 2990, |
| "tokens_trained": 9.796757008 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.851063829787234, |
| "grad_norm": 0.770910918712616, |
| "loss": 1.1768, |
| "loss_ce": 1.161916732788086, |
| "loss_region": 0.060496315360069275, |
| "loss_total": 1.2224130630493164, |
| "lr": 0.0011230625646272174, |
| "router/selected_tokens_s0": 1139.5625, |
| "router/selected_tokens_s1": 104.625, |
| "step": 3000, |
| "tokens_trained": 9.829522448 |
| }, |
| { |
| "epoch": 0.851063829787234, |
| "eval_ppl": 3.0685293746125137, |
| "eval_runtime": 2.0114, |
| "step": 3000, |
| "tokens_trained": 9.829522448 |
| }, |
| { |
| "epoch": 0.851063829787234, |
| "eval_F": 0.00586938687465393, |
| "eval_F_cds": 0.004950139250741174, |
| "eval_F_dig": 0.010235174623017424, |
| "eval_F_exon": 0.005857490384746062, |
| "eval_F_intron": 0.0057576003851945445, |
| "eval_F_nig": 0.00577470736019713, |
| "eval_F_promoter": 0.006416735097803887, |
| "eval_F_utr": 0.005499834791784355, |
| "eval_G": 0.04278435178691461, |
| "eval_G_cds": 0.037835809058036116, |
| "eval_G_dig": 0.05814518285119931, |
| "eval_G_exon": 0.042680271146763434, |
| "eval_G_intron": 0.043167546982243406, |
| "eval_G_nig": 0.04354123851445412, |
| "eval_G_promoter": 0.04100065054817289, |
| "eval_G_utr": 0.039970458206318414, |
| "eval_avg_bp_per_token": 170.3755471152107, |
| "eval_bp_per_token/cds": 202.01451905626135, |
| "eval_bp_per_token/dig": 97.70229007633588, |
| "eval_bp_per_token/exon": 170.7215777262181, |
| "eval_bp_per_token/intron": 173.6834676076969, |
| "eval_bp_per_token/nig": 173.16894824707848, |
| "eval_bp_per_token/promoter": 155.8424938474159, |
| "eval_bp_per_token/utr": 181.82364341085272, |
| "eval_ppl_cds": 3.743007889924154, |
| "eval_ppl_dig": 1.1305635385016664, |
| "eval_ppl_exon": 3.308204537970067, |
| "eval_ppl_intron": 3.091991802801204, |
| "eval_ppl_nig": 2.9335253140217743, |
| "eval_ppl_promoter": 3.326653273455676, |
| "eval_ppl_utr": 3.4125763618627047, |
| "step": 3000, |
| "tokens_trained": 9.829522448 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.8539007092198582, |
| "grad_norm": 0.29383108019828796, |
| "loss": 1.17, |
| "loss_ce": 1.1111640930175781, |
| "loss_region": 0.057751886546611786, |
| "loss_total": 1.1689159870147705, |
| "lr": 0.0011226556726433663, |
| "router/selected_tokens_s0": 1105.25, |
| "router/selected_tokens_s1": 82.75, |
| "step": 3010, |
| "tokens_trained": 9.862287888 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.8567375886524823, |
| "grad_norm": 0.34174391627311707, |
| "loss": 1.1735, |
| "loss_ce": 1.113343358039856, |
| "loss_region": 0.058337848633527756, |
| "loss_total": 1.1716811656951904, |
| "lr": 0.0011222487806595153, |
| "router/selected_tokens_s0": 1087.4375, |
| "router/selected_tokens_s1": 85.375, |
| "step": 3020, |
| "tokens_trained": 9.895048312 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.8595744680851064, |
| "grad_norm": 1.0063222646713257, |
| "loss": 1.1733, |
| "loss_ce": 1.1626818180084229, |
| "loss_region": 0.06576195359230042, |
| "loss_total": 1.2284437417984009, |
| "lr": 0.0011218418886756645, |
| "router/selected_tokens_s0": 1179.3125, |
| "router/selected_tokens_s1": 148.0, |
| "step": 3030, |
| "tokens_trained": 9.927812952 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.8624113475177305, |
| "grad_norm": 0.5856843590736389, |
| "loss": 1.1789, |
| "loss_ce": 1.1485878229141235, |
| "loss_region": 0.054565493017435074, |
| "loss_total": 1.203153371810913, |
| "lr": 0.0011214349966918134, |
| "router/selected_tokens_s0": 1048.875, |
| "router/selected_tokens_s1": 56.625, |
| "step": 3040, |
| "tokens_trained": 9.960578392 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.8652482269503546, |
| "grad_norm": 0.4607069194316864, |
| "loss": 1.1797, |
| "loss_ce": 1.0936951637268066, |
| "loss_region": 0.05745574086904526, |
| "loss_total": 1.1511509418487549, |
| "lr": 0.0011210281047079624, |
| "router/selected_tokens_s0": 1223.0625, |
| "router/selected_tokens_s1": 84.8125, |
| "step": 3050, |
| "tokens_trained": 9.993343832 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.8680851063829788, |
| "grad_norm": 0.3751443028450012, |
| "loss": 1.1749, |
| "loss_ce": 1.165441870689392, |
| "loss_region": 0.0589909553527832, |
| "loss_total": 1.2244328260421753, |
| "lr": 0.0011206212127241114, |
| "router/selected_tokens_s0": 1145.6875, |
| "router/selected_tokens_s1": 93.25, |
| "step": 3060, |
| "tokens_trained": 10.026108952 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.8709219858156029, |
| "grad_norm": 0.7878003716468811, |
| "loss": 1.1757, |
| "loss_ce": 1.1027661561965942, |
| "loss_region": 0.059977948665618896, |
| "loss_total": 1.1627440452575684, |
| "lr": 0.0011202143207402603, |
| "router/selected_tokens_s0": 1153.9375, |
| "router/selected_tokens_s1": 99.5, |
| "step": 3070, |
| "tokens_trained": 10.058874392 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.873758865248227, |
| "grad_norm": 0.39917951822280884, |
| "loss": 1.1833, |
| "loss_ce": 1.0899752378463745, |
| "loss_region": 0.06287148594856262, |
| "loss_total": 1.1528466939926147, |
| "lr": 0.0011198074287564093, |
| "router/selected_tokens_s0": 1250.375, |
| "router/selected_tokens_s1": 131.25, |
| "step": 3080, |
| "tokens_trained": 10.091635328 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.8765957446808511, |
| "grad_norm": 0.45243528485298157, |
| "loss": 1.1808, |
| "loss_ce": 1.1440991163253784, |
| "loss_region": 0.05864295735955238, |
| "loss_total": 1.202742099761963, |
| "lr": 0.0011194005367725583, |
| "router/selected_tokens_s0": 1195.125, |
| "router/selected_tokens_s1": 93.625, |
| "step": 3090, |
| "tokens_trained": 10.124400768 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.8794326241134752, |
| "grad_norm": 0.42869076132774353, |
| "loss": 1.1785, |
| "loss_ce": 1.150432825088501, |
| "loss_region": 0.06204836070537567, |
| "loss_total": 1.212481141090393, |
| "lr": 0.0011189936447887072, |
| "router/selected_tokens_s0": 1170.875, |
| "router/selected_tokens_s1": 117.6875, |
| "step": 3100, |
| "tokens_trained": 10.157166208 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.8822695035460993, |
| "grad_norm": 0.19406476616859436, |
| "loss": 1.1745, |
| "loss_ce": 1.101540207862854, |
| "loss_region": 0.059803955256938934, |
| "loss_total": 1.1613441705703735, |
| "lr": 0.0011185867528048562, |
| "router/selected_tokens_s0": 1204.625, |
| "router/selected_tokens_s1": 102.625, |
| "step": 3110, |
| "tokens_trained": 10.189931648 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.8851063829787233, |
| "grad_norm": 0.6000205278396606, |
| "loss": 1.1775, |
| "loss_ce": 1.1167505979537964, |
| "loss_region": 0.054472386837005615, |
| "loss_total": 1.1712229251861572, |
| "lr": 0.0011181798608210052, |
| "router/selected_tokens_s0": 1106.0625, |
| "router/selected_tokens_s1": 58.25, |
| "step": 3120, |
| "tokens_trained": 10.22269608 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.8879432624113475, |
| "grad_norm": 0.16923663020133972, |
| "loss": 1.1796, |
| "loss_ce": 1.0365309715270996, |
| "loss_region": 0.06018022075295448, |
| "loss_total": 1.0967111587524414, |
| "lr": 0.0011177729688371541, |
| "router/selected_tokens_s0": 1182.1875, |
| "router/selected_tokens_s1": 104.125, |
| "step": 3130, |
| "tokens_trained": 10.25546152 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.8907801418439716, |
| "grad_norm": 0.4746827781200409, |
| "loss": 1.1731, |
| "loss_ce": 1.1875536441802979, |
| "loss_region": 0.060806721448898315, |
| "loss_total": 1.2483603954315186, |
| "lr": 0.001117366076853303, |
| "router/selected_tokens_s0": 1150.125, |
| "router/selected_tokens_s1": 104.5, |
| "step": 3140, |
| "tokens_trained": 10.28822616 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.8936170212765957, |
| "grad_norm": 0.4047585129737854, |
| "loss": 1.1774, |
| "loss_ce": 1.0648419857025146, |
| "loss_region": 0.06984846293926239, |
| "loss_total": 1.1346904039382935, |
| "lr": 0.001116959184869452, |
| "router/selected_tokens_s0": 1133.0625, |
| "router/selected_tokens_s1": 173.875, |
| "step": 3150, |
| "tokens_trained": 10.3209916 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.8964539007092198, |
| "grad_norm": 0.38032910227775574, |
| "loss": 1.1728, |
| "loss_ce": 1.1230207681655884, |
| "loss_region": 0.05686573311686516, |
| "loss_total": 1.1798864603042603, |
| "lr": 0.001116552292885601, |
| "router/selected_tokens_s0": 1178.75, |
| "router/selected_tokens_s1": 79.1875, |
| "step": 3160, |
| "tokens_trained": 10.35375624 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.8992907801418439, |
| "grad_norm": 0.4329375922679901, |
| "loss": 1.1768, |
| "loss_ce": 1.0851696729660034, |
| "loss_region": 0.06354688107967377, |
| "loss_total": 1.1487165689468384, |
| "lr": 0.00111614540090175, |
| "router/selected_tokens_s0": 1165.875, |
| "router/selected_tokens_s1": 129.125, |
| "step": 3170, |
| "tokens_trained": 10.38652168 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.902127659574468, |
| "grad_norm": 0.476487398147583, |
| "loss": 1.1823, |
| "loss_ce": 1.1869922876358032, |
| "loss_region": 0.06617600470781326, |
| "loss_total": 1.2531683444976807, |
| "lr": 0.001115738508917899, |
| "router/selected_tokens_s0": 1150.25, |
| "router/selected_tokens_s1": 150.25, |
| "step": 3180, |
| "tokens_trained": 10.41928712 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.9049645390070922, |
| "grad_norm": 0.3666301667690277, |
| "loss": 1.1782, |
| "loss_ce": 1.0683258771896362, |
| "loss_region": 0.06172625347971916, |
| "loss_total": 1.130052089691162, |
| "lr": 0.001115331616934048, |
| "router/selected_tokens_s0": 1138.4375, |
| "router/selected_tokens_s1": 112.625, |
| "step": 3190, |
| "tokens_trained": 10.45205256 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.9078014184397163, |
| "grad_norm": 0.5340037941932678, |
| "loss": 1.1745, |
| "loss_ce": 1.140019416809082, |
| "loss_region": 0.05862601846456528, |
| "loss_total": 1.1986454725265503, |
| "lr": 0.0011149247249501969, |
| "router/selected_tokens_s0": 1137.0625, |
| "router/selected_tokens_s1": 89.75, |
| "step": 3200, |
| "tokens_trained": 10.4848172 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.9106382978723404, |
| "grad_norm": 0.5549784302711487, |
| "loss": 1.1714, |
| "loss_ce": 1.1285208463668823, |
| "loss_region": 0.055352479219436646, |
| "loss_total": 1.1838732957839966, |
| "lr": 0.001114517832966346, |
| "router/selected_tokens_s0": 1075.1875, |
| "router/selected_tokens_s1": 63.3125, |
| "step": 3210, |
| "tokens_trained": 10.51758264 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.9134751773049645, |
| "grad_norm": 0.33541953563690186, |
| "loss": 1.1744, |
| "loss_ce": 1.1551395654678345, |
| "loss_region": 0.059686705470085144, |
| "loss_total": 1.214826226234436, |
| "lr": 0.001114110940982495, |
| "router/selected_tokens_s0": 1107.1875, |
| "router/selected_tokens_s1": 96.4375, |
| "step": 3220, |
| "tokens_trained": 10.55034808 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.9163120567375886, |
| "grad_norm": 0.40579476952552795, |
| "loss": 1.1768, |
| "loss_ce": 1.1274559497833252, |
| "loss_region": 0.057040877640247345, |
| "loss_total": 1.1844968795776367, |
| "lr": 0.001113704048998644, |
| "router/selected_tokens_s0": 1134.5, |
| "router/selected_tokens_s1": 78.5, |
| "step": 3230, |
| "tokens_trained": 10.58311352 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.9191489361702128, |
| "grad_norm": 0.5760875940322876, |
| "loss": 1.1751, |
| "loss_ce": 1.1354435682296753, |
| "loss_region": 0.06056402251124382, |
| "loss_total": 1.1960076093673706, |
| "lr": 0.0011132971570147927, |
| "router/selected_tokens_s0": 1090.8125, |
| "router/selected_tokens_s1": 101.25, |
| "step": 3240, |
| "tokens_trained": 10.61587896 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.9219858156028369, |
| "grad_norm": 0.38739120960235596, |
| "loss": 1.1773, |
| "loss_ce": 1.105566143989563, |
| "loss_region": 0.060275085270404816, |
| "loss_total": 1.1658412218093872, |
| "lr": 0.0011128902650309417, |
| "router/selected_tokens_s0": 1192.9375, |
| "router/selected_tokens_s1": 105.75, |
| "step": 3250, |
| "tokens_trained": 10.6486444 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.924822695035461, |
| "grad_norm": 0.3400448262691498, |
| "loss": 1.1631, |
| "loss_ce": 1.1183065176010132, |
| "loss_region": 0.05988531932234764, |
| "loss_total": 1.1781917810440063, |
| "lr": 0.0011124833730470907, |
| "router/selected_tokens_s0": 1182.9375, |
| "router/selected_tokens_s1": 102.9375, |
| "step": 3260, |
| "tokens_trained": 10.68140984 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.9276595744680851, |
| "grad_norm": 0.5918727517127991, |
| "loss": 1.1708, |
| "loss_ce": 1.0822169780731201, |
| "loss_region": 0.05995599180459976, |
| "loss_total": 1.142172932624817, |
| "lr": 0.0011120764810632396, |
| "router/selected_tokens_s0": 1097.0625, |
| "router/selected_tokens_s1": 96.125, |
| "step": 3270, |
| "tokens_trained": 10.714171944 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.9304964539007092, |
| "grad_norm": 0.6673521995544434, |
| "loss": 1.172, |
| "loss_ce": 1.0720325708389282, |
| "loss_region": 0.05878286063671112, |
| "loss_total": 1.1308153867721558, |
| "lr": 0.0011116695890793888, |
| "router/selected_tokens_s0": 1054.0, |
| "router/selected_tokens_s1": 84.75, |
| "step": 3280, |
| "tokens_trained": 10.746937384 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.9333333333333333, |
| "grad_norm": 0.3518356382846832, |
| "loss": 1.1647, |
| "loss_ce": 1.1526938676834106, |
| "loss_region": 0.061706800013780594, |
| "loss_total": 1.2144006490707397, |
| "lr": 0.0011112626970955378, |
| "router/selected_tokens_s0": 1207.1875, |
| "router/selected_tokens_s1": 118.6875, |
| "step": 3290, |
| "tokens_trained": 10.779702824 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.9361702127659575, |
| "grad_norm": 0.9049731492996216, |
| "loss": 1.1696, |
| "loss_ce": 1.2116420269012451, |
| "loss_region": 0.060429565608501434, |
| "loss_total": 1.2720715999603271, |
| "lr": 0.0011108558051116867, |
| "router/selected_tokens_s0": 1074.625, |
| "router/selected_tokens_s1": 98.875, |
| "step": 3300, |
| "tokens_trained": 10.812468264 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.9390070921985816, |
| "grad_norm": 0.40723493695259094, |
| "loss": 1.1751, |
| "loss_ce": 1.027906060218811, |
| "loss_region": 0.062023263424634933, |
| "loss_total": 1.0899293422698975, |
| "lr": 0.0011104489131278357, |
| "router/selected_tokens_s0": 1176.875, |
| "router/selected_tokens_s1": 119.5625, |
| "step": 3310, |
| "tokens_trained": 10.845230648 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.9418439716312057, |
| "grad_norm": 0.2104732096195221, |
| "loss": 1.1716, |
| "loss_ce": 1.0564253330230713, |
| "loss_region": 0.07024026662111282, |
| "loss_total": 1.1266655921936035, |
| "lr": 0.0011100420211439847, |
| "router/selected_tokens_s0": 1101.1875, |
| "router/selected_tokens_s1": 170.5625, |
| "step": 3320, |
| "tokens_trained": 10.877996088 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.9446808510638298, |
| "grad_norm": 0.6985082030296326, |
| "loss": 1.1758, |
| "loss_ce": 1.1633176803588867, |
| "loss_region": 0.057024937123060226, |
| "loss_total": 1.2203426361083984, |
| "lr": 0.0011096351291601336, |
| "router/selected_tokens_s0": 1152.0625, |
| "router/selected_tokens_s1": 78.9375, |
| "step": 3330, |
| "tokens_trained": 10.910761528 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.9475177304964539, |
| "grad_norm": 0.5326957106590271, |
| "loss": 1.1734, |
| "loss_ce": 1.1307435035705566, |
| "loss_region": 0.05962308123707771, |
| "loss_total": 1.1903666257858276, |
| "lr": 0.0011092282371762826, |
| "router/selected_tokens_s0": 1052.8125, |
| "router/selected_tokens_s1": 91.3125, |
| "step": 3340, |
| "tokens_trained": 10.943526968 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.950354609929078, |
| "grad_norm": 0.4644201397895813, |
| "loss": 1.1747, |
| "loss_ce": 1.1294314861297607, |
| "loss_region": 0.059848666191101074, |
| "loss_total": 1.1892801523208618, |
| "lr": 0.0011088213451924316, |
| "router/selected_tokens_s0": 1206.9375, |
| "router/selected_tokens_s1": 103.0, |
| "step": 3350, |
| "tokens_trained": 10.976292408 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.9531914893617022, |
| "grad_norm": 0.519744873046875, |
| "loss": 1.1695, |
| "loss_ce": 1.0610030889511108, |
| "loss_region": 0.06155804172158241, |
| "loss_total": 1.1225610971450806, |
| "lr": 0.0011084144532085805, |
| "router/selected_tokens_s0": 1226.5, |
| "router/selected_tokens_s1": 118.375, |
| "step": 3360, |
| "tokens_trained": 11.009057848 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.9560283687943263, |
| "grad_norm": 0.4544057250022888, |
| "loss": 1.1712, |
| "loss_ce": 1.036798357963562, |
| "loss_region": 0.05716359615325928, |
| "loss_total": 1.0939619541168213, |
| "lr": 0.0011080075612247295, |
| "router/selected_tokens_s0": 1070.8125, |
| "router/selected_tokens_s1": 76.125, |
| "step": 3370, |
| "tokens_trained": 11.041823288 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.9588652482269504, |
| "grad_norm": 0.48027774691581726, |
| "loss": 1.1747, |
| "loss_ce": 1.0815472602844238, |
| "loss_region": 0.058336447924375534, |
| "loss_total": 1.1398837566375732, |
| "lr": 0.0011076006692408785, |
| "router/selected_tokens_s0": 1209.5625, |
| "router/selected_tokens_s1": 91.6875, |
| "step": 3380, |
| "tokens_trained": 11.074587928 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.9617021276595744, |
| "grad_norm": 0.40910616517066956, |
| "loss": 1.1696, |
| "loss_ce": 1.1013418436050415, |
| "loss_region": 0.059236638247966766, |
| "loss_total": 1.1605784893035889, |
| "lr": 0.0011071937772570274, |
| "router/selected_tokens_s0": 1173.5, |
| "router/selected_tokens_s1": 97.125, |
| "step": 3390, |
| "tokens_trained": 11.107353368 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.9645390070921985, |
| "grad_norm": 0.6562957763671875, |
| "loss": 1.1791, |
| "loss_ce": 1.15192711353302, |
| "loss_region": 0.06069476902484894, |
| "loss_total": 1.2126219272613525, |
| "lr": 0.0011067868852731764, |
| "router/selected_tokens_s0": 1176.25, |
| "router/selected_tokens_s1": 108.5, |
| "step": 3400, |
| "tokens_trained": 11.140118808 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.9673758865248226, |
| "grad_norm": 0.6277413964271545, |
| "loss": 1.1721, |
| "loss_ce": 1.1012977361679077, |
| "loss_region": 0.05599859729409218, |
| "loss_total": 1.1572962999343872, |
| "lr": 0.0011063799932893254, |
| "router/selected_tokens_s0": 1253.3125, |
| "router/selected_tokens_s1": 75.5, |
| "step": 3410, |
| "tokens_trained": 11.172884248 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.9702127659574468, |
| "grad_norm": 0.6205341815948486, |
| "loss": 1.1722, |
| "loss_ce": 1.1080474853515625, |
| "loss_region": 0.06207379698753357, |
| "loss_total": 1.1701213121414185, |
| "lr": 0.0011059731013054743, |
| "router/selected_tokens_s0": 1329.125, |
| "router/selected_tokens_s1": 130.3125, |
| "step": 3420, |
| "tokens_trained": 11.205649688 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.9730496453900709, |
| "grad_norm": 0.29150086641311646, |
| "loss": 1.1658, |
| "loss_ce": 1.180068850517273, |
| "loss_region": 0.058709632605314255, |
| "loss_total": 1.2387784719467163, |
| "lr": 0.0011055662093216233, |
| "router/selected_tokens_s0": 1133.125, |
| "router/selected_tokens_s1": 90.125, |
| "step": 3430, |
| "tokens_trained": 11.238415128 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.975886524822695, |
| "grad_norm": 0.041430745273828506, |
| "loss": 1.174, |
| "loss_ce": 1.1881239414215088, |
| "loss_region": 0.06059211492538452, |
| "loss_total": 1.248716115951538, |
| "lr": 0.0011051593173377723, |
| "router/selected_tokens_s0": 1123.5625, |
| "router/selected_tokens_s1": 103.875, |
| "step": 3440, |
| "tokens_trained": 11.27117724 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.9787234042553191, |
| "grad_norm": 0.3598027527332306, |
| "loss": 1.1723, |
| "loss_ce": 1.0409948825836182, |
| "loss_region": 0.06112197786569595, |
| "loss_total": 1.1021168231964111, |
| "lr": 0.0011047524253539212, |
| "router/selected_tokens_s0": 1166.875, |
| "router/selected_tokens_s1": 111.4375, |
| "step": 3450, |
| "tokens_trained": 11.30394268 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.9815602836879432, |
| "grad_norm": 0.21557830274105072, |
| "loss": 1.1673, |
| "loss_ce": 1.0955760478973389, |
| "loss_region": 0.06010117009282112, |
| "loss_total": 1.1556771993637085, |
| "lr": 0.0011043455333700704, |
| "router/selected_tokens_s0": 1128.0, |
| "router/selected_tokens_s1": 100.25, |
| "step": 3460, |
| "tokens_trained": 11.33670812 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.9843971631205674, |
| "grad_norm": 0.7135093808174133, |
| "loss": 1.1701, |
| "loss_ce": 1.1059309244155884, |
| "loss_region": 0.05792264640331268, |
| "loss_total": 1.1638535261154175, |
| "lr": 0.0011039386413862194, |
| "router/selected_tokens_s0": 1225.5625, |
| "router/selected_tokens_s1": 90.0, |
| "step": 3470, |
| "tokens_trained": 11.369471136 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.9872340425531915, |
| "grad_norm": 0.7960752844810486, |
| "loss": 1.1644, |
| "loss_ce": 1.0885987281799316, |
| "loss_region": 0.057787343859672546, |
| "loss_total": 1.1463860273361206, |
| "lr": 0.0011035317494023683, |
| "router/selected_tokens_s0": 1155.75, |
| "router/selected_tokens_s1": 84.8125, |
| "step": 3480, |
| "tokens_trained": 11.402236576 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.9900709219858156, |
| "grad_norm": 0.406982958316803, |
| "loss": 1.1727, |
| "loss_ce": 1.0770787000656128, |
| "loss_region": 0.06020737439393997, |
| "loss_total": 1.1372860670089722, |
| "lr": 0.001103124857418517, |
| "router/selected_tokens_s0": 1090.6875, |
| "router/selected_tokens_s1": 98.5625, |
| "step": 3490, |
| "tokens_trained": 11.435000416 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.9929078014184397, |
| "grad_norm": 0.3447991609573364, |
| "loss": 1.1623, |
| "loss_ce": 1.0214577913284302, |
| "loss_region": 0.05888646841049194, |
| "loss_total": 1.0803442001342773, |
| "lr": 0.001102717965434666, |
| "router/selected_tokens_s0": 1209.5625, |
| "router/selected_tokens_s1": 96.125, |
| "step": 3500, |
| "tokens_trained": 11.467765856 |
| }, |
| { |
| "epoch": 0.9929078014184397, |
| "eval_ppl": 3.038240767914852, |
| "eval_runtime": 2.008, |
| "step": 3500, |
| "tokens_trained": 11.467765856 |
| }, |
| { |
| "epoch": 0.9929078014184397, |
| "eval_F": 0.00850206537874902, |
| "eval_F_cds": 0.00689964962716737, |
| "eval_F_dig": 0.014923040862567388, |
| "eval_F_exon": 0.00789606012421685, |
| "eval_F_intron": 0.008428047437812242, |
| "eval_F_nig": 0.00872954509959683, |
| "eval_F_promoter": 0.008634167140420694, |
| "eval_F_utr": 0.007599577919655515, |
| "eval_G": 0.046269897373441414, |
| "eval_G_cds": 0.04158991148032522, |
| "eval_G_dig": 0.05773201885889523, |
| "eval_G_exon": 0.04576241340325628, |
| "eval_G_intron": 0.04624071162133245, |
| "eval_G_nig": 0.04734882132161264, |
| "eval_G_promoter": 0.04556376903981245, |
| "eval_G_utr": 0.04369505411235224, |
| "eval_avg_bp_per_token": 117.61847921088776, |
| "eval_bp_per_token/cds": 144.93489583333334, |
| "eval_bp_per_token/dig": 67.01047120418848, |
| "eval_bp_per_token/exon": 126.64543889845095, |
| "eval_bp_per_token/intron": 118.65144416647715, |
| "eval_bp_per_token/nig": 114.55350635008283, |
| "eval_bp_per_token/promoter": 115.81893004115226, |
| "eval_bp_per_token/utr": 131.58625525946704, |
| "eval_ppl_cds": 3.7194951394016527, |
| "eval_ppl_dig": 1.1234349213687769, |
| "eval_ppl_exon": 3.2792592230741118, |
| "eval_ppl_intron": 3.0595564329951, |
| "eval_ppl_nig": 2.8979520169575914, |
| "eval_ppl_promoter": 3.3032037253575854, |
| "eval_ppl_utr": 3.397446939966626, |
| "step": 3500, |
| "tokens_trained": 11.467765856 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.9957446808510638, |
| "grad_norm": 0.3863132894039154, |
| "loss": 1.1693, |
| "loss_ce": 1.1104393005371094, |
| "loss_region": 0.06132273003458977, |
| "loss_total": 1.1717619895935059, |
| "lr": 0.001102311073450815, |
| "router/selected_tokens_s0": 1200.25, |
| "router/selected_tokens_s1": 114.75, |
| "step": 3510, |
| "tokens_trained": 11.500531296 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.9985815602836879, |
| "grad_norm": 0.36480513215065, |
| "loss": 1.1735, |
| "loss_ce": 1.152591347694397, |
| "loss_region": 0.06367353349924088, |
| "loss_total": 1.2162648439407349, |
| "lr": 0.001101904181466964, |
| "router/selected_tokens_s0": 1090.5, |
| "router/selected_tokens_s1": 124.8125, |
| "step": 3520, |
| "tokens_trained": 11.533296736 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.001418439716312, |
| "grad_norm": 0.4523515999317169, |
| "loss": 1.1662, |
| "loss_ce": 1.1153082847595215, |
| "loss_region": 0.058803122490644455, |
| "loss_total": 1.1741113662719727, |
| "lr": 0.0011014972894831132, |
| "router/selected_tokens_s0": 1205.0, |
| "router/selected_tokens_s1": 95.3125, |
| "step": 3530, |
| "tokens_trained": 11.566062176 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.004255319148936, |
| "grad_norm": 0.43514934182167053, |
| "loss": 1.1669, |
| "loss_ce": 1.129390001296997, |
| "loss_region": 0.058623556047677994, |
| "loss_total": 1.1880135536193848, |
| "lr": 0.0011010903974992621, |
| "router/selected_tokens_s0": 1195.875, |
| "router/selected_tokens_s1": 92.9375, |
| "step": 3540, |
| "tokens_trained": 11.598827616 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.0070921985815602, |
| "grad_norm": 0.6533243060112, |
| "loss": 1.1678, |
| "loss_ce": 1.1146398782730103, |
| "loss_region": 0.061271265149116516, |
| "loss_total": 1.1759111881256104, |
| "lr": 0.001100683505515411, |
| "router/selected_tokens_s0": 1133.4375, |
| "router/selected_tokens_s1": 109.125, |
| "step": 3550, |
| "tokens_trained": 11.631593056 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.0099290780141843, |
| "grad_norm": 0.49821382761001587, |
| "loss": 1.1726, |
| "loss_ce": 1.166283130645752, |
| "loss_region": 0.06056985631585121, |
| "loss_total": 1.2268530130386353, |
| "lr": 0.00110027661353156, |
| "router/selected_tokens_s0": 1262.8125, |
| "router/selected_tokens_s1": 112.0, |
| "step": 3560, |
| "tokens_trained": 11.664358496 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.0127659574468084, |
| "grad_norm": 0.26887351274490356, |
| "loss": 1.1715, |
| "loss_ce": 1.030143141746521, |
| "loss_region": 0.06456618756055832, |
| "loss_total": 1.0947092771530151, |
| "lr": 0.001099869721547709, |
| "router/selected_tokens_s0": 1178.4375, |
| "router/selected_tokens_s1": 138.125, |
| "step": 3570, |
| "tokens_trained": 11.697123936 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.0156028368794325, |
| "grad_norm": 0.40560099482536316, |
| "loss": 1.1731, |
| "loss_ce": 1.1501858234405518, |
| "loss_region": 0.06369136273860931, |
| "loss_total": 1.2138772010803223, |
| "lr": 0.001099462829563858, |
| "router/selected_tokens_s0": 1185.0, |
| "router/selected_tokens_s1": 132.1875, |
| "step": 3580, |
| "tokens_trained": 11.729887776 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.0184397163120567, |
| "grad_norm": 0.6615747809410095, |
| "loss": 1.1718, |
| "loss_ce": 1.0754194259643555, |
| "loss_region": 0.058519624173641205, |
| "loss_total": 1.1339390277862549, |
| "lr": 0.001099055937580007, |
| "router/selected_tokens_s0": 1184.375, |
| "router/selected_tokens_s1": 91.1875, |
| "step": 3590, |
| "tokens_trained": 11.762653216 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.0212765957446808, |
| "grad_norm": 0.22958390414714813, |
| "loss": 1.1725, |
| "loss_ce": 1.121917486190796, |
| "loss_region": 0.057618919759988785, |
| "loss_total": 1.1795364618301392, |
| "lr": 0.001098649045596156, |
| "router/selected_tokens_s0": 1027.0625, |
| "router/selected_tokens_s1": 76.125, |
| "step": 3600, |
| "tokens_trained": 11.795417056 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.0241134751773049, |
| "grad_norm": 0.37797319889068604, |
| "loss": 1.1711, |
| "loss_ce": 1.1041591167449951, |
| "loss_region": 0.05944184213876724, |
| "loss_total": 1.1636009216308594, |
| "lr": 0.0010982421536123049, |
| "router/selected_tokens_s0": 1129.5625, |
| "router/selected_tokens_s1": 95.1875, |
| "step": 3610, |
| "tokens_trained": 11.828182496 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.026950354609929, |
| "grad_norm": 0.2652418613433838, |
| "loss": 1.1693, |
| "loss_ce": 1.1603299379348755, |
| "loss_region": 0.06184834986925125, |
| "loss_total": 1.222178339958191, |
| "lr": 0.0010978352616284538, |
| "router/selected_tokens_s0": 1040.3125, |
| "router/selected_tokens_s1": 106.3125, |
| "step": 3620, |
| "tokens_trained": 11.860946336 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.0297872340425531, |
| "grad_norm": 0.1825254261493683, |
| "loss": 1.1628, |
| "loss_ce": 1.0894384384155273, |
| "loss_region": 0.06096874922513962, |
| "loss_total": 1.1504071950912476, |
| "lr": 0.0010974283696446028, |
| "router/selected_tokens_s0": 1087.5, |
| "router/selected_tokens_s1": 103.625, |
| "step": 3630, |
| "tokens_trained": 11.893711776 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.0326241134751772, |
| "grad_norm": 0.6617575287818909, |
| "loss": 1.1621, |
| "loss_ce": 1.007035493850708, |
| "loss_region": 0.06269294023513794, |
| "loss_total": 1.0697283744812012, |
| "lr": 0.0010970214776607518, |
| "router/selected_tokens_s0": 1297.125, |
| "router/selected_tokens_s1": 135.875, |
| "step": 3640, |
| "tokens_trained": 11.926477216 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.0354609929078014, |
| "grad_norm": 0.3751685321331024, |
| "loss": 1.1598, |
| "loss_ce": 1.1335476636886597, |
| "loss_region": 0.059122197329998016, |
| "loss_total": 1.1926698684692383, |
| "lr": 0.0010966145856769007, |
| "router/selected_tokens_s0": 1139.875, |
| "router/selected_tokens_s1": 93.25, |
| "step": 3650, |
| "tokens_trained": 11.959242656 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.0382978723404255, |
| "grad_norm": 0.22427456080913544, |
| "loss": 1.1729, |
| "loss_ce": 1.051111102104187, |
| "loss_region": 0.05901859700679779, |
| "loss_total": 1.110129714012146, |
| "lr": 0.0010962076936930497, |
| "router/selected_tokens_s0": 1197.5625, |
| "router/selected_tokens_s1": 96.0, |
| "step": 3660, |
| "tokens_trained": 11.992008096 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.0411347517730496, |
| "grad_norm": 0.5057801604270935, |
| "loss": 1.1656, |
| "loss_ce": 1.129104495048523, |
| "loss_region": 0.05949552729725838, |
| "loss_total": 1.1886000633239746, |
| "lr": 0.0010958008017091987, |
| "router/selected_tokens_s0": 1171.25, |
| "router/selected_tokens_s1": 99.0, |
| "step": 3670, |
| "tokens_trained": 12.024773536 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.0439716312056737, |
| "grad_norm": 0.2510657012462616, |
| "loss": 1.1706, |
| "loss_ce": 1.0839104652404785, |
| "loss_region": 0.060210950672626495, |
| "loss_total": 1.1441214084625244, |
| "lr": 0.0010953939097253476, |
| "router/selected_tokens_s0": 1161.6875, |
| "router/selected_tokens_s1": 102.1875, |
| "step": 3680, |
| "tokens_trained": 12.057538976 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.0468085106382978, |
| "grad_norm": 0.2641873061656952, |
| "loss": 1.1578, |
| "loss_ce": 1.1306475400924683, |
| "loss_region": 0.06020496413111687, |
| "loss_total": 1.1908525228500366, |
| "lr": 0.0010949870177414966, |
| "router/selected_tokens_s0": 1051.5625, |
| "router/selected_tokens_s1": 95.3125, |
| "step": 3690, |
| "tokens_trained": 12.090304416 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.049645390070922, |
| "grad_norm": 0.44993430376052856, |
| "loss": 1.1666, |
| "loss_ce": 1.0936851501464844, |
| "loss_region": 0.06084345653653145, |
| "loss_total": 1.1545286178588867, |
| "lr": 0.0010945801257576456, |
| "router/selected_tokens_s0": 1209.5625, |
| "router/selected_tokens_s1": 111.5, |
| "step": 3700, |
| "tokens_trained": 12.123069856 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.052482269503546, |
| "grad_norm": 1.0144351720809937, |
| "loss": 1.1686, |
| "loss_ce": 1.113016128540039, |
| "loss_region": 0.06407901644706726, |
| "loss_total": 1.1770951747894287, |
| "lr": 0.0010941732337737947, |
| "router/selected_tokens_s0": 981.5, |
| "router/selected_tokens_s1": 114.9375, |
| "step": 3710, |
| "tokens_trained": 12.155835296 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.0553191489361702, |
| "grad_norm": 0.5086681842803955, |
| "loss": 1.1676, |
| "loss_ce": 1.087952733039856, |
| "loss_region": 0.06016521528363228, |
| "loss_total": 1.1481178998947144, |
| "lr": 0.0010937663417899437, |
| "router/selected_tokens_s0": 1133.25, |
| "router/selected_tokens_s1": 100.625, |
| "step": 3720, |
| "tokens_trained": 12.188600576 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.0581560283687943, |
| "grad_norm": 0.2624291181564331, |
| "loss": 1.166, |
| "loss_ce": 1.1549427509307861, |
| "loss_region": 0.060712967067956924, |
| "loss_total": 1.2156556844711304, |
| "lr": 0.0010933594498060927, |
| "router/selected_tokens_s0": 1177.875, |
| "router/selected_tokens_s1": 108.5, |
| "step": 3730, |
| "tokens_trained": 12.221366016 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.0609929078014184, |
| "grad_norm": 0.1420605629682541, |
| "loss": 1.1632, |
| "loss_ce": 1.1388192176818848, |
| "loss_region": 0.05755767226219177, |
| "loss_total": 1.196376919746399, |
| "lr": 0.0010929525578222414, |
| "router/selected_tokens_s0": 1136.75, |
| "router/selected_tokens_s1": 82.0625, |
| "step": 3740, |
| "tokens_trained": 12.254131456 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.0638297872340425, |
| "grad_norm": 0.24459026753902435, |
| "loss": 1.1666, |
| "loss_ce": 1.0850238800048828, |
| "loss_region": 0.05748440697789192, |
| "loss_total": 1.1425082683563232, |
| "lr": 0.0010925456658383904, |
| "router/selected_tokens_s0": 1181.0, |
| "router/selected_tokens_s1": 83.125, |
| "step": 3750, |
| "tokens_trained": 12.286896896 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.0666666666666667, |
| "grad_norm": 0.3674156963825226, |
| "loss": 1.1656, |
| "loss_ce": 1.1350717544555664, |
| "loss_region": 0.060446847230196, |
| "loss_total": 1.1955186128616333, |
| "lr": 0.0010921387738545394, |
| "router/selected_tokens_s0": 1207.0, |
| "router/selected_tokens_s1": 108.0625, |
| "step": 3760, |
| "tokens_trained": 12.319662336 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.0695035460992908, |
| "grad_norm": 0.3036234378814697, |
| "loss": 1.159, |
| "loss_ce": 1.0529462099075317, |
| "loss_region": 0.06247229501605034, |
| "loss_total": 1.115418553352356, |
| "lr": 0.0010917318818706883, |
| "router/selected_tokens_s0": 1305.6875, |
| "router/selected_tokens_s1": 132.8125, |
| "step": 3770, |
| "tokens_trained": 12.352427776 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.0723404255319149, |
| "grad_norm": 0.6468788385391235, |
| "loss": 1.1664, |
| "loss_ce": 1.1280831098556519, |
| "loss_region": 0.06378360092639923, |
| "loss_total": 1.1918667554855347, |
| "lr": 0.0010913249898868375, |
| "router/selected_tokens_s0": 1151.0, |
| "router/selected_tokens_s1": 129.5625, |
| "step": 3780, |
| "tokens_trained": 12.385193216 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.075177304964539, |
| "grad_norm": 0.2749446630477905, |
| "loss": 1.166, |
| "loss_ce": 1.115018367767334, |
| "loss_region": 0.05860990658402443, |
| "loss_total": 1.173628330230713, |
| "lr": 0.0010909180979029865, |
| "router/selected_tokens_s0": 1103.9375, |
| "router/selected_tokens_s1": 87.1875, |
| "step": 3790, |
| "tokens_trained": 12.417958656 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.0780141843971631, |
| "grad_norm": 0.3598608076572418, |
| "loss": 1.1649, |
| "loss_ce": 1.0172104835510254, |
| "loss_region": 0.05948943644762039, |
| "loss_total": 1.07669997215271, |
| "lr": 0.0010905112059191354, |
| "router/selected_tokens_s0": 1112.375, |
| "router/selected_tokens_s1": 94.125, |
| "step": 3800, |
| "tokens_trained": 12.450724096 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.0808510638297872, |
| "grad_norm": 0.3002602458000183, |
| "loss": 1.1664, |
| "loss_ce": 1.0674883127212524, |
| "loss_region": 0.05848415940999985, |
| "loss_total": 1.1259725093841553, |
| "lr": 0.0010901043139352844, |
| "router/selected_tokens_s0": 1162.125, |
| "router/selected_tokens_s1": 90.0, |
| "step": 3810, |
| "tokens_trained": 12.483489536 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.0836879432624114, |
| "grad_norm": 0.3532373309135437, |
| "loss": 1.1578, |
| "loss_ce": 1.1755656003952026, |
| "loss_region": 0.0601661391556263, |
| "loss_total": 1.2357317209243774, |
| "lr": 0.0010896974219514334, |
| "router/selected_tokens_s0": 1189.0625, |
| "router/selected_tokens_s1": 103.5625, |
| "step": 3820, |
| "tokens_trained": 12.516254976 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.0865248226950355, |
| "grad_norm": 0.3577895760536194, |
| "loss": 1.1682, |
| "loss_ce": 1.184386134147644, |
| "loss_region": 0.06025141850113869, |
| "loss_total": 1.2446376085281372, |
| "lr": 0.0010892905299675823, |
| "router/selected_tokens_s0": 1258.5, |
| "router/selected_tokens_s1": 109.8125, |
| "step": 3830, |
| "tokens_trained": 12.549020416 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.0893617021276596, |
| "grad_norm": 0.3110039234161377, |
| "loss": 1.1665, |
| "loss_ce": 1.101610541343689, |
| "loss_region": 0.05868781730532646, |
| "loss_total": 1.1602983474731445, |
| "lr": 0.0010888836379837313, |
| "router/selected_tokens_s0": 1235.0, |
| "router/selected_tokens_s1": 95.5625, |
| "step": 3840, |
| "tokens_trained": 12.581785856 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.0921985815602837, |
| "grad_norm": 0.35310038924217224, |
| "loss": 1.1656, |
| "loss_ce": 1.0989956855773926, |
| "loss_region": 0.05978355556726456, |
| "loss_total": 1.158779263496399, |
| "lr": 0.0010884767459998803, |
| "router/selected_tokens_s0": 1111.4375, |
| "router/selected_tokens_s1": 96.6875, |
| "step": 3850, |
| "tokens_trained": 12.614549696 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.0950354609929078, |
| "grad_norm": 0.12972678244113922, |
| "loss": 1.1596, |
| "loss_ce": 1.0401544570922852, |
| "loss_region": 0.05898905545473099, |
| "loss_total": 1.0991435050964355, |
| "lr": 0.0010880698540160292, |
| "router/selected_tokens_s0": 1188.875, |
| "router/selected_tokens_s1": 95.0625, |
| "step": 3860, |
| "tokens_trained": 12.647315136 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.097872340425532, |
| "grad_norm": 0.5242721438407898, |
| "loss": 1.1574, |
| "loss_ce": 1.0777604579925537, |
| "loss_region": 0.06169040501117706, |
| "loss_total": 1.1394509077072144, |
| "lr": 0.0010876629620321782, |
| "router/selected_tokens_s0": 1112.3125, |
| "router/selected_tokens_s1": 110.4375, |
| "step": 3870, |
| "tokens_trained": 12.680079808 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.100709219858156, |
| "grad_norm": 0.27978047728538513, |
| "loss": 1.1627, |
| "loss_ce": 1.0561823844909668, |
| "loss_region": 0.06105148047208786, |
| "loss_total": 1.1172338724136353, |
| "lr": 0.0010872560700483272, |
| "router/selected_tokens_s0": 1237.75, |
| "router/selected_tokens_s1": 114.375, |
| "step": 3880, |
| "tokens_trained": 12.712844448 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.1035460992907802, |
| "grad_norm": 0.6515991687774658, |
| "loss": 1.1645, |
| "loss_ce": 1.1110643148422241, |
| "loss_region": 0.05981144309043884, |
| "loss_total": 1.1708757877349854, |
| "lr": 0.0010868491780644761, |
| "router/selected_tokens_s0": 1082.875, |
| "router/selected_tokens_s1": 95.0, |
| "step": 3890, |
| "tokens_trained": 12.745609888 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.1063829787234043, |
| "grad_norm": 0.38111910223960876, |
| "loss": 1.1597, |
| "loss_ce": 1.1041885614395142, |
| "loss_region": 0.0614677332341671, |
| "loss_total": 1.165656328201294, |
| "lr": 0.001086442286080625, |
| "router/selected_tokens_s0": 1140.0, |
| "router/selected_tokens_s1": 111.4375, |
| "step": 3900, |
| "tokens_trained": 12.778375328 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.1092198581560284, |
| "grad_norm": 0.18037572503089905, |
| "loss": 1.1681, |
| "loss_ce": 1.0915865898132324, |
| "loss_region": 0.06151479482650757, |
| "loss_total": 1.1531014442443848, |
| "lr": 0.001086035394096774, |
| "router/selected_tokens_s0": 1100.375, |
| "router/selected_tokens_s1": 108.5, |
| "step": 3910, |
| "tokens_trained": 12.811139968 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.1120567375886525, |
| "grad_norm": 0.4105333983898163, |
| "loss": 1.1675, |
| "loss_ce": 1.118082880973816, |
| "loss_region": 0.058428194373846054, |
| "loss_total": 1.1765110492706299, |
| "lr": 0.001085628502112923, |
| "router/selected_tokens_s0": 1064.75, |
| "router/selected_tokens_s1": 84.5625, |
| "step": 3920, |
| "tokens_trained": 12.843905408 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.1148936170212767, |
| "grad_norm": 0.31400883197784424, |
| "loss": 1.1628, |
| "loss_ce": 1.0323964357376099, |
| "loss_region": 0.05940539017319679, |
| "loss_total": 1.0918018817901611, |
| "lr": 0.001085221610129072, |
| "router/selected_tokens_s0": 1196.875, |
| "router/selected_tokens_s1": 98.8125, |
| "step": 3930, |
| "tokens_trained": 12.876670848 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.1177304964539008, |
| "grad_norm": 0.30934834480285645, |
| "loss": 1.1643, |
| "loss_ce": 1.0789693593978882, |
| "loss_region": 0.05930301174521446, |
| "loss_total": 1.1382724046707153, |
| "lr": 0.001084814718145221, |
| "router/selected_tokens_s0": 1260.875, |
| "router/selected_tokens_s1": 103.0, |
| "step": 3940, |
| "tokens_trained": 12.909436288 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.1205673758865249, |
| "grad_norm": 0.13591480255126953, |
| "loss": 1.1572, |
| "loss_ce": 1.131282925605774, |
| "loss_region": 0.0612853579223156, |
| "loss_total": 1.192568302154541, |
| "lr": 0.00108440782616137, |
| "router/selected_tokens_s0": 1195.6875, |
| "router/selected_tokens_s1": 114.375, |
| "step": 3950, |
| "tokens_trained": 12.942201728 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.123404255319149, |
| "grad_norm": 0.5181418657302856, |
| "loss": 1.155, |
| "loss_ce": 1.0501888990402222, |
| "loss_region": 0.060443535447120667, |
| "loss_total": 1.1106324195861816, |
| "lr": 0.001084000934177519, |
| "router/selected_tokens_s0": 1220.875, |
| "router/selected_tokens_s1": 108.0, |
| "step": 3960, |
| "tokens_trained": 12.974967168 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.1262411347517731, |
| "grad_norm": 0.5863410234451294, |
| "loss": 1.1628, |
| "loss_ce": 1.0781110525131226, |
| "loss_region": 0.057768795639276505, |
| "loss_total": 1.1358798742294312, |
| "lr": 0.001083594042193668, |
| "router/selected_tokens_s0": 1121.0, |
| "router/selected_tokens_s1": 81.8125, |
| "step": 3970, |
| "tokens_trained": 13.007732608 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.1290780141843972, |
| "grad_norm": 0.3233559727668762, |
| "loss": 1.1578, |
| "loss_ce": 1.1254554986953735, |
| "loss_region": 0.06000566482543945, |
| "loss_total": 1.185461163520813, |
| "lr": 0.001083187150209817, |
| "router/selected_tokens_s0": 1204.9375, |
| "router/selected_tokens_s1": 104.3125, |
| "step": 3980, |
| "tokens_trained": 13.040497248 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.1319148936170214, |
| "grad_norm": 0.6287395358085632, |
| "loss": 1.1612, |
| "loss_ce": 1.0396240949630737, |
| "loss_region": 0.06029755249619484, |
| "loss_total": 1.099921703338623, |
| "lr": 0.0010827802582259658, |
| "router/selected_tokens_s0": 1285.6875, |
| "router/selected_tokens_s1": 113.0625, |
| "step": 3990, |
| "tokens_trained": 13.073261888 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.1347517730496455, |
| "grad_norm": 0.3079366385936737, |
| "loss": 1.1648, |
| "loss_ce": 1.0844305753707886, |
| "loss_region": 0.058176081627607346, |
| "loss_total": 1.1426066160202026, |
| "lr": 0.0010823733662421147, |
| "router/selected_tokens_s0": 1028.875, |
| "router/selected_tokens_s1": 79.375, |
| "step": 4000, |
| "tokens_trained": 13.106027328 |
| }, |
| { |
| "epoch": 1.1347517730496455, |
| "eval_ppl": 3.0168909900495806, |
| "eval_runtime": 2.0409, |
| "step": 4000, |
| "tokens_trained": 13.106027328 |
| }, |
| { |
| "epoch": 1.1347517730496455, |
| "eval_F": 0.008799303274372658, |
| "eval_F_cds": 0.007753121911777918, |
| "eval_F_dig": 0.010547699038987422, |
| "eval_F_exon": 0.00823582174746198, |
| "eval_F_intron": 0.008802201374555693, |
| "eval_F_nig": 0.008589756691044479, |
| "eval_F_promoter": 0.00924873139199461, |
| "eval_F_utr": 0.008601485808081346, |
| "eval_G": 0.04391245460885404, |
| "eval_G_cds": 0.038505093460830114, |
| "eval_G_dig": 0.05892092399796859, |
| "eval_G_exon": 0.0432158734931572, |
| "eval_G_intron": 0.04394914080325982, |
| "eval_G_nig": 0.044819965408996784, |
| "eval_G_promoter": 0.043103368419918464, |
| "eval_G_utr": 0.04227885581319747, |
| "eval_avg_bp_per_token": 113.64536132223428, |
| "eval_bp_per_token/cds": 128.9803012746234, |
| "eval_bp_per_token/dig": 94.80740740740741, |
| "eval_bp_per_token/exon": 121.42079207920793, |
| "eval_bp_per_token/intron": 113.60794390488219, |
| "eval_bp_per_token/nig": 116.41773288439956, |
| "eval_bp_per_token/promoter": 108.12293682413204, |
| "eval_bp_per_token/utr": 116.25898389095416, |
| "eval_ppl_cds": 3.71392302020704, |
| "eval_ppl_dig": 1.1094123517044365, |
| "eval_ppl_exon": 3.2568894446430092, |
| "eval_ppl_intron": 3.037550000900063, |
| "eval_ppl_nig": 2.8726060355692917, |
| "eval_ppl_promoter": 3.291369088440733, |
| "eval_ppl_utr": 3.3899334301119, |
| "step": 4000, |
| "tokens_trained": 13.106027328 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.1375886524822696, |
| "grad_norm": 0.3257603943347931, |
| "loss": 1.158, |
| "loss_ce": 1.1083933115005493, |
| "loss_region": 0.06035665422677994, |
| "loss_total": 1.1687499284744263, |
| "lr": 0.0010819664742582637, |
| "router/selected_tokens_s0": 1088.5, |
| "router/selected_tokens_s1": 99.5625, |
| "step": 4010, |
| "tokens_trained": 13.138791968 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.1404255319148937, |
| "grad_norm": 0.3850681185722351, |
| "loss": 1.1573, |
| "loss_ce": 1.131569504737854, |
| "loss_region": 0.059893906116485596, |
| "loss_total": 1.1914634704589844, |
| "lr": 0.0010815595822744127, |
| "router/selected_tokens_s0": 1119.5, |
| "router/selected_tokens_s1": 98.0, |
| "step": 4020, |
| "tokens_trained": 13.171557408 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.1432624113475178, |
| "grad_norm": 0.39153483510017395, |
| "loss": 1.1633, |
| "loss_ce": 1.0569000244140625, |
| "loss_region": 0.058693308383226395, |
| "loss_total": 1.1155933141708374, |
| "lr": 0.0010811526902905618, |
| "router/selected_tokens_s0": 1058.75, |
| "router/selected_tokens_s1": 85.125, |
| "step": 4030, |
| "tokens_trained": 13.204322848 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.1460992907801417, |
| "grad_norm": 0.26381781697273254, |
| "loss": 1.1584, |
| "loss_ce": 1.073149561882019, |
| "loss_region": 0.06016290560364723, |
| "loss_total": 1.133312463760376, |
| "lr": 0.0010807457983067108, |
| "router/selected_tokens_s0": 1159.875, |
| "router/selected_tokens_s1": 102.625, |
| "step": 4040, |
| "tokens_trained": 13.237088288 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.148936170212766, |
| "grad_norm": 0.45886608958244324, |
| "loss": 1.1609, |
| "loss_ce": 1.0923855304718018, |
| "loss_region": 0.06084444373846054, |
| "loss_total": 1.1532299518585205, |
| "lr": 0.0010803389063228598, |
| "router/selected_tokens_s0": 1083.25, |
| "router/selected_tokens_s1": 102.1875, |
| "step": 4050, |
| "tokens_trained": 13.269853704 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.15177304964539, |
| "grad_norm": 0.5070511698722839, |
| "loss": 1.1679, |
| "loss_ce": 1.1500152349472046, |
| "loss_region": 0.06119634956121445, |
| "loss_total": 1.2112115621566772, |
| "lr": 0.0010799320143390087, |
| "router/selected_tokens_s0": 1188.3125, |
| "router/selected_tokens_s1": 112.9375, |
| "step": 4060, |
| "tokens_trained": 13.302619144 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.1546099290780143, |
| "grad_norm": 0.33511748909950256, |
| "loss": 1.1592, |
| "loss_ce": 1.1047053337097168, |
| "loss_region": 0.059827499091625214, |
| "loss_total": 1.1645327806472778, |
| "lr": 0.0010795251223551577, |
| "router/selected_tokens_s0": 1166.8125, |
| "router/selected_tokens_s1": 100.875, |
| "step": 4070, |
| "tokens_trained": 13.335384584 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.1574468085106382, |
| "grad_norm": 0.457485556602478, |
| "loss": 1.1593, |
| "loss_ce": 1.0707032680511475, |
| "loss_region": 0.06051037833094597, |
| "loss_total": 1.131213665008545, |
| "lr": 0.0010791182303713067, |
| "router/selected_tokens_s0": 1190.8125, |
| "router/selected_tokens_s1": 107.6875, |
| "step": 4080, |
| "tokens_trained": 13.368150024 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.1602836879432625, |
| "grad_norm": 0.22557711601257324, |
| "loss": 1.1636, |
| "loss_ce": 1.1058050394058228, |
| "loss_region": 0.06027519330382347, |
| "loss_total": 1.1660802364349365, |
| "lr": 0.0010787113383874556, |
| "router/selected_tokens_s0": 1245.875, |
| "router/selected_tokens_s1": 109.8125, |
| "step": 4090, |
| "tokens_trained": 13.400914384 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.1631205673758864, |
| "grad_norm": 0.2770388126373291, |
| "loss": 1.1638, |
| "loss_ce": 1.115969181060791, |
| "loss_region": 0.06104355677962303, |
| "loss_total": 1.1770126819610596, |
| "lr": 0.0010783044464036046, |
| "router/selected_tokens_s0": 1222.9375, |
| "router/selected_tokens_s1": 115.0625, |
| "step": 4100, |
| "tokens_trained": 13.433677248 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.1659574468085105, |
| "grad_norm": 0.32032445073127747, |
| "loss": 1.1597, |
| "loss_ce": 1.1212356090545654, |
| "loss_region": 0.05913654342293739, |
| "loss_total": 1.1803721189498901, |
| "lr": 0.0010778975544197536, |
| "router/selected_tokens_s0": 1088.9375, |
| "router/selected_tokens_s1": 90.5625, |
| "step": 4110, |
| "tokens_trained": 13.466442688 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.1687943262411347, |
| "grad_norm": 0.52224200963974, |
| "loss": 1.1598, |
| "loss_ce": 1.11467707157135, |
| "loss_region": 0.058027200400829315, |
| "loss_total": 1.1727042198181152, |
| "lr": 0.0010774906624359025, |
| "router/selected_tokens_s0": 1134.0, |
| "router/selected_tokens_s1": 84.875, |
| "step": 4120, |
| "tokens_trained": 13.499207928 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.1716312056737588, |
| "grad_norm": 0.31186333298683167, |
| "loss": 1.1604, |
| "loss_ce": 1.1230143308639526, |
| "loss_region": 0.06094019487500191, |
| "loss_total": 1.1839544773101807, |
| "lr": 0.0010770837704520515, |
| "router/selected_tokens_s0": 1114.4375, |
| "router/selected_tokens_s1": 105.6875, |
| "step": 4130, |
| "tokens_trained": 13.531973368 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.174468085106383, |
| "grad_norm": 0.5124596953392029, |
| "loss": 1.1671, |
| "loss_ce": 1.1359376907348633, |
| "loss_region": 0.05806201323866844, |
| "loss_total": 1.1939996480941772, |
| "lr": 0.0010766768784682005, |
| "router/selected_tokens_s0": 1088.4375, |
| "router/selected_tokens_s1": 82.125, |
| "step": 4140, |
| "tokens_trained": 13.564734752 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.177304964539007, |
| "grad_norm": 0.4620847702026367, |
| "loss": 1.1632, |
| "loss_ce": 1.1566661596298218, |
| "loss_region": 0.06023990362882614, |
| "loss_total": 1.2169060707092285, |
| "lr": 0.0010762699864843494, |
| "router/selected_tokens_s0": 1154.9375, |
| "router/selected_tokens_s1": 103.625, |
| "step": 4150, |
| "tokens_trained": 13.597500192 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.1801418439716311, |
| "grad_norm": 0.1790580153465271, |
| "loss": 1.1489, |
| "loss_ce": 1.0375577211380005, |
| "loss_region": 0.05896264687180519, |
| "loss_total": 1.0965204238891602, |
| "lr": 0.0010758630945004984, |
| "router/selected_tokens_s0": 1118.625, |
| "router/selected_tokens_s1": 90.9375, |
| "step": 4160, |
| "tokens_trained": 13.630265632 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.1829787234042553, |
| "grad_norm": 0.43203845620155334, |
| "loss": 1.156, |
| "loss_ce": 1.1000916957855225, |
| "loss_region": 0.05994558706879616, |
| "loss_total": 1.1600372791290283, |
| "lr": 0.0010754562025166474, |
| "router/selected_tokens_s0": 1172.6875, |
| "router/selected_tokens_s1": 102.4375, |
| "step": 4170, |
| "tokens_trained": 13.663031072 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.1858156028368794, |
| "grad_norm": 0.2970203459262848, |
| "loss": 1.1578, |
| "loss_ce": 1.1184784173965454, |
| "loss_region": 0.06149717792868614, |
| "loss_total": 1.1799756288528442, |
| "lr": 0.0010750493105327963, |
| "router/selected_tokens_s0": 1102.0, |
| "router/selected_tokens_s1": 108.875, |
| "step": 4180, |
| "tokens_trained": 13.695793424 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.1886524822695035, |
| "grad_norm": 0.18619275093078613, |
| "loss": 1.1542, |
| "loss_ce": 1.0847108364105225, |
| "loss_region": 0.05977150425314903, |
| "loss_total": 1.1444823741912842, |
| "lr": 0.0010746424185489453, |
| "router/selected_tokens_s0": 1201.125, |
| "router/selected_tokens_s1": 102.5, |
| "step": 4190, |
| "tokens_trained": 13.728557088 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.1914893617021276, |
| "grad_norm": 0.4238639175891876, |
| "loss": 1.1619, |
| "loss_ce": 1.0785303115844727, |
| "loss_region": 0.06018838286399841, |
| "loss_total": 1.1387187242507935, |
| "lr": 0.0010742355265650943, |
| "router/selected_tokens_s0": 1141.3125, |
| "router/selected_tokens_s1": 101.8125, |
| "step": 4200, |
| "tokens_trained": 13.761322528 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.1943262411347517, |
| "grad_norm": 0.25106480717658997, |
| "loss": 1.1582, |
| "loss_ce": 1.0537445545196533, |
| "loss_region": 0.06022384390234947, |
| "loss_total": 1.1139683723449707, |
| "lr": 0.0010738286345812434, |
| "router/selected_tokens_s0": 1278.625, |
| "router/selected_tokens_s1": 112.1875, |
| "step": 4210, |
| "tokens_trained": 13.794087968 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.1971631205673758, |
| "grad_norm": 0.5989421606063843, |
| "loss": 1.1702, |
| "loss_ce": 1.1273736953735352, |
| "loss_region": 0.06084655970335007, |
| "loss_total": 1.1882202625274658, |
| "lr": 0.0010734217425973924, |
| "router/selected_tokens_s0": 1064.9375, |
| "router/selected_tokens_s1": 101.0625, |
| "step": 4220, |
| "tokens_trained": 13.826853408 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.2, |
| "grad_norm": 0.27547699213027954, |
| "loss": 1.1629, |
| "loss_ce": 1.063729166984558, |
| "loss_region": 0.05869840458035469, |
| "loss_total": 1.1224275827407837, |
| "lr": 0.0010730148506135414, |
| "router/selected_tokens_s0": 1075.6875, |
| "router/selected_tokens_s1": 86.0625, |
| "step": 4230, |
| "tokens_trained": 13.859618048 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.202836879432624, |
| "grad_norm": 0.3737616240978241, |
| "loss": 1.155, |
| "loss_ce": 1.0955439805984497, |
| "loss_region": 0.05996520072221756, |
| "loss_total": 1.1555092334747314, |
| "lr": 0.0010726079586296901, |
| "router/selected_tokens_s0": 1201.9375, |
| "router/selected_tokens_s1": 104.0625, |
| "step": 4240, |
| "tokens_trained": 13.892383488 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.2056737588652482, |
| "grad_norm": 0.22606416046619415, |
| "loss": 1.1549, |
| "loss_ce": 1.1174266338348389, |
| "loss_region": 0.060742564499378204, |
| "loss_total": 1.1781692504882812, |
| "lr": 0.001072201066645839, |
| "router/selected_tokens_s0": 1183.0, |
| "router/selected_tokens_s1": 108.875, |
| "step": 4250, |
| "tokens_trained": 13.925146528 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.2085106382978723, |
| "grad_norm": 0.2320932000875473, |
| "loss": 1.1593, |
| "loss_ce": 1.0501296520233154, |
| "loss_region": 0.0594555027782917, |
| "loss_total": 1.109585165977478, |
| "lr": 0.001071794174661988, |
| "router/selected_tokens_s0": 1174.625, |
| "router/selected_tokens_s1": 96.5, |
| "step": 4260, |
| "tokens_trained": 13.957911968 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.2113475177304964, |
| "grad_norm": 0.28906700015068054, |
| "loss": 1.1569, |
| "loss_ce": 1.0986992120742798, |
| "loss_region": 0.05964815616607666, |
| "loss_total": 1.1583473682403564, |
| "lr": 0.0010713872826781372, |
| "router/selected_tokens_s0": 1062.25, |
| "router/selected_tokens_s1": 92.6875, |
| "step": 4270, |
| "tokens_trained": 13.990673272 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.2141843971631205, |
| "grad_norm": 0.2225026935338974, |
| "loss": 1.1536, |
| "loss_ce": 1.060550570487976, |
| "loss_region": 0.059843163937330246, |
| "loss_total": 1.1203937530517578, |
| "lr": 0.0010709803906942862, |
| "router/selected_tokens_s0": 1115.375, |
| "router/selected_tokens_s1": 97.375, |
| "step": 4280, |
| "tokens_trained": 14.023438712 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.2170212765957447, |
| "grad_norm": 0.20924420654773712, |
| "loss": 1.16, |
| "loss_ce": 1.0435292720794678, |
| "loss_region": 0.05888236686587334, |
| "loss_total": 1.1024116277694702, |
| "lr": 0.0010705734987104352, |
| "router/selected_tokens_s0": 1131.125, |
| "router/selected_tokens_s1": 91.0625, |
| "step": 4290, |
| "tokens_trained": 14.056204152 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.2198581560283688, |
| "grad_norm": 0.2678433656692505, |
| "loss": 1.1598, |
| "loss_ce": 1.075086236000061, |
| "loss_region": 0.06008180230855942, |
| "loss_total": 1.1351680755615234, |
| "lr": 0.0010701666067265841, |
| "router/selected_tokens_s0": 1239.25, |
| "router/selected_tokens_s1": 107.4375, |
| "step": 4300, |
| "tokens_trained": 14.088969592 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.222695035460993, |
| "grad_norm": 0.1258992999792099, |
| "loss": 1.1546, |
| "loss_ce": 1.0750923156738281, |
| "loss_region": 0.05863800272345543, |
| "loss_total": 1.1337302923202515, |
| "lr": 0.001069759714742733, |
| "router/selected_tokens_s0": 1133.375, |
| "router/selected_tokens_s1": 89.625, |
| "step": 4310, |
| "tokens_trained": 14.121735032 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.225531914893617, |
| "grad_norm": 0.2875233292579651, |
| "loss": 1.1516, |
| "loss_ce": 1.1637287139892578, |
| "loss_region": 0.05875501036643982, |
| "loss_total": 1.22248375415802, |
| "lr": 0.001069352822758882, |
| "router/selected_tokens_s0": 1170.875, |
| "router/selected_tokens_s1": 92.9375, |
| "step": 4320, |
| "tokens_trained": 14.154499672 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.2283687943262411, |
| "grad_norm": 0.2648707926273346, |
| "loss": 1.156, |
| "loss_ce": 1.1075119972229004, |
| "loss_region": 0.062029022723436356, |
| "loss_total": 1.1695410013198853, |
| "lr": 0.001068945930775031, |
| "router/selected_tokens_s0": 1142.9375, |
| "router/selected_tokens_s1": 116.0, |
| "step": 4330, |
| "tokens_trained": 14.187265112 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.2312056737588652, |
| "grad_norm": 0.1432296186685562, |
| "loss": 1.1563, |
| "loss_ce": 1.0992697477340698, |
| "loss_region": 0.05967455357313156, |
| "loss_total": 1.1589442491531372, |
| "lr": 0.00106853903879118, |
| "router/selected_tokens_s0": 1193.75, |
| "router/selected_tokens_s1": 100.875, |
| "step": 4340, |
| "tokens_trained": 14.220030552 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.2340425531914894, |
| "grad_norm": 0.3481175899505615, |
| "loss": 1.1505, |
| "loss_ce": 1.050426721572876, |
| "loss_region": 0.060125932097435, |
| "loss_total": 1.1105526685714722, |
| "lr": 0.001068132146807329, |
| "router/selected_tokens_s0": 1168.6875, |
| "router/selected_tokens_s1": 103.3125, |
| "step": 4350, |
| "tokens_trained": 14.252795992 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.2368794326241135, |
| "grad_norm": 0.4038959741592407, |
| "loss": 1.1534, |
| "loss_ce": 1.0327410697937012, |
| "loss_region": 0.05936980992555618, |
| "loss_total": 1.0921108722686768, |
| "lr": 0.001067725254823478, |
| "router/selected_tokens_s0": 1323.1875, |
| "router/selected_tokens_s1": 103.6875, |
| "step": 4360, |
| "tokens_trained": 14.285561432 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.2397163120567376, |
| "grad_norm": 0.2256237119436264, |
| "loss": 1.155, |
| "loss_ce": 1.1195412874221802, |
| "loss_region": 0.059909120202064514, |
| "loss_total": 1.1794503927230835, |
| "lr": 0.0010673183628396269, |
| "router/selected_tokens_s0": 1134.25, |
| "router/selected_tokens_s1": 99.75, |
| "step": 4370, |
| "tokens_trained": 14.318326792 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.2425531914893617, |
| "grad_norm": 0.29938000440597534, |
| "loss": 1.1542, |
| "loss_ce": 1.0233904123306274, |
| "loss_region": 0.060831498354673386, |
| "loss_total": 1.0842219591140747, |
| "lr": 0.0010669114708557758, |
| "router/selected_tokens_s0": 1160.1875, |
| "router/selected_tokens_s1": 107.8125, |
| "step": 4380, |
| "tokens_trained": 14.351092216 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.2453900709219858, |
| "grad_norm": 0.13847291469573975, |
| "loss": 1.1538, |
| "loss_ce": 1.1036982536315918, |
| "loss_region": 0.060592759400606155, |
| "loss_total": 1.1642910242080688, |
| "lr": 0.0010665045788719248, |
| "router/selected_tokens_s0": 1177.9375, |
| "router/selected_tokens_s1": 108.25, |
| "step": 4390, |
| "tokens_trained": 14.383857656 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.24822695035461, |
| "grad_norm": 0.23555094003677368, |
| "loss": 1.1552, |
| "loss_ce": 1.0954946279525757, |
| "loss_region": 0.058905620127916336, |
| "loss_total": 1.1544002294540405, |
| "lr": 0.0010660976868880738, |
| "router/selected_tokens_s0": 1160.3125, |
| "router/selected_tokens_s1": 93.0, |
| "step": 4400, |
| "tokens_trained": 14.416623096 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.251063829787234, |
| "grad_norm": 0.3022359609603882, |
| "loss": 1.1594, |
| "loss_ce": 0.9607714414596558, |
| "loss_region": 0.05570393428206444, |
| "loss_total": 1.0164753198623657, |
| "lr": 0.0010656907949042227, |
| "router/selected_tokens_s0": 1184.0, |
| "router/selected_tokens_s1": 67.9375, |
| "step": 4410, |
| "tokens_trained": 14.449387936 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.2539007092198582, |
| "grad_norm": 0.2786804437637329, |
| "loss": 1.1606, |
| "loss_ce": 1.0460023880004883, |
| "loss_region": 0.06192931532859802, |
| "loss_total": 1.1079317331314087, |
| "lr": 0.0010652839029203717, |
| "router/selected_tokens_s0": 1173.0625, |
| "router/selected_tokens_s1": 117.5, |
| "step": 4420, |
| "tokens_trained": 14.48214904 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.2567375886524823, |
| "grad_norm": 0.3700876533985138, |
| "loss": 1.1532, |
| "loss_ce": 1.1097447872161865, |
| "loss_region": 0.05792572721838951, |
| "loss_total": 1.167670488357544, |
| "lr": 0.0010648770109365207, |
| "router/selected_tokens_s0": 1125.5, |
| "router/selected_tokens_s1": 83.5625, |
| "step": 4430, |
| "tokens_trained": 14.51491448 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.2595744680851064, |
| "grad_norm": 0.2731934189796448, |
| "loss": 1.1575, |
| "loss_ce": 1.1346114873886108, |
| "loss_region": 0.061083775013685226, |
| "loss_total": 1.1956952810287476, |
| "lr": 0.0010644701189526696, |
| "router/selected_tokens_s0": 1189.125, |
| "router/selected_tokens_s1": 112.0625, |
| "step": 4440, |
| "tokens_trained": 14.54767992 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.2624113475177305, |
| "grad_norm": 0.36961379647254944, |
| "loss": 1.1538, |
| "loss_ce": 1.0246363878250122, |
| "loss_region": 0.05892600491642952, |
| "loss_total": 1.0835623741149902, |
| "lr": 0.0010640632269688186, |
| "router/selected_tokens_s0": 1223.9375, |
| "router/selected_tokens_s1": 95.8125, |
| "step": 4450, |
| "tokens_trained": 14.58044536 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.2652482269503547, |
| "grad_norm": 0.14416196942329407, |
| "loss": 1.1549, |
| "loss_ce": 1.117681622505188, |
| "loss_region": 0.06037626788020134, |
| "loss_total": 1.1780579090118408, |
| "lr": 0.0010636563349849678, |
| "router/selected_tokens_s0": 1238.3125, |
| "router/selected_tokens_s1": 109.9375, |
| "step": 4460, |
| "tokens_trained": 14.613210784 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.2680851063829788, |
| "grad_norm": 0.4883098304271698, |
| "loss": 1.1595, |
| "loss_ce": 1.0619388818740845, |
| "loss_region": 0.061121564358472824, |
| "loss_total": 1.1230604648590088, |
| "lr": 0.0010632494430011167, |
| "router/selected_tokens_s0": 1183.125, |
| "router/selected_tokens_s1": 112.5625, |
| "step": 4470, |
| "tokens_trained": 14.645976224 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.270921985815603, |
| "grad_norm": 0.1840326339006424, |
| "loss": 1.1592, |
| "loss_ce": 1.0874097347259521, |
| "loss_region": 0.05963713675737381, |
| "loss_total": 1.1470469236373901, |
| "lr": 0.0010628425510172657, |
| "router/selected_tokens_s0": 1231.875, |
| "router/selected_tokens_s1": 103.1875, |
| "step": 4480, |
| "tokens_trained": 14.678741664 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.273758865248227, |
| "grad_norm": 0.20033717155456543, |
| "loss": 1.159, |
| "loss_ce": 1.1302992105484009, |
| "loss_region": 0.05996955186128616, |
| "loss_total": 1.1902687549591064, |
| "lr": 0.0010624356590334145, |
| "router/selected_tokens_s0": 1110.875, |
| "router/selected_tokens_s1": 98.25, |
| "step": 4490, |
| "tokens_trained": 14.711504648 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.2765957446808511, |
| "grad_norm": 0.2271454632282257, |
| "loss": 1.1473, |
| "loss_ce": 1.1464464664459229, |
| "loss_region": 0.06053486838936806, |
| "loss_total": 1.2069813013076782, |
| "lr": 0.0010620287670495634, |
| "router/selected_tokens_s0": 1056.5, |
| "router/selected_tokens_s1": 98.4375, |
| "step": 4500, |
| "tokens_trained": 14.744270088 |
| }, |
| { |
| "epoch": 1.2765957446808511, |
| "eval_ppl": 2.9954127013498857, |
| "eval_runtime": 2.0087, |
| "step": 4500, |
| "tokens_trained": 14.744270088 |
| }, |
| { |
| "epoch": 1.2765957446808511, |
| "eval_F": 0.008274678388596935, |
| "eval_F_cds": 0.0066031803072500225, |
| "eval_F_dig": 0.014141729822642394, |
| "eval_F_exon": 0.008004783843655291, |
| "eval_F_intron": 0.007904768622592151, |
| "eval_F_nig": 0.009468013519949251, |
| "eval_F_promoter": 0.00811172172741246, |
| "eval_F_utr": 0.007279820082923866, |
| "eval_G": 0.04156769006890505, |
| "eval_G_cds": 0.035999211801500314, |
| "eval_G_dig": 0.05599459369384327, |
| "eval_G_exon": 0.04039431247536728, |
| "eval_G_intron": 0.04149980615442974, |
| "eval_G_nig": 0.04239440492074479, |
| "eval_G_promoter": 0.04131640431714937, |
| "eval_G_utr": 0.03994073071993477, |
| "eval_avg_bp_per_token": 120.85061836095859, |
| "eval_bp_per_token/cds": 151.4421768707483, |
| "eval_bp_per_token/dig": 70.71270718232044, |
| "eval_bp_per_token/exon": 124.92529711375212, |
| "eval_bp_per_token/intron": 126.50591658583899, |
| "eval_bp_per_token/nig": 105.61877609204765, |
| "eval_bp_per_token/promoter": 123.27839065541856, |
| "eval_bp_per_token/utr": 137.36603221083456, |
| "eval_ppl_cds": 3.7043501190815795, |
| "eval_ppl_dig": 1.0892991213759757, |
| "eval_ppl_exon": 3.23873666990306, |
| "eval_ppl_intron": 3.016729298210796, |
| "eval_ppl_nig": 2.8426754714039, |
| "eval_ppl_promoter": 3.277698950952891, |
| "eval_ppl_utr": 3.3800323407396746, |
| "step": 4500, |
| "tokens_trained": 14.744270088 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.2794326241134752, |
| "grad_norm": 0.07886938750743866, |
| "loss": 1.1559, |
| "loss_ce": 1.0758130550384521, |
| "loss_region": 0.06231748312711716, |
| "loss_total": 1.13813054561615, |
| "lr": 0.0010616218750657124, |
| "router/selected_tokens_s0": 1096.1875, |
| "router/selected_tokens_s1": 114.75, |
| "step": 4510, |
| "tokens_trained": 14.777033928 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.2822695035460994, |
| "grad_norm": 0.5984835624694824, |
| "loss": 1.1483, |
| "loss_ce": 1.105970025062561, |
| "loss_region": 0.05724368616938591, |
| "loss_total": 1.1632137298583984, |
| "lr": 0.0010612149830818616, |
| "router/selected_tokens_s0": 1149.875, |
| "router/selected_tokens_s1": 79.625, |
| "step": 4520, |
| "tokens_trained": 14.809799368 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.2851063829787235, |
| "grad_norm": 0.4306280016899109, |
| "loss": 1.1542, |
| "loss_ce": 1.1445868015289307, |
| "loss_region": 0.06630415469408035, |
| "loss_total": 1.2108910083770752, |
| "lr": 0.0010608080910980105, |
| "router/selected_tokens_s0": 1140.625, |
| "router/selected_tokens_s1": 148.4375, |
| "step": 4530, |
| "tokens_trained": 14.842564808 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.2879432624113476, |
| "grad_norm": 0.1167333573102951, |
| "loss": 1.16, |
| "loss_ce": 1.031152367591858, |
| "loss_region": 0.060010798275470734, |
| "loss_total": 1.091163158416748, |
| "lr": 0.0010604011991141595, |
| "router/selected_tokens_s0": 1240.1875, |
| "router/selected_tokens_s1": 106.9375, |
| "step": 4540, |
| "tokens_trained": 14.875330248 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.2907801418439715, |
| "grad_norm": 0.5103814005851746, |
| "loss": 1.1598, |
| "loss_ce": 1.1224370002746582, |
| "loss_region": 0.05744566395878792, |
| "loss_total": 1.1798826456069946, |
| "lr": 0.0010599943071303085, |
| "router/selected_tokens_s0": 1076.0, |
| "router/selected_tokens_s1": 77.125, |
| "step": 4550, |
| "tokens_trained": 14.908095688 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.2936170212765958, |
| "grad_norm": 0.12912671267986298, |
| "loss": 1.1535, |
| "loss_ce": 1.1043033599853516, |
| "loss_region": 0.06229260936379433, |
| "loss_total": 1.1665959358215332, |
| "lr": 0.0010595874151464574, |
| "router/selected_tokens_s0": 1132.125, |
| "router/selected_tokens_s1": 117.3125, |
| "step": 4560, |
| "tokens_trained": 14.940861128 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.2964539007092197, |
| "grad_norm": 0.25441470742225647, |
| "loss": 1.1488, |
| "loss_ce": 0.9986916184425354, |
| "loss_region": 0.062223441898822784, |
| "loss_total": 1.0609151124954224, |
| "lr": 0.0010591805231626064, |
| "router/selected_tokens_s0": 1216.125, |
| "router/selected_tokens_s1": 121.75, |
| "step": 4570, |
| "tokens_trained": 14.973625768 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.299290780141844, |
| "grad_norm": 0.2726980745792389, |
| "loss": 1.1528, |
| "loss_ce": 1.057322382926941, |
| "loss_region": 0.058404162526130676, |
| "loss_total": 1.1157265901565552, |
| "lr": 0.0010587736311787554, |
| "router/selected_tokens_s0": 1172.5625, |
| "router/selected_tokens_s1": 90.375, |
| "step": 4580, |
| "tokens_trained": 15.006387136 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.302127659574468, |
| "grad_norm": 0.31760281324386597, |
| "loss": 1.1548, |
| "loss_ce": 1.113600730895996, |
| "loss_region": 0.058456115424633026, |
| "loss_total": 1.172056794166565, |
| "lr": 0.0010583667391949043, |
| "router/selected_tokens_s0": 1064.375, |
| "router/selected_tokens_s1": 83.8125, |
| "step": 4590, |
| "tokens_trained": 15.039152576 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.3049645390070923, |
| "grad_norm": 0.1993570178747177, |
| "loss": 1.1595, |
| "loss_ce": 1.0727800130844116, |
| "loss_region": 0.05827302485704422, |
| "loss_total": 1.13105309009552, |
| "lr": 0.0010579598472110533, |
| "router/selected_tokens_s0": 1154.6875, |
| "router/selected_tokens_s1": 88.0, |
| "step": 4600, |
| "tokens_trained": 15.071918016 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.3078014184397162, |
| "grad_norm": 0.16275088489055634, |
| "loss": 1.1532, |
| "loss_ce": 1.1068168878555298, |
| "loss_region": 0.060309939086437225, |
| "loss_total": 1.1671267747879028, |
| "lr": 0.0010575529552272023, |
| "router/selected_tokens_s0": 1102.875, |
| "router/selected_tokens_s1": 100.75, |
| "step": 4610, |
| "tokens_trained": 15.104683456 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.3106382978723405, |
| "grad_norm": 0.2760368585586548, |
| "loss": 1.156, |
| "loss_ce": 1.1134077310562134, |
| "loss_region": 0.06077075004577637, |
| "loss_total": 1.1741784811019897, |
| "lr": 0.0010571460632433512, |
| "router/selected_tokens_s0": 1141.5625, |
| "router/selected_tokens_s1": 106.25, |
| "step": 4620, |
| "tokens_trained": 15.137448096 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.3134751773049644, |
| "grad_norm": 0.0931464284658432, |
| "loss": 1.1559, |
| "loss_ce": 1.072306752204895, |
| "loss_region": 0.059092313051223755, |
| "loss_total": 1.1313990354537964, |
| "lr": 0.0010567391712595002, |
| "router/selected_tokens_s0": 1171.75, |
| "router/selected_tokens_s1": 95.125, |
| "step": 4630, |
| "tokens_trained": 15.170210824 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.3163120567375888, |
| "grad_norm": 0.16534562408924103, |
| "loss": 1.1556, |
| "loss_ce": 1.1088744401931763, |
| "loss_region": 0.059825487434864044, |
| "loss_total": 1.1686999797821045, |
| "lr": 0.0010563322792756492, |
| "router/selected_tokens_s0": 1191.25, |
| "router/selected_tokens_s1": 102.6875, |
| "step": 4640, |
| "tokens_trained": 15.202976264 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.3191489361702127, |
| "grad_norm": 0.4237407445907593, |
| "loss": 1.1544, |
| "loss_ce": 1.0543153285980225, |
| "loss_region": 0.058653030544519424, |
| "loss_total": 1.1129683256149292, |
| "lr": 0.0010559253872917981, |
| "router/selected_tokens_s0": 1030.9375, |
| "router/selected_tokens_s1": 83.25, |
| "step": 4650, |
| "tokens_trained": 15.235741704 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.321985815602837, |
| "grad_norm": 0.17439590394496918, |
| "loss": 1.1521, |
| "loss_ce": 1.1104930639266968, |
| "loss_region": 0.05929058790206909, |
| "loss_total": 1.169783592224121, |
| "lr": 0.001055518495307947, |
| "router/selected_tokens_s0": 1081.25, |
| "router/selected_tokens_s1": 91.1875, |
| "step": 4660, |
| "tokens_trained": 15.268507144 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.324822695035461, |
| "grad_norm": 0.3554675877094269, |
| "loss": 1.1561, |
| "loss_ce": 1.1227957010269165, |
| "loss_region": 0.059759471565485, |
| "loss_total": 1.1825551986694336, |
| "lr": 0.001055111603324096, |
| "router/selected_tokens_s0": 1069.25, |
| "router/selected_tokens_s1": 94.1875, |
| "step": 4670, |
| "tokens_trained": 15.30127256 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.327659574468085, |
| "grad_norm": 0.23464754223823547, |
| "loss": 1.1521, |
| "loss_ce": 1.1011435985565186, |
| "loss_region": 0.05946500226855278, |
| "loss_total": 1.1606086492538452, |
| "lr": 0.001054704711340245, |
| "router/selected_tokens_s0": 1173.1875, |
| "router/selected_tokens_s1": 97.4375, |
| "step": 4680, |
| "tokens_trained": 15.334037984 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.3304964539007091, |
| "grad_norm": 0.3527086675167084, |
| "loss": 1.1585, |
| "loss_ce": 1.0524513721466064, |
| "loss_region": 0.060822706669569016, |
| "loss_total": 1.113274097442627, |
| "lr": 0.001054297819356394, |
| "router/selected_tokens_s0": 1101.25, |
| "router/selected_tokens_s1": 103.0625, |
| "step": 4690, |
| "tokens_trained": 15.366803424 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.3333333333333333, |
| "grad_norm": 0.19632120430469513, |
| "loss": 1.151, |
| "loss_ce": 1.102612853050232, |
| "loss_region": 0.06399323046207428, |
| "loss_total": 1.166606068611145, |
| "lr": 0.0010538909273725432, |
| "router/selected_tokens_s0": 1176.5625, |
| "router/selected_tokens_s1": 134.375, |
| "step": 4700, |
| "tokens_trained": 15.399568864 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.3361702127659574, |
| "grad_norm": 0.2851898670196533, |
| "loss": 1.151, |
| "loss_ce": 1.15898597240448, |
| "loss_region": 0.0606791190803051, |
| "loss_total": 1.2196650505065918, |
| "lr": 0.0010534840353886921, |
| "router/selected_tokens_s0": 1198.0625, |
| "router/selected_tokens_s1": 109.25, |
| "step": 4710, |
| "tokens_trained": 15.432334288 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.3390070921985815, |
| "grad_norm": 0.1474008411169052, |
| "loss": 1.1528, |
| "loss_ce": 0.9653110504150391, |
| "loss_region": 0.05874474346637726, |
| "loss_total": 1.0240558385849, |
| "lr": 0.001053077143404841, |
| "router/selected_tokens_s0": 1149.625, |
| "router/selected_tokens_s1": 91.8125, |
| "step": 4720, |
| "tokens_trained": 15.465099728 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.3418439716312056, |
| "grad_norm": 0.31901198625564575, |
| "loss": 1.15, |
| "loss_ce": 1.0836107730865479, |
| "loss_region": 0.058840882033109665, |
| "loss_total": 1.1424516439437866, |
| "lr": 0.0010526702514209898, |
| "router/selected_tokens_s0": 1199.3125, |
| "router/selected_tokens_s1": 94.9375, |
| "step": 4730, |
| "tokens_trained": 15.497864368 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.3446808510638297, |
| "grad_norm": 0.2621752917766571, |
| "loss": 1.1487, |
| "loss_ce": 1.131207823753357, |
| "loss_region": 0.06052137166261673, |
| "loss_total": 1.191729187965393, |
| "lr": 0.0010522633594371388, |
| "router/selected_tokens_s0": 1059.5, |
| "router/selected_tokens_s1": 98.25, |
| "step": 4740, |
| "tokens_trained": 15.530629808 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.3475177304964538, |
| "grad_norm": 0.37631991505622864, |
| "loss": 1.151, |
| "loss_ce": 1.1528443098068237, |
| "loss_region": 0.06003797799348831, |
| "loss_total": 1.2128822803497314, |
| "lr": 0.0010518564674532878, |
| "router/selected_tokens_s0": 1172.875, |
| "router/selected_tokens_s1": 102.75, |
| "step": 4750, |
| "tokens_trained": 15.563395248 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.350354609929078, |
| "grad_norm": 0.251646488904953, |
| "loss": 1.1564, |
| "loss_ce": 1.126339077949524, |
| "loss_region": 0.06007569283246994, |
| "loss_total": 1.1864147186279297, |
| "lr": 0.0010514495754694367, |
| "router/selected_tokens_s0": 1109.3125, |
| "router/selected_tokens_s1": 99.0625, |
| "step": 4760, |
| "tokens_trained": 15.596158264 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.353191489361702, |
| "grad_norm": 0.38772666454315186, |
| "loss": 1.1487, |
| "loss_ce": 1.1305615901947021, |
| "loss_region": 0.06020316854119301, |
| "loss_total": 1.1907647848129272, |
| "lr": 0.001051042683485586, |
| "router/selected_tokens_s0": 1102.625, |
| "router/selected_tokens_s1": 99.3125, |
| "step": 4770, |
| "tokens_trained": 15.628923704 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.3560283687943262, |
| "grad_norm": 0.245202898979187, |
| "loss": 1.1532, |
| "loss_ce": 1.166459321975708, |
| "loss_region": 0.05978211760520935, |
| "loss_total": 1.2262414693832397, |
| "lr": 0.0010506357915017349, |
| "router/selected_tokens_s0": 1139.75, |
| "router/selected_tokens_s1": 98.4375, |
| "step": 4780, |
| "tokens_trained": 15.661689144 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.3588652482269503, |
| "grad_norm": 0.4050356149673462, |
| "loss": 1.1557, |
| "loss_ce": 1.081703782081604, |
| "loss_region": 0.060910604894161224, |
| "loss_total": 1.1426143646240234, |
| "lr": 0.0010502288995178838, |
| "router/selected_tokens_s0": 1165.0, |
| "router/selected_tokens_s1": 109.375, |
| "step": 4790, |
| "tokens_trained": 15.694454584 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.3617021276595744, |
| "grad_norm": 0.14703965187072754, |
| "loss": 1.1565, |
| "loss_ce": 1.0962260961532593, |
| "loss_region": 0.0606972873210907, |
| "loss_total": 1.1569234132766724, |
| "lr": 0.0010498220075340328, |
| "router/selected_tokens_s0": 1093.625, |
| "router/selected_tokens_s1": 102.75, |
| "step": 4800, |
| "tokens_trained": 15.727220024 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.3645390070921986, |
| "grad_norm": 0.5923725962638855, |
| "loss": 1.1497, |
| "loss_ce": 1.1643304824829102, |
| "loss_region": 0.05974184721708298, |
| "loss_total": 1.2240723371505737, |
| "lr": 0.0010494151155501818, |
| "router/selected_tokens_s0": 1086.3125, |
| "router/selected_tokens_s1": 94.1875, |
| "step": 4810, |
| "tokens_trained": 15.759985464 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.3673758865248227, |
| "grad_norm": 0.08134971559047699, |
| "loss": 1.1468, |
| "loss_ce": 1.1429718732833862, |
| "loss_region": 0.06080538406968117, |
| "loss_total": 1.2037773132324219, |
| "lr": 0.0010490082235663307, |
| "router/selected_tokens_s0": 1200.8125, |
| "router/selected_tokens_s1": 110.4375, |
| "step": 4820, |
| "tokens_trained": 15.792750904 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.3702127659574468, |
| "grad_norm": 0.32185447216033936, |
| "loss": 1.1552, |
| "loss_ce": 1.062546730041504, |
| "loss_region": 0.06265987455844879, |
| "loss_total": 1.1252065896987915, |
| "lr": 0.0010486013315824797, |
| "router/selected_tokens_s0": 1130.375, |
| "router/selected_tokens_s1": 122.375, |
| "step": 4830, |
| "tokens_trained": 15.825515544 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.373049645390071, |
| "grad_norm": 0.19900231063365936, |
| "loss": 1.1491, |
| "loss_ce": 1.0900709629058838, |
| "loss_region": 0.06174982339143753, |
| "loss_total": 1.1518207788467407, |
| "lr": 0.0010481944395986287, |
| "router/selected_tokens_s0": 1148.875, |
| "router/selected_tokens_s1": 114.8125, |
| "step": 4840, |
| "tokens_trained": 15.858280984 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.375886524822695, |
| "grad_norm": 0.20787717401981354, |
| "loss": 1.1468, |
| "loss_ce": 1.1013059616088867, |
| "loss_region": 0.05997852236032486, |
| "loss_total": 1.1612844467163086, |
| "lr": 0.0010477875476147776, |
| "router/selected_tokens_s0": 1236.5625, |
| "router/selected_tokens_s1": 106.375, |
| "step": 4850, |
| "tokens_trained": 15.891045624 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.3787234042553191, |
| "grad_norm": 0.09310303628444672, |
| "loss": 1.1474, |
| "loss_ce": 1.0274229049682617, |
| "loss_region": 0.05929532274603844, |
| "loss_total": 1.086718201637268, |
| "lr": 0.0010473806556309266, |
| "router/selected_tokens_s0": 1027.0, |
| "router/selected_tokens_s1": 88.0, |
| "step": 4860, |
| "tokens_trained": 15.923810264 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.3815602836879433, |
| "grad_norm": 0.2037133127450943, |
| "loss": 1.1491, |
| "loss_ce": 1.0613118410110474, |
| "loss_region": 0.058327823877334595, |
| "loss_total": 1.1196396350860596, |
| "lr": 0.0010469737636470756, |
| "router/selected_tokens_s0": 1138.3125, |
| "router/selected_tokens_s1": 87.4375, |
| "step": 4870, |
| "tokens_trained": 15.956575704 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.3843971631205674, |
| "grad_norm": 0.5275424122810364, |
| "loss": 1.1516, |
| "loss_ce": 1.0648127794265747, |
| "loss_region": 0.0595739372074604, |
| "loss_total": 1.1243866682052612, |
| "lr": 0.0010465668716632245, |
| "router/selected_tokens_s0": 1167.5625, |
| "router/selected_tokens_s1": 98.625, |
| "step": 4880, |
| "tokens_trained": 15.989339352 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.3872340425531915, |
| "grad_norm": 0.11888913810253143, |
| "loss": 1.1478, |
| "loss_ce": 1.0950626134872437, |
| "loss_region": 0.061253614723682404, |
| "loss_total": 1.1563162803649902, |
| "lr": 0.0010461599796793735, |
| "router/selected_tokens_s0": 1192.125, |
| "router/selected_tokens_s1": 114.375, |
| "step": 4890, |
| "tokens_trained": 16.022103992 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.3900709219858156, |
| "grad_norm": 0.3772846460342407, |
| "loss": 1.1548, |
| "loss_ce": 1.051941156387329, |
| "loss_region": 0.06041061133146286, |
| "loss_total": 1.1123517751693726, |
| "lr": 0.0010457530876955225, |
| "router/selected_tokens_s0": 1167.25, |
| "router/selected_tokens_s1": 105.8125, |
| "step": 4900, |
| "tokens_trained": 16.054869432 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.3929078014184397, |
| "grad_norm": 0.12513701617717743, |
| "loss": 1.1473, |
| "loss_ce": 1.0917131900787354, |
| "loss_region": 0.05870034545660019, |
| "loss_total": 1.1504135131835938, |
| "lr": 0.0010453461957116714, |
| "router/selected_tokens_s0": 1071.0625, |
| "router/selected_tokens_s1": 86.3125, |
| "step": 4910, |
| "tokens_trained": 16.087634872 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.3957446808510638, |
| "grad_norm": 0.12635989487171173, |
| "loss": 1.1531, |
| "loss_ce": 1.0633745193481445, |
| "loss_region": 0.05978614464402199, |
| "loss_total": 1.123160719871521, |
| "lr": 0.0010449393037278204, |
| "router/selected_tokens_s0": 1174.75, |
| "router/selected_tokens_s1": 101.1875, |
| "step": 4920, |
| "tokens_trained": 16.120400312 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.398581560283688, |
| "grad_norm": 0.23901410400867462, |
| "loss": 1.1491, |
| "loss_ce": 1.1176151037216187, |
| "loss_region": 0.060138314962387085, |
| "loss_total": 1.1777534484863281, |
| "lr": 0.0010445324117439694, |
| "router/selected_tokens_s0": 1158.0, |
| "router/selected_tokens_s1": 102.875, |
| "step": 4930, |
| "tokens_trained": 16.153164952 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.401418439716312, |
| "grad_norm": 0.2883402407169342, |
| "loss": 1.1479, |
| "loss_ce": 1.0584120750427246, |
| "loss_region": 0.0600699856877327, |
| "loss_total": 1.1184821128845215, |
| "lr": 0.0010441255197601183, |
| "router/selected_tokens_s0": 1139.75, |
| "router/selected_tokens_s1": 100.75, |
| "step": 4940, |
| "tokens_trained": 16.185929592 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.4042553191489362, |
| "grad_norm": 0.22571124136447906, |
| "loss": 1.1499, |
| "loss_ce": 1.0989696979522705, |
| "loss_region": 0.060364458709955215, |
| "loss_total": 1.1593341827392578, |
| "lr": 0.0010437186277762675, |
| "router/selected_tokens_s0": 1116.0625, |
| "router/selected_tokens_s1": 101.9375, |
| "step": 4950, |
| "tokens_trained": 16.218695032 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.4070921985815603, |
| "grad_norm": 0.16999152302742004, |
| "loss": 1.1483, |
| "loss_ce": 0.9374291300773621, |
| "loss_region": 0.061014510691165924, |
| "loss_total": 0.9984436631202698, |
| "lr": 0.0010433117357924165, |
| "router/selected_tokens_s0": 1167.875, |
| "router/selected_tokens_s1": 109.6875, |
| "step": 4960, |
| "tokens_trained": 16.251457848 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.4099290780141844, |
| "grad_norm": 0.28172990679740906, |
| "loss": 1.149, |
| "loss_ce": 0.9092438220977783, |
| "loss_region": 0.06049266830086708, |
| "loss_total": 0.9697365164756775, |
| "lr": 0.0010429048438085654, |
| "router/selected_tokens_s0": 1190.75, |
| "router/selected_tokens_s1": 107.8125, |
| "step": 4970, |
| "tokens_trained": 16.284222488 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.4127659574468086, |
| "grad_norm": 0.1335156410932541, |
| "loss": 1.149, |
| "loss_ce": 1.0608166456222534, |
| "loss_region": 0.06012177839875221, |
| "loss_total": 1.1209384202957153, |
| "lr": 0.0010424979518247142, |
| "router/selected_tokens_s0": 1143.9375, |
| "router/selected_tokens_s1": 101.1875, |
| "step": 4980, |
| "tokens_trained": 16.316987928 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.4156028368794327, |
| "grad_norm": 0.23958002030849457, |
| "loss": 1.1491, |
| "loss_ce": 1.0881507396697998, |
| "loss_region": 0.06042102724313736, |
| "loss_total": 1.1485717296600342, |
| "lr": 0.0010420910598408631, |
| "router/selected_tokens_s0": 1250.75, |
| "router/selected_tokens_s1": 110.6875, |
| "step": 4990, |
| "tokens_trained": 16.349753368 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.4184397163120568, |
| "grad_norm": 0.21790561079978943, |
| "loss": 1.1451, |
| "loss_ce": 0.9417971968650818, |
| "loss_region": 0.05981254577636719, |
| "loss_total": 1.0016098022460938, |
| "lr": 0.0010416841678570121, |
| "router/selected_tokens_s0": 1176.1875, |
| "router/selected_tokens_s1": 100.5, |
| "step": 5000, |
| "tokens_trained": 16.382518808 |
| }, |
| { |
| "epoch": 1.4184397163120568, |
| "eval_ppl": 2.9786423782514446, |
| "eval_runtime": 2.0133, |
| "step": 5000, |
| "tokens_trained": 16.382518808 |
| }, |
| { |
| "epoch": 1.4184397163120568, |
| "eval_F": 0.00745578798615381, |
| "eval_F_cds": 0.0064055340939717905, |
| "eval_F_dig": 0.006016095007422455, |
| "eval_F_exon": 0.007338851062094834, |
| "eval_F_intron": 0.0075233309514243915, |
| "eval_F_nig": 0.007254536374872021, |
| "eval_F_promoter": 0.007780093908576001, |
| "eval_F_utr": 0.006906769273403609, |
| "eval_G": 0.04323960886173385, |
| "eval_G_cds": 0.038285373478348754, |
| "eval_G_dig": 0.04923907629502305, |
| "eval_G_exon": 0.04191793100460717, |
| "eval_G_intron": 0.0434455328629159, |
| "eval_G_nig": 0.04368769436312883, |
| "eval_G_promoter": 0.04268276778535126, |
| "eval_G_utr": 0.04234763705620277, |
| "eval_avg_bp_per_token": 134.1239855340718, |
| "eval_bp_per_token/cds": 156.11500701262273, |
| "eval_bp_per_token/dig": 166.2207792207792, |
| "eval_bp_per_token/exon": 136.26111111111112, |
| "eval_bp_per_token/intron": 132.91984713375797, |
| "eval_bp_per_token/nig": 137.84478405315613, |
| "eval_bp_per_token/promoter": 128.53315290933693, |
| "eval_bp_per_token/utr": 144.7854938271605, |
| "eval_ppl_cds": 3.705528766814986, |
| "eval_ppl_dig": 1.111293449006548, |
| "eval_ppl_exon": 3.22906865700876, |
| "eval_ppl_intron": 3.0000279510956616, |
| "eval_ppl_nig": 2.818634276623224, |
| "eval_ppl_promoter": 3.2703718751969526, |
| "eval_ppl_utr": 3.3763462248263023, |
| "step": 5000, |
| "tokens_trained": 16.382518808 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.421276595744681, |
| "grad_norm": 0.22912006080150604, |
| "loss": 1.152, |
| "loss_ce": 1.0372974872589111, |
| "loss_region": 0.06194760277867317, |
| "loss_total": 1.0992450714111328, |
| "lr": 0.001041277275873161, |
| "router/selected_tokens_s0": 1172.375, |
| "router/selected_tokens_s1": 118.1875, |
| "step": 5010, |
| "tokens_trained": 16.415284248 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.424113475177305, |
| "grad_norm": 0.1888241171836853, |
| "loss": 1.1498, |
| "loss_ce": 1.0258610248565674, |
| "loss_region": 0.05821385234594345, |
| "loss_total": 1.084074854850769, |
| "lr": 0.0010408703838893103, |
| "router/selected_tokens_s0": 1103.25, |
| "router/selected_tokens_s1": 84.0, |
| "step": 5020, |
| "tokens_trained": 16.448049688 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.4269503546099291, |
| "grad_norm": 0.4081474840641022, |
| "loss": 1.1433, |
| "loss_ce": 1.069995641708374, |
| "loss_region": 0.059390418231487274, |
| "loss_total": 1.129386067390442, |
| "lr": 0.0010404634919054592, |
| "router/selected_tokens_s0": 1208.75, |
| "router/selected_tokens_s1": 100.0, |
| "step": 5030, |
| "tokens_trained": 16.480814968 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.4297872340425533, |
| "grad_norm": 0.07389332354068756, |
| "loss": 1.1419, |
| "loss_ce": 1.0573478937149048, |
| "loss_region": 0.06128464266657829, |
| "loss_total": 1.1186325550079346, |
| "lr": 0.0010400565999216082, |
| "router/selected_tokens_s0": 1172.6875, |
| "router/selected_tokens_s1": 112.9375, |
| "step": 5040, |
| "tokens_trained": 16.513576176 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.4326241134751774, |
| "grad_norm": 0.17676013708114624, |
| "loss": 1.1429, |
| "loss_ce": 1.131528615951538, |
| "loss_region": 0.06025056540966034, |
| "loss_total": 1.1917791366577148, |
| "lr": 0.0010396497079377572, |
| "router/selected_tokens_s0": 1229.875, |
| "router/selected_tokens_s1": 108.375, |
| "step": 5050, |
| "tokens_trained": 16.546341616 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.4354609929078015, |
| "grad_norm": 0.3632008731365204, |
| "loss": 1.151, |
| "loss_ce": 1.1298102140426636, |
| "loss_region": 0.06115425005555153, |
| "loss_total": 1.1909644603729248, |
| "lr": 0.0010392428159539061, |
| "router/selected_tokens_s0": 1190.9375, |
| "router/selected_tokens_s1": 113.125, |
| "step": 5060, |
| "tokens_trained": 16.579105456 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.4382978723404256, |
| "grad_norm": 0.1857968270778656, |
| "loss": 1.1465, |
| "loss_ce": 1.088350534439087, |
| "loss_region": 0.06075309216976166, |
| "loss_total": 1.1491036415100098, |
| "lr": 0.001038835923970055, |
| "router/selected_tokens_s0": 1197.25, |
| "router/selected_tokens_s1": 110.3125, |
| "step": 5070, |
| "tokens_trained": 16.611870896 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.4411347517730497, |
| "grad_norm": 0.13964325189590454, |
| "loss": 1.1525, |
| "loss_ce": 1.1231189966201782, |
| "loss_region": 0.06125170364975929, |
| "loss_total": 1.184370756149292, |
| "lr": 0.001038429031986204, |
| "router/selected_tokens_s0": 1174.625, |
| "router/selected_tokens_s1": 112.375, |
| "step": 5080, |
| "tokens_trained": 16.644635536 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.4439716312056738, |
| "grad_norm": 0.16897931694984436, |
| "loss": 1.1499, |
| "loss_ce": 0.9999305605888367, |
| "loss_region": 0.06121806427836418, |
| "loss_total": 1.0611486434936523, |
| "lr": 0.001038022140002353, |
| "router/selected_tokens_s0": 1144.0625, |
| "router/selected_tokens_s1": 109.5, |
| "step": 5090, |
| "tokens_trained": 16.67739828 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.4468085106382977, |
| "grad_norm": 0.186988964676857, |
| "loss": 1.1429, |
| "loss_ce": 1.0923733711242676, |
| "loss_region": 0.060399770736694336, |
| "loss_total": 1.152773141860962, |
| "lr": 0.001037615248018502, |
| "router/selected_tokens_s0": 1190.125, |
| "router/selected_tokens_s1": 106.75, |
| "step": 5100, |
| "tokens_trained": 16.71016372 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.449645390070922, |
| "grad_norm": 0.28807076811790466, |
| "loss": 1.1477, |
| "loss_ce": 1.1130632162094116, |
| "loss_region": 0.060432370752096176, |
| "loss_total": 1.1734955310821533, |
| "lr": 0.001037208356034651, |
| "router/selected_tokens_s0": 1142.375, |
| "router/selected_tokens_s1": 104.1875, |
| "step": 5110, |
| "tokens_trained": 16.74292916 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.452482269503546, |
| "grad_norm": 0.30830657482147217, |
| "loss": 1.1468, |
| "loss_ce": 1.2115405797958374, |
| "loss_region": 0.05969396233558655, |
| "loss_total": 1.2712345123291016, |
| "lr": 0.0010368014640508, |
| "router/selected_tokens_s0": 1125.125, |
| "router/selected_tokens_s1": 97.1875, |
| "step": 5120, |
| "tokens_trained": 16.775691312 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.4553191489361703, |
| "grad_norm": 0.21221929788589478, |
| "loss": 1.1512, |
| "loss_ce": 1.0514652729034424, |
| "loss_region": 0.06042499095201492, |
| "loss_total": 1.1118903160095215, |
| "lr": 0.0010363945720669489, |
| "router/selected_tokens_s0": 1199.625, |
| "router/selected_tokens_s1": 107.875, |
| "step": 5130, |
| "tokens_trained": 16.808456752 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.4581560283687942, |
| "grad_norm": 0.22854673862457275, |
| "loss": 1.1463, |
| "loss_ce": 1.0557891130447388, |
| "loss_region": 0.05936175957322121, |
| "loss_total": 1.1151509284973145, |
| "lr": 0.0010359876800830978, |
| "router/selected_tokens_s0": 1169.375, |
| "router/selected_tokens_s1": 97.5625, |
| "step": 5140, |
| "tokens_trained": 16.841222192 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.4609929078014185, |
| "grad_norm": 0.2368224710226059, |
| "loss": 1.1428, |
| "loss_ce": 1.0367987155914307, |
| "loss_region": 0.058588556945323944, |
| "loss_total": 1.0953872203826904, |
| "lr": 0.0010355807880992468, |
| "router/selected_tokens_s0": 1064.75, |
| "router/selected_tokens_s1": 84.625, |
| "step": 5150, |
| "tokens_trained": 16.873987632 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.4638297872340424, |
| "grad_norm": 0.3368516266345978, |
| "loss": 1.1402, |
| "loss_ce": 1.1748027801513672, |
| "loss_region": 0.0609104186296463, |
| "loss_total": 1.235713243484497, |
| "lr": 0.0010351738961153958, |
| "router/selected_tokens_s0": 1230.6875, |
| "router/selected_tokens_s1": 113.5, |
| "step": 5160, |
| "tokens_trained": 16.906753072 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.4666666666666668, |
| "grad_norm": 0.16690143942832947, |
| "loss": 1.1482, |
| "loss_ce": 1.0459595918655396, |
| "loss_region": 0.0586651973426342, |
| "loss_total": 1.1046247482299805, |
| "lr": 0.0010347670041315447, |
| "router/selected_tokens_s0": 1180.0, |
| "router/selected_tokens_s1": 91.6875, |
| "step": 5170, |
| "tokens_trained": 16.939515096 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.4695035460992907, |
| "grad_norm": 0.06549862027168274, |
| "loss": 1.1451, |
| "loss_ce": 1.068226933479309, |
| "loss_region": 0.05954112857580185, |
| "loss_total": 1.1277680397033691, |
| "lr": 0.0010343601121476937, |
| "router/selected_tokens_s0": 1166.0625, |
| "router/selected_tokens_s1": 98.125, |
| "step": 5180, |
| "tokens_trained": 16.972279352 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.472340425531915, |
| "grad_norm": 0.11069296300411224, |
| "loss": 1.1482, |
| "loss_ce": 1.1082299947738647, |
| "loss_region": 0.06221709027886391, |
| "loss_total": 1.1704471111297607, |
| "lr": 0.0010339532201638427, |
| "router/selected_tokens_s0": 1137.9375, |
| "router/selected_tokens_s1": 117.75, |
| "step": 5190, |
| "tokens_trained": 17.005044792 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.475177304964539, |
| "grad_norm": 0.19471175968647003, |
| "loss": 1.1496, |
| "loss_ce": 0.9849838614463806, |
| "loss_region": 0.06120489165186882, |
| "loss_total": 1.0461887121200562, |
| "lr": 0.0010335463281799918, |
| "router/selected_tokens_s0": 1154.8125, |
| "router/selected_tokens_s1": 110.375, |
| "step": 5200, |
| "tokens_trained": 17.037810232 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.4780141843971633, |
| "grad_norm": 0.2414424866437912, |
| "loss": 1.1426, |
| "loss_ce": 1.130875825881958, |
| "loss_region": 0.05976984649896622, |
| "loss_total": 1.190645694732666, |
| "lr": 0.0010331394361961408, |
| "router/selected_tokens_s0": 1123.875, |
| "router/selected_tokens_s1": 97.4375, |
| "step": 5210, |
| "tokens_trained": 17.070575672 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.4808510638297872, |
| "grad_norm": 0.16089095175266266, |
| "loss": 1.1384, |
| "loss_ce": 1.1152632236480713, |
| "loss_region": 0.058639250695705414, |
| "loss_total": 1.1739025115966797, |
| "lr": 0.0010327325442122898, |
| "router/selected_tokens_s0": 1069.625, |
| "router/selected_tokens_s1": 85.5625, |
| "step": 5220, |
| "tokens_trained": 17.103341112 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.4836879432624113, |
| "grad_norm": 0.17800533771514893, |
| "loss": 1.148, |
| "loss_ce": 1.0959079265594482, |
| "loss_region": 0.05997302010655403, |
| "loss_total": 1.1558809280395508, |
| "lr": 0.0010323256522284385, |
| "router/selected_tokens_s0": 1127.75, |
| "router/selected_tokens_s1": 99.5, |
| "step": 5230, |
| "tokens_trained": 17.136106552 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.4865248226950354, |
| "grad_norm": 0.22666983306407928, |
| "loss": 1.1402, |
| "loss_ce": 1.1192185878753662, |
| "loss_region": 0.06110667809844017, |
| "loss_total": 1.1803252696990967, |
| "lr": 0.0010319187602445875, |
| "router/selected_tokens_s0": 1163.9375, |
| "router/selected_tokens_s1": 111.0, |
| "step": 5240, |
| "tokens_trained": 17.168867192 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.4893617021276595, |
| "grad_norm": 0.25073131918907166, |
| "loss": 1.1476, |
| "loss_ce": 1.1026537418365479, |
| "loss_region": 0.060530051589012146, |
| "loss_total": 1.1631838083267212, |
| "lr": 0.0010315118682607365, |
| "router/selected_tokens_s0": 1173.875, |
| "router/selected_tokens_s1": 106.8125, |
| "step": 5250, |
| "tokens_trained": 17.201632632 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.4921985815602836, |
| "grad_norm": 0.0783478245139122, |
| "loss": 1.1492, |
| "loss_ce": 1.0499285459518433, |
| "loss_region": 0.059468407183885574, |
| "loss_total": 1.1093969345092773, |
| "lr": 0.0010311049762768854, |
| "router/selected_tokens_s0": 1137.625, |
| "router/selected_tokens_s1": 95.875, |
| "step": 5260, |
| "tokens_trained": 17.234398072 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.4950354609929077, |
| "grad_norm": 0.2490188032388687, |
| "loss": 1.1451, |
| "loss_ce": 1.102023959159851, |
| "loss_region": 0.057921264320611954, |
| "loss_total": 1.1599452495574951, |
| "lr": 0.0010306980842930346, |
| "router/selected_tokens_s0": 1199.375, |
| "router/selected_tokens_s1": 86.75, |
| "step": 5270, |
| "tokens_trained": 17.26716272 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.4978723404255319, |
| "grad_norm": 0.15398019552230835, |
| "loss": 1.1414, |
| "loss_ce": 1.0141432285308838, |
| "loss_region": 0.059863440692424774, |
| "loss_total": 1.0740066766738892, |
| "lr": 0.0010302911923091836, |
| "router/selected_tokens_s0": 1164.5625, |
| "router/selected_tokens_s1": 101.5625, |
| "step": 5280, |
| "tokens_trained": 17.29992816 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.500709219858156, |
| "grad_norm": 0.3028818964958191, |
| "loss": 1.152, |
| "loss_ce": 1.1121013164520264, |
| "loss_region": 0.06068980693817139, |
| "loss_total": 1.1727911233901978, |
| "lr": 0.0010298843003253325, |
| "router/selected_tokens_s0": 1153.375, |
| "router/selected_tokens_s1": 106.875, |
| "step": 5290, |
| "tokens_trained": 17.3326928 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.50354609929078, |
| "grad_norm": 0.23320052027702332, |
| "loss": 1.1442, |
| "loss_ce": 1.097700834274292, |
| "loss_region": 0.06065419316291809, |
| "loss_total": 1.1583549976348877, |
| "lr": 0.0010294774083414815, |
| "router/selected_tokens_s0": 1174.5625, |
| "router/selected_tokens_s1": 107.3125, |
| "step": 5300, |
| "tokens_trained": 17.36545824 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.5063829787234042, |
| "grad_norm": 0.18858042359352112, |
| "loss": 1.149, |
| "loss_ce": 1.0849069356918335, |
| "loss_region": 0.05857797712087631, |
| "loss_total": 1.1434849500656128, |
| "lr": 0.0010290705163576305, |
| "router/selected_tokens_s0": 1167.3125, |
| "router/selected_tokens_s1": 90.5625, |
| "step": 5310, |
| "tokens_trained": 17.39822368 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.5092198581560283, |
| "grad_norm": 0.07173584401607513, |
| "loss": 1.1478, |
| "loss_ce": 1.1023950576782227, |
| "loss_region": 0.05886488035321236, |
| "loss_total": 1.1612598896026611, |
| "lr": 0.0010286636243737794, |
| "router/selected_tokens_s0": 1112.6875, |
| "router/selected_tokens_s1": 89.75, |
| "step": 5320, |
| "tokens_trained": 17.43098912 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.5120567375886524, |
| "grad_norm": 0.2823182940483093, |
| "loss": 1.1423, |
| "loss_ce": 1.0714432001113892, |
| "loss_region": 0.05882066488265991, |
| "loss_total": 1.1302638053894043, |
| "lr": 0.0010282567323899284, |
| "router/selected_tokens_s0": 1078.875, |
| "router/selected_tokens_s1": 87.625, |
| "step": 5330, |
| "tokens_trained": 17.46375296 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.5148936170212766, |
| "grad_norm": 0.14734476804733276, |
| "loss": 1.1442, |
| "loss_ce": 1.1149892807006836, |
| "loss_region": 0.06177487596869469, |
| "loss_total": 1.1767641305923462, |
| "lr": 0.0010278498404060774, |
| "router/selected_tokens_s0": 1133.125, |
| "router/selected_tokens_s1": 113.9375, |
| "step": 5340, |
| "tokens_trained": 17.4965184 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.5177304964539007, |
| "grad_norm": 0.21337512135505676, |
| "loss": 1.1498, |
| "loss_ce": 1.0628228187561035, |
| "loss_region": 0.06133369356393814, |
| "loss_total": 1.1241564750671387, |
| "lr": 0.0010274429484222263, |
| "router/selected_tokens_s0": 1139.5, |
| "router/selected_tokens_s1": 111.3125, |
| "step": 5350, |
| "tokens_trained": 17.52928304 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.5205673758865248, |
| "grad_norm": 0.10551182180643082, |
| "loss": 1.147, |
| "loss_ce": 1.094645619392395, |
| "loss_region": 0.05717579647898674, |
| "loss_total": 1.1518213748931885, |
| "lr": 0.0010270360564383753, |
| "router/selected_tokens_s0": 1127.8125, |
| "router/selected_tokens_s1": 77.75, |
| "step": 5360, |
| "tokens_trained": 17.56204848 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.523404255319149, |
| "grad_norm": 0.07315859943628311, |
| "loss": 1.1433, |
| "loss_ce": 1.0663042068481445, |
| "loss_region": 0.05805211514234543, |
| "loss_total": 1.1243562698364258, |
| "lr": 0.0010266291644545243, |
| "router/selected_tokens_s0": 1129.4375, |
| "router/selected_tokens_s1": 84.375, |
| "step": 5370, |
| "tokens_trained": 17.59481392 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.526241134751773, |
| "grad_norm": 0.2523398697376251, |
| "loss": 1.145, |
| "loss_ce": 1.0977721214294434, |
| "loss_region": 0.06312695890665054, |
| "loss_total": 1.160899043083191, |
| "lr": 0.0010262222724706732, |
| "router/selected_tokens_s0": 1144.1875, |
| "router/selected_tokens_s1": 125.375, |
| "step": 5380, |
| "tokens_trained": 17.62757936 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.5290780141843971, |
| "grad_norm": 0.1691519170999527, |
| "loss": 1.1479, |
| "loss_ce": 1.086019515991211, |
| "loss_region": 0.05966857820749283, |
| "loss_total": 1.1456880569458008, |
| "lr": 0.0010258153804868222, |
| "router/selected_tokens_s0": 1151.3125, |
| "router/selected_tokens_s1": 98.3125, |
| "step": 5390, |
| "tokens_trained": 17.6603448 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.5319148936170213, |
| "grad_norm": 0.1503233015537262, |
| "loss": 1.1455, |
| "loss_ce": 1.0918323993682861, |
| "loss_region": 0.05891592800617218, |
| "loss_total": 1.150748372077942, |
| "lr": 0.0010254084885029712, |
| "router/selected_tokens_s0": 1151.9375, |
| "router/selected_tokens_s1": 92.5, |
| "step": 5400, |
| "tokens_trained": 17.69310944 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.5347517730496454, |
| "grad_norm": 0.21977928280830383, |
| "loss": 1.1464, |
| "loss_ce": 1.1513240337371826, |
| "loss_region": 0.06481648981571198, |
| "loss_total": 1.2161405086517334, |
| "lr": 0.0010250015965191201, |
| "router/selected_tokens_s0": 1131.125, |
| "router/selected_tokens_s1": 137.625, |
| "step": 5410, |
| "tokens_trained": 17.72587488 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.5375886524822695, |
| "grad_norm": 0.07182486355304718, |
| "loss": 1.1442, |
| "loss_ce": 1.0516289472579956, |
| "loss_region": 0.060127370059490204, |
| "loss_total": 1.1117563247680664, |
| "lr": 0.001024594704535269, |
| "router/selected_tokens_s0": 1144.5, |
| "router/selected_tokens_s1": 101.0, |
| "step": 5420, |
| "tokens_trained": 17.75863776 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.5404255319148936, |
| "grad_norm": 0.19044677913188934, |
| "loss": 1.1445, |
| "loss_ce": 1.1150530576705933, |
| "loss_region": 0.058283500373363495, |
| "loss_total": 1.1733365058898926, |
| "lr": 0.001024187812551418, |
| "router/selected_tokens_s0": 1191.125, |
| "router/selected_tokens_s1": 89.4375, |
| "step": 5430, |
| "tokens_trained": 17.7914032 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.5432624113475177, |
| "grad_norm": 0.1769535392522812, |
| "loss": 1.1404, |
| "loss_ce": 1.0554611682891846, |
| "loss_region": 0.05861309543251991, |
| "loss_total": 1.1140742301940918, |
| "lr": 0.001023780920567567, |
| "router/selected_tokens_s0": 1110.5, |
| "router/selected_tokens_s1": 87.9375, |
| "step": 5440, |
| "tokens_trained": 17.824165472 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.5460992907801419, |
| "grad_norm": 0.21434129774570465, |
| "loss": 1.1434, |
| "loss_ce": 1.0828922986984253, |
| "loss_region": 0.059199750423431396, |
| "loss_total": 1.142091989517212, |
| "lr": 0.0010233740285837162, |
| "router/selected_tokens_s0": 1188.8125, |
| "router/selected_tokens_s1": 97.0625, |
| "step": 5450, |
| "tokens_trained": 17.856930912 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.548936170212766, |
| "grad_norm": 0.16277895867824554, |
| "loss": 1.1462, |
| "loss_ce": 1.077553153038025, |
| "loss_region": 0.05955193191766739, |
| "loss_total": 1.137105107307434, |
| "lr": 0.0010229671365998652, |
| "router/selected_tokens_s0": 1180.5, |
| "router/selected_tokens_s1": 100.125, |
| "step": 5460, |
| "tokens_trained": 17.88969364 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.55177304964539, |
| "grad_norm": 0.16868393123149872, |
| "loss": 1.1464, |
| "loss_ce": 1.1285775899887085, |
| "loss_region": 0.06097865477204323, |
| "loss_total": 1.1895562410354614, |
| "lr": 0.0010225602446160141, |
| "router/selected_tokens_s0": 1179.125, |
| "router/selected_tokens_s1": 111.5, |
| "step": 5470, |
| "tokens_trained": 17.92245908 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.5546099290780142, |
| "grad_norm": 0.25248610973358154, |
| "loss": 1.1386, |
| "loss_ce": 1.1028388738632202, |
| "loss_region": 0.06022004038095474, |
| "loss_total": 1.163058876991272, |
| "lr": 0.0010221533526321629, |
| "router/selected_tokens_s0": 1154.9375, |
| "router/selected_tokens_s1": 103.25, |
| "step": 5480, |
| "tokens_trained": 17.95522452 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.5574468085106383, |
| "grad_norm": 0.20562683045864105, |
| "loss": 1.1459, |
| "loss_ce": 1.1231143474578857, |
| "loss_region": 0.05964804068207741, |
| "loss_total": 1.1827623844146729, |
| "lr": 0.0010217464606483118, |
| "router/selected_tokens_s0": 1227.75, |
| "router/selected_tokens_s1": 103.25, |
| "step": 5490, |
| "tokens_trained": 17.98798996 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.5602836879432624, |
| "grad_norm": 0.14120644330978394, |
| "loss": 1.1477, |
| "loss_ce": 1.1169317960739136, |
| "loss_region": 0.05971687659621239, |
| "loss_total": 1.1766486167907715, |
| "lr": 0.0010213395686644608, |
| "router/selected_tokens_s0": 1178.625, |
| "router/selected_tokens_s1": 100.9375, |
| "step": 5500, |
| "tokens_trained": 18.0207554 |
| }, |
| { |
| "epoch": 1.5602836879432624, |
| "eval_ppl": 2.9629568937224073, |
| "eval_runtime": 1.9917, |
| "step": 5500, |
| "tokens_trained": 18.0207554 |
| }, |
| { |
| "epoch": 1.5602836879432624, |
| "eval_F": 0.008238585215556923, |
| "eval_F_cds": 0.008130446500763633, |
| "eval_F_dig": 0.00812563481521994, |
| "eval_F_exon": 0.008018374308585097, |
| "eval_F_intron": 0.008284289521542986, |
| "eval_F_nig": 0.008239803640668593, |
| "eval_F_promoter": 0.008147253279430652, |
| "eval_F_utr": 0.008025921701964379, |
| "eval_G": 0.0443550707796412, |
| "eval_G_cds": 0.038759398021291885, |
| "eval_G_dig": 0.05333020096784905, |
| "eval_G_exon": 0.0435424958973784, |
| "eval_G_intron": 0.04454719544837519, |
| "eval_G_nig": 0.044568092078624715, |
| "eval_G_promoter": 0.0440533747859882, |
| "eval_G_utr": 0.043928981118299744, |
| "eval_avg_bp_per_token": 121.38006391093701, |
| "eval_bp_per_token/cds": 122.99447513812154, |
| "eval_bp_per_token/dig": 123.0673076923077, |
| "eval_bp_per_token/exon": 124.7135593220339, |
| "eval_bp_per_token/intron": 120.71041184636742, |
| "eval_bp_per_token/nig": 121.36211536211536, |
| "eval_bp_per_token/promoter": 122.7407527055403, |
| "eval_bp_per_token/utr": 124.59628154050465, |
| "eval_ppl_cds": 3.699772662007796, |
| "eval_ppl_dig": 1.1000550286613988, |
| "eval_ppl_exon": 3.211033716184067, |
| "eval_ppl_intron": 2.983289329218932, |
| "eval_ppl_nig": 2.796906691689632, |
| "eval_ppl_promoter": 3.2596719050972514, |
| "eval_ppl_utr": 3.3708124471025247, |
| "step": 5500, |
| "tokens_trained": 18.0207554 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.5631205673758866, |
| "grad_norm": 0.12358555197715759, |
| "loss": 1.1408, |
| "loss_ce": 1.1074210405349731, |
| "loss_region": 0.058581963181495667, |
| "loss_total": 1.1660029888153076, |
| "lr": 0.0010209326766806098, |
| "router/selected_tokens_s0": 1055.125, |
| "router/selected_tokens_s1": 84.375, |
| "step": 5510, |
| "tokens_trained": 18.05352084 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.5659574468085107, |
| "grad_norm": 0.15037629008293152, |
| "loss": 1.1411, |
| "loss_ce": 1.0207792520523071, |
| "loss_region": 0.05887831747531891, |
| "loss_total": 1.0796575546264648, |
| "lr": 0.001020525784696759, |
| "router/selected_tokens_s0": 1158.5625, |
| "router/selected_tokens_s1": 92.75, |
| "step": 5520, |
| "tokens_trained": 18.08628548 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.5687943262411348, |
| "grad_norm": 0.1223883107304573, |
| "loss": 1.1397, |
| "loss_ce": 1.0867618322372437, |
| "loss_region": 0.058768656104803085, |
| "loss_total": 1.1455304622650146, |
| "lr": 0.001020118892712908, |
| "router/selected_tokens_s0": 1178.5625, |
| "router/selected_tokens_s1": 92.9375, |
| "step": 5530, |
| "tokens_trained": 18.11905092 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.571631205673759, |
| "grad_norm": 0.29459166526794434, |
| "loss": 1.1411, |
| "loss_ce": 1.0979646444320679, |
| "loss_region": 0.06032467633485794, |
| "loss_total": 1.1582893133163452, |
| "lr": 0.0010197120007290569, |
| "router/selected_tokens_s0": 1156.3125, |
| "router/selected_tokens_s1": 104.25, |
| "step": 5540, |
| "tokens_trained": 18.15181636 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.574468085106383, |
| "grad_norm": 0.13428881764411926, |
| "loss": 1.1447, |
| "loss_ce": 1.0458506345748901, |
| "loss_region": 0.059695612639188766, |
| "loss_total": 1.105546236038208, |
| "lr": 0.0010193051087452058, |
| "router/selected_tokens_s0": 1157.5, |
| "router/selected_tokens_s1": 99.1875, |
| "step": 5550, |
| "tokens_trained": 18.1845818 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.5773049645390071, |
| "grad_norm": 0.16526272892951965, |
| "loss": 1.1439, |
| "loss_ce": 1.0565471649169922, |
| "loss_region": 0.05908852443099022, |
| "loss_total": 1.115635633468628, |
| "lr": 0.0010188982167613548, |
| "router/selected_tokens_s0": 1177.4375, |
| "router/selected_tokens_s1": 95.3125, |
| "step": 5560, |
| "tokens_trained": 18.2173404 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.580141843971631, |
| "grad_norm": 0.2615581452846527, |
| "loss": 1.147, |
| "loss_ce": 0.9654809832572937, |
| "loss_region": 0.061932649463415146, |
| "loss_total": 1.0274136066436768, |
| "lr": 0.0010184913247775038, |
| "router/selected_tokens_s0": 1226.0625, |
| "router/selected_tokens_s1": 120.375, |
| "step": 5570, |
| "tokens_trained": 18.25010584 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.5829787234042554, |
| "grad_norm": 0.2581145465373993, |
| "loss": 1.1447, |
| "loss_ce": 1.0666128396987915, |
| "loss_region": 0.06014023721218109, |
| "loss_total": 1.1267530918121338, |
| "lr": 0.0010180844327936527, |
| "router/selected_tokens_s0": 1150.25, |
| "router/selected_tokens_s1": 102.5625, |
| "step": 5580, |
| "tokens_trained": 18.28287128 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.5858156028368793, |
| "grad_norm": 0.13884904980659485, |
| "loss": 1.1394, |
| "loss_ce": 1.1608939170837402, |
| "loss_region": 0.05978744477033615, |
| "loss_total": 1.2206813097000122, |
| "lr": 0.0010176775408098017, |
| "router/selected_tokens_s0": 1212.5, |
| "router/selected_tokens_s1": 103.375, |
| "step": 5590, |
| "tokens_trained": 18.31563672 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.5886524822695036, |
| "grad_norm": 0.266236275434494, |
| "loss": 1.1399, |
| "loss_ce": 1.0968828201293945, |
| "loss_region": 0.06059946119785309, |
| "loss_total": 1.1574822664260864, |
| "lr": 0.0010172706488259507, |
| "router/selected_tokens_s0": 1098.5625, |
| "router/selected_tokens_s1": 102.25, |
| "step": 5600, |
| "tokens_trained": 18.34840216 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.5914893617021275, |
| "grad_norm": 0.16597892343997955, |
| "loss": 1.1417, |
| "loss_ce": 1.0729858875274658, |
| "loss_region": 0.059629738330841064, |
| "loss_total": 1.132615566253662, |
| "lr": 0.0010168637568420996, |
| "router/selected_tokens_s0": 1152.125, |
| "router/selected_tokens_s1": 98.5625, |
| "step": 5610, |
| "tokens_trained": 18.38116752 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.5943262411347519, |
| "grad_norm": 0.15744268894195557, |
| "loss": 1.1383, |
| "loss_ce": 0.9952933192253113, |
| "loss_region": 0.06122814118862152, |
| "loss_total": 1.0565214157104492, |
| "lr": 0.0010164568648582486, |
| "router/selected_tokens_s0": 1184.8125, |
| "router/selected_tokens_s1": 113.6875, |
| "step": 5620, |
| "tokens_trained": 18.41393296 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.5971631205673757, |
| "grad_norm": 0.1597251147031784, |
| "loss": 1.1399, |
| "loss_ce": 1.1098027229309082, |
| "loss_region": 0.05966440215706825, |
| "loss_total": 1.1694670915603638, |
| "lr": 0.0010160499728743976, |
| "router/selected_tokens_s0": 1083.4375, |
| "router/selected_tokens_s1": 94.0625, |
| "step": 5630, |
| "tokens_trained": 18.4466984 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.6, |
| "grad_norm": 0.1583746075630188, |
| "loss": 1.1391, |
| "loss_ce": 1.0872089862823486, |
| "loss_region": 0.058888886123895645, |
| "loss_total": 1.1460978984832764, |
| "lr": 0.0010156430808905465, |
| "router/selected_tokens_s0": 1156.1875, |
| "router/selected_tokens_s1": 92.3125, |
| "step": 5640, |
| "tokens_trained": 18.47946384 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.602836879432624, |
| "grad_norm": 0.1457541137933731, |
| "loss": 1.1407, |
| "loss_ce": 1.0596680641174316, |
| "loss_region": 0.059955596923828125, |
| "loss_total": 1.1196236610412598, |
| "lr": 0.0010152361889066955, |
| "router/selected_tokens_s0": 1146.125, |
| "router/selected_tokens_s1": 100.1875, |
| "step": 5650, |
| "tokens_trained": 18.51222928 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.6056737588652483, |
| "grad_norm": 0.11980196088552475, |
| "loss": 1.1417, |
| "loss_ce": 1.1236000061035156, |
| "loss_region": 0.06000308692455292, |
| "loss_total": 1.183603048324585, |
| "lr": 0.0010148292969228445, |
| "router/selected_tokens_s0": 1151.8125, |
| "router/selected_tokens_s1": 101.3125, |
| "step": 5660, |
| "tokens_trained": 18.54499392 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.6085106382978722, |
| "grad_norm": 0.13803839683532715, |
| "loss": 1.1414, |
| "loss_ce": 1.1239171028137207, |
| "loss_region": 0.06013445928692818, |
| "loss_total": 1.184051513671875, |
| "lr": 0.0010144224049389934, |
| "router/selected_tokens_s0": 1134.5, |
| "router/selected_tokens_s1": 101.1875, |
| "step": 5670, |
| "tokens_trained": 18.57775936 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.6113475177304966, |
| "grad_norm": 0.145533949136734, |
| "loss": 1.1348, |
| "loss_ce": 1.0382026433944702, |
| "loss_region": 0.06151394546031952, |
| "loss_total": 1.0997165441513062, |
| "lr": 0.0010140155129551424, |
| "router/selected_tokens_s0": 1162.4375, |
| "router/selected_tokens_s1": 114.25, |
| "step": 5680, |
| "tokens_trained": 18.6105248 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.6141843971631205, |
| "grad_norm": 0.16145651042461395, |
| "loss": 1.1371, |
| "loss_ce": 1.1397801637649536, |
| "loss_region": 0.05963653698563576, |
| "loss_total": 1.1994167566299438, |
| "lr": 0.0010136086209712914, |
| "router/selected_tokens_s0": 1064.5625, |
| "router/selected_tokens_s1": 92.5, |
| "step": 5690, |
| "tokens_trained": 18.64329024 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.6170212765957448, |
| "grad_norm": 0.23432840406894684, |
| "loss": 1.1401, |
| "loss_ce": 1.08021879196167, |
| "loss_region": 0.06039845198392868, |
| "loss_total": 1.1406172513961792, |
| "lr": 0.0010132017289874405, |
| "router/selected_tokens_s0": 1131.4375, |
| "router/selected_tokens_s1": 103.0, |
| "step": 5700, |
| "tokens_trained": 18.67605568 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.6198581560283687, |
| "grad_norm": 0.12430011481046677, |
| "loss": 1.142, |
| "loss_ce": 1.0135034322738647, |
| "loss_region": 0.0579909011721611, |
| "loss_total": 1.0714943408966064, |
| "lr": 0.0010127948370035895, |
| "router/selected_tokens_s0": 1141.875, |
| "router/selected_tokens_s1": 84.5, |
| "step": 5710, |
| "tokens_trained": 18.70882112 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.622695035460993, |
| "grad_norm": 0.09644272178411484, |
| "loss": 1.1433, |
| "loss_ce": 1.1016589403152466, |
| "loss_region": 0.062197744846343994, |
| "loss_total": 1.1638567447662354, |
| "lr": 0.0010123879450197385, |
| "router/selected_tokens_s0": 1114.4375, |
| "router/selected_tokens_s1": 115.5, |
| "step": 5720, |
| "tokens_trained": 18.74158656 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.625531914893617, |
| "grad_norm": 0.11881081759929657, |
| "loss": 1.1444, |
| "loss_ce": 1.1430163383483887, |
| "loss_region": 0.06135653704404831, |
| "loss_total": 1.2043728828430176, |
| "lr": 0.0010119810530358872, |
| "router/selected_tokens_s0": 1128.8125, |
| "router/selected_tokens_s1": 110.4375, |
| "step": 5730, |
| "tokens_trained": 18.774352 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.6283687943262413, |
| "grad_norm": 0.3889363706111908, |
| "loss": 1.145, |
| "loss_ce": 1.0896387100219727, |
| "loss_region": 0.05884133279323578, |
| "loss_total": 1.1484800577163696, |
| "lr": 0.0010115741610520362, |
| "router/selected_tokens_s0": 1127.0625, |
| "router/selected_tokens_s1": 90.5, |
| "step": 5740, |
| "tokens_trained": 18.80711744 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.6312056737588652, |
| "grad_norm": 0.14049233496189117, |
| "loss": 1.1398, |
| "loss_ce": 1.072272777557373, |
| "loss_region": 0.058674365282058716, |
| "loss_total": 1.1309471130371094, |
| "lr": 0.0010111672690681851, |
| "router/selected_tokens_s0": 1157.8125, |
| "router/selected_tokens_s1": 91.125, |
| "step": 5750, |
| "tokens_trained": 18.83988288 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.6340425531914895, |
| "grad_norm": 0.3472958207130432, |
| "loss": 1.1394, |
| "loss_ce": 1.0463656187057495, |
| "loss_region": 0.060423437505960464, |
| "loss_total": 1.1067891120910645, |
| "lr": 0.0010107603770843341, |
| "router/selected_tokens_s0": 1203.5, |
| "router/selected_tokens_s1": 106.75, |
| "step": 5760, |
| "tokens_trained": 18.87264832 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.6368794326241134, |
| "grad_norm": 0.08495907485485077, |
| "loss": 1.1408, |
| "loss_ce": 1.1229298114776611, |
| "loss_region": 0.05851162225008011, |
| "loss_total": 1.1814414262771606, |
| "lr": 0.0010103534851004833, |
| "router/selected_tokens_s0": 1148.0, |
| "router/selected_tokens_s1": 89.0625, |
| "step": 5770, |
| "tokens_trained": 18.90541376 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.6397163120567377, |
| "grad_norm": 0.07788603007793427, |
| "loss": 1.1404, |
| "loss_ce": 1.0718777179718018, |
| "loss_region": 0.05956518277525902, |
| "loss_total": 1.131442904472351, |
| "lr": 0.0010099465931166323, |
| "router/selected_tokens_s0": 1167.0625, |
| "router/selected_tokens_s1": 98.8125, |
| "step": 5780, |
| "tokens_trained": 18.9381792 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.6425531914893616, |
| "grad_norm": 0.1432577669620514, |
| "loss": 1.1341, |
| "loss_ce": 0.9698552489280701, |
| "loss_region": 0.058856260031461716, |
| "loss_total": 1.0287115573883057, |
| "lr": 0.0010095397011327812, |
| "router/selected_tokens_s0": 1131.625, |
| "router/selected_tokens_s1": 90.875, |
| "step": 5790, |
| "tokens_trained": 18.97094464 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.645390070921986, |
| "grad_norm": 0.15056516230106354, |
| "loss": 1.1392, |
| "loss_ce": 1.0646241903305054, |
| "loss_region": 0.06081939488649368, |
| "loss_total": 1.1254435777664185, |
| "lr": 0.0010091328091489302, |
| "router/selected_tokens_s0": 1152.5, |
| "router/selected_tokens_s1": 107.625, |
| "step": 5800, |
| "tokens_trained": 19.00370848 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.6482269503546099, |
| "grad_norm": 0.22324803471565247, |
| "loss": 1.1385, |
| "loss_ce": 1.1039924621582031, |
| "loss_region": 0.0615408755838871, |
| "loss_total": 1.1655333042144775, |
| "lr": 0.0010087259171650792, |
| "router/selected_tokens_s0": 1144.4375, |
| "router/selected_tokens_s1": 113.0625, |
| "step": 5810, |
| "tokens_trained": 19.03647392 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.6510638297872342, |
| "grad_norm": 0.12950165569782257, |
| "loss": 1.1386, |
| "loss_ce": 1.0333588123321533, |
| "loss_region": 0.06084536015987396, |
| "loss_total": 1.0942041873931885, |
| "lr": 0.0010083190251812281, |
| "router/selected_tokens_s0": 1100.9375, |
| "router/selected_tokens_s1": 103.4375, |
| "step": 5820, |
| "tokens_trained": 19.06923936 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.653900709219858, |
| "grad_norm": 0.20259711146354675, |
| "loss": 1.1362, |
| "loss_ce": 1.0697150230407715, |
| "loss_region": 0.059984240680933, |
| "loss_total": 1.1296992301940918, |
| "lr": 0.001007912133197377, |
| "router/selected_tokens_s0": 1126.5, |
| "router/selected_tokens_s1": 99.625, |
| "step": 5830, |
| "tokens_trained": 19.1020048 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.6567375886524822, |
| "grad_norm": 0.13350364565849304, |
| "loss": 1.1393, |
| "loss_ce": 1.0918112993240356, |
| "loss_region": 0.06063058227300644, |
| "loss_total": 1.1524418592453003, |
| "lr": 0.001007505241213526, |
| "router/selected_tokens_s0": 1143.375, |
| "router/selected_tokens_s1": 105.8125, |
| "step": 5840, |
| "tokens_trained": 19.13477024 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.6595744680851063, |
| "grad_norm": 0.11386194825172424, |
| "loss": 1.1365, |
| "loss_ce": 1.1211421489715576, |
| "loss_region": 0.060025155544281006, |
| "loss_total": 1.1811673641204834, |
| "lr": 0.001007098349229675, |
| "router/selected_tokens_s0": 1107.6875, |
| "router/selected_tokens_s1": 98.3125, |
| "step": 5850, |
| "tokens_trained": 19.16753568 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.6624113475177305, |
| "grad_norm": 0.1893109381198883, |
| "loss": 1.1351, |
| "loss_ce": 0.9979044795036316, |
| "loss_region": 0.05941358953714371, |
| "loss_total": 1.057318091392517, |
| "lr": 0.001006691457245824, |
| "router/selected_tokens_s0": 1135.0625, |
| "router/selected_tokens_s1": 95.5, |
| "step": 5860, |
| "tokens_trained": 19.20030112 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.6652482269503546, |
| "grad_norm": 0.12390246987342834, |
| "loss": 1.1378, |
| "loss_ce": 1.0217939615249634, |
| "loss_region": 0.060103025287389755, |
| "loss_total": 1.0818970203399658, |
| "lr": 0.001006284565261973, |
| "router/selected_tokens_s0": 1175.25, |
| "router/selected_tokens_s1": 103.875, |
| "step": 5870, |
| "tokens_trained": 19.23306656 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.6680851063829787, |
| "grad_norm": 0.20230291783809662, |
| "loss": 1.1366, |
| "loss_ce": 1.1150033473968506, |
| "loss_region": 0.05922327935695648, |
| "loss_total": 1.1742266416549683, |
| "lr": 0.001005877673278122, |
| "router/selected_tokens_s0": 1175.5, |
| "router/selected_tokens_s1": 96.3125, |
| "step": 5880, |
| "tokens_trained": 19.265832 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.6709219858156028, |
| "grad_norm": 0.14319747686386108, |
| "loss": 1.136, |
| "loss_ce": 1.08858060836792, |
| "loss_region": 0.06075606867671013, |
| "loss_total": 1.1493366956710815, |
| "lr": 0.0010054707812942709, |
| "router/selected_tokens_s0": 1206.375, |
| "router/selected_tokens_s1": 111.1875, |
| "step": 5890, |
| "tokens_trained": 19.29859664 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.673758865248227, |
| "grad_norm": 0.11091051995754242, |
| "loss": 1.1405, |
| "loss_ce": 1.085661768913269, |
| "loss_region": 0.0598275251686573, |
| "loss_total": 1.1454893350601196, |
| "lr": 0.0010050638893104198, |
| "router/selected_tokens_s0": 1169.3125, |
| "router/selected_tokens_s1": 101.0, |
| "step": 5900, |
| "tokens_trained": 19.33136116 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.676595744680851, |
| "grad_norm": 0.10978376865386963, |
| "loss": 1.1381, |
| "loss_ce": 1.100111961364746, |
| "loss_region": 0.059790026396512985, |
| "loss_total": 1.1599019765853882, |
| "lr": 0.0010046569973265688, |
| "router/selected_tokens_s0": 1142.875, |
| "router/selected_tokens_s1": 98.9375, |
| "step": 5910, |
| "tokens_trained": 19.3641258 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.6794326241134752, |
| "grad_norm": 0.11382731050252914, |
| "loss": 1.1353, |
| "loss_ce": 1.0841070413589478, |
| "loss_region": 0.05912192910909653, |
| "loss_total": 1.1432290077209473, |
| "lr": 0.0010042501053427178, |
| "router/selected_tokens_s0": 1135.125, |
| "router/selected_tokens_s1": 93.5, |
| "step": 5920, |
| "tokens_trained": 19.39688964 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.6822695035460993, |
| "grad_norm": 0.1101735383272171, |
| "loss": 1.1405, |
| "loss_ce": 1.071196436882019, |
| "loss_region": 0.06145774573087692, |
| "loss_total": 1.1326541900634766, |
| "lr": 0.0010038432133588667, |
| "router/selected_tokens_s0": 1174.125, |
| "router/selected_tokens_s1": 114.8125, |
| "step": 5930, |
| "tokens_trained": 19.42965508 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.6851063829787234, |
| "grad_norm": 0.13230785727500916, |
| "loss": 1.1363, |
| "loss_ce": 1.0845067501068115, |
| "loss_region": 0.05920430272817612, |
| "loss_total": 1.1437110900878906, |
| "lr": 0.0010034363213750157, |
| "router/selected_tokens_s0": 1170.875, |
| "router/selected_tokens_s1": 95.9375, |
| "step": 5940, |
| "tokens_trained": 19.46241972 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.6879432624113475, |
| "grad_norm": 0.13463565707206726, |
| "loss": 1.1389, |
| "loss_ce": 1.1332194805145264, |
| "loss_region": 0.061460621654987335, |
| "loss_total": 1.194680094718933, |
| "lr": 0.0010030294293911649, |
| "router/selected_tokens_s0": 1147.4375, |
| "router/selected_tokens_s1": 113.125, |
| "step": 5950, |
| "tokens_trained": 19.49518516 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.6907801418439716, |
| "grad_norm": 0.105684794485569, |
| "loss": 1.1375, |
| "loss_ce": 1.1408146619796753, |
| "loss_region": 0.06131913885474205, |
| "loss_total": 1.2021337747573853, |
| "lr": 0.0010026225374073139, |
| "router/selected_tokens_s0": 1148.3125, |
| "router/selected_tokens_s1": 111.9375, |
| "step": 5960, |
| "tokens_trained": 19.5279506 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.6936170212765957, |
| "grad_norm": 0.1298324316740036, |
| "loss": 1.1405, |
| "loss_ce": 1.1013529300689697, |
| "loss_region": 0.05970582365989685, |
| "loss_total": 1.161058783531189, |
| "lr": 0.0010022156454234628, |
| "router/selected_tokens_s0": 1179.0625, |
| "router/selected_tokens_s1": 100.375, |
| "step": 5970, |
| "tokens_trained": 19.56071604 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.6964539007092199, |
| "grad_norm": 0.12999854981899261, |
| "loss": 1.1337, |
| "loss_ce": 1.073915958404541, |
| "loss_region": 0.06164190545678139, |
| "loss_total": 1.1355578899383545, |
| "lr": 0.0010018087534396116, |
| "router/selected_tokens_s0": 1153.875, |
| "router/selected_tokens_s1": 114.8125, |
| "step": 5980, |
| "tokens_trained": 19.59348068 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.699290780141844, |
| "grad_norm": 0.12657859921455383, |
| "loss": 1.1349, |
| "loss_ce": 1.0706164836883545, |
| "loss_region": 0.06009352579712868, |
| "loss_total": 1.1307100057601929, |
| "lr": 0.0010014018614557605, |
| "router/selected_tokens_s0": 1185.6875, |
| "router/selected_tokens_s1": 104.1875, |
| "step": 5990, |
| "tokens_trained": 19.62624612 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.702127659574468, |
| "grad_norm": 0.2500196099281311, |
| "loss": 1.1404, |
| "loss_ce": 1.060509204864502, |
| "loss_region": 0.05948161706328392, |
| "loss_total": 1.1199908256530762, |
| "lr": 0.0010009949694719095, |
| "router/selected_tokens_s0": 1287.75, |
| "router/selected_tokens_s1": 104.875, |
| "step": 6000, |
| "tokens_trained": 19.659011552 |
| }, |
| { |
| "epoch": 1.702127659574468, |
| "eval_ppl": 2.9490778941081053, |
| "eval_runtime": 2.0059, |
| "step": 6000, |
| "tokens_trained": 19.659011552 |
| }, |
| { |
| "epoch": 1.702127659574468, |
| "eval_F": 0.0071978279553090085, |
| "eval_F_cds": 0.00597430599227383, |
| "eval_F_dig": 0.010078912415032424, |
| "eval_F_exon": 0.00709422269335834, |
| "eval_F_intron": 0.007232748283338803, |
| "eval_F_nig": 0.007415534059204729, |
| "eval_F_promoter": 0.006979975996462637, |
| "eval_F_utr": 0.0063312051672866415, |
| "eval_G": 0.04385200061737903, |
| "eval_G_cds": 0.03808590240656724, |
| "eval_G_dig": 0.06461251550414095, |
| "eval_G_exon": 0.04268873864346774, |
| "eval_G_intron": 0.043968640216871274, |
| "eval_G_nig": 0.044211571383191844, |
| "eval_G_promoter": 0.04331874247010744, |
| "eval_G_utr": 0.04304163230380192, |
| "eval_avg_bp_per_token": 138.93080054274085, |
| "eval_bp_per_token/cds": 167.38345864661653, |
| "eval_bp_per_token/dig": 99.21705426356588, |
| "eval_bp_per_token/exon": 140.95977011494253, |
| "eval_bp_per_token/intron": 138.26003074150634, |
| "eval_bp_per_token/nig": 134.85205408216328, |
| "eval_bp_per_token/promoter": 143.26696832579185, |
| "eval_bp_per_token/utr": 157.94781144781146, |
| "eval_ppl_cds": 3.6903768655540383, |
| "eval_ppl_dig": 1.0884380017066235, |
| "eval_ppl_exon": 3.198014478374761, |
| "eval_ppl_intron": 2.9685884822623727, |
| "eval_ppl_nig": 2.7787749476744623, |
| "eval_ppl_promoter": 3.2509216061956865, |
| "eval_ppl_utr": 3.3628005411378803, |
| "step": 6000, |
| "tokens_trained": 19.659011552 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 30600, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 9, |
| "save_steps": 3000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 16, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|