| { |
| "best_global_step": 6000, |
| "best_metric": 2.998067346037042, |
| "best_model_checkpoint": "/gpfs/scratch/guoh/DNAFM/output/gencode_human_12.8k_12800/HNet_BPT989_12.8K-100B/checkpoint-6000", |
| "epoch": 1.702127659574468, |
| "eval_steps": 500, |
| "global_step": 6000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.0028368794326241137, |
| "grad_norm": 1630.637939453125, |
| "loss": 88.6226, |
| "loss_ce": 60.03535461425781, |
| "loss_region": 0.10551077872514725, |
| "loss_total": 60.140865325927734, |
| "lr": 2.20454076850486e-05, |
| "router/selected_tokens_s0": 919.25, |
| "router/selected_tokens_s1": 207.375, |
| "step": 10, |
| "tokens_trained": 0.03276544 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.005673758865248227, |
| "grad_norm": 479.0514221191406, |
| "loss": 44.5113, |
| "loss_ce": 25.269031524658203, |
| "loss_region": 0.9648033976554871, |
| "loss_total": 26.233835220336914, |
| "lr": 4.654030511288038e-05, |
| "router/selected_tokens_s0": 1.0, |
| "router/selected_tokens_s1": 1.0, |
| "step": 20, |
| "tokens_trained": 0.06553088 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.00851063829787234, |
| "grad_norm": 338.2317810058594, |
| "loss": 14.8642, |
| "loss_ce": 7.425730228424072, |
| "loss_region": 0.9663561582565308, |
| "loss_total": 8.392086029052734, |
| "lr": 7.103520254071216e-05, |
| "router/selected_tokens_s0": 1.0, |
| "router/selected_tokens_s1": 1.0, |
| "step": 30, |
| "tokens_trained": 0.09829632 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.011347517730496455, |
| "grad_norm": 492.4618835449219, |
| "loss": 7.0085, |
| "loss_ce": 7.411436557769775, |
| "loss_region": 0.9673476815223694, |
| "loss_total": 8.3787841796875, |
| "lr": 9.553009996854394e-05, |
| "router/selected_tokens_s0": 1.0, |
| "router/selected_tokens_s1": 1.0, |
| "step": 40, |
| "tokens_trained": 0.13106176 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.014184397163120567, |
| "grad_norm": 441.222900390625, |
| "loss": 9.4864, |
| "loss_ce": 10.71827220916748, |
| "loss_region": 0.9682716727256775, |
| "loss_total": 11.686543464660645, |
| "lr": 0.00012002499739637572, |
| "router/selected_tokens_s0": 1.0, |
| "router/selected_tokens_s1": 1.0, |
| "step": 50, |
| "tokens_trained": 0.1638272 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.01702127659574468, |
| "grad_norm": 597.226806640625, |
| "loss": 15.1303, |
| "loss_ce": 20.19317626953125, |
| "loss_region": 0.968892514705658, |
| "loss_total": 21.16206932067871, |
| "lr": 0.00014451989482420748, |
| "router/selected_tokens_s0": 1.0, |
| "router/selected_tokens_s1": 1.0, |
| "step": 60, |
| "tokens_trained": 0.19659264 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.019858156028368795, |
| "grad_norm": 610.672607421875, |
| "loss": 13.1567, |
| "loss_ce": 11.587564468383789, |
| "loss_region": 0.9703285098075867, |
| "loss_total": 12.557892799377441, |
| "lr": 0.00016901479225203927, |
| "router/selected_tokens_s0": 1.0, |
| "router/selected_tokens_s1": 1.0, |
| "step": 70, |
| "tokens_trained": 0.22935808 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.02269503546099291, |
| "grad_norm": 364.7670593261719, |
| "loss": 10.966, |
| "loss_ce": 17.02631378173828, |
| "loss_region": 0.9707926511764526, |
| "loss_total": 17.997106552124023, |
| "lr": 0.00019350968967987104, |
| "router/selected_tokens_s0": 1.0, |
| "router/selected_tokens_s1": 1.0, |
| "step": 80, |
| "tokens_trained": 0.26212272 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.02553191489361702, |
| "grad_norm": 515.1493530273438, |
| "loss": 13.6219, |
| "loss_ce": 12.72504997253418, |
| "loss_region": 0.9703077673912048, |
| "loss_total": 13.695357322692871, |
| "lr": 0.0002180045871077028, |
| "router/selected_tokens_s0": 1.0, |
| "router/selected_tokens_s1": 1.0, |
| "step": 90, |
| "tokens_trained": 0.294888136 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.028368794326241134, |
| "grad_norm": 261.4297180175781, |
| "loss": 13.2669, |
| "loss_ce": 15.91538143157959, |
| "loss_region": 0.969863772392273, |
| "loss_total": 16.885244369506836, |
| "lr": 0.00024249948453553463, |
| "router/selected_tokens_s0": 1.0, |
| "router/selected_tokens_s1": 1.0, |
| "step": 100, |
| "tokens_trained": 0.327653576 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.031205673758865248, |
| "grad_norm": 537.6857299804688, |
| "loss": 14.7024, |
| "loss_ce": 17.717187881469727, |
| "loss_region": 0.9700204133987427, |
| "loss_total": 18.68720817565918, |
| "lr": 0.00026699438196336637, |
| "router/selected_tokens_s0": 1.0, |
| "router/selected_tokens_s1": 1.0, |
| "step": 110, |
| "tokens_trained": 0.360419016 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.03404255319148936, |
| "grad_norm": 300.2855529785156, |
| "loss": 11.4653, |
| "loss_ce": 8.877747535705566, |
| "loss_region": 0.10213130712509155, |
| "loss_total": 8.979878425598145, |
| "lr": 0.00029148927939119814, |
| "router/selected_tokens_s0": 2521.375, |
| "router/selected_tokens_s1": 1.0, |
| "step": 120, |
| "tokens_trained": 0.393182856 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.03687943262411347, |
| "grad_norm": 480.1593933105469, |
| "loss": 10.5954, |
| "loss_ce": 12.311738967895508, |
| "loss_region": 0.12702159583568573, |
| "loss_total": 12.438760757446289, |
| "lr": 0.00031598417681902996, |
| "router/selected_tokens_s0": 3126.375, |
| "router/selected_tokens_s1": 1.0, |
| "step": 130, |
| "tokens_trained": 0.425948296 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.03971631205673759, |
| "grad_norm": 325.0716247558594, |
| "loss": 11.277, |
| "loss_ce": 10.923357963562012, |
| "loss_region": 0.11427915096282959, |
| "loss_total": 11.037636756896973, |
| "lr": 0.00034047907424686173, |
| "router/selected_tokens_s0": 2800.375, |
| "router/selected_tokens_s1": 1.0, |
| "step": 140, |
| "tokens_trained": 0.458713736 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.0425531914893617, |
| "grad_norm": 162.2258758544922, |
| "loss": 9.8598, |
| "loss_ce": 5.65625, |
| "loss_region": 0.13778820633888245, |
| "loss_total": 5.79403829574585, |
| "lr": 0.0003649739716746935, |
| "router/selected_tokens_s0": 3915.375, |
| "router/selected_tokens_s1": 1.0, |
| "step": 150, |
| "tokens_trained": 0.491474616 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.04539007092198582, |
| "grad_norm": 286.8069763183594, |
| "loss": 6.7746, |
| "loss_ce": 9.406194686889648, |
| "loss_region": 0.07919373363256454, |
| "loss_total": 9.48538875579834, |
| "lr": 0.00038946886910252526, |
| "router/selected_tokens_s0": 1466.9375, |
| "router/selected_tokens_s1": 1.0, |
| "step": 160, |
| "tokens_trained": 0.524240056 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.04822695035460993, |
| "grad_norm": 269.352783203125, |
| "loss": 5.7788, |
| "loss_ce": 9.565649032592773, |
| "loss_region": 0.9683924913406372, |
| "loss_total": 10.534041404724121, |
| "lr": 0.0004139637665303571, |
| "router/selected_tokens_s0": 1.0, |
| "router/selected_tokens_s1": 1.0, |
| "step": 170, |
| "tokens_trained": 0.557004696 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.05106382978723404, |
| "grad_norm": 120.00736236572266, |
| "loss": 6.743, |
| "loss_ce": 5.809171199798584, |
| "loss_region": 0.11065831035375595, |
| "loss_total": 5.919829368591309, |
| "lr": 0.0004384586639581888, |
| "router/selected_tokens_s0": 2808.75, |
| "router/selected_tokens_s1": 1.0, |
| "step": 180, |
| "tokens_trained": 0.589768376 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.05390070921985816, |
| "grad_norm": 143.41748046875, |
| "loss": 4.5633, |
| "loss_ce": 4.988459587097168, |
| "loss_region": 0.17456556856632233, |
| "loss_total": 5.163025379180908, |
| "lr": 0.0004629535613860206, |
| "router/selected_tokens_s0": 4780.4375, |
| "router/selected_tokens_s1": 1.0, |
| "step": 190, |
| "tokens_trained": 0.622533016 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.05673758865248227, |
| "grad_norm": 198.15806579589844, |
| "loss": 3.8252, |
| "loss_ce": 5.1574931144714355, |
| "loss_region": 0.21408332884311676, |
| "loss_total": 5.371576309204102, |
| "lr": 0.00048744845881385244, |
| "router/selected_tokens_s0": 5731.9375, |
| "router/selected_tokens_s1": 1.0, |
| "step": 200, |
| "tokens_trained": 0.655298456 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.059574468085106386, |
| "grad_norm": 178.36068725585938, |
| "loss": 6.8621, |
| "loss_ce": 6.98280668258667, |
| "loss_region": 0.07259761542081833, |
| "loss_total": 7.055404186248779, |
| "lr": 0.0005119433562416841, |
| "router/selected_tokens_s0": 1102.5, |
| "router/selected_tokens_s1": 1.0, |
| "step": 210, |
| "tokens_trained": 0.688063096 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.062411347517730496, |
| "grad_norm": 123.39852142333984, |
| "loss": 5.1909, |
| "loss_ce": 4.071774482727051, |
| "loss_region": 0.11685775220394135, |
| "loss_total": 4.188632011413574, |
| "lr": 0.0005364382536695159, |
| "router/selected_tokens_s0": 3242.6875, |
| "router/selected_tokens_s1": 1.0, |
| "step": 220, |
| "tokens_trained": 0.720828536 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.06524822695035461, |
| "grad_norm": 94.15641784667969, |
| "loss": 4.7615, |
| "loss_ce": 5.413005828857422, |
| "loss_region": 0.1339714229106903, |
| "loss_total": 5.5469770431518555, |
| "lr": 0.0005609331510973477, |
| "router/selected_tokens_s0": 3748.125, |
| "router/selected_tokens_s1": 1.0, |
| "step": 230, |
| "tokens_trained": 0.753593976 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.06808510638297872, |
| "grad_norm": 68.37236022949219, |
| "loss": 3.3249, |
| "loss_ce": 1.9714446067810059, |
| "loss_region": 0.09609804302453995, |
| "loss_total": 2.067542552947998, |
| "lr": 0.0005854280485251795, |
| "router/selected_tokens_s0": 2231.875, |
| "router/selected_tokens_s1": 1.0, |
| "step": 240, |
| "tokens_trained": 0.786359416 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.07092198581560284, |
| "grad_norm": 98.68827819824219, |
| "loss": 2.5161, |
| "loss_ce": 2.8443872928619385, |
| "loss_region": 0.10850368440151215, |
| "loss_total": 2.9528908729553223, |
| "lr": 0.0006099229459530113, |
| "router/selected_tokens_s0": 2888.375, |
| "router/selected_tokens_s1": 1.0, |
| "step": 250, |
| "tokens_trained": 0.819124856 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.07375886524822695, |
| "grad_norm": 156.4917755126953, |
| "loss": 3.2518, |
| "loss_ce": 5.879138946533203, |
| "loss_region": 0.05636154115200043, |
| "loss_total": 5.935500621795654, |
| "lr": 0.0006344178433808431, |
| "router/selected_tokens_s0": 23.6875, |
| "router/selected_tokens_s1": 1.0, |
| "step": 260, |
| "tokens_trained": 0.851889496 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.07659574468085106, |
| "grad_norm": 66.22383117675781, |
| "loss": 4.0904, |
| "loss_ce": 2.8470406532287598, |
| "loss_region": 0.19776995480060577, |
| "loss_total": 3.0448105335235596, |
| "lr": 0.0006589127408086749, |
| "router/selected_tokens_s0": 5504.0, |
| "router/selected_tokens_s1": 1.0, |
| "step": 270, |
| "tokens_trained": 0.884654936 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.07943262411347518, |
| "grad_norm": 82.5650405883789, |
| "loss": 3.8749, |
| "loss_ce": 4.707574844360352, |
| "loss_region": 0.13613729178905487, |
| "loss_total": 4.843712329864502, |
| "lr": 0.0006834076382365066, |
| "router/selected_tokens_s0": 3865.3125, |
| "router/selected_tokens_s1": 1.0, |
| "step": 280, |
| "tokens_trained": 0.91742036 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.08226950354609928, |
| "grad_norm": 49.25559997558594, |
| "loss": 3.7518, |
| "loss_ce": 3.217980146408081, |
| "loss_region": 0.07522433996200562, |
| "loss_total": 3.2932045459747314, |
| "lr": 0.0007079025356643384, |
| "router/selected_tokens_s0": 1294.875, |
| "router/selected_tokens_s1": 1.0, |
| "step": 290, |
| "tokens_trained": 0.9501858 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.0851063829787234, |
| "grad_norm": 91.8722915649414, |
| "loss": 4.1724, |
| "loss_ce": 4.418632984161377, |
| "loss_region": 0.1931368112564087, |
| "loss_total": 4.611769676208496, |
| "lr": 0.0007323974330921702, |
| "router/selected_tokens_s0": 5670.5, |
| "router/selected_tokens_s1": 1.0, |
| "step": 300, |
| "tokens_trained": 0.98295124 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.08794326241134752, |
| "grad_norm": 57.65898895263672, |
| "loss": 3.342, |
| "loss_ce": 1.7671482563018799, |
| "loss_region": 0.13846704363822937, |
| "loss_total": 1.9056153297424316, |
| "lr": 0.000756892330520002, |
| "router/selected_tokens_s0": 4120.75, |
| "router/selected_tokens_s1": 1.0, |
| "step": 310, |
| "tokens_trained": 1.01571668 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.09078014184397164, |
| "grad_norm": 54.67989730834961, |
| "loss": 3.6009, |
| "loss_ce": 1.9458640813827515, |
| "loss_region": 0.10488849133253098, |
| "loss_total": 2.050752639770508, |
| "lr": 0.0007813872279478337, |
| "router/selected_tokens_s0": 2792.0, |
| "router/selected_tokens_s1": 1.0, |
| "step": 320, |
| "tokens_trained": 1.04848196 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.09361702127659574, |
| "grad_norm": 81.29891204833984, |
| "loss": 3.6246, |
| "loss_ce": 2.3018128871917725, |
| "loss_region": 0.09965165704488754, |
| "loss_total": 2.4014644622802734, |
| "lr": 0.0008058821253756655, |
| "router/selected_tokens_s0": 2326.5, |
| "router/selected_tokens_s1": 1.0, |
| "step": 330, |
| "tokens_trained": 1.0812466 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.09645390070921986, |
| "grad_norm": 59.7260856628418, |
| "loss": 2.717, |
| "loss_ce": 1.7813047170639038, |
| "loss_region": 0.11393033713102341, |
| "loss_total": 1.8952350616455078, |
| "lr": 0.0008303770228034974, |
| "router/selected_tokens_s0": 3166.5625, |
| "router/selected_tokens_s1": 1.0, |
| "step": 340, |
| "tokens_trained": 1.11401204 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.09929078014184398, |
| "grad_norm": 39.44504165649414, |
| "loss": 1.8522, |
| "loss_ce": 1.858303427696228, |
| "loss_region": 0.07666981220245361, |
| "loss_total": 1.9349732398986816, |
| "lr": 0.0008548719202313291, |
| "router/selected_tokens_s0": 1411.5625, |
| "router/selected_tokens_s1": 1.0, |
| "step": 350, |
| "tokens_trained": 1.14677748 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.10212765957446808, |
| "grad_norm": 46.70553207397461, |
| "loss": 2.1525, |
| "loss_ce": 1.846787452697754, |
| "loss_region": 0.10527511686086655, |
| "loss_total": 1.9520626068115234, |
| "lr": 0.0008793668176591608, |
| "router/selected_tokens_s0": 2740.6875, |
| "router/selected_tokens_s1": 1.0, |
| "step": 360, |
| "tokens_trained": 1.17954212 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.1049645390070922, |
| "grad_norm": 42.21476745605469, |
| "loss": 2.2006, |
| "loss_ce": 1.8438307046890259, |
| "loss_region": 0.0864676907658577, |
| "loss_total": 1.9302984476089478, |
| "lr": 0.0009038617150869926, |
| "router/selected_tokens_s0": 1980.625, |
| "router/selected_tokens_s1": 1.0, |
| "step": 370, |
| "tokens_trained": 1.21230756 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.10780141843971631, |
| "grad_norm": 104.00314331054688, |
| "loss": 2.3132, |
| "loss_ce": 3.442901849746704, |
| "loss_region": 0.0683647096157074, |
| "loss_total": 3.5112664699554443, |
| "lr": 0.0009283566125148244, |
| "router/selected_tokens_s0": 1006.5, |
| "router/selected_tokens_s1": 1.0, |
| "step": 380, |
| "tokens_trained": 1.2450722 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.11063829787234042, |
| "grad_norm": 78.1941146850586, |
| "loss": 3.3132, |
| "loss_ce": 4.232690334320068, |
| "loss_region": 0.10663130134344101, |
| "loss_total": 4.339321613311768, |
| "lr": 0.0009528515099426562, |
| "router/selected_tokens_s0": 2983.6875, |
| "router/selected_tokens_s1": 2.0, |
| "step": 390, |
| "tokens_trained": 1.27783604 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.11347517730496454, |
| "grad_norm": 95.86592102050781, |
| "loss": 2.8042, |
| "loss_ce": 4.492650032043457, |
| "loss_region": 0.14126580953598022, |
| "loss_total": 4.633915901184082, |
| "lr": 0.000977346407370488, |
| "router/selected_tokens_s0": 3625.0625, |
| "router/selected_tokens_s1": 336.1875, |
| "step": 400, |
| "tokens_trained": 1.31060148 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.11631205673758865, |
| "grad_norm": 42.11272430419922, |
| "loss": 2.7845, |
| "loss_ce": 1.6918444633483887, |
| "loss_region": 0.05655944347381592, |
| "loss_total": 1.7484039068222046, |
| "lr": 0.0010018413047983197, |
| "router/selected_tokens_s0": 374.75, |
| "router/selected_tokens_s1": 1.0, |
| "step": 410, |
| "tokens_trained": 1.34336692 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.11914893617021277, |
| "grad_norm": 46.05327224731445, |
| "loss": 2.3243, |
| "loss_ce": 1.7027208805084229, |
| "loss_region": 0.1161847934126854, |
| "loss_total": 1.8189057111740112, |
| "lr": 0.0010263362022261515, |
| "router/selected_tokens_s0": 3265.1875, |
| "router/selected_tokens_s1": 1.0, |
| "step": 420, |
| "tokens_trained": 1.37613236 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.12198581560283688, |
| "grad_norm": 64.44268035888672, |
| "loss": 2.5778, |
| "loss_ce": 2.2099809646606445, |
| "loss_region": 0.056851863861083984, |
| "loss_total": 2.2668328285217285, |
| "lr": 0.0010508310996539833, |
| "router/selected_tokens_s0": 343.6875, |
| "router/selected_tokens_s1": 1.0, |
| "step": 430, |
| "tokens_trained": 1.4088978 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.12482269503546099, |
| "grad_norm": 69.73699951171875, |
| "loss": 2.1567, |
| "loss_ce": 2.3271026611328125, |
| "loss_region": 0.08058959245681763, |
| "loss_total": 2.4076921939849854, |
| "lr": 0.0010753259970818151, |
| "router/selected_tokens_s0": 1795.1875, |
| "router/selected_tokens_s1": 1.0, |
| "step": 440, |
| "tokens_trained": 1.44166324 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.1276595744680851, |
| "grad_norm": 41.13121032714844, |
| "loss": 1.6985, |
| "loss_ce": 1.7502132654190063, |
| "loss_region": 0.060783833265304565, |
| "loss_total": 1.8109971284866333, |
| "lr": 0.001099820894509647, |
| "router/selected_tokens_s0": 912.25, |
| "router/selected_tokens_s1": 1.0, |
| "step": 450, |
| "tokens_trained": 1.47442868 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.13049645390070921, |
| "grad_norm": 23.884733200073242, |
| "loss": 1.8075, |
| "loss_ce": 1.7715412378311157, |
| "loss_region": 0.06649690866470337, |
| "loss_total": 1.8380382061004639, |
| "lr": 0.0011243157919374788, |
| "router/selected_tokens_s0": 803.75, |
| "router/selected_tokens_s1": 28.75, |
| "step": 460, |
| "tokens_trained": 1.50719412 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.13333333333333333, |
| "grad_norm": 27.034103393554688, |
| "loss": 1.855, |
| "loss_ce": 1.4545748233795166, |
| "loss_region": 0.06679507344961166, |
| "loss_total": 1.5213699340820312, |
| "lr": 0.0011488106893653104, |
| "router/selected_tokens_s0": 1278.5, |
| "router/selected_tokens_s1": 6.1875, |
| "step": 470, |
| "tokens_trained": 1.53995956 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.13617021276595745, |
| "grad_norm": 45.51372528076172, |
| "loss": 1.6442, |
| "loss_ce": 1.8091086149215698, |
| "loss_region": 0.06229155510663986, |
| "loss_total": 1.8714001178741455, |
| "lr": 0.0011733055867931422, |
| "router/selected_tokens_s0": 604.3125, |
| "router/selected_tokens_s1": 18.375, |
| "step": 480, |
| "tokens_trained": 1.5727242 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.13900709219858157, |
| "grad_norm": 41.38487243652344, |
| "loss": 1.7489, |
| "loss_ce": 1.6256108283996582, |
| "loss_region": 0.07747390866279602, |
| "loss_total": 1.7030847072601318, |
| "lr": 0.001197800484220974, |
| "router/selected_tokens_s0": 1453.625, |
| "router/selected_tokens_s1": 75.0, |
| "step": 490, |
| "tokens_trained": 1.60548964 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.14184397163120568, |
| "grad_norm": 27.50923728942871, |
| "loss": 1.704, |
| "loss_ce": 1.4722363948822021, |
| "loss_region": 0.06361720710992813, |
| "loss_total": 1.535853624343872, |
| "lr": 0.0012222953816488059, |
| "router/selected_tokens_s0": 779.1875, |
| "router/selected_tokens_s1": 22.8125, |
| "step": 500, |
| "tokens_trained": 1.638252944 |
| }, |
| { |
| "epoch": 0.14184397163120568, |
| "eval_ppl": 5.020375679302198, |
| "eval_runtime": 2.2178, |
| "step": 500, |
| "tokens_trained": 1.638252944 |
| }, |
| { |
| "epoch": 0.14184397163120568, |
| "eval_F": 0.002412297835768419, |
| "eval_F_cds": 0.0005749708022639475, |
| "eval_F_dig": 0.001601687631846238, |
| "eval_F_exon": 0.002024979274540982, |
| "eval_F_intron": 0.0026823310403626226, |
| "eval_F_nig": 0.0027446730975761655, |
| "eval_F_promoter": 0.0013133514412650286, |
| "eval_F_utr": 0.0028351861523539506, |
| "eval_G": 0.025070100290056414, |
| "eval_G_cds": 0.006592529450408768, |
| "eval_G_dig": 0.013054837658215485, |
| "eval_G_exon": 0.0204957217641103, |
| "eval_G_intron": 0.0275639601731152, |
| "eval_G_nig": 0.029588715312590982, |
| "eval_G_promoter": 0.01396729528315357, |
| "eval_G_utr": 0.024487496469340553, |
| "eval_avg_bp_per_token": 414.5425101214575, |
| "eval_bp_per_token/cds": 1739.21875, |
| "eval_bp_per_token/dig": 624.3414634146342, |
| "eval_bp_per_token/exon": 493.8322147651007, |
| "eval_bp_per_token/intron": 372.81006145490926, |
| "eval_bp_per_token/nig": 364.3421145064981, |
| "eval_bp_per_token/promoter": 761.4108216432866, |
| "eval_bp_per_token/utr": 352.7105263157895, |
| "eval_ppl_cds": 5.0745923462223805, |
| "eval_ppl_dig": 5.089918968649901, |
| "eval_ppl_exon": 5.0096468826855345, |
| "eval_ppl_intron": 5.023167391585644, |
| "eval_ppl_nig": 5.002412039213493, |
| "eval_ppl_promoter": 4.969192260932606, |
| "eval_ppl_utr": 4.995333125490376, |
| "step": 500, |
| "tokens_trained": 1.638252944 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.14468085106382977, |
| "grad_norm": 18.730314254760742, |
| "loss": 1.6324, |
| "loss_ce": 1.4263192415237427, |
| "loss_region": 0.06919127702713013, |
| "loss_total": 1.4955105781555176, |
| "lr": 0.0012243786686061229, |
| "router/selected_tokens_s0": 1183.625, |
| "router/selected_tokens_s1": 46.0, |
| "step": 510, |
| "tokens_trained": 1.671014152 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.1475177304964539, |
| "grad_norm": 16.276996612548828, |
| "loss": 1.4786, |
| "loss_ce": 1.3790802955627441, |
| "loss_region": 0.060240939259529114, |
| "loss_total": 1.4393212795257568, |
| "lr": 0.0012239717766222718, |
| "router/selected_tokens_s0": 194.1875, |
| "router/selected_tokens_s1": 7.125, |
| "step": 520, |
| "tokens_trained": 1.703779592 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.150354609929078, |
| "grad_norm": 24.13681983947754, |
| "loss": 1.6421, |
| "loss_ce": 1.6586048603057861, |
| "loss_region": 0.06046159565448761, |
| "loss_total": 1.7190665006637573, |
| "lr": 0.001223564884638421, |
| "router/selected_tokens_s0": 948.0, |
| "router/selected_tokens_s1": 9.0625, |
| "step": 530, |
| "tokens_trained": 1.736545032 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.15319148936170213, |
| "grad_norm": 39.31205749511719, |
| "loss": 1.6168, |
| "loss_ce": 1.689642310142517, |
| "loss_region": 0.06132669746875763, |
| "loss_total": 1.7509690523147583, |
| "lr": 0.00122315799265457, |
| "router/selected_tokens_s0": 618.0, |
| "router/selected_tokens_s1": 18.0625, |
| "step": 540, |
| "tokens_trained": 1.769310472 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.15602836879432624, |
| "grad_norm": 39.57848358154297, |
| "loss": 1.5849, |
| "loss_ce": 1.6077150106430054, |
| "loss_region": 0.05697261542081833, |
| "loss_total": 1.6646876335144043, |
| "lr": 0.001222751100670719, |
| "router/selected_tokens_s0": 451.8125, |
| "router/selected_tokens_s1": 6.3125, |
| "step": 550, |
| "tokens_trained": 1.802075912 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.15886524822695036, |
| "grad_norm": 47.972530364990234, |
| "loss": 1.6857, |
| "loss_ce": 1.8609164953231812, |
| "loss_region": 0.05524138733744621, |
| "loss_total": 1.916157841682434, |
| "lr": 0.001222344208686868, |
| "router/selected_tokens_s0": 307.3125, |
| "router/selected_tokens_s1": 5.0, |
| "step": 560, |
| "tokens_trained": 1.834841352 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.16170212765957448, |
| "grad_norm": 38.630558013916016, |
| "loss": 1.687, |
| "loss_ce": 1.7399615049362183, |
| "loss_region": 0.06043495982885361, |
| "loss_total": 1.80039644241333, |
| "lr": 0.0012219373167030169, |
| "router/selected_tokens_s0": 1151.625, |
| "router/selected_tokens_s1": 2.5625, |
| "step": 570, |
| "tokens_trained": 1.867606792 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.16453900709219857, |
| "grad_norm": 28.116954803466797, |
| "loss": 1.5409, |
| "loss_ce": 1.4483039379119873, |
| "loss_region": 0.05134333670139313, |
| "loss_total": 1.4996472597122192, |
| "lr": 0.0012215304247191658, |
| "router/selected_tokens_s0": 275.0, |
| "router/selected_tokens_s1": 1.3125, |
| "step": 580, |
| "tokens_trained": 1.900370632 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.1673758865248227, |
| "grad_norm": 22.577993392944336, |
| "loss": 1.6098, |
| "loss_ce": 1.4538688659667969, |
| "loss_region": 0.05483899638056755, |
| "loss_total": 1.508707880973816, |
| "lr": 0.0012211235327353148, |
| "router/selected_tokens_s0": 305.0625, |
| "router/selected_tokens_s1": 5.25, |
| "step": 590, |
| "tokens_trained": 1.933136072 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.1702127659574468, |
| "grad_norm": 40.604774475097656, |
| "loss": 1.6165, |
| "loss_ce": 1.647032380104065, |
| "loss_region": 0.08088662475347519, |
| "loss_total": 1.7279189825057983, |
| "lr": 0.0012207166407514638, |
| "router/selected_tokens_s0": 253.9375, |
| "router/selected_tokens_s1": 20.1875, |
| "step": 600, |
| "tokens_trained": 1.965901512 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.17304964539007092, |
| "grad_norm": 15.52480697631836, |
| "loss": 1.5046, |
| "loss_ce": 1.347977638244629, |
| "loss_region": 0.05137379840016365, |
| "loss_total": 1.3993514776229858, |
| "lr": 0.0012203097487676127, |
| "router/selected_tokens_s0": 453.875, |
| "router/selected_tokens_s1": 2.875, |
| "step": 610, |
| "tokens_trained": 1.998666952 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.17588652482269504, |
| "grad_norm": 47.213138580322266, |
| "loss": 1.5088, |
| "loss_ce": 1.8579106330871582, |
| "loss_region": 0.05340619385242462, |
| "loss_total": 1.9113168716430664, |
| "lr": 0.0012199028567837617, |
| "router/selected_tokens_s0": 305.0, |
| "router/selected_tokens_s1": 3.5625, |
| "step": 620, |
| "tokens_trained": 2.031432392 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.17872340425531916, |
| "grad_norm": 23.589649200439453, |
| "loss": 1.4227, |
| "loss_ce": 1.362052083015442, |
| "loss_region": 0.06349397450685501, |
| "loss_total": 1.4255460500717163, |
| "lr": 0.0012194959647999107, |
| "router/selected_tokens_s0": 174.6875, |
| "router/selected_tokens_s1": 3.3125, |
| "step": 630, |
| "tokens_trained": 2.064197832 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.18156028368794327, |
| "grad_norm": 11.682387351989746, |
| "loss": 1.3963, |
| "loss_ce": 1.3035075664520264, |
| "loss_region": 0.054032620042562485, |
| "loss_total": 1.3575401306152344, |
| "lr": 0.0012190890728160596, |
| "router/selected_tokens_s0": 574.5, |
| "router/selected_tokens_s1": 1.375, |
| "step": 640, |
| "tokens_trained": 2.096963272 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.18439716312056736, |
| "grad_norm": 16.581283569335938, |
| "loss": 1.4373, |
| "loss_ce": 1.2942084074020386, |
| "loss_region": 0.06227206438779831, |
| "loss_total": 1.3564804792404175, |
| "lr": 0.0012186821808322086, |
| "router/selected_tokens_s0": 247.25, |
| "router/selected_tokens_s1": 9.125, |
| "step": 650, |
| "tokens_trained": 2.129728712 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.18723404255319148, |
| "grad_norm": 17.90604019165039, |
| "loss": 1.5133, |
| "loss_ce": 1.3342182636260986, |
| "loss_region": 0.10024743527173996, |
| "loss_total": 1.4344656467437744, |
| "lr": 0.0012182752888483576, |
| "router/selected_tokens_s0": 1289.5625, |
| "router/selected_tokens_s1": 121.4375, |
| "step": 660, |
| "tokens_trained": 2.162494152 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.1900709219858156, |
| "grad_norm": 20.957000732421875, |
| "loss": 1.523, |
| "loss_ce": 1.3729053735733032, |
| "loss_region": 0.05556885898113251, |
| "loss_total": 1.4284741878509521, |
| "lr": 0.0012178683968645065, |
| "router/selected_tokens_s0": 160.25, |
| "router/selected_tokens_s1": 3.875, |
| "step": 670, |
| "tokens_trained": 2.195259592 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.19290780141843972, |
| "grad_norm": 22.54232406616211, |
| "loss": 1.4559, |
| "loss_ce": 1.3261032104492188, |
| "loss_region": 0.055951084941625595, |
| "loss_total": 1.382054328918457, |
| "lr": 0.0012174615048806555, |
| "router/selected_tokens_s0": 96.125, |
| "router/selected_tokens_s1": 1.0, |
| "step": 680, |
| "tokens_trained": 2.228025032 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.19574468085106383, |
| "grad_norm": 13.925896644592285, |
| "loss": 1.3547, |
| "loss_ce": 1.3023862838745117, |
| "loss_region": 0.05069010332226753, |
| "loss_total": 1.3530763387680054, |
| "lr": 0.0012170546128968045, |
| "router/selected_tokens_s0": 337.625, |
| "router/selected_tokens_s1": 1.625, |
| "step": 690, |
| "tokens_trained": 2.260790472 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.19858156028368795, |
| "grad_norm": 12.42866039276123, |
| "loss": 1.3505, |
| "loss_ce": 1.2891850471496582, |
| "loss_region": 0.07324955612421036, |
| "loss_total": 1.3624346256256104, |
| "lr": 0.0012166477209129534, |
| "router/selected_tokens_s0": 643.0625, |
| "router/selected_tokens_s1": 35.4375, |
| "step": 700, |
| "tokens_trained": 2.293555912 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.20141843971631207, |
| "grad_norm": 25.85889434814453, |
| "loss": 1.3485, |
| "loss_ce": 1.3655128479003906, |
| "loss_region": 0.06356744468212128, |
| "loss_total": 1.4290802478790283, |
| "lr": 0.0012162408289291026, |
| "router/selected_tokens_s0": 453.1875, |
| "router/selected_tokens_s1": 18.125, |
| "step": 710, |
| "tokens_trained": 2.326321352 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.20425531914893616, |
| "grad_norm": 8.729594230651855, |
| "loss": 1.3482, |
| "loss_ce": 1.2156054973602295, |
| "loss_region": 0.05525539815425873, |
| "loss_total": 1.2708609104156494, |
| "lr": 0.0012158339369452516, |
| "router/selected_tokens_s0": 438.3125, |
| "router/selected_tokens_s1": 6.5625, |
| "step": 720, |
| "tokens_trained": 2.359086792 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.20709219858156028, |
| "grad_norm": 16.93632698059082, |
| "loss": 1.3283, |
| "loss_ce": 1.3369078636169434, |
| "loss_region": 0.05985880643129349, |
| "loss_total": 1.3967666625976562, |
| "lr": 0.0012154270449614005, |
| "router/selected_tokens_s0": 255.75, |
| "router/selected_tokens_s1": 9.25, |
| "step": 730, |
| "tokens_trained": 2.391852232 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.2099290780141844, |
| "grad_norm": 10.29341983795166, |
| "loss": 1.3212, |
| "loss_ce": 1.2369259595870972, |
| "loss_region": 0.0615067295730114, |
| "loss_total": 1.29843270778656, |
| "lr": 0.0012150201529775495, |
| "router/selected_tokens_s0": 504.125, |
| "router/selected_tokens_s1": 15.375, |
| "step": 740, |
| "tokens_trained": 2.424616064 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.2127659574468085, |
| "grad_norm": 12.911612510681152, |
| "loss": 1.3173, |
| "loss_ce": 1.2570594549179077, |
| "loss_region": 0.06437371671199799, |
| "loss_total": 1.321433186531067, |
| "lr": 0.0012146132609936982, |
| "router/selected_tokens_s0": 461.875, |
| "router/selected_tokens_s1": 17.9375, |
| "step": 750, |
| "tokens_trained": 2.457381496 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.21560283687943263, |
| "grad_norm": 10.224205017089844, |
| "loss": 1.3042, |
| "loss_ce": 1.2603733539581299, |
| "loss_region": 0.062456872314214706, |
| "loss_total": 1.3228302001953125, |
| "lr": 0.0012142063690098472, |
| "router/selected_tokens_s0": 335.4375, |
| "router/selected_tokens_s1": 15.4375, |
| "step": 760, |
| "tokens_trained": 2.490146936 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.21843971631205675, |
| "grad_norm": 6.643783092498779, |
| "loss": 1.296, |
| "loss_ce": 1.2319194078445435, |
| "loss_region": 0.0581849031150341, |
| "loss_total": 1.2901042699813843, |
| "lr": 0.0012137994770259962, |
| "router/selected_tokens_s0": 402.4375, |
| "router/selected_tokens_s1": 11.0, |
| "step": 770, |
| "tokens_trained": 2.522912376 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.22127659574468084, |
| "grad_norm": 8.539620399475098, |
| "loss": 1.2892, |
| "loss_ce": 1.2454586029052734, |
| "loss_region": 0.05890984833240509, |
| "loss_total": 1.304368495941162, |
| "lr": 0.0012133925850421454, |
| "router/selected_tokens_s0": 272.25, |
| "router/selected_tokens_s1": 8.625, |
| "step": 780, |
| "tokens_trained": 2.555677816 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.22411347517730495, |
| "grad_norm": 9.312758445739746, |
| "loss": 1.2829, |
| "loss_ce": 1.2416056394577026, |
| "loss_region": 0.05711473524570465, |
| "loss_total": 1.298720359802246, |
| "lr": 0.0012129856930582943, |
| "router/selected_tokens_s0": 412.1875, |
| "router/selected_tokens_s1": 8.6875, |
| "step": 790, |
| "tokens_trained": 2.588443256 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.22695035460992907, |
| "grad_norm": 13.046083450317383, |
| "loss": 1.2906, |
| "loss_ce": 1.230719804763794, |
| "loss_region": 0.05725303664803505, |
| "loss_total": 1.2879728078842163, |
| "lr": 0.0012125788010744433, |
| "router/selected_tokens_s0": 398.3125, |
| "router/selected_tokens_s1": 8.6875, |
| "step": 800, |
| "tokens_trained": 2.621208696 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.2297872340425532, |
| "grad_norm": 10.468182563781738, |
| "loss": 1.3095, |
| "loss_ce": 1.231147289276123, |
| "loss_region": 0.061947327107191086, |
| "loss_total": 1.2930946350097656, |
| "lr": 0.0012121719090905923, |
| "router/selected_tokens_s0": 618.0625, |
| "router/selected_tokens_s1": 16.9375, |
| "step": 810, |
| "tokens_trained": 2.653974128 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.2326241134751773, |
| "grad_norm": 13.493775367736816, |
| "loss": 1.3094, |
| "loss_ce": 1.288047194480896, |
| "loss_region": 0.07109444588422775, |
| "loss_total": 1.3591415882110596, |
| "lr": 0.0012117650171067412, |
| "router/selected_tokens_s0": 400.625, |
| "router/selected_tokens_s1": 21.5, |
| "step": 820, |
| "tokens_trained": 2.686739568 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.23546099290780143, |
| "grad_norm": 11.455880165100098, |
| "loss": 1.3091, |
| "loss_ce": 1.2492051124572754, |
| "loss_region": 0.05696583166718483, |
| "loss_total": 1.30617094039917, |
| "lr": 0.0012113581251228902, |
| "router/selected_tokens_s0": 428.6875, |
| "router/selected_tokens_s1": 7.75, |
| "step": 830, |
| "tokens_trained": 2.719505008 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.23829787234042554, |
| "grad_norm": 17.325927734375, |
| "loss": 1.3211, |
| "loss_ce": 1.2634320259094238, |
| "loss_region": 0.055105820298194885, |
| "loss_total": 1.3185378313064575, |
| "lr": 0.0012109512331390391, |
| "router/selected_tokens_s0": 383.9375, |
| "router/selected_tokens_s1": 7.5625, |
| "step": 840, |
| "tokens_trained": 2.752270448 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.24113475177304963, |
| "grad_norm": 7.47580623626709, |
| "loss": 1.2889, |
| "loss_ce": 1.1895134449005127, |
| "loss_region": 0.07138022780418396, |
| "loss_total": 1.260893702507019, |
| "lr": 0.0012105443411551881, |
| "router/selected_tokens_s0": 640.6875, |
| "router/selected_tokens_s1": 26.75, |
| "step": 850, |
| "tokens_trained": 2.785035888 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.24397163120567375, |
| "grad_norm": 7.988100528717041, |
| "loss": 1.2856, |
| "loss_ce": 1.2606642246246338, |
| "loss_region": 0.05701002851128578, |
| "loss_total": 1.3176742792129517, |
| "lr": 0.001210137449171337, |
| "router/selected_tokens_s0": 351.9375, |
| "router/selected_tokens_s1": 8.5, |
| "step": 860, |
| "tokens_trained": 2.817797816 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.24680851063829787, |
| "grad_norm": 8.923615455627441, |
| "loss": 1.2757, |
| "loss_ce": 1.233718752861023, |
| "loss_region": 0.061422333121299744, |
| "loss_total": 1.2951411008834839, |
| "lr": 0.001209730557187486, |
| "router/selected_tokens_s0": 302.5, |
| "router/selected_tokens_s1": 12.1875, |
| "step": 870, |
| "tokens_trained": 2.850563256 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.24964539007092199, |
| "grad_norm": 10.27121353149414, |
| "loss": 1.29, |
| "loss_ce": 1.2663887739181519, |
| "loss_region": 0.05221039056777954, |
| "loss_total": 1.3185992240905762, |
| "lr": 0.001209323665203635, |
| "router/selected_tokens_s0": 404.0625, |
| "router/selected_tokens_s1": 3.3125, |
| "step": 880, |
| "tokens_trained": 2.883328696 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.2524822695035461, |
| "grad_norm": 6.205187797546387, |
| "loss": 1.2855, |
| "loss_ce": 1.2068557739257812, |
| "loss_region": 0.06301470100879669, |
| "loss_total": 1.2698705196380615, |
| "lr": 0.001208916773219784, |
| "router/selected_tokens_s0": 376.5, |
| "router/selected_tokens_s1": 15.625, |
| "step": 890, |
| "tokens_trained": 2.916094136 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.2553191489361702, |
| "grad_norm": 1.0363818407058716, |
| "loss": 1.2721, |
| "loss_ce": 1.2075473070144653, |
| "loss_region": 0.05672357976436615, |
| "loss_total": 1.2642709016799927, |
| "lr": 0.001208509881235933, |
| "router/selected_tokens_s0": 444.0625, |
| "router/selected_tokens_s1": 9.75, |
| "step": 900, |
| "tokens_trained": 2.948859576 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.2581560283687943, |
| "grad_norm": 3.8346028327941895, |
| "loss": 1.2822, |
| "loss_ce": 1.211919903755188, |
| "loss_region": 0.05047914385795593, |
| "loss_total": 1.2623990774154663, |
| "lr": 0.001208102989252082, |
| "router/selected_tokens_s0": 251.9375, |
| "router/selected_tokens_s1": 2.875, |
| "step": 910, |
| "tokens_trained": 2.981619384 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.26099290780141843, |
| "grad_norm": 10.03120231628418, |
| "loss": 1.2769, |
| "loss_ce": 1.2070960998535156, |
| "loss_region": 0.053904660046100616, |
| "loss_total": 1.2610007524490356, |
| "lr": 0.0012076960972682309, |
| "router/selected_tokens_s0": 419.0, |
| "router/selected_tokens_s1": 5.625, |
| "step": 920, |
| "tokens_trained": 3.014384824 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.26382978723404255, |
| "grad_norm": 10.83735466003418, |
| "loss": 1.2681, |
| "loss_ce": 1.269758939743042, |
| "loss_region": 0.061172302812337875, |
| "loss_total": 1.3309311866760254, |
| "lr": 0.0012072892052843798, |
| "router/selected_tokens_s0": 413.5, |
| "router/selected_tokens_s1": 14.25, |
| "step": 930, |
| "tokens_trained": 3.047149464 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.26666666666666666, |
| "grad_norm": 8.0000581741333, |
| "loss": 1.2667, |
| "loss_ce": 1.2282646894454956, |
| "loss_region": 0.055803608149290085, |
| "loss_total": 1.2840683460235596, |
| "lr": 0.0012068823133005288, |
| "router/selected_tokens_s0": 468.25, |
| "router/selected_tokens_s1": 8.0625, |
| "step": 940, |
| "tokens_trained": 3.079914904 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.2695035460992908, |
| "grad_norm": 6.741837501525879, |
| "loss": 1.272, |
| "loss_ce": 1.1557844877243042, |
| "loss_region": 0.06386042386293411, |
| "loss_total": 1.2196449041366577, |
| "lr": 0.0012064754213166778, |
| "router/selected_tokens_s0": 312.0, |
| "router/selected_tokens_s1": 12.1875, |
| "step": 950, |
| "tokens_trained": 3.112675728 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.2723404255319149, |
| "grad_norm": 7.861258506774902, |
| "loss": 1.2717, |
| "loss_ce": 1.2447525262832642, |
| "loss_region": 0.05689748376607895, |
| "loss_total": 1.301650047302246, |
| "lr": 0.001206068529332827, |
| "router/selected_tokens_s0": 551.4375, |
| "router/selected_tokens_s1": 9.3125, |
| "step": 960, |
| "tokens_trained": 3.145441168 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.275177304964539, |
| "grad_norm": 2.8089139461517334, |
| "loss": 1.2699, |
| "loss_ce": 1.1975443363189697, |
| "loss_region": 0.06169455498456955, |
| "loss_total": 1.259238839149475, |
| "lr": 0.001205661637348976, |
| "router/selected_tokens_s0": 384.4375, |
| "router/selected_tokens_s1": 13.1875, |
| "step": 970, |
| "tokens_trained": 3.178206608 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.27801418439716313, |
| "grad_norm": 1.8372883796691895, |
| "loss": 1.2593, |
| "loss_ce": 1.1845065355300903, |
| "loss_region": 0.060579411685466766, |
| "loss_total": 1.2450859546661377, |
| "lr": 0.0012052547453651249, |
| "router/selected_tokens_s0": 402.375, |
| "router/selected_tokens_s1": 12.3125, |
| "step": 980, |
| "tokens_trained": 3.210972048 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.28085106382978725, |
| "grad_norm": 4.381184101104736, |
| "loss": 1.2706, |
| "loss_ce": 1.196001410484314, |
| "loss_region": 0.06673609465360641, |
| "loss_total": 1.262737512588501, |
| "lr": 0.0012048478533812738, |
| "router/selected_tokens_s0": 403.1875, |
| "router/selected_tokens_s1": 18.9375, |
| "step": 990, |
| "tokens_trained": 3.243737488 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.28368794326241137, |
| "grad_norm": 8.494468688964844, |
| "loss": 1.2773, |
| "loss_ce": 1.209201693534851, |
| "loss_region": 0.06362592428922653, |
| "loss_total": 1.2728276252746582, |
| "lr": 0.0012044409613974226, |
| "router/selected_tokens_s0": 486.875, |
| "router/selected_tokens_s1": 18.0, |
| "step": 1000, |
| "tokens_trained": 3.276502928 |
| }, |
| { |
| "epoch": 0.28368794326241137, |
| "eval_ppl": 3.401584579757825, |
| "eval_runtime": 2.059, |
| "step": 1000, |
| "tokens_trained": 3.276502928 |
| }, |
| { |
| "epoch": 0.28368794326241137, |
| "eval_F": 0.0010583792212027433, |
| "eval_F_cds": 0.00044021202048333486, |
| "eval_F_dig": 0.003711227439643722, |
| "eval_F_exon": 0.0007474755711392887, |
| "eval_F_intron": 0.0010856597836654204, |
| "eval_F_nig": 0.0013487171280326855, |
| "eval_F_promoter": 0.0006250921188385657, |
| "eval_F_utr": 0.0008526875646177295, |
| "eval_G": 0.016025620951195235, |
| "eval_G_cds": 0.005563186427544695, |
| "eval_G_dig": 0.024053926942534574, |
| "eval_G_exon": 0.01363821736419728, |
| "eval_G_intron": 0.016894965668453225, |
| "eval_G_nig": 0.018964967608856607, |
| "eval_G_promoter": 0.010849314204034016, |
| "eval_G_utr": 0.013376785980750578, |
| "eval_avg_bp_per_token": 944.840922768305, |
| "eval_bp_per_token/cds": 2271.6326530612246, |
| "eval_bp_per_token/dig": 269.4526315789474, |
| "eval_bp_per_token/exon": 1337.8363636363636, |
| "eval_bp_per_token/intron": 921.0988700564972, |
| "eval_bp_per_token/nig": 741.445318084346, |
| "eval_bp_per_token/promoter": 1599.7642105263158, |
| "eval_bp_per_token/utr": 1172.7625, |
| "eval_ppl_cds": 3.9297508219385455, |
| "eval_ppl_dig": 2.056291383006035, |
| "eval_ppl_exon": 3.560263799656032, |
| "eval_ppl_intron": 3.399758405692712, |
| "eval_ppl_nig": 3.3399007425580884, |
| "eval_ppl_promoter": 3.552518241215457, |
| "eval_ppl_utr": 3.6236436039234974, |
| "step": 1000, |
| "tokens_trained": 3.276502928 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.2865248226950355, |
| "grad_norm": 4.319965362548828, |
| "loss": 1.2539, |
| "loss_ce": 1.1917036771774292, |
| "loss_region": 0.06307854503393173, |
| "loss_total": 1.2547821998596191, |
| "lr": 0.0012040340694135716, |
| "router/selected_tokens_s0": 447.625, |
| "router/selected_tokens_s1": 16.25, |
| "step": 1010, |
| "tokens_trained": 3.309267568 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.28936170212765955, |
| "grad_norm": 7.788521766662598, |
| "loss": 1.2502, |
| "loss_ce": 1.1910172700881958, |
| "loss_region": 0.06294921785593033, |
| "loss_total": 1.2539664506912231, |
| "lr": 0.0012036271774297205, |
| "router/selected_tokens_s0": 517.625, |
| "router/selected_tokens_s1": 19.9375, |
| "step": 1020, |
| "tokens_trained": 3.342033008 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.29219858156028367, |
| "grad_norm": 6.11482572555542, |
| "loss": 1.2644, |
| "loss_ce": 1.16280996799469, |
| "loss_region": 0.055934660136699677, |
| "loss_total": 1.2187446355819702, |
| "lr": 0.0012032202854458697, |
| "router/selected_tokens_s0": 781.125, |
| "router/selected_tokens_s1": 8.6875, |
| "step": 1030, |
| "tokens_trained": 3.374798448 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.2950354609929078, |
| "grad_norm": 4.647332668304443, |
| "loss": 1.2587, |
| "loss_ce": 1.1730914115905762, |
| "loss_region": 0.04860607162117958, |
| "loss_total": 1.221697449684143, |
| "lr": 0.0012028133934620187, |
| "router/selected_tokens_s0": 405.375, |
| "router/selected_tokens_s1": 1.375, |
| "step": 1040, |
| "tokens_trained": 3.407563888 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.2978723404255319, |
| "grad_norm": 9.93161392211914, |
| "loss": 1.2728, |
| "loss_ce": 1.2400729656219482, |
| "loss_region": 0.07498405873775482, |
| "loss_total": 1.3150570392608643, |
| "lr": 0.0012024065014781676, |
| "router/selected_tokens_s0": 532.25, |
| "router/selected_tokens_s1": 30.4375, |
| "step": 1050, |
| "tokens_trained": 3.440328528 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.300709219858156, |
| "grad_norm": 7.319814205169678, |
| "loss": 1.251, |
| "loss_ce": 1.2057647705078125, |
| "loss_region": 0.06156269088387489, |
| "loss_total": 1.2673274278640747, |
| "lr": 0.0012019996094943166, |
| "router/selected_tokens_s0": 317.0, |
| "router/selected_tokens_s1": 11.3125, |
| "step": 1060, |
| "tokens_trained": 3.473093968 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.30354609929078014, |
| "grad_norm": 3.660691022872925, |
| "loss": 1.2579, |
| "loss_ce": 1.201399803161621, |
| "loss_region": 0.06964390724897385, |
| "loss_total": 1.2710436582565308, |
| "lr": 0.0012015927175104656, |
| "router/selected_tokens_s0": 343.5625, |
| "router/selected_tokens_s1": 17.6875, |
| "step": 1070, |
| "tokens_trained": 3.505859408 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.30638297872340425, |
| "grad_norm": 4.524622917175293, |
| "loss": 1.2455, |
| "loss_ce": 1.242870569229126, |
| "loss_region": 0.055044859647750854, |
| "loss_total": 1.2979154586791992, |
| "lr": 0.0012011858255266145, |
| "router/selected_tokens_s0": 431.25, |
| "router/selected_tokens_s1": 8.1875, |
| "step": 1080, |
| "tokens_trained": 3.538624848 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.30921985815602837, |
| "grad_norm": 3.531444549560547, |
| "loss": 1.2507, |
| "loss_ce": 1.1871229410171509, |
| "loss_region": 0.05430071800947189, |
| "loss_total": 1.2414236068725586, |
| "lr": 0.0012007789335427635, |
| "router/selected_tokens_s0": 819.5625, |
| "router/selected_tokens_s1": 5.9375, |
| "step": 1090, |
| "tokens_trained": 3.571390288 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.3120567375886525, |
| "grad_norm": 5.396289348602295, |
| "loss": 1.2621, |
| "loss_ce": 1.1988043785095215, |
| "loss_region": 0.050519246608018875, |
| "loss_total": 1.2493236064910889, |
| "lr": 0.0012003720415589125, |
| "router/selected_tokens_s0": 485.875, |
| "router/selected_tokens_s1": 3.9375, |
| "step": 1100, |
| "tokens_trained": 3.604155728 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.3148936170212766, |
| "grad_norm": 5.735856056213379, |
| "loss": 1.2515, |
| "loss_ce": 1.1599225997924805, |
| "loss_region": 0.05765972658991814, |
| "loss_total": 1.21758234500885, |
| "lr": 0.0011999651495750614, |
| "router/selected_tokens_s0": 320.125, |
| "router/selected_tokens_s1": 8.8125, |
| "step": 1110, |
| "tokens_trained": 3.636921168 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.3177304964539007, |
| "grad_norm": 2.786902904510498, |
| "loss": 1.2463, |
| "loss_ce": 1.1695066690444946, |
| "loss_region": 0.060196422040462494, |
| "loss_total": 1.2297030687332153, |
| "lr": 0.0011995582575912104, |
| "router/selected_tokens_s0": 550.0, |
| "router/selected_tokens_s1": 15.1875, |
| "step": 1120, |
| "tokens_trained": 3.669686584 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.32056737588652484, |
| "grad_norm": 7.254458427429199, |
| "loss": 1.254, |
| "loss_ce": 1.1615636348724365, |
| "loss_region": 0.06397742033004761, |
| "loss_total": 1.225541114807129, |
| "lr": 0.0011991513656073594, |
| "router/selected_tokens_s0": 454.4375, |
| "router/selected_tokens_s1": 17.5, |
| "step": 1130, |
| "tokens_trained": 3.702452024 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.32340425531914896, |
| "grad_norm": 5.802817344665527, |
| "loss": 1.2488, |
| "loss_ce": 1.1627715826034546, |
| "loss_region": 0.06256798654794693, |
| "loss_total": 1.2253395318984985, |
| "lr": 0.0011987444736235083, |
| "router/selected_tokens_s0": 577.75, |
| "router/selected_tokens_s1": 19.0, |
| "step": 1140, |
| "tokens_trained": 3.735217464 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.3262411347517731, |
| "grad_norm": 1.8852355480194092, |
| "loss": 1.2334, |
| "loss_ce": 1.2047169208526611, |
| "loss_region": 0.061925701797008514, |
| "loss_total": 1.2666425704956055, |
| "lr": 0.0011983375816396573, |
| "router/selected_tokens_s0": 625.875, |
| "router/selected_tokens_s1": 19.6875, |
| "step": 1150, |
| "tokens_trained": 3.767982904 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.32907801418439714, |
| "grad_norm": 6.557276725769043, |
| "loss": 1.2423, |
| "loss_ce": 1.1739070415496826, |
| "loss_region": 0.0680062547326088, |
| "loss_total": 1.2419133186340332, |
| "lr": 0.0011979306896558062, |
| "router/selected_tokens_s0": 354.0, |
| "router/selected_tokens_s1": 17.625, |
| "step": 1160, |
| "tokens_trained": 3.800748344 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.33191489361702126, |
| "grad_norm": 3.2266619205474854, |
| "loss": 1.2305, |
| "loss_ce": 1.0980790853500366, |
| "loss_region": 0.05660313367843628, |
| "loss_total": 1.1546821594238281, |
| "lr": 0.0011975237976719552, |
| "router/selected_tokens_s0": 484.6875, |
| "router/selected_tokens_s1": 10.75, |
| "step": 1170, |
| "tokens_trained": 3.833513784 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.3347517730496454, |
| "grad_norm": 4.397397041320801, |
| "loss": 1.2297, |
| "loss_ce": 1.2073218822479248, |
| "loss_region": 0.049140799790620804, |
| "loss_total": 1.2564626932144165, |
| "lr": 0.0011971169056881042, |
| "router/selected_tokens_s0": 341.8125, |
| "router/selected_tokens_s1": 2.8125, |
| "step": 1180, |
| "tokens_trained": 3.866278424 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.3375886524822695, |
| "grad_norm": 4.493627071380615, |
| "loss": 1.2376, |
| "loss_ce": 1.1048059463500977, |
| "loss_region": 0.051098912954330444, |
| "loss_total": 1.1559048891067505, |
| "lr": 0.0011967100137042531, |
| "router/selected_tokens_s0": 546.4375, |
| "router/selected_tokens_s1": 4.75, |
| "step": 1190, |
| "tokens_trained": 3.899043864 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.3404255319148936, |
| "grad_norm": 6.189625263214111, |
| "loss": 1.2339, |
| "loss_ce": 1.1637437343597412, |
| "loss_region": 0.07028566300868988, |
| "loss_total": 1.2340294122695923, |
| "lr": 0.0011963031217204021, |
| "router/selected_tokens_s0": 495.8125, |
| "router/selected_tokens_s1": 26.0, |
| "step": 1200, |
| "tokens_trained": 3.931809304 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.3432624113475177, |
| "grad_norm": 5.87813663482666, |
| "loss": 1.242, |
| "loss_ce": 1.223149299621582, |
| "loss_region": 0.06371823698282242, |
| "loss_total": 1.2868674993515015, |
| "lr": 0.0011958962297365513, |
| "router/selected_tokens_s0": 239.8125, |
| "router/selected_tokens_s1": 10.125, |
| "step": 1210, |
| "tokens_trained": 3.964574744 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.34609929078014184, |
| "grad_norm": 8.016247749328613, |
| "loss": 1.2328, |
| "loss_ce": 1.198567509651184, |
| "loss_region": 0.05910230800509453, |
| "loss_total": 1.2576698064804077, |
| "lr": 0.0011954893377527003, |
| "router/selected_tokens_s0": 410.75, |
| "router/selected_tokens_s1": 11.25, |
| "step": 1220, |
| "tokens_trained": 3.997337592 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.34893617021276596, |
| "grad_norm": 6.139803886413574, |
| "loss": 1.2404, |
| "loss_ce": 1.1777026653289795, |
| "loss_region": 0.06743069738149643, |
| "loss_total": 1.245133399963379, |
| "lr": 0.0011950824457688492, |
| "router/selected_tokens_s0": 609.0625, |
| "router/selected_tokens_s1": 26.5, |
| "step": 1230, |
| "tokens_trained": 4.030103032 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.3517730496453901, |
| "grad_norm": 2.7356536388397217, |
| "loss": 1.2385, |
| "loss_ce": 1.1438438892364502, |
| "loss_region": 0.10062191635370255, |
| "loss_total": 1.2444658279418945, |
| "lr": 0.0011946755537849982, |
| "router/selected_tokens_s0": 515.625, |
| "router/selected_tokens_s1": 60.5625, |
| "step": 1240, |
| "tokens_trained": 4.062868472 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.3546099290780142, |
| "grad_norm": 3.0639026165008545, |
| "loss": 1.2257, |
| "loss_ce": 1.152791142463684, |
| "loss_region": 0.06103505194187164, |
| "loss_total": 1.2138261795043945, |
| "lr": 0.001194268661801147, |
| "router/selected_tokens_s0": 408.625, |
| "router/selected_tokens_s1": 13.875, |
| "step": 1250, |
| "tokens_trained": 4.095633912 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.3574468085106383, |
| "grad_norm": 4.941549777984619, |
| "loss": 1.247, |
| "loss_ce": 1.141486644744873, |
| "loss_region": 0.07407496869564056, |
| "loss_total": 1.2155616283416748, |
| "lr": 0.001193861769817296, |
| "router/selected_tokens_s0": 486.0625, |
| "router/selected_tokens_s1": 32.6875, |
| "step": 1260, |
| "tokens_trained": 4.128399352 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.36028368794326243, |
| "grad_norm": 2.038921356201172, |
| "loss": 1.2258, |
| "loss_ce": 1.1693215370178223, |
| "loss_region": 0.05073130875825882, |
| "loss_total": 1.2200528383255005, |
| "lr": 0.0011934548778334449, |
| "router/selected_tokens_s0": 396.9375, |
| "router/selected_tokens_s1": 4.3125, |
| "step": 1270, |
| "tokens_trained": 4.161163992 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.36312056737588655, |
| "grad_norm": 7.992796421051025, |
| "loss": 1.2398, |
| "loss_ce": 1.2495265007019043, |
| "loss_region": 0.04827826842665672, |
| "loss_total": 1.2978047132492065, |
| "lr": 0.001193047985849594, |
| "router/selected_tokens_s0": 209.0625, |
| "router/selected_tokens_s1": 2.125, |
| "step": 1280, |
| "tokens_trained": 4.193929432 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.3659574468085106, |
| "grad_norm": 7.719239711761475, |
| "loss": 1.2413, |
| "loss_ce": 1.2265716791152954, |
| "loss_region": 0.048719920217990875, |
| "loss_total": 1.2752915620803833, |
| "lr": 0.001192641093865743, |
| "router/selected_tokens_s0": 424.375, |
| "router/selected_tokens_s1": 2.375, |
| "step": 1290, |
| "tokens_trained": 4.226694872 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.36879432624113473, |
| "grad_norm": 3.3148317337036133, |
| "loss": 1.2322, |
| "loss_ce": 1.1886953115463257, |
| "loss_region": 0.06017257645726204, |
| "loss_total": 1.2488678693771362, |
| "lr": 0.001192234201881892, |
| "router/selected_tokens_s0": 436.1875, |
| "router/selected_tokens_s1": 14.1875, |
| "step": 1300, |
| "tokens_trained": 4.259455304 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.37163120567375885, |
| "grad_norm": 3.881063938140869, |
| "loss": 1.2232, |
| "loss_ce": 1.1405267715454102, |
| "loss_region": 0.05613284558057785, |
| "loss_total": 1.1966595649719238, |
| "lr": 0.001191827309898041, |
| "router/selected_tokens_s0": 488.125, |
| "router/selected_tokens_s1": 10.0625, |
| "step": 1310, |
| "tokens_trained": 4.292217888 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.37446808510638296, |
| "grad_norm": 3.223666191101074, |
| "loss": 1.2262, |
| "loss_ce": 1.1618560552597046, |
| "loss_region": 0.054282836616039276, |
| "loss_total": 1.2161388397216797, |
| "lr": 0.00119142041791419, |
| "router/selected_tokens_s0": 387.5625, |
| "router/selected_tokens_s1": 7.25, |
| "step": 1320, |
| "tokens_trained": 4.324979816 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.3773049645390071, |
| "grad_norm": 2.419039726257324, |
| "loss": 1.2177, |
| "loss_ce": 1.0862337350845337, |
| "loss_region": 0.060854170471429825, |
| "loss_total": 1.1470879316329956, |
| "lr": 0.0011910135259303389, |
| "router/selected_tokens_s0": 542.1875, |
| "router/selected_tokens_s1": 17.3125, |
| "step": 1330, |
| "tokens_trained": 4.357745256 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.3801418439716312, |
| "grad_norm": 1.7404555082321167, |
| "loss": 1.2162, |
| "loss_ce": 1.1373934745788574, |
| "loss_region": 0.06797235459089279, |
| "loss_total": 1.205365777015686, |
| "lr": 0.0011906066339464878, |
| "router/selected_tokens_s0": 492.875, |
| "router/selected_tokens_s1": 23.625, |
| "step": 1340, |
| "tokens_trained": 4.390510696 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.3829787234042553, |
| "grad_norm": 1.9988096952438354, |
| "loss": 1.209, |
| "loss_ce": 1.1519917249679565, |
| "loss_region": 0.06009136512875557, |
| "loss_total": 1.212083101272583, |
| "lr": 0.0011901997419626368, |
| "router/selected_tokens_s0": 459.75, |
| "router/selected_tokens_s1": 14.75, |
| "step": 1350, |
| "tokens_trained": 4.423276136 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.38581560283687943, |
| "grad_norm": 2.7944321632385254, |
| "loss": 1.2114, |
| "loss_ce": 1.1149932146072388, |
| "loss_region": 0.059466730803251266, |
| "loss_total": 1.1744599342346191, |
| "lr": 0.0011897928499787858, |
| "router/selected_tokens_s0": 399.0625, |
| "router/selected_tokens_s1": 11.75, |
| "step": 1360, |
| "tokens_trained": 4.456041576 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.38865248226950355, |
| "grad_norm": 2.893796920776367, |
| "loss": 1.2176, |
| "loss_ce": 1.1631447076797485, |
| "loss_region": 0.05681516230106354, |
| "loss_total": 1.2199598550796509, |
| "lr": 0.0011893859579949347, |
| "router/selected_tokens_s0": 431.6875, |
| "router/selected_tokens_s1": 9.875, |
| "step": 1370, |
| "tokens_trained": 4.488807016 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.39148936170212767, |
| "grad_norm": 1.3144123554229736, |
| "loss": 1.2112, |
| "loss_ce": 1.0997382402420044, |
| "loss_region": 0.057619161903858185, |
| "loss_total": 1.1573574542999268, |
| "lr": 0.0011889790660110837, |
| "router/selected_tokens_s0": 530.75, |
| "router/selected_tokens_s1": 13.0, |
| "step": 1380, |
| "tokens_trained": 4.521572456 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.3943262411347518, |
| "grad_norm": 2.78426456451416, |
| "loss": 1.2116, |
| "loss_ce": 1.1366982460021973, |
| "loss_region": 0.0555262453854084, |
| "loss_total": 1.1922245025634766, |
| "lr": 0.0011885721740272327, |
| "router/selected_tokens_s0": 451.5625, |
| "router/selected_tokens_s1": 8.9375, |
| "step": 1390, |
| "tokens_trained": 4.554337096 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.3971631205673759, |
| "grad_norm": 1.5746723413467407, |
| "loss": 1.2185, |
| "loss_ce": 1.0210630893707275, |
| "loss_region": 0.0583292655646801, |
| "loss_total": 1.0793923139572144, |
| "lr": 0.0011881652820433816, |
| "router/selected_tokens_s0": 428.0, |
| "router/selected_tokens_s1": 11.125, |
| "step": 1400, |
| "tokens_trained": 4.587102536 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.4, |
| "grad_norm": 5.932886123657227, |
| "loss": 1.2196, |
| "loss_ce": 1.1898956298828125, |
| "loss_region": 0.05531156435608864, |
| "loss_total": 1.2452071905136108, |
| "lr": 0.0011877583900595306, |
| "router/selected_tokens_s0": 421.6875, |
| "router/selected_tokens_s1": 9.0, |
| "step": 1410, |
| "tokens_trained": 4.619867976 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.40283687943262414, |
| "grad_norm": 5.935864448547363, |
| "loss": 1.222, |
| "loss_ce": 1.2237108945846558, |
| "loss_region": 0.054401297122240067, |
| "loss_total": 1.2781121730804443, |
| "lr": 0.0011873514980756796, |
| "router/selected_tokens_s0": 434.25, |
| "router/selected_tokens_s1": 8.0625, |
| "step": 1420, |
| "tokens_trained": 4.652632616 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.4056737588652482, |
| "grad_norm": 3.4950997829437256, |
| "loss": 1.2222, |
| "loss_ce": 1.1726809740066528, |
| "loss_region": 0.0537344329059124, |
| "loss_total": 1.2264153957366943, |
| "lr": 0.0011869446060918285, |
| "router/selected_tokens_s0": 370.75, |
| "router/selected_tokens_s1": 6.4375, |
| "step": 1430, |
| "tokens_trained": 4.685398056 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.4085106382978723, |
| "grad_norm": 2.04207444190979, |
| "loss": 1.2198, |
| "loss_ce": 1.1512529850006104, |
| "loss_region": 0.062247976660728455, |
| "loss_total": 1.2135009765625, |
| "lr": 0.0011865377141079775, |
| "router/selected_tokens_s0": 454.1875, |
| "router/selected_tokens_s1": 16.375, |
| "step": 1440, |
| "tokens_trained": 4.718163496 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.41134751773049644, |
| "grad_norm": 1.2782090902328491, |
| "loss": 1.2156, |
| "loss_ce": 1.1050682067871094, |
| "loss_region": 0.061748046427965164, |
| "loss_total": 1.166816234588623, |
| "lr": 0.0011861308221241265, |
| "router/selected_tokens_s0": 433.9375, |
| "router/selected_tokens_s1": 15.875, |
| "step": 1450, |
| "tokens_trained": 4.750928936 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.41418439716312055, |
| "grad_norm": 1.481460452079773, |
| "loss": 1.2037, |
| "loss_ce": 1.2169268131256104, |
| "loss_region": 0.061500489711761475, |
| "loss_total": 1.2784273624420166, |
| "lr": 0.0011857239301402756, |
| "router/selected_tokens_s0": 409.9375, |
| "router/selected_tokens_s1": 14.125, |
| "step": 1460, |
| "tokens_trained": 4.783694376 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.41702127659574467, |
| "grad_norm": 1.3822318315505981, |
| "loss": 1.2076, |
| "loss_ce": 1.154876947402954, |
| "loss_region": 0.06019541248679161, |
| "loss_total": 1.2150723934173584, |
| "lr": 0.0011853170381564246, |
| "router/selected_tokens_s0": 414.875, |
| "router/selected_tokens_s1": 13.125, |
| "step": 1470, |
| "tokens_trained": 4.816459816 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.4198581560283688, |
| "grad_norm": 1.4973548650741577, |
| "loss": 1.2036, |
| "loss_ce": 1.1452707052230835, |
| "loss_region": 0.06044153869152069, |
| "loss_total": 1.2057121992111206, |
| "lr": 0.0011849101461725736, |
| "router/selected_tokens_s0": 500.1875, |
| "router/selected_tokens_s1": 15.6875, |
| "step": 1480, |
| "tokens_trained": 4.849225256 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.4226950354609929, |
| "grad_norm": 1.2402268648147583, |
| "loss": 1.2057, |
| "loss_ce": 1.1824591159820557, |
| "loss_region": 0.058981362730264664, |
| "loss_total": 1.2414405345916748, |
| "lr": 0.0011845032541887225, |
| "router/selected_tokens_s0": 480.1875, |
| "router/selected_tokens_s1": 14.125, |
| "step": 1490, |
| "tokens_trained": 4.881990696 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.425531914893617, |
| "grad_norm": 1.4796574115753174, |
| "loss": 1.211, |
| "loss_ce": 1.1951260566711426, |
| "loss_region": 0.05882687494158745, |
| "loss_total": 1.253952980041504, |
| "lr": 0.0011840963622048713, |
| "router/selected_tokens_s0": 468.125, |
| "router/selected_tokens_s1": 13.0625, |
| "step": 1500, |
| "tokens_trained": 4.914756056 |
| }, |
| { |
| "epoch": 0.425531914893617, |
| "eval_ppl": 3.1502154013198798, |
| "eval_runtime": 2.0571, |
| "step": 1500, |
| "tokens_trained": 4.914756056 |
| }, |
| { |
| "epoch": 0.425531914893617, |
| "eval_F": 0.001229291011186336, |
| "eval_F_cds": 0.001293684305093882, |
| "eval_F_dig": 0.0017970153918274866, |
| "eval_F_exon": 0.0010736467294546147, |
| "eval_F_intron": 0.0011918489644829776, |
| "eval_F_nig": 0.001490433652565069, |
| "eval_F_promoter": 0.0010369949255679784, |
| "eval_F_utr": 0.0008313703755022863, |
| "eval_G": 0.017374615319094434, |
| "eval_G_cds": 0.019024816671907287, |
| "eval_G_dig": 0.0214062438960075, |
| "eval_G_exon": 0.014820481637583071, |
| "eval_G_intron": 0.017102886037205698, |
| "eval_G_nig": 0.01946365484807892, |
| "eval_G_promoter": 0.015690876278806876, |
| "eval_G_utr": 0.01441579077445348, |
| "eval_avg_bp_per_token": 813.4770293609672, |
| "eval_bp_per_token/cds": 772.9861111111111, |
| "eval_bp_per_token/dig": 556.4782608695652, |
| "eval_bp_per_token/exon": 931.4050632911392, |
| "eval_bp_per_token/intron": 839.0324863300096, |
| "eval_bp_per_token/nig": 670.9456662354463, |
| "eval_bp_per_token/promoter": 964.3248730964467, |
| "eval_bp_per_token/utr": 1202.8333333333333, |
| "eval_ppl_cds": 3.7561527389283587, |
| "eval_ppl_dig": 1.3154554554553453, |
| "eval_ppl_exon": 3.3617922872366606, |
| "eval_ppl_intron": 3.1668774100622223, |
| "eval_ppl_nig": 3.048275380960171, |
| "eval_ppl_promoter": 3.3705226274627504, |
| "eval_ppl_utr": 3.4453219090888463, |
| "step": 1500, |
| "tokens_trained": 4.914756056 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.42836879432624114, |
| "grad_norm": 2.848538875579834, |
| "loss": 1.2073, |
| "loss_ce": 1.1491812467575073, |
| "loss_region": 0.06309393793344498, |
| "loss_total": 1.2122751474380493, |
| "lr": 0.0011836894702210202, |
| "router/selected_tokens_s0": 435.6875, |
| "router/selected_tokens_s1": 16.5, |
| "step": 1510, |
| "tokens_trained": 4.947521496 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.43120567375886526, |
| "grad_norm": 9.449421882629395, |
| "loss": 1.2125, |
| "loss_ce": 1.211773157119751, |
| "loss_region": 0.05893857032060623, |
| "loss_total": 1.2707117795944214, |
| "lr": 0.0011832825782371692, |
| "router/selected_tokens_s0": 496.5, |
| "router/selected_tokens_s1": 13.625, |
| "step": 1520, |
| "tokens_trained": 4.980286136 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.4340425531914894, |
| "grad_norm": 4.181710720062256, |
| "loss": 1.2218, |
| "loss_ce": 1.162986397743225, |
| "loss_region": 0.054903510957956314, |
| "loss_total": 1.2178899049758911, |
| "lr": 0.0011828756862533184, |
| "router/selected_tokens_s0": 706.5625, |
| "router/selected_tokens_s1": 9.75, |
| "step": 1530, |
| "tokens_trained": 5.013051576 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.4368794326241135, |
| "grad_norm": 3.0098717212677, |
| "loss": 1.2173, |
| "loss_ce": 1.1538748741149902, |
| "loss_region": 0.05533435568213463, |
| "loss_total": 1.2092092037200928, |
| "lr": 0.0011824687942694674, |
| "router/selected_tokens_s0": 588.375, |
| "router/selected_tokens_s1": 10.25, |
| "step": 1540, |
| "tokens_trained": 5.045813688 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.4397163120567376, |
| "grad_norm": 2.5773324966430664, |
| "loss": 1.2122, |
| "loss_ce": 1.095015525817871, |
| "loss_region": 0.054704807698726654, |
| "loss_total": 1.149720311164856, |
| "lr": 0.0011820619022856163, |
| "router/selected_tokens_s0": 423.625, |
| "router/selected_tokens_s1": 8.375, |
| "step": 1550, |
| "tokens_trained": 5.078578552 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.4425531914893617, |
| "grad_norm": 3.0698676109313965, |
| "loss": 1.2121, |
| "loss_ce": 1.1329262256622314, |
| "loss_region": 0.05666619539260864, |
| "loss_total": 1.1895923614501953, |
| "lr": 0.0011816550103017653, |
| "router/selected_tokens_s0": 360.3125, |
| "router/selected_tokens_s1": 8.8125, |
| "step": 1560, |
| "tokens_trained": 5.111343992 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.4453900709219858, |
| "grad_norm": 0.7125281095504761, |
| "loss": 1.2112, |
| "loss_ce": 1.218764305114746, |
| "loss_region": 0.05678607523441315, |
| "loss_total": 1.275550365447998, |
| "lr": 0.0011812481183179143, |
| "router/selected_tokens_s0": 374.1875, |
| "router/selected_tokens_s1": 9.0, |
| "step": 1570, |
| "tokens_trained": 5.144109432 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.4482269503546099, |
| "grad_norm": 1.8508944511413574, |
| "loss": 1.2046, |
| "loss_ce": 1.1496455669403076, |
| "loss_region": 0.06071101874113083, |
| "loss_total": 1.210356593132019, |
| "lr": 0.0011808412263340632, |
| "router/selected_tokens_s0": 447.25, |
| "router/selected_tokens_s1": 14.4375, |
| "step": 1580, |
| "tokens_trained": 5.176874072 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.451063829787234, |
| "grad_norm": 0.30530375242233276, |
| "loss": 1.207, |
| "loss_ce": 1.1380561590194702, |
| "loss_region": 0.05641654506325722, |
| "loss_total": 1.1944726705551147, |
| "lr": 0.0011804343343502122, |
| "router/selected_tokens_s0": 441.125, |
| "router/selected_tokens_s1": 10.3125, |
| "step": 1590, |
| "tokens_trained": 5.209639512 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.45390070921985815, |
| "grad_norm": 2.42014479637146, |
| "loss": 1.2082, |
| "loss_ce": 1.1705315113067627, |
| "loss_region": 0.0601993128657341, |
| "loss_total": 1.2307307720184326, |
| "lr": 0.0011800274423663611, |
| "router/selected_tokens_s0": 471.625, |
| "router/selected_tokens_s1": 14.4375, |
| "step": 1600, |
| "tokens_trained": 5.242403352 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.45673758865248226, |
| "grad_norm": 1.9463731050491333, |
| "loss": 1.2037, |
| "loss_ce": 1.137668251991272, |
| "loss_region": 0.059560373425483704, |
| "loss_total": 1.1972286701202393, |
| "lr": 0.0011796205503825101, |
| "router/selected_tokens_s0": 430.125, |
| "router/selected_tokens_s1": 13.375, |
| "step": 1610, |
| "tokens_trained": 5.275167992 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.4595744680851064, |
| "grad_norm": 1.944096326828003, |
| "loss": 1.2095, |
| "loss_ce": 1.1958094835281372, |
| "loss_region": 0.05815673992037773, |
| "loss_total": 1.253966212272644, |
| "lr": 0.001179213658398659, |
| "router/selected_tokens_s0": 438.0625, |
| "router/selected_tokens_s1": 11.875, |
| "step": 1620, |
| "tokens_trained": 5.307932632 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.4624113475177305, |
| "grad_norm": 0.5991163849830627, |
| "loss": 1.2036, |
| "loss_ce": 1.1603964567184448, |
| "loss_region": 0.058173276484012604, |
| "loss_total": 1.2185697555541992, |
| "lr": 0.001178806766414808, |
| "router/selected_tokens_s0": 421.25, |
| "router/selected_tokens_s1": 11.5625, |
| "step": 1630, |
| "tokens_trained": 5.340694016 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.4652482269503546, |
| "grad_norm": 1.0851240158081055, |
| "loss": 1.206, |
| "loss_ce": 1.1598323583602905, |
| "loss_region": 0.05617981776595116, |
| "loss_total": 1.2160121202468872, |
| "lr": 0.001178399874430957, |
| "router/selected_tokens_s0": 385.4375, |
| "router/selected_tokens_s1": 9.0, |
| "step": 1640, |
| "tokens_trained": 5.373459456 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.46808510638297873, |
| "grad_norm": 0.9609947204589844, |
| "loss": 1.2045, |
| "loss_ce": 1.1532255411148071, |
| "loss_region": 0.06535308808088303, |
| "loss_total": 1.218578577041626, |
| "lr": 0.001177992982447106, |
| "router/selected_tokens_s0": 450.0625, |
| "router/selected_tokens_s1": 19.25, |
| "step": 1650, |
| "tokens_trained": 5.406224096 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.47092198581560285, |
| "grad_norm": 0.9339040517807007, |
| "loss": 1.1979, |
| "loss_ce": 1.1834607124328613, |
| "loss_region": 0.06303829699754715, |
| "loss_total": 1.2464990615844727, |
| "lr": 0.001177586090463255, |
| "router/selected_tokens_s0": 362.5625, |
| "router/selected_tokens_s1": 14.1875, |
| "step": 1660, |
| "tokens_trained": 5.438988736 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.47375886524822697, |
| "grad_norm": 1.7706341743469238, |
| "loss": 1.2017, |
| "loss_ce": 1.1299399137496948, |
| "loss_region": 0.059476908296346664, |
| "loss_total": 1.189416766166687, |
| "lr": 0.001177179198479404, |
| "router/selected_tokens_s0": 475.5, |
| "router/selected_tokens_s1": 13.875, |
| "step": 1670, |
| "tokens_trained": 5.471754176 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.4765957446808511, |
| "grad_norm": 1.8785125017166138, |
| "loss": 1.1999, |
| "loss_ce": 1.0933830738067627, |
| "loss_region": 0.06130226328969002, |
| "loss_total": 1.154685378074646, |
| "lr": 0.0011767723064955529, |
| "router/selected_tokens_s0": 452.5625, |
| "router/selected_tokens_s1": 15.125, |
| "step": 1680, |
| "tokens_trained": 5.504519616 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.4794326241134752, |
| "grad_norm": 1.1370394229888916, |
| "loss": 1.1996, |
| "loss_ce": 1.1579982042312622, |
| "loss_region": 0.05934907868504524, |
| "loss_total": 1.217347264289856, |
| "lr": 0.0011763654145117018, |
| "router/selected_tokens_s0": 394.25, |
| "router/selected_tokens_s1": 12.0625, |
| "step": 1690, |
| "tokens_trained": 5.537285056 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.48226950354609927, |
| "grad_norm": 1.047006607055664, |
| "loss": 1.1999, |
| "loss_ce": 1.1407577991485596, |
| "loss_region": 0.06121063977479935, |
| "loss_total": 1.2019684314727783, |
| "lr": 0.0011759585225278508, |
| "router/selected_tokens_s0": 430.375, |
| "router/selected_tokens_s1": 14.9375, |
| "step": 1700, |
| "tokens_trained": 5.570050496 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.4851063829787234, |
| "grad_norm": 0.6814096570014954, |
| "loss": 1.2015, |
| "loss_ce": 1.1641886234283447, |
| "loss_region": 0.057673294097185135, |
| "loss_total": 1.2218619585037231, |
| "lr": 0.001175551630544, |
| "router/selected_tokens_s0": 437.875, |
| "router/selected_tokens_s1": 11.4375, |
| "step": 1710, |
| "tokens_trained": 5.602815936 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.4879432624113475, |
| "grad_norm": 1.2100872993469238, |
| "loss": 1.199, |
| "loss_ce": 1.1427383422851562, |
| "loss_region": 0.06006227061152458, |
| "loss_total": 1.2028006315231323, |
| "lr": 0.001175144738560149, |
| "router/selected_tokens_s0": 399.0, |
| "router/selected_tokens_s1": 12.875, |
| "step": 1720, |
| "tokens_trained": 5.635581376 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.4907801418439716, |
| "grad_norm": 2.063210964202881, |
| "loss": 1.1993, |
| "loss_ce": 1.1663732528686523, |
| "loss_region": 0.06088735908269882, |
| "loss_total": 1.2272605895996094, |
| "lr": 0.001174737846576298, |
| "router/selected_tokens_s0": 430.4375, |
| "router/selected_tokens_s1": 14.3125, |
| "step": 1730, |
| "tokens_trained": 5.668346816 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.49361702127659574, |
| "grad_norm": 2.0260696411132812, |
| "loss": 1.2099, |
| "loss_ce": 1.1854848861694336, |
| "loss_region": 0.06261169165372849, |
| "loss_total": 1.2480965852737427, |
| "lr": 0.0011743309545924469, |
| "router/selected_tokens_s0": 341.5, |
| "router/selected_tokens_s1": 13.1875, |
| "step": 1740, |
| "tokens_trained": 5.701112256 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.49645390070921985, |
| "grad_norm": 1.5324933528900146, |
| "loss": 1.2132, |
| "loss_ce": 1.183475136756897, |
| "loss_region": 0.06018225848674774, |
| "loss_total": 1.2436573505401611, |
| "lr": 0.0011739240626085956, |
| "router/selected_tokens_s0": 372.25, |
| "router/selected_tokens_s1": 12.1875, |
| "step": 1750, |
| "tokens_trained": 5.733877696 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.49929078014184397, |
| "grad_norm": 2.0786314010620117, |
| "loss": 1.206, |
| "loss_ce": 1.1014961004257202, |
| "loss_region": 0.059073276817798615, |
| "loss_total": 1.160569429397583, |
| "lr": 0.0011735171706247446, |
| "router/selected_tokens_s0": 516.875, |
| "router/selected_tokens_s1": 14.125, |
| "step": 1760, |
| "tokens_trained": 5.766643136 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.502127659574468, |
| "grad_norm": 1.221889615058899, |
| "loss": 1.2006, |
| "loss_ce": 1.1406219005584717, |
| "loss_region": 0.05831628665328026, |
| "loss_total": 1.1989381313323975, |
| "lr": 0.0011731102786408936, |
| "router/selected_tokens_s0": 365.0625, |
| "router/selected_tokens_s1": 10.125, |
| "step": 1770, |
| "tokens_trained": 5.799407776 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.5049645390070922, |
| "grad_norm": 1.8206592798233032, |
| "loss": 1.2039, |
| "loss_ce": 1.197525143623352, |
| "loss_region": 0.06177800893783569, |
| "loss_total": 1.259303092956543, |
| "lr": 0.0011727033866570427, |
| "router/selected_tokens_s0": 393.5625, |
| "router/selected_tokens_s1": 14.0, |
| "step": 1780, |
| "tokens_trained": 5.832173216 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.5078014184397163, |
| "grad_norm": 1.2375364303588867, |
| "loss": 1.1982, |
| "loss_ce": 1.15640127658844, |
| "loss_region": 0.057916007936000824, |
| "loss_total": 1.2143173217773438, |
| "lr": 0.0011722964946731917, |
| "router/selected_tokens_s0": 469.875, |
| "router/selected_tokens_s1": 12.375, |
| "step": 1790, |
| "tokens_trained": 5.864938656 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.5106382978723404, |
| "grad_norm": 1.060533046722412, |
| "loss": 1.1963, |
| "loss_ce": 1.1505051851272583, |
| "loss_region": 0.05862096697092056, |
| "loss_total": 1.2091261148452759, |
| "lr": 0.0011718896026893407, |
| "router/selected_tokens_s0": 431.5625, |
| "router/selected_tokens_s1": 12.375, |
| "step": 1800, |
| "tokens_trained": 5.897704096 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.5134751773049645, |
| "grad_norm": 0.39916256070137024, |
| "loss": 1.194, |
| "loss_ce": 1.19837486743927, |
| "loss_region": 0.06013888120651245, |
| "loss_total": 1.2585136890411377, |
| "lr": 0.0011714827107054896, |
| "router/selected_tokens_s0": 451.6875, |
| "router/selected_tokens_s1": 14.25, |
| "step": 1810, |
| "tokens_trained": 5.930469536 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.5163120567375886, |
| "grad_norm": 2.187572479248047, |
| "loss": 1.1932, |
| "loss_ce": 1.1421654224395752, |
| "loss_region": 0.057629093527793884, |
| "loss_total": 1.1997945308685303, |
| "lr": 0.0011710758187216386, |
| "router/selected_tokens_s0": 468.0, |
| "router/selected_tokens_s1": 12.0, |
| "step": 1820, |
| "tokens_trained": 5.963234976 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.5191489361702127, |
| "grad_norm": 1.2308872938156128, |
| "loss": 1.1995, |
| "loss_ce": 1.0662407875061035, |
| "loss_region": 0.05716879665851593, |
| "loss_total": 1.123409628868103, |
| "lr": 0.0011706689267377876, |
| "router/selected_tokens_s0": 509.75, |
| "router/selected_tokens_s1": 12.25, |
| "step": 1830, |
| "tokens_trained": 5.996000416 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.5219858156028369, |
| "grad_norm": 1.3613312244415283, |
| "loss": 1.1964, |
| "loss_ce": 1.105545163154602, |
| "loss_region": 0.057579901069402695, |
| "loss_total": 1.1631250381469727, |
| "lr": 0.0011702620347539365, |
| "router/selected_tokens_s0": 426.125, |
| "router/selected_tokens_s1": 11.4375, |
| "step": 1840, |
| "tokens_trained": 6.02876568 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.524822695035461, |
| "grad_norm": 1.1689107418060303, |
| "loss": 1.195, |
| "loss_ce": 1.1149792671203613, |
| "loss_region": 0.05797363072633743, |
| "loss_total": 1.1729528903961182, |
| "lr": 0.0011698551427700855, |
| "router/selected_tokens_s0": 453.5, |
| "router/selected_tokens_s1": 12.0, |
| "step": 1850, |
| "tokens_trained": 6.06153112 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.5276595744680851, |
| "grad_norm": 1.1174789667129517, |
| "loss": 1.2006, |
| "loss_ce": 1.137022614479065, |
| "loss_region": 0.056159064173698425, |
| "loss_total": 1.1931816339492798, |
| "lr": 0.0011694482507862345, |
| "router/selected_tokens_s0": 419.625, |
| "router/selected_tokens_s1": 10.0, |
| "step": 1860, |
| "tokens_trained": 6.09429256 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.5304964539007092, |
| "grad_norm": 1.199244737625122, |
| "loss": 1.1887, |
| "loss_ce": 1.1424124240875244, |
| "loss_region": 0.058031920343637466, |
| "loss_total": 1.2004443407058716, |
| "lr": 0.0011690413588023834, |
| "router/selected_tokens_s0": 445.5625, |
| "router/selected_tokens_s1": 12.0, |
| "step": 1870, |
| "tokens_trained": 6.127058 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.5333333333333333, |
| "grad_norm": 1.2622954845428467, |
| "loss": 1.1974, |
| "loss_ce": 1.158575177192688, |
| "loss_region": 0.059437863528728485, |
| "loss_total": 1.218013048171997, |
| "lr": 0.0011686344668185324, |
| "router/selected_tokens_s0": 399.75, |
| "router/selected_tokens_s1": 12.3125, |
| "step": 1880, |
| "tokens_trained": 6.15982344 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.5361702127659574, |
| "grad_norm": 1.0283706188201904, |
| "loss": 1.1981, |
| "loss_ce": 1.137675166130066, |
| "loss_region": 0.0630335733294487, |
| "loss_total": 1.2007087469100952, |
| "lr": 0.0011682275748346814, |
| "router/selected_tokens_s0": 346.75, |
| "router/selected_tokens_s1": 13.625, |
| "step": 1890, |
| "tokens_trained": 6.19258888 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.5390070921985816, |
| "grad_norm": 1.3320529460906982, |
| "loss": 1.1866, |
| "loss_ce": 1.1702394485473633, |
| "loss_region": 0.057980410754680634, |
| "loss_total": 1.2282198667526245, |
| "lr": 0.0011678206828508303, |
| "router/selected_tokens_s0": 374.8125, |
| "router/selected_tokens_s1": 10.1875, |
| "step": 1900, |
| "tokens_trained": 6.22535432 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.5418439716312057, |
| "grad_norm": 1.0204395055770874, |
| "loss": 1.1974, |
| "loss_ce": 1.1290398836135864, |
| "loss_region": 0.06040707230567932, |
| "loss_total": 1.1894469261169434, |
| "lr": 0.0011674137908669793, |
| "router/selected_tokens_s0": 506.0625, |
| "router/selected_tokens_s1": 15.625, |
| "step": 1910, |
| "tokens_trained": 6.25811976 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.5446808510638298, |
| "grad_norm": 1.0836961269378662, |
| "loss": 1.1942, |
| "loss_ce": 1.1287145614624023, |
| "loss_region": 0.061726104468107224, |
| "loss_total": 1.1904406547546387, |
| "lr": 0.0011670068988831283, |
| "router/selected_tokens_s0": 471.25, |
| "router/selected_tokens_s1": 16.25, |
| "step": 1920, |
| "tokens_trained": 6.2908852 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.5475177304964539, |
| "grad_norm": 1.853095293045044, |
| "loss": 1.1915, |
| "loss_ce": 1.1728310585021973, |
| "loss_region": 0.06108897551894188, |
| "loss_total": 1.2339199781417847, |
| "lr": 0.0011666000068992772, |
| "router/selected_tokens_s0": 377.0625, |
| "router/selected_tokens_s1": 13.1875, |
| "step": 1930, |
| "tokens_trained": 6.32365064 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.550354609929078, |
| "grad_norm": 0.9053633213043213, |
| "loss": 1.1988, |
| "loss_ce": 1.1357173919677734, |
| "loss_region": 0.058883197605609894, |
| "loss_total": 1.1946005821228027, |
| "lr": 0.0011661931149154262, |
| "router/selected_tokens_s0": 392.9375, |
| "router/selected_tokens_s1": 11.875, |
| "step": 1940, |
| "tokens_trained": 6.35641608 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.5531914893617021, |
| "grad_norm": 0.5790743827819824, |
| "loss": 1.193, |
| "loss_ce": 1.1007791757583618, |
| "loss_region": 0.06010182946920395, |
| "loss_total": 1.1608810424804688, |
| "lr": 0.0011657862229315751, |
| "router/selected_tokens_s0": 448.8125, |
| "router/selected_tokens_s1": 13.625, |
| "step": 1950, |
| "tokens_trained": 6.38918152 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.5560283687943263, |
| "grad_norm": 2.2684543132781982, |
| "loss": 1.1929, |
| "loss_ce": 1.06866455078125, |
| "loss_region": 0.05867019668221474, |
| "loss_total": 1.127334713935852, |
| "lr": 0.0011653793309477243, |
| "router/selected_tokens_s0": 506.125, |
| "router/selected_tokens_s1": 13.8125, |
| "step": 1960, |
| "tokens_trained": 6.42194696 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.5588652482269504, |
| "grad_norm": 1.153763771057129, |
| "loss": 1.1983, |
| "loss_ce": 1.112860083580017, |
| "loss_region": 0.05813064053654671, |
| "loss_total": 1.1709907054901123, |
| "lr": 0.0011649724389638733, |
| "router/selected_tokens_s0": 454.9375, |
| "router/selected_tokens_s1": 12.0625, |
| "step": 1970, |
| "tokens_trained": 6.4547124 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.5617021276595745, |
| "grad_norm": 1.0854955911636353, |
| "loss": 1.1992, |
| "loss_ce": 1.1787145137786865, |
| "loss_region": 0.060824230313301086, |
| "loss_total": 1.2395387887954712, |
| "lr": 0.0011645655469800223, |
| "router/selected_tokens_s0": 456.375, |
| "router/selected_tokens_s1": 14.875, |
| "step": 1980, |
| "tokens_trained": 6.48747784 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.5645390070921986, |
| "grad_norm": 1.0839793682098389, |
| "loss": 1.1938, |
| "loss_ce": 1.1302708387374878, |
| "loss_region": 0.05648130550980568, |
| "loss_total": 1.186752200126648, |
| "lr": 0.0011641586549961712, |
| "router/selected_tokens_s0": 445.125, |
| "router/selected_tokens_s1": 10.3125, |
| "step": 1990, |
| "tokens_trained": 6.52024328 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.5673758865248227, |
| "grad_norm": 1.595931887626648, |
| "loss": 1.1913, |
| "loss_ce": 1.1068353652954102, |
| "loss_region": 0.058468256145715714, |
| "loss_total": 1.1653035879135132, |
| "lr": 0.00116375176301232, |
| "router/selected_tokens_s0": 455.125, |
| "router/selected_tokens_s1": 12.3125, |
| "step": 2000, |
| "tokens_trained": 6.55300872 |
| }, |
| { |
| "epoch": 0.5673758865248227, |
| "eval_ppl": 3.1223812712711436, |
| "eval_runtime": 2.1243, |
| "step": 2000, |
| "tokens_trained": 6.55300872 |
| }, |
| { |
| "epoch": 0.5673758865248227, |
| "eval_F": 0.0011084850378935854, |
| "eval_F_cds": 0.0012667325487377595, |
| "eval_F_dig": 0.0036330963356512227, |
| "eval_F_exon": 0.0010736467294546147, |
| "eval_F_intron": 0.001082592948118343, |
| "eval_F_nig": 0.0010864933547482748, |
| "eval_F_promoter": 0.0011212178636851747, |
| "eval_F_utr": 0.00111915242856077, |
| "eval_G": 0.016599176883909706, |
| "eval_G_cds": 0.021858506086604976, |
| "eval_G_dig": 0.02974574734842566, |
| "eval_G_exon": 0.014798423675948955, |
| "eval_G_intron": 0.016208956635712073, |
| "eval_G_nig": 0.017406812648344424, |
| "eval_G_promoter": 0.016065857011987292, |
| "eval_G_utr": 0.014424679875774081, |
| "eval_avg_bp_per_token": 902.1321585903083, |
| "eval_bp_per_token/cds": 789.4326241134752, |
| "eval_bp_per_token/dig": 275.247311827957, |
| "eval_bp_per_token/exon": 931.4050632911392, |
| "eval_bp_per_token/intron": 923.7082152974505, |
| "eval_bp_per_token/nig": 920.3921916592724, |
| "eval_bp_per_token/promoter": 891.887323943662, |
| "eval_bp_per_token/utr": 893.5333333333333, |
| "eval_ppl_cds": 3.745216707137465, |
| "eval_ppl_dig": 1.1877180783796364, |
| "eval_ppl_exon": 3.3494194089611438, |
| "eval_ppl_intron": 3.1455625493500388, |
| "eval_ppl_nig": 3.0053043691065953, |
| "eval_ppl_promoter": 3.3584421148317247, |
| "eval_ppl_utr": 3.4473742076146077, |
| "step": 2000, |
| "tokens_trained": 6.55300872 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.5702127659574469, |
| "grad_norm": 2.117267608642578, |
| "loss": 1.1965, |
| "loss_ce": 1.1464393138885498, |
| "loss_region": 0.059396736323833466, |
| "loss_total": 1.2058360576629639, |
| "lr": 0.001163344871028469, |
| "router/selected_tokens_s0": 464.8125, |
| "router/selected_tokens_s1": 13.375, |
| "step": 2010, |
| "tokens_trained": 6.58577416 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.573049645390071, |
| "grad_norm": 2.818798303604126, |
| "loss": 1.2032, |
| "loss_ce": 1.1034953594207764, |
| "loss_region": 0.05682379752397537, |
| "loss_total": 1.160319209098816, |
| "lr": 0.001162937979044618, |
| "router/selected_tokens_s0": 477.375, |
| "router/selected_tokens_s1": 10.875, |
| "step": 2020, |
| "tokens_trained": 6.6185396 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.5758865248226951, |
| "grad_norm": 2.917182207107544, |
| "loss": 1.201, |
| "loss_ce": 1.0508410930633545, |
| "loss_region": 0.05919254571199417, |
| "loss_total": 1.110033631324768, |
| "lr": 0.001162531087060767, |
| "router/selected_tokens_s0": 583.3125, |
| "router/selected_tokens_s1": 15.3125, |
| "step": 2030, |
| "tokens_trained": 6.65130504 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.5787234042553191, |
| "grad_norm": 0.7146549224853516, |
| "loss": 1.1938, |
| "loss_ce": 1.1160414218902588, |
| "loss_region": 0.0593046136200428, |
| "loss_total": 1.17534601688385, |
| "lr": 0.001162124195076916, |
| "router/selected_tokens_s0": 425.0, |
| "router/selected_tokens_s1": 12.9375, |
| "step": 2040, |
| "tokens_trained": 6.68407048 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.5815602836879432, |
| "grad_norm": 0.6383615136146545, |
| "loss": 1.1903, |
| "loss_ce": 1.1438560485839844, |
| "loss_region": 0.05890098586678505, |
| "loss_total": 1.2027570009231567, |
| "lr": 0.001161717303093065, |
| "router/selected_tokens_s0": 421.0, |
| "router/selected_tokens_s1": 12.1875, |
| "step": 2050, |
| "tokens_trained": 6.71683512 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.5843971631205673, |
| "grad_norm": 0.9087597131729126, |
| "loss": 1.1919, |
| "loss_ce": 1.1244920492172241, |
| "loss_region": 0.060024410486221313, |
| "loss_total": 1.184516429901123, |
| "lr": 0.001161310411109214, |
| "router/selected_tokens_s0": 444.1875, |
| "router/selected_tokens_s1": 14.25, |
| "step": 2060, |
| "tokens_trained": 6.74960056 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.5872340425531914, |
| "grad_norm": 0.4662902355194092, |
| "loss": 1.1897, |
| "loss_ce": 1.1663501262664795, |
| "loss_region": 0.06032518297433853, |
| "loss_total": 1.226675271987915, |
| "lr": 0.001160903519125363, |
| "router/selected_tokens_s0": 445.5, |
| "router/selected_tokens_s1": 14.125, |
| "step": 2070, |
| "tokens_trained": 6.782364224 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.5900709219858156, |
| "grad_norm": 1.13335382938385, |
| "loss": 1.1972, |
| "loss_ce": 1.1606361865997314, |
| "loss_region": 0.05782872810959816, |
| "loss_total": 1.218464970588684, |
| "lr": 0.001160496627141512, |
| "router/selected_tokens_s0": 456.8125, |
| "router/selected_tokens_s1": 11.9375, |
| "step": 2080, |
| "tokens_trained": 6.815129664 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.5929078014184397, |
| "grad_norm": 0.8821171522140503, |
| "loss": 1.1893, |
| "loss_ce": 1.2031593322753906, |
| "loss_region": 0.056816309690475464, |
| "loss_total": 1.2599756717681885, |
| "lr": 0.0011600897351576609, |
| "router/selected_tokens_s0": 408.1875, |
| "router/selected_tokens_s1": 9.8125, |
| "step": 2090, |
| "tokens_trained": 6.847895104 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.5957446808510638, |
| "grad_norm": 2.006725549697876, |
| "loss": 1.1914, |
| "loss_ce": 1.069053053855896, |
| "loss_region": 0.06600474566221237, |
| "loss_total": 1.135057806968689, |
| "lr": 0.0011596828431738098, |
| "router/selected_tokens_s0": 493.0625, |
| "router/selected_tokens_s1": 22.75, |
| "step": 2100, |
| "tokens_trained": 6.880660544 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.5985815602836879, |
| "grad_norm": 0.902592122554779, |
| "loss": 1.1893, |
| "loss_ce": 1.1250122785568237, |
| "loss_region": 0.061842553317546844, |
| "loss_total": 1.1868548393249512, |
| "lr": 0.0011592759511899588, |
| "router/selected_tokens_s0": 457.9375, |
| "router/selected_tokens_s1": 16.25, |
| "step": 2110, |
| "tokens_trained": 6.913423848 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.601418439716312, |
| "grad_norm": 1.49269700050354, |
| "loss": 1.1907, |
| "loss_ce": 1.0372364521026611, |
| "loss_region": 0.05842522904276848, |
| "loss_total": 1.0956616401672363, |
| "lr": 0.0011588690592061078, |
| "router/selected_tokens_s0": 508.0625, |
| "router/selected_tokens_s1": 12.625, |
| "step": 2120, |
| "tokens_trained": 6.946189128 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.6042553191489362, |
| "grad_norm": 0.784643828868866, |
| "loss": 1.19, |
| "loss_ce": 1.148450255393982, |
| "loss_region": 0.05916881933808327, |
| "loss_total": 1.207619071006775, |
| "lr": 0.0011584621672222567, |
| "router/selected_tokens_s0": 464.8125, |
| "router/selected_tokens_s1": 13.5, |
| "step": 2130, |
| "tokens_trained": 6.978954568 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.6070921985815603, |
| "grad_norm": 1.5643503665924072, |
| "loss": 1.1929, |
| "loss_ce": 1.1538206338882446, |
| "loss_region": 0.056670140475034714, |
| "loss_total": 1.2104908227920532, |
| "lr": 0.0011580552752384057, |
| "router/selected_tokens_s0": 373.3125, |
| "router/selected_tokens_s1": 9.25, |
| "step": 2140, |
| "tokens_trained": 7.011720008 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.6099290780141844, |
| "grad_norm": 1.9936473369598389, |
| "loss": 1.1945, |
| "loss_ce": 1.0993125438690186, |
| "loss_region": 0.062473952770233154, |
| "loss_total": 1.1617865562438965, |
| "lr": 0.0011576483832545547, |
| "router/selected_tokens_s0": 536.8125, |
| "router/selected_tokens_s1": 19.0625, |
| "step": 2150, |
| "tokens_trained": 7.044485448 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.6127659574468085, |
| "grad_norm": 1.7021753787994385, |
| "loss": 1.1961, |
| "loss_ce": 1.1517091989517212, |
| "loss_region": 0.05705780163407326, |
| "loss_total": 1.208767056465149, |
| "lr": 0.0011572414912707036, |
| "router/selected_tokens_s0": 389.75, |
| "router/selected_tokens_s1": 9.875, |
| "step": 2160, |
| "tokens_trained": 7.077250888 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.6156028368794326, |
| "grad_norm": 0.9088471531867981, |
| "loss": 1.187, |
| "loss_ce": 1.1205592155456543, |
| "loss_region": 0.0625070258975029, |
| "loss_total": 1.1830662488937378, |
| "lr": 0.0011568345992868526, |
| "router/selected_tokens_s0": 414.3125, |
| "router/selected_tokens_s1": 15.5, |
| "step": 2170, |
| "tokens_trained": 7.110016328 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.6184397163120567, |
| "grad_norm": 0.7021254301071167, |
| "loss": 1.1904, |
| "loss_ce": 1.1545506715774536, |
| "loss_region": 0.05998982861638069, |
| "loss_total": 1.2145404815673828, |
| "lr": 0.0011564277073030016, |
| "router/selected_tokens_s0": 416.6875, |
| "router/selected_tokens_s1": 13.0625, |
| "step": 2180, |
| "tokens_trained": 7.1427786 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.6212765957446809, |
| "grad_norm": 3.0147488117218018, |
| "loss": 1.191, |
| "loss_ce": 1.1359323263168335, |
| "loss_region": 0.05584647133946419, |
| "loss_total": 1.1917787790298462, |
| "lr": 0.0011560208153191505, |
| "router/selected_tokens_s0": 396.875, |
| "router/selected_tokens_s1": 8.8125, |
| "step": 2190, |
| "tokens_trained": 7.17554404 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.624113475177305, |
| "grad_norm": 2.2795827388763428, |
| "loss": 1.2038, |
| "loss_ce": 1.0853055715560913, |
| "loss_region": 0.06210076063871384, |
| "loss_total": 1.1474063396453857, |
| "lr": 0.0011556139233352995, |
| "router/selected_tokens_s0": 380.9375, |
| "router/selected_tokens_s1": 14.25, |
| "step": 2200, |
| "tokens_trained": 7.20830948 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.6269503546099291, |
| "grad_norm": 1.8677184581756592, |
| "loss": 1.2053, |
| "loss_ce": 1.108307957649231, |
| "loss_region": 0.057143088430166245, |
| "loss_total": 1.1654510498046875, |
| "lr": 0.0011552070313514487, |
| "router/selected_tokens_s0": 272.5625, |
| "router/selected_tokens_s1": 7.6875, |
| "step": 2210, |
| "tokens_trained": 7.24107492 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.6297872340425532, |
| "grad_norm": 0.9489080309867859, |
| "loss": 1.1974, |
| "loss_ce": 1.173215389251709, |
| "loss_region": 0.05955825746059418, |
| "loss_total": 1.2327736616134644, |
| "lr": 0.0011548001393675976, |
| "router/selected_tokens_s0": 375.0625, |
| "router/selected_tokens_s1": 11.875, |
| "step": 2220, |
| "tokens_trained": 7.27384036 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.6326241134751773, |
| "grad_norm": 0.8159853219985962, |
| "loss": 1.1931, |
| "loss_ce": 1.1084233522415161, |
| "loss_region": 0.05950550734996796, |
| "loss_total": 1.1679288148880005, |
| "lr": 0.0011543932473837466, |
| "router/selected_tokens_s0": 468.75, |
| "router/selected_tokens_s1": 14.0, |
| "step": 2230, |
| "tokens_trained": 7.3066058 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.6354609929078014, |
| "grad_norm": 1.2158466577529907, |
| "loss": 1.1847, |
| "loss_ce": 1.1424072980880737, |
| "loss_region": 0.061019301414489746, |
| "loss_total": 1.2034265995025635, |
| "lr": 0.0011539863553998956, |
| "router/selected_tokens_s0": 405.5, |
| "router/selected_tokens_s1": 13.75, |
| "step": 2240, |
| "tokens_trained": 7.33937124 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.6382978723404256, |
| "grad_norm": 0.38309988379478455, |
| "loss": 1.1872, |
| "loss_ce": 1.0867244005203247, |
| "loss_region": 0.06173141673207283, |
| "loss_total": 1.1484558582305908, |
| "lr": 0.0011535794634160443, |
| "router/selected_tokens_s0": 432.4375, |
| "router/selected_tokens_s1": 14.9375, |
| "step": 2250, |
| "tokens_trained": 7.37213668 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.6411347517730497, |
| "grad_norm": 1.231326937675476, |
| "loss": 1.1891, |
| "loss_ce": 1.1232706308364868, |
| "loss_region": 0.061620455235242844, |
| "loss_total": 1.1848911046981812, |
| "lr": 0.0011531725714321933, |
| "router/selected_tokens_s0": 425.25, |
| "router/selected_tokens_s1": 14.625, |
| "step": 2260, |
| "tokens_trained": 7.40490212 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.6439716312056738, |
| "grad_norm": 0.6881319284439087, |
| "loss": 1.1885, |
| "loss_ce": 1.0646520853042603, |
| "loss_region": 0.061182402074337006, |
| "loss_total": 1.1258344650268555, |
| "lr": 0.0011527656794483422, |
| "router/selected_tokens_s0": 436.5, |
| "router/selected_tokens_s1": 14.375, |
| "step": 2270, |
| "tokens_trained": 7.43766756 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.6468085106382979, |
| "grad_norm": 0.5453615784645081, |
| "loss": 1.1857, |
| "loss_ce": 1.183090329170227, |
| "loss_region": 0.06025899201631546, |
| "loss_total": 1.243349313735962, |
| "lr": 0.0011523587874644914, |
| "router/selected_tokens_s0": 389.5, |
| "router/selected_tokens_s1": 12.6875, |
| "step": 2280, |
| "tokens_trained": 7.470433 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.649645390070922, |
| "grad_norm": 0.4796663224697113, |
| "loss": 1.1928, |
| "loss_ce": 1.1268420219421387, |
| "loss_region": 0.058640334755182266, |
| "loss_total": 1.185482382774353, |
| "lr": 0.0011519518954806404, |
| "router/selected_tokens_s0": 434.125, |
| "router/selected_tokens_s1": 12.25, |
| "step": 2290, |
| "tokens_trained": 7.503197136 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.6524822695035462, |
| "grad_norm": 1.9199693202972412, |
| "loss": 1.1889, |
| "loss_ce": 1.1117569208145142, |
| "loss_region": 0.061376579105854034, |
| "loss_total": 1.1731334924697876, |
| "lr": 0.0011515450034967894, |
| "router/selected_tokens_s0": 396.0, |
| "router/selected_tokens_s1": 14.125, |
| "step": 2300, |
| "tokens_trained": 7.535962576 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.6553191489361702, |
| "grad_norm": 1.2376593351364136, |
| "loss": 1.1913, |
| "loss_ce": 1.1556870937347412, |
| "loss_region": 0.0595984049141407, |
| "loss_total": 1.2152855396270752, |
| "lr": 0.0011511381115129383, |
| "router/selected_tokens_s0": 356.375, |
| "router/selected_tokens_s1": 11.1875, |
| "step": 2310, |
| "tokens_trained": 7.568726416 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.6581560283687943, |
| "grad_norm": 0.6415485739707947, |
| "loss": 1.1884, |
| "loss_ce": 1.15511155128479, |
| "loss_region": 0.06067604944109917, |
| "loss_total": 1.215787649154663, |
| "lr": 0.0011507312195290873, |
| "router/selected_tokens_s0": 492.75, |
| "router/selected_tokens_s1": 15.625, |
| "step": 2320, |
| "tokens_trained": 7.601491056 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.6609929078014184, |
| "grad_norm": 0.5361623764038086, |
| "loss": 1.1881, |
| "loss_ce": 1.1369774341583252, |
| "loss_region": 0.06078004837036133, |
| "loss_total": 1.1977574825286865, |
| "lr": 0.0011503243275452363, |
| "router/selected_tokens_s0": 371.25, |
| "router/selected_tokens_s1": 12.6875, |
| "step": 2330, |
| "tokens_trained": 7.634256496 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.6638297872340425, |
| "grad_norm": 1.0889244079589844, |
| "loss": 1.1856, |
| "loss_ce": 1.1064252853393555, |
| "loss_region": 0.05778004229068756, |
| "loss_total": 1.1642053127288818, |
| "lr": 0.0011499174355613852, |
| "router/selected_tokens_s0": 391.8125, |
| "router/selected_tokens_s1": 10.3125, |
| "step": 2340, |
| "tokens_trained": 7.667021936 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.6666666666666666, |
| "grad_norm": 1.2836039066314697, |
| "loss": 1.1873, |
| "loss_ce": 1.1070295572280884, |
| "loss_region": 0.05989352613687515, |
| "loss_total": 1.1669230461120605, |
| "lr": 0.0011495105435775342, |
| "router/selected_tokens_s0": 372.5625, |
| "router/selected_tokens_s1": 11.875, |
| "step": 2350, |
| "tokens_trained": 7.699787376 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.6695035460992907, |
| "grad_norm": 0.9303162097930908, |
| "loss": 1.1863, |
| "loss_ce": 1.1171040534973145, |
| "loss_region": 0.0614146888256073, |
| "loss_total": 1.1785187721252441, |
| "lr": 0.0011491036515936831, |
| "router/selected_tokens_s0": 455.9375, |
| "router/selected_tokens_s1": 15.5625, |
| "step": 2360, |
| "tokens_trained": 7.732552016 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.6723404255319149, |
| "grad_norm": 1.983481764793396, |
| "loss": 1.1806, |
| "loss_ce": 1.1478883028030396, |
| "loss_region": 0.05946619436144829, |
| "loss_total": 1.2073545455932617, |
| "lr": 0.0011486967596098321, |
| "router/selected_tokens_s0": 448.5, |
| "router/selected_tokens_s1": 13.375, |
| "step": 2370, |
| "tokens_trained": 7.765317456 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.675177304964539, |
| "grad_norm": 1.7260289192199707, |
| "loss": 1.1894, |
| "loss_ce": 1.1961910724639893, |
| "loss_region": 0.05941138043999672, |
| "loss_total": 1.255602478981018, |
| "lr": 0.001148289867625981, |
| "router/selected_tokens_s0": 517.4375, |
| "router/selected_tokens_s1": 14.9375, |
| "step": 2380, |
| "tokens_trained": 7.798082896 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.6780141843971631, |
| "grad_norm": 1.469610571861267, |
| "loss": 1.1915, |
| "loss_ce": 1.075291395187378, |
| "loss_region": 0.058670733124017715, |
| "loss_total": 1.1339621543884277, |
| "lr": 0.00114788297564213, |
| "router/selected_tokens_s0": 471.375, |
| "router/selected_tokens_s1": 12.9375, |
| "step": 2390, |
| "tokens_trained": 7.830848336 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.6808510638297872, |
| "grad_norm": 0.5615185499191284, |
| "loss": 1.1925, |
| "loss_ce": 1.0627862215042114, |
| "loss_region": 0.05851579084992409, |
| "loss_total": 1.1213020086288452, |
| "lr": 0.001147476083658279, |
| "router/selected_tokens_s0": 436.0625, |
| "router/selected_tokens_s1": 12.125, |
| "step": 2400, |
| "tokens_trained": 7.863613776 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.6836879432624113, |
| "grad_norm": 0.3367394506931305, |
| "loss": 1.1881, |
| "loss_ce": 1.120520830154419, |
| "loss_region": 0.05808383971452713, |
| "loss_total": 1.1786047220230103, |
| "lr": 0.001147069191674428, |
| "router/selected_tokens_s0": 428.5, |
| "router/selected_tokens_s1": 11.4375, |
| "step": 2410, |
| "tokens_trained": 7.896379216 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.6865248226950355, |
| "grad_norm": 1.269070029258728, |
| "loss": 1.1858, |
| "loss_ce": 1.1046808958053589, |
| "loss_region": 0.06424061954021454, |
| "loss_total": 1.1689214706420898, |
| "lr": 0.001146662299690577, |
| "router/selected_tokens_s0": 459.5625, |
| "router/selected_tokens_s1": 19.0625, |
| "step": 2420, |
| "tokens_trained": 7.929144656 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.6893617021276596, |
| "grad_norm": 0.5868048667907715, |
| "loss": 1.1849, |
| "loss_ce": 1.170312762260437, |
| "loss_region": 0.05978027358651161, |
| "loss_total": 1.230093002319336, |
| "lr": 0.001146255407706726, |
| "router/selected_tokens_s0": 452.9375, |
| "router/selected_tokens_s1": 13.9375, |
| "step": 2430, |
| "tokens_trained": 7.961910096 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.6921985815602837, |
| "grad_norm": 1.2158846855163574, |
| "loss": 1.1891, |
| "loss_ce": 1.1489698886871338, |
| "loss_region": 0.05716322362422943, |
| "loss_total": 1.2061331272125244, |
| "lr": 0.0011458485157228749, |
| "router/selected_tokens_s0": 397.5, |
| "router/selected_tokens_s1": 9.9375, |
| "step": 2440, |
| "tokens_trained": 7.994675536 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.6950354609929078, |
| "grad_norm": 0.8825588822364807, |
| "loss": 1.1826, |
| "loss_ce": 1.0938084125518799, |
| "loss_region": 0.0573706328868866, |
| "loss_total": 1.1511790752410889, |
| "lr": 0.0011454416237390238, |
| "router/selected_tokens_s0": 475.25, |
| "router/selected_tokens_s1": 11.5625, |
| "step": 2450, |
| "tokens_trained": 8.027440976 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.6978723404255319, |
| "grad_norm": 1.0089329481124878, |
| "loss": 1.1837, |
| "loss_ce": 1.1076617240905762, |
| "loss_region": 0.0649326741695404, |
| "loss_total": 1.172594428062439, |
| "lr": 0.001145034731755173, |
| "router/selected_tokens_s0": 442.0, |
| "router/selected_tokens_s1": 19.3125, |
| "step": 2460, |
| "tokens_trained": 8.060206416 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.700709219858156, |
| "grad_norm": 1.4214838743209839, |
| "loss": 1.1831, |
| "loss_ce": 1.1698490381240845, |
| "loss_region": 0.06358074396848679, |
| "loss_total": 1.2334297895431519, |
| "lr": 0.001144627839771322, |
| "router/selected_tokens_s0": 476.4375, |
| "router/selected_tokens_s1": 18.4375, |
| "step": 2470, |
| "tokens_trained": 8.092971856 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.7035460992907802, |
| "grad_norm": 0.659936249256134, |
| "loss": 1.1903, |
| "loss_ce": 1.1888149976730347, |
| "loss_region": 0.059051670134067535, |
| "loss_total": 1.2478666305541992, |
| "lr": 0.001144220947787471, |
| "router/selected_tokens_s0": 398.9375, |
| "router/selected_tokens_s1": 11.75, |
| "step": 2480, |
| "tokens_trained": 8.125737296 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.7063829787234043, |
| "grad_norm": 0.5858396887779236, |
| "loss": 1.1837, |
| "loss_ce": 1.1628644466400146, |
| "loss_region": 0.05618392676115036, |
| "loss_total": 1.2190483808517456, |
| "lr": 0.00114381405580362, |
| "router/selected_tokens_s0": 418.3125, |
| "router/selected_tokens_s1": 9.4375, |
| "step": 2490, |
| "tokens_trained": 8.158502728 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.7092198581560284, |
| "grad_norm": 0.7732462882995605, |
| "loss": 1.1838, |
| "loss_ce": 1.1414825916290283, |
| "loss_region": 0.05881210044026375, |
| "loss_total": 1.2002947330474854, |
| "lr": 0.0011434071638197687, |
| "router/selected_tokens_s0": 458.6875, |
| "router/selected_tokens_s1": 12.9375, |
| "step": 2500, |
| "tokens_trained": 8.191268168 |
| }, |
| { |
| "epoch": 0.7092198581560284, |
| "eval_ppl": 3.088251523031909, |
| "eval_runtime": 2.0713, |
| "step": 2500, |
| "tokens_trained": 8.191268168 |
| }, |
| { |
| "epoch": 0.7092198581560284, |
| "eval_F": 0.0013405429092626122, |
| "eval_F_cds": 0.0019225586200700745, |
| "eval_F_dig": 0.001992343151808735, |
| "eval_F_exon": 0.0010600562645248094, |
| "eval_F_intron": 0.0012945879553100724, |
| "eval_F_nig": 0.0013708904618030583, |
| "eval_F_promoter": 0.0013752026614448445, |
| "eval_F_utr": 0.0013536415088306456, |
| "eval_G": 0.016355751570530732, |
| "eval_G_cds": 0.017110347088087324, |
| "eval_G_dig": 0.024907799193296352, |
| "eval_G_exon": 0.01501345562203558, |
| "eval_G_intron": 0.01610771212822286, |
| "eval_G_nig": 0.017513512402485533, |
| "eval_G_promoter": 0.01555233561146511, |
| "eval_G_utr": 0.014783220840749938, |
| "eval_avg_bp_per_token": 745.9664238200824, |
| "eval_bp_per_token/cds": 520.1401869158879, |
| "eval_bp_per_token/dig": 501.921568627451, |
| "eval_bp_per_token/exon": 943.3461538461538, |
| "eval_bp_per_token/intron": 772.4465501924785, |
| "eval_bp_per_token/nig": 729.4528832630099, |
| "eval_bp_per_token/promoter": 727.1655502392344, |
| "eval_bp_per_token/utr": 738.7480314960629, |
| "eval_ppl_cds": 3.741853201441482, |
| "eval_ppl_dig": 1.134775489212344, |
| "eval_ppl_exon": 3.3239018881446203, |
| "eval_ppl_intron": 3.1123804579035923, |
| "eval_ppl_nig": 2.962110202365204, |
| "eval_ppl_promoter": 3.3370101406004844, |
| "eval_ppl_utr": 3.4205064986888787, |
| "step": 2500, |
| "tokens_trained": 8.191268168 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.7120567375886525, |
| "grad_norm": 0.7734953165054321, |
| "loss": 1.1844, |
| "loss_ce": 1.0916640758514404, |
| "loss_region": 0.06200540438294411, |
| "loss_total": 1.1536694765090942, |
| "lr": 0.0011430002718359176, |
| "router/selected_tokens_s0": 502.0, |
| "router/selected_tokens_s1": 17.75, |
| "step": 2510, |
| "tokens_trained": 8.224033608 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.7148936170212766, |
| "grad_norm": 1.9390835762023926, |
| "loss": 1.1836, |
| "loss_ce": 1.1503256559371948, |
| "loss_region": 0.05846535786986351, |
| "loss_total": 1.2087910175323486, |
| "lr": 0.0011425933798520666, |
| "router/selected_tokens_s0": 334.75, |
| "router/selected_tokens_s1": 9.4375, |
| "step": 2520, |
| "tokens_trained": 8.25679904 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.7177304964539007, |
| "grad_norm": 1.4347184896469116, |
| "loss": 1.1931, |
| "loss_ce": 1.1123408079147339, |
| "loss_region": 0.060282111167907715, |
| "loss_total": 1.1726229190826416, |
| "lr": 0.0011421864878682158, |
| "router/selected_tokens_s0": 568.0, |
| "router/selected_tokens_s1": 17.1875, |
| "step": 2530, |
| "tokens_trained": 8.28956448 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.7205673758865249, |
| "grad_norm": 0.858637273311615, |
| "loss": 1.1863, |
| "loss_ce": 1.1312373876571655, |
| "loss_region": 0.0616295225918293, |
| "loss_total": 1.1928669214248657, |
| "lr": 0.0011417795958843647, |
| "router/selected_tokens_s0": 501.75, |
| "router/selected_tokens_s1": 17.1875, |
| "step": 2540, |
| "tokens_trained": 8.322329544 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.723404255319149, |
| "grad_norm": 1.4742088317871094, |
| "loss": 1.1857, |
| "loss_ce": 1.1528387069702148, |
| "loss_region": 0.05786595121026039, |
| "loss_total": 1.2107046842575073, |
| "lr": 0.0011413727039005137, |
| "router/selected_tokens_s0": 439.6875, |
| "router/selected_tokens_s1": 11.8125, |
| "step": 2550, |
| "tokens_trained": 8.355094984 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.7262411347517731, |
| "grad_norm": 0.3498552739620209, |
| "loss": 1.1811, |
| "loss_ce": 1.0615895986557007, |
| "loss_region": 0.06257026642560959, |
| "loss_total": 1.124159812927246, |
| "lr": 0.0011409658119166627, |
| "router/selected_tokens_s0": 490.8125, |
| "router/selected_tokens_s1": 17.8125, |
| "step": 2560, |
| "tokens_trained": 8.3878584 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.7290780141843972, |
| "grad_norm": 1.282585859298706, |
| "loss": 1.1838, |
| "loss_ce": 1.071786642074585, |
| "loss_region": 0.05918266996741295, |
| "loss_total": 1.1309692859649658, |
| "lr": 0.0011405589199328116, |
| "router/selected_tokens_s0": 412.4375, |
| "router/selected_tokens_s1": 12.25, |
| "step": 2570, |
| "tokens_trained": 8.42062384 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.7319148936170212, |
| "grad_norm": 0.8695186972618103, |
| "loss": 1.1852, |
| "loss_ce": 1.1701353788375854, |
| "loss_region": 0.05956278741359711, |
| "loss_total": 1.2296981811523438, |
| "lr": 0.0011401520279489606, |
| "router/selected_tokens_s0": 362.6875, |
| "router/selected_tokens_s1": 11.5, |
| "step": 2580, |
| "tokens_trained": 8.453389256 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.7347517730496453, |
| "grad_norm": 1.7512108087539673, |
| "loss": 1.1873, |
| "loss_ce": 1.0919519662857056, |
| "loss_region": 0.06304050981998444, |
| "loss_total": 1.1549924612045288, |
| "lr": 0.0011397451359651096, |
| "router/selected_tokens_s0": 493.125, |
| "router/selected_tokens_s1": 18.8125, |
| "step": 2590, |
| "tokens_trained": 8.486154696 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.7375886524822695, |
| "grad_norm": 1.4450966119766235, |
| "loss": 1.1868, |
| "loss_ce": 1.1768208742141724, |
| "loss_region": 0.06446579843759537, |
| "loss_total": 1.2412866353988647, |
| "lr": 0.0011393382439812585, |
| "router/selected_tokens_s0": 372.875, |
| "router/selected_tokens_s1": 16.125, |
| "step": 2600, |
| "tokens_trained": 8.518920136 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.7404255319148936, |
| "grad_norm": 0.3035755753517151, |
| "loss": 1.1798, |
| "loss_ce": 1.1247832775115967, |
| "loss_region": 0.05857830122113228, |
| "loss_total": 1.183361530303955, |
| "lr": 0.0011389313519974075, |
| "router/selected_tokens_s0": 464.9375, |
| "router/selected_tokens_s1": 12.9375, |
| "step": 2610, |
| "tokens_trained": 8.551685576 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.7432624113475177, |
| "grad_norm": 1.2541152238845825, |
| "loss": 1.1852, |
| "loss_ce": 1.1951887607574463, |
| "loss_region": 0.06222536042332649, |
| "loss_total": 1.2574141025543213, |
| "lr": 0.0011385244600135565, |
| "router/selected_tokens_s0": 338.875, |
| "router/selected_tokens_s1": 12.875, |
| "step": 2620, |
| "tokens_trained": 8.584451016 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.7460992907801418, |
| "grad_norm": 0.5121674537658691, |
| "loss": 1.1806, |
| "loss_ce": 1.0926810503005981, |
| "loss_region": 0.05847754701972008, |
| "loss_total": 1.1511585712432861, |
| "lr": 0.0011381175680297054, |
| "router/selected_tokens_s0": 476.875, |
| "router/selected_tokens_s1": 12.875, |
| "step": 2630, |
| "tokens_trained": 8.617213968 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.7489361702127659, |
| "grad_norm": 1.1699979305267334, |
| "loss": 1.1829, |
| "loss_ce": 1.0987080335617065, |
| "loss_region": 0.06216296926140785, |
| "loss_total": 1.1608710289001465, |
| "lr": 0.0011377106760458544, |
| "router/selected_tokens_s0": 442.9375, |
| "router/selected_tokens_s1": 16.3125, |
| "step": 2640, |
| "tokens_trained": 8.649979408 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.75177304964539, |
| "grad_norm": 0.7619943618774414, |
| "loss": 1.1842, |
| "loss_ce": 1.1102006435394287, |
| "loss_region": 0.05994445085525513, |
| "loss_total": 1.170145034790039, |
| "lr": 0.0011373037840620034, |
| "router/selected_tokens_s0": 414.1875, |
| "router/selected_tokens_s1": 12.9375, |
| "step": 2650, |
| "tokens_trained": 8.682744848 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.7546099290780142, |
| "grad_norm": 0.7444802522659302, |
| "loss": 1.1803, |
| "loss_ce": 1.1419343948364258, |
| "loss_region": 0.06051338091492653, |
| "loss_total": 1.202447772026062, |
| "lr": 0.0011368968920781523, |
| "router/selected_tokens_s0": 428.5625, |
| "router/selected_tokens_s1": 13.9375, |
| "step": 2660, |
| "tokens_trained": 8.715510288 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.7574468085106383, |
| "grad_norm": 1.5269882678985596, |
| "loss": 1.1867, |
| "loss_ce": 1.1233625411987305, |
| "loss_region": 0.0623200386762619, |
| "loss_total": 1.1856825351715088, |
| "lr": 0.0011364900000943013, |
| "router/selected_tokens_s0": 402.25, |
| "router/selected_tokens_s1": 15.0625, |
| "step": 2670, |
| "tokens_trained": 8.748275728 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.7602836879432624, |
| "grad_norm": 0.7747791409492493, |
| "loss": 1.1861, |
| "loss_ce": 1.15846586227417, |
| "loss_region": 0.05765581130981445, |
| "loss_total": 1.2161216735839844, |
| "lr": 0.0011360831081104503, |
| "router/selected_tokens_s0": 354.5, |
| "router/selected_tokens_s1": 9.1875, |
| "step": 2680, |
| "tokens_trained": 8.781041168 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.7631205673758865, |
| "grad_norm": 1.0417836904525757, |
| "loss": 1.1889, |
| "loss_ce": 1.1638587713241577, |
| "loss_region": 0.057558320462703705, |
| "loss_total": 1.2214170694351196, |
| "lr": 0.0011356762161265992, |
| "router/selected_tokens_s0": 439.75, |
| "router/selected_tokens_s1": 11.0, |
| "step": 2690, |
| "tokens_trained": 8.813806448 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.7659574468085106, |
| "grad_norm": 1.2498610019683838, |
| "loss": 1.1761, |
| "loss_ce": 1.1044461727142334, |
| "loss_region": 0.05698206275701523, |
| "loss_total": 1.1614282131195068, |
| "lr": 0.0011352693241427482, |
| "router/selected_tokens_s0": 423.875, |
| "router/selected_tokens_s1": 10.5, |
| "step": 2700, |
| "tokens_trained": 8.846571888 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.7687943262411348, |
| "grad_norm": 0.5136662125587463, |
| "loss": 1.1832, |
| "loss_ce": 1.072133183479309, |
| "loss_region": 0.05974389612674713, |
| "loss_total": 1.131877064704895, |
| "lr": 0.0011348624321588974, |
| "router/selected_tokens_s0": 457.4375, |
| "router/selected_tokens_s1": 13.875, |
| "step": 2710, |
| "tokens_trained": 8.879337328 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.7716312056737589, |
| "grad_norm": 1.5680474042892456, |
| "loss": 1.1811, |
| "loss_ce": 1.1259905099868774, |
| "loss_region": 0.06005231291055679, |
| "loss_total": 1.1860427856445312, |
| "lr": 0.0011344555401750463, |
| "router/selected_tokens_s0": 474.875, |
| "router/selected_tokens_s1": 14.6875, |
| "step": 2720, |
| "tokens_trained": 8.912102768 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.774468085106383, |
| "grad_norm": 1.0861411094665527, |
| "loss": 1.1884, |
| "loss_ce": 1.1043437719345093, |
| "loss_region": 0.06049416959285736, |
| "loss_total": 1.1648379564285278, |
| "lr": 0.0011340486481911953, |
| "router/selected_tokens_s0": 508.75, |
| "router/selected_tokens_s1": 15.8125, |
| "step": 2730, |
| "tokens_trained": 8.944868208 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.7773049645390071, |
| "grad_norm": 0.502753496170044, |
| "loss": 1.1859, |
| "loss_ce": 1.155367136001587, |
| "loss_region": 0.06546095013618469, |
| "loss_total": 1.2208280563354492, |
| "lr": 0.001133641756207344, |
| "router/selected_tokens_s0": 431.5625, |
| "router/selected_tokens_s1": 19.0625, |
| "step": 2740, |
| "tokens_trained": 8.977633592 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.7801418439716312, |
| "grad_norm": 0.863655149936676, |
| "loss": 1.1828, |
| "loss_ce": 1.136635661125183, |
| "loss_region": 0.0639290139079094, |
| "loss_total": 1.2005646228790283, |
| "lr": 0.001133234864223493, |
| "router/selected_tokens_s0": 403.4375, |
| "router/selected_tokens_s1": 16.4375, |
| "step": 2750, |
| "tokens_trained": 9.010399032 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.7829787234042553, |
| "grad_norm": 1.6406680345535278, |
| "loss": 1.1787, |
| "loss_ce": 1.0808862447738647, |
| "loss_region": 0.05786607041954994, |
| "loss_total": 1.1387523412704468, |
| "lr": 0.001132827972239642, |
| "router/selected_tokens_s0": 499.5625, |
| "router/selected_tokens_s1": 12.75, |
| "step": 2760, |
| "tokens_trained": 9.043164472 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.7858156028368795, |
| "grad_norm": 1.7592346668243408, |
| "loss": 1.1895, |
| "loss_ce": 1.1217154264450073, |
| "loss_region": 0.057540249079465866, |
| "loss_total": 1.179255723953247, |
| "lr": 0.001132421080255791, |
| "router/selected_tokens_s0": 420.25, |
| "router/selected_tokens_s1": 10.5625, |
| "step": 2770, |
| "tokens_trained": 9.075929912 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.7886524822695036, |
| "grad_norm": 2.4455196857452393, |
| "loss": 1.1903, |
| "loss_ce": 1.0879745483398438, |
| "loss_region": 0.0601075105369091, |
| "loss_total": 1.1480820178985596, |
| "lr": 0.0011320141882719401, |
| "router/selected_tokens_s0": 386.9375, |
| "router/selected_tokens_s1": 11.625, |
| "step": 2780, |
| "tokens_trained": 9.108695352 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.7914893617021277, |
| "grad_norm": 0.37144532799720764, |
| "loss": 1.1867, |
| "loss_ce": 1.0730124711990356, |
| "loss_region": 0.05783551186323166, |
| "loss_total": 1.1308479309082031, |
| "lr": 0.001131607296288089, |
| "router/selected_tokens_s0": 433.375, |
| "router/selected_tokens_s1": 11.375, |
| "step": 2790, |
| "tokens_trained": 9.141456136 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.7943262411347518, |
| "grad_norm": 0.5975992679595947, |
| "loss": 1.1831, |
| "loss_ce": 1.1580114364624023, |
| "loss_region": 0.05659972503781319, |
| "loss_total": 1.2146111726760864, |
| "lr": 0.001131200404304238, |
| "router/selected_tokens_s0": 395.875, |
| "router/selected_tokens_s1": 9.25, |
| "step": 2800, |
| "tokens_trained": 9.174221576 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.7971631205673759, |
| "grad_norm": 0.7188255786895752, |
| "loss": 1.1828, |
| "loss_ce": 1.1139250993728638, |
| "loss_region": 0.059663042426109314, |
| "loss_total": 1.1735881567001343, |
| "lr": 0.001130793512320387, |
| "router/selected_tokens_s0": 467.5, |
| "router/selected_tokens_s1": 14.25, |
| "step": 2810, |
| "tokens_trained": 9.206987016 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.8, |
| "grad_norm": 0.35444915294647217, |
| "loss": 1.1823, |
| "loss_ce": 1.0711995363235474, |
| "loss_region": 0.0630360022187233, |
| "loss_total": 1.1342355012893677, |
| "lr": 0.001130386620336536, |
| "router/selected_tokens_s0": 442.3125, |
| "router/selected_tokens_s1": 16.75, |
| "step": 2820, |
| "tokens_trained": 9.239752456 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.8028368794326242, |
| "grad_norm": 1.3028233051300049, |
| "loss": 1.1818, |
| "loss_ce": 1.1242021322250366, |
| "loss_region": 0.06099163740873337, |
| "loss_total": 1.1851937770843506, |
| "lr": 0.001129979728352685, |
| "router/selected_tokens_s0": 402.5625, |
| "router/selected_tokens_s1": 13.875, |
| "step": 2830, |
| "tokens_trained": 9.272517896 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.8056737588652483, |
| "grad_norm": 1.156672716140747, |
| "loss": 1.1866, |
| "loss_ce": 1.125891923904419, |
| "loss_region": 0.05897829309105873, |
| "loss_total": 1.1848702430725098, |
| "lr": 0.001129572836368834, |
| "router/selected_tokens_s0": 472.3125, |
| "router/selected_tokens_s1": 13.4375, |
| "step": 2840, |
| "tokens_trained": 9.305283336 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.8085106382978723, |
| "grad_norm": 0.48895296454429626, |
| "loss": 1.1846, |
| "loss_ce": 1.1702710390090942, |
| "loss_region": 0.061904530972242355, |
| "loss_total": 1.232175588607788, |
| "lr": 0.0011291659443849829, |
| "router/selected_tokens_s0": 442.25, |
| "router/selected_tokens_s1": 15.75, |
| "step": 2850, |
| "tokens_trained": 9.338048776 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.8113475177304964, |
| "grad_norm": 0.7052562832832336, |
| "loss": 1.1794, |
| "loss_ce": 1.1244450807571411, |
| "loss_region": 0.06674501299858093, |
| "loss_total": 1.1911901235580444, |
| "lr": 0.0011287590524011318, |
| "router/selected_tokens_s0": 462.25, |
| "router/selected_tokens_s1": 21.8125, |
| "step": 2860, |
| "tokens_trained": 9.370814216 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.8141843971631205, |
| "grad_norm": 0.4975396394729614, |
| "loss": 1.1789, |
| "loss_ce": 1.1839637756347656, |
| "loss_region": 0.059558697044849396, |
| "loss_total": 1.2435225248336792, |
| "lr": 0.0011283521604172808, |
| "router/selected_tokens_s0": 375.125, |
| "router/selected_tokens_s1": 11.8125, |
| "step": 2870, |
| "tokens_trained": 9.403579656 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.8170212765957446, |
| "grad_norm": 1.2806240320205688, |
| "loss": 1.1816, |
| "loss_ce": 1.0861955881118774, |
| "loss_region": 0.059288810938596725, |
| "loss_total": 1.145484447479248, |
| "lr": 0.0011279452684334298, |
| "router/selected_tokens_s0": 510.375, |
| "router/selected_tokens_s1": 14.625, |
| "step": 2880, |
| "tokens_trained": 9.436345096 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.8198581560283688, |
| "grad_norm": 0.5983365774154663, |
| "loss": 1.1837, |
| "loss_ce": 1.1588997840881348, |
| "loss_region": 0.059880323708057404, |
| "loss_total": 1.2187801599502563, |
| "lr": 0.0011275383764495787, |
| "router/selected_tokens_s0": 381.0625, |
| "router/selected_tokens_s1": 12.0625, |
| "step": 2890, |
| "tokens_trained": 9.469110536 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.8226950354609929, |
| "grad_norm": 1.5357625484466553, |
| "loss": 1.1892, |
| "loss_ce": 1.138158917427063, |
| "loss_region": 0.058603640645742416, |
| "loss_total": 1.1967625617980957, |
| "lr": 0.0011271314844657277, |
| "router/selected_tokens_s0": 493.5, |
| "router/selected_tokens_s1": 13.3125, |
| "step": 2900, |
| "tokens_trained": 9.501875176 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.825531914893617, |
| "grad_norm": 1.3005295991897583, |
| "loss": 1.1821, |
| "loss_ce": 1.1475462913513184, |
| "loss_region": 0.0588202029466629, |
| "loss_total": 1.2063665390014648, |
| "lr": 0.0011267245924818767, |
| "router/selected_tokens_s0": 450.0, |
| "router/selected_tokens_s1": 12.625, |
| "step": 2910, |
| "tokens_trained": 9.534640608 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.8283687943262411, |
| "grad_norm": 1.562443494796753, |
| "loss": 1.1826, |
| "loss_ce": 1.0995811223983765, |
| "loss_region": 0.059245094656944275, |
| "loss_total": 1.158826231956482, |
| "lr": 0.0011263177004980256, |
| "router/selected_tokens_s0": 436.25, |
| "router/selected_tokens_s1": 12.75, |
| "step": 2920, |
| "tokens_trained": 9.567405248 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.8312056737588652, |
| "grad_norm": 1.1646053791046143, |
| "loss": 1.1852, |
| "loss_ce": 1.1184066534042358, |
| "loss_region": 0.06138136610388756, |
| "loss_total": 1.1797879934310913, |
| "lr": 0.0011259108085141746, |
| "router/selected_tokens_s0": 416.6875, |
| "router/selected_tokens_s1": 14.25, |
| "step": 2930, |
| "tokens_trained": 9.600170688 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.8340425531914893, |
| "grad_norm": 1.9767482280731201, |
| "loss": 1.1857, |
| "loss_ce": 1.1399308443069458, |
| "loss_region": 0.05815236270427704, |
| "loss_total": 1.1980831623077393, |
| "lr": 0.0011255039165303236, |
| "router/selected_tokens_s0": 421.5625, |
| "router/selected_tokens_s1": 11.1875, |
| "step": 2940, |
| "tokens_trained": 9.632932368 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.8368794326241135, |
| "grad_norm": 0.7609412670135498, |
| "loss": 1.1829, |
| "loss_ce": 1.115827202796936, |
| "loss_region": 0.058818917721509933, |
| "loss_total": 1.1746461391448975, |
| "lr": 0.0011250970245464725, |
| "router/selected_tokens_s0": 552.375, |
| "router/selected_tokens_s1": 14.5, |
| "step": 2950, |
| "tokens_trained": 9.665697808 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.8397163120567376, |
| "grad_norm": 1.6597883701324463, |
| "loss": 1.1808, |
| "loss_ce": 1.1614501476287842, |
| "loss_region": 0.05823326110839844, |
| "loss_total": 1.2196834087371826, |
| "lr": 0.0011246901325626217, |
| "router/selected_tokens_s0": 361.875, |
| "router/selected_tokens_s1": 9.8125, |
| "step": 2960, |
| "tokens_trained": 9.698463248 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.8425531914893617, |
| "grad_norm": 1.3326839208602905, |
| "loss": 1.1815, |
| "loss_ce": 1.0941112041473389, |
| "loss_region": 0.05796293914318085, |
| "loss_total": 1.1520740985870361, |
| "lr": 0.0011242832405787707, |
| "router/selected_tokens_s0": 536.125, |
| "router/selected_tokens_s1": 13.25, |
| "step": 2970, |
| "tokens_trained": 9.731228528 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.8453900709219858, |
| "grad_norm": 1.1672005653381348, |
| "loss": 1.183, |
| "loss_ce": 1.1921038627624512, |
| "loss_region": 0.06101511791348457, |
| "loss_total": 1.2531189918518066, |
| "lr": 0.0011238763485949196, |
| "router/selected_tokens_s0": 378.5, |
| "router/selected_tokens_s1": 13.0625, |
| "step": 2980, |
| "tokens_trained": 9.763992368 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.8482269503546099, |
| "grad_norm": 2.5261764526367188, |
| "loss": 1.1802, |
| "loss_ce": 1.0772920846939087, |
| "loss_region": 0.06559628993272781, |
| "loss_total": 1.1428884267807007, |
| "lr": 0.0011234694566110684, |
| "router/selected_tokens_s0": 440.75, |
| "router/selected_tokens_s1": 19.5625, |
| "step": 2990, |
| "tokens_trained": 9.796757008 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.851063829787234, |
| "grad_norm": 1.7378714084625244, |
| "loss": 1.1819, |
| "loss_ce": 1.1687030792236328, |
| "loss_region": 0.06797832995653152, |
| "loss_total": 1.2366814613342285, |
| "lr": 0.0011230625646272174, |
| "router/selected_tokens_s0": 416.25, |
| "router/selected_tokens_s1": 21.375, |
| "step": 3000, |
| "tokens_trained": 9.829522448 |
| }, |
| { |
| "epoch": 0.851063829787234, |
| "eval_ppl": 3.074431694876341, |
| "eval_runtime": 2.047, |
| "step": 3000, |
| "tokens_trained": 9.829522448 |
| }, |
| { |
| "epoch": 0.851063829787234, |
| "eval_F": 0.0011498860304983065, |
| "eval_F_cds": 0.0007995687718983021, |
| "eval_F_dig": 0.002109539807797484, |
| "eval_F_exon": 0.0011823704488930567, |
| "eval_F_intron": 0.0011857152933888225, |
| "eval_F_nig": 0.0012426707491309018, |
| "eval_F_promoter": 0.0009290842861053208, |
| "eval_F_utr": 0.0010445422666567187, |
| "eval_G": 0.011293016012815836, |
| "eval_G_cds": 0.010612061051343095, |
| "eval_G_dig": 0.018157775387725603, |
| "eval_G_exon": 0.010093654229692448, |
| "eval_G_intron": 0.011264918238164315, |
| "eval_G_nig": 0.01205549809200873, |
| "eval_G_promoter": 0.010501267435957667, |
| "eval_G_utr": 0.009932436168341843, |
| "eval_avg_bp_per_token": 869.6514032496307, |
| "eval_bp_per_token/cds": 1250.6741573033707, |
| "eval_bp_per_token/dig": 474.037037037037, |
| "eval_bp_per_token/exon": 845.7586206896551, |
| "eval_bp_per_token/intron": 843.3727772389266, |
| "eval_bp_per_token/nig": 804.7183863460046, |
| "eval_bp_per_token/promoter": 1076.328611898017, |
| "eval_bp_per_token/utr": 957.3571428571429, |
| "eval_ppl_cds": 3.727089112409837, |
| "eval_ppl_dig": 1.1362078085496616, |
| "eval_ppl_exon": 3.3139976145435175, |
| "eval_ppl_intron": 3.099196725149016, |
| "eval_ppl_nig": 2.941734635306602, |
| "eval_ppl_promoter": 3.3293378412790497, |
| "eval_ppl_utr": 3.420161176488646, |
| "step": 3000, |
| "tokens_trained": 9.829522448 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.8539007092198582, |
| "grad_norm": 1.2796287536621094, |
| "loss": 1.1722, |
| "loss_ce": 1.1145899295806885, |
| "loss_region": 0.06270541250705719, |
| "loss_total": 1.1772953271865845, |
| "lr": 0.0011226556726433663, |
| "router/selected_tokens_s0": 373.0625, |
| "router/selected_tokens_s1": 14.375, |
| "step": 3010, |
| "tokens_trained": 9.862287888 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.8567375886524823, |
| "grad_norm": 1.7585965394973755, |
| "loss": 1.1785, |
| "loss_ce": 1.1197354793548584, |
| "loss_region": 0.057484500110149384, |
| "loss_total": 1.1772199869155884, |
| "lr": 0.0011222487806595153, |
| "router/selected_tokens_s0": 470.375, |
| "router/selected_tokens_s1": 11.4375, |
| "step": 3020, |
| "tokens_trained": 9.895048312 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.8595744680851064, |
| "grad_norm": 2.1293466091156006, |
| "loss": 1.1776, |
| "loss_ce": 1.1695561408996582, |
| "loss_region": 0.05918756127357483, |
| "loss_total": 1.2287436723709106, |
| "lr": 0.0011218418886756645, |
| "router/selected_tokens_s0": 343.0625, |
| "router/selected_tokens_s1": 10.5, |
| "step": 3030, |
| "tokens_trained": 9.927812952 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.8624113475177305, |
| "grad_norm": 0.6708946228027344, |
| "loss": 1.1847, |
| "loss_ce": 1.1524713039398193, |
| "loss_region": 0.057949285954236984, |
| "loss_total": 1.2104206085205078, |
| "lr": 0.0011214349966918134, |
| "router/selected_tokens_s0": 389.5625, |
| "router/selected_tokens_s1": 10.3125, |
| "step": 3040, |
| "tokens_trained": 9.960578392 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.8652482269503546, |
| "grad_norm": 0.22883817553520203, |
| "loss": 1.1884, |
| "loss_ce": 1.0983341932296753, |
| "loss_region": 0.06179222837090492, |
| "loss_total": 1.1601264476776123, |
| "lr": 0.0011210281047079624, |
| "router/selected_tokens_s0": 488.625, |
| "router/selected_tokens_s1": 16.875, |
| "step": 3050, |
| "tokens_trained": 9.993343832 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.8680851063829788, |
| "grad_norm": 1.4661550521850586, |
| "loss": 1.1803, |
| "loss_ce": 1.1707799434661865, |
| "loss_region": 0.059745948761701584, |
| "loss_total": 1.2305258512496948, |
| "lr": 0.0011206212127241114, |
| "router/selected_tokens_s0": 356.0, |
| "router/selected_tokens_s1": 11.625, |
| "step": 3060, |
| "tokens_trained": 10.026108952 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.8709219858156029, |
| "grad_norm": 1.0446758270263672, |
| "loss": 1.1806, |
| "loss_ce": 1.1076890230178833, |
| "loss_region": 0.05960114300251007, |
| "loss_total": 1.167290210723877, |
| "lr": 0.0011202143207402603, |
| "router/selected_tokens_s0": 411.1875, |
| "router/selected_tokens_s1": 12.5, |
| "step": 3070, |
| "tokens_trained": 10.058874392 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.873758865248227, |
| "grad_norm": 3.738612413406372, |
| "loss": 1.1938, |
| "loss_ce": 1.1079699993133545, |
| "loss_region": 0.06472290307283401, |
| "loss_total": 1.172692894935608, |
| "lr": 0.0011198074287564093, |
| "router/selected_tokens_s0": 560.8125, |
| "router/selected_tokens_s1": 22.5, |
| "step": 3080, |
| "tokens_trained": 10.091635328 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.8765957446808511, |
| "grad_norm": 0.6973010897636414, |
| "loss": 1.1884, |
| "loss_ce": 1.148849368095398, |
| "loss_region": 0.06155673786997795, |
| "loss_total": 1.2104060649871826, |
| "lr": 0.0011194005367725583, |
| "router/selected_tokens_s0": 433.1875, |
| "router/selected_tokens_s1": 15.25, |
| "step": 3090, |
| "tokens_trained": 10.124400768 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.8794326241134752, |
| "grad_norm": 1.7843979597091675, |
| "loss": 1.1861, |
| "loss_ce": 1.1582818031311035, |
| "loss_region": 0.05559933930635452, |
| "loss_total": 1.2138811349868774, |
| "lr": 0.0011189936447887072, |
| "router/selected_tokens_s0": 460.5625, |
| "router/selected_tokens_s1": 9.125, |
| "step": 3100, |
| "tokens_trained": 10.157166208 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.8822695035460993, |
| "grad_norm": 3.1261589527130127, |
| "loss": 1.182, |
| "loss_ce": 1.1174323558807373, |
| "loss_region": 0.05768623203039169, |
| "loss_total": 1.1751185655593872, |
| "lr": 0.0011185867528048562, |
| "router/selected_tokens_s0": 536.625, |
| "router/selected_tokens_s1": 13.0625, |
| "step": 3110, |
| "tokens_trained": 10.189931648 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.8851063829787233, |
| "grad_norm": 1.7496775388717651, |
| "loss": 1.1924, |
| "loss_ce": 1.1259160041809082, |
| "loss_region": 0.056387342512607574, |
| "loss_total": 1.1823033094406128, |
| "lr": 0.0011181798608210052, |
| "router/selected_tokens_s0": 313.0625, |
| "router/selected_tokens_s1": 7.125, |
| "step": 3120, |
| "tokens_trained": 10.22269608 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.8879432624113475, |
| "grad_norm": 2.1885197162628174, |
| "loss": 1.1868, |
| "loss_ce": 1.0467259883880615, |
| "loss_region": 0.06346448510885239, |
| "loss_total": 1.110190510749817, |
| "lr": 0.0011177729688371541, |
| "router/selected_tokens_s0": 419.4375, |
| "router/selected_tokens_s1": 17.1875, |
| "step": 3130, |
| "tokens_trained": 10.25546152 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.8907801418439716, |
| "grad_norm": 2.0042295455932617, |
| "loss": 1.1805, |
| "loss_ce": 1.1999831199645996, |
| "loss_region": 0.06402087956666946, |
| "loss_total": 1.2640039920806885, |
| "lr": 0.001117366076853303, |
| "router/selected_tokens_s0": 401.5, |
| "router/selected_tokens_s1": 16.25, |
| "step": 3140, |
| "tokens_trained": 10.28822616 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.8936170212765957, |
| "grad_norm": 1.7117962837219238, |
| "loss": 1.1863, |
| "loss_ce": 1.0752744674682617, |
| "loss_region": 0.055134840309619904, |
| "loss_total": 1.1304093599319458, |
| "lr": 0.001116959184869452, |
| "router/selected_tokens_s0": 383.25, |
| "router/selected_tokens_s1": 7.5, |
| "step": 3150, |
| "tokens_trained": 10.3209916 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.8964539007092198, |
| "grad_norm": 2.2417776584625244, |
| "loss": 1.1847, |
| "loss_ce": 1.133635401725769, |
| "loss_region": 0.05906910449266434, |
| "loss_total": 1.1927045583724976, |
| "lr": 0.001116552292885601, |
| "router/selected_tokens_s0": 470.125, |
| "router/selected_tokens_s1": 13.5, |
| "step": 3160, |
| "tokens_trained": 10.35375624 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.8992907801418439, |
| "grad_norm": 1.591855525970459, |
| "loss": 1.1846, |
| "loss_ce": 1.0943703651428223, |
| "loss_region": 0.05788113921880722, |
| "loss_total": 1.1522514820098877, |
| "lr": 0.00111614540090175, |
| "router/selected_tokens_s0": 473.875, |
| "router/selected_tokens_s1": 11.6875, |
| "step": 3170, |
| "tokens_trained": 10.38652168 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.902127659574468, |
| "grad_norm": 1.1587814092636108, |
| "loss": 1.1926, |
| "loss_ce": 1.195311427116394, |
| "loss_region": 0.06113569065928459, |
| "loss_total": 1.2564470767974854, |
| "lr": 0.001115738508917899, |
| "router/selected_tokens_s0": 375.3125, |
| "router/selected_tokens_s1": 13.3125, |
| "step": 3180, |
| "tokens_trained": 10.41928712 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.9049645390070922, |
| "grad_norm": 0.9598585367202759, |
| "loss": 1.1911, |
| "loss_ce": 1.082224726676941, |
| "loss_region": 0.057053092867136, |
| "loss_total": 1.1392778158187866, |
| "lr": 0.001115331616934048, |
| "router/selected_tokens_s0": 362.1875, |
| "router/selected_tokens_s1": 8.8125, |
| "step": 3190, |
| "tokens_trained": 10.45205256 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.9078014184397163, |
| "grad_norm": 3.3285744190216064, |
| "loss": 1.1946, |
| "loss_ce": 1.1597694158554077, |
| "loss_region": 0.055617451667785645, |
| "loss_total": 1.2153868675231934, |
| "lr": 0.0011149247249501969, |
| "router/selected_tokens_s0": 449.625, |
| "router/selected_tokens_s1": 8.875, |
| "step": 3200, |
| "tokens_trained": 10.4848172 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.9106382978723404, |
| "grad_norm": 2.8464155197143555, |
| "loss": 1.1819, |
| "loss_ce": 1.1424304246902466, |
| "loss_region": 0.07084633409976959, |
| "loss_total": 1.213276743888855, |
| "lr": 0.001114517832966346, |
| "router/selected_tokens_s0": 359.3125, |
| "router/selected_tokens_s1": 19.8125, |
| "step": 3210, |
| "tokens_trained": 10.51758264 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.9134751773049645, |
| "grad_norm": 2.721564769744873, |
| "loss": 1.1914, |
| "loss_ce": 1.173911690711975, |
| "loss_region": 0.057439640164375305, |
| "loss_total": 1.231351375579834, |
| "lr": 0.001114110940982495, |
| "router/selected_tokens_s0": 363.3125, |
| "router/selected_tokens_s1": 9.125, |
| "step": 3220, |
| "tokens_trained": 10.55034808 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.9163120567375886, |
| "grad_norm": 1.964784026145935, |
| "loss": 1.1948, |
| "loss_ce": 1.1454870700836182, |
| "loss_region": 0.06161702051758766, |
| "loss_total": 1.2071040868759155, |
| "lr": 0.001113704048998644, |
| "router/selected_tokens_s0": 264.3125, |
| "router/selected_tokens_s1": 9.5625, |
| "step": 3230, |
| "tokens_trained": 10.58311352 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.9191489361702128, |
| "grad_norm": 1.3745988607406616, |
| "loss": 1.1871, |
| "loss_ce": 1.1452701091766357, |
| "loss_region": 0.06286661326885223, |
| "loss_total": 1.2081366777420044, |
| "lr": 0.0011132971570147927, |
| "router/selected_tokens_s0": 394.5625, |
| "router/selected_tokens_s1": 15.125, |
| "step": 3240, |
| "tokens_trained": 10.61587896 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.9219858156028369, |
| "grad_norm": 2.1371731758117676, |
| "loss": 1.1914, |
| "loss_ce": 1.1208454370498657, |
| "loss_region": 0.060756195336580276, |
| "loss_total": 1.181601643562317, |
| "lr": 0.0011128902650309417, |
| "router/selected_tokens_s0": 399.25, |
| "router/selected_tokens_s1": 13.25, |
| "step": 3250, |
| "tokens_trained": 10.6486444 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.924822695035461, |
| "grad_norm": 0.8851431608200073, |
| "loss": 1.1752, |
| "loss_ce": 1.127838373184204, |
| "loss_region": 0.059403836727142334, |
| "loss_total": 1.1872422695159912, |
| "lr": 0.0011124833730470907, |
| "router/selected_tokens_s0": 485.4375, |
| "router/selected_tokens_s1": 14.3125, |
| "step": 3260, |
| "tokens_trained": 10.68140984 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.9276595744680851, |
| "grad_norm": 1.363264560699463, |
| "loss": 1.1837, |
| "loss_ce": 1.0901366472244263, |
| "loss_region": 0.05801825225353241, |
| "loss_total": 1.148154854774475, |
| "lr": 0.0011120764810632396, |
| "router/selected_tokens_s0": 623.9375, |
| "router/selected_tokens_s1": 14.0625, |
| "step": 3270, |
| "tokens_trained": 10.714171944 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.9304964539007092, |
| "grad_norm": 2.8988099098205566, |
| "loss": 1.1867, |
| "loss_ce": 1.0907230377197266, |
| "loss_region": 0.06304143369197845, |
| "loss_total": 1.1537644863128662, |
| "lr": 0.0011116695890793888, |
| "router/selected_tokens_s0": 524.875, |
| "router/selected_tokens_s1": 19.875, |
| "step": 3280, |
| "tokens_trained": 10.746937384 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.9333333333333333, |
| "grad_norm": 1.667448878288269, |
| "loss": 1.1761, |
| "loss_ce": 1.166186809539795, |
| "loss_region": 0.06140407547354698, |
| "loss_total": 1.2275909185409546, |
| "lr": 0.0011112626970955378, |
| "router/selected_tokens_s0": 421.0, |
| "router/selected_tokens_s1": 14.8125, |
| "step": 3290, |
| "tokens_trained": 10.779702824 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.9361702127659575, |
| "grad_norm": 2.053053379058838, |
| "loss": 1.1802, |
| "loss_ce": 1.2236586809158325, |
| "loss_region": 0.062042418867349625, |
| "loss_total": 1.2857011556625366, |
| "lr": 0.0011108558051116867, |
| "router/selected_tokens_s0": 396.5625, |
| "router/selected_tokens_s1": 14.5, |
| "step": 3300, |
| "tokens_trained": 10.812468264 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.9390070921985816, |
| "grad_norm": 2.3373043537139893, |
| "loss": 1.1879, |
| "loss_ce": 1.0431127548217773, |
| "loss_region": 0.05739426985383034, |
| "loss_total": 1.1005070209503174, |
| "lr": 0.0011104489131278357, |
| "router/selected_tokens_s0": 408.5625, |
| "router/selected_tokens_s1": 10.125, |
| "step": 3310, |
| "tokens_trained": 10.845230648 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.9418439716312057, |
| "grad_norm": 2.5947089195251465, |
| "loss": 1.1842, |
| "loss_ce": 1.0695154666900635, |
| "loss_region": 0.05876738205552101, |
| "loss_total": 1.128282904624939, |
| "lr": 0.0011100420211439847, |
| "router/selected_tokens_s0": 479.875, |
| "router/selected_tokens_s1": 13.125, |
| "step": 3320, |
| "tokens_trained": 10.877996088 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.9446808510638298, |
| "grad_norm": 1.015472412109375, |
| "loss": 1.1902, |
| "loss_ce": 1.1727052927017212, |
| "loss_region": 0.06721828877925873, |
| "loss_total": 1.2399235963821411, |
| "lr": 0.0011096351291601336, |
| "router/selected_tokens_s0": 426.25, |
| "router/selected_tokens_s1": 21.0625, |
| "step": 3330, |
| "tokens_trained": 10.910761528 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.9475177304964539, |
| "grad_norm": 3.790830135345459, |
| "loss": 1.1912, |
| "loss_ce": 1.1549181938171387, |
| "loss_region": 0.06554245948791504, |
| "loss_total": 1.2204606533050537, |
| "lr": 0.0011092282371762826, |
| "router/selected_tokens_s0": 472.25, |
| "router/selected_tokens_s1": 20.9375, |
| "step": 3340, |
| "tokens_trained": 10.943526968 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.950354609929078, |
| "grad_norm": 2.207831382751465, |
| "loss": 1.1935, |
| "loss_ce": 1.1465940475463867, |
| "loss_region": 0.05767892673611641, |
| "loss_total": 1.204272985458374, |
| "lr": 0.0011088213451924316, |
| "router/selected_tokens_s0": 309.375, |
| "router/selected_tokens_s1": 8.25, |
| "step": 3350, |
| "tokens_trained": 10.976292408 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.9531914893617022, |
| "grad_norm": 1.879736304283142, |
| "loss": 1.1895, |
| "loss_ce": 1.075555682182312, |
| "loss_region": 0.061205778270959854, |
| "loss_total": 1.1367614269256592, |
| "lr": 0.0011084144532085805, |
| "router/selected_tokens_s0": 389.4375, |
| "router/selected_tokens_s1": 13.375, |
| "step": 3360, |
| "tokens_trained": 11.009057848 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.9560283687943263, |
| "grad_norm": 2.642979145050049, |
| "loss": 1.1893, |
| "loss_ce": 1.0544582605361938, |
| "loss_region": 0.06224330514669418, |
| "loss_total": 1.116701602935791, |
| "lr": 0.0011080075612247295, |
| "router/selected_tokens_s0": 490.5625, |
| "router/selected_tokens_s1": 17.0625, |
| "step": 3370, |
| "tokens_trained": 11.041823288 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.9588652482269504, |
| "grad_norm": 1.8629803657531738, |
| "loss": 1.1888, |
| "loss_ce": 1.0984424352645874, |
| "loss_region": 0.060682639479637146, |
| "loss_total": 1.1591250896453857, |
| "lr": 0.0011076006692408785, |
| "router/selected_tokens_s0": 426.3125, |
| "router/selected_tokens_s1": 14.25, |
| "step": 3380, |
| "tokens_trained": 11.074587928 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.9617021276595744, |
| "grad_norm": 2.3181028366088867, |
| "loss": 1.1818, |
| "loss_ce": 1.116972804069519, |
| "loss_region": 0.0578857958316803, |
| "loss_total": 1.174858570098877, |
| "lr": 0.0011071937772570274, |
| "router/selected_tokens_s0": 447.1875, |
| "router/selected_tokens_s1": 11.5, |
| "step": 3390, |
| "tokens_trained": 11.107353368 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.9645390070921985, |
| "grad_norm": 1.6701099872589111, |
| "loss": 1.1952, |
| "loss_ce": 1.1640697717666626, |
| "loss_region": 0.05918049067258835, |
| "loss_total": 1.2232502698898315, |
| "lr": 0.0011067868852731764, |
| "router/selected_tokens_s0": 487.875, |
| "router/selected_tokens_s1": 13.5, |
| "step": 3400, |
| "tokens_trained": 11.140118808 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.9673758865248226, |
| "grad_norm": 2.3079371452331543, |
| "loss": 1.1839, |
| "loss_ce": 1.1166282892227173, |
| "loss_region": 0.06041375920176506, |
| "loss_total": 1.177042007446289, |
| "lr": 0.0011063799932893254, |
| "router/selected_tokens_s0": 495.375, |
| "router/selected_tokens_s1": 16.0, |
| "step": 3410, |
| "tokens_trained": 11.172884248 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.9702127659574468, |
| "grad_norm": 0.47914406657218933, |
| "loss": 1.188, |
| "loss_ce": 1.1150635480880737, |
| "loss_region": 0.058126792311668396, |
| "loss_total": 1.1731903553009033, |
| "lr": 0.0011059731013054743, |
| "router/selected_tokens_s0": 524.125, |
| "router/selected_tokens_s1": 12.9375, |
| "step": 3420, |
| "tokens_trained": 11.205649688 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.9730496453900709, |
| "grad_norm": 4.547239780426025, |
| "loss": 1.1843, |
| "loss_ce": 1.2131940126419067, |
| "loss_region": 0.0646928995847702, |
| "loss_total": 1.2778868675231934, |
| "lr": 0.0011055662093216233, |
| "router/selected_tokens_s0": 486.5, |
| "router/selected_tokens_s1": 20.4375, |
| "step": 3430, |
| "tokens_trained": 11.238415128 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.975886524822695, |
| "grad_norm": 3.8095717430114746, |
| "loss": 1.2016, |
| "loss_ce": 1.221601128578186, |
| "loss_region": 0.058637771755456924, |
| "loss_total": 1.2802388668060303, |
| "lr": 0.0011051593173377723, |
| "router/selected_tokens_s0": 254.4375, |
| "router/selected_tokens_s1": 7.75, |
| "step": 3440, |
| "tokens_trained": 11.27117724 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.9787234042553191, |
| "grad_norm": 1.4747904539108276, |
| "loss": 1.1841, |
| "loss_ce": 1.0546537637710571, |
| "loss_region": 0.06348768621683121, |
| "loss_total": 1.1181414127349854, |
| "lr": 0.0011047524253539212, |
| "router/selected_tokens_s0": 416.625, |
| "router/selected_tokens_s1": 16.9375, |
| "step": 3450, |
| "tokens_trained": 11.30394268 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.9815602836879432, |
| "grad_norm": 3.052072525024414, |
| "loss": 1.1922, |
| "loss_ce": 1.1165164709091187, |
| "loss_region": 0.06090632081031799, |
| "loss_total": 1.1774227619171143, |
| "lr": 0.0011043455333700704, |
| "router/selected_tokens_s0": 375.1875, |
| "router/selected_tokens_s1": 12.25, |
| "step": 3460, |
| "tokens_trained": 11.33670812 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.9843971631205674, |
| "grad_norm": 0.5006700158119202, |
| "loss": 1.1902, |
| "loss_ce": 1.1183781623840332, |
| "loss_region": 0.055539172142744064, |
| "loss_total": 1.173917293548584, |
| "lr": 0.0011039386413862194, |
| "router/selected_tokens_s0": 613.75, |
| "router/selected_tokens_s1": 9.875, |
| "step": 3470, |
| "tokens_trained": 11.369471136 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.9872340425531915, |
| "grad_norm": 0.7260355949401855, |
| "loss": 1.18, |
| "loss_ce": 1.0984702110290527, |
| "loss_region": 0.06133873388171196, |
| "loss_total": 1.1598089933395386, |
| "lr": 0.0011035317494023683, |
| "router/selected_tokens_s0": 591.1875, |
| "router/selected_tokens_s1": 18.8125, |
| "step": 3480, |
| "tokens_trained": 11.402236576 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.9900709219858156, |
| "grad_norm": 1.2982653379440308, |
| "loss": 1.1858, |
| "loss_ce": 1.0897129774093628, |
| "loss_region": 0.05735067278146744, |
| "loss_total": 1.1470636129379272, |
| "lr": 0.001103124857418517, |
| "router/selected_tokens_s0": 467.0625, |
| "router/selected_tokens_s1": 11.0, |
| "step": 3490, |
| "tokens_trained": 11.435000416 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.9929078014184397, |
| "grad_norm": 2.721036672592163, |
| "loss": 1.1799, |
| "loss_ce": 1.0403258800506592, |
| "loss_region": 0.060663700103759766, |
| "loss_total": 1.100989580154419, |
| "lr": 0.001102717965434666, |
| "router/selected_tokens_s0": 470.125, |
| "router/selected_tokens_s1": 15.25, |
| "step": 3500, |
| "tokens_trained": 11.467765856 |
| }, |
| { |
| "epoch": 0.9929078014184397, |
| "eval_ppl": 3.0734159224478197, |
| "eval_runtime": 2.0438, |
| "step": 3500, |
| "tokens_trained": 11.467765856 |
| }, |
| { |
| "epoch": 0.9929078014184397, |
| "eval_F": 0.0010450035158996796, |
| "eval_F_cds": 0.0007456652591860569, |
| "eval_F_dig": 0.0008203765919212438, |
| "eval_F_exon": 0.0008154278957883149, |
| "eval_F_intron": 0.0010699422514866485, |
| "eval_F_nig": 0.0011481930661093125, |
| "eval_F_promoter": 0.0008896047838628851, |
| "eval_F_utr": 0.001065859455772162, |
| "eval_G": 0.014131649526876887, |
| "eval_G_cds": 0.012825789882984457, |
| "eval_G_dig": 0.026729230555121494, |
| "eval_G_exon": 0.013125177313097132, |
| "eval_G_intron": 0.014041850972301874, |
| "eval_G_nig": 0.015474153168448888, |
| "eval_G_promoter": 0.012678847285718421, |
| "eval_G_utr": 0.012453943213672845, |
| "eval_avg_bp_per_token": 956.9345794392524, |
| "eval_bp_per_token/cds": 1341.0843373493976, |
| "eval_bp_per_token/dig": 1218.952380952381, |
| "eval_bp_per_token/exon": 1226.35, |
| "eval_bp_per_token/intron": 934.6298817628091, |
| "eval_bp_per_token/nig": 870.9336691855583, |
| "eval_bp_per_token/promoter": 1124.094674556213, |
| "eval_bp_per_token/utr": 938.21, |
| "eval_ppl_cds": 3.75262282223572, |
| "eval_ppl_dig": 1.1259622508284348, |
| "eval_ppl_exon": 3.3195279800467707, |
| "eval_ppl_intron": 3.0972300222467624, |
| "eval_ppl_nig": 2.9334187719602456, |
| "eval_ppl_promoter": 3.3346561893814193, |
| "eval_ppl_utr": 3.428849009891916, |
| "step": 3500, |
| "tokens_trained": 11.467765856 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.9957446808510638, |
| "grad_norm": 2.2520830631256104, |
| "loss": 1.1819, |
| "loss_ce": 1.1254873275756836, |
| "loss_region": 0.05975877493619919, |
| "loss_total": 1.1852461099624634, |
| "lr": 0.001102311073450815, |
| "router/selected_tokens_s0": 381.6875, |
| "router/selected_tokens_s1": 11.8125, |
| "step": 3510, |
| "tokens_trained": 11.500531296 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.9985815602836879, |
| "grad_norm": 1.1383335590362549, |
| "loss": 1.1876, |
| "loss_ce": 1.1634926795959473, |
| "loss_region": 0.05750522017478943, |
| "loss_total": 1.220997929573059, |
| "lr": 0.001101904181466964, |
| "router/selected_tokens_s0": 403.125, |
| "router/selected_tokens_s1": 9.9375, |
| "step": 3520, |
| "tokens_trained": 11.533296736 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.001418439716312, |
| "grad_norm": 1.6436549425125122, |
| "loss": 1.1797, |
| "loss_ce": 1.1281497478485107, |
| "loss_region": 0.06324146687984467, |
| "loss_total": 1.1913912296295166, |
| "lr": 0.0011014972894831132, |
| "router/selected_tokens_s0": 401.75, |
| "router/selected_tokens_s1": 16.0625, |
| "step": 3530, |
| "tokens_trained": 11.566062176 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.004255319148936, |
| "grad_norm": 2.5775251388549805, |
| "loss": 1.179, |
| "loss_ce": 1.1454085111618042, |
| "loss_region": 0.056651920080184937, |
| "loss_total": 1.2020604610443115, |
| "lr": 0.0011010903974992621, |
| "router/selected_tokens_s0": 416.875, |
| "router/selected_tokens_s1": 9.5, |
| "step": 3540, |
| "tokens_trained": 11.598827616 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.0070921985815602, |
| "grad_norm": 4.208657741546631, |
| "loss": 1.2065, |
| "loss_ce": 1.154625415802002, |
| "loss_region": 0.06376195698976517, |
| "loss_total": 1.2183873653411865, |
| "lr": 0.001100683505515411, |
| "router/selected_tokens_s0": 392.1875, |
| "router/selected_tokens_s1": 16.1875, |
| "step": 3550, |
| "tokens_trained": 11.631593056 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.0099290780141843, |
| "grad_norm": 0.942011833190918, |
| "loss": 1.1958, |
| "loss_ce": 1.183512568473816, |
| "loss_region": 0.05952717736363411, |
| "loss_total": 1.2430397272109985, |
| "lr": 0.00110027661353156, |
| "router/selected_tokens_s0": 374.625, |
| "router/selected_tokens_s1": 11.1875, |
| "step": 3560, |
| "tokens_trained": 11.664358496 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.0127659574468084, |
| "grad_norm": 1.8631629943847656, |
| "loss": 1.1879, |
| "loss_ce": 1.0455646514892578, |
| "loss_region": 0.0647355318069458, |
| "loss_total": 1.1103001832962036, |
| "lr": 0.001099869721547709, |
| "router/selected_tokens_s0": 387.1875, |
| "router/selected_tokens_s1": 16.875, |
| "step": 3570, |
| "tokens_trained": 11.697123936 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.0156028368794325, |
| "grad_norm": 1.5745539665222168, |
| "loss": 1.1884, |
| "loss_ce": 1.1641039848327637, |
| "loss_region": 0.05902648717164993, |
| "loss_total": 1.223130464553833, |
| "lr": 0.001099462829563858, |
| "router/selected_tokens_s0": 415.3125, |
| "router/selected_tokens_s1": 12.3125, |
| "step": 3580, |
| "tokens_trained": 11.729887776 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.0184397163120567, |
| "grad_norm": 1.9251041412353516, |
| "loss": 1.1883, |
| "loss_ce": 1.0914249420166016, |
| "loss_region": 0.06119132786989212, |
| "loss_total": 1.152616262435913, |
| "lr": 0.001099055937580007, |
| "router/selected_tokens_s0": 443.0, |
| "router/selected_tokens_s1": 15.0, |
| "step": 3590, |
| "tokens_trained": 11.762653216 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.0212765957446808, |
| "grad_norm": 2.675755262374878, |
| "loss": 1.1864, |
| "loss_ce": 1.139623761177063, |
| "loss_region": 0.062219422310590744, |
| "loss_total": 1.2018431425094604, |
| "lr": 0.001098649045596156, |
| "router/selected_tokens_s0": 408.25, |
| "router/selected_tokens_s1": 15.5, |
| "step": 3600, |
| "tokens_trained": 11.795417056 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.0241134751773049, |
| "grad_norm": 0.8826751708984375, |
| "loss": 1.1881, |
| "loss_ce": 1.1148252487182617, |
| "loss_region": 0.057967670261859894, |
| "loss_total": 1.172792911529541, |
| "lr": 0.0010982421536123049, |
| "router/selected_tokens_s0": 486.75, |
| "router/selected_tokens_s1": 12.25, |
| "step": 3610, |
| "tokens_trained": 11.828182496 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.026950354609929, |
| "grad_norm": 2.168684244155884, |
| "loss": 1.1835, |
| "loss_ce": 1.1765309572219849, |
| "loss_region": 0.06523355096578598, |
| "loss_total": 1.2417645454406738, |
| "lr": 0.0010978352616284538, |
| "router/selected_tokens_s0": 513.4375, |
| "router/selected_tokens_s1": 22.125, |
| "step": 3620, |
| "tokens_trained": 11.860946336 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.0297872340425531, |
| "grad_norm": 2.1988205909729004, |
| "loss": 1.1775, |
| "loss_ce": 1.1069942712783813, |
| "loss_region": 0.05631319805979729, |
| "loss_total": 1.1633074283599854, |
| "lr": 0.0010974283696446028, |
| "router/selected_tokens_s0": 459.0, |
| "router/selected_tokens_s1": 8.875, |
| "step": 3630, |
| "tokens_trained": 11.893711776 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.0326241134751772, |
| "grad_norm": 2.237173557281494, |
| "loss": 1.1737, |
| "loss_ce": 1.0201938152313232, |
| "loss_region": 0.0641506239771843, |
| "loss_total": 1.0843443870544434, |
| "lr": 0.0010970214776607518, |
| "router/selected_tokens_s0": 433.3125, |
| "router/selected_tokens_s1": 17.875, |
| "step": 3640, |
| "tokens_trained": 11.926477216 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.0354609929078014, |
| "grad_norm": 1.5898455381393433, |
| "loss": 1.1757, |
| "loss_ce": 1.146393895149231, |
| "loss_region": 0.05984107777476311, |
| "loss_total": 1.2062349319458008, |
| "lr": 0.0010966145856769007, |
| "router/selected_tokens_s0": 498.0, |
| "router/selected_tokens_s1": 14.5, |
| "step": 3650, |
| "tokens_trained": 11.959242656 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.0382978723404255, |
| "grad_norm": 1.6782652139663696, |
| "loss": 1.1872, |
| "loss_ce": 1.065536618232727, |
| "loss_region": 0.06172078102827072, |
| "loss_total": 1.1272573471069336, |
| "lr": 0.0010962076936930497, |
| "router/selected_tokens_s0": 456.875, |
| "router/selected_tokens_s1": 16.1875, |
| "step": 3660, |
| "tokens_trained": 11.992008096 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.0411347517730496, |
| "grad_norm": 2.0294923782348633, |
| "loss": 1.189, |
| "loss_ce": 1.1464074850082397, |
| "loss_region": 0.0617014616727829, |
| "loss_total": 1.208108901977539, |
| "lr": 0.0010958008017091987, |
| "router/selected_tokens_s0": 320.9375, |
| "router/selected_tokens_s1": 11.9375, |
| "step": 3670, |
| "tokens_trained": 12.024773536 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.0439716312056737, |
| "grad_norm": 1.3117985725402832, |
| "loss": 1.1906, |
| "loss_ce": 1.0983753204345703, |
| "loss_region": 0.05521257221698761, |
| "loss_total": 1.1535879373550415, |
| "lr": 0.0010953939097253476, |
| "router/selected_tokens_s0": 352.6875, |
| "router/selected_tokens_s1": 7.375, |
| "step": 3680, |
| "tokens_trained": 12.057538976 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.0468085106382978, |
| "grad_norm": 2.1483266353607178, |
| "loss": 1.1763, |
| "loss_ce": 1.1546844244003296, |
| "loss_region": 0.056960638612508774, |
| "loss_total": 1.2116450071334839, |
| "lr": 0.0010949870177414966, |
| "router/selected_tokens_s0": 412.0, |
| "router/selected_tokens_s1": 9.75, |
| "step": 3690, |
| "tokens_trained": 12.090304416 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.049645390070922, |
| "grad_norm": 0.35853058099746704, |
| "loss": 1.1845, |
| "loss_ce": 1.103888988494873, |
| "loss_region": 0.060773998498916626, |
| "loss_total": 1.1646629571914673, |
| "lr": 0.0010945801257576456, |
| "router/selected_tokens_s0": 467.375, |
| "router/selected_tokens_s1": 15.25, |
| "step": 3700, |
| "tokens_trained": 12.123069856 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.052482269503546, |
| "grad_norm": 1.2375736236572266, |
| "loss": 1.1833, |
| "loss_ce": 1.1234687566757202, |
| "loss_region": 0.06205715984106064, |
| "loss_total": 1.185525894165039, |
| "lr": 0.0010941732337737947, |
| "router/selected_tokens_s0": 533.0625, |
| "router/selected_tokens_s1": 18.875, |
| "step": 3710, |
| "tokens_trained": 12.155835296 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.0553191489361702, |
| "grad_norm": 2.2981228828430176, |
| "loss": 1.1837, |
| "loss_ce": 1.1076805591583252, |
| "loss_region": 0.06259025633335114, |
| "loss_total": 1.1702708005905151, |
| "lr": 0.0010937663417899437, |
| "router/selected_tokens_s0": 405.0, |
| "router/selected_tokens_s1": 15.5625, |
| "step": 3720, |
| "tokens_trained": 12.188600576 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.0581560283687943, |
| "grad_norm": 3.0473289489746094, |
| "loss": 1.1866, |
| "loss_ce": 1.1796292066574097, |
| "loss_region": 0.059974055737257004, |
| "loss_total": 1.2396032810211182, |
| "lr": 0.0010933594498060927, |
| "router/selected_tokens_s0": 430.0625, |
| "router/selected_tokens_s1": 13.6875, |
| "step": 3730, |
| "tokens_trained": 12.221366016 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.0609929078014184, |
| "grad_norm": 1.4829760789871216, |
| "loss": 1.1815, |
| "loss_ce": 1.1546300649642944, |
| "loss_region": 0.0575762614607811, |
| "loss_total": 1.2122063636779785, |
| "lr": 0.0010929525578222414, |
| "router/selected_tokens_s0": 386.875, |
| "router/selected_tokens_s1": 9.75, |
| "step": 3740, |
| "tokens_trained": 12.254131456 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.0638297872340425, |
| "grad_norm": 1.9733688831329346, |
| "loss": 1.1836, |
| "loss_ce": 1.102645754814148, |
| "loss_region": 0.059846386313438416, |
| "loss_total": 1.1624921560287476, |
| "lr": 0.0010925456658383904, |
| "router/selected_tokens_s0": 407.25, |
| "router/selected_tokens_s1": 12.5, |
| "step": 3750, |
| "tokens_trained": 12.286896896 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.0666666666666667, |
| "grad_norm": 1.8906550407409668, |
| "loss": 1.1801, |
| "loss_ce": 1.1507176160812378, |
| "loss_region": 0.05901917442679405, |
| "loss_total": 1.2097368240356445, |
| "lr": 0.0010921387738545394, |
| "router/selected_tokens_s0": 405.875, |
| "router/selected_tokens_s1": 11.8125, |
| "step": 3760, |
| "tokens_trained": 12.319662336 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.0695035460992908, |
| "grad_norm": 1.3158189058303833, |
| "loss": 1.1746, |
| "loss_ce": 1.0708504915237427, |
| "loss_region": 0.057457201182842255, |
| "loss_total": 1.1283077001571655, |
| "lr": 0.0010917318818706883, |
| "router/selected_tokens_s0": 435.625, |
| "router/selected_tokens_s1": 10.5625, |
| "step": 3770, |
| "tokens_trained": 12.352427776 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.0723404255319149, |
| "grad_norm": 2.073458194732666, |
| "loss": 1.1798, |
| "loss_ce": 1.141626238822937, |
| "loss_region": 0.06303444504737854, |
| "loss_total": 1.2046606540679932, |
| "lr": 0.0010913249898868375, |
| "router/selected_tokens_s0": 410.75, |
| "router/selected_tokens_s1": 16.1875, |
| "step": 3780, |
| "tokens_trained": 12.385193216 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.075177304964539, |
| "grad_norm": 0.6519181132316589, |
| "loss": 1.1842, |
| "loss_ce": 1.1240960359573364, |
| "loss_region": 0.05678265169262886, |
| "loss_total": 1.1808786392211914, |
| "lr": 0.0010909180979029865, |
| "router/selected_tokens_s0": 423.125, |
| "router/selected_tokens_s1": 9.125, |
| "step": 3790, |
| "tokens_trained": 12.417958656 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.0780141843971631, |
| "grad_norm": 1.093948245048523, |
| "loss": 1.1766, |
| "loss_ce": 1.0316849946975708, |
| "loss_region": 0.06699247658252716, |
| "loss_total": 1.0986775159835815, |
| "lr": 0.0010905112059191354, |
| "router/selected_tokens_s0": 436.625, |
| "router/selected_tokens_s1": 21.375, |
| "step": 3800, |
| "tokens_trained": 12.450724096 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.0808510638297872, |
| "grad_norm": 2.533555746078491, |
| "loss": 1.1842, |
| "loss_ce": 1.0861109495162964, |
| "loss_region": 0.06075740605592728, |
| "loss_total": 1.146868348121643, |
| "lr": 0.0010901043139352844, |
| "router/selected_tokens_s0": 415.3125, |
| "router/selected_tokens_s1": 14.0, |
| "step": 3810, |
| "tokens_trained": 12.483489536 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.0836879432624114, |
| "grad_norm": 0.5290806293487549, |
| "loss": 1.1715, |
| "loss_ce": 1.1869332790374756, |
| "loss_region": 0.05955567955970764, |
| "loss_total": 1.2464889287948608, |
| "lr": 0.0010896974219514334, |
| "router/selected_tokens_s0": 391.4375, |
| "router/selected_tokens_s1": 11.875, |
| "step": 3820, |
| "tokens_trained": 12.516254976 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.0865248226950355, |
| "grad_norm": 1.6873115301132202, |
| "loss": 1.1825, |
| "loss_ce": 1.1999320983886719, |
| "loss_region": 0.06173175573348999, |
| "loss_total": 1.2616639137268066, |
| "lr": 0.0010892905299675823, |
| "router/selected_tokens_s0": 436.875, |
| "router/selected_tokens_s1": 15.5625, |
| "step": 3830, |
| "tokens_trained": 12.549020416 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.0893617021276596, |
| "grad_norm": 2.160799264907837, |
| "loss": 1.187, |
| "loss_ce": 1.1249465942382812, |
| "loss_region": 0.06040271744132042, |
| "loss_total": 1.1853493452072144, |
| "lr": 0.0010888836379837313, |
| "router/selected_tokens_s0": 313.25, |
| "router/selected_tokens_s1": 10.625, |
| "step": 3840, |
| "tokens_trained": 12.581785856 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.0921985815602837, |
| "grad_norm": 2.2160236835479736, |
| "loss": 1.1818, |
| "loss_ce": 1.1184971332550049, |
| "loss_region": 0.06124234199523926, |
| "loss_total": 1.1797394752502441, |
| "lr": 0.0010884767459998803, |
| "router/selected_tokens_s0": 381.25, |
| "router/selected_tokens_s1": 13.5, |
| "step": 3850, |
| "tokens_trained": 12.614549696 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.0950354609929078, |
| "grad_norm": 1.383900761604309, |
| "loss": 1.1753, |
| "loss_ce": 1.0543675422668457, |
| "loss_region": 0.058981068432331085, |
| "loss_total": 1.1133486032485962, |
| "lr": 0.0010880698540160292, |
| "router/selected_tokens_s0": 414.375, |
| "router/selected_tokens_s1": 11.875, |
| "step": 3860, |
| "tokens_trained": 12.647315136 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.097872340425532, |
| "grad_norm": 1.4804929494857788, |
| "loss": 1.1723, |
| "loss_ce": 1.0903220176696777, |
| "loss_region": 0.06363452225923538, |
| "loss_total": 1.1539565324783325, |
| "lr": 0.0010876629620321782, |
| "router/selected_tokens_s0": 428.8125, |
| "router/selected_tokens_s1": 17.25, |
| "step": 3870, |
| "tokens_trained": 12.680079808 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.100709219858156, |
| "grad_norm": 2.7304487228393555, |
| "loss": 1.1821, |
| "loss_ce": 1.077646017074585, |
| "loss_region": 0.057004958391189575, |
| "loss_total": 1.1346509456634521, |
| "lr": 0.0010872560700483272, |
| "router/selected_tokens_s0": 478.5, |
| "router/selected_tokens_s1": 10.75, |
| "step": 3880, |
| "tokens_trained": 12.712844448 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.1035460992907802, |
| "grad_norm": 1.97611665725708, |
| "loss": 1.203, |
| "loss_ce": 1.1368480920791626, |
| "loss_region": 0.05987485125660896, |
| "loss_total": 1.1967229843139648, |
| "lr": 0.0010868491780644761, |
| "router/selected_tokens_s0": 415.875, |
| "router/selected_tokens_s1": 13.1875, |
| "step": 3890, |
| "tokens_trained": 12.745609888 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.1063829787234043, |
| "grad_norm": 2.3967514038085938, |
| "loss": 1.1847, |
| "loss_ce": 1.129238486289978, |
| "loss_region": 0.06246556341648102, |
| "loss_total": 1.1917040348052979, |
| "lr": 0.001086442286080625, |
| "router/selected_tokens_s0": 472.0, |
| "router/selected_tokens_s1": 17.5625, |
| "step": 3900, |
| "tokens_trained": 12.778375328 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.1092198581560284, |
| "grad_norm": 1.5276507139205933, |
| "loss": 1.1882, |
| "loss_ce": 1.107843279838562, |
| "loss_region": 0.05788169056177139, |
| "loss_total": 1.1657249927520752, |
| "lr": 0.001086035394096774, |
| "router/selected_tokens_s0": 390.875, |
| "router/selected_tokens_s1": 10.25, |
| "step": 3910, |
| "tokens_trained": 12.811139968 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.1120567375886525, |
| "grad_norm": 1.167576789855957, |
| "loss": 1.1841, |
| "loss_ce": 1.1291859149932861, |
| "loss_region": 0.0615556500852108, |
| "loss_total": 1.1907415390014648, |
| "lr": 0.001085628502112923, |
| "router/selected_tokens_s0": 397.6875, |
| "router/selected_tokens_s1": 14.1875, |
| "step": 3920, |
| "tokens_trained": 12.843905408 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.1148936170212767, |
| "grad_norm": 1.0138903856277466, |
| "loss": 1.1774, |
| "loss_ce": 1.0464414358139038, |
| "loss_region": 0.060723938047885895, |
| "loss_total": 1.1071653366088867, |
| "lr": 0.001085221610129072, |
| "router/selected_tokens_s0": 419.4375, |
| "router/selected_tokens_s1": 14.0625, |
| "step": 3930, |
| "tokens_trained": 12.876670848 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.1177304964539008, |
| "grad_norm": 2.0475807189941406, |
| "loss": 1.1815, |
| "loss_ce": 1.0976098775863647, |
| "loss_region": 0.05905111879110336, |
| "loss_total": 1.156661033630371, |
| "lr": 0.001084814718145221, |
| "router/selected_tokens_s0": 430.875, |
| "router/selected_tokens_s1": 12.4375, |
| "step": 3940, |
| "tokens_trained": 12.909436288 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.1205673758865249, |
| "grad_norm": 1.7863166332244873, |
| "loss": 1.1712, |
| "loss_ce": 1.1482179164886475, |
| "loss_region": 0.06282122433185577, |
| "loss_total": 1.2110391855239868, |
| "lr": 0.00108440782616137, |
| "router/selected_tokens_s0": 410.0625, |
| "router/selected_tokens_s1": 15.9375, |
| "step": 3950, |
| "tokens_trained": 12.942201728 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.123404255319149, |
| "grad_norm": 1.1568286418914795, |
| "loss": 1.17, |
| "loss_ce": 1.0623869895935059, |
| "loss_region": 0.059787455946207047, |
| "loss_total": 1.1221745014190674, |
| "lr": 0.001084000934177519, |
| "router/selected_tokens_s0": 492.125, |
| "router/selected_tokens_s1": 14.3125, |
| "step": 3960, |
| "tokens_trained": 12.974967168 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.1262411347517731, |
| "grad_norm": 1.1083351373672485, |
| "loss": 1.1777, |
| "loss_ce": 1.0901384353637695, |
| "loss_region": 0.06682166457176208, |
| "loss_total": 1.156960129737854, |
| "lr": 0.001083594042193668, |
| "router/selected_tokens_s0": 427.9375, |
| "router/selected_tokens_s1": 21.625, |
| "step": 3970, |
| "tokens_trained": 13.007732608 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.1290780141843972, |
| "grad_norm": 2.0190722942352295, |
| "loss": 1.1741, |
| "loss_ce": 1.1439288854599, |
| "loss_region": 0.057421792298555374, |
| "loss_total": 1.2013506889343262, |
| "lr": 0.001083187150209817, |
| "router/selected_tokens_s0": 421.875, |
| "router/selected_tokens_s1": 10.1875, |
| "step": 3980, |
| "tokens_trained": 13.040497248 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.1319148936170214, |
| "grad_norm": 1.738838791847229, |
| "loss": 1.1762, |
| "loss_ce": 1.0594556331634521, |
| "loss_region": 0.06316748261451721, |
| "loss_total": 1.122623085975647, |
| "lr": 0.0010827802582259658, |
| "router/selected_tokens_s0": 399.0, |
| "router/selected_tokens_s1": 16.0, |
| "step": 3990, |
| "tokens_trained": 13.073261888 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.1347517730496455, |
| "grad_norm": 1.5448288917541504, |
| "loss": 1.1853, |
| "loss_ce": 1.097516655921936, |
| "loss_region": 0.06060180068016052, |
| "loss_total": 1.158118486404419, |
| "lr": 0.0010823733662421147, |
| "router/selected_tokens_s0": 466.25, |
| "router/selected_tokens_s1": 14.875, |
| "step": 4000, |
| "tokens_trained": 13.106027328 |
| }, |
| { |
| "epoch": 1.1347517730496455, |
| "eval_ppl": 3.0780662311338154, |
| "eval_runtime": 2.0365, |
| "step": 4000, |
| "tokens_trained": 13.106027328 |
| }, |
| { |
| "epoch": 1.1347517730496455, |
| "eval_F": 0.001075576556592397, |
| "eval_F_cds": 0.0008444883658251729, |
| "eval_F_dig": 0.000664114383936245, |
| "eval_F_exon": 0.0010192848697353936, |
| "eval_F_intron": 0.00112246181023035, |
| "eval_F_nig": 0.001104810456558583, |
| "eval_F_promoter": 0.0009211883856568336, |
| "eval_F_utr": 0.0011298110231184916, |
| "eval_G": 0.013302638605348116, |
| "eval_G_cds": 0.011082541157577935, |
| "eval_G_dig": 0.021607370448863194, |
| "eval_G_exon": 0.01194230253054457, |
| "eval_G_intron": 0.013434109888838712, |
| "eval_G_nig": 0.013912727565888544, |
| "eval_G_promoter": 0.012317198595385109, |
| "eval_G_utr": 0.012318524937114292, |
| "eval_avg_bp_per_token": 929.733912356889, |
| "eval_bp_per_token/cds": 1184.1489361702127, |
| "eval_bp_per_token/dig": 1505.764705882353, |
| "eval_bp_per_token/exon": 981.08, |
| "eval_bp_per_token/intron": 890.8989071038251, |
| "eval_bp_per_token/nig": 905.1326352530541, |
| "eval_bp_per_token/promoter": 1085.5542857142857, |
| "eval_bp_per_token/utr": 885.1037735849056, |
| "eval_ppl_cds": 3.7825145100068065, |
| "eval_ppl_dig": 1.1088177473934662, |
| "eval_ppl_exon": 3.3321747505821633, |
| "eval_ppl_intron": 3.100516205807046, |
| "eval_ppl_nig": 2.9327303159960234, |
| "eval_ppl_promoter": 3.351815098328506, |
| "eval_ppl_utr": 3.4471430144116186, |
| "step": 4000, |
| "tokens_trained": 13.106027328 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.1375886524822696, |
| "grad_norm": 2.2905516624450684, |
| "loss": 1.1719, |
| "loss_ce": 1.1303597688674927, |
| "loss_region": 0.05621382221579552, |
| "loss_total": 1.1865736246109009, |
| "lr": 0.0010819664742582637, |
| "router/selected_tokens_s0": 454.5625, |
| "router/selected_tokens_s1": 9.625, |
| "step": 4010, |
| "tokens_trained": 13.138791968 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.1404255319148937, |
| "grad_norm": 1.3957611322402954, |
| "loss": 1.1742, |
| "loss_ce": 1.1473259925842285, |
| "loss_region": 0.062191255390644073, |
| "loss_total": 1.209517240524292, |
| "lr": 0.0010815595822744127, |
| "router/selected_tokens_s0": 383.125, |
| "router/selected_tokens_s1": 14.5, |
| "step": 4020, |
| "tokens_trained": 13.171557408 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.1432624113475178, |
| "grad_norm": 1.3832191228866577, |
| "loss": 1.1765, |
| "loss_ce": 1.0680574178695679, |
| "loss_region": 0.055804431438446045, |
| "loss_total": 1.1238617897033691, |
| "lr": 0.0010811526902905618, |
| "router/selected_tokens_s0": 423.1875, |
| "router/selected_tokens_s1": 8.625, |
| "step": 4030, |
| "tokens_trained": 13.204322848 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.1460992907801417, |
| "grad_norm": 0.8278810977935791, |
| "loss": 1.1763, |
| "loss_ce": 1.0880036354064941, |
| "loss_region": 0.06598131358623505, |
| "loss_total": 1.1539849042892456, |
| "lr": 0.0010807457983067108, |
| "router/selected_tokens_s0": 444.8125, |
| "router/selected_tokens_s1": 20.625, |
| "step": 4040, |
| "tokens_trained": 13.237088288 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.148936170212766, |
| "grad_norm": 1.973814606666565, |
| "loss": 1.1785, |
| "loss_ce": 1.1111335754394531, |
| "loss_region": 0.056068602949380875, |
| "loss_total": 1.1672022342681885, |
| "lr": 0.0010803389063228598, |
| "router/selected_tokens_s0": 414.625, |
| "router/selected_tokens_s1": 8.5, |
| "step": 4050, |
| "tokens_trained": 13.269853704 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.15177304964539, |
| "grad_norm": 1.9284107685089111, |
| "loss": 1.1835, |
| "loss_ce": 1.1678242683410645, |
| "loss_region": 0.06446538865566254, |
| "loss_total": 1.2322896718978882, |
| "lr": 0.0010799320143390087, |
| "router/selected_tokens_s0": 467.0625, |
| "router/selected_tokens_s1": 19.75, |
| "step": 4060, |
| "tokens_trained": 13.302619144 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.1546099290780143, |
| "grad_norm": 1.5813398361206055, |
| "loss": 1.177, |
| "loss_ce": 1.117352843284607, |
| "loss_region": 0.059066567569971085, |
| "loss_total": 1.1764193773269653, |
| "lr": 0.0010795251223551577, |
| "router/selected_tokens_s0": 399.375, |
| "router/selected_tokens_s1": 11.625, |
| "step": 4070, |
| "tokens_trained": 13.335384584 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.1574468085106382, |
| "grad_norm": 2.2116472721099854, |
| "loss": 1.1738, |
| "loss_ce": 1.0900934934616089, |
| "loss_region": 0.058406438678503036, |
| "loss_total": 1.1484999656677246, |
| "lr": 0.0010791182303713067, |
| "router/selected_tokens_s0": 458.0, |
| "router/selected_tokens_s1": 12.1875, |
| "step": 4080, |
| "tokens_trained": 13.368150024 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.1602836879432625, |
| "grad_norm": 0.8941004872322083, |
| "loss": 1.1785, |
| "loss_ce": 1.1180888414382935, |
| "loss_region": 0.061131663620471954, |
| "loss_total": 1.1792205572128296, |
| "lr": 0.0010787113383874556, |
| "router/selected_tokens_s0": 414.125, |
| "router/selected_tokens_s1": 14.4375, |
| "step": 4090, |
| "tokens_trained": 13.400914384 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.1631205673758864, |
| "grad_norm": 1.9441637992858887, |
| "loss": 1.1763, |
| "loss_ce": 1.132266879081726, |
| "loss_region": 0.05915014073252678, |
| "loss_total": 1.1914169788360596, |
| "lr": 0.0010783044464036046, |
| "router/selected_tokens_s0": 474.25, |
| "router/selected_tokens_s1": 13.75, |
| "step": 4100, |
| "tokens_trained": 13.433677248 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.1659574468085105, |
| "grad_norm": 0.49526798725128174, |
| "loss": 1.1742, |
| "loss_ce": 1.1343379020690918, |
| "loss_region": 0.059441614896059036, |
| "loss_total": 1.193779468536377, |
| "lr": 0.0010778975544197536, |
| "router/selected_tokens_s0": 354.125, |
| "router/selected_tokens_s1": 11.0625, |
| "step": 4110, |
| "tokens_trained": 13.466442688 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.1687943262411347, |
| "grad_norm": 1.8044946193695068, |
| "loss": 1.1762, |
| "loss_ce": 1.1329114437103271, |
| "loss_region": 0.05853970721364021, |
| "loss_total": 1.1914511919021606, |
| "lr": 0.0010774906624359025, |
| "router/selected_tokens_s0": 421.9375, |
| "router/selected_tokens_s1": 11.8125, |
| "step": 4120, |
| "tokens_trained": 13.499207928 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.1716312056737588, |
| "grad_norm": 1.7056463956832886, |
| "loss": 1.1762, |
| "loss_ce": 1.137860655784607, |
| "loss_region": 0.06000075116753578, |
| "loss_total": 1.1978614330291748, |
| "lr": 0.0010770837704520515, |
| "router/selected_tokens_s0": 380.0, |
| "router/selected_tokens_s1": 12.125, |
| "step": 4130, |
| "tokens_trained": 13.531973368 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.174468085106383, |
| "grad_norm": 2.1075713634490967, |
| "loss": 1.1836, |
| "loss_ce": 1.1564133167266846, |
| "loss_region": 0.05947628244757652, |
| "loss_total": 1.215889573097229, |
| "lr": 0.0010766768784682005, |
| "router/selected_tokens_s0": 438.875, |
| "router/selected_tokens_s1": 13.3125, |
| "step": 4140, |
| "tokens_trained": 13.564734752 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.177304964539007, |
| "grad_norm": 3.1112468242645264, |
| "loss": 1.1813, |
| "loss_ce": 1.1807456016540527, |
| "loss_region": 0.05970983952283859, |
| "loss_total": 1.2404553890228271, |
| "lr": 0.0010762699864843494, |
| "router/selected_tokens_s0": 345.625, |
| "router/selected_tokens_s1": 10.9375, |
| "step": 4150, |
| "tokens_trained": 13.597500192 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.1801418439716311, |
| "grad_norm": 2.5510308742523193, |
| "loss": 1.1811, |
| "loss_ce": 1.061288595199585, |
| "loss_region": 0.06100037693977356, |
| "loss_total": 1.1222889423370361, |
| "lr": 0.0010758630945004984, |
| "router/selected_tokens_s0": 363.625, |
| "router/selected_tokens_s1": 12.125, |
| "step": 4160, |
| "tokens_trained": 13.630265632 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.1829787234042553, |
| "grad_norm": 3.261460065841675, |
| "loss": 1.1814, |
| "loss_ce": 1.1258513927459717, |
| "loss_region": 0.06240670382976532, |
| "loss_total": 1.1882580518722534, |
| "lr": 0.0010754562025166474, |
| "router/selected_tokens_s0": 375.375, |
| "router/selected_tokens_s1": 14.1875, |
| "step": 4170, |
| "tokens_trained": 13.663031072 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.1858156028368794, |
| "grad_norm": 1.1999715566635132, |
| "loss": 1.177, |
| "loss_ce": 1.1389847993850708, |
| "loss_region": 0.05969350412487984, |
| "loss_total": 1.1986782550811768, |
| "lr": 0.0010750493105327963, |
| "router/selected_tokens_s0": 353.3125, |
| "router/selected_tokens_s1": 11.1875, |
| "step": 4180, |
| "tokens_trained": 13.695793424 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.1886524822695035, |
| "grad_norm": 2.061321973800659, |
| "loss": 1.1722, |
| "loss_ce": 1.1006853580474854, |
| "loss_region": 0.06308386474847794, |
| "loss_total": 1.163769245147705, |
| "lr": 0.0010746424185489453, |
| "router/selected_tokens_s0": 404.5, |
| "router/selected_tokens_s1": 16.3125, |
| "step": 4190, |
| "tokens_trained": 13.728557088 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.1914893617021276, |
| "grad_norm": 1.17081618309021, |
| "loss": 1.1768, |
| "loss_ce": 1.092057466506958, |
| "loss_region": 0.058908574283123016, |
| "loss_total": 1.1509660482406616, |
| "lr": 0.0010742355265650943, |
| "router/selected_tokens_s0": 392.625, |
| "router/selected_tokens_s1": 11.3125, |
| "step": 4200, |
| "tokens_trained": 13.761322528 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.1943262411347517, |
| "grad_norm": 0.3326140344142914, |
| "loss": 1.1747, |
| "loss_ce": 1.0654566287994385, |
| "loss_region": 0.05828992277383804, |
| "loss_total": 1.1237465143203735, |
| "lr": 0.0010738286345812434, |
| "router/selected_tokens_s0": 444.125, |
| "router/selected_tokens_s1": 11.75, |
| "step": 4210, |
| "tokens_trained": 13.794087968 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.1971631205673758, |
| "grad_norm": 1.0290011167526245, |
| "loss": 1.1824, |
| "loss_ce": 1.140906572341919, |
| "loss_region": 0.0639735609292984, |
| "loss_total": 1.2048801183700562, |
| "lr": 0.0010734217425973924, |
| "router/selected_tokens_s0": 438.6875, |
| "router/selected_tokens_s1": 18.1875, |
| "step": 4220, |
| "tokens_trained": 13.826853408 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.2, |
| "grad_norm": 0.9843527674674988, |
| "loss": 1.1796, |
| "loss_ce": 1.0733304023742676, |
| "loss_region": 0.058738574385643005, |
| "loss_total": 1.1320689916610718, |
| "lr": 0.0010730148506135414, |
| "router/selected_tokens_s0": 441.9375, |
| "router/selected_tokens_s1": 12.1875, |
| "step": 4230, |
| "tokens_trained": 13.859618048 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.202836879432624, |
| "grad_norm": 1.3148319721221924, |
| "loss": 1.1696, |
| "loss_ce": 1.1151381731033325, |
| "loss_region": 0.062236238270998, |
| "loss_total": 1.1773743629455566, |
| "lr": 0.0010726079586296901, |
| "router/selected_tokens_s0": 469.8125, |
| "router/selected_tokens_s1": 17.3125, |
| "step": 4240, |
| "tokens_trained": 13.892383488 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.2056737588652482, |
| "grad_norm": 1.3859351873397827, |
| "loss": 1.1717, |
| "loss_ce": 1.1349636316299438, |
| "loss_region": 0.05773340165615082, |
| "loss_total": 1.1926970481872559, |
| "lr": 0.001072201066645839, |
| "router/selected_tokens_s0": 418.625, |
| "router/selected_tokens_s1": 10.6875, |
| "step": 4250, |
| "tokens_trained": 13.925146528 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.2085106382978723, |
| "grad_norm": 2.72090744972229, |
| "loss": 1.1765, |
| "loss_ce": 1.072594165802002, |
| "loss_region": 0.06244191154837608, |
| "loss_total": 1.1350361108779907, |
| "lr": 0.001071794174661988, |
| "router/selected_tokens_s0": 477.4375, |
| "router/selected_tokens_s1": 17.25, |
| "step": 4260, |
| "tokens_trained": 13.957911968 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.2113475177304964, |
| "grad_norm": 1.4798249006271362, |
| "loss": 1.1734, |
| "loss_ce": 1.1144499778747559, |
| "loss_region": 0.056226495653390884, |
| "loss_total": 1.1706764698028564, |
| "lr": 0.0010713872826781372, |
| "router/selected_tokens_s0": 426.6875, |
| "router/selected_tokens_s1": 9.1875, |
| "step": 4270, |
| "tokens_trained": 13.990673272 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.2141843971631205, |
| "grad_norm": 1.357850432395935, |
| "loss": 1.1693, |
| "loss_ce": 1.077507495880127, |
| "loss_region": 0.06390528380870819, |
| "loss_total": 1.1414127349853516, |
| "lr": 0.0010709803906942862, |
| "router/selected_tokens_s0": 412.6875, |
| "router/selected_tokens_s1": 17.3125, |
| "step": 4280, |
| "tokens_trained": 14.023438712 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.2170212765957447, |
| "grad_norm": 1.3726602792739868, |
| "loss": 1.176, |
| "loss_ce": 1.0596121549606323, |
| "loss_region": 0.05795986205339432, |
| "loss_total": 1.1175720691680908, |
| "lr": 0.0010705734987104352, |
| "router/selected_tokens_s0": 445.5625, |
| "router/selected_tokens_s1": 11.25, |
| "step": 4290, |
| "tokens_trained": 14.056204152 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.2198581560283688, |
| "grad_norm": 1.4125066995620728, |
| "loss": 1.1771, |
| "loss_ce": 1.0902185440063477, |
| "loss_region": 0.06225195899605751, |
| "loss_total": 1.1524704694747925, |
| "lr": 0.0010701666067265841, |
| "router/selected_tokens_s0": 385.0, |
| "router/selected_tokens_s1": 14.9375, |
| "step": 4300, |
| "tokens_trained": 14.088969592 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.222695035460993, |
| "grad_norm": 1.2627815008163452, |
| "loss": 1.1721, |
| "loss_ce": 1.0882596969604492, |
| "loss_region": 0.05787515267729759, |
| "loss_total": 1.146134853363037, |
| "lr": 0.001069759714742733, |
| "router/selected_tokens_s0": 488.75, |
| "router/selected_tokens_s1": 11.875, |
| "step": 4310, |
| "tokens_trained": 14.121735032 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.225531914893617, |
| "grad_norm": 1.7309290170669556, |
| "loss": 1.1713, |
| "loss_ce": 1.1801739931106567, |
| "loss_region": 0.062443725764751434, |
| "loss_total": 1.2426177263259888, |
| "lr": 0.001069352822758882, |
| "router/selected_tokens_s0": 420.9375, |
| "router/selected_tokens_s1": 16.125, |
| "step": 4320, |
| "tokens_trained": 14.154499672 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.2283687943262411, |
| "grad_norm": 1.483018398284912, |
| "loss": 1.1751, |
| "loss_ce": 1.1250663995742798, |
| "loss_region": 0.05841255187988281, |
| "loss_total": 1.1834789514541626, |
| "lr": 0.001068945930775031, |
| "router/selected_tokens_s0": 421.3125, |
| "router/selected_tokens_s1": 11.3125, |
| "step": 4330, |
| "tokens_trained": 14.187265112 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.2312056737588652, |
| "grad_norm": 1.1965560913085938, |
| "loss": 1.1727, |
| "loss_ce": 1.117280125617981, |
| "loss_region": 0.0637601837515831, |
| "loss_total": 1.1810402870178223, |
| "lr": 0.00106853903879118, |
| "router/selected_tokens_s0": 447.1875, |
| "router/selected_tokens_s1": 18.4375, |
| "step": 4340, |
| "tokens_trained": 14.220030552 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.2340425531914894, |
| "grad_norm": 1.5123695135116577, |
| "loss": 1.1695, |
| "loss_ce": 1.0661840438842773, |
| "loss_region": 0.060901377350091934, |
| "loss_total": 1.1270854473114014, |
| "lr": 0.001068132146807329, |
| "router/selected_tokens_s0": 424.75, |
| "router/selected_tokens_s1": 13.6875, |
| "step": 4350, |
| "tokens_trained": 14.252795992 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.2368794326241135, |
| "grad_norm": 1.7330703735351562, |
| "loss": 1.1654, |
| "loss_ce": 1.0469716787338257, |
| "loss_region": 0.055631160736083984, |
| "loss_total": 1.1026028394699097, |
| "lr": 0.001067725254823478, |
| "router/selected_tokens_s0": 447.125, |
| "router/selected_tokens_s1": 8.3125, |
| "step": 4360, |
| "tokens_trained": 14.285561432 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.2397163120567376, |
| "grad_norm": 2.1848738193511963, |
| "loss": 1.1806, |
| "loss_ce": 1.1433590650558472, |
| "loss_region": 0.06835979223251343, |
| "loss_total": 1.2117187976837158, |
| "lr": 0.0010673183628396269, |
| "router/selected_tokens_s0": 320.9375, |
| "router/selected_tokens_s1": 17.3125, |
| "step": 4370, |
| "tokens_trained": 14.318326792 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.2425531914893617, |
| "grad_norm": 1.8598670959472656, |
| "loss": 1.1732, |
| "loss_ce": 1.0439656972885132, |
| "loss_region": 0.05709763243794441, |
| "loss_total": 1.1010633707046509, |
| "lr": 0.0010669114708557758, |
| "router/selected_tokens_s0": 429.8125, |
| "router/selected_tokens_s1": 9.5625, |
| "step": 4380, |
| "tokens_trained": 14.351092216 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.2453900709219858, |
| "grad_norm": 0.9603729248046875, |
| "loss": 1.1674, |
| "loss_ce": 1.1185280084609985, |
| "loss_region": 0.06183166801929474, |
| "loss_total": 1.1803597211837769, |
| "lr": 0.0010665045788719248, |
| "router/selected_tokens_s0": 396.75, |
| "router/selected_tokens_s1": 14.6875, |
| "step": 4390, |
| "tokens_trained": 14.383857656 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.24822695035461, |
| "grad_norm": 1.593279242515564, |
| "loss": 1.1763, |
| "loss_ce": 1.1112045049667358, |
| "loss_region": 0.06215018033981323, |
| "loss_total": 1.1733546257019043, |
| "lr": 0.0010660976868880738, |
| "router/selected_tokens_s0": 461.6875, |
| "router/selected_tokens_s1": 17.125, |
| "step": 4400, |
| "tokens_trained": 14.416623096 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.251063829787234, |
| "grad_norm": 1.1648064851760864, |
| "loss": 1.1733, |
| "loss_ce": 0.9768592715263367, |
| "loss_region": 0.05571449548006058, |
| "loss_total": 1.0325738191604614, |
| "lr": 0.0010656907949042227, |
| "router/selected_tokens_s0": 511.875, |
| "router/selected_tokens_s1": 9.375, |
| "step": 4410, |
| "tokens_trained": 14.449387936 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.2539007092198582, |
| "grad_norm": 1.1375138759613037, |
| "loss": 1.177, |
| "loss_ce": 1.064677357673645, |
| "loss_region": 0.06464707851409912, |
| "loss_total": 1.1293244361877441, |
| "lr": 0.0010652839029203717, |
| "router/selected_tokens_s0": 384.4375, |
| "router/selected_tokens_s1": 17.125, |
| "step": 4420, |
| "tokens_trained": 14.48214904 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.2567375886524823, |
| "grad_norm": 1.069541096687317, |
| "loss": 1.1701, |
| "loss_ce": 1.1224985122680664, |
| "loss_region": 0.05752616375684738, |
| "loss_total": 1.1800246238708496, |
| "lr": 0.0010648770109365207, |
| "router/selected_tokens_s0": 443.1875, |
| "router/selected_tokens_s1": 11.25, |
| "step": 4430, |
| "tokens_trained": 14.51491448 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.2595744680851064, |
| "grad_norm": 1.6241931915283203, |
| "loss": 1.1755, |
| "loss_ce": 1.1491807699203491, |
| "loss_region": 0.0633014589548111, |
| "loss_total": 1.212482213973999, |
| "lr": 0.0010644701189526696, |
| "router/selected_tokens_s0": 413.1875, |
| "router/selected_tokens_s1": 16.5, |
| "step": 4440, |
| "tokens_trained": 14.54767992 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.2624113475177305, |
| "grad_norm": 1.3303278684616089, |
| "loss": 1.1711, |
| "loss_ce": 1.0399068593978882, |
| "loss_region": 0.05873655900359154, |
| "loss_total": 1.09864342212677, |
| "lr": 0.0010640632269688186, |
| "router/selected_tokens_s0": 471.9375, |
| "router/selected_tokens_s1": 12.75, |
| "step": 4450, |
| "tokens_trained": 14.58044536 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.2652482269503547, |
| "grad_norm": 1.3606629371643066, |
| "loss": 1.1715, |
| "loss_ce": 1.1313279867172241, |
| "loss_region": 0.061985936015844345, |
| "loss_total": 1.1933139562606812, |
| "lr": 0.0010636563349849678, |
| "router/selected_tokens_s0": 465.3125, |
| "router/selected_tokens_s1": 17.0, |
| "step": 4460, |
| "tokens_trained": 14.613210784 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.2680851063829788, |
| "grad_norm": 1.165449619293213, |
| "loss": 1.1774, |
| "loss_ce": 1.077015995979309, |
| "loss_region": 0.05757826194167137, |
| "loss_total": 1.134594202041626, |
| "lr": 0.0010632494430011167, |
| "router/selected_tokens_s0": 444.125, |
| "router/selected_tokens_s1": 10.8125, |
| "step": 4470, |
| "tokens_trained": 14.645976224 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.270921985815603, |
| "grad_norm": 1.4742231369018555, |
| "loss": 1.1779, |
| "loss_ce": 1.1043974161148071, |
| "loss_region": 0.06537966430187225, |
| "loss_total": 1.1697770357131958, |
| "lr": 0.0010628425510172657, |
| "router/selected_tokens_s0": 465.375, |
| "router/selected_tokens_s1": 20.875, |
| "step": 4480, |
| "tokens_trained": 14.678741664 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.273758865248227, |
| "grad_norm": 1.670129656791687, |
| "loss": 1.1805, |
| "loss_ce": 1.1499029397964478, |
| "loss_region": 0.057785190641880035, |
| "loss_total": 1.2076880931854248, |
| "lr": 0.0010624356590334145, |
| "router/selected_tokens_s0": 392.4375, |
| "router/selected_tokens_s1": 10.1875, |
| "step": 4490, |
| "tokens_trained": 14.711504648 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.2765957446808511, |
| "grad_norm": 0.8384753465652466, |
| "loss": 1.1612, |
| "loss_ce": 1.1587680578231812, |
| "loss_region": 0.058904800564050674, |
| "loss_total": 1.2176728248596191, |
| "lr": 0.0010620287670495634, |
| "router/selected_tokens_s0": 401.875, |
| "router/selected_tokens_s1": 11.625, |
| "step": 4500, |
| "tokens_trained": 14.744270088 |
| }, |
| { |
| "epoch": 1.2765957446808511, |
| "eval_ppl": 3.0410384974007107, |
| "eval_runtime": 2.0139, |
| "step": 4500, |
| "tokens_trained": 14.744270088 |
| }, |
| { |
| "epoch": 1.2765957446808511, |
| "eval_F": 0.0011583785418018391, |
| "eval_F_cds": 0.0011679094420986435, |
| "eval_F_dig": 0.0010157043519024925, |
| "eval_F_exon": 0.001141599054103641, |
| "eval_F_intron": 0.001173064596757128, |
| "eval_F_nig": 0.0012291739372706748, |
| "eval_F_promoter": 0.0010383109089760595, |
| "eval_F_utr": 0.0009805906993103888, |
| "eval_G": 0.014433089722771523, |
| "eval_G_cds": 0.012305126167909443, |
| "eval_G_dig": 0.021026957062075163, |
| "eval_G_exon": 0.01361236362817847, |
| "eval_G_intron": 0.01457564882380148, |
| "eval_G_nig": 0.015167288991204899, |
| "eval_G_promoter": 0.013283806400581402, |
| "eval_G_utr": 0.013029840400869741, |
| "eval_avg_bp_per_token": 863.2756598240469, |
| "eval_bp_per_token/cds": 856.2307692307693, |
| "eval_bp_per_token/dig": 984.5384615384615, |
| "eval_bp_per_token/exon": 875.9642857142857, |
| "eval_bp_per_token/intron": 852.4679738562091, |
| "eval_bp_per_token/nig": 813.5545098039215, |
| "eval_bp_per_token/promoter": 963.1026615969582, |
| "eval_bp_per_token/utr": 1019.7934782608696, |
| "eval_ppl_cds": 3.7443685934757203, |
| "eval_ppl_dig": 1.100676930462353, |
| "eval_ppl_exon": 3.289243520065538, |
| "eval_ppl_intron": 3.063192321562982, |
| "eval_ppl_nig": 2.893788578025708, |
| "eval_ppl_promoter": 3.3166590776504705, |
| "eval_ppl_utr": 3.4129745459503487, |
| "step": 4500, |
| "tokens_trained": 14.744270088 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.2794326241134752, |
| "grad_norm": 2.825514554977417, |
| "loss": 1.1776, |
| "loss_ce": 1.1078251600265503, |
| "loss_region": 0.06376127153635025, |
| "loss_total": 1.1715863943099976, |
| "lr": 0.0010616218750657124, |
| "router/selected_tokens_s0": 426.1875, |
| "router/selected_tokens_s1": 17.6875, |
| "step": 4510, |
| "tokens_trained": 14.777033928 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.2822695035460994, |
| "grad_norm": 3.562530279159546, |
| "loss": 1.1776, |
| "loss_ce": 1.1453254222869873, |
| "loss_region": 0.05877625197172165, |
| "loss_total": 1.2041016817092896, |
| "lr": 0.0010612149830818616, |
| "router/selected_tokens_s0": 604.375, |
| "router/selected_tokens_s1": 14.875, |
| "step": 4520, |
| "tokens_trained": 14.809799368 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.2851063829787235, |
| "grad_norm": 2.0739846229553223, |
| "loss": 1.1816, |
| "loss_ce": 1.167551040649414, |
| "loss_region": 0.05897127464413643, |
| "loss_total": 1.2265223264694214, |
| "lr": 0.0010608080910980105, |
| "router/selected_tokens_s0": 447.875, |
| "router/selected_tokens_s1": 12.9375, |
| "step": 4530, |
| "tokens_trained": 14.842564808 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.2879432624113476, |
| "grad_norm": 0.5496819019317627, |
| "loss": 1.1826, |
| "loss_ce": 1.048570156097412, |
| "loss_region": 0.05806339904665947, |
| "loss_total": 1.1066335439682007, |
| "lr": 0.0010604011991141595, |
| "router/selected_tokens_s0": 413.375, |
| "router/selected_tokens_s1": 11.3125, |
| "step": 4540, |
| "tokens_trained": 14.875330248 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.2907801418439715, |
| "grad_norm": 1.5685514211654663, |
| "loss": 1.1784, |
| "loss_ce": 1.1430702209472656, |
| "loss_region": 0.06285207718610764, |
| "loss_total": 1.205922245979309, |
| "lr": 0.0010599943071303085, |
| "router/selected_tokens_s0": 393.6875, |
| "router/selected_tokens_s1": 15.0, |
| "step": 4550, |
| "tokens_trained": 14.908095688 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.2936170212765958, |
| "grad_norm": 0.891741156578064, |
| "loss": 1.1716, |
| "loss_ce": 1.1201016902923584, |
| "loss_region": 0.05721219629049301, |
| "loss_total": 1.1773139238357544, |
| "lr": 0.0010595874151464574, |
| "router/selected_tokens_s0": 409.0625, |
| "router/selected_tokens_s1": 9.5625, |
| "step": 4560, |
| "tokens_trained": 14.940861128 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.2964539007092197, |
| "grad_norm": 1.012353777885437, |
| "loss": 1.1584, |
| "loss_ce": 1.009028673171997, |
| "loss_region": 0.06006960570812225, |
| "loss_total": 1.0690982341766357, |
| "lr": 0.0010591805231626064, |
| "router/selected_tokens_s0": 539.6875, |
| "router/selected_tokens_s1": 16.875, |
| "step": 4570, |
| "tokens_trained": 14.973625768 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.299290780141844, |
| "grad_norm": 1.1502134799957275, |
| "loss": 1.1743, |
| "loss_ce": 1.073366641998291, |
| "loss_region": 0.06413622200489044, |
| "loss_total": 1.137502908706665, |
| "lr": 0.0010587736311787554, |
| "router/selected_tokens_s0": 393.625, |
| "router/selected_tokens_s1": 16.5625, |
| "step": 4580, |
| "tokens_trained": 15.006387136 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.302127659574468, |
| "grad_norm": 1.2014880180358887, |
| "loss": 1.1678, |
| "loss_ce": 1.125981330871582, |
| "loss_region": 0.05629488825798035, |
| "loss_total": 1.1822762489318848, |
| "lr": 0.0010583667391949043, |
| "router/selected_tokens_s0": 466.1875, |
| "router/selected_tokens_s1": 9.3125, |
| "step": 4590, |
| "tokens_trained": 15.039152576 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.3049645390070923, |
| "grad_norm": 4.010960578918457, |
| "loss": 1.1722, |
| "loss_ce": 1.1003656387329102, |
| "loss_region": 0.05642012134194374, |
| "loss_total": 1.1567857265472412, |
| "lr": 0.0010579598472110533, |
| "router/selected_tokens_s0": 410.9375, |
| "router/selected_tokens_s1": 9.5625, |
| "step": 4600, |
| "tokens_trained": 15.071918016 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.3078014184397162, |
| "grad_norm": 2.229997158050537, |
| "loss": 1.1824, |
| "loss_ce": 1.129849910736084, |
| "loss_region": 0.06458568572998047, |
| "loss_total": 1.1944355964660645, |
| "lr": 0.0010575529552272023, |
| "router/selected_tokens_s0": 429.875, |
| "router/selected_tokens_s1": 18.375, |
| "step": 4610, |
| "tokens_trained": 15.104683456 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.3106382978723405, |
| "grad_norm": 2.0674681663513184, |
| "loss": 1.1844, |
| "loss_ce": 1.132249116897583, |
| "loss_region": 0.05796496197581291, |
| "loss_total": 1.1902140378952026, |
| "lr": 0.0010571460632433512, |
| "router/selected_tokens_s0": 403.8125, |
| "router/selected_tokens_s1": 10.25, |
| "step": 4620, |
| "tokens_trained": 15.137448096 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.3134751773049644, |
| "grad_norm": 1.2590811252593994, |
| "loss": 1.169, |
| "loss_ce": 1.0923088788986206, |
| "loss_region": 0.05617872625589371, |
| "loss_total": 1.1484875679016113, |
| "lr": 0.0010567391712595002, |
| "router/selected_tokens_s0": 477.125, |
| "router/selected_tokens_s1": 9.9375, |
| "step": 4630, |
| "tokens_trained": 15.170210824 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.3163120567375888, |
| "grad_norm": 1.314387559890747, |
| "loss": 1.1735, |
| "loss_ce": 1.1269707679748535, |
| "loss_region": 0.06321462988853455, |
| "loss_total": 1.1901854276657104, |
| "lr": 0.0010563322792756492, |
| "router/selected_tokens_s0": 412.375, |
| "router/selected_tokens_s1": 16.125, |
| "step": 4640, |
| "tokens_trained": 15.202976264 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.3191489361702127, |
| "grad_norm": 0.959001898765564, |
| "loss": 1.1731, |
| "loss_ce": 1.067009687423706, |
| "loss_region": 0.0584431029856205, |
| "loss_total": 1.1254527568817139, |
| "lr": 0.0010559253872917981, |
| "router/selected_tokens_s0": 434.125, |
| "router/selected_tokens_s1": 12.0625, |
| "step": 4650, |
| "tokens_trained": 15.235741704 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.321985815602837, |
| "grad_norm": 0.6913428902626038, |
| "loss": 1.1645, |
| "loss_ce": 1.1260396242141724, |
| "loss_region": 0.05686100572347641, |
| "loss_total": 1.1829006671905518, |
| "lr": 0.001055518495307947, |
| "router/selected_tokens_s0": 452.375, |
| "router/selected_tokens_s1": 10.5625, |
| "step": 4660, |
| "tokens_trained": 15.268507144 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.324822695035461, |
| "grad_norm": 1.705417275428772, |
| "loss": 1.1765, |
| "loss_ce": 1.1382602453231812, |
| "loss_region": 0.06457319855690002, |
| "loss_total": 1.2028334140777588, |
| "lr": 0.001055111603324096, |
| "router/selected_tokens_s0": 441.875, |
| "router/selected_tokens_s1": 19.3125, |
| "step": 4670, |
| "tokens_trained": 15.30127256 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.327659574468085, |
| "grad_norm": 1.1720391511917114, |
| "loss": 1.17, |
| "loss_ce": 1.123241901397705, |
| "loss_region": 0.059922970831394196, |
| "loss_total": 1.1831648349761963, |
| "lr": 0.001054704711340245, |
| "router/selected_tokens_s0": 393.6875, |
| "router/selected_tokens_s1": 12.4375, |
| "step": 4680, |
| "tokens_trained": 15.334037984 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.3304964539007091, |
| "grad_norm": 1.7748150825500488, |
| "loss": 1.1743, |
| "loss_ce": 1.072164535522461, |
| "loss_region": 0.056693922728300095, |
| "loss_total": 1.1288584470748901, |
| "lr": 0.001054297819356394, |
| "router/selected_tokens_s0": 450.8125, |
| "router/selected_tokens_s1": 9.75, |
| "step": 4690, |
| "tokens_trained": 15.366803424 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.3333333333333333, |
| "grad_norm": 1.1712427139282227, |
| "loss": 1.1671, |
| "loss_ce": 1.1227179765701294, |
| "loss_region": 0.06404247134923935, |
| "loss_total": 1.186760425567627, |
| "lr": 0.0010538909273725432, |
| "router/selected_tokens_s0": 434.25, |
| "router/selected_tokens_s1": 17.875, |
| "step": 4700, |
| "tokens_trained": 15.399568864 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.3361702127659574, |
| "grad_norm": 1.1698198318481445, |
| "loss": 1.1689, |
| "loss_ce": 1.1782885789871216, |
| "loss_region": 0.059104278683662415, |
| "loss_total": 1.2373929023742676, |
| "lr": 0.0010534840353886921, |
| "router/selected_tokens_s0": 410.875, |
| "router/selected_tokens_s1": 12.3125, |
| "step": 4710, |
| "tokens_trained": 15.432334288 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.3390070921985815, |
| "grad_norm": 1.4182919263839722, |
| "loss": 1.1701, |
| "loss_ce": 0.9779119491577148, |
| "loss_region": 0.06040428206324577, |
| "loss_total": 1.038316249847412, |
| "lr": 0.001053077143404841, |
| "router/selected_tokens_s0": 482.375, |
| "router/selected_tokens_s1": 15.25, |
| "step": 4720, |
| "tokens_trained": 15.465099728 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.3418439716312056, |
| "grad_norm": 1.4712938070297241, |
| "loss": 1.1652, |
| "loss_ce": 1.099825143814087, |
| "loss_region": 0.06161494553089142, |
| "loss_total": 1.161440134048462, |
| "lr": 0.0010526702514209898, |
| "router/selected_tokens_s0": 408.625, |
| "router/selected_tokens_s1": 14.6875, |
| "step": 4730, |
| "tokens_trained": 15.497864368 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.3446808510638297, |
| "grad_norm": 1.376592755317688, |
| "loss": 1.1666, |
| "loss_ce": 1.1483793258666992, |
| "loss_region": 0.05899408087134361, |
| "loss_total": 1.2073733806610107, |
| "lr": 0.0010522633594371388, |
| "router/selected_tokens_s0": 450.25, |
| "router/selected_tokens_s1": 13.0, |
| "step": 4740, |
| "tokens_trained": 15.530629808 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.3475177304964538, |
| "grad_norm": 0.9113925695419312, |
| "loss": 1.1698, |
| "loss_ce": 1.167178988456726, |
| "loss_region": 0.06208512559533119, |
| "loss_total": 1.2292641401290894, |
| "lr": 0.0010518564674532878, |
| "router/selected_tokens_s0": 386.75, |
| "router/selected_tokens_s1": 14.5, |
| "step": 4750, |
| "tokens_trained": 15.563395248 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.350354609929078, |
| "grad_norm": 1.6873319149017334, |
| "loss": 1.175, |
| "loss_ce": 1.144089937210083, |
| "loss_region": 0.05694330856204033, |
| "loss_total": 1.2010332345962524, |
| "lr": 0.0010514495754694367, |
| "router/selected_tokens_s0": 456.3125, |
| "router/selected_tokens_s1": 10.8125, |
| "step": 4760, |
| "tokens_trained": 15.596158264 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.353191489361702, |
| "grad_norm": 0.7729793190956116, |
| "loss": 1.1667, |
| "loss_ce": 1.1456177234649658, |
| "loss_region": 0.06607680022716522, |
| "loss_total": 1.2116944789886475, |
| "lr": 0.001051042683485586, |
| "router/selected_tokens_s0": 388.75, |
| "router/selected_tokens_s1": 18.6875, |
| "step": 4770, |
| "tokens_trained": 15.628923704 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.3560283687943262, |
| "grad_norm": 2.3347153663635254, |
| "loss": 1.1724, |
| "loss_ce": 1.1867260932922363, |
| "loss_region": 0.05604035034775734, |
| "loss_total": 1.2427664995193481, |
| "lr": 0.0010506357915017349, |
| "router/selected_tokens_s0": 406.3125, |
| "router/selected_tokens_s1": 8.125, |
| "step": 4780, |
| "tokens_trained": 15.661689144 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.3588652482269503, |
| "grad_norm": 0.8608954548835754, |
| "loss": 1.1681, |
| "loss_ce": 1.1013946533203125, |
| "loss_region": 0.059269458055496216, |
| "loss_total": 1.1606640815734863, |
| "lr": 0.0010502288995178838, |
| "router/selected_tokens_s0": 420.75, |
| "router/selected_tokens_s1": 12.5, |
| "step": 4790, |
| "tokens_trained": 15.694454584 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.3617021276595744, |
| "grad_norm": 1.589377760887146, |
| "loss": 1.1774, |
| "loss_ce": 1.1131365299224854, |
| "loss_region": 0.06720637530088425, |
| "loss_total": 1.1803429126739502, |
| "lr": 0.0010498220075340328, |
| "router/selected_tokens_s0": 428.625, |
| "router/selected_tokens_s1": 21.5, |
| "step": 4800, |
| "tokens_trained": 15.727220024 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.3645390070921986, |
| "grad_norm": 1.1269258260726929, |
| "loss": 1.1722, |
| "loss_ce": 1.1789302825927734, |
| "loss_region": 0.0603567399084568, |
| "loss_total": 1.23928701877594, |
| "lr": 0.0010494151155501818, |
| "router/selected_tokens_s0": 408.0, |
| "router/selected_tokens_s1": 13.0625, |
| "step": 4810, |
| "tokens_trained": 15.759985464 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.3673758865248227, |
| "grad_norm": 0.5982851982116699, |
| "loss": 1.1615, |
| "loss_ce": 1.1606470346450806, |
| "loss_region": 0.057830557227134705, |
| "loss_total": 1.2184776067733765, |
| "lr": 0.0010490082235663307, |
| "router/selected_tokens_s0": 335.8125, |
| "router/selected_tokens_s1": 8.375, |
| "step": 4820, |
| "tokens_trained": 15.792750904 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.3702127659574468, |
| "grad_norm": 0.7754488587379456, |
| "loss": 1.1694, |
| "loss_ce": 1.076349139213562, |
| "loss_region": 0.05904084071516991, |
| "loss_total": 1.1353899240493774, |
| "lr": 0.0010486013315824797, |
| "router/selected_tokens_s0": 440.625, |
| "router/selected_tokens_s1": 13.1875, |
| "step": 4830, |
| "tokens_trained": 15.825515544 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.373049645390071, |
| "grad_norm": 0.8074977993965149, |
| "loss": 1.1673, |
| "loss_ce": 1.1052080392837524, |
| "loss_region": 0.05970120057463646, |
| "loss_total": 1.1649092435836792, |
| "lr": 0.0010481944395986287, |
| "router/selected_tokens_s0": 442.0, |
| "router/selected_tokens_s1": 13.6875, |
| "step": 4840, |
| "tokens_trained": 15.858280984 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.375886524822695, |
| "grad_norm": 1.8432601690292358, |
| "loss": 1.163, |
| "loss_ce": 1.1196966171264648, |
| "loss_region": 0.05893434211611748, |
| "loss_total": 1.1786309480667114, |
| "lr": 0.0010477875476147776, |
| "router/selected_tokens_s0": 467.8125, |
| "router/selected_tokens_s1": 13.25, |
| "step": 4850, |
| "tokens_trained": 15.891045624 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.3787234042553191, |
| "grad_norm": 0.80079185962677, |
| "loss": 1.1646, |
| "loss_ce": 1.0457513332366943, |
| "loss_region": 0.06003762036561966, |
| "loss_total": 1.1057889461517334, |
| "lr": 0.0010473806556309266, |
| "router/selected_tokens_s0": 355.25, |
| "router/selected_tokens_s1": 11.25, |
| "step": 4860, |
| "tokens_trained": 15.923810264 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.3815602836879433, |
| "grad_norm": 1.3119624853134155, |
| "loss": 1.1626, |
| "loss_ce": 1.0813933610916138, |
| "loss_region": 0.061243828386068344, |
| "loss_total": 1.1426371335983276, |
| "lr": 0.0010469737636470756, |
| "router/selected_tokens_s0": 470.3125, |
| "router/selected_tokens_s1": 16.0, |
| "step": 4870, |
| "tokens_trained": 15.956575704 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.3843971631205674, |
| "grad_norm": 1.396918773651123, |
| "loss": 1.1716, |
| "loss_ce": 1.0860676765441895, |
| "loss_region": 0.05864456295967102, |
| "loss_total": 1.144712209701538, |
| "lr": 0.0010465668716632245, |
| "router/selected_tokens_s0": 451.875, |
| "router/selected_tokens_s1": 12.375, |
| "step": 4880, |
| "tokens_trained": 15.989339352 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.3872340425531915, |
| "grad_norm": 1.2318918704986572, |
| "loss": 1.1647, |
| "loss_ce": 1.11076819896698, |
| "loss_region": 0.0610792338848114, |
| "loss_total": 1.1718474626541138, |
| "lr": 0.0010461599796793735, |
| "router/selected_tokens_s0": 357.1875, |
| "router/selected_tokens_s1": 12.25, |
| "step": 4890, |
| "tokens_trained": 16.022103992 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.3900709219858156, |
| "grad_norm": 1.2649383544921875, |
| "loss": 1.1718, |
| "loss_ce": 1.0666271448135376, |
| "loss_region": 0.058861564844846725, |
| "loss_total": 1.1254887580871582, |
| "lr": 0.0010457530876955225, |
| "router/selected_tokens_s0": 418.8125, |
| "router/selected_tokens_s1": 12.0625, |
| "step": 4900, |
| "tokens_trained": 16.054869432 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.3929078014184397, |
| "grad_norm": 1.6673067808151245, |
| "loss": 1.163, |
| "loss_ce": 1.1077296733856201, |
| "loss_region": 0.06258165091276169, |
| "loss_total": 1.1703113317489624, |
| "lr": 0.0010453461957116714, |
| "router/selected_tokens_s0": 439.5, |
| "router/selected_tokens_s1": 16.8125, |
| "step": 4910, |
| "tokens_trained": 16.087634872 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.3957446808510638, |
| "grad_norm": 1.2215776443481445, |
| "loss": 1.1713, |
| "loss_ce": 1.0790401697158813, |
| "loss_region": 0.05825230851769447, |
| "loss_total": 1.137292504310608, |
| "lr": 0.0010449393037278204, |
| "router/selected_tokens_s0": 470.5625, |
| "router/selected_tokens_s1": 12.375, |
| "step": 4920, |
| "tokens_trained": 16.120400312 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.398581560283688, |
| "grad_norm": 0.9133660793304443, |
| "loss": 1.1672, |
| "loss_ce": 1.1354037523269653, |
| "loss_region": 0.062443241477012634, |
| "loss_total": 1.1978470087051392, |
| "lr": 0.0010445324117439694, |
| "router/selected_tokens_s0": 455.5625, |
| "router/selected_tokens_s1": 17.1875, |
| "step": 4930, |
| "tokens_trained": 16.153164952 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.401418439716312, |
| "grad_norm": 0.7142453789710999, |
| "loss": 1.1637, |
| "loss_ce": 1.0729459524154663, |
| "loss_region": 0.05855097249150276, |
| "loss_total": 1.1314969062805176, |
| "lr": 0.0010441255197601183, |
| "router/selected_tokens_s0": 483.25, |
| "router/selected_tokens_s1": 13.0, |
| "step": 4940, |
| "tokens_trained": 16.185929592 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.4042553191489362, |
| "grad_norm": 1.346866488456726, |
| "loss": 1.1688, |
| "loss_ce": 1.1217612028121948, |
| "loss_region": 0.061914537101984024, |
| "loss_total": 1.183675765991211, |
| "lr": 0.0010437186277762675, |
| "router/selected_tokens_s0": 419.25, |
| "router/selected_tokens_s1": 15.4375, |
| "step": 4950, |
| "tokens_trained": 16.218695032 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.4070921985815603, |
| "grad_norm": 0.8455023765563965, |
| "loss": 1.1644, |
| "loss_ce": 0.9533401727676392, |
| "loss_region": 0.057561516761779785, |
| "loss_total": 1.010901689529419, |
| "lr": 0.0010433117357924165, |
| "router/selected_tokens_s0": 444.9375, |
| "router/selected_tokens_s1": 10.75, |
| "step": 4960, |
| "tokens_trained": 16.251457848 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.4099290780141844, |
| "grad_norm": 0.719833254814148, |
| "loss": 1.1677, |
| "loss_ce": 0.9225043654441833, |
| "loss_region": 0.061203230172395706, |
| "loss_total": 0.98370760679245, |
| "lr": 0.0010429048438085654, |
| "router/selected_tokens_s0": 442.375, |
| "router/selected_tokens_s1": 15.0, |
| "step": 4970, |
| "tokens_trained": 16.284222488 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.4127659574468086, |
| "grad_norm": 1.3556922674179077, |
| "loss": 1.1647, |
| "loss_ce": 1.0813281536102295, |
| "loss_region": 0.05944563448429108, |
| "loss_total": 1.1407737731933594, |
| "lr": 0.0010424979518247142, |
| "router/selected_tokens_s0": 478.5, |
| "router/selected_tokens_s1": 14.125, |
| "step": 4980, |
| "tokens_trained": 16.316987928 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.4156028368794327, |
| "grad_norm": 1.9594775438308716, |
| "loss": 1.1697, |
| "loss_ce": 1.1046741008758545, |
| "loss_region": 0.061166729778051376, |
| "loss_total": 1.1658408641815186, |
| "lr": 0.0010420910598408631, |
| "router/selected_tokens_s0": 412.1875, |
| "router/selected_tokens_s1": 14.25, |
| "step": 4990, |
| "tokens_trained": 16.349753368 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.4184397163120568, |
| "grad_norm": 1.9190337657928467, |
| "loss": 1.1659, |
| "loss_ce": 0.9583069682121277, |
| "loss_region": 0.05831551179289818, |
| "loss_total": 1.0166224241256714, |
| "lr": 0.0010416841678570121, |
| "router/selected_tokens_s0": 430.125, |
| "router/selected_tokens_s1": 12.3125, |
| "step": 5000, |
| "tokens_trained": 16.382518808 |
| }, |
| { |
| "epoch": 1.4184397163120568, |
| "eval_ppl": 3.0297582706547788, |
| "eval_runtime": 2.0152, |
| "step": 5000, |
| "tokens_trained": 16.382518808 |
| }, |
| { |
| "epoch": 1.4184397163120568, |
| "eval_F": 0.0008749409770464404, |
| "eval_F_cds": 0.0007276974216153086, |
| "eval_F_dig": 0.0004297210719587468, |
| "eval_F_exon": 0.0008561992905777307, |
| "eval_F_intron": 0.0009058665497180045, |
| "eval_F_nig": 0.0008965739307150804, |
| "eval_F_promoter": 0.0007803781609921462, |
| "eval_F_utr": 0.0008526875646177295, |
| "eval_G": 0.015531097207779055, |
| "eval_G_cds": 0.013802124275671549, |
| "eval_G_dig": 0.0182552103679975, |
| "eval_G_exon": 0.01449494753231133, |
| "eval_G_intron": 0.01584569984851749, |
| "eval_G_nig": 0.016464907279915203, |
| "eval_G_promoter": 0.013578435037466047, |
| "eval_G_utr": 0.014395785092090257, |
| "eval_avg_bp_per_token": 1142.934239262315, |
| "eval_bp_per_token/cds": 1374.1975308641975, |
| "eval_bp_per_token/dig": 2327.090909090909, |
| "eval_bp_per_token/exon": 1167.952380952381, |
| "eval_bp_per_token/intron": 1103.9153618281846, |
| "eval_bp_per_token/nig": 1115.3569892473117, |
| "eval_bp_per_token/promoter": 1281.4300168634063, |
| "eval_bp_per_token/utr": 1172.7625, |
| "eval_ppl_cds": 3.742546850083132, |
| "eval_ppl_dig": 1.1138565762627513, |
| "eval_ppl_exon": 3.2794754104779216, |
| "eval_ppl_intron": 3.05261384850238, |
| "eval_ppl_nig": 2.8751225098839135, |
| "eval_ppl_promoter": 3.309446838442905, |
| "eval_ppl_utr": 3.411906653203143, |
| "step": 5000, |
| "tokens_trained": 16.382518808 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.421276595744681, |
| "grad_norm": 1.2634458541870117, |
| "loss": 1.1716, |
| "loss_ce": 1.0555835962295532, |
| "loss_region": 0.0644194483757019, |
| "loss_total": 1.1200029850006104, |
| "lr": 0.001041277275873161, |
| "router/selected_tokens_s0": 485.5625, |
| "router/selected_tokens_s1": 20.5, |
| "step": 5010, |
| "tokens_trained": 16.415284248 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.424113475177305, |
| "grad_norm": 1.134238839149475, |
| "loss": 1.1675, |
| "loss_ce": 1.0413823127746582, |
| "loss_region": 0.05861315503716469, |
| "loss_total": 1.099995493888855, |
| "lr": 0.0010408703838893103, |
| "router/selected_tokens_s0": 453.25, |
| "router/selected_tokens_s1": 11.875, |
| "step": 5020, |
| "tokens_trained": 16.448049688 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.4269503546099291, |
| "grad_norm": 0.6608848571777344, |
| "loss": 1.16, |
| "loss_ce": 1.0865141153335571, |
| "loss_region": 0.06405461579561234, |
| "loss_total": 1.1505687236785889, |
| "lr": 0.0010404634919054592, |
| "router/selected_tokens_s0": 415.3125, |
| "router/selected_tokens_s1": 17.6875, |
| "step": 5030, |
| "tokens_trained": 16.480814968 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.4297872340425533, |
| "grad_norm": 1.7596572637557983, |
| "loss": 1.1605, |
| "loss_ce": 1.080395221710205, |
| "loss_region": 0.05989512428641319, |
| "loss_total": 1.140290379524231, |
| "lr": 0.0010400565999216082, |
| "router/selected_tokens_s0": 458.25, |
| "router/selected_tokens_s1": 14.125, |
| "step": 5040, |
| "tokens_trained": 16.513576176 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.4326241134751774, |
| "grad_norm": 1.449523687362671, |
| "loss": 1.1587, |
| "loss_ce": 1.1526217460632324, |
| "loss_region": 0.05976712331175804, |
| "loss_total": 1.2123888731002808, |
| "lr": 0.0010396497079377572, |
| "router/selected_tokens_s0": 470.0625, |
| "router/selected_tokens_s1": 14.4375, |
| "step": 5050, |
| "tokens_trained": 16.546341616 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.4354609929078015, |
| "grad_norm": 1.0609499216079712, |
| "loss": 1.17, |
| "loss_ce": 1.1456844806671143, |
| "loss_region": 0.060984060168266296, |
| "loss_total": 1.206668496131897, |
| "lr": 0.0010392428159539061, |
| "router/selected_tokens_s0": 421.9375, |
| "router/selected_tokens_s1": 14.5, |
| "step": 5060, |
| "tokens_trained": 16.579105456 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.4382978723404256, |
| "grad_norm": 1.4889984130859375, |
| "loss": 1.1639, |
| "loss_ce": 1.1069124937057495, |
| "loss_region": 0.0588313490152359, |
| "loss_total": 1.1657438278198242, |
| "lr": 0.001038835923970055, |
| "router/selected_tokens_s0": 412.625, |
| "router/selected_tokens_s1": 12.0, |
| "step": 5070, |
| "tokens_trained": 16.611870896 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.4411347517730497, |
| "grad_norm": 0.7745299935340881, |
| "loss": 1.1727, |
| "loss_ce": 1.1384263038635254, |
| "loss_region": 0.06151129677891731, |
| "loss_total": 1.1999375820159912, |
| "lr": 0.001038429031986204, |
| "router/selected_tokens_s0": 441.625, |
| "router/selected_tokens_s1": 15.9375, |
| "step": 5080, |
| "tokens_trained": 16.644635536 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.4439716312056738, |
| "grad_norm": 0.3887619376182556, |
| "loss": 1.1641, |
| "loss_ce": 1.0129387378692627, |
| "loss_region": 0.057277847081422806, |
| "loss_total": 1.0702165365219116, |
| "lr": 0.001038022140002353, |
| "router/selected_tokens_s0": 449.75, |
| "router/selected_tokens_s1": 10.625, |
| "step": 5090, |
| "tokens_trained": 16.67739828 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.4468085106382977, |
| "grad_norm": 0.9931978583335876, |
| "loss": 1.1593, |
| "loss_ce": 1.1114816665649414, |
| "loss_region": 0.0629933550953865, |
| "loss_total": 1.174475073814392, |
| "lr": 0.001037615248018502, |
| "router/selected_tokens_s0": 430.25, |
| "router/selected_tokens_s1": 16.6875, |
| "step": 5100, |
| "tokens_trained": 16.71016372 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.449645390070922, |
| "grad_norm": 1.7544564008712769, |
| "loss": 1.1669, |
| "loss_ce": 1.1303014755249023, |
| "loss_region": 0.05773321911692619, |
| "loss_total": 1.1880346536636353, |
| "lr": 0.001037208356034651, |
| "router/selected_tokens_s0": 436.8125, |
| "router/selected_tokens_s1": 10.875, |
| "step": 5110, |
| "tokens_trained": 16.74292916 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.452482269503546, |
| "grad_norm": 0.46224844455718994, |
| "loss": 1.1623, |
| "loss_ce": 1.22366201877594, |
| "loss_region": 0.06073080375790596, |
| "loss_total": 1.2843928337097168, |
| "lr": 0.0010368014640508, |
| "router/selected_tokens_s0": 371.875, |
| "router/selected_tokens_s1": 12.625, |
| "step": 5120, |
| "tokens_trained": 16.775691312 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.4553191489361703, |
| "grad_norm": 0.4505470097064972, |
| "loss": 1.1699, |
| "loss_ce": 1.0685856342315674, |
| "loss_region": 0.06006813421845436, |
| "loss_total": 1.1286537647247314, |
| "lr": 0.0010363945720669489, |
| "router/selected_tokens_s0": 453.3125, |
| "router/selected_tokens_s1": 14.3125, |
| "step": 5130, |
| "tokens_trained": 16.808456752 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.4581560283687942, |
| "grad_norm": 0.443447470664978, |
| "loss": 1.1616, |
| "loss_ce": 1.0702182054519653, |
| "loss_region": 0.057352010160684586, |
| "loss_total": 1.1275702714920044, |
| "lr": 0.0010359876800830978, |
| "router/selected_tokens_s0": 478.625, |
| "router/selected_tokens_s1": 11.5625, |
| "step": 5140, |
| "tokens_trained": 16.841222192 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.4609929078014185, |
| "grad_norm": 1.0394787788391113, |
| "loss": 1.1641, |
| "loss_ce": 1.0516167879104614, |
| "loss_region": 0.06636694073677063, |
| "loss_total": 1.1179836988449097, |
| "lr": 0.0010355807880992468, |
| "router/selected_tokens_s0": 492.0625, |
| "router/selected_tokens_s1": 23.3125, |
| "step": 5150, |
| "tokens_trained": 16.873987632 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.4638297872340424, |
| "grad_norm": 0.7827674150466919, |
| "loss": 1.1617, |
| "loss_ce": 1.1885493993759155, |
| "loss_region": 0.057987358421087265, |
| "loss_total": 1.2465367317199707, |
| "lr": 0.0010351738961153958, |
| "router/selected_tokens_s0": 357.6875, |
| "router/selected_tokens_s1": 8.625, |
| "step": 5160, |
| "tokens_trained": 16.906753072 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.4666666666666668, |
| "grad_norm": 0.9744030237197876, |
| "loss": 1.1617, |
| "loss_ce": 1.0603646039962769, |
| "loss_region": 0.05650093033909798, |
| "loss_total": 1.1168655157089233, |
| "lr": 0.0010347670041315447, |
| "router/selected_tokens_s0": 504.5625, |
| "router/selected_tokens_s1": 10.3125, |
| "step": 5170, |
| "tokens_trained": 16.939515096 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.4695035460992907, |
| "grad_norm": 0.2740679383277893, |
| "loss": 1.1609, |
| "loss_ce": 1.0844225883483887, |
| "loss_region": 0.06521714478731155, |
| "loss_total": 1.1496397256851196, |
| "lr": 0.0010343601121476937, |
| "router/selected_tokens_s0": 428.0, |
| "router/selected_tokens_s1": 19.0625, |
| "step": 5180, |
| "tokens_trained": 16.972279352 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.472340425531915, |
| "grad_norm": 1.3795894384384155, |
| "loss": 1.1686, |
| "loss_ce": 1.124297857284546, |
| "loss_region": 0.05923352763056755, |
| "loss_total": 1.183531403541565, |
| "lr": 0.0010339532201638427, |
| "router/selected_tokens_s0": 442.0, |
| "router/selected_tokens_s1": 13.0, |
| "step": 5190, |
| "tokens_trained": 17.005044792 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.475177304964539, |
| "grad_norm": 0.8263188004493713, |
| "loss": 1.1675, |
| "loss_ce": 1.0006603002548218, |
| "loss_region": 0.06344455480575562, |
| "loss_total": 1.0641047954559326, |
| "lr": 0.0010335463281799918, |
| "router/selected_tokens_s0": 427.125, |
| "router/selected_tokens_s1": 17.3125, |
| "step": 5200, |
| "tokens_trained": 17.037810232 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.4780141843971633, |
| "grad_norm": 0.5774252414703369, |
| "loss": 1.1614, |
| "loss_ce": 1.1466084718704224, |
| "loss_region": 0.05995073914527893, |
| "loss_total": 1.206559181213379, |
| "lr": 0.0010331394361961408, |
| "router/selected_tokens_s0": 391.25, |
| "router/selected_tokens_s1": 12.3125, |
| "step": 5210, |
| "tokens_trained": 17.070575672 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.4808510638297872, |
| "grad_norm": 1.3741319179534912, |
| "loss": 1.1559, |
| "loss_ce": 1.1331387758255005, |
| "loss_region": 0.059787504374980927, |
| "loss_total": 1.192926287651062, |
| "lr": 0.0010327325442122898, |
| "router/selected_tokens_s0": 431.9375, |
| "router/selected_tokens_s1": 13.5625, |
| "step": 5220, |
| "tokens_trained": 17.103341112 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.4836879432624113, |
| "grad_norm": 0.9346311092376709, |
| "loss": 1.1677, |
| "loss_ce": 1.1160879135131836, |
| "loss_region": 0.05967224761843681, |
| "loss_total": 1.1757601499557495, |
| "lr": 0.0010323256522284385, |
| "router/selected_tokens_s0": 423.9375, |
| "router/selected_tokens_s1": 13.0625, |
| "step": 5230, |
| "tokens_trained": 17.136106552 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.4865248226950354, |
| "grad_norm": 0.9840303659439087, |
| "loss": 1.1563, |
| "loss_ce": 1.1366236209869385, |
| "loss_region": 0.057842716574668884, |
| "loss_total": 1.1944663524627686, |
| "lr": 0.0010319187602445875, |
| "router/selected_tokens_s0": 437.0625, |
| "router/selected_tokens_s1": 11.1875, |
| "step": 5240, |
| "tokens_trained": 17.168867192 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.4893617021276595, |
| "grad_norm": 0.6291745901107788, |
| "loss": 1.1678, |
| "loss_ce": 1.1215194463729858, |
| "loss_region": 0.061599425971508026, |
| "loss_total": 1.1831188201904297, |
| "lr": 0.0010315118682607365, |
| "router/selected_tokens_s0": 442.25, |
| "router/selected_tokens_s1": 15.9375, |
| "step": 5250, |
| "tokens_trained": 17.201632632 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.4921985815602836, |
| "grad_norm": 0.9545523524284363, |
| "loss": 1.1654, |
| "loss_ce": 1.0638751983642578, |
| "loss_region": 0.056087564677000046, |
| "loss_total": 1.1199628114700317, |
| "lr": 0.0010311049762768854, |
| "router/selected_tokens_s0": 481.5625, |
| "router/selected_tokens_s1": 9.5625, |
| "step": 5260, |
| "tokens_trained": 17.234398072 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.4950354609929077, |
| "grad_norm": 1.1328867673873901, |
| "loss": 1.1627, |
| "loss_ce": 1.1201285123825073, |
| "loss_region": 0.06611510366201401, |
| "loss_total": 1.1862436532974243, |
| "lr": 0.0010306980842930346, |
| "router/selected_tokens_s0": 423.25, |
| "router/selected_tokens_s1": 20.3125, |
| "step": 5270, |
| "tokens_trained": 17.26716272 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.4978723404255319, |
| "grad_norm": 1.4490420818328857, |
| "loss": 1.1635, |
| "loss_ce": 1.0298516750335693, |
| "loss_region": 0.05839422717690468, |
| "loss_total": 1.0882458686828613, |
| "lr": 0.0010302911923091836, |
| "router/selected_tokens_s0": 565.0625, |
| "router/selected_tokens_s1": 13.6875, |
| "step": 5280, |
| "tokens_trained": 17.29992816 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.500709219858156, |
| "grad_norm": 0.5782109498977661, |
| "loss": 1.167, |
| "loss_ce": 1.129813313484192, |
| "loss_region": 0.05724978446960449, |
| "loss_total": 1.1870630979537964, |
| "lr": 0.0010298843003253325, |
| "router/selected_tokens_s0": 409.9375, |
| "router/selected_tokens_s1": 10.0, |
| "step": 5290, |
| "tokens_trained": 17.3326928 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.50354609929078, |
| "grad_norm": 1.1590781211853027, |
| "loss": 1.1645, |
| "loss_ce": 1.1144260168075562, |
| "loss_region": 0.06544779241085052, |
| "loss_total": 1.1798738241195679, |
| "lr": 0.0010294774083414815, |
| "router/selected_tokens_s0": 403.5625, |
| "router/selected_tokens_s1": 18.1875, |
| "step": 5300, |
| "tokens_trained": 17.36545824 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.5063829787234042, |
| "grad_norm": 1.357226848602295, |
| "loss": 1.1685, |
| "loss_ce": 1.1005100011825562, |
| "loss_region": 0.05805032700300217, |
| "loss_total": 1.1585602760314941, |
| "lr": 0.0010290705163576305, |
| "router/selected_tokens_s0": 439.5, |
| "router/selected_tokens_s1": 10.875, |
| "step": 5310, |
| "tokens_trained": 17.39822368 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.5092198581560283, |
| "grad_norm": 0.7000742554664612, |
| "loss": 1.1605, |
| "loss_ce": 1.1245887279510498, |
| "loss_region": 0.054603416472673416, |
| "loss_total": 1.1791921854019165, |
| "lr": 0.0010286636243737794, |
| "router/selected_tokens_s0": 480.5, |
| "router/selected_tokens_s1": 7.9375, |
| "step": 5320, |
| "tokens_trained": 17.43098912 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.5120567375886524, |
| "grad_norm": 1.1702980995178223, |
| "loss": 1.1649, |
| "loss_ce": 1.088313341140747, |
| "loss_region": 0.06897804886102676, |
| "loss_total": 1.1572914123535156, |
| "lr": 0.0010282567323899284, |
| "router/selected_tokens_s0": 401.875, |
| "router/selected_tokens_s1": 21.875, |
| "step": 5330, |
| "tokens_trained": 17.46375296 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.5148936170212766, |
| "grad_norm": 1.2126129865646362, |
| "loss": 1.1653, |
| "loss_ce": 1.1331862211227417, |
| "loss_region": 0.06121586263179779, |
| "loss_total": 1.1944020986557007, |
| "lr": 0.0010278498404060774, |
| "router/selected_tokens_s0": 462.125, |
| "router/selected_tokens_s1": 16.0625, |
| "step": 5340, |
| "tokens_trained": 17.4965184 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.5177304964539007, |
| "grad_norm": 0.9559081792831421, |
| "loss": 1.1667, |
| "loss_ce": 1.076651930809021, |
| "loss_region": 0.05728324502706528, |
| "loss_total": 1.1339352130889893, |
| "lr": 0.0010274429484222263, |
| "router/selected_tokens_s0": 429.375, |
| "router/selected_tokens_s1": 10.25, |
| "step": 5350, |
| "tokens_trained": 17.52928304 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.5205673758865248, |
| "grad_norm": 1.1095921993255615, |
| "loss": 1.163, |
| "loss_ce": 1.112319827079773, |
| "loss_region": 0.06240734085440636, |
| "loss_total": 1.174727201461792, |
| "lr": 0.0010270360564383753, |
| "router/selected_tokens_s0": 468.75, |
| "router/selected_tokens_s1": 17.25, |
| "step": 5360, |
| "tokens_trained": 17.56204848 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.523404255319149, |
| "grad_norm": 0.711679995059967, |
| "loss": 1.1636, |
| "loss_ce": 1.0815953016281128, |
| "loss_region": 0.05967709422111511, |
| "loss_total": 1.1412724256515503, |
| "lr": 0.0010266291644545243, |
| "router/selected_tokens_s0": 441.1875, |
| "router/selected_tokens_s1": 13.375, |
| "step": 5370, |
| "tokens_trained": 17.59481392 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.526241134751773, |
| "grad_norm": 1.6682300567626953, |
| "loss": 1.16, |
| "loss_ce": 1.119455099105835, |
| "loss_region": 0.05931823328137398, |
| "loss_total": 1.178773283958435, |
| "lr": 0.0010262222724706732, |
| "router/selected_tokens_s0": 456.625, |
| "router/selected_tokens_s1": 13.75, |
| "step": 5380, |
| "tokens_trained": 17.62757936 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.5290780141843971, |
| "grad_norm": 0.7429375052452087, |
| "loss": 1.1681, |
| "loss_ce": 1.1057552099227905, |
| "loss_region": 0.06057373434305191, |
| "loss_total": 1.1663289070129395, |
| "lr": 0.0010258153804868222, |
| "router/selected_tokens_s0": 455.625, |
| "router/selected_tokens_s1": 14.6875, |
| "step": 5390, |
| "tokens_trained": 17.6603448 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.5319148936170213, |
| "grad_norm": 1.3467381000518799, |
| "loss": 1.163, |
| "loss_ce": 1.1104803085327148, |
| "loss_region": 0.05636877194046974, |
| "loss_total": 1.166849136352539, |
| "lr": 0.0010254084885029712, |
| "router/selected_tokens_s0": 436.8125, |
| "router/selected_tokens_s1": 9.375, |
| "step": 5400, |
| "tokens_trained": 17.69310944 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.5347517730496454, |
| "grad_norm": 0.7579314708709717, |
| "loss": 1.1633, |
| "loss_ce": 1.1696826219558716, |
| "loss_region": 0.06521567702293396, |
| "loss_total": 1.234898328781128, |
| "lr": 0.0010250015965191201, |
| "router/selected_tokens_s0": 378.1875, |
| "router/selected_tokens_s1": 17.0, |
| "step": 5410, |
| "tokens_trained": 17.72587488 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.5375886524822695, |
| "grad_norm": 1.506962776184082, |
| "loss": 1.1663, |
| "loss_ce": 1.0815457105636597, |
| "loss_region": 0.0584101639688015, |
| "loss_total": 1.1399558782577515, |
| "lr": 0.001024594704535269, |
| "router/selected_tokens_s0": 421.25, |
| "router/selected_tokens_s1": 11.375, |
| "step": 5420, |
| "tokens_trained": 17.75863776 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.5404255319148936, |
| "grad_norm": 1.0177658796310425, |
| "loss": 1.1602, |
| "loss_ce": 1.1353583335876465, |
| "loss_region": 0.05683788284659386, |
| "loss_total": 1.192196249961853, |
| "lr": 0.001024187812551418, |
| "router/selected_tokens_s0": 431.375, |
| "router/selected_tokens_s1": 10.0625, |
| "step": 5430, |
| "tokens_trained": 17.7914032 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.5432624113475177, |
| "grad_norm": 1.3501242399215698, |
| "loss": 1.1612, |
| "loss_ce": 1.0791661739349365, |
| "loss_region": 0.06427235901355743, |
| "loss_total": 1.1434385776519775, |
| "lr": 0.001023780920567567, |
| "router/selected_tokens_s0": 431.6875, |
| "router/selected_tokens_s1": 18.8125, |
| "step": 5440, |
| "tokens_trained": 17.824165472 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.5460992907801419, |
| "grad_norm": 0.5377880930900574, |
| "loss": 1.1609, |
| "loss_ce": 1.1013327836990356, |
| "loss_region": 0.05677126348018646, |
| "loss_total": 1.1581040620803833, |
| "lr": 0.0010233740285837162, |
| "router/selected_tokens_s0": 467.1875, |
| "router/selected_tokens_s1": 10.125, |
| "step": 5450, |
| "tokens_trained": 17.856930912 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.548936170212766, |
| "grad_norm": 0.5063560605049133, |
| "loss": 1.1603, |
| "loss_ce": 1.0957387685775757, |
| "loss_region": 0.057693321257829666, |
| "loss_total": 1.1534321308135986, |
| "lr": 0.0010229671365998652, |
| "router/selected_tokens_s0": 456.125, |
| "router/selected_tokens_s1": 11.6875, |
| "step": 5460, |
| "tokens_trained": 17.88969364 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.55177304964539, |
| "grad_norm": 1.4588333368301392, |
| "loss": 1.1709, |
| "loss_ce": 1.1442478895187378, |
| "loss_region": 0.06362947821617126, |
| "loss_total": 1.2078773975372314, |
| "lr": 0.0010225602446160141, |
| "router/selected_tokens_s0": 434.8125, |
| "router/selected_tokens_s1": 18.0625, |
| "step": 5470, |
| "tokens_trained": 17.92245908 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.5546099290780142, |
| "grad_norm": 1.0143531560897827, |
| "loss": 1.1552, |
| "loss_ce": 1.123971700668335, |
| "loss_region": 0.05629895254969597, |
| "loss_total": 1.1802706718444824, |
| "lr": 0.0010221533526321629, |
| "router/selected_tokens_s0": 445.1875, |
| "router/selected_tokens_s1": 9.5625, |
| "step": 5480, |
| "tokens_trained": 17.95522452 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.5574468085106383, |
| "grad_norm": 0.7020089030265808, |
| "loss": 1.1643, |
| "loss_ce": 1.1418280601501465, |
| "loss_region": 0.06544672697782516, |
| "loss_total": 1.2072747945785522, |
| "lr": 0.0010217464606483118, |
| "router/selected_tokens_s0": 442.6875, |
| "router/selected_tokens_s1": 20.5625, |
| "step": 5490, |
| "tokens_trained": 17.98798996 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.5602836879432624, |
| "grad_norm": 0.9480003714561462, |
| "loss": 1.1674, |
| "loss_ce": 1.1325876712799072, |
| "loss_region": 0.057331230491399765, |
| "loss_total": 1.189918875694275, |
| "lr": 0.0010213395686644608, |
| "router/selected_tokens_s0": 416.5625, |
| "router/selected_tokens_s1": 9.375, |
| "step": 5500, |
| "tokens_trained": 18.0207554 |
| }, |
| { |
| "epoch": 1.5602836879432624, |
| "eval_ppl": 3.016075433537383, |
| "eval_runtime": 2.0117, |
| "step": 5500, |
| "tokens_trained": 18.0207554 |
| }, |
| { |
| "epoch": 1.5602836879432624, |
| "eval_F": 0.000746067118015334, |
| "eval_F_cds": 0.0009792471476057856, |
| "eval_F_dig": 0.0005859832799437456, |
| "eval_F_exon": 0.0006795232464902625, |
| "eval_F_intron": 0.0007563583167979783, |
| "eval_F_nig": 0.0008204133495037994, |
| "eval_F_promoter": 0.0005987724506769419, |
| "eval_F_utr": 0.0006501742680210188, |
| "eval_G": 0.012171905510010335, |
| "eval_G_cds": 0.010553139177746832, |
| "eval_G_dig": 0.017514414578287365, |
| "eval_G_exon": 0.011079122200364224, |
| "eval_G_intron": 0.012297019961888818, |
| "eval_G_nig": 0.013013662524390667, |
| "eval_G_promoter": 0.0109304193923973, |
| "eval_G_utr": 0.01076197459790452, |
| "eval_avg_bp_per_token": 1340.3619806488332, |
| "eval_bp_per_token/cds": 1021.1926605504588, |
| "eval_bp_per_token/dig": 1706.5333333333333, |
| "eval_bp_per_token/exon": 1471.62, |
| "eval_bp_per_token/intron": 1322.1246832235174, |
| "eval_bp_per_token/nig": 1218.8977673325498, |
| "eval_bp_per_token/promoter": 1670.0835164835164, |
| "eval_bp_per_token/utr": 1538.049180327869, |
| "eval_ppl_cds": 3.7454442422362386, |
| "eval_ppl_dig": 1.1207937865323054, |
| "eval_ppl_exon": 3.2698117991844704, |
| "eval_ppl_intron": 3.037524573763349, |
| "eval_ppl_nig": 2.8559054717599377, |
| "eval_ppl_promoter": 3.304611442870541, |
| "eval_ppl_utr": 3.4102085366729313, |
| "step": 5500, |
| "tokens_trained": 18.0207554 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.5631205673758866, |
| "grad_norm": 1.2048259973526, |
| "loss": 1.1553, |
| "loss_ce": 1.1304912567138672, |
| "loss_region": 0.05862328037619591, |
| "loss_total": 1.1891145706176758, |
| "lr": 0.0010209326766806098, |
| "router/selected_tokens_s0": 446.1875, |
| "router/selected_tokens_s1": 12.5625, |
| "step": 5510, |
| "tokens_trained": 18.05352084 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.5659574468085107, |
| "grad_norm": 0.827768087387085, |
| "loss": 1.1641, |
| "loss_ce": 1.0551944971084595, |
| "loss_region": 0.0649186372756958, |
| "loss_total": 1.1201131343841553, |
| "lr": 0.001020525784696759, |
| "router/selected_tokens_s0": 453.125, |
| "router/selected_tokens_s1": 20.1875, |
| "step": 5520, |
| "tokens_trained": 18.08628548 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.5687943262411348, |
| "grad_norm": 0.8574738502502441, |
| "loss": 1.1586, |
| "loss_ce": 1.1029974222183228, |
| "loss_region": 0.05974757671356201, |
| "loss_total": 1.1627449989318848, |
| "lr": 0.001020118892712908, |
| "router/selected_tokens_s0": 400.1875, |
| "router/selected_tokens_s1": 12.0, |
| "step": 5530, |
| "tokens_trained": 18.11905092 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.571631205673759, |
| "grad_norm": 0.4658717215061188, |
| "loss": 1.1563, |
| "loss_ce": 1.116166591644287, |
| "loss_region": 0.05434912443161011, |
| "loss_total": 1.170515775680542, |
| "lr": 0.0010197120007290569, |
| "router/selected_tokens_s0": 510.4375, |
| "router/selected_tokens_s1": 7.8125, |
| "step": 5540, |
| "tokens_trained": 18.15181636 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.574468085106383, |
| "grad_norm": 0.6120085716247559, |
| "loss": 1.1627, |
| "loss_ce": 1.064978003501892, |
| "loss_region": 0.06429442018270493, |
| "loss_total": 1.1292724609375, |
| "lr": 0.0010193051087452058, |
| "router/selected_tokens_s0": 464.5625, |
| "router/selected_tokens_s1": 19.75, |
| "step": 5550, |
| "tokens_trained": 18.1845818 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.5773049645390071, |
| "grad_norm": 1.1164491176605225, |
| "loss": 1.162, |
| "loss_ce": 1.0743745565414429, |
| "loss_region": 0.059213023632764816, |
| "loss_total": 1.1335875988006592, |
| "lr": 0.0010188982167613548, |
| "router/selected_tokens_s0": 455.0625, |
| "router/selected_tokens_s1": 13.4375, |
| "step": 5560, |
| "tokens_trained": 18.2173404 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.580141843971631, |
| "grad_norm": 0.7541922926902771, |
| "loss": 1.1649, |
| "loss_ce": 0.9796167612075806, |
| "loss_region": 0.060397908091545105, |
| "loss_total": 1.040014624595642, |
| "lr": 0.0010184913247775038, |
| "router/selected_tokens_s0": 482.5625, |
| "router/selected_tokens_s1": 16.5625, |
| "step": 5570, |
| "tokens_trained": 18.25010584 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.5829787234042554, |
| "grad_norm": 0.6305317878723145, |
| "loss": 1.1622, |
| "loss_ce": 1.0828227996826172, |
| "loss_region": 0.06084812805056572, |
| "loss_total": 1.143670916557312, |
| "lr": 0.0010180844327936527, |
| "router/selected_tokens_s0": 499.9375, |
| "router/selected_tokens_s1": 16.375, |
| "step": 5580, |
| "tokens_trained": 18.28287128 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.5858156028368793, |
| "grad_norm": 1.2468684911727905, |
| "loss": 1.1579, |
| "loss_ce": 1.1797648668289185, |
| "loss_region": 0.06281707435846329, |
| "loss_total": 1.2425819635391235, |
| "lr": 0.0010176775408098017, |
| "router/selected_tokens_s0": 443.6875, |
| "router/selected_tokens_s1": 17.4375, |
| "step": 5590, |
| "tokens_trained": 18.31563672 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.5886524822695036, |
| "grad_norm": 0.6272963285446167, |
| "loss": 1.1579, |
| "loss_ce": 1.1127893924713135, |
| "loss_region": 0.05846928060054779, |
| "loss_total": 1.1712586879730225, |
| "lr": 0.0010172706488259507, |
| "router/selected_tokens_s0": 441.0, |
| "router/selected_tokens_s1": 12.1875, |
| "step": 5600, |
| "tokens_trained": 18.34840216 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.5914893617021275, |
| "grad_norm": 0.9740651249885559, |
| "loss": 1.1603, |
| "loss_ce": 1.0885473489761353, |
| "loss_region": 0.06197122111916542, |
| "loss_total": 1.150518536567688, |
| "lr": 0.0010168637568420996, |
| "router/selected_tokens_s0": 472.875, |
| "router/selected_tokens_s1": 17.3125, |
| "step": 5610, |
| "tokens_trained": 18.38116752 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.5943262411347519, |
| "grad_norm": 0.9614301919937134, |
| "loss": 1.1543, |
| "loss_ce": 1.0123392343521118, |
| "loss_region": 0.05750362575054169, |
| "loss_total": 1.06984281539917, |
| "lr": 0.0010164568648582486, |
| "router/selected_tokens_s0": 494.625, |
| "router/selected_tokens_s1": 11.9375, |
| "step": 5620, |
| "tokens_trained": 18.41393296 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.5971631205673757, |
| "grad_norm": 0.8865266442298889, |
| "loss": 1.1602, |
| "loss_ce": 1.1279913187026978, |
| "loss_region": 0.06505395472049713, |
| "loss_total": 1.1930452585220337, |
| "lr": 0.0010160499728743976, |
| "router/selected_tokens_s0": 424.8125, |
| "router/selected_tokens_s1": 19.4375, |
| "step": 5630, |
| "tokens_trained": 18.4466984 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.6, |
| "grad_norm": 0.6749231219291687, |
| "loss": 1.1557, |
| "loss_ce": 1.1030737161636353, |
| "loss_region": 0.0562567301094532, |
| "loss_total": 1.1593304872512817, |
| "lr": 0.0010156430808905465, |
| "router/selected_tokens_s0": 428.25, |
| "router/selected_tokens_s1": 8.75, |
| "step": 5640, |
| "tokens_trained": 18.47946384 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.602836879432624, |
| "grad_norm": 1.4124048948287964, |
| "loss": 1.1577, |
| "loss_ce": 1.0829676389694214, |
| "loss_region": 0.061727654188871384, |
| "loss_total": 1.1446952819824219, |
| "lr": 0.0010152361889066955, |
| "router/selected_tokens_s0": 464.625, |
| "router/selected_tokens_s1": 16.6875, |
| "step": 5650, |
| "tokens_trained": 18.51222928 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.6056737588652483, |
| "grad_norm": 1.0576155185699463, |
| "loss": 1.1663, |
| "loss_ce": 1.144495964050293, |
| "loss_region": 0.06459303945302963, |
| "loss_total": 1.2090890407562256, |
| "lr": 0.0010148292969228445, |
| "router/selected_tokens_s0": 389.9375, |
| "router/selected_tokens_s1": 17.9375, |
| "step": 5660, |
| "tokens_trained": 18.54499392 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.6085106382978722, |
| "grad_norm": 0.6192797422409058, |
| "loss": 1.159, |
| "loss_ce": 1.1436125040054321, |
| "loss_region": 0.056959912180900574, |
| "loss_total": 1.2005723714828491, |
| "lr": 0.0010144224049389934, |
| "router/selected_tokens_s0": 447.125, |
| "router/selected_tokens_s1": 9.8125, |
| "step": 5670, |
| "tokens_trained": 18.57775936 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.6113475177304966, |
| "grad_norm": 0.9440255165100098, |
| "loss": 1.1491, |
| "loss_ce": 1.0525715351104736, |
| "loss_region": 0.05895181745290756, |
| "loss_total": 1.1115233898162842, |
| "lr": 0.0010140155129551424, |
| "router/selected_tokens_s0": 494.875, |
| "router/selected_tokens_s1": 14.3125, |
| "step": 5680, |
| "tokens_trained": 18.6105248 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.6141843971631205, |
| "grad_norm": 0.8564097881317139, |
| "loss": 1.1583, |
| "loss_ce": 1.1537847518920898, |
| "loss_region": 0.06267160922288895, |
| "loss_total": 1.216456413269043, |
| "lr": 0.0010136086209712914, |
| "router/selected_tokens_s0": 426.3125, |
| "router/selected_tokens_s1": 16.625, |
| "step": 5690, |
| "tokens_trained": 18.64329024 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.6170212765957448, |
| "grad_norm": 1.5163027048110962, |
| "loss": 1.1573, |
| "loss_ce": 1.1002916097640991, |
| "loss_region": 0.05866721272468567, |
| "loss_total": 1.1589587926864624, |
| "lr": 0.0010132017289874405, |
| "router/selected_tokens_s0": 478.5, |
| "router/selected_tokens_s1": 12.875, |
| "step": 5700, |
| "tokens_trained": 18.67605568 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.6198581560283687, |
| "grad_norm": 1.0508079528808594, |
| "loss": 1.1576, |
| "loss_ce": 1.0324606895446777, |
| "loss_region": 0.05855386331677437, |
| "loss_total": 1.0910145044326782, |
| "lr": 0.0010127948370035895, |
| "router/selected_tokens_s0": 445.6875, |
| "router/selected_tokens_s1": 12.5625, |
| "step": 5710, |
| "tokens_trained": 18.70882112 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.622695035460993, |
| "grad_norm": 1.1140241622924805, |
| "loss": 1.1626, |
| "loss_ce": 1.1185026168823242, |
| "loss_region": 0.06571623682975769, |
| "loss_total": 1.1842188835144043, |
| "lr": 0.0010123879450197385, |
| "router/selected_tokens_s0": 455.25, |
| "router/selected_tokens_s1": 21.25, |
| "step": 5720, |
| "tokens_trained": 18.74158656 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.625531914893617, |
| "grad_norm": 0.9186303615570068, |
| "loss": 1.1624, |
| "loss_ce": 1.1634204387664795, |
| "loss_region": 0.058083634823560715, |
| "loss_total": 1.2215040922164917, |
| "lr": 0.0010119810530358872, |
| "router/selected_tokens_s0": 412.5, |
| "router/selected_tokens_s1": 10.6875, |
| "step": 5730, |
| "tokens_trained": 18.774352 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.6283687943262413, |
| "grad_norm": 1.6307847499847412, |
| "loss": 1.1612, |
| "loss_ce": 1.114676833152771, |
| "loss_region": 0.05957353860139847, |
| "loss_total": 1.1742503643035889, |
| "lr": 0.0010115741610520362, |
| "router/selected_tokens_s0": 475.8125, |
| "router/selected_tokens_s1": 14.375, |
| "step": 5740, |
| "tokens_trained": 18.80711744 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.6312056737588652, |
| "grad_norm": 0.432007759809494, |
| "loss": 1.1634, |
| "loss_ce": 1.0877857208251953, |
| "loss_region": 0.06144358590245247, |
| "loss_total": 1.1492292881011963, |
| "lr": 0.0010111672690681851, |
| "router/selected_tokens_s0": 473.625, |
| "router/selected_tokens_s1": 16.75, |
| "step": 5750, |
| "tokens_trained": 18.83988288 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.6340425531914895, |
| "grad_norm": 0.32113170623779297, |
| "loss": 1.1568, |
| "loss_ce": 1.0600965023040771, |
| "loss_region": 0.05585326626896858, |
| "loss_total": 1.1159497499465942, |
| "lr": 0.0010107603770843341, |
| "router/selected_tokens_s0": 512.875, |
| "router/selected_tokens_s1": 10.0, |
| "step": 5760, |
| "tokens_trained": 18.87264832 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.6368794326241134, |
| "grad_norm": 0.5704630613327026, |
| "loss": 1.1561, |
| "loss_ce": 1.140356421470642, |
| "loss_region": 0.06394833326339722, |
| "loss_total": 1.2043046951293945, |
| "lr": 0.0010103534851004833, |
| "router/selected_tokens_s0": 443.4375, |
| "router/selected_tokens_s1": 18.6875, |
| "step": 5770, |
| "tokens_trained": 18.90541376 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.6397163120567377, |
| "grad_norm": 0.886239230632782, |
| "loss": 1.1629, |
| "loss_ce": 1.092922329902649, |
| "loss_region": 0.060600440949201584, |
| "loss_total": 1.1535227298736572, |
| "lr": 0.0010099465931166323, |
| "router/selected_tokens_s0": 464.0625, |
| "router/selected_tokens_s1": 15.5625, |
| "step": 5780, |
| "tokens_trained": 18.9381792 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.6425531914893616, |
| "grad_norm": 0.7641704082489014, |
| "loss": 1.1497, |
| "loss_ce": 0.9872031807899475, |
| "loss_region": 0.056345850229263306, |
| "loss_total": 1.0435490608215332, |
| "lr": 0.0010095397011327812, |
| "router/selected_tokens_s0": 468.5625, |
| "router/selected_tokens_s1": 9.625, |
| "step": 5790, |
| "tokens_trained": 18.97094464 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.645390070921986, |
| "grad_norm": 0.7847086191177368, |
| "loss": 1.1551, |
| "loss_ce": 1.0804718732833862, |
| "loss_region": 0.059678997844457626, |
| "loss_total": 1.1401509046554565, |
| "lr": 0.0010091328091489302, |
| "router/selected_tokens_s0": 463.1875, |
| "router/selected_tokens_s1": 14.4375, |
| "step": 5800, |
| "tokens_trained": 19.00370848 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.6482269503546099, |
| "grad_norm": 0.5308611989021301, |
| "loss": 1.1598, |
| "loss_ce": 1.1176682710647583, |
| "loss_region": 0.06210538372397423, |
| "loss_total": 1.1797736883163452, |
| "lr": 0.0010087259171650792, |
| "router/selected_tokens_s0": 466.8125, |
| "router/selected_tokens_s1": 17.4375, |
| "step": 5810, |
| "tokens_trained": 19.03647392 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.6510638297872342, |
| "grad_norm": 0.9863501787185669, |
| "loss": 1.1553, |
| "loss_ce": 1.0464540719985962, |
| "loss_region": 0.05664392560720444, |
| "loss_total": 1.1030980348587036, |
| "lr": 0.0010083190251812281, |
| "router/selected_tokens_s0": 457.5625, |
| "router/selected_tokens_s1": 10.0625, |
| "step": 5820, |
| "tokens_trained": 19.06923936 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.653900709219858, |
| "grad_norm": 0.6776580214500427, |
| "loss": 1.154, |
| "loss_ce": 1.0867358446121216, |
| "loss_region": 0.06509334594011307, |
| "loss_total": 1.1518292427062988, |
| "lr": 0.001007912133197377, |
| "router/selected_tokens_s0": 422.4375, |
| "router/selected_tokens_s1": 19.0, |
| "step": 5830, |
| "tokens_trained": 19.1020048 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.6567375886524822, |
| "grad_norm": 0.976962685585022, |
| "loss": 1.1597, |
| "loss_ce": 1.1119444370269775, |
| "loss_region": 0.0586853101849556, |
| "loss_total": 1.1706297397613525, |
| "lr": 0.001007505241213526, |
| "router/selected_tokens_s0": 402.0625, |
| "router/selected_tokens_s1": 11.0625, |
| "step": 5840, |
| "tokens_trained": 19.13477024 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.6595744680851063, |
| "grad_norm": 0.7124935984611511, |
| "loss": 1.1514, |
| "loss_ce": 1.1340878009796143, |
| "loss_region": 0.05913223326206207, |
| "loss_total": 1.1932200193405151, |
| "lr": 0.001007098349229675, |
| "router/selected_tokens_s0": 475.4375, |
| "router/selected_tokens_s1": 13.8125, |
| "step": 5850, |
| "tokens_trained": 19.16753568 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.6624113475177305, |
| "grad_norm": 0.6097173094749451, |
| "loss": 1.155, |
| "loss_ce": 1.017051339149475, |
| "loss_region": 0.06248475983738899, |
| "loss_total": 1.0795360803604126, |
| "lr": 0.001006691457245824, |
| "router/selected_tokens_s0": 446.25, |
| "router/selected_tokens_s1": 17.125, |
| "step": 5860, |
| "tokens_trained": 19.20030112 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.6652482269503546, |
| "grad_norm": 1.3263115882873535, |
| "loss": 1.1563, |
| "loss_ce": 1.0393388271331787, |
| "loss_region": 0.05702516809105873, |
| "loss_total": 1.0963640213012695, |
| "lr": 0.001006284565261973, |
| "router/selected_tokens_s0": 438.6875, |
| "router/selected_tokens_s1": 9.875, |
| "step": 5870, |
| "tokens_trained": 19.23306656 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.6680851063829787, |
| "grad_norm": 0.650032103061676, |
| "loss": 1.1522, |
| "loss_ce": 1.1351990699768066, |
| "loss_region": 0.05994255840778351, |
| "loss_total": 1.1951416730880737, |
| "lr": 0.001005877673278122, |
| "router/selected_tokens_s0": 459.4375, |
| "router/selected_tokens_s1": 14.375, |
| "step": 5880, |
| "tokens_trained": 19.265832 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.6709219858156028, |
| "grad_norm": 1.0562461614608765, |
| "loss": 1.1563, |
| "loss_ce": 1.1028565168380737, |
| "loss_region": 0.06198059767484665, |
| "loss_total": 1.164837121963501, |
| "lr": 0.0010054707812942709, |
| "router/selected_tokens_s0": 429.5625, |
| "router/selected_tokens_s1": 15.9375, |
| "step": 5890, |
| "tokens_trained": 19.29859664 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.673758865248227, |
| "grad_norm": 0.6909924149513245, |
| "loss": 1.1567, |
| "loss_ce": 1.1044297218322754, |
| "loss_region": 0.05730178952217102, |
| "loss_total": 1.161731481552124, |
| "lr": 0.0010050638893104198, |
| "router/selected_tokens_s0": 439.75, |
| "router/selected_tokens_s1": 10.5, |
| "step": 5900, |
| "tokens_trained": 19.33136116 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.676595744680851, |
| "grad_norm": 0.5099886059761047, |
| "loss": 1.1534, |
| "loss_ce": 1.1194809675216675, |
| "loss_region": 0.06314481794834137, |
| "loss_total": 1.1826257705688477, |
| "lr": 0.0010046569973265688, |
| "router/selected_tokens_s0": 453.375, |
| "router/selected_tokens_s1": 18.0625, |
| "step": 5910, |
| "tokens_trained": 19.3641258 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.6794326241134752, |
| "grad_norm": 0.703726053237915, |
| "loss": 1.1572, |
| "loss_ce": 1.098257303237915, |
| "loss_region": 0.059644293040037155, |
| "loss_total": 1.157901644706726, |
| "lr": 0.0010042501053427178, |
| "router/selected_tokens_s0": 455.5625, |
| "router/selected_tokens_s1": 13.6875, |
| "step": 5920, |
| "tokens_trained": 19.39688964 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.6822695035460993, |
| "grad_norm": 0.728217363357544, |
| "loss": 1.1559, |
| "loss_ce": 1.0854500532150269, |
| "loss_region": 0.05772150680422783, |
| "loss_total": 1.1431715488433838, |
| "lr": 0.0010038432133588667, |
| "router/selected_tokens_s0": 494.6875, |
| "router/selected_tokens_s1": 12.125, |
| "step": 5930, |
| "tokens_trained": 19.42965508 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.6851063829787234, |
| "grad_norm": 0.6884320378303528, |
| "loss": 1.1525, |
| "loss_ce": 1.1034066677093506, |
| "loss_region": 0.06277105957269669, |
| "loss_total": 1.166177749633789, |
| "lr": 0.0010034363213750157, |
| "router/selected_tokens_s0": 469.4375, |
| "router/selected_tokens_s1": 18.25, |
| "step": 5940, |
| "tokens_trained": 19.46241972 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.6879432624113475, |
| "grad_norm": 0.7436274290084839, |
| "loss": 1.1584, |
| "loss_ce": 1.1485267877578735, |
| "loss_region": 0.06026006489992142, |
| "loss_total": 1.2087868452072144, |
| "lr": 0.0010030294293911649, |
| "router/selected_tokens_s0": 454.25, |
| "router/selected_tokens_s1": 14.6875, |
| "step": 5950, |
| "tokens_trained": 19.49518516 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.6907801418439716, |
| "grad_norm": 0.6739595532417297, |
| "loss": 1.1518, |
| "loss_ce": 1.1541627645492554, |
| "loss_region": 0.05630716681480408, |
| "loss_total": 1.2104699611663818, |
| "lr": 0.0010026225374073139, |
| "router/selected_tokens_s0": 432.0, |
| "router/selected_tokens_s1": 9.125, |
| "step": 5960, |
| "tokens_trained": 19.5279506 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.6936170212765957, |
| "grad_norm": 0.9516991972923279, |
| "loss": 1.1595, |
| "loss_ce": 1.1228985786437988, |
| "loss_region": 0.06748107820749283, |
| "loss_total": 1.1903796195983887, |
| "lr": 0.0010022156454234628, |
| "router/selected_tokens_s0": 403.3125, |
| "router/selected_tokens_s1": 20.9375, |
| "step": 5970, |
| "tokens_trained": 19.56071604 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.6964539007092199, |
| "grad_norm": 0.7473562955856323, |
| "loss": 1.1562, |
| "loss_ce": 1.0967330932617188, |
| "loss_region": 0.059221576899290085, |
| "loss_total": 1.1559547185897827, |
| "lr": 0.0010018087534396116, |
| "router/selected_tokens_s0": 490.375, |
| "router/selected_tokens_s1": 14.1875, |
| "step": 5980, |
| "tokens_trained": 19.59348068 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.699290780141844, |
| "grad_norm": 1.1160359382629395, |
| "loss": 1.151, |
| "loss_ce": 1.089220404624939, |
| "loss_region": 0.05939415842294693, |
| "loss_total": 1.148614525794983, |
| "lr": 0.0010014018614557605, |
| "router/selected_tokens_s0": 511.4375, |
| "router/selected_tokens_s1": 14.9375, |
| "step": 5990, |
| "tokens_trained": 19.62624612 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.702127659574468, |
| "grad_norm": 1.0865445137023926, |
| "loss": 1.158, |
| "loss_ce": 1.0744832754135132, |
| "loss_region": 0.06320250779390335, |
| "loss_total": 1.137685775756836, |
| "lr": 0.0010009949694719095, |
| "router/selected_tokens_s0": 443.0625, |
| "router/selected_tokens_s1": 17.9375, |
| "step": 6000, |
| "tokens_trained": 19.659011552 |
| }, |
| { |
| "epoch": 1.702127659574468, |
| "eval_ppl": 2.998067346037042, |
| "eval_runtime": 2.067, |
| "step": 6000, |
| "tokens_trained": 19.659011552 |
| }, |
| { |
| "epoch": 1.702127659574468, |
| "eval_F": 0.0013543432401308525, |
| "eval_F_cds": 0.001446410924445243, |
| "eval_F_dig": 0.002304867567778733, |
| "eval_F_exon": 0.0012367323086122776, |
| "eval_F_intron": 0.0013766258061943945, |
| "eval_F_nig": 0.0013487171280326855, |
| "eval_F_promoter": 0.0012541321879013748, |
| "eval_F_utr": 0.0013323243197152024, |
| "eval_G": 0.013059948922586937, |
| "eval_G_cds": 0.012359187601069086, |
| "eval_G_dig": 0.02614904606805219, |
| "eval_G_exon": 0.012316438474266455, |
| "eval_G_intron": 0.013067059240385087, |
| "eval_G_nig": 0.013578898008810526, |
| "eval_G_promoter": 0.012155479142156476, |
| "eval_G_utr": 0.012293668761524606, |
| "eval_avg_bp_per_token": 738.3652610126979, |
| "eval_bp_per_token/cds": 691.3664596273292, |
| "eval_bp_per_token/dig": 433.864406779661, |
| "eval_bp_per_token/exon": 808.5824175824176, |
| "eval_bp_per_token/intron": 726.4138123085492, |
| "eval_bp_per_token/nig": 741.445318084346, |
| "eval_bp_per_token/promoter": 797.3641133263379, |
| "eval_bp_per_token/utr": 750.568, |
| "eval_ppl_cds": 3.7357774747386774, |
| "eval_ppl_dig": 1.0956703388366842, |
| "eval_ppl_exon": 3.2539563358776444, |
| "eval_ppl_intron": 3.019567682292059, |
| "eval_ppl_nig": 2.8358108842977168, |
| "eval_ppl_promoter": 3.2906528782913433, |
| "eval_ppl_utr": 3.398305967468575, |
| "step": 6000, |
| "tokens_trained": 19.659011552 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 30600, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 9, |
| "save_steps": 3000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 16, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|