| { |
| "best_global_step": 9000, |
| "best_metric": 2.7250221948753346, |
| "best_model_checkpoint": "/gpfs/scratch/guoh/DNAFM/output/gencode_human_12.8k_12800/HNet_BPT3_12.8K-100B/checkpoint-9000", |
| "epoch": 2.5525849230551025, |
| "eval_steps": 500, |
| "global_step": 9000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.0028366782497695198, |
| "grad_norm": 1632.1500244140625, |
| "loss": 88.4255, |
| "loss_ce": 61.45643997192383, |
| "loss_region": 0.09446925669908524, |
| "loss_total": 61.55091094970703, |
| "lr": 2.20454076850486e-05, |
| "router/selected_tokens_s0": 685.5, |
| "router/selected_tokens_s1": 127.125, |
| "step": 10, |
| "tokens_trained": 0.03276544 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.0056733564995390395, |
| "grad_norm": 470.2360534667969, |
| "loss": 43.9906, |
| "loss_ce": 31.559911727905273, |
| "loss_region": 0.1008022278547287, |
| "loss_total": 31.66071319580078, |
| "lr": 4.654030511288038e-05, |
| "router/selected_tokens_s0": 1.0, |
| "router/selected_tokens_s1": 1.0, |
| "step": 20, |
| "tokens_trained": 0.06553088 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.00851003474930856, |
| "grad_norm": 349.49151611328125, |
| "loss": 13.8617, |
| "loss_ce": 5.999421119689941, |
| "loss_region": 0.10440575331449509, |
| "loss_total": 6.103826999664307, |
| "lr": 7.103520254071216e-05, |
| "router/selected_tokens_s0": 1.0, |
| "router/selected_tokens_s1": 1.0, |
| "step": 30, |
| "tokens_trained": 0.09829632 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.011346712999078079, |
| "grad_norm": 394.0719909667969, |
| "loss": 5.4272, |
| "loss_ce": 6.045137882232666, |
| "loss_region": 0.10674509406089783, |
| "loss_total": 6.151883125305176, |
| "lr": 9.553009996854394e-05, |
| "router/selected_tokens_s0": 1.0, |
| "router/selected_tokens_s1": 1.0, |
| "step": 40, |
| "tokens_trained": 0.13106176 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.014183391248847599, |
| "grad_norm": 460.50262451171875, |
| "loss": 4.7094, |
| "loss_ce": 6.701486110687256, |
| "loss_region": 0.10931951552629471, |
| "loss_total": 6.810805797576904, |
| "lr": 0.00012002499739637572, |
| "router/selected_tokens_s0": 1.0, |
| "router/selected_tokens_s1": 1.0, |
| "step": 50, |
| "tokens_trained": 0.1638272 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.01702006949861712, |
| "grad_norm": 626.8062133789062, |
| "loss": 9.494, |
| "loss_ce": 10.466922760009766, |
| "loss_region": 0.11075878143310547, |
| "loss_total": 10.577681541442871, |
| "lr": 0.00014451989482420748, |
| "router/selected_tokens_s0": 1.0, |
| "router/selected_tokens_s1": 1.0, |
| "step": 60, |
| "tokens_trained": 0.19659264 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.01985674774838664, |
| "grad_norm": 448.8774108886719, |
| "loss": 12.7236, |
| "loss_ce": 14.8423433303833, |
| "loss_region": 0.11327756941318512, |
| "loss_total": 14.955620765686035, |
| "lr": 0.00016901479225203927, |
| "router/selected_tokens_s0": 1.0, |
| "router/selected_tokens_s1": 1.0, |
| "step": 70, |
| "tokens_trained": 0.22935808 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.022693425998156158, |
| "grad_norm": 386.830322265625, |
| "loss": 14.3151, |
| "loss_ce": 13.370331764221191, |
| "loss_region": 0.11444944888353348, |
| "loss_total": 13.484781265258789, |
| "lr": 0.00019350968967987104, |
| "router/selected_tokens_s0": 1.0, |
| "router/selected_tokens_s1": 1.0, |
| "step": 80, |
| "tokens_trained": 0.26212192 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.025530104247925678, |
| "grad_norm": 376.66900634765625, |
| "loss": 10.4163, |
| "loss_ce": 5.837803363800049, |
| "loss_region": 0.11401050537824631, |
| "loss_total": 5.951813697814941, |
| "lr": 0.0002180045871077028, |
| "router/selected_tokens_s0": 1.0, |
| "router/selected_tokens_s1": 1.0, |
| "step": 90, |
| "tokens_trained": 0.29488736 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.028366782497695198, |
| "grad_norm": 294.97216796875, |
| "loss": 8.8793, |
| "loss_ce": 6.233154773712158, |
| "loss_region": 0.11450602859258652, |
| "loss_total": 6.347661018371582, |
| "lr": 0.00024249948453553463, |
| "router/selected_tokens_s0": 1.0, |
| "router/selected_tokens_s1": 1.0, |
| "step": 100, |
| "tokens_trained": 0.3276528 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.031203460747464717, |
| "grad_norm": 497.3160095214844, |
| "loss": 11.7289, |
| "loss_ce": 20.51309585571289, |
| "loss_region": 0.11433509737253189, |
| "loss_total": 20.627431869506836, |
| "lr": 0.00026699438196336637, |
| "router/selected_tokens_s0": 1.0, |
| "router/selected_tokens_s1": 1.0, |
| "step": 110, |
| "tokens_trained": 0.36041744 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.03404013899723424, |
| "grad_norm": 493.2754821777344, |
| "loss": 20.8633, |
| "loss_ce": 18.29560661315918, |
| "loss_region": 0.10929017513990402, |
| "loss_total": 18.404897689819336, |
| "lr": 0.00029148927939119814, |
| "router/selected_tokens_s0": 1.0, |
| "router/selected_tokens_s1": 1.0, |
| "step": 120, |
| "tokens_trained": 0.39318128 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.03687681724700376, |
| "grad_norm": 451.9964599609375, |
| "loss": 15.6879, |
| "loss_ce": 20.399511337280273, |
| "loss_region": 0.1114715188741684, |
| "loss_total": 20.510982513427734, |
| "lr": 0.00031598417681902996, |
| "router/selected_tokens_s0": 2386.25, |
| "router/selected_tokens_s1": 1.0, |
| "step": 130, |
| "tokens_trained": 0.42594672 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.03971349549677328, |
| "grad_norm": 207.16168212890625, |
| "loss": 7.2589, |
| "loss_ce": 6.26461124420166, |
| "loss_region": 0.11115273833274841, |
| "loss_total": 6.375763893127441, |
| "lr": 0.00034047907424686173, |
| "router/selected_tokens_s0": 2255.5, |
| "router/selected_tokens_s1": 1.0, |
| "step": 140, |
| "tokens_trained": 0.458709112 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.042550173746542796, |
| "grad_norm": 216.46034240722656, |
| "loss": 9.9827, |
| "loss_ce": 2.39017653465271, |
| "loss_region": 0.1067117229104042, |
| "loss_total": 2.4968881607055664, |
| "lr": 0.0003649739716746935, |
| "router/selected_tokens_s0": 3026.125, |
| "router/selected_tokens_s1": 1.0, |
| "step": 150, |
| "tokens_trained": 0.491469992 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.045386851996312316, |
| "grad_norm": 278.6517028808594, |
| "loss": 8.6499, |
| "loss_ce": 5.831767559051514, |
| "loss_region": 0.10348913073539734, |
| "loss_total": 5.935256481170654, |
| "lr": 0.00038946886910252526, |
| "router/selected_tokens_s0": 4061.0, |
| "router/selected_tokens_s1": 1.0, |
| "step": 160, |
| "tokens_trained": 0.524234632 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.048223530246081836, |
| "grad_norm": 270.7173156738281, |
| "loss": 6.2812, |
| "loss_ce": 9.848775863647461, |
| "loss_region": 0.11538821458816528, |
| "loss_total": 9.964163780212402, |
| "lr": 0.0004139637665303571, |
| "router/selected_tokens_s0": 1436.0, |
| "router/selected_tokens_s1": 1.0, |
| "step": 170, |
| "tokens_trained": 0.556999272 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.051060208495851356, |
| "grad_norm": 225.5234375, |
| "loss": 7.4079, |
| "loss_ce": 4.911764144897461, |
| "loss_region": 0.11397740244865417, |
| "loss_total": 5.0257415771484375, |
| "lr": 0.0004384586639581888, |
| "router/selected_tokens_s0": 1610.375, |
| "router/selected_tokens_s1": 1.0, |
| "step": 180, |
| "tokens_trained": 0.589762952 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.053896886745620876, |
| "grad_norm": 236.20843505859375, |
| "loss": 7.1161, |
| "loss_ce": 8.928335189819336, |
| "loss_region": 0.11002947390079498, |
| "loss_total": 9.03836441040039, |
| "lr": 0.0004629535613860206, |
| "router/selected_tokens_s0": 1923.375, |
| "router/selected_tokens_s1": 1.0, |
| "step": 190, |
| "tokens_trained": 0.622527592 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.056733564995390395, |
| "grad_norm": 108.58197021484375, |
| "loss": 5.231, |
| "loss_ce": 5.8081512451171875, |
| "loss_region": 0.11359135806560516, |
| "loss_total": 5.9217424392700195, |
| "lr": 0.00048744845881385244, |
| "router/selected_tokens_s0": 1059.625, |
| "router/selected_tokens_s1": 1.0, |
| "step": 200, |
| "tokens_trained": 0.655293032 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.059570243245159915, |
| "grad_norm": 195.470458984375, |
| "loss": 7.8328, |
| "loss_ce": 10.872328758239746, |
| "loss_region": 0.10074600577354431, |
| "loss_total": 10.973074913024902, |
| "lr": 0.0005119433562416841, |
| "router/selected_tokens_s0": 2660.875, |
| "router/selected_tokens_s1": 1.0, |
| "step": 210, |
| "tokens_trained": 0.688057672 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.062406921494929435, |
| "grad_norm": 141.9326629638672, |
| "loss": 5.994, |
| "loss_ce": 4.301753044128418, |
| "loss_region": 0.09836956113576889, |
| "loss_total": 4.40012264251709, |
| "lr": 0.0005364382536695159, |
| "router/selected_tokens_s0": 2709.625, |
| "router/selected_tokens_s1": 1.0, |
| "step": 220, |
| "tokens_trained": 0.720823112 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.06524359974469895, |
| "grad_norm": 128.35894775390625, |
| "loss": 3.2054, |
| "loss_ce": 3.196018695831299, |
| "loss_region": 0.09772488474845886, |
| "loss_total": 3.29374361038208, |
| "lr": 0.0005609331510973477, |
| "router/selected_tokens_s0": 1876.125, |
| "router/selected_tokens_s1": 1.0, |
| "step": 230, |
| "tokens_trained": 0.753588552 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.06808027799446847, |
| "grad_norm": 76.50245666503906, |
| "loss": 3.7556, |
| "loss_ce": 1.953817367553711, |
| "loss_region": 0.08583591133356094, |
| "loss_total": 2.0396533012390137, |
| "lr": 0.0005854280485251795, |
| "router/selected_tokens_s0": 4237.5, |
| "router/selected_tokens_s1": 1.0, |
| "step": 240, |
| "tokens_trained": 0.786353992 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.070916956244238, |
| "grad_norm": 98.34822082519531, |
| "loss": 3.5987, |
| "loss_ce": 2.239224433898926, |
| "loss_region": 0.08410990983247757, |
| "loss_total": 2.3233344554901123, |
| "lr": 0.0006099229459530113, |
| "router/selected_tokens_s0": 2309.125, |
| "router/selected_tokens_s1": 1.0, |
| "step": 250, |
| "tokens_trained": 0.819119432 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.07375363449400751, |
| "grad_norm": 127.2884292602539, |
| "loss": 5.4797, |
| "loss_ce": 6.326235294342041, |
| "loss_region": 0.06658253818750381, |
| "loss_total": 6.392817974090576, |
| "lr": 0.0006344178433808431, |
| "router/selected_tokens_s0": 6774.375, |
| "router/selected_tokens_s1": 6774.375, |
| "step": 260, |
| "tokens_trained": 0.851884072 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.07659031274377703, |
| "grad_norm": 91.72901916503906, |
| "loss": 3.183, |
| "loss_ce": 2.804473876953125, |
| "loss_region": 0.06812562048435211, |
| "loss_total": 2.8725996017456055, |
| "lr": 0.0006589127408086749, |
| "router/selected_tokens_s0": 5535.25, |
| "router/selected_tokens_s1": 5535.25, |
| "step": 270, |
| "tokens_trained": 0.884649512 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.07942699099354655, |
| "grad_norm": 80.58329772949219, |
| "loss": 2.5646, |
| "loss_ce": 2.606522798538208, |
| "loss_region": 0.06925090402364731, |
| "loss_total": 2.6757736206054688, |
| "lr": 0.0006834076382365066, |
| "router/selected_tokens_s0": 3777.75, |
| "router/selected_tokens_s1": 3777.75, |
| "step": 280, |
| "tokens_trained": 0.917414936 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.08226366924331607, |
| "grad_norm": 85.70040130615234, |
| "loss": 3.1077, |
| "loss_ce": 2.233630895614624, |
| "loss_region": 0.06535117328166962, |
| "loss_total": 2.2989821434020996, |
| "lr": 0.0007079025356643384, |
| "router/selected_tokens_s0": 3477.125, |
| "router/selected_tokens_s1": 3193.375, |
| "step": 290, |
| "tokens_trained": 0.950180376 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.08510034749308559, |
| "grad_norm": 102.1810073852539, |
| "loss": 3.1872, |
| "loss_ce": 2.971275568008423, |
| "loss_region": 0.06759099662303925, |
| "loss_total": 3.0388665199279785, |
| "lr": 0.0007323974330921702, |
| "router/selected_tokens_s0": 3964.5, |
| "router/selected_tokens_s1": 3964.5, |
| "step": 300, |
| "tokens_trained": 0.982945816 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.08793702574285511, |
| "grad_norm": 67.37890625, |
| "loss": 3.2037, |
| "loss_ce": 2.0559582710266113, |
| "loss_region": 0.06636148691177368, |
| "loss_total": 2.1223196983337402, |
| "lr": 0.000756892330520002, |
| "router/selected_tokens_s0": 5813.875, |
| "router/selected_tokens_s1": 5813.875, |
| "step": 310, |
| "tokens_trained": 1.015711256 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.09077370399262463, |
| "grad_norm": 61.368873596191406, |
| "loss": 3.5341, |
| "loss_ce": 3.0156445503234863, |
| "loss_region": 0.06766507029533386, |
| "loss_total": 3.0833096504211426, |
| "lr": 0.0007813872279478337, |
| "router/selected_tokens_s0": 4320.5, |
| "router/selected_tokens_s1": 4320.5, |
| "step": 320, |
| "tokens_trained": 1.048476696 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.09361038224239415, |
| "grad_norm": 54.85828399658203, |
| "loss": 3.4888, |
| "loss_ce": 3.9015138149261475, |
| "loss_region": 0.05761323869228363, |
| "loss_total": 3.9591269493103027, |
| "lr": 0.0008058821253756655, |
| "router/selected_tokens_s0": 8438.375, |
| "router/selected_tokens_s1": 7710.25, |
| "step": 330, |
| "tokens_trained": 1.081242136 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.09644706049216367, |
| "grad_norm": 86.64398193359375, |
| "loss": 2.7011, |
| "loss_ce": 3.5396924018859863, |
| "loss_region": 0.0677424967288971, |
| "loss_total": 3.6074349880218506, |
| "lr": 0.0008303770228034974, |
| "router/selected_tokens_s0": 9649.25, |
| "router/selected_tokens_s1": 5.875, |
| "step": 340, |
| "tokens_trained": 1.114007576 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.09928373874193319, |
| "grad_norm": 87.52863311767578, |
| "loss": 3.3207, |
| "loss_ce": 4.275536060333252, |
| "loss_region": 0.07108451426029205, |
| "loss_total": 4.346620559692383, |
| "lr": 0.0008548719202313291, |
| "router/selected_tokens_s0": 5297.875, |
| "router/selected_tokens_s1": 1.5, |
| "step": 350, |
| "tokens_trained": 1.146773016 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.10212041699170271, |
| "grad_norm": 59.0909423828125, |
| "loss": 2.8189, |
| "loss_ce": 2.441368341445923, |
| "loss_region": 0.06068379059433937, |
| "loss_total": 2.502052068710327, |
| "lr": 0.0008793668176591608, |
| "router/selected_tokens_s0": 5374.625, |
| "router/selected_tokens_s1": 4637.25, |
| "step": 360, |
| "tokens_trained": 1.179538456 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.10495709524147223, |
| "grad_norm": 60.50209045410156, |
| "loss": 2.0942, |
| "loss_ce": 2.445530414581299, |
| "loss_region": 0.06311789900064468, |
| "loss_total": 2.50864839553833, |
| "lr": 0.0009038617150869926, |
| "router/selected_tokens_s0": 7771.875, |
| "router/selected_tokens_s1": 2119.625, |
| "step": 370, |
| "tokens_trained": 1.212303896 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.10779377349124175, |
| "grad_norm": 63.17313003540039, |
| "loss": 2.5238, |
| "loss_ce": 2.970167398452759, |
| "loss_region": 0.06014455109834671, |
| "loss_total": 3.0303120613098145, |
| "lr": 0.0009283566125148244, |
| "router/selected_tokens_s0": 6445.875, |
| "router/selected_tokens_s1": 6445.875, |
| "step": 380, |
| "tokens_trained": 1.245068536 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.11063045174101127, |
| "grad_norm": 79.03811645507812, |
| "loss": 2.8523, |
| "loss_ce": 2.7662765979766846, |
| "loss_region": 0.06919371336698532, |
| "loss_total": 2.835470199584961, |
| "lr": 0.0009528515099426562, |
| "router/selected_tokens_s0": 3670.875, |
| "router/selected_tokens_s1": 690.625, |
| "step": 390, |
| "tokens_trained": 1.277833176 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.11346712999078079, |
| "grad_norm": 39.57363510131836, |
| "loss": 2.0454, |
| "loss_ce": 1.830534815788269, |
| "loss_region": 0.06083514541387558, |
| "loss_total": 1.8913699388504028, |
| "lr": 0.000977346407370488, |
| "router/selected_tokens_s0": 9311.375, |
| "router/selected_tokens_s1": 3812.875, |
| "step": 400, |
| "tokens_trained": 1.310598616 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.11630380824055031, |
| "grad_norm": 79.65859985351562, |
| "loss": 2.3664, |
| "loss_ce": 2.813112735748291, |
| "loss_region": 0.06279323250055313, |
| "loss_total": 2.875905990600586, |
| "lr": 0.0010018413047983197, |
| "router/selected_tokens_s0": 6855.25, |
| "router/selected_tokens_s1": 2318.875, |
| "step": 410, |
| "tokens_trained": 1.343364056 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.11914048649031983, |
| "grad_norm": 71.34756469726562, |
| "loss": 2.9128, |
| "loss_ce": 3.0529584884643555, |
| "loss_region": 0.06804051250219345, |
| "loss_total": 3.1209990978240967, |
| "lr": 0.0010263362022261515, |
| "router/selected_tokens_s0": 2438.25, |
| "router/selected_tokens_s1": 1069.0, |
| "step": 420, |
| "tokens_trained": 1.376129496 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.12197716474008935, |
| "grad_norm": 69.63412475585938, |
| "loss": 2.6515, |
| "loss_ce": 2.2142796516418457, |
| "loss_region": 0.056970782577991486, |
| "loss_total": 2.2712504863739014, |
| "lr": 0.0010508310996539833, |
| "router/selected_tokens_s0": 11183.125, |
| "router/selected_tokens_s1": 11031.5, |
| "step": 430, |
| "tokens_trained": 1.408889864 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.12481384298985887, |
| "grad_norm": 45.879173278808594, |
| "loss": 2.0195, |
| "loss_ce": 1.895429015159607, |
| "loss_region": 0.06761395186185837, |
| "loss_total": 1.963042974472046, |
| "lr": 0.0010753259970818151, |
| "router/selected_tokens_s0": 1293.875, |
| "router/selected_tokens_s1": 873.25, |
| "step": 440, |
| "tokens_trained": 1.441655304 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.1276505212396284, |
| "grad_norm": 35.60654067993164, |
| "loss": 1.8072, |
| "loss_ce": 1.6901588439941406, |
| "loss_region": 0.061801426112651825, |
| "loss_total": 1.751960277557373, |
| "lr": 0.001099820894509647, |
| "router/selected_tokens_s0": 6973.125, |
| "router/selected_tokens_s1": 2913.125, |
| "step": 450, |
| "tokens_trained": 1.474420744 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.1304871994893979, |
| "grad_norm": 49.47394943237305, |
| "loss": 1.8686, |
| "loss_ce": 2.0007646083831787, |
| "loss_region": 0.06083563342690468, |
| "loss_total": 2.0616002082824707, |
| "lr": 0.0011243157919374788, |
| "router/selected_tokens_s0": 8894.125, |
| "router/selected_tokens_s1": 3425.875, |
| "step": 460, |
| "tokens_trained": 1.507186184 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.13332387773916743, |
| "grad_norm": 36.48300552368164, |
| "loss": 1.8564, |
| "loss_ce": 1.7348560094833374, |
| "loss_region": 0.06224008649587631, |
| "loss_total": 1.7970961332321167, |
| "lr": 0.0011488106893653104, |
| "router/selected_tokens_s0": 4828.125, |
| "router/selected_tokens_s1": 2957.625, |
| "step": 470, |
| "tokens_trained": 1.539950832 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.13616055598893695, |
| "grad_norm": 59.48313903808594, |
| "loss": 2.1381, |
| "loss_ce": 2.6884067058563232, |
| "loss_region": 0.05959445983171463, |
| "loss_total": 2.7480010986328125, |
| "lr": 0.0011733055867931422, |
| "router/selected_tokens_s0": 8242.5, |
| "router/selected_tokens_s1": 4600.25, |
| "step": 480, |
| "tokens_trained": 1.572715472 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.13899723423870647, |
| "grad_norm": 13.365839004516602, |
| "loss": 1.9219, |
| "loss_ce": 1.376577377319336, |
| "loss_region": 0.06748919934034348, |
| "loss_total": 1.4440665245056152, |
| "lr": 0.001197800484220974, |
| "router/selected_tokens_s0": 1833.25, |
| "router/selected_tokens_s1": 820.125, |
| "step": 490, |
| "tokens_trained": 1.605480912 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.141833912488476, |
| "grad_norm": 35.323246002197266, |
| "loss": 1.8414, |
| "loss_ce": 2.028839111328125, |
| "loss_region": 0.06109331175684929, |
| "loss_total": 2.089932441711426, |
| "lr": 0.0012222953816488059, |
| "router/selected_tokens_s0": 10474.625, |
| "router/selected_tokens_s1": 3541.375, |
| "step": 500, |
| "tokens_trained": 1.638244216 |
| }, |
| { |
| "epoch": 0.141833912488476, |
| "eval_ppl": 5.586714635791497, |
| "eval_runtime": 1.0612, |
| "step": 500, |
| "tokens_trained": 1.638244216 |
| }, |
| { |
| "epoch": 0.141833912488476, |
| "eval_F": 0.1523211448811105, |
| "eval_F_cds": 0.1718298886393841, |
| "eval_F_dig": 0.16399718728025628, |
| "eval_F_exon": 0.1541913028892523, |
| "eval_F_intron": 0.15165489305779373, |
| "eval_F_nig": 0.15221594667480792, |
| "eval_F_promoter": 0.1501688968516134, |
| "eval_F_utr": 0.15336648715504136, |
| "eval_G": 0.372060118832526, |
| "eval_G_cds": 0.3656581890724531, |
| "eval_G_dig": 0.3759247548636612, |
| "eval_G_exon": 0.37130833411959574, |
| "eval_G_intron": 0.3726392825038316, |
| "eval_G_nig": 0.3703948690443253, |
| "eval_G_promoter": 0.37331512304800496, |
| "eval_G_utr": 0.37175619902922, |
| "eval_avg_bp_per_token": 6.565076705407635, |
| "eval_bp_per_token/cds": 5.819709294572609, |
| "eval_bp_per_token/dig": 6.097665555026203, |
| "eval_bp_per_token/exon": 6.485450095186293, |
| "eval_bp_per_token/intron": 6.593918467364669, |
| "eval_bp_per_token/nig": 6.5696139060671905, |
| "eval_bp_per_token/promoter": 6.6591685826135585, |
| "eval_bp_per_token/utr": 6.520329301075269, |
| "eval_ppl_cds": 6.264007437664035, |
| "eval_ppl_dig": 5.376989468471789, |
| "eval_ppl_exon": 5.6381595387903625, |
| "eval_ppl_intron": 5.4896203277448326, |
| "eval_ppl_nig": 5.436721243945483, |
| "eval_ppl_promoter": 6.014557891578177, |
| "eval_ppl_utr": 5.561276789916418, |
| "step": 500, |
| "tokens_trained": 1.638244216 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.1446705907382455, |
| "grad_norm": 24.587987899780273, |
| "loss": 2.1546, |
| "loss_ce": 1.7202653884887695, |
| "loss_region": 0.06705278903245926, |
| "loss_total": 1.787318229675293, |
| "lr": 0.0012243786686061229, |
| "router/selected_tokens_s0": 2551.0, |
| "router/selected_tokens_s1": 1029.625, |
| "step": 510, |
| "tokens_trained": 1.671005424 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.14750726898801503, |
| "grad_norm": 31.121719360351562, |
| "loss": 1.9491, |
| "loss_ce": 1.5548391342163086, |
| "loss_region": 0.06364650279283524, |
| "loss_total": 1.618485689163208, |
| "lr": 0.0012239717766222718, |
| "router/selected_tokens_s0": 11385.75, |
| "router/selected_tokens_s1": 1565.75, |
| "step": 520, |
| "tokens_trained": 1.703770864 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.15034394723778455, |
| "grad_norm": 36.0692024230957, |
| "loss": 1.6872, |
| "loss_ce": 1.6581867933273315, |
| "loss_region": 0.05713363736867905, |
| "loss_total": 1.7153204679489136, |
| "lr": 0.001223564884638421, |
| "router/selected_tokens_s0": 8757.75, |
| "router/selected_tokens_s1": 8757.75, |
| "step": 530, |
| "tokens_trained": 1.736536304 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.15318062548755407, |
| "grad_norm": 24.032297134399414, |
| "loss": 1.6695, |
| "loss_ce": 1.7199375629425049, |
| "loss_region": 0.061875585466623306, |
| "loss_total": 1.781813144683838, |
| "lr": 0.00122315799265457, |
| "router/selected_tokens_s0": 4319.5, |
| "router/selected_tokens_s1": 3071.25, |
| "step": 540, |
| "tokens_trained": 1.769301744 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.1560173037373236, |
| "grad_norm": 29.07722282409668, |
| "loss": 1.7629, |
| "loss_ce": 1.444469928741455, |
| "loss_region": 0.06348070502281189, |
| "loss_total": 1.5079506635665894, |
| "lr": 0.001222751100670719, |
| "router/selected_tokens_s0": 9708.375, |
| "router/selected_tokens_s1": 1077.875, |
| "step": 550, |
| "tokens_trained": 1.802067184 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.1588539819870931, |
| "grad_norm": 46.87091064453125, |
| "loss": 1.6825, |
| "loss_ce": 1.963318943977356, |
| "loss_region": 0.06386174261569977, |
| "loss_total": 2.0271806716918945, |
| "lr": 0.001222344208686868, |
| "router/selected_tokens_s0": 5445.5, |
| "router/selected_tokens_s1": 1900.75, |
| "step": 560, |
| "tokens_trained": 1.834832624 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.16169066023686263, |
| "grad_norm": 32.49260330200195, |
| "loss": 1.6009, |
| "loss_ce": 1.5414655208587646, |
| "loss_region": 0.06434743851423264, |
| "loss_total": 1.605812907218933, |
| "lr": 0.0012219373167030169, |
| "router/selected_tokens_s0": 8551.125, |
| "router/selected_tokens_s1": 655.75, |
| "step": 570, |
| "tokens_trained": 1.867598064 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.16452733848663215, |
| "grad_norm": 16.306018829345703, |
| "loss": 1.4792, |
| "loss_ce": 1.3217185735702515, |
| "loss_region": 0.06380423158407211, |
| "loss_total": 1.3855228424072266, |
| "lr": 0.0012215304247191658, |
| "router/selected_tokens_s0": 7361.875, |
| "router/selected_tokens_s1": 1414.875, |
| "step": 580, |
| "tokens_trained": 1.900363504 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.16736401673640167, |
| "grad_norm": 11.52273178100586, |
| "loss": 1.444, |
| "loss_ce": 1.2657556533813477, |
| "loss_region": 0.05732131749391556, |
| "loss_total": 1.3230769634246826, |
| "lr": 0.0012211235327353148, |
| "router/selected_tokens_s0": 7275.0, |
| "router/selected_tokens_s1": 6492.625, |
| "step": 590, |
| "tokens_trained": 1.933128944 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.17020069498617119, |
| "grad_norm": 9.447604179382324, |
| "loss": 1.4029, |
| "loss_ce": 1.2989217042922974, |
| "loss_region": 0.05687510594725609, |
| "loss_total": 1.3557968139648438, |
| "lr": 0.0012207166407514638, |
| "router/selected_tokens_s0": 8629.875, |
| "router/selected_tokens_s1": 7220.0, |
| "step": 600, |
| "tokens_trained": 1.96589048 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.1730373732359407, |
| "grad_norm": 15.3342924118042, |
| "loss": 1.5934, |
| "loss_ce": 1.3760124444961548, |
| "loss_region": 0.05879288166761398, |
| "loss_total": 1.4348052740097046, |
| "lr": 0.0012203097487676127, |
| "router/selected_tokens_s0": 12745.375, |
| "router/selected_tokens_s1": 8453.75, |
| "step": 610, |
| "tokens_trained": 1.99865592 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.17587405148571023, |
| "grad_norm": 13.668978691101074, |
| "loss": 1.4177, |
| "loss_ce": 1.3025627136230469, |
| "loss_region": 0.05752723664045334, |
| "loss_total": 1.360089898109436, |
| "lr": 0.0012199028567837617, |
| "router/selected_tokens_s0": 6367.875, |
| "router/selected_tokens_s1": 6018.25, |
| "step": 620, |
| "tokens_trained": 2.03142136 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.17871072973547975, |
| "grad_norm": 10.949312210083008, |
| "loss": 1.3518, |
| "loss_ce": 1.2872226238250732, |
| "loss_region": 0.05511040613055229, |
| "loss_total": 1.3423330783843994, |
| "lr": 0.0012194959647999107, |
| "router/selected_tokens_s0": 10267.0, |
| "router/selected_tokens_s1": 9664.25, |
| "step": 630, |
| "tokens_trained": 2.0641868 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.18154740798524927, |
| "grad_norm": 37.183555603027344, |
| "loss": 1.3917, |
| "loss_ce": 1.5382307767868042, |
| "loss_region": 0.062242139130830765, |
| "loss_total": 1.6004729270935059, |
| "lr": 0.0012190890728160596, |
| "router/selected_tokens_s0": 4478.0, |
| "router/selected_tokens_s1": 2642.125, |
| "step": 640, |
| "tokens_trained": 2.09695224 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.18438408623501878, |
| "grad_norm": 49.40961837768555, |
| "loss": 1.5734, |
| "loss_ce": 1.797605037689209, |
| "loss_region": 0.06264789402484894, |
| "loss_total": 1.8602529764175415, |
| "lr": 0.0012186821808322086, |
| "router/selected_tokens_s0": 9251.875, |
| "router/selected_tokens_s1": 1637.625, |
| "step": 650, |
| "tokens_trained": 2.12971768 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.1872207644847883, |
| "grad_norm": 24.992237091064453, |
| "loss": 1.5306, |
| "loss_ce": 1.3852466344833374, |
| "loss_region": 0.06734807044267654, |
| "loss_total": 1.4525947570800781, |
| "lr": 0.0012182752888483576, |
| "router/selected_tokens_s0": 5227.5, |
| "router/selected_tokens_s1": 45.625, |
| "step": 660, |
| "tokens_trained": 2.16248312 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.19005744273455782, |
| "grad_norm": 13.878795623779297, |
| "loss": 1.4926, |
| "loss_ce": 1.2988413572311401, |
| "loss_region": 0.06691248714923859, |
| "loss_total": 1.3657538890838623, |
| "lr": 0.0012178683968645065, |
| "router/selected_tokens_s0": 5859.75, |
| "router/selected_tokens_s1": 19.75, |
| "step": 670, |
| "tokens_trained": 2.19524856 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.19289412098432734, |
| "grad_norm": 32.076271057128906, |
| "loss": 1.4449, |
| "loss_ce": 1.4827113151550293, |
| "loss_region": 0.06534749269485474, |
| "loss_total": 1.5480587482452393, |
| "lr": 0.0012174615048806555, |
| "router/selected_tokens_s0": 4726.125, |
| "router/selected_tokens_s1": 1088.125, |
| "step": 680, |
| "tokens_trained": 2.228014 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.19573079923409686, |
| "grad_norm": 10.969571113586426, |
| "loss": 1.3951, |
| "loss_ce": 1.2787609100341797, |
| "loss_region": 0.05903833732008934, |
| "loss_total": 1.3377991914749146, |
| "lr": 0.0012170546128968045, |
| "router/selected_tokens_s0": 9336.625, |
| "router/selected_tokens_s1": 5102.375, |
| "step": 690, |
| "tokens_trained": 2.26077944 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.19856747748386638, |
| "grad_norm": 17.22534942626953, |
| "loss": 1.4386, |
| "loss_ce": 1.2547720670700073, |
| "loss_region": 0.060146868228912354, |
| "loss_total": 1.3149189949035645, |
| "lr": 0.0012166477209129534, |
| "router/selected_tokens_s0": 9219.125, |
| "router/selected_tokens_s1": 4007.875, |
| "step": 700, |
| "tokens_trained": 2.29354488 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.2014041557336359, |
| "grad_norm": 43.11760711669922, |
| "loss": 1.4423, |
| "loss_ce": 1.7917805910110474, |
| "loss_region": 0.05554146692156792, |
| "loss_total": 1.8473221063613892, |
| "lr": 0.0012162408289291026, |
| "router/selected_tokens_s0": 8606.125, |
| "router/selected_tokens_s1": 8472.0, |
| "step": 710, |
| "tokens_trained": 2.32631032 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.20424083398340542, |
| "grad_norm": 31.703508377075195, |
| "loss": 1.5672, |
| "loss_ce": 1.4175491333007812, |
| "loss_region": 0.05690694972872734, |
| "loss_total": 1.4744560718536377, |
| "lr": 0.0012158339369452516, |
| "router/selected_tokens_s0": 9807.125, |
| "router/selected_tokens_s1": 7348.125, |
| "step": 720, |
| "tokens_trained": 2.35907576 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.20707751223317494, |
| "grad_norm": 16.408870697021484, |
| "loss": 1.3753, |
| "loss_ce": 1.2296346426010132, |
| "loss_region": 0.062433354556560516, |
| "loss_total": 1.2920680046081543, |
| "lr": 0.0012154270449614005, |
| "router/selected_tokens_s0": 7699.0, |
| "router/selected_tokens_s1": 2266.625, |
| "step": 730, |
| "tokens_trained": 2.3918396 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.20991419048294446, |
| "grad_norm": 8.016473770141602, |
| "loss": 1.3183, |
| "loss_ce": 1.194352388381958, |
| "loss_region": 0.05566234141588211, |
| "loss_total": 1.2500147819519043, |
| "lr": 0.0012150201529775495, |
| "router/selected_tokens_s0": 8457.5, |
| "router/selected_tokens_s1": 8438.25, |
| "step": 740, |
| "tokens_trained": 2.424600048 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.21275086873271398, |
| "grad_norm": 8.739754676818848, |
| "loss": 1.295, |
| "loss_ce": 1.2655029296875, |
| "loss_region": 0.06317088007926941, |
| "loss_total": 1.3286738395690918, |
| "lr": 0.0012146132609936982, |
| "router/selected_tokens_s0": 8715.875, |
| "router/selected_tokens_s1": 1289.125, |
| "step": 750, |
| "tokens_trained": 2.457364688 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.2155875469824835, |
| "grad_norm": 18.954246520996094, |
| "loss": 1.3007, |
| "loss_ce": 1.2969638109207153, |
| "loss_region": 0.0547361820936203, |
| "loss_total": 1.351699948310852, |
| "lr": 0.0012142063690098472, |
| "router/selected_tokens_s0": 9943.625, |
| "router/selected_tokens_s1": 9932.125, |
| "step": 760, |
| "tokens_trained": 2.490130128 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.21842422523225302, |
| "grad_norm": 6.552379608154297, |
| "loss": 1.3183, |
| "loss_ce": 1.2186139822006226, |
| "loss_region": 0.05473633110523224, |
| "loss_total": 1.2733503580093384, |
| "lr": 0.0012137994770259962, |
| "router/selected_tokens_s0": 9790.875, |
| "router/selected_tokens_s1": 9770.5, |
| "step": 770, |
| "tokens_trained": 2.522895568 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.22126090348202254, |
| "grad_norm": 26.2993106842041, |
| "loss": 1.3727, |
| "loss_ce": 1.42295503616333, |
| "loss_region": 0.06471094489097595, |
| "loss_total": 1.4876660108566284, |
| "lr": 0.0012133925850421454, |
| "router/selected_tokens_s0": 5200.125, |
| "router/selected_tokens_s1": 1332.0, |
| "step": 780, |
| "tokens_trained": 2.555659392 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.22409758173179206, |
| "grad_norm": 11.26626205444336, |
| "loss": 1.407, |
| "loss_ce": 1.2448495626449585, |
| "loss_region": 0.05585968494415283, |
| "loss_total": 1.3007092475891113, |
| "lr": 0.0012129856930582943, |
| "router/selected_tokens_s0": 9143.375, |
| "router/selected_tokens_s1": 8323.125, |
| "step": 790, |
| "tokens_trained": 2.588422136 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.22693425998156158, |
| "grad_norm": 7.997785568237305, |
| "loss": 1.3067, |
| "loss_ce": 1.2697006464004517, |
| "loss_region": 0.06230790168046951, |
| "loss_total": 1.3320086002349854, |
| "lr": 0.0012125788010744433, |
| "router/selected_tokens_s0": 7774.375, |
| "router/selected_tokens_s1": 2329.625, |
| "step": 800, |
| "tokens_trained": 2.621187576 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.2297709382313311, |
| "grad_norm": 6.670467376708984, |
| "loss": 1.2878, |
| "loss_ce": 1.2548019886016846, |
| "loss_region": 0.061417948454618454, |
| "loss_total": 1.3162199258804321, |
| "lr": 0.0012121719090905923, |
| "router/selected_tokens_s0": 9487.75, |
| "router/selected_tokens_s1": 2631.625, |
| "step": 810, |
| "tokens_trained": 2.653953016 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.23260761648110062, |
| "grad_norm": 16.567520141601562, |
| "loss": 1.3082, |
| "loss_ce": 1.3074324131011963, |
| "loss_region": 0.05492587015032768, |
| "loss_total": 1.3623583316802979, |
| "lr": 0.0012117650171067412, |
| "router/selected_tokens_s0": 9648.0, |
| "router/selected_tokens_s1": 9643.5, |
| "step": 820, |
| "tokens_trained": 2.686718456 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.23544429473087014, |
| "grad_norm": 3.946514844894409, |
| "loss": 1.2958, |
| "loss_ce": 1.1369494199752808, |
| "loss_region": 0.06178716570138931, |
| "loss_total": 1.198736548423767, |
| "lr": 0.0012113581251228902, |
| "router/selected_tokens_s0": 9761.0, |
| "router/selected_tokens_s1": 2224.75, |
| "step": 830, |
| "tokens_trained": 2.71948036 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.23828097298063966, |
| "grad_norm": 9.472439765930176, |
| "loss": 1.3045, |
| "loss_ce": 1.3183592557907104, |
| "loss_region": 0.06152363494038582, |
| "loss_total": 1.3798829317092896, |
| "lr": 0.0012109512331390391, |
| "router/selected_tokens_s0": 10037.5, |
| "router/selected_tokens_s1": 2320.0, |
| "step": 840, |
| "tokens_trained": 2.7522458 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.24111765123040918, |
| "grad_norm": 11.188100814819336, |
| "loss": 1.2803, |
| "loss_ce": 1.2120747566223145, |
| "loss_region": 0.05707934871315956, |
| "loss_total": 1.2691540718078613, |
| "lr": 0.0012105443411551881, |
| "router/selected_tokens_s0": 7336.75, |
| "router/selected_tokens_s1": 6639.875, |
| "step": 850, |
| "tokens_trained": 2.78501124 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.2439543294801787, |
| "grad_norm": 3.799870014190674, |
| "loss": 1.2797, |
| "loss_ce": 1.1893348693847656, |
| "loss_region": 0.06335902214050293, |
| "loss_total": 1.2526938915252686, |
| "lr": 0.001210137449171337, |
| "router/selected_tokens_s0": 6500.25, |
| "router/selected_tokens_s1": 1867.125, |
| "step": 860, |
| "tokens_trained": 2.81777668 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.24679100772994822, |
| "grad_norm": 5.527514457702637, |
| "loss": 1.2928, |
| "loss_ce": 1.2499254941940308, |
| "loss_region": 0.062063559889793396, |
| "loss_total": 1.3119890689849854, |
| "lr": 0.001209730557187486, |
| "router/selected_tokens_s0": 6788.875, |
| "router/selected_tokens_s1": 2726.0, |
| "step": 870, |
| "tokens_trained": 2.85054212 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.24962768597971774, |
| "grad_norm": 12.200733184814453, |
| "loss": 1.3112, |
| "loss_ce": 1.310202717781067, |
| "loss_region": 0.06234995648264885, |
| "loss_total": 1.3725526332855225, |
| "lr": 0.001209323665203635, |
| "router/selected_tokens_s0": 7554.625, |
| "router/selected_tokens_s1": 2368.0, |
| "step": 880, |
| "tokens_trained": 2.88330756 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.25246436422948726, |
| "grad_norm": 12.507109642028809, |
| "loss": 1.3048, |
| "loss_ce": 1.2949899435043335, |
| "loss_region": 0.05900288373231888, |
| "loss_total": 1.3539928197860718, |
| "lr": 0.001208916773219784, |
| "router/selected_tokens_s0": 8159.375, |
| "router/selected_tokens_s1": 5136.375, |
| "step": 890, |
| "tokens_trained": 2.916073 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.2553010424792568, |
| "grad_norm": 8.910531997680664, |
| "loss": 1.3012, |
| "loss_ce": 1.2440317869186401, |
| "loss_region": 0.06203978508710861, |
| "loss_total": 1.3060715198516846, |
| "lr": 0.001208509881235933, |
| "router/selected_tokens_s0": 7256.375, |
| "router/selected_tokens_s1": 2613.75, |
| "step": 900, |
| "tokens_trained": 2.94883828 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.2581377207290263, |
| "grad_norm": 13.657557487487793, |
| "loss": 1.2896, |
| "loss_ce": 1.2510647773742676, |
| "loss_region": 0.05596454069018364, |
| "loss_total": 1.307029366493225, |
| "lr": 0.001208102989252082, |
| "router/selected_tokens_s0": 8780.875, |
| "router/selected_tokens_s1": 8493.25, |
| "step": 910, |
| "tokens_trained": 2.981597288 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.2609743989787958, |
| "grad_norm": 11.587185859680176, |
| "loss": 1.3346, |
| "loss_ce": 1.319909930229187, |
| "loss_region": 0.05452951416373253, |
| "loss_total": 1.3744394779205322, |
| "lr": 0.0012076960972682309, |
| "router/selected_tokens_s0": 12476.125, |
| "router/selected_tokens_s1": 11398.75, |
| "step": 920, |
| "tokens_trained": 3.014362456 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.26381107722856534, |
| "grad_norm": 6.298766136169434, |
| "loss": 1.28, |
| "loss_ce": 1.1877198219299316, |
| "loss_region": 0.05859007313847542, |
| "loss_total": 1.2463098764419556, |
| "lr": 0.0012072892052843798, |
| "router/selected_tokens_s0": 8980.625, |
| "router/selected_tokens_s1": 5598.875, |
| "step": 930, |
| "tokens_trained": 3.047127096 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.26664775547833486, |
| "grad_norm": 10.869803428649902, |
| "loss": 1.3065, |
| "loss_ce": 1.2654238939285278, |
| "loss_region": 0.057959433645009995, |
| "loss_total": 1.3233833312988281, |
| "lr": 0.0012068823133005288, |
| "router/selected_tokens_s0": 9552.375, |
| "router/selected_tokens_s1": 6223.0, |
| "step": 940, |
| "tokens_trained": 3.079892536 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.2694844337281044, |
| "grad_norm": 12.055986404418945, |
| "loss": 1.3382, |
| "loss_ce": 1.270828127861023, |
| "loss_region": 0.06640933454036713, |
| "loss_total": 1.3372374773025513, |
| "lr": 0.0012064754213166778, |
| "router/selected_tokens_s0": 4240.5, |
| "router/selected_tokens_s1": 659.375, |
| "step": 950, |
| "tokens_trained": 3.11265336 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.2723211119778739, |
| "grad_norm": 5.883482456207275, |
| "loss": 1.2889, |
| "loss_ce": 1.268551230430603, |
| "loss_region": 0.06191962957382202, |
| "loss_total": 1.3304708003997803, |
| "lr": 0.001206068529332827, |
| "router/selected_tokens_s0": 6975.625, |
| "router/selected_tokens_s1": 2819.0, |
| "step": 960, |
| "tokens_trained": 3.1454188 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.2751577902276434, |
| "grad_norm": 9.4501371383667, |
| "loss": 1.2741, |
| "loss_ce": 1.213870882987976, |
| "loss_region": 0.062040336430072784, |
| "loss_total": 1.2759112119674683, |
| "lr": 0.001205661637348976, |
| "router/selected_tokens_s0": 6684.75, |
| "router/selected_tokens_s1": 2734.375, |
| "step": 970, |
| "tokens_trained": 3.17818424 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.27799446847741294, |
| "grad_norm": 4.556408405303955, |
| "loss": 1.2736, |
| "loss_ce": 1.223037600517273, |
| "loss_region": 0.060666751116514206, |
| "loss_total": 1.283704400062561, |
| "lr": 0.0012052547453651249, |
| "router/selected_tokens_s0": 7010.5, |
| "router/selected_tokens_s1": 3735.625, |
| "step": 980, |
| "tokens_trained": 3.21094968 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.28083114672718246, |
| "grad_norm": 3.892756700515747, |
| "loss": 1.2613, |
| "loss_ce": 1.2192856073379517, |
| "loss_region": 0.059792179614305496, |
| "loss_total": 1.2790777683258057, |
| "lr": 0.0012048478533812738, |
| "router/selected_tokens_s0": 6889.75, |
| "router/selected_tokens_s1": 4386.25, |
| "step": 990, |
| "tokens_trained": 3.24371512 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.283667824976952, |
| "grad_norm": 2.7137346267700195, |
| "loss": 1.2637, |
| "loss_ce": 1.2060933113098145, |
| "loss_region": 0.056109219789505005, |
| "loss_total": 1.262202501296997, |
| "lr": 0.0012044409613974226, |
| "router/selected_tokens_s0": 8637.25, |
| "router/selected_tokens_s1": 8186.375, |
| "step": 1000, |
| "tokens_trained": 3.27648056 |
| }, |
| { |
| "epoch": 0.283667824976952, |
| "eval_ppl": 3.347568264113319, |
| "eval_runtime": 1.1031, |
| "step": 1000, |
| "tokens_trained": 3.27648056 |
| }, |
| { |
| "epoch": 0.283667824976952, |
| "eval_F": 0.38812667656327315, |
| "eval_F_cds": 0.42947160991705235, |
| "eval_F_dig": 0.25877021642315806, |
| "eval_F_exon": 0.38583469618819977, |
| "eval_F_intron": 0.38635905509328233, |
| "eval_F_nig": 0.38592072457131227, |
| "eval_F_promoter": 0.39958568949106993, |
| "eval_F_utr": 0.38073642711742123, |
| "eval_G": 0.29773473589855654, |
| "eval_G_cds": 0.295751199031323, |
| "eval_G_dig": 0.24374554408547544, |
| "eval_G_exon": 0.2947081099037615, |
| "eval_G_intron": 0.29904633539385567, |
| "eval_G_nig": 0.2965493859974463, |
| "eval_G_promoter": 0.2989992781350895, |
| "eval_G_utr": 0.29223971887964134, |
| "eval_avg_bp_per_token": 2.5764784035321986, |
| "eval_bp_per_token/cds": 2.3284426185776024, |
| "eval_bp_per_token/dig": 3.8644323671497585, |
| "eval_bp_per_token/exon": 2.5917835017932833, |
| "eval_bp_per_token/intron": 2.5882659842372804, |
| "eval_bp_per_token/nig": 2.591205748566155, |
| "eval_bp_per_token/promoter": 2.5025921255429453, |
| "eval_bp_per_token/utr": 2.6264889009204113, |
| "eval_ppl_cds": 3.903673949165709, |
| "eval_ppl_dig": 2.2362627784464695, |
| "eval_ppl_exon": 3.620784098443495, |
| "eval_ppl_intron": 3.3564639530661875, |
| "eval_ppl_nig": 3.33227950140761, |
| "eval_ppl_promoter": 3.551608871404792, |
| "eval_ppl_utr": 3.539918557582459, |
| "step": 1000, |
| "tokens_trained": 3.27648056 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.2865045032267215, |
| "grad_norm": 4.771963596343994, |
| "loss": 1.2563, |
| "loss_ce": 1.224116325378418, |
| "loss_region": 0.06507018953561783, |
| "loss_total": 1.2891864776611328, |
| "lr": 0.0012040340694135716, |
| "router/selected_tokens_s0": 5890.875, |
| "router/selected_tokens_s1": 891.375, |
| "step": 1010, |
| "tokens_trained": 3.309246 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.289341181476491, |
| "grad_norm": 7.340773105621338, |
| "loss": 1.2573, |
| "loss_ce": 1.214971899986267, |
| "loss_region": 0.062319301068782806, |
| "loss_total": 1.277291178703308, |
| "lr": 0.0012036271774297205, |
| "router/selected_tokens_s0": 7923.625, |
| "router/selected_tokens_s1": 2285.625, |
| "step": 1020, |
| "tokens_trained": 3.34201144 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.29217785972626054, |
| "grad_norm": 6.7468695640563965, |
| "loss": 1.266, |
| "loss_ce": 1.1905633211135864, |
| "loss_region": 0.06161874532699585, |
| "loss_total": 1.2521820068359375, |
| "lr": 0.0012032202854458697, |
| "router/selected_tokens_s0": 7226.75, |
| "router/selected_tokens_s1": 2999.375, |
| "step": 1030, |
| "tokens_trained": 3.37477688 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.29501453797603006, |
| "grad_norm": 14.67007064819336, |
| "loss": 1.2801, |
| "loss_ce": 1.2074471712112427, |
| "loss_region": 0.0633462518453598, |
| "loss_total": 1.2707934379577637, |
| "lr": 0.0012028133934620187, |
| "router/selected_tokens_s0": 7503.625, |
| "router/selected_tokens_s1": 1578.5, |
| "step": 1040, |
| "tokens_trained": 3.40754232 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.2978512162257996, |
| "grad_norm": 13.262823104858398, |
| "loss": 1.2987, |
| "loss_ce": 1.2933048009872437, |
| "loss_region": 0.05701930820941925, |
| "loss_total": 1.3503241539001465, |
| "lr": 0.0012024065014781676, |
| "router/selected_tokens_s0": 7261.0, |
| "router/selected_tokens_s1": 6672.0, |
| "step": 1050, |
| "tokens_trained": 3.44030696 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.3006878944755691, |
| "grad_norm": 9.318674087524414, |
| "loss": 1.2813, |
| "loss_ce": 1.2383909225463867, |
| "loss_region": 0.057740017771720886, |
| "loss_total": 1.296130895614624, |
| "lr": 0.0012019996094943166, |
| "router/selected_tokens_s0": 7121.25, |
| "router/selected_tokens_s1": 6015.625, |
| "step": 1060, |
| "tokens_trained": 3.4730724 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.3035245727253386, |
| "grad_norm": 5.476803302764893, |
| "loss": 1.2668, |
| "loss_ce": 1.1896486282348633, |
| "loss_region": 0.06287642568349838, |
| "loss_total": 1.2525250911712646, |
| "lr": 0.0012015927175104656, |
| "router/selected_tokens_s0": 7913.25, |
| "router/selected_tokens_s1": 1923.375, |
| "step": 1070, |
| "tokens_trained": 3.50583784 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.30636125097510813, |
| "grad_norm": 3.5230581760406494, |
| "loss": 1.2474, |
| "loss_ce": 1.2183582782745361, |
| "loss_region": 0.058486904948949814, |
| "loss_total": 1.2768452167510986, |
| "lr": 0.0012011858255266145, |
| "router/selected_tokens_s0": 7827.0, |
| "router/selected_tokens_s1": 5555.25, |
| "step": 1080, |
| "tokens_trained": 3.53860328 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.30919792922487765, |
| "grad_norm": 2.5523219108581543, |
| "loss": 1.2465, |
| "loss_ce": 1.1812347173690796, |
| "loss_region": 0.05868714675307274, |
| "loss_total": 1.2399218082427979, |
| "lr": 0.0012007789335427635, |
| "router/selected_tokens_s0": 6969.25, |
| "router/selected_tokens_s1": 5216.25, |
| "step": 1090, |
| "tokens_trained": 3.57136872 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.3120346074746472, |
| "grad_norm": 3.7527999877929688, |
| "loss": 1.2463, |
| "loss_ce": 1.141882300376892, |
| "loss_region": 0.06176374852657318, |
| "loss_total": 1.2036460638046265, |
| "lr": 0.0012003720415589125, |
| "router/selected_tokens_s0": 7818.0, |
| "router/selected_tokens_s1": 2807.0, |
| "step": 1100, |
| "tokens_trained": 3.60413416 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.3148712857244167, |
| "grad_norm": 4.556453227996826, |
| "loss": 1.2456, |
| "loss_ce": 1.186132788658142, |
| "loss_region": 0.057535041123628616, |
| "loss_total": 1.2436678409576416, |
| "lr": 0.0011999651495750614, |
| "router/selected_tokens_s0": 7479.125, |
| "router/selected_tokens_s1": 6318.5, |
| "step": 1110, |
| "tokens_trained": 3.6368996 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.3177079639741862, |
| "grad_norm": 5.769139289855957, |
| "loss": 1.2386, |
| "loss_ce": 1.205984115600586, |
| "loss_region": 0.0598924346268177, |
| "loss_total": 1.2658765316009521, |
| "lr": 0.0011995582575912104, |
| "router/selected_tokens_s0": 6577.125, |
| "router/selected_tokens_s1": 4283.625, |
| "step": 1120, |
| "tokens_trained": 3.669661712 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.32054464222395573, |
| "grad_norm": 5.950888633728027, |
| "loss": 1.2417, |
| "loss_ce": 1.1921892166137695, |
| "loss_region": 0.05988103523850441, |
| "loss_total": 1.2520703077316284, |
| "lr": 0.0011991513656073594, |
| "router/selected_tokens_s0": 5852.625, |
| "router/selected_tokens_s1": 4155.75, |
| "step": 1130, |
| "tokens_trained": 3.702426352 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.32338132047372525, |
| "grad_norm": 9.715873718261719, |
| "loss": 1.2429, |
| "loss_ce": 1.213543176651001, |
| "loss_region": 0.060164351016283035, |
| "loss_total": 1.2737075090408325, |
| "lr": 0.0011987444736235083, |
| "router/selected_tokens_s0": 6526.5, |
| "router/selected_tokens_s1": 4105.125, |
| "step": 1140, |
| "tokens_trained": 3.735191792 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.3262179987234948, |
| "grad_norm": 3.8173344135284424, |
| "loss": 1.2484, |
| "loss_ce": 1.1513862609863281, |
| "loss_region": 0.06401993334293365, |
| "loss_total": 1.2154061794281006, |
| "lr": 0.0011983375816396573, |
| "router/selected_tokens_s0": 5951.125, |
| "router/selected_tokens_s1": 1578.5, |
| "step": 1150, |
| "tokens_trained": 3.767957232 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.3290546769732643, |
| "grad_norm": 4.6551833152771, |
| "loss": 1.2419, |
| "loss_ce": 1.2406567335128784, |
| "loss_region": 0.06077444925904274, |
| "loss_total": 1.3014311790466309, |
| "lr": 0.0011979306896558062, |
| "router/selected_tokens_s0": 7148.375, |
| "router/selected_tokens_s1": 3660.125, |
| "step": 1160, |
| "tokens_trained": 3.800722672 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.3318913552230338, |
| "grad_norm": 8.04861831665039, |
| "loss": 1.2343, |
| "loss_ce": 1.1574338674545288, |
| "loss_region": 0.060126934200525284, |
| "loss_total": 1.2175607681274414, |
| "lr": 0.0011975237976719552, |
| "router/selected_tokens_s0": 7432.375, |
| "router/selected_tokens_s1": 4170.875, |
| "step": 1170, |
| "tokens_trained": 3.833488112 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.33472803347280333, |
| "grad_norm": 1.6629078388214111, |
| "loss": 1.2338, |
| "loss_ce": 1.1838352680206299, |
| "loss_region": 0.06075272709131241, |
| "loss_total": 1.244588017463684, |
| "lr": 0.0011971169056881042, |
| "router/selected_tokens_s0": 7034.375, |
| "router/selected_tokens_s1": 3672.375, |
| "step": 1180, |
| "tokens_trained": 3.866252752 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.33756471172257285, |
| "grad_norm": 2.558566093444824, |
| "loss": 1.2326, |
| "loss_ce": 1.1726329326629639, |
| "loss_region": 0.05713306739926338, |
| "loss_total": 1.2297660112380981, |
| "lr": 0.0011967100137042531, |
| "router/selected_tokens_s0": 7075.5, |
| "router/selected_tokens_s1": 6530.625, |
| "step": 1190, |
| "tokens_trained": 3.899018184 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.34040138997234237, |
| "grad_norm": 5.142763137817383, |
| "loss": 1.2306, |
| "loss_ce": 1.1462597846984863, |
| "loss_region": 0.06102370843291283, |
| "loss_total": 1.2072834968566895, |
| "lr": 0.0011963031217204021, |
| "router/selected_tokens_s0": 7709.875, |
| "router/selected_tokens_s1": 3417.375, |
| "step": 1200, |
| "tokens_trained": 3.931783624 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.3432380682221119, |
| "grad_norm": 2.3559539318084717, |
| "loss": 1.227, |
| "loss_ce": 1.1722111701965332, |
| "loss_region": 0.05798247084021568, |
| "loss_total": 1.2301936149597168, |
| "lr": 0.0011958962297365513, |
| "router/selected_tokens_s0": 7702.75, |
| "router/selected_tokens_s1": 5993.375, |
| "step": 1210, |
| "tokens_trained": 3.964549064 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.3460747464718814, |
| "grad_norm": 5.716832160949707, |
| "loss": 1.2248, |
| "loss_ce": 1.1278504133224487, |
| "loss_region": 0.06037168204784393, |
| "loss_total": 1.188222050666809, |
| "lr": 0.0011954893377527003, |
| "router/selected_tokens_s0": 6809.375, |
| "router/selected_tokens_s1": 3940.0, |
| "step": 1220, |
| "tokens_trained": 3.997311912 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.34891142472165093, |
| "grad_norm": 8.192803382873535, |
| "loss": 1.2482, |
| "loss_ce": 1.208156704902649, |
| "loss_region": 0.0598159022629261, |
| "loss_total": 1.2679725885391235, |
| "lr": 0.0011950824457688492, |
| "router/selected_tokens_s0": 7796.0, |
| "router/selected_tokens_s1": 4444.75, |
| "step": 1230, |
| "tokens_trained": 4.030077352 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.35174810297142045, |
| "grad_norm": 4.9980010986328125, |
| "loss": 1.2321, |
| "loss_ce": 1.1176339387893677, |
| "loss_region": 0.05978400260210037, |
| "loss_total": 1.1774179935455322, |
| "lr": 0.0011946755537849982, |
| "router/selected_tokens_s0": 7440.75, |
| "router/selected_tokens_s1": 4458.25, |
| "step": 1240, |
| "tokens_trained": 4.062842792 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.35458478122118997, |
| "grad_norm": 1.7752015590667725, |
| "loss": 1.2207, |
| "loss_ce": 1.184716820716858, |
| "loss_region": 0.05878889188170433, |
| "loss_total": 1.2435057163238525, |
| "lr": 0.001194268661801147, |
| "router/selected_tokens_s0": 7180.375, |
| "router/selected_tokens_s1": 5210.5, |
| "step": 1250, |
| "tokens_trained": 4.095608232 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.3574214594709595, |
| "grad_norm": 3.125553607940674, |
| "loss": 1.2205, |
| "loss_ce": 1.1985727548599243, |
| "loss_region": 0.06049454212188721, |
| "loss_total": 1.2590672969818115, |
| "lr": 0.001193861769817296, |
| "router/selected_tokens_s0": 7793.875, |
| "router/selected_tokens_s1": 3874.875, |
| "step": 1260, |
| "tokens_trained": 4.128373672 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.360258137720729, |
| "grad_norm": 3.002474546432495, |
| "loss": 1.2223, |
| "loss_ce": 1.215548038482666, |
| "loss_region": 0.06029829755425453, |
| "loss_total": 1.2758463621139526, |
| "lr": 0.0011934548778334449, |
| "router/selected_tokens_s0": 7393.25, |
| "router/selected_tokens_s1": 4025.25, |
| "step": 1270, |
| "tokens_trained": 4.161136768 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.36309481597049853, |
| "grad_norm": 3.3697590827941895, |
| "loss": 1.2242, |
| "loss_ce": 1.167251467704773, |
| "loss_region": 0.061523206532001495, |
| "loss_total": 1.2287746667861938, |
| "lr": 0.001193047985849594, |
| "router/selected_tokens_s0": 7325.625, |
| "router/selected_tokens_s1": 3059.375, |
| "step": 1280, |
| "tokens_trained": 4.193902208 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.36593149422026805, |
| "grad_norm": 2.45526123046875, |
| "loss": 1.2241, |
| "loss_ce": 1.2129099369049072, |
| "loss_region": 0.059980034828186035, |
| "loss_total": 1.2728899717330933, |
| "lr": 0.001192641093865743, |
| "router/selected_tokens_s0": 7482.25, |
| "router/selected_tokens_s1": 4287.625, |
| "step": 1290, |
| "tokens_trained": 4.226667648 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.36876817247003757, |
| "grad_norm": 2.280808448791504, |
| "loss": 1.2237, |
| "loss_ce": 1.1102901697158813, |
| "loss_region": 0.05960178002715111, |
| "loss_total": 1.1698919534683228, |
| "lr": 0.001192234201881892, |
| "router/selected_tokens_s0": 7548.625, |
| "router/selected_tokens_s1": 4616.25, |
| "step": 1300, |
| "tokens_trained": 4.259424272 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.3716048507198071, |
| "grad_norm": 1.7201220989227295, |
| "loss": 1.2133, |
| "loss_ce": 1.0806467533111572, |
| "loss_region": 0.06031251698732376, |
| "loss_total": 1.1409592628479004, |
| "lr": 0.001191827309898041, |
| "router/selected_tokens_s0": 7452.0, |
| "router/selected_tokens_s1": 4023.25, |
| "step": 1310, |
| "tokens_trained": 4.292189712 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.3744415289695766, |
| "grad_norm": 1.477432131767273, |
| "loss": 1.2134, |
| "loss_ce": 1.1256206035614014, |
| "loss_region": 0.058301862329244614, |
| "loss_total": 1.1839224100112915, |
| "lr": 0.00119142041791419, |
| "router/selected_tokens_s0": 7398.875, |
| "router/selected_tokens_s1": 5646.875, |
| "step": 1320, |
| "tokens_trained": 4.32495164 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.37727820721934613, |
| "grad_norm": 2.436877727508545, |
| "loss": 1.2179, |
| "loss_ce": 1.0498384237289429, |
| "loss_region": 0.05817071720957756, |
| "loss_total": 1.1080090999603271, |
| "lr": 0.0011910135259303389, |
| "router/selected_tokens_s0": 7537.0, |
| "router/selected_tokens_s1": 5800.375, |
| "step": 1330, |
| "tokens_trained": 4.35771708 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.38011488546911565, |
| "grad_norm": 4.487268447875977, |
| "loss": 1.2305, |
| "loss_ce": 1.2367991209030151, |
| "loss_region": 0.06230239197611809, |
| "loss_total": 1.29910147190094, |
| "lr": 0.0011906066339464878, |
| "router/selected_tokens_s0": 7214.25, |
| "router/selected_tokens_s1": 2476.375, |
| "step": 1340, |
| "tokens_trained": 4.39048252 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.38295156371888517, |
| "grad_norm": 3.622452735900879, |
| "loss": 1.214, |
| "loss_ce": 1.0572057962417603, |
| "loss_region": 0.059732843190431595, |
| "loss_total": 1.116938591003418, |
| "lr": 0.0011901997419626368, |
| "router/selected_tokens_s0": 7352.625, |
| "router/selected_tokens_s1": 4475.625, |
| "step": 1350, |
| "tokens_trained": 4.42324796 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.3857882419686547, |
| "grad_norm": 2.1303977966308594, |
| "loss": 1.213, |
| "loss_ce": 1.159555435180664, |
| "loss_region": 0.060326918959617615, |
| "loss_total": 1.2198823690414429, |
| "lr": 0.0011897928499787858, |
| "router/selected_tokens_s0": 7290.625, |
| "router/selected_tokens_s1": 3996.0, |
| "step": 1360, |
| "tokens_trained": 4.4560134 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.3886249202184242, |
| "grad_norm": 1.6034486293792725, |
| "loss": 1.2153, |
| "loss_ce": 1.1271262168884277, |
| "loss_region": 0.06016794219613075, |
| "loss_total": 1.1872941255569458, |
| "lr": 0.0011893859579949347, |
| "router/selected_tokens_s0": 7241.375, |
| "router/selected_tokens_s1": 4116.75, |
| "step": 1370, |
| "tokens_trained": 4.48877884 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.3914615984681937, |
| "grad_norm": 0.8884724974632263, |
| "loss": 1.2086, |
| "loss_ce": 1.1884993314743042, |
| "loss_region": 0.05992608517408371, |
| "loss_total": 1.2484253644943237, |
| "lr": 0.0011889790660110837, |
| "router/selected_tokens_s0": 7768.375, |
| "router/selected_tokens_s1": 4367.875, |
| "step": 1380, |
| "tokens_trained": 4.52154428 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.39429827671796325, |
| "grad_norm": 1.7758926153182983, |
| "loss": 1.2076, |
| "loss_ce": 1.1677520275115967, |
| "loss_region": 0.05958346277475357, |
| "loss_total": 1.2273354530334473, |
| "lr": 0.0011885721740272327, |
| "router/selected_tokens_s0": 7557.875, |
| "router/selected_tokens_s1": 4631.375, |
| "step": 1390, |
| "tokens_trained": 4.55430972 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.39713495496773277, |
| "grad_norm": 1.3425244092941284, |
| "loss": 1.2121, |
| "loss_ce": 0.9700872302055359, |
| "loss_region": 0.05964002013206482, |
| "loss_total": 1.0297272205352783, |
| "lr": 0.0011881652820433816, |
| "router/selected_tokens_s0": 7196.75, |
| "router/selected_tokens_s1": 4533.0, |
| "step": 1400, |
| "tokens_trained": 4.58707516 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.3999716332175023, |
| "grad_norm": 1.086186408996582, |
| "loss": 1.2112, |
| "loss_ce": 1.13883638381958, |
| "loss_region": 0.05995158478617668, |
| "loss_total": 1.1987879276275635, |
| "lr": 0.0011877583900595306, |
| "router/selected_tokens_s0": 7518.25, |
| "router/selected_tokens_s1": 4321.125, |
| "step": 1410, |
| "tokens_trained": 4.6198406 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.4028083114672718, |
| "grad_norm": 1.7367637157440186, |
| "loss": 1.2049, |
| "loss_ce": 1.155705213546753, |
| "loss_region": 0.06088727340102196, |
| "loss_total": 1.2165924310684204, |
| "lr": 0.0011873514980756796, |
| "router/selected_tokens_s0": 6863.0, |
| "router/selected_tokens_s1": 3570.375, |
| "step": 1420, |
| "tokens_trained": 4.652606024 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.4056449897170413, |
| "grad_norm": 5.4363789558410645, |
| "loss": 1.238, |
| "loss_ce": 1.1930514574050903, |
| "loss_region": 0.05885593220591545, |
| "loss_total": 1.2519073486328125, |
| "lr": 0.0011869446060918285, |
| "router/selected_tokens_s0": 7346.125, |
| "router/selected_tokens_s1": 5190.25, |
| "step": 1430, |
| "tokens_trained": 4.685371464 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.40848166796681085, |
| "grad_norm": 3.524890899658203, |
| "loss": 1.223, |
| "loss_ce": 1.184119701385498, |
| "loss_region": 0.05786283686757088, |
| "loss_total": 1.2419825792312622, |
| "lr": 0.0011865377141079775, |
| "router/selected_tokens_s0": 7959.5, |
| "router/selected_tokens_s1": 6231.5, |
| "step": 1440, |
| "tokens_trained": 4.718136904 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.41131834621658037, |
| "grad_norm": 2.0465176105499268, |
| "loss": 1.2131, |
| "loss_ce": 1.2069507837295532, |
| "loss_region": 0.061854004859924316, |
| "loss_total": 1.2688047885894775, |
| "lr": 0.0011861308221241265, |
| "router/selected_tokens_s0": 7283.625, |
| "router/selected_tokens_s1": 2811.625, |
| "step": 1450, |
| "tokens_trained": 4.750902344 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.4141550244663499, |
| "grad_norm": 1.2570910453796387, |
| "loss": 1.2008, |
| "loss_ce": 1.2179498672485352, |
| "loss_region": 0.062061723321676254, |
| "loss_total": 1.280011534690857, |
| "lr": 0.0011857239301402756, |
| "router/selected_tokens_s0": 7247.25, |
| "router/selected_tokens_s1": 2646.375, |
| "step": 1460, |
| "tokens_trained": 4.783666984 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.4169917027161194, |
| "grad_norm": 1.7545820474624634, |
| "loss": 1.2037, |
| "loss_ce": 1.206125020980835, |
| "loss_region": 0.06104295700788498, |
| "loss_total": 1.2671679258346558, |
| "lr": 0.0011853170381564246, |
| "router/selected_tokens_s0": 7784.375, |
| "router/selected_tokens_s1": 3447.5, |
| "step": 1470, |
| "tokens_trained": 4.816432424 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.4198283809658889, |
| "grad_norm": 0.8268412947654724, |
| "loss": 1.1988, |
| "loss_ce": 1.0823899507522583, |
| "loss_region": 0.06159386783838272, |
| "loss_total": 1.1439838409423828, |
| "lr": 0.0011849101461725736, |
| "router/selected_tokens_s0": 7545.125, |
| "router/selected_tokens_s1": 2985.0, |
| "step": 1480, |
| "tokens_trained": 4.849197864 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.42266505921565845, |
| "grad_norm": 1.3997455835342407, |
| "loss": 1.2004, |
| "loss_ce": 1.211706280708313, |
| "loss_region": 0.061140816658735275, |
| "loss_total": 1.272847056388855, |
| "lr": 0.0011845032541887225, |
| "router/selected_tokens_s0": 7438.125, |
| "router/selected_tokens_s1": 3359.0, |
| "step": 1490, |
| "tokens_trained": 4.881963248 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.42550173746542796, |
| "grad_norm": 1.2946134805679321, |
| "loss": 1.208, |
| "loss_ce": 1.131969928741455, |
| "loss_region": 0.06115822494029999, |
| "loss_total": 1.1931281089782715, |
| "lr": 0.0011840963622048713, |
| "router/selected_tokens_s0": 7596.125, |
| "router/selected_tokens_s1": 3365.25, |
| "step": 1500, |
| "tokens_trained": 4.914728608 |
| }, |
| { |
| "epoch": 0.42550173746542796, |
| "eval_ppl": 3.1621453960135706, |
| "eval_runtime": 1.0551, |
| "step": 1500, |
| "tokens_trained": 4.914728608 |
| }, |
| { |
| "epoch": 0.42550173746542796, |
| "eval_F": 0.3383701851707165, |
| "eval_F_cds": 0.32189175564822875, |
| "eval_F_dig": 0.30971169622626765, |
| "eval_F_exon": 0.34561999412924055, |
| "eval_F_intron": 0.34410581946232166, |
| "eval_F_nig": 0.3375343459013891, |
| "eval_F_promoter": 0.31860851389433215, |
| "eval_F_utr": 0.3465690947975985, |
| "eval_G": 0.2924601907782509, |
| "eval_G_cds": 0.2914013539852665, |
| "eval_G_dig": 0.2889913884873818, |
| "eval_G_exon": 0.2899214097947331, |
| "eval_G_intron": 0.2925929726588761, |
| "eval_G_nig": 0.29211948150012085, |
| "eval_G_promoter": 0.29322862355481216, |
| "eval_G_utr": 0.29183338191527736, |
| "eval_avg_bp_per_token": 2.955343123672301, |
| "eval_bp_per_token/cds": 3.106634396355353, |
| "eval_bp_per_token/dig": 3.2288092835519677, |
| "eval_bp_per_token/exon": 2.893351128366901, |
| "eval_bp_per_token/intron": 2.9060827903536706, |
| "eval_bp_per_token/nig": 2.962661465841318, |
| "eval_bp_per_token/promoter": 3.1386480787253985, |
| "eval_bp_per_token/utr": 2.88542750929368, |
| "eval_ppl_cds": 3.7980160314314775, |
| "eval_ppl_dig": 1.3756820840176516, |
| "eval_ppl_exon": 3.52970478822946, |
| "eval_ppl_intron": 3.1966549688737778, |
| "eval_ppl_nig": 3.043349946903372, |
| "eval_ppl_promoter": 3.4552084267408065, |
| "eval_ppl_utr": 3.438440616943856, |
| "step": 1500, |
| "tokens_trained": 4.914728608 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.4283384157151975, |
| "grad_norm": 1.6150286197662354, |
| "loss": 1.2087, |
| "loss_ce": 1.2292938232421875, |
| "loss_region": 0.06086890026926994, |
| "loss_total": 1.2901626825332642, |
| "lr": 0.0011836894702210202, |
| "router/selected_tokens_s0": 7728.625, |
| "router/selected_tokens_s1": 3601.125, |
| "step": 1510, |
| "tokens_trained": 4.947494048 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.431175093964967, |
| "grad_norm": 5.560505390167236, |
| "loss": 1.2111, |
| "loss_ce": 1.2069859504699707, |
| "loss_region": 0.0635954886674881, |
| "loss_total": 1.2705814838409424, |
| "lr": 0.0011832825782371692, |
| "router/selected_tokens_s0": 6884.125, |
| "router/selected_tokens_s1": 1571.875, |
| "step": 1520, |
| "tokens_trained": 4.980259488 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.4340117722147365, |
| "grad_norm": 2.3368101119995117, |
| "loss": 1.2138, |
| "loss_ce": 1.1481984853744507, |
| "loss_region": 0.05834292247891426, |
| "loss_total": 1.2065414190292358, |
| "lr": 0.0011828756862533184, |
| "router/selected_tokens_s0": 7559.875, |
| "router/selected_tokens_s1": 5684.875, |
| "step": 1530, |
| "tokens_trained": 5.013024928 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.43684845046450604, |
| "grad_norm": 1.1860406398773193, |
| "loss": 1.2027, |
| "loss_ce": 1.1155500411987305, |
| "loss_region": 0.058888982981443405, |
| "loss_total": 1.1744390726089478, |
| "lr": 0.0011824687942694674, |
| "router/selected_tokens_s0": 7458.25, |
| "router/selected_tokens_s1": 5186.0, |
| "step": 1540, |
| "tokens_trained": 5.04578704 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.43968512871427556, |
| "grad_norm": 1.4350230693817139, |
| "loss": 1.202, |
| "loss_ce": 1.1809712648391724, |
| "loss_region": 0.06128743663430214, |
| "loss_total": 1.2422586679458618, |
| "lr": 0.0011820619022856163, |
| "router/selected_tokens_s0": 7819.25, |
| "router/selected_tokens_s1": 3293.5, |
| "step": 1550, |
| "tokens_trained": 5.078551904 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.4425218069640451, |
| "grad_norm": 0.8890047073364258, |
| "loss": 1.2026, |
| "loss_ce": 1.07762610912323, |
| "loss_region": 0.06053788587450981, |
| "loss_total": 1.1381640434265137, |
| "lr": 0.0011816550103017653, |
| "router/selected_tokens_s0": 7270.375, |
| "router/selected_tokens_s1": 3828.25, |
| "step": 1560, |
| "tokens_trained": 5.111317344 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.4453584852138146, |
| "grad_norm": 0.7954491376876831, |
| "loss": 1.2054, |
| "loss_ce": 1.219533085823059, |
| "loss_region": 0.06046753376722336, |
| "loss_total": 1.2800005674362183, |
| "lr": 0.0011812481183179143, |
| "router/selected_tokens_s0": 7368.375, |
| "router/selected_tokens_s1": 3893.25, |
| "step": 1570, |
| "tokens_trained": 5.144082784 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.4481951634635841, |
| "grad_norm": 1.1368037462234497, |
| "loss": 1.199, |
| "loss_ce": 1.144284725189209, |
| "loss_region": 0.060155730694532394, |
| "loss_total": 1.2044404745101929, |
| "lr": 0.0011808412263340632, |
| "router/selected_tokens_s0": 7437.125, |
| "router/selected_tokens_s1": 4154.25, |
| "step": 1580, |
| "tokens_trained": 5.176848224 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.45103184171335364, |
| "grad_norm": 2.1435985565185547, |
| "loss": 1.2037, |
| "loss_ce": 1.131495714187622, |
| "loss_region": 0.05997314676642418, |
| "loss_total": 1.1914688348770142, |
| "lr": 0.0011804343343502122, |
| "router/selected_tokens_s0": 7080.125, |
| "router/selected_tokens_s1": 4246.5, |
| "step": 1590, |
| "tokens_trained": 5.209613664 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.45386851996312316, |
| "grad_norm": 4.92233419418335, |
| "loss": 1.2057, |
| "loss_ce": 1.214929223060608, |
| "loss_region": 0.06080314517021179, |
| "loss_total": 1.275732398033142, |
| "lr": 0.0011800274423663611, |
| "router/selected_tokens_s0": 7388.875, |
| "router/selected_tokens_s1": 3633.0, |
| "step": 1600, |
| "tokens_trained": 5.242378304 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.4567051982128927, |
| "grad_norm": 5.72175931930542, |
| "loss": 1.2192, |
| "loss_ce": 1.1509543657302856, |
| "loss_region": 0.05994424968957901, |
| "loss_total": 1.2108986377716064, |
| "lr": 0.0011796205503825101, |
| "router/selected_tokens_s0": 7110.75, |
| "router/selected_tokens_s1": 4277.75, |
| "step": 1610, |
| "tokens_trained": 5.275142944 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.4595418764626622, |
| "grad_norm": 0.9513784050941467, |
| "loss": 1.211, |
| "loss_ce": 1.1676510572433472, |
| "loss_region": 0.058869313448667526, |
| "loss_total": 1.2265204191207886, |
| "lr": 0.001179213658398659, |
| "router/selected_tokens_s0": 8026.375, |
| "router/selected_tokens_s1": 5480.0, |
| "step": 1620, |
| "tokens_trained": 5.307906784 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.4623785547124317, |
| "grad_norm": 1.5998790264129639, |
| "loss": 1.2047, |
| "loss_ce": 1.0538767576217651, |
| "loss_region": 0.05941499024629593, |
| "loss_total": 1.1132917404174805, |
| "lr": 0.001178806766414808, |
| "router/selected_tokens_s0": 7281.5, |
| "router/selected_tokens_s1": 4720.625, |
| "step": 1630, |
| "tokens_trained": 5.340672224 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.46521523296220124, |
| "grad_norm": 0.5995892286300659, |
| "loss": 1.2167, |
| "loss_ce": 1.1786582469940186, |
| "loss_region": 0.059631455689668655, |
| "loss_total": 1.238289713859558, |
| "lr": 0.001178399874430957, |
| "router/selected_tokens_s0": 7377.5, |
| "router/selected_tokens_s1": 4563.125, |
| "step": 1640, |
| "tokens_trained": 5.373436896 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.46805191121197076, |
| "grad_norm": 1.8776949644088745, |
| "loss": 1.2252, |
| "loss_ce": 1.2116706371307373, |
| "loss_region": 0.058519087731838226, |
| "loss_total": 1.2701897621154785, |
| "lr": 0.001177992982447106, |
| "router/selected_tokens_s0": 7360.625, |
| "router/selected_tokens_s1": 5456.625, |
| "step": 1650, |
| "tokens_trained": 5.406202336 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.4708885894617403, |
| "grad_norm": 0.9382694363594055, |
| "loss": 1.2314, |
| "loss_ce": 1.1839154958724976, |
| "loss_region": 0.05979941412806511, |
| "loss_total": 1.2437149286270142, |
| "lr": 0.001177586090463255, |
| "router/selected_tokens_s0": 7499.125, |
| "router/selected_tokens_s1": 4458.75, |
| "step": 1660, |
| "tokens_trained": 5.438967776 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.4737252677115098, |
| "grad_norm": 1.2017229795455933, |
| "loss": 1.2312, |
| "loss_ce": 1.1372504234313965, |
| "loss_region": 0.059748563915491104, |
| "loss_total": 1.196998953819275, |
| "lr": 0.001177179198479404, |
| "router/selected_tokens_s0": 7295.375, |
| "router/selected_tokens_s1": 4452.5, |
| "step": 1670, |
| "tokens_trained": 5.471733216 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.4765619459612793, |
| "grad_norm": 1.8002878427505493, |
| "loss": 1.2231, |
| "loss_ce": 1.1727172136306763, |
| "loss_region": 0.060064543038606644, |
| "loss_total": 1.2327817678451538, |
| "lr": 0.0011767723064955529, |
| "router/selected_tokens_s0": 7462.25, |
| "router/selected_tokens_s1": 4250.375, |
| "step": 1680, |
| "tokens_trained": 5.504498656 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.47939862421104884, |
| "grad_norm": 1.8060473203659058, |
| "loss": 1.2174, |
| "loss_ce": 1.152571201324463, |
| "loss_region": 0.0597725547850132, |
| "loss_total": 1.2123438119888306, |
| "lr": 0.0011763654145117018, |
| "router/selected_tokens_s0": 7397.25, |
| "router/selected_tokens_s1": 4450.125, |
| "step": 1690, |
| "tokens_trained": 5.537264096 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.48223530246081836, |
| "grad_norm": 0.5139926671981812, |
| "loss": 1.2158, |
| "loss_ce": 1.1350646018981934, |
| "loss_region": 0.05997651070356369, |
| "loss_total": 1.1950410604476929, |
| "lr": 0.0011759585225278508, |
| "router/selected_tokens_s0": 7432.5, |
| "router/selected_tokens_s1": 4295.5, |
| "step": 1700, |
| "tokens_trained": 5.570029536 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.4850719807105879, |
| "grad_norm": 1.5214523077011108, |
| "loss": 1.2131, |
| "loss_ce": 1.0787264108657837, |
| "loss_region": 0.059902340173721313, |
| "loss_total": 1.1386287212371826, |
| "lr": 0.001175551630544, |
| "router/selected_tokens_s0": 7345.875, |
| "router/selected_tokens_s1": 4338.75, |
| "step": 1710, |
| "tokens_trained": 5.602794976 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.4879086589603574, |
| "grad_norm": 0.5068179368972778, |
| "loss": 1.2096, |
| "loss_ce": 1.1360502243041992, |
| "loss_region": 0.060493726283311844, |
| "loss_total": 1.1965439319610596, |
| "lr": 0.001175144738560149, |
| "router/selected_tokens_s0": 7206.5, |
| "router/selected_tokens_s1": 3846.125, |
| "step": 1720, |
| "tokens_trained": 5.635560416 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.4907453372101269, |
| "grad_norm": 0.4137960970401764, |
| "loss": 1.2064, |
| "loss_ce": 1.2108829021453857, |
| "loss_region": 0.05981894209980965, |
| "loss_total": 1.2707018852233887, |
| "lr": 0.001174737846576298, |
| "router/selected_tokens_s0": 7653.0, |
| "router/selected_tokens_s1": 4487.625, |
| "step": 1730, |
| "tokens_trained": 5.668325856 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.49358201545989644, |
| "grad_norm": 0.8189796805381775, |
| "loss": 1.2067, |
| "loss_ce": 1.1404913663864136, |
| "loss_region": 0.05990920960903168, |
| "loss_total": 1.2004005908966064, |
| "lr": 0.0011743309545924469, |
| "router/selected_tokens_s0": 7451.125, |
| "router/selected_tokens_s1": 4361.125, |
| "step": 1740, |
| "tokens_trained": 5.701091296 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.49641869370966596, |
| "grad_norm": 0.8574137687683105, |
| "loss": 1.2035, |
| "loss_ce": 1.1549724340438843, |
| "loss_region": 0.05982809513807297, |
| "loss_total": 1.214800477027893, |
| "lr": 0.0011739240626085956, |
| "router/selected_tokens_s0": 7422.875, |
| "router/selected_tokens_s1": 4414.75, |
| "step": 1750, |
| "tokens_trained": 5.733856736 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.4992553719594355, |
| "grad_norm": 1.5001907348632812, |
| "loss": 1.2017, |
| "loss_ce": 1.1385822296142578, |
| "loss_region": 0.06083447486162186, |
| "loss_total": 1.1994167566299438, |
| "lr": 0.0011735171706247446, |
| "router/selected_tokens_s0": 7473.375, |
| "router/selected_tokens_s1": 3602.625, |
| "step": 1760, |
| "tokens_trained": 5.766622176 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.502092050209205, |
| "grad_norm": 1.8692625761032104, |
| "loss": 1.202, |
| "loss_ce": 1.2150356769561768, |
| "loss_region": 0.060022830963134766, |
| "loss_total": 1.2750585079193115, |
| "lr": 0.0011731102786408936, |
| "router/selected_tokens_s0": 7648.0, |
| "router/selected_tokens_s1": 4317.5, |
| "step": 1770, |
| "tokens_trained": 5.799387616 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.5049287284589745, |
| "grad_norm": 1.3365726470947266, |
| "loss": 1.2043, |
| "loss_ce": 1.0311381816864014, |
| "loss_region": 0.060505885630846024, |
| "loss_total": 1.091644048690796, |
| "lr": 0.0011727033866570427, |
| "router/selected_tokens_s0": 7286.75, |
| "router/selected_tokens_s1": 3841.0, |
| "step": 1780, |
| "tokens_trained": 5.832153056 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.507765406708744, |
| "grad_norm": 0.8497097492218018, |
| "loss": 1.1973, |
| "loss_ce": 1.0274080038070679, |
| "loss_region": 0.05991562083363533, |
| "loss_total": 1.0873236656188965, |
| "lr": 0.0011722964946731917, |
| "router/selected_tokens_s0": 7360.125, |
| "router/selected_tokens_s1": 4338.75, |
| "step": 1790, |
| "tokens_trained": 5.864918496 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.5106020849585136, |
| "grad_norm": 1.7930278778076172, |
| "loss": 1.1965, |
| "loss_ce": 1.0668989419937134, |
| "loss_region": 0.06032132729887962, |
| "loss_total": 1.1272202730178833, |
| "lr": 0.0011718896026893407, |
| "router/selected_tokens_s0": 7489.25, |
| "router/selected_tokens_s1": 4029.0, |
| "step": 1800, |
| "tokens_trained": 5.897683936 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.5134387632082831, |
| "grad_norm": 0.5829694271087646, |
| "loss": 1.1941, |
| "loss_ce": 1.115596890449524, |
| "loss_region": 0.05983370915055275, |
| "loss_total": 1.1754306554794312, |
| "lr": 0.0011714827107054896, |
| "router/selected_tokens_s0": 7556.25, |
| "router/selected_tokens_s1": 4460.375, |
| "step": 1810, |
| "tokens_trained": 5.930449376 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.5162754414580526, |
| "grad_norm": 0.7106938362121582, |
| "loss": 1.1921, |
| "loss_ce": 1.0678681135177612, |
| "loss_region": 0.060275427997112274, |
| "loss_total": 1.128143548965454, |
| "lr": 0.0011710758187216386, |
| "router/selected_tokens_s0": 7005.0, |
| "router/selected_tokens_s1": 4000.75, |
| "step": 1820, |
| "tokens_trained": 5.96321104 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.5191121197078221, |
| "grad_norm": 0.64195317029953, |
| "loss": 1.1967, |
| "loss_ce": 1.1604429483413696, |
| "loss_region": 0.05958753824234009, |
| "loss_total": 1.2200305461883545, |
| "lr": 0.0011706689267377876, |
| "router/selected_tokens_s0": 7597.75, |
| "router/selected_tokens_s1": 4673.875, |
| "step": 1830, |
| "tokens_trained": 5.99597648 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.5219487979575916, |
| "grad_norm": 0.7763417959213257, |
| "loss": 1.1938, |
| "loss_ce": 1.138045072555542, |
| "loss_region": 0.05993033945560455, |
| "loss_total": 1.1979753971099854, |
| "lr": 0.0011702620347539365, |
| "router/selected_tokens_s0": 7406.0, |
| "router/selected_tokens_s1": 4328.75, |
| "step": 1840, |
| "tokens_trained": 6.028741744 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.5247854762073612, |
| "grad_norm": 1.4419437646865845, |
| "loss": 1.1946, |
| "loss_ce": 1.0854356288909912, |
| "loss_region": 0.06058506295084953, |
| "loss_total": 1.1460206508636475, |
| "lr": 0.0011698551427700855, |
| "router/selected_tokens_s0": 7468.625, |
| "router/selected_tokens_s1": 3814.0, |
| "step": 1850, |
| "tokens_trained": 6.061507184 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.5276221544571307, |
| "grad_norm": 0.4786941111087799, |
| "loss": 1.198, |
| "loss_ce": 1.0818524360656738, |
| "loss_region": 0.05998766049742699, |
| "loss_total": 1.1418401002883911, |
| "lr": 0.0011694482507862345, |
| "router/selected_tokens_s0": 7472.625, |
| "router/selected_tokens_s1": 4303.875, |
| "step": 1860, |
| "tokens_trained": 6.094268624 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.5304588327069002, |
| "grad_norm": 0.6779845356941223, |
| "loss": 1.186, |
| "loss_ce": 1.0940133333206177, |
| "loss_region": 0.060079120099544525, |
| "loss_total": 1.1540924310684204, |
| "lr": 0.0011690413588023834, |
| "router/selected_tokens_s0": 7563.625, |
| "router/selected_tokens_s1": 4256.5, |
| "step": 1870, |
| "tokens_trained": 6.127034064 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.5332955109566697, |
| "grad_norm": 0.5507255792617798, |
| "loss": 1.1942, |
| "loss_ce": 1.1586185693740845, |
| "loss_region": 0.05944622680544853, |
| "loss_total": 1.218064785003662, |
| "lr": 0.0011686344668185324, |
| "router/selected_tokens_s0": 7539.625, |
| "router/selected_tokens_s1": 4774.75, |
| "step": 1880, |
| "tokens_trained": 6.159799504 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.5361321892064392, |
| "grad_norm": 1.0612168312072754, |
| "loss": 1.1953, |
| "loss_ce": 1.124820351600647, |
| "loss_region": 0.05970805883407593, |
| "loss_total": 1.1845283508300781, |
| "lr": 0.0011682275748346814, |
| "router/selected_tokens_s0": 7456.0, |
| "router/selected_tokens_s1": 4528.375, |
| "step": 1890, |
| "tokens_trained": 6.192561072 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.5389688674562088, |
| "grad_norm": 0.27030009031295776, |
| "loss": 1.1816, |
| "loss_ce": 1.1418118476867676, |
| "loss_region": 0.05972041189670563, |
| "loss_total": 1.201532244682312, |
| "lr": 0.0011678206828508303, |
| "router/selected_tokens_s0": 7315.625, |
| "router/selected_tokens_s1": 4474.25, |
| "step": 1900, |
| "tokens_trained": 6.225326512 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.5418055457059783, |
| "grad_norm": 0.7955554127693176, |
| "loss": 1.1932, |
| "loss_ce": 1.1275066137313843, |
| "loss_region": 0.06015876308083534, |
| "loss_total": 1.187665343284607, |
| "lr": 0.0011674137908669793, |
| "router/selected_tokens_s0": 7343.0, |
| "router/selected_tokens_s1": 4132.5, |
| "step": 1910, |
| "tokens_trained": 6.258091952 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.5446422239557478, |
| "grad_norm": 1.206097960472107, |
| "loss": 1.191, |
| "loss_ce": 1.1694256067276, |
| "loss_region": 0.06038440391421318, |
| "loss_total": 1.2298099994659424, |
| "lr": 0.0011670068988831283, |
| "router/selected_tokens_s0": 7146.5, |
| "router/selected_tokens_s1": 3922.625, |
| "step": 1920, |
| "tokens_trained": 6.290857392 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.5474789022055173, |
| "grad_norm": 1.862305760383606, |
| "loss": 1.1893, |
| "loss_ce": 1.143494963645935, |
| "loss_region": 0.06012733653187752, |
| "loss_total": 1.2036223411560059, |
| "lr": 0.0011666000068992772, |
| "router/selected_tokens_s0": 7706.5, |
| "router/selected_tokens_s1": 4280.75, |
| "step": 1930, |
| "tokens_trained": 6.323622832 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.5503155804552868, |
| "grad_norm": 1.6529579162597656, |
| "loss": 1.1967, |
| "loss_ce": 1.1993005275726318, |
| "loss_region": 0.059735652059316635, |
| "loss_total": 1.2590361833572388, |
| "lr": 0.0011661931149154262, |
| "router/selected_tokens_s0": 7429.25, |
| "router/selected_tokens_s1": 4494.0, |
| "step": 1940, |
| "tokens_trained": 6.356388272 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.5531522587050564, |
| "grad_norm": 0.6790427565574646, |
| "loss": 1.1897, |
| "loss_ce": 1.2285912036895752, |
| "loss_region": 0.06015128269791603, |
| "loss_total": 1.2887425422668457, |
| "lr": 0.0011657862229315751, |
| "router/selected_tokens_s0": 7461.875, |
| "router/selected_tokens_s1": 4166.625, |
| "step": 1950, |
| "tokens_trained": 6.389153712 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.5559889369548259, |
| "grad_norm": 0.7350674867630005, |
| "loss": 1.1879, |
| "loss_ce": 1.1737867593765259, |
| "loss_region": 0.060010842978954315, |
| "loss_total": 1.233797550201416, |
| "lr": 0.0011653793309477243, |
| "router/selected_tokens_s0": 7463.25, |
| "router/selected_tokens_s1": 4283.625, |
| "step": 1960, |
| "tokens_trained": 6.421919152 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.5588256152045954, |
| "grad_norm": 2.1251847743988037, |
| "loss": 1.1929, |
| "loss_ce": 1.1881052255630493, |
| "loss_region": 0.05957598611712456, |
| "loss_total": 1.2476812601089478, |
| "lr": 0.0011649724389638733, |
| "router/selected_tokens_s0": 7391.5, |
| "router/selected_tokens_s1": 4616.25, |
| "step": 1970, |
| "tokens_trained": 6.454684592 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.5616622934543649, |
| "grad_norm": 2.2398009300231934, |
| "loss": 1.1965, |
| "loss_ce": 1.1543619632720947, |
| "loss_region": 0.06045198440551758, |
| "loss_total": 1.2148139476776123, |
| "lr": 0.0011645655469800223, |
| "router/selected_tokens_s0": 7461.125, |
| "router/selected_tokens_s1": 3922.625, |
| "step": 1980, |
| "tokens_trained": 6.487450032 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.5644989717041344, |
| "grad_norm": 0.9572720527648926, |
| "loss": 1.1905, |
| "loss_ce": 1.1940102577209473, |
| "loss_region": 0.05966882035136223, |
| "loss_total": 1.2536790370941162, |
| "lr": 0.0011641586549961712, |
| "router/selected_tokens_s0": 7304.375, |
| "router/selected_tokens_s1": 4512.875, |
| "step": 1990, |
| "tokens_trained": 6.520215472 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.567335649953904, |
| "grad_norm": 1.0482014417648315, |
| "loss": 1.1875, |
| "loss_ce": 1.1852117776870728, |
| "loss_region": 0.05964534357190132, |
| "loss_total": 1.2448570728302002, |
| "lr": 0.00116375176301232, |
| "router/selected_tokens_s0": 7341.375, |
| "router/selected_tokens_s1": 4539.25, |
| "step": 2000, |
| "tokens_trained": 6.552980912 |
| }, |
| { |
| "epoch": 0.567335649953904, |
| "eval_ppl": 3.0970225704644703, |
| "eval_runtime": 1.034, |
| "step": 2000, |
| "tokens_trained": 6.552980912 |
| }, |
| { |
| "epoch": 0.567335649953904, |
| "eval_F": 0.3193354949605438, |
| "eval_F_cds": 0.30131524678062416, |
| "eval_F_dig": 0.3408078756152824, |
| "eval_F_exon": 0.3181112928250933, |
| "eval_F_intron": 0.31980747812788923, |
| "eval_F_nig": 0.32217091538187553, |
| "eval_F_promoter": 0.31566914879719316, |
| "eval_F_utr": 0.3197454198768327, |
| "eval_G": 0.28967377288386786, |
| "eval_G_cds": 0.28399038089741535, |
| "eval_G_dig": 0.2875696160344558, |
| "eval_G_exon": 0.28512627374512517, |
| "eval_G_intron": 0.2903097508538823, |
| "eval_G_nig": 0.2919082544550291, |
| "eval_G_promoter": 0.28639406205967843, |
| "eval_G_utr": 0.28500934665605915, |
| "eval_avg_bp_per_token": 3.131502810621028, |
| "eval_bp_per_token/cds": 3.318783269961977, |
| "eval_bp_per_token/dig": 2.9342044933516735, |
| "eval_bp_per_token/exon": 3.1435539151067755, |
| "eval_bp_per_token/intron": 3.126881228212261, |
| "eval_bp_per_token/nig": 3.1039425108088365, |
| "eval_bp_per_token/promoter": 3.167873717816075, |
| "eval_bp_per_token/utr": 3.1274881134660326, |
| "eval_ppl_cds": 3.7688088011455108, |
| "eval_ppl_dig": 1.213764587451198, |
| "eval_ppl_exon": 3.475874625951551, |
| "eval_ppl_intron": 3.1372681996036618, |
| "eval_ppl_nig": 2.9655860866121753, |
| "eval_ppl_promoter": 3.4064425195757795, |
| "eval_ppl_utr": 3.401034912667057, |
| "step": 2000, |
| "tokens_trained": 6.552980912 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.5701723282036735, |
| "grad_norm": 0.4732998311519623, |
| "loss": 1.1901, |
| "loss_ce": 1.140292763710022, |
| "loss_region": 0.05972037836909294, |
| "loss_total": 1.2000131607055664, |
| "lr": 0.001163344871028469, |
| "router/selected_tokens_s0": 7414.375, |
| "router/selected_tokens_s1": 4512.625, |
| "step": 2010, |
| "tokens_trained": 6.585746352 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.573009006453443, |
| "grad_norm": 0.9064376354217529, |
| "loss": 1.1917, |
| "loss_ce": 1.187543511390686, |
| "loss_region": 0.05966633930802345, |
| "loss_total": 1.247209906578064, |
| "lr": 0.001162937979044618, |
| "router/selected_tokens_s0": 7509.25, |
| "router/selected_tokens_s1": 4591.375, |
| "step": 2020, |
| "tokens_trained": 6.618511792 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.5758456847032125, |
| "grad_norm": 1.8362021446228027, |
| "loss": 1.1878, |
| "loss_ce": 1.1521724462509155, |
| "loss_region": 0.06033769249916077, |
| "loss_total": 1.212510108947754, |
| "lr": 0.001162531087060767, |
| "router/selected_tokens_s0": 7567.5, |
| "router/selected_tokens_s1": 4059.25, |
| "step": 2030, |
| "tokens_trained": 6.651277232 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.578682362952982, |
| "grad_norm": 0.5258932113647461, |
| "loss": 1.1861, |
| "loss_ce": 1.1424130201339722, |
| "loss_region": 0.05959177017211914, |
| "loss_total": 1.2020047903060913, |
| "lr": 0.001162124195076916, |
| "router/selected_tokens_s0": 7294.75, |
| "router/selected_tokens_s1": 4573.5, |
| "step": 2040, |
| "tokens_trained": 6.684041872 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.5815190412027516, |
| "grad_norm": 0.8636136651039124, |
| "loss": 1.1844, |
| "loss_ce": 1.2008599042892456, |
| "loss_region": 0.06002184376120567, |
| "loss_total": 1.260881781578064, |
| "lr": 0.001161717303093065, |
| "router/selected_tokens_s0": 7491.875, |
| "router/selected_tokens_s1": 4283.5, |
| "step": 2050, |
| "tokens_trained": 6.716806512 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.5843557194525211, |
| "grad_norm": 1.8349488973617554, |
| "loss": 1.188, |
| "loss_ce": 1.151695966720581, |
| "loss_region": 0.060406751930713654, |
| "loss_total": 1.2121027708053589, |
| "lr": 0.001161310411109214, |
| "router/selected_tokens_s0": 7363.75, |
| "router/selected_tokens_s1": 3931.875, |
| "step": 2060, |
| "tokens_trained": 6.749571952 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.5871923977022906, |
| "grad_norm": 1.5168817043304443, |
| "loss": 1.1871, |
| "loss_ce": 1.1098576784133911, |
| "loss_region": 0.05994623526930809, |
| "loss_total": 1.1698038578033447, |
| "lr": 0.001160903519125363, |
| "router/selected_tokens_s0": 7911.125, |
| "router/selected_tokens_s1": 4589.875, |
| "step": 2070, |
| "tokens_trained": 6.782337392 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.5900290759520601, |
| "grad_norm": 0.41119372844696045, |
| "loss": 1.1909, |
| "loss_ce": 1.1331144571304321, |
| "loss_region": 0.060201916843652725, |
| "loss_total": 1.1933163404464722, |
| "lr": 0.001160496627141512, |
| "router/selected_tokens_s0": 7271.25, |
| "router/selected_tokens_s1": 4090.125, |
| "step": 2080, |
| "tokens_trained": 6.815102832 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.5928657542018296, |
| "grad_norm": 0.7637854814529419, |
| "loss": 1.1825, |
| "loss_ce": 1.1243622303009033, |
| "loss_region": 0.06005639582872391, |
| "loss_total": 1.1844186782836914, |
| "lr": 0.0011600897351576609, |
| "router/selected_tokens_s0": 7414.5, |
| "router/selected_tokens_s1": 4234.5, |
| "step": 2090, |
| "tokens_trained": 6.847868272 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.5957024324515992, |
| "grad_norm": 0.7214458584785461, |
| "loss": 1.1846, |
| "loss_ce": 1.2273354530334473, |
| "loss_region": 0.059937622398138046, |
| "loss_total": 1.2872730493545532, |
| "lr": 0.0011596828431738098, |
| "router/selected_tokens_s0": 7353.5, |
| "router/selected_tokens_s1": 4309.875, |
| "step": 2100, |
| "tokens_trained": 6.880633712 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.5985391107013687, |
| "grad_norm": 0.761725664138794, |
| "loss": 1.1841, |
| "loss_ce": 1.1755341291427612, |
| "loss_region": 0.06007400527596474, |
| "loss_total": 1.2356081008911133, |
| "lr": 0.0011592759511899588, |
| "router/selected_tokens_s0": 7109.5, |
| "router/selected_tokens_s1": 4157.375, |
| "step": 2110, |
| "tokens_trained": 6.913397016 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.6013757889511382, |
| "grad_norm": 1.6571940183639526, |
| "loss": 1.1874, |
| "loss_ce": 1.0919430255889893, |
| "loss_region": 0.060075193643569946, |
| "loss_total": 1.1520181894302368, |
| "lr": 0.0011588690592061078, |
| "router/selected_tokens_s0": 7679.625, |
| "router/selected_tokens_s1": 4325.75, |
| "step": 2120, |
| "tokens_trained": 6.946162296 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.6042124672009077, |
| "grad_norm": 1.00299870967865, |
| "loss": 1.1899, |
| "loss_ce": 0.9806433320045471, |
| "loss_region": 0.0603158213198185, |
| "loss_total": 1.040959119796753, |
| "lr": 0.0011584621672222567, |
| "router/selected_tokens_s0": 7512.25, |
| "router/selected_tokens_s1": 4050.5, |
| "step": 2130, |
| "tokens_trained": 6.978927736 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.6070491454506772, |
| "grad_norm": 0.7420610189437866, |
| "loss": 1.1868, |
| "loss_ce": 1.0650607347488403, |
| "loss_region": 0.060088276863098145, |
| "loss_total": 1.1251490116119385, |
| "lr": 0.0011580552752384057, |
| "router/selected_tokens_s0": 7468.875, |
| "router/selected_tokens_s1": 4230.25, |
| "step": 2140, |
| "tokens_trained": 7.011693176 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.6098858237004467, |
| "grad_norm": 0.422925740480423, |
| "loss": 1.1866, |
| "loss_ce": 1.140285849571228, |
| "loss_region": 0.059929173439741135, |
| "loss_total": 1.2002149820327759, |
| "lr": 0.0011576483832545547, |
| "router/selected_tokens_s0": 7197.0, |
| "router/selected_tokens_s1": 4285.5, |
| "step": 2150, |
| "tokens_trained": 7.044458616 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.6127225019502163, |
| "grad_norm": 1.301650881767273, |
| "loss": 1.1857, |
| "loss_ce": 1.1306639909744263, |
| "loss_region": 0.0599885918200016, |
| "loss_total": 1.19065260887146, |
| "lr": 0.0011572414912707036, |
| "router/selected_tokens_s0": 7413.875, |
| "router/selected_tokens_s1": 4286.25, |
| "step": 2160, |
| "tokens_trained": 7.077224056 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.6155591801999858, |
| "grad_norm": 1.5111349821090698, |
| "loss": 1.1789, |
| "loss_ce": 1.1107418537139893, |
| "loss_region": 0.05998992919921875, |
| "loss_total": 1.170731782913208, |
| "lr": 0.0011568345992868526, |
| "router/selected_tokens_s0": 7362.75, |
| "router/selected_tokens_s1": 4271.625, |
| "step": 2170, |
| "tokens_trained": 7.109989496 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.6183958584497553, |
| "grad_norm": 1.040898323059082, |
| "loss": 1.1843, |
| "loss_ce": 1.1505948305130005, |
| "loss_region": 0.05984164774417877, |
| "loss_total": 1.210436463356018, |
| "lr": 0.0011564277073030016, |
| "router/selected_tokens_s0": 7593.875, |
| "router/selected_tokens_s1": 4479.0, |
| "step": 2180, |
| "tokens_trained": 7.142754936 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.6212325366995248, |
| "grad_norm": 0.5109982490539551, |
| "loss": 1.1831, |
| "loss_ce": 1.1635215282440186, |
| "loss_region": 0.05977338179945946, |
| "loss_total": 1.2232948541641235, |
| "lr": 0.0011560208153191505, |
| "router/selected_tokens_s0": 7465.25, |
| "router/selected_tokens_s1": 4481.5, |
| "step": 2190, |
| "tokens_trained": 7.175520376 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.6240692149492943, |
| "grad_norm": 0.8081886768341064, |
| "loss": 1.1829, |
| "loss_ce": 1.1379996538162231, |
| "loss_region": 0.05986666679382324, |
| "loss_total": 1.1978663206100464, |
| "lr": 0.0011556139233352995, |
| "router/selected_tokens_s0": 7448.875, |
| "router/selected_tokens_s1": 4400.625, |
| "step": 2200, |
| "tokens_trained": 7.208285816 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.6269058931990639, |
| "grad_norm": 0.6905659437179565, |
| "loss": 1.181, |
| "loss_ce": 1.1509792804718018, |
| "loss_region": 0.05997806414961815, |
| "loss_total": 1.2109572887420654, |
| "lr": 0.0011552070313514487, |
| "router/selected_tokens_s0": 7552.125, |
| "router/selected_tokens_s1": 4350.75, |
| "step": 2210, |
| "tokens_trained": 7.241051256 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.6297425714488334, |
| "grad_norm": 1.281117558479309, |
| "loss": 1.1859, |
| "loss_ce": 1.0983253717422485, |
| "loss_region": 0.05995757132768631, |
| "loss_total": 1.158282995223999, |
| "lr": 0.0011548001393675976, |
| "router/selected_tokens_s0": 7399.875, |
| "router/selected_tokens_s1": 4318.0, |
| "step": 2220, |
| "tokens_trained": 7.273816696 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.6325792496986029, |
| "grad_norm": 0.9121662378311157, |
| "loss": 1.1886, |
| "loss_ce": 1.1557446718215942, |
| "loss_region": 0.05996396020054817, |
| "loss_total": 1.215708613395691, |
| "lr": 0.0011543932473837466, |
| "router/selected_tokens_s0": 7308.5, |
| "router/selected_tokens_s1": 4276.75, |
| "step": 2230, |
| "tokens_trained": 7.306582136 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.6354159279483724, |
| "grad_norm": 0.6554101705551147, |
| "loss": 1.1782, |
| "loss_ce": 1.0497093200683594, |
| "loss_region": 0.05988743156194687, |
| "loss_total": 1.1095967292785645, |
| "lr": 0.0011539863553998956, |
| "router/selected_tokens_s0": 7355.0, |
| "router/selected_tokens_s1": 4360.0, |
| "step": 2240, |
| "tokens_trained": 7.339347576 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.638252606198142, |
| "grad_norm": 0.8508551120758057, |
| "loss": 1.1777, |
| "loss_ce": 1.1053940057754517, |
| "loss_region": 0.05997467786073685, |
| "loss_total": 1.165368676185608, |
| "lr": 0.0011535794634160443, |
| "router/selected_tokens_s0": 7460.125, |
| "router/selected_tokens_s1": 4319.25, |
| "step": 2250, |
| "tokens_trained": 7.372113016 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.6410892844479115, |
| "grad_norm": 0.4902251958847046, |
| "loss": 1.1781, |
| "loss_ce": 1.034322738647461, |
| "loss_region": 0.06009990721940994, |
| "loss_total": 1.094422698020935, |
| "lr": 0.0011531725714321933, |
| "router/selected_tokens_s0": 7439.0, |
| "router/selected_tokens_s1": 4203.25, |
| "step": 2260, |
| "tokens_trained": 7.404878456 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.643925962697681, |
| "grad_norm": 1.0787978172302246, |
| "loss": 1.1791, |
| "loss_ce": 1.157869577407837, |
| "loss_region": 0.060018230229616165, |
| "loss_total": 1.2178877592086792, |
| "lr": 0.0011527656794483422, |
| "router/selected_tokens_s0": 7333.5, |
| "router/selected_tokens_s1": 4239.25, |
| "step": 2270, |
| "tokens_trained": 7.437643896 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.6467626409474505, |
| "grad_norm": 0.501467227935791, |
| "loss": 1.1763, |
| "loss_ce": 1.006744623184204, |
| "loss_region": 0.06016156077384949, |
| "loss_total": 1.066906213760376, |
| "lr": 0.0011523587874644914, |
| "router/selected_tokens_s0": 7145.375, |
| "router/selected_tokens_s1": 4089.875, |
| "step": 2280, |
| "tokens_trained": 7.470409336 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.64959931919722, |
| "grad_norm": 0.2689010500907898, |
| "loss": 1.183, |
| "loss_ce": 1.1373342275619507, |
| "loss_region": 0.06009635701775551, |
| "loss_total": 1.1974306106567383, |
| "lr": 0.0011519518954806404, |
| "router/selected_tokens_s0": 7760.375, |
| "router/selected_tokens_s1": 4351.25, |
| "step": 2290, |
| "tokens_trained": 7.503173472 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.6524359974469895, |
| "grad_norm": 0.6574118733406067, |
| "loss": 1.1784, |
| "loss_ce": 1.1269201040267944, |
| "loss_region": 0.05999438464641571, |
| "loss_total": 1.1869144439697266, |
| "lr": 0.0011515450034967894, |
| "router/selected_tokens_s0": 7411.875, |
| "router/selected_tokens_s1": 4283.125, |
| "step": 2300, |
| "tokens_trained": 7.535938912 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.6552726756967591, |
| "grad_norm": 1.2753338813781738, |
| "loss": 1.1849, |
| "loss_ce": 1.122605562210083, |
| "loss_region": 0.059686850756406784, |
| "loss_total": 1.1822924613952637, |
| "lr": 0.0011511381115129383, |
| "router/selected_tokens_s0": 7782.0, |
| "router/selected_tokens_s1": 4731.875, |
| "step": 2310, |
| "tokens_trained": 7.568704352 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.6581093539465286, |
| "grad_norm": 0.6676866412162781, |
| "loss": 1.1793, |
| "loss_ce": 1.0851284265518188, |
| "loss_region": 0.06009310483932495, |
| "loss_total": 1.145221471786499, |
| "lr": 0.0011507312195290873, |
| "router/selected_tokens_s0": 7423.375, |
| "router/selected_tokens_s1": 4202.0, |
| "step": 2320, |
| "tokens_trained": 7.601469792 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.6609460321962981, |
| "grad_norm": 0.2944624423980713, |
| "loss": 1.1781, |
| "loss_ce": 1.1351754665374756, |
| "loss_region": 0.05992001295089722, |
| "loss_total": 1.1950955390930176, |
| "lr": 0.0011503243275452363, |
| "router/selected_tokens_s0": 7475.375, |
| "router/selected_tokens_s1": 4371.625, |
| "step": 2330, |
| "tokens_trained": 7.634233608 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.6637827104460676, |
| "grad_norm": 0.7615077495574951, |
| "loss": 1.1739, |
| "loss_ce": 1.1925767660140991, |
| "loss_region": 0.06029242277145386, |
| "loss_total": 1.2528691291809082, |
| "lr": 0.0011499174355613852, |
| "router/selected_tokens_s0": 7301.0, |
| "router/selected_tokens_s1": 4010.0, |
| "step": 2340, |
| "tokens_trained": 7.666998248 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.6666193886958371, |
| "grad_norm": 0.6357439160346985, |
| "loss": 1.1739, |
| "loss_ce": 1.0707632303237915, |
| "loss_region": 0.059961337596178055, |
| "loss_total": 1.130724549293518, |
| "lr": 0.0011495105435775342, |
| "router/selected_tokens_s0": 7490.25, |
| "router/selected_tokens_s1": 4340.25, |
| "step": 2350, |
| "tokens_trained": 7.699763688 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.6694560669456067, |
| "grad_norm": 0.5703539848327637, |
| "loss": 1.1754, |
| "loss_ce": 1.1037662029266357, |
| "loss_region": 0.05993586778640747, |
| "loss_total": 1.1637020111083984, |
| "lr": 0.0011491036515936831, |
| "router/selected_tokens_s0": 7448.25, |
| "router/selected_tokens_s1": 4343.25, |
| "step": 2360, |
| "tokens_trained": 7.732529128 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.6722927451953762, |
| "grad_norm": 0.24826520681381226, |
| "loss": 1.1704, |
| "loss_ce": 1.148972988128662, |
| "loss_region": 0.0599345862865448, |
| "loss_total": 1.2089076042175293, |
| "lr": 0.0011486967596098321, |
| "router/selected_tokens_s0": 7434.875, |
| "router/selected_tokens_s1": 4339.125, |
| "step": 2370, |
| "tokens_trained": 7.765294568 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.6751294234451457, |
| "grad_norm": 0.8287674188613892, |
| "loss": 1.1755, |
| "loss_ce": 1.1266905069351196, |
| "loss_region": 0.06008046492934227, |
| "loss_total": 1.1867709159851074, |
| "lr": 0.001148289867625981, |
| "router/selected_tokens_s0": 7483.75, |
| "router/selected_tokens_s1": 4241.0, |
| "step": 2380, |
| "tokens_trained": 7.798060008 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.6779661016949152, |
| "grad_norm": 0.5604900121688843, |
| "loss": 1.1775, |
| "loss_ce": 1.123670220375061, |
| "loss_region": 0.05985580384731293, |
| "loss_total": 1.1835260391235352, |
| "lr": 0.00114788297564213, |
| "router/selected_tokens_s0": 7468.375, |
| "router/selected_tokens_s1": 4420.75, |
| "step": 2390, |
| "tokens_trained": 7.830825448 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.6808027799446847, |
| "grad_norm": 1.3453503847122192, |
| "loss": 1.1807, |
| "loss_ce": 1.152555227279663, |
| "loss_region": 0.06042764335870743, |
| "loss_total": 1.2129828929901123, |
| "lr": 0.001147476083658279, |
| "router/selected_tokens_s0": 7370.375, |
| "router/selected_tokens_s1": 3917.0, |
| "step": 2400, |
| "tokens_trained": 7.863590888 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.6836394581944543, |
| "grad_norm": 0.28487667441368103, |
| "loss": 1.178, |
| "loss_ce": 1.0731216669082642, |
| "loss_region": 0.059980422258377075, |
| "loss_total": 1.1331020593643188, |
| "lr": 0.001147069191674428, |
| "router/selected_tokens_s0": 7738.75, |
| "router/selected_tokens_s1": 4453.125, |
| "step": 2410, |
| "tokens_trained": 7.896356328 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.6864761364442238, |
| "grad_norm": 0.657503604888916, |
| "loss": 1.1744, |
| "loss_ce": 0.9968506097793579, |
| "loss_region": 0.060376450419425964, |
| "loss_total": 1.0572270154953003, |
| "lr": 0.001146662299690577, |
| "router/selected_tokens_s0": 7406.125, |
| "router/selected_tokens_s1": 3959.25, |
| "step": 2420, |
| "tokens_trained": 7.929121768 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.6893128146939933, |
| "grad_norm": 0.4880315959453583, |
| "loss": 1.1715, |
| "loss_ce": 1.1423887014389038, |
| "loss_region": 0.05990506336092949, |
| "loss_total": 1.2022937536239624, |
| "lr": 0.001146255407706726, |
| "router/selected_tokens_s0": 7357.75, |
| "router/selected_tokens_s1": 4339.75, |
| "step": 2430, |
| "tokens_trained": 7.961887208 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.6921494929437628, |
| "grad_norm": 0.814704179763794, |
| "loss": 1.177, |
| "loss_ce": 1.1437619924545288, |
| "loss_region": 0.06004530191421509, |
| "loss_total": 1.2038073539733887, |
| "lr": 0.0011458485157228749, |
| "router/selected_tokens_s0": 7665.375, |
| "router/selected_tokens_s1": 4347.5, |
| "step": 2440, |
| "tokens_trained": 7.994651848 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.6949861711935323, |
| "grad_norm": 0.8780333399772644, |
| "loss": 1.1746, |
| "loss_ce": 1.0909539461135864, |
| "loss_region": 0.05988944694399834, |
| "loss_total": 1.1508433818817139, |
| "lr": 0.0011454416237390238, |
| "router/selected_tokens_s0": 7393.75, |
| "router/selected_tokens_s1": 4379.75, |
| "step": 2450, |
| "tokens_trained": 8.027417288 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.6978228494433019, |
| "grad_norm": 0.4150752127170563, |
| "loss": 1.1708, |
| "loss_ce": 1.1503171920776367, |
| "loss_region": 0.06001152843236923, |
| "loss_total": 1.2103286981582642, |
| "lr": 0.001145034731755173, |
| "router/selected_tokens_s0": 7436.25, |
| "router/selected_tokens_s1": 4275.125, |
| "step": 2460, |
| "tokens_trained": 8.060182704 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.7006595276930714, |
| "grad_norm": 0.3488692045211792, |
| "loss": 1.1695, |
| "loss_ce": 1.1709470748901367, |
| "loss_region": 0.06026313453912735, |
| "loss_total": 1.2312102317810059, |
| "lr": 0.001144627839771322, |
| "router/selected_tokens_s0": 7478.625, |
| "router/selected_tokens_s1": 4083.125, |
| "step": 2470, |
| "tokens_trained": 8.092948144 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.7034962059428409, |
| "grad_norm": 0.5676023960113525, |
| "loss": 1.1762, |
| "loss_ce": 1.111052393913269, |
| "loss_region": 0.05988939851522446, |
| "loss_total": 1.1709418296813965, |
| "lr": 0.001144220947787471, |
| "router/selected_tokens_s0": 7557.625, |
| "router/selected_tokens_s1": 4431.625, |
| "step": 2480, |
| "tokens_trained": 8.125713584 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.7063328841926104, |
| "grad_norm": 0.9211216568946838, |
| "loss": 1.1722, |
| "loss_ce": 1.1372982263565063, |
| "loss_region": 0.05975315347313881, |
| "loss_total": 1.1970514059066772, |
| "lr": 0.00114381405580362, |
| "router/selected_tokens_s0": 7306.625, |
| "router/selected_tokens_s1": 4463.875, |
| "step": 2490, |
| "tokens_trained": 8.158479016 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.7091695624423799, |
| "grad_norm": 0.38418033719062805, |
| "loss": 1.172, |
| "loss_ce": 1.0843875408172607, |
| "loss_region": 0.060160085558891296, |
| "loss_total": 1.1445475816726685, |
| "lr": 0.0011434071638197687, |
| "router/selected_tokens_s0": 7460.25, |
| "router/selected_tokens_s1": 4160.0, |
| "step": 2500, |
| "tokens_trained": 8.191244456 |
| }, |
| { |
| "epoch": 0.7091695624423799, |
| "eval_ppl": 3.0502517341523907, |
| "eval_runtime": 1.0296, |
| "step": 2500, |
| "tokens_trained": 8.191244456 |
| }, |
| { |
| "epoch": 0.7091695624423799, |
| "eval_F": 0.324281193843269, |
| "eval_F_cds": 0.33014069016085423, |
| "eval_F_dig": 0.3424486287991249, |
| "eval_F_exon": 0.32918186774017694, |
| "eval_F_intron": 0.32455070534671615, |
| "eval_F_nig": 0.32592670330229484, |
| "eval_F_promoter": 0.31897710094619564, |
| "eval_F_utr": 0.31848282614857376, |
| "eval_G": 0.2886181369760333, |
| "eval_G_cds": 0.2838436794687457, |
| "eval_G_dig": 0.30049420975271507, |
| "eval_G_exon": 0.2874631111879901, |
| "eval_G_intron": 0.2892678897551585, |
| "eval_G_nig": 0.29121496986385664, |
| "eval_G_promoter": 0.28373180247233265, |
| "eval_G_utr": 0.2818582472815584, |
| "eval_avg_bp_per_token": 3.0837434269571555, |
| "eval_bp_per_token/cds": 3.029011660188784, |
| "eval_bp_per_token/dig": 2.920146018708647, |
| "eval_bp_per_token/exon": 3.0378343949044586, |
| "eval_bp_per_token/intron": 3.081182642729752, |
| "eval_bp_per_token/nig": 3.0681745001805107, |
| "eval_bp_per_token/promoter": 3.1350212821975516, |
| "eval_bp_per_token/utr": 3.1398867313915857, |
| "eval_ppl_cds": 3.7501344975557402, |
| "eval_ppl_dig": 1.1912519609990055, |
| "eval_ppl_exon": 3.4368045839019854, |
| "eval_ppl_intron": 3.093198175869257, |
| "eval_ppl_nig": 2.911204542550375, |
| "eval_ppl_promoter": 3.3724010489561747, |
| "eval_ppl_utr": 3.3630147084441506, |
| "step": 2500, |
| "tokens_trained": 8.191244456 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.7120062406921495, |
| "grad_norm": 0.2484031319618225, |
| "loss": 1.1708, |
| "loss_ce": 1.0516343116760254, |
| "loss_region": 0.05984216183423996, |
| "loss_total": 1.1114764213562012, |
| "lr": 0.0011430002718359176, |
| "router/selected_tokens_s0": 7426.5, |
| "router/selected_tokens_s1": 4435.875, |
| "step": 2510, |
| "tokens_trained": 8.224009896 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.714842918941919, |
| "grad_norm": 0.5577887296676636, |
| "loss": 1.1686, |
| "loss_ce": 1.1628156900405884, |
| "loss_region": 0.06016436591744423, |
| "loss_total": 1.22298002243042, |
| "lr": 0.0011425933798520666, |
| "router/selected_tokens_s0": 7422.25, |
| "router/selected_tokens_s1": 4144.75, |
| "step": 2520, |
| "tokens_trained": 8.256775336 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.7176795971916885, |
| "grad_norm": 1.2296240329742432, |
| "loss": 1.1737, |
| "loss_ce": 1.143703818321228, |
| "loss_region": 0.05981975421309471, |
| "loss_total": 1.2035235166549683, |
| "lr": 0.0011421864878682158, |
| "router/selected_tokens_s0": 7441.875, |
| "router/selected_tokens_s1": 4451.5, |
| "step": 2530, |
| "tokens_trained": 8.289540776 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.720516275441458, |
| "grad_norm": 0.6363380551338196, |
| "loss": 1.1719, |
| "loss_ce": 1.0045700073242188, |
| "loss_region": 0.0602262057363987, |
| "loss_total": 1.0647962093353271, |
| "lr": 0.0011417795958843647, |
| "router/selected_tokens_s0": 7545.375, |
| "router/selected_tokens_s1": 4129.25, |
| "step": 2540, |
| "tokens_trained": 8.322306216 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.7233529536912275, |
| "grad_norm": 0.5893341302871704, |
| "loss": 1.1715, |
| "loss_ce": 1.094207763671875, |
| "loss_region": 0.05962364003062248, |
| "loss_total": 1.1538313627243042, |
| "lr": 0.0011413727039005137, |
| "router/selected_tokens_s0": 7538.5, |
| "router/selected_tokens_s1": 4675.25, |
| "step": 2550, |
| "tokens_trained": 8.355071656 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.7261896319409971, |
| "grad_norm": 0.721533477306366, |
| "loss": 1.1675, |
| "loss_ce": 1.1756376028060913, |
| "loss_region": 0.059939730912446976, |
| "loss_total": 1.2355773448944092, |
| "lr": 0.0011409658119166627, |
| "router/selected_tokens_s0": 7450.0, |
| "router/selected_tokens_s1": 4343.875, |
| "step": 2560, |
| "tokens_trained": 8.387835072 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.7290263101907666, |
| "grad_norm": 0.6618571281433105, |
| "loss": 1.1697, |
| "loss_ce": 1.139210820198059, |
| "loss_region": 0.06019924208521843, |
| "loss_total": 1.199410080909729, |
| "lr": 0.0011405589199328116, |
| "router/selected_tokens_s0": 7507.375, |
| "router/selected_tokens_s1": 4144.875, |
| "step": 2570, |
| "tokens_trained": 8.420600512 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.7318629884405361, |
| "grad_norm": 0.7483921051025391, |
| "loss": 1.1723, |
| "loss_ce": 1.0926769971847534, |
| "loss_region": 0.06003560498356819, |
| "loss_total": 1.1527125835418701, |
| "lr": 0.0011401520279489606, |
| "router/selected_tokens_s0": 7537.875, |
| "router/selected_tokens_s1": 4298.0, |
| "step": 2580, |
| "tokens_trained": 8.453365928 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.7346996666903056, |
| "grad_norm": 0.42239630222320557, |
| "loss": 1.1731, |
| "loss_ce": 1.0441555976867676, |
| "loss_region": 0.059957705438137054, |
| "loss_total": 1.1041133403778076, |
| "lr": 0.0011397451359651096, |
| "router/selected_tokens_s0": 7481.375, |
| "router/selected_tokens_s1": 4350.75, |
| "step": 2590, |
| "tokens_trained": 8.486131368 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.7375363449400751, |
| "grad_norm": 0.41389554738998413, |
| "loss": 1.1702, |
| "loss_ce": 1.0931426286697388, |
| "loss_region": 0.05995481461286545, |
| "loss_total": 1.15309739112854, |
| "lr": 0.0011393382439812585, |
| "router/selected_tokens_s0": 7410.25, |
| "router/selected_tokens_s1": 4320.125, |
| "step": 2600, |
| "tokens_trained": 8.518896808 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.7403730231898447, |
| "grad_norm": 0.9596730470657349, |
| "loss": 1.1648, |
| "loss_ce": 1.0973268747329712, |
| "loss_region": 0.060223136097192764, |
| "loss_total": 1.1575499773025513, |
| "lr": 0.0011389313519974075, |
| "router/selected_tokens_s0": 7651.375, |
| "router/selected_tokens_s1": 4185.375, |
| "step": 2610, |
| "tokens_trained": 8.551662248 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.7432097014396142, |
| "grad_norm": 0.45995089411735535, |
| "loss": 1.1699, |
| "loss_ce": 1.129144310951233, |
| "loss_region": 0.05994933471083641, |
| "loss_total": 1.1890935897827148, |
| "lr": 0.0011385244600135565, |
| "router/selected_tokens_s0": 7339.375, |
| "router/selected_tokens_s1": 4301.25, |
| "step": 2620, |
| "tokens_trained": 8.584426888 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.7460463796893837, |
| "grad_norm": 1.477419376373291, |
| "loss": 1.1667, |
| "loss_ce": 1.1206713914871216, |
| "loss_region": 0.060068678110837936, |
| "loss_total": 1.1807401180267334, |
| "lr": 0.0011381175680297054, |
| "router/selected_tokens_s0": 7434.375, |
| "router/selected_tokens_s1": 4225.5, |
| "step": 2630, |
| "tokens_trained": 8.617192328 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.7488830579391532, |
| "grad_norm": 0.9513674974441528, |
| "loss": 1.1693, |
| "loss_ce": 1.1470342874526978, |
| "loss_region": 0.05986784026026726, |
| "loss_total": 1.2069021463394165, |
| "lr": 0.0011377106760458544, |
| "router/selected_tokens_s0": 7393.125, |
| "router/selected_tokens_s1": 4392.625, |
| "step": 2640, |
| "tokens_trained": 8.649951656 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.7517197361889227, |
| "grad_norm": 0.6216477155685425, |
| "loss": 1.1689, |
| "loss_ce": 0.9593722820281982, |
| "loss_region": 0.059885814785957336, |
| "loss_total": 1.0192581415176392, |
| "lr": 0.0011373037840620034, |
| "router/selected_tokens_s0": 7577.0, |
| "router/selected_tokens_s1": 4453.25, |
| "step": 2650, |
| "tokens_trained": 8.682717096 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.7545564144386923, |
| "grad_norm": 0.35981065034866333, |
| "loss": 1.1644, |
| "loss_ce": 1.1608963012695312, |
| "loss_region": 0.05997583642601967, |
| "loss_total": 1.220872163772583, |
| "lr": 0.0011368968920781523, |
| "router/selected_tokens_s0": 7422.75, |
| "router/selected_tokens_s1": 4303.25, |
| "step": 2660, |
| "tokens_trained": 8.715482536 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.7573930926884618, |
| "grad_norm": 0.053441207855939865, |
| "loss": 1.1648, |
| "loss_ce": 1.0992127656936646, |
| "loss_region": 0.05997667461633682, |
| "loss_total": 1.1591894626617432, |
| "lr": 0.0011364900000943013, |
| "router/selected_tokens_s0": 7271.625, |
| "router/selected_tokens_s1": 4261.375, |
| "step": 2670, |
| "tokens_trained": 8.748247976 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.7602297709382313, |
| "grad_norm": 0.7620555758476257, |
| "loss": 1.1704, |
| "loss_ce": 1.111451506614685, |
| "loss_region": 0.05993987247347832, |
| "loss_total": 1.1713913679122925, |
| "lr": 0.0011360831081104503, |
| "router/selected_tokens_s0": 7619.375, |
| "router/selected_tokens_s1": 4424.125, |
| "step": 2680, |
| "tokens_trained": 8.781013416 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.7630664491880008, |
| "grad_norm": 0.8613774180412292, |
| "loss": 1.1739, |
| "loss_ce": 1.1915504932403564, |
| "loss_region": 0.06020931154489517, |
| "loss_total": 1.2517597675323486, |
| "lr": 0.0011356762161265992, |
| "router/selected_tokens_s0": 7298.0, |
| "router/selected_tokens_s1": 4064.875, |
| "step": 2690, |
| "tokens_trained": 8.813778696 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.7659031274377703, |
| "grad_norm": 0.5968960523605347, |
| "loss": 1.1583, |
| "loss_ce": 1.1074433326721191, |
| "loss_region": 0.060063157230615616, |
| "loss_total": 1.167506456375122, |
| "lr": 0.0011352693241427482, |
| "router/selected_tokens_s0": 7549.875, |
| "router/selected_tokens_s1": 4278.625, |
| "step": 2700, |
| "tokens_trained": 8.846544136 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.7687398056875399, |
| "grad_norm": 0.6496450901031494, |
| "loss": 1.1657, |
| "loss_ce": 1.1257076263427734, |
| "loss_region": 0.05991235002875328, |
| "loss_total": 1.1856199502944946, |
| "lr": 0.0011348624321588974, |
| "router/selected_tokens_s0": 7512.375, |
| "router/selected_tokens_s1": 4401.125, |
| "step": 2710, |
| "tokens_trained": 8.879309576 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.7715764839373094, |
| "grad_norm": 0.8424694538116455, |
| "loss": 1.1617, |
| "loss_ce": 1.1279290914535522, |
| "loss_region": 0.060039110481739044, |
| "loss_total": 1.1879682540893555, |
| "lr": 0.0011344555401750463, |
| "router/selected_tokens_s0": 7467.5, |
| "router/selected_tokens_s1": 4261.25, |
| "step": 2720, |
| "tokens_trained": 8.912075016 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.7744131621870789, |
| "grad_norm": 0.7486598491668701, |
| "loss": 1.1733, |
| "loss_ce": 1.1374742984771729, |
| "loss_region": 0.0601111575961113, |
| "loss_total": 1.1975854635238647, |
| "lr": 0.0011340486481911953, |
| "router/selected_tokens_s0": 7572.375, |
| "router/selected_tokens_s1": 4257.125, |
| "step": 2730, |
| "tokens_trained": 8.944840456 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.7772498404368484, |
| "grad_norm": 0.5935088396072388, |
| "loss": 1.1695, |
| "loss_ce": 1.0936038494110107, |
| "loss_region": 0.059866975992918015, |
| "loss_total": 1.1534708738327026, |
| "lr": 0.001133641756207344, |
| "router/selected_tokens_s0": 7294.75, |
| "router/selected_tokens_s1": 4377.5, |
| "step": 2740, |
| "tokens_trained": 8.977605896 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.7800865186866179, |
| "grad_norm": 0.18403343856334686, |
| "loss": 1.1649, |
| "loss_ce": 1.1556285619735718, |
| "loss_region": 0.0601109080016613, |
| "loss_total": 1.2157394886016846, |
| "lr": 0.001133234864223493, |
| "router/selected_tokens_s0": 7394.0, |
| "router/selected_tokens_s1": 4183.875, |
| "step": 2750, |
| "tokens_trained": 9.010371336 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.7829231969363875, |
| "grad_norm": 0.18721617758274078, |
| "loss": 1.1593, |
| "loss_ce": 1.1401076316833496, |
| "loss_region": 0.05992697551846504, |
| "loss_total": 1.2000346183776855, |
| "lr": 0.001132827972239642, |
| "router/selected_tokens_s0": 7434.5, |
| "router/selected_tokens_s1": 4354.125, |
| "step": 2760, |
| "tokens_trained": 9.043136776 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.785759875186157, |
| "grad_norm": 0.4251779317855835, |
| "loss": 1.1691, |
| "loss_ce": 1.1821259260177612, |
| "loss_region": 0.06003726273775101, |
| "loss_total": 1.2421631813049316, |
| "lr": 0.001132421080255791, |
| "router/selected_tokens_s0": 7411.75, |
| "router/selected_tokens_s1": 4242.0, |
| "step": 2770, |
| "tokens_trained": 9.075902216 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.7885965534359265, |
| "grad_norm": 0.7461664080619812, |
| "loss": 1.1679, |
| "loss_ce": 1.1305651664733887, |
| "loss_region": 0.06012176349759102, |
| "loss_total": 1.1906869411468506, |
| "lr": 0.0011320141882719401, |
| "router/selected_tokens_s0": 7582.125, |
| "router/selected_tokens_s1": 4240.125, |
| "step": 2780, |
| "tokens_trained": 9.108667656 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.791433231685696, |
| "grad_norm": 0.46765202283859253, |
| "loss": 1.1629, |
| "loss_ce": 1.0436062812805176, |
| "loss_region": 0.06002155691385269, |
| "loss_total": 1.1036278009414673, |
| "lr": 0.001131607296288089, |
| "router/selected_tokens_s0": 7352.75, |
| "router/selected_tokens_s1": 4238.875, |
| "step": 2790, |
| "tokens_trained": 9.14143004 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.7942699099354655, |
| "grad_norm": 0.15761826932430267, |
| "loss": 1.1619, |
| "loss_ce": 0.9722632765769958, |
| "loss_region": 0.05996977165341377, |
| "loss_total": 1.0322329998016357, |
| "lr": 0.001131200404304238, |
| "router/selected_tokens_s0": 7640.125, |
| "router/selected_tokens_s1": 4418.75, |
| "step": 2800, |
| "tokens_trained": 9.17419548 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.797106588185235, |
| "grad_norm": 0.6991495490074158, |
| "loss": 1.1656, |
| "loss_ce": 1.0889579057693481, |
| "loss_region": 0.05980755016207695, |
| "loss_total": 1.1487654447555542, |
| "lr": 0.001130793512320387, |
| "router/selected_tokens_s0": 7576.375, |
| "router/selected_tokens_s1": 4533.5, |
| "step": 2810, |
| "tokens_trained": 9.20696092 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.7999432664350046, |
| "grad_norm": 7.605064868927002, |
| "loss": 1.1754, |
| "loss_ce": 1.0953309535980225, |
| "loss_region": 0.06001967564225197, |
| "loss_total": 1.155350685119629, |
| "lr": 0.001130386620336536, |
| "router/selected_tokens_s0": 7465.375, |
| "router/selected_tokens_s1": 4280.0, |
| "step": 2820, |
| "tokens_trained": 9.23972636 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.8027799446847741, |
| "grad_norm": 1.1716810464859009, |
| "loss": 1.1723, |
| "loss_ce": 1.0848501920700073, |
| "loss_region": 0.06025412306189537, |
| "loss_total": 1.1451042890548706, |
| "lr": 0.001129979728352685, |
| "router/selected_tokens_s0": 7606.25, |
| "router/selected_tokens_s1": 4139.5, |
| "step": 2830, |
| "tokens_trained": 9.2724918 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.8056166229345436, |
| "grad_norm": 0.22220773994922638, |
| "loss": 1.1679, |
| "loss_ce": 1.0692635774612427, |
| "loss_region": 0.05984492972493172, |
| "loss_total": 1.1291085481643677, |
| "lr": 0.001129572836368834, |
| "router/selected_tokens_s0": 7514.375, |
| "router/selected_tokens_s1": 4475.875, |
| "step": 2840, |
| "tokens_trained": 9.30525692 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.8084533011843131, |
| "grad_norm": 0.157245472073555, |
| "loss": 1.1629, |
| "loss_ce": 1.1122710704803467, |
| "loss_region": 0.06007363274693489, |
| "loss_total": 1.17234468460083, |
| "lr": 0.0011291659443849829, |
| "router/selected_tokens_s0": 7484.0, |
| "router/selected_tokens_s1": 4237.625, |
| "step": 2850, |
| "tokens_trained": 9.33802236 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.8112899794340827, |
| "grad_norm": 0.20226812362670898, |
| "loss": 1.1579, |
| "loss_ce": 1.1239492893218994, |
| "loss_region": 0.06007097288966179, |
| "loss_total": 1.1840202808380127, |
| "lr": 0.0011287590524011318, |
| "router/selected_tokens_s0": 7343.75, |
| "router/selected_tokens_s1": 4189.625, |
| "step": 2860, |
| "tokens_trained": 9.3707878 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.8141266576838522, |
| "grad_norm": 0.8050180077552795, |
| "loss": 1.1591, |
| "loss_ce": 1.0115890502929688, |
| "loss_region": 0.05998658388853073, |
| "loss_total": 1.07157564163208, |
| "lr": 0.0011283521604172808, |
| "router/selected_tokens_s0": 7345.0, |
| "router/selected_tokens_s1": 4284.0, |
| "step": 2870, |
| "tokens_trained": 9.403548272 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.8169633359336217, |
| "grad_norm": 0.23213136196136475, |
| "loss": 1.1609, |
| "loss_ce": 1.0894582271575928, |
| "loss_region": 0.059960782527923584, |
| "loss_total": 1.1494190692901611, |
| "lr": 0.0011279452684334298, |
| "router/selected_tokens_s0": 7514.125, |
| "router/selected_tokens_s1": 4359.625, |
| "step": 2880, |
| "tokens_trained": 9.436313712 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.8198000141833912, |
| "grad_norm": 0.15889334678649902, |
| "loss": 1.1619, |
| "loss_ce": 1.178969383239746, |
| "loss_region": 0.059914231300354004, |
| "loss_total": 1.2388836145401, |
| "lr": 0.0011275383764495787, |
| "router/selected_tokens_s0": 7517.75, |
| "router/selected_tokens_s1": 4405.625, |
| "step": 2890, |
| "tokens_trained": 9.469079152 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.8226366924331607, |
| "grad_norm": 1.0330082178115845, |
| "loss": 1.1673, |
| "loss_ce": 1.0116691589355469, |
| "loss_region": 0.05996264889836311, |
| "loss_total": 1.0716317892074585, |
| "lr": 0.0011271314844657277, |
| "router/selected_tokens_s0": 7586.25, |
| "router/selected_tokens_s1": 4396.125, |
| "step": 2900, |
| "tokens_trained": 9.50184356 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.8254733706829303, |
| "grad_norm": 0.11898782849311829, |
| "loss": 1.1614, |
| "loss_ce": 1.0050482749938965, |
| "loss_region": 0.059950973838567734, |
| "loss_total": 1.0649992227554321, |
| "lr": 0.0011267245924818767, |
| "router/selected_tokens_s0": 7383.25, |
| "router/selected_tokens_s1": 4316.5, |
| "step": 2910, |
| "tokens_trained": 9.534608992 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.8283100489326998, |
| "grad_norm": 0.334542453289032, |
| "loss": 1.1596, |
| "loss_ce": 1.192863941192627, |
| "loss_region": 0.05996337905526161, |
| "loss_total": 1.2528272867202759, |
| "lr": 0.0011263177004980256, |
| "router/selected_tokens_s0": 7447.75, |
| "router/selected_tokens_s1": 4325.875, |
| "step": 2920, |
| "tokens_trained": 9.567373632 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.8311467271824693, |
| "grad_norm": 2.9175424575805664, |
| "loss": 1.1657, |
| "loss_ce": 1.1350030899047852, |
| "loss_region": 0.06023212894797325, |
| "loss_total": 1.195235252380371, |
| "lr": 0.0011259108085141746, |
| "router/selected_tokens_s0": 7292.75, |
| "router/selected_tokens_s1": 4036.5, |
| "step": 2930, |
| "tokens_trained": 9.600139072 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.8339834054322388, |
| "grad_norm": 0.56610107421875, |
| "loss": 1.1666, |
| "loss_ce": 1.0537909269332886, |
| "loss_region": 0.0598357617855072, |
| "loss_total": 1.1136267185211182, |
| "lr": 0.0011255039165303236, |
| "router/selected_tokens_s0": 7546.5, |
| "router/selected_tokens_s1": 4522.375, |
| "step": 2940, |
| "tokens_trained": 9.632904512 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.8368200836820083, |
| "grad_norm": 0.4306299090385437, |
| "loss": 1.1627, |
| "loss_ce": 0.9236807227134705, |
| "loss_region": 0.060068003833293915, |
| "loss_total": 0.983748733997345, |
| "lr": 0.0011250970245464725, |
| "router/selected_tokens_s0": 7534.5, |
| "router/selected_tokens_s1": 4282.375, |
| "step": 2950, |
| "tokens_trained": 9.665669952 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.8396567619317779, |
| "grad_norm": 0.26961612701416016, |
| "loss": 1.1591, |
| "loss_ce": 1.145656704902649, |
| "loss_region": 0.05994835123419762, |
| "loss_total": 1.2056050300598145, |
| "lr": 0.0011246901325626217, |
| "router/selected_tokens_s0": 7376.0, |
| "router/selected_tokens_s1": 4319.875, |
| "step": 2960, |
| "tokens_trained": 9.698432616 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.8424934401815474, |
| "grad_norm": 0.3499460220336914, |
| "loss": 1.1564, |
| "loss_ce": 1.1160905361175537, |
| "loss_region": 0.05992301553487778, |
| "loss_total": 1.1760135889053345, |
| "lr": 0.0011242832405787707, |
| "router/selected_tokens_s0": 7367.625, |
| "router/selected_tokens_s1": 4343.5, |
| "step": 2970, |
| "tokens_trained": 9.731196464 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.8453301184313169, |
| "grad_norm": 0.33717259764671326, |
| "loss": 1.1596, |
| "loss_ce": 1.1449989080429077, |
| "loss_region": 0.06000290438532829, |
| "loss_total": 1.2050018310546875, |
| "lr": 0.0011238763485949196, |
| "router/selected_tokens_s0": 7521.125, |
| "router/selected_tokens_s1": 4322.125, |
| "step": 2980, |
| "tokens_trained": 9.763955848 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.8481667966810864, |
| "grad_norm": 0.577868640422821, |
| "loss": 1.1567, |
| "loss_ce": 1.1596364974975586, |
| "loss_region": 0.059965357184410095, |
| "loss_total": 1.2196018695831299, |
| "lr": 0.0011234694566110684, |
| "router/selected_tokens_s0": 7441.375, |
| "router/selected_tokens_s1": 4320.875, |
| "step": 2990, |
| "tokens_trained": 9.796721288 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.8510034749308559, |
| "grad_norm": 0.31575798988342285, |
| "loss": 1.1571, |
| "loss_ce": 1.151265263557434, |
| "loss_region": 0.05999479070305824, |
| "loss_total": 1.2112600803375244, |
| "lr": 0.0011230625646272174, |
| "router/selected_tokens_s0": 7448.0, |
| "router/selected_tokens_s1": 4295.125, |
| "step": 3000, |
| "tokens_trained": 9.829486728 |
| }, |
| { |
| "epoch": 0.8510034749308559, |
| "eval_ppl": 3.0109347418579757, |
| "eval_runtime": 1.041, |
| "step": 3000, |
| "tokens_trained": 9.829486728 |
| }, |
| { |
| "epoch": 0.8510034749308559, |
| "eval_F": 0.3265157434174545, |
| "eval_F_cds": 0.32704733971861966, |
| "eval_F_dig": 0.32940073443237755, |
| "eval_F_exon": 0.3268335639703107, |
| "eval_F_intron": 0.3263412398381537, |
| "eval_F_nig": 0.33125031801760546, |
| "eval_F_promoter": 0.32055409364910514, |
| "eval_F_utr": 0.32332706331005695, |
| "eval_G": 0.2973011006414075, |
| "eval_G_cds": 0.2947680577912332, |
| "eval_G_dig": 0.29819865077349794, |
| "eval_G_exon": 0.2960844786555961, |
| "eval_G_intron": 0.29743575612920226, |
| "eval_G_nig": 0.30035083925839884, |
| "eval_G_promoter": 0.29358059830951516, |
| "eval_G_utr": 0.2933570700417429, |
| "eval_avg_bp_per_token": 3.062639459689046, |
| "eval_bp_per_token/cds": 3.0576613185735306, |
| "eval_bp_per_token/dig": 3.0358159392789372, |
| "eval_bp_per_token/exon": 3.0596612779060814, |
| "eval_bp_per_token/intron": 3.064277136704947, |
| "eval_bp_per_token/nig": 3.0188650262574286, |
| "eval_bp_per_token/promoter": 3.11959828251219, |
| "eval_bp_per_token/utr": 3.092843481032834, |
| "eval_ppl_cds": 3.7454664560279944, |
| "eval_ppl_dig": 1.1519394255345423, |
| "eval_ppl_exon": 3.40224242349742, |
| "eval_ppl_intron": 3.059094676849309, |
| "eval_ppl_nig": 2.858553832785096, |
| "eval_ppl_promoter": 3.339582435304094, |
| "eval_ppl_utr": 3.3378419038139535, |
| "step": 3000, |
| "tokens_trained": 9.829486728 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.8538401531806254, |
| "grad_norm": 1.484962821006775, |
| "loss": 1.1514, |
| "loss_ce": 1.1324831247329712, |
| "loss_region": 0.05994853004813194, |
| "loss_total": 1.1924316883087158, |
| "lr": 0.0011226556726433663, |
| "router/selected_tokens_s0": 7517.5, |
| "router/selected_tokens_s1": 4376.375, |
| "step": 3010, |
| "tokens_trained": 9.862252168 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.856676831430395, |
| "grad_norm": 1.3830934762954712, |
| "loss": 1.1595, |
| "loss_ce": 1.1091521978378296, |
| "loss_region": 0.06006480008363724, |
| "loss_total": 1.1692169904708862, |
| "lr": 0.0011222487806595153, |
| "router/selected_tokens_s0": 7292.625, |
| "router/selected_tokens_s1": 4178.875, |
| "step": 3020, |
| "tokens_trained": 9.895016808 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.8595135096801645, |
| "grad_norm": 0.5553335547447205, |
| "loss": 1.1541, |
| "loss_ce": 0.938005268573761, |
| "loss_region": 0.060082145035266876, |
| "loss_total": 0.9980874061584473, |
| "lr": 0.0011218418886756645, |
| "router/selected_tokens_s0": 7320.125, |
| "router/selected_tokens_s1": 4164.75, |
| "step": 3030, |
| "tokens_trained": 9.927782248 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.862350187929934, |
| "grad_norm": 0.16215354204177856, |
| "loss": 1.1601, |
| "loss_ce": 1.1301517486572266, |
| "loss_region": 0.05998187139630318, |
| "loss_total": 1.1901335716247559, |
| "lr": 0.0011214349966918134, |
| "router/selected_tokens_s0": 7467.875, |
| "router/selected_tokens_s1": 4321.375, |
| "step": 3040, |
| "tokens_trained": 9.960547688 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.8651868661797035, |
| "grad_norm": 0.33606576919555664, |
| "loss": 1.1614, |
| "loss_ce": 1.0460857152938843, |
| "loss_region": 0.060056839138269424, |
| "loss_total": 1.106142520904541, |
| "lr": 0.0011210281047079624, |
| "router/selected_tokens_s0": 7379.875, |
| "router/selected_tokens_s1": 4211.75, |
| "step": 3050, |
| "tokens_trained": 9.993313128 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.868023544429473, |
| "grad_norm": 0.28386250138282776, |
| "loss": 1.1553, |
| "loss_ce": 1.0485560894012451, |
| "loss_region": 0.06009070575237274, |
| "loss_total": 1.1086467504501343, |
| "lr": 0.0011206212127241114, |
| "router/selected_tokens_s0": 7455.875, |
| "router/selected_tokens_s1": 4209.875, |
| "step": 3060, |
| "tokens_trained": 10.026078568 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.8708602226792426, |
| "grad_norm": 0.26301583647727966, |
| "loss": 1.1555, |
| "loss_ce": 1.0704364776611328, |
| "loss_region": 0.060045089572668076, |
| "loss_total": 1.1304816007614136, |
| "lr": 0.0011202143207402603, |
| "router/selected_tokens_s0": 7333.625, |
| "router/selected_tokens_s1": 4212.0, |
| "step": 3070, |
| "tokens_trained": 10.058844008 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.8736969009290121, |
| "grad_norm": 0.6406818628311157, |
| "loss": 1.1644, |
| "loss_ce": 1.0923793315887451, |
| "loss_region": 0.06021547690033913, |
| "loss_total": 1.152594804763794, |
| "lr": 0.0011198074287564093, |
| "router/selected_tokens_s0": 7350.0, |
| "router/selected_tokens_s1": 4048.75, |
| "step": 3080, |
| "tokens_trained": 10.091604144 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.8765335791787816, |
| "grad_norm": 0.3559488356113434, |
| "loss": 1.1612, |
| "loss_ce": 1.1637524366378784, |
| "loss_region": 0.05999548360705376, |
| "loss_total": 1.223747968673706, |
| "lr": 0.0011194005367725583, |
| "router/selected_tokens_s0": 7453.25, |
| "router/selected_tokens_s1": 4298.0, |
| "step": 3090, |
| "tokens_trained": 10.124368784 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.8793702574285511, |
| "grad_norm": 0.6486785411834717, |
| "loss": 1.1579, |
| "loss_ce": 1.0859977006912231, |
| "loss_region": 0.05990362912416458, |
| "loss_total": 1.1459013223648071, |
| "lr": 0.0011189936447887072, |
| "router/selected_tokens_s0": 7465.375, |
| "router/selected_tokens_s1": 4406.875, |
| "step": 3100, |
| "tokens_trained": 10.157134224 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.8822069356783206, |
| "grad_norm": 0.1741045117378235, |
| "loss": 1.1547, |
| "loss_ce": 1.1454678773880005, |
| "loss_region": 0.05989646539092064, |
| "loss_total": 1.2053643465042114, |
| "lr": 0.0011185867528048562, |
| "router/selected_tokens_s0": 7361.125, |
| "router/selected_tokens_s1": 4378.625, |
| "step": 3110, |
| "tokens_trained": 10.189899664 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.8850436139280902, |
| "grad_norm": 0.682843804359436, |
| "loss": 1.1585, |
| "loss_ce": 1.1063830852508545, |
| "loss_region": 0.060072798281908035, |
| "loss_total": 1.166455864906311, |
| "lr": 0.0011181798608210052, |
| "router/selected_tokens_s0": 7449.5, |
| "router/selected_tokens_s1": 4223.0, |
| "step": 3120, |
| "tokens_trained": 10.222665104 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.8878802921778597, |
| "grad_norm": 0.8256351351737976, |
| "loss": 1.159, |
| "loss_ce": 1.0325148105621338, |
| "loss_region": 0.06016336381435394, |
| "loss_total": 1.092678189277649, |
| "lr": 0.0011177729688371541, |
| "router/selected_tokens_s0": 7448.0, |
| "router/selected_tokens_s1": 4129.875, |
| "step": 3130, |
| "tokens_trained": 10.255429744 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.8907169704276292, |
| "grad_norm": 0.585247278213501, |
| "loss": 1.1541, |
| "loss_ce": 1.0111068487167358, |
| "loss_region": 0.059877362102270126, |
| "loss_total": 1.0709842443466187, |
| "lr": 0.001117366076853303, |
| "router/selected_tokens_s0": 7432.375, |
| "router/selected_tokens_s1": 4420.875, |
| "step": 3140, |
| "tokens_trained": 10.288194384 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.8935536486773987, |
| "grad_norm": 0.34178733825683594, |
| "loss": 1.1569, |
| "loss_ce": 1.1468701362609863, |
| "loss_region": 0.060133885592222214, |
| "loss_total": 1.2070040702819824, |
| "lr": 0.001116959184869452, |
| "router/selected_tokens_s0": 7415.125, |
| "router/selected_tokens_s1": 4145.0, |
| "step": 3150, |
| "tokens_trained": 10.320959664 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.8963903269271682, |
| "grad_norm": 0.3433026969432831, |
| "loss": 1.1526, |
| "loss_ce": 1.1024417877197266, |
| "loss_region": 0.060020167380571365, |
| "loss_total": 1.1624619960784912, |
| "lr": 0.001116552292885601, |
| "router/selected_tokens_s0": 7450.875, |
| "router/selected_tokens_s1": 4279.125, |
| "step": 3160, |
| "tokens_trained": 10.353725104 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.8992270051769378, |
| "grad_norm": 0.9464390277862549, |
| "loss": 1.1564, |
| "loss_ce": 1.0875449180603027, |
| "loss_region": 0.0599987767636776, |
| "loss_total": 1.1475436687469482, |
| "lr": 0.00111614540090175, |
| "router/selected_tokens_s0": 7436.0, |
| "router/selected_tokens_s1": 4293.75, |
| "step": 3170, |
| "tokens_trained": 10.386490544 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.9020636834267073, |
| "grad_norm": 0.18355347216129303, |
| "loss": 1.1626, |
| "loss_ce": 1.0882506370544434, |
| "loss_region": 0.05997658893465996, |
| "loss_total": 1.1482272148132324, |
| "lr": 0.001115738508917899, |
| "router/selected_tokens_s0": 7366.125, |
| "router/selected_tokens_s1": 4289.25, |
| "step": 3180, |
| "tokens_trained": 10.419255984 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.9049003616764768, |
| "grad_norm": 0.21294108033180237, |
| "loss": 1.1586, |
| "loss_ce": 1.096394658088684, |
| "loss_region": 0.06007112190127373, |
| "loss_total": 1.156465768814087, |
| "lr": 0.001115331616934048, |
| "router/selected_tokens_s0": 7388.875, |
| "router/selected_tokens_s1": 4189.5, |
| "step": 3190, |
| "tokens_trained": 10.452021424 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.9077370399262463, |
| "grad_norm": 0.6308612823486328, |
| "loss": 1.1545, |
| "loss_ce": 1.1357101202011108, |
| "loss_region": 0.059982795268297195, |
| "loss_total": 1.1956928968429565, |
| "lr": 0.0011149247249501969, |
| "router/selected_tokens_s0": 7421.625, |
| "router/selected_tokens_s1": 4299.75, |
| "step": 3200, |
| "tokens_trained": 10.484786064 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.9105737181760158, |
| "grad_norm": 0.5103173851966858, |
| "loss": 1.151, |
| "loss_ce": 1.0303833484649658, |
| "loss_region": 0.05991167947649956, |
| "loss_total": 1.0902950763702393, |
| "lr": 0.001114517832966346, |
| "router/selected_tokens_s0": 7488.75, |
| "router/selected_tokens_s1": 4412.0, |
| "step": 3210, |
| "tokens_trained": 10.517551504 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.9134103964257854, |
| "grad_norm": 0.2153053730726242, |
| "loss": 1.1559, |
| "loss_ce": 1.0447967052459717, |
| "loss_region": 0.06009405478835106, |
| "loss_total": 1.1048907041549683, |
| "lr": 0.001114110940982495, |
| "router/selected_tokens_s0": 7537.0, |
| "router/selected_tokens_s1": 4243.25, |
| "step": 3220, |
| "tokens_trained": 10.550316944 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.9162470746755549, |
| "grad_norm": 0.7214828133583069, |
| "loss": 1.1549, |
| "loss_ce": 1.139117956161499, |
| "loss_region": 0.05997217446565628, |
| "loss_total": 1.1990901231765747, |
| "lr": 0.001113704048998644, |
| "router/selected_tokens_s0": 7427.25, |
| "router/selected_tokens_s1": 4315.375, |
| "step": 3230, |
| "tokens_trained": 10.583082384 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.9190837529253244, |
| "grad_norm": 0.5311968922615051, |
| "loss": 1.154, |
| "loss_ce": 1.153262972831726, |
| "loss_region": 0.06014882028102875, |
| "loss_total": 1.213411808013916, |
| "lr": 0.0011132971570147927, |
| "router/selected_tokens_s0": 7319.625, |
| "router/selected_tokens_s1": 4102.75, |
| "step": 3240, |
| "tokens_trained": 10.615847824 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.9219204311750939, |
| "grad_norm": 0.3367522656917572, |
| "loss": 1.1555, |
| "loss_ce": 1.0961225032806396, |
| "loss_region": 0.06004282087087631, |
| "loss_total": 1.156165361404419, |
| "lr": 0.0011128902650309417, |
| "router/selected_tokens_s0": 7466.875, |
| "router/selected_tokens_s1": 4262.0, |
| "step": 3250, |
| "tokens_trained": 10.648613248 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.9247571094248634, |
| "grad_norm": 0.5154868364334106, |
| "loss": 1.1425, |
| "loss_ce": 0.9198837280273438, |
| "loss_region": 0.059922680258750916, |
| "loss_total": 0.9798064231872559, |
| "lr": 0.0011124833730470907, |
| "router/selected_tokens_s0": 7582.125, |
| "router/selected_tokens_s1": 4459.25, |
| "step": 3260, |
| "tokens_trained": 10.681378688 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.927593787674633, |
| "grad_norm": 0.47784504294395447, |
| "loss": 1.1497, |
| "loss_ce": 0.9742546677589417, |
| "loss_region": 0.06002604961395264, |
| "loss_total": 1.034280776977539, |
| "lr": 0.0011120764810632396, |
| "router/selected_tokens_s0": 7462.0, |
| "router/selected_tokens_s1": 4289.5, |
| "step": 3270, |
| "tokens_trained": 10.714144128 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.9304304659244025, |
| "grad_norm": 0.5129175186157227, |
| "loss": 1.1509, |
| "loss_ce": 1.052802324295044, |
| "loss_region": 0.059992704540491104, |
| "loss_total": 1.1127949953079224, |
| "lr": 0.0011116695890793888, |
| "router/selected_tokens_s0": 7418.875, |
| "router/selected_tokens_s1": 4290.0, |
| "step": 3280, |
| "tokens_trained": 10.746909568 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.933267144174172, |
| "grad_norm": 0.6715986132621765, |
| "loss": 1.144, |
| "loss_ce": 1.0578795671463013, |
| "loss_region": 0.05989795923233032, |
| "loss_total": 1.1177775859832764, |
| "lr": 0.0011112626970955378, |
| "router/selected_tokens_s0": 7524.75, |
| "router/selected_tokens_s1": 4455.625, |
| "step": 3290, |
| "tokens_trained": 10.779675008 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.9361038224239415, |
| "grad_norm": 1.6286574602127075, |
| "loss": 1.1526, |
| "loss_ce": 1.1762213706970215, |
| "loss_region": 0.060009438544511795, |
| "loss_total": 1.2362308502197266, |
| "lr": 0.0011108558051116867, |
| "router/selected_tokens_s0": 7364.875, |
| "router/selected_tokens_s1": 4253.375, |
| "step": 3300, |
| "tokens_trained": 10.812440448 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.938940500673711, |
| "grad_norm": 0.5151171088218689, |
| "loss": 1.1554, |
| "loss_ce": 1.0672016143798828, |
| "loss_region": 0.06006050854921341, |
| "loss_total": 1.1272621154785156, |
| "lr": 0.0011104489131278357, |
| "router/selected_tokens_s0": 7480.25, |
| "router/selected_tokens_s1": 4240.75, |
| "step": 3310, |
| "tokens_trained": 10.845202832 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.9417771789234806, |
| "grad_norm": 0.2817917466163635, |
| "loss": 1.1505, |
| "loss_ce": 1.1648095846176147, |
| "loss_region": 0.0600048191845417, |
| "loss_total": 1.2248144149780273, |
| "lr": 0.0011100420211439847, |
| "router/selected_tokens_s0": 7581.625, |
| "router/selected_tokens_s1": 4353.375, |
| "step": 3320, |
| "tokens_trained": 10.877968272 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.9446138571732501, |
| "grad_norm": 0.11748674511909485, |
| "loss": 1.1557, |
| "loss_ce": 1.0878210067749023, |
| "loss_region": 0.05999528616666794, |
| "loss_total": 1.1478163003921509, |
| "lr": 0.0011096351291601336, |
| "router/selected_tokens_s0": 7491.625, |
| "router/selected_tokens_s1": 4317.25, |
| "step": 3330, |
| "tokens_trained": 10.910733712 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.9474505354230196, |
| "grad_norm": 0.659820556640625, |
| "loss": 1.1515, |
| "loss_ce": 1.0421631336212158, |
| "loss_region": 0.059975143522024155, |
| "loss_total": 1.1021382808685303, |
| "lr": 0.0011092282371762826, |
| "router/selected_tokens_s0": 7401.375, |
| "router/selected_tokens_s1": 4302.875, |
| "step": 3340, |
| "tokens_trained": 10.943499152 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.9502872136727891, |
| "grad_norm": 0.2588161528110504, |
| "loss": 1.1543, |
| "loss_ce": 1.165223240852356, |
| "loss_region": 0.0600115992128849, |
| "loss_total": 1.225234866142273, |
| "lr": 0.0011088213451924316, |
| "router/selected_tokens_s0": 7404.625, |
| "router/selected_tokens_s1": 4262.75, |
| "step": 3350, |
| "tokens_trained": 10.976264592 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.9531238919225586, |
| "grad_norm": 0.48925071954727173, |
| "loss": 1.1488, |
| "loss_ce": 1.0838979482650757, |
| "loss_region": 0.059988539665937424, |
| "loss_total": 1.1438864469528198, |
| "lr": 0.0011084144532085805, |
| "router/selected_tokens_s0": 7438.25, |
| "router/selected_tokens_s1": 4303.625, |
| "step": 3360, |
| "tokens_trained": 11.009030032 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.9559605701723282, |
| "grad_norm": 0.2473180890083313, |
| "loss": 1.1499, |
| "loss_ce": 0.9721645712852478, |
| "loss_region": 0.0599365159869194, |
| "loss_total": 1.032101035118103, |
| "lr": 0.0011080075612247295, |
| "router/selected_tokens_s0": 7439.5, |
| "router/selected_tokens_s1": 4369.0, |
| "step": 3370, |
| "tokens_trained": 11.041794728 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.9587972484220977, |
| "grad_norm": 0.18463283777236938, |
| "loss": 1.1535, |
| "loss_ce": 1.084818720817566, |
| "loss_region": 0.06001410260796547, |
| "loss_total": 1.1448328495025635, |
| "lr": 0.0011076006692408785, |
| "router/selected_tokens_s0": 7494.375, |
| "router/selected_tokens_s1": 4295.625, |
| "step": 3380, |
| "tokens_trained": 11.074559368 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.9616339266718672, |
| "grad_norm": 0.31234511733055115, |
| "loss": 1.1475, |
| "loss_ce": 1.0884270668029785, |
| "loss_region": 0.0599556639790535, |
| "loss_total": 1.1483827829360962, |
| "lr": 0.0011071937772570274, |
| "router/selected_tokens_s0": 7453.75, |
| "router/selected_tokens_s1": 4346.75, |
| "step": 3390, |
| "tokens_trained": 11.107324808 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.9644706049216367, |
| "grad_norm": 0.6354888677597046, |
| "loss": 1.1591, |
| "loss_ce": 1.06814706325531, |
| "loss_region": 0.059949424117803574, |
| "loss_total": 1.1280964612960815, |
| "lr": 0.0011067868852731764, |
| "router/selected_tokens_s0": 7476.375, |
| "router/selected_tokens_s1": 4367.5, |
| "step": 3400, |
| "tokens_trained": 11.140090248 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.9673072831714062, |
| "grad_norm": 0.21459953486919403, |
| "loss": 1.1508, |
| "loss_ce": 1.0278993844985962, |
| "loss_region": 0.060014039278030396, |
| "loss_total": 1.0879133939743042, |
| "lr": 0.0011063799932893254, |
| "router/selected_tokens_s0": 7518.125, |
| "router/selected_tokens_s1": 4318.875, |
| "step": 3410, |
| "tokens_trained": 11.172855688 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.9701439614211758, |
| "grad_norm": 1.008099913597107, |
| "loss": 1.1504, |
| "loss_ce": 1.0413086414337158, |
| "loss_region": 0.06001026928424835, |
| "loss_total": 1.1013189554214478, |
| "lr": 0.0011059731013054743, |
| "router/selected_tokens_s0": 7393.875, |
| "router/selected_tokens_s1": 4257.5, |
| "step": 3420, |
| "tokens_trained": 11.205621096 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.9729806396709453, |
| "grad_norm": 0.49454066157341003, |
| "loss": 1.1453, |
| "loss_ce": 0.9835691452026367, |
| "loss_region": 0.05995873734354973, |
| "loss_total": 1.0435278415679932, |
| "lr": 0.0011055662093216233, |
| "router/selected_tokens_s0": 7526.625, |
| "router/selected_tokens_s1": 4386.375, |
| "step": 3430, |
| "tokens_trained": 11.238384136 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.9758173179207148, |
| "grad_norm": 0.2803723216056824, |
| "loss": 1.1535, |
| "loss_ce": 1.0913236141204834, |
| "loss_region": 0.05994022637605667, |
| "loss_total": 1.1512638330459595, |
| "lr": 0.0011051593173377723, |
| "router/selected_tokens_s0": 7456.0, |
| "router/selected_tokens_s1": 4370.25, |
| "step": 3440, |
| "tokens_trained": 11.271149576 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.9786539961704843, |
| "grad_norm": 0.4610815644264221, |
| "loss": 1.1521, |
| "loss_ce": 1.0604761838912964, |
| "loss_region": 0.060004524886608124, |
| "loss_total": 1.1204806566238403, |
| "lr": 0.0011047524253539212, |
| "router/selected_tokens_s0": 7462.5, |
| "router/selected_tokens_s1": 4301.75, |
| "step": 3450, |
| "tokens_trained": 11.303915016 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.9814906744202538, |
| "grad_norm": 0.37205979228019714, |
| "loss": 1.1458, |
| "loss_ce": 1.0331106185913086, |
| "loss_region": 0.060014817863702774, |
| "loss_total": 1.0931254625320435, |
| "lr": 0.0011043455333700704, |
| "router/selected_tokens_s0": 7482.0, |
| "router/selected_tokens_s1": 4290.0, |
| "step": 3460, |
| "tokens_trained": 11.336680456 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.9843273526700234, |
| "grad_norm": 0.24320276081562042, |
| "loss": 1.1473, |
| "loss_ce": 0.9818251729011536, |
| "loss_region": 0.059945087879896164, |
| "loss_total": 1.0417702198028564, |
| "lr": 0.0011039386413862194, |
| "router/selected_tokens_s0": 7398.375, |
| "router/selected_tokens_s1": 4348.875, |
| "step": 3470, |
| "tokens_trained": 11.369444288 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.9871640309197929, |
| "grad_norm": 0.47212696075439453, |
| "loss": 1.141, |
| "loss_ce": 1.1068347692489624, |
| "loss_region": 0.05994030460715294, |
| "loss_total": 1.166775107383728, |
| "lr": 0.0011035317494023683, |
| "router/selected_tokens_s0": 7398.125, |
| "router/selected_tokens_s1": 4349.25, |
| "step": 3480, |
| "tokens_trained": 11.402209728 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.9900007091695624, |
| "grad_norm": 1.9513204097747803, |
| "loss": 1.153, |
| "loss_ce": 1.111778736114502, |
| "loss_region": 0.06009702384471893, |
| "loss_total": 1.1718757152557373, |
| "lr": 0.001103124857418517, |
| "router/selected_tokens_s0": 7419.125, |
| "router/selected_tokens_s1": 4178.625, |
| "step": 3490, |
| "tokens_trained": 11.434975168 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.9928373874193319, |
| "grad_norm": 0.6990298628807068, |
| "loss": 1.1454, |
| "loss_ce": 1.1152887344360352, |
| "loss_region": 0.05999855324625969, |
| "loss_total": 1.1752872467041016, |
| "lr": 0.001102717965434666, |
| "router/selected_tokens_s0": 7405.5, |
| "router/selected_tokens_s1": 4282.875, |
| "step": 3500, |
| "tokens_trained": 11.467740608 |
| }, |
| { |
| "epoch": 0.9928373874193319, |
| "eval_ppl": 2.990368541747783, |
| "eval_runtime": 1.0282, |
| "step": 3500, |
| "tokens_trained": 11.467740608 |
| }, |
| { |
| "epoch": 0.9928373874193319, |
| "eval_F": 0.34124931635284006, |
| "eval_F_cds": 0.3399935841620457, |
| "eval_F_dig": 0.3237752949449176, |
| "eval_F_exon": 0.3411330565689605, |
| "eval_F_intron": 0.3419830788859948, |
| "eval_F_nig": 0.3420374751946268, |
| "eval_F_promoter": 0.3388947987234757, |
| "eval_F_utr": 0.3383235847354995, |
| "eval_G": 0.3070386282009337, |
| "eval_G_cds": 0.30430517226811327, |
| "eval_G_dig": 0.2977837318833503, |
| "eval_G_exon": 0.3072212306579444, |
| "eval_G_intron": 0.3076679212901026, |
| "eval_G_nig": 0.3092953701307529, |
| "eval_G_promoter": 0.3028028836396338, |
| "eval_G_utr": 0.3037611886096009, |
| "eval_avg_bp_per_token": 2.9304088010715144, |
| "eval_bp_per_token/cds": 2.9412319719638766, |
| "eval_bp_per_token/dig": 3.088561776061776, |
| "eval_bp_per_token/exon": 2.9314074984634297, |
| "eval_bp_per_token/intron": 2.9241212847649845, |
| "eval_bp_per_token/nig": 2.923656243898357, |
| "eval_bp_per_token/promoter": 2.9507682141087064, |
| "eval_bp_per_token/utr": 2.9557501904036556, |
| "eval_ppl_cds": 3.7629390152362947, |
| "eval_ppl_dig": 1.147684685044179, |
| "eval_ppl_exon": 3.390840392014607, |
| "eval_ppl_intron": 3.042627072402514, |
| "eval_ppl_nig": 2.824713259021816, |
| "eval_ppl_promoter": 3.3304855964307176, |
| "eval_ppl_utr": 3.3256724056579388, |
| "step": 3500, |
| "tokens_trained": 11.467740608 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.9956740656691014, |
| "grad_norm": 0.5327582359313965, |
| "loss": 1.1501, |
| "loss_ce": 1.1076359748840332, |
| "loss_region": 0.0600367896258831, |
| "loss_total": 1.1676727533340454, |
| "lr": 0.001102311073450815, |
| "router/selected_tokens_s0": 7424.625, |
| "router/selected_tokens_s1": 4241.0, |
| "step": 3510, |
| "tokens_trained": 11.500506048 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.998510743918871, |
| "grad_norm": 0.41065487265586853, |
| "loss": 1.1514, |
| "loss_ce": 1.0242090225219727, |
| "loss_region": 0.059992894530296326, |
| "loss_total": 1.0842019319534302, |
| "lr": 0.001101904181466964, |
| "router/selected_tokens_s0": 7490.25, |
| "router/selected_tokens_s1": 4330.75, |
| "step": 3520, |
| "tokens_trained": 11.533271488 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.0011346712999079, |
| "grad_norm": 0.1535387933254242, |
| "loss": 1.1414, |
| "loss_ce": 1.102476954460144, |
| "loss_region": 0.060031864792108536, |
| "loss_total": 1.1625088453292847, |
| "lr": 0.0011014972894831132, |
| "router/selected_tokens_s0": 7425.125, |
| "router/selected_tokens_s1": 4247.0, |
| "step": 3530, |
| "tokens_trained": 11.56357952 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.0039713495496774, |
| "grad_norm": 0.1981125921010971, |
| "loss": 1.1437, |
| "loss_ce": 1.0602258443832397, |
| "loss_region": 0.05993477255105972, |
| "loss_total": 1.1201605796813965, |
| "lr": 0.0011010903974992621, |
| "router/selected_tokens_s0": 7320.25, |
| "router/selected_tokens_s1": 4332.875, |
| "step": 3540, |
| "tokens_trained": 11.59634496 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.006808027799447, |
| "grad_norm": 0.5812313556671143, |
| "loss": 1.156, |
| "loss_ce": 1.0328749418258667, |
| "loss_region": 0.059998273849487305, |
| "loss_total": 1.092873215675354, |
| "lr": 0.001100683505515411, |
| "router/selected_tokens_s0": 7472.125, |
| "router/selected_tokens_s1": 4304.625, |
| "step": 3550, |
| "tokens_trained": 11.6291104 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.0096447060492164, |
| "grad_norm": 0.4127905070781708, |
| "loss": 1.1601, |
| "loss_ce": 1.1510852575302124, |
| "loss_region": 0.059947848320007324, |
| "loss_total": 1.2110331058502197, |
| "lr": 0.00110027661353156, |
| "router/selected_tokens_s0": 7431.625, |
| "router/selected_tokens_s1": 4357.5, |
| "step": 3560, |
| "tokens_trained": 11.66187584 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.012481384298986, |
| "grad_norm": 0.3352585434913635, |
| "loss": 1.1578, |
| "loss_ce": 1.0219836235046387, |
| "loss_region": 0.059950411319732666, |
| "loss_total": 1.0819339752197266, |
| "lr": 0.001099869721547709, |
| "router/selected_tokens_s0": 7476.0, |
| "router/selected_tokens_s1": 4375.875, |
| "step": 3570, |
| "tokens_trained": 11.69464128 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.0153180625487555, |
| "grad_norm": 0.35328951478004456, |
| "loss": 1.1552, |
| "loss_ce": 1.1077344417572021, |
| "loss_region": 0.06004289910197258, |
| "loss_total": 1.1677772998809814, |
| "lr": 0.001099462829563858, |
| "router/selected_tokens_s0": 7441.5, |
| "router/selected_tokens_s1": 4242.75, |
| "step": 3580, |
| "tokens_trained": 11.72740592 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.018154740798525, |
| "grad_norm": 0.19262564182281494, |
| "loss": 1.1517, |
| "loss_ce": 1.1469261646270752, |
| "loss_region": 0.060008659958839417, |
| "loss_total": 1.2069348096847534, |
| "lr": 0.001099055937580007, |
| "router/selected_tokens_s0": 7375.125, |
| "router/selected_tokens_s1": 4253.875, |
| "step": 3590, |
| "tokens_trained": 11.76017136 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.0209914190482945, |
| "grad_norm": 0.20067667961120605, |
| "loss": 1.1528, |
| "loss_ce": 1.0741374492645264, |
| "loss_region": 0.060044094920158386, |
| "loss_total": 1.1341814994812012, |
| "lr": 0.001098649045596156, |
| "router/selected_tokens_s0": 7434.25, |
| "router/selected_tokens_s1": 4234.375, |
| "step": 3600, |
| "tokens_trained": 11.7929352 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.023828097298064, |
| "grad_norm": 0.30228880047798157, |
| "loss": 1.1506, |
| "loss_ce": 1.1023025512695312, |
| "loss_region": 0.06002019718289375, |
| "loss_total": 1.162322759628296, |
| "lr": 0.0010982421536123049, |
| "router/selected_tokens_s0": 7451.25, |
| "router/selected_tokens_s1": 4271.25, |
| "step": 3610, |
| "tokens_trained": 11.82570064 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.0266647755478335, |
| "grad_norm": 0.8743928670883179, |
| "loss": 1.148, |
| "loss_ce": 1.005650520324707, |
| "loss_region": 0.060022272169589996, |
| "loss_total": 1.065672755241394, |
| "lr": 0.0010978352616284538, |
| "router/selected_tokens_s0": 7474.625, |
| "router/selected_tokens_s1": 4284.5, |
| "step": 3620, |
| "tokens_trained": 11.85846448 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.029501453797603, |
| "grad_norm": 0.827194094657898, |
| "loss": 1.1452, |
| "loss_ce": 1.1064406633377075, |
| "loss_region": 0.05999286100268364, |
| "loss_total": 1.166433572769165, |
| "lr": 0.0010974283696446028, |
| "router/selected_tokens_s0": 7428.625, |
| "router/selected_tokens_s1": 4296.75, |
| "step": 3630, |
| "tokens_trained": 11.89122992 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.0323381320473726, |
| "grad_norm": 0.27454066276550293, |
| "loss": 1.139, |
| "loss_ce": 1.0833203792572021, |
| "loss_region": 0.059953462332487106, |
| "loss_total": 1.1432738304138184, |
| "lr": 0.0010970214776607518, |
| "router/selected_tokens_s0": 7458.875, |
| "router/selected_tokens_s1": 4362.125, |
| "step": 3640, |
| "tokens_trained": 11.92399536 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.035174810297142, |
| "grad_norm": 0.07617896795272827, |
| "loss": 1.1378, |
| "loss_ce": 1.1439638137817383, |
| "loss_region": 0.059945326298475266, |
| "loss_total": 1.203909158706665, |
| "lr": 0.0010966145856769007, |
| "router/selected_tokens_s0": 7424.625, |
| "router/selected_tokens_s1": 4355.875, |
| "step": 3650, |
| "tokens_trained": 11.9567608 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.0380114885469116, |
| "grad_norm": 0.6363743543624878, |
| "loss": 1.1485, |
| "loss_ce": 1.079978585243225, |
| "loss_region": 0.06003373861312866, |
| "loss_total": 1.140012264251709, |
| "lr": 0.0010962076936930497, |
| "router/selected_tokens_s0": 7431.375, |
| "router/selected_tokens_s1": 4242.875, |
| "step": 3660, |
| "tokens_trained": 11.98952624 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.0408481667966811, |
| "grad_norm": 0.41256821155548096, |
| "loss": 1.1433, |
| "loss_ce": 1.13809072971344, |
| "loss_region": 0.06003982573747635, |
| "loss_total": 1.1981306076049805, |
| "lr": 0.0010958008017091987, |
| "router/selected_tokens_s0": 7385.375, |
| "router/selected_tokens_s1": 4224.125, |
| "step": 3670, |
| "tokens_trained": 12.02229168 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.0436848450464506, |
| "grad_norm": 0.22508053481578827, |
| "loss": 1.1512, |
| "loss_ce": 1.103158712387085, |
| "loss_region": 0.06010059267282486, |
| "loss_total": 1.1632592678070068, |
| "lr": 0.0010953939097253476, |
| "router/selected_tokens_s0": 7469.0, |
| "router/selected_tokens_s1": 4187.25, |
| "step": 3680, |
| "tokens_trained": 12.05505712 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.0465215232962202, |
| "grad_norm": 0.33240363001823425, |
| "loss": 1.1358, |
| "loss_ce": 1.138681173324585, |
| "loss_region": 0.059974271804094315, |
| "loss_total": 1.1986554861068726, |
| "lr": 0.0010949870177414966, |
| "router/selected_tokens_s0": 7456.125, |
| "router/selected_tokens_s1": 4332.125, |
| "step": 3690, |
| "tokens_trained": 12.08782256 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.0493582015459897, |
| "grad_norm": 0.372721791267395, |
| "loss": 1.1436, |
| "loss_ce": 1.081639051437378, |
| "loss_region": 0.05995466560125351, |
| "loss_total": 1.1415936946868896, |
| "lr": 0.0010945801257576456, |
| "router/selected_tokens_s0": 7384.875, |
| "router/selected_tokens_s1": 4328.0, |
| "step": 3700, |
| "tokens_trained": 12.1205864 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.0521948797957592, |
| "grad_norm": 0.7060279250144958, |
| "loss": 1.1442, |
| "loss_ce": 1.137622594833374, |
| "loss_region": 0.06001655012369156, |
| "loss_total": 1.1976391077041626, |
| "lr": 0.0010941732337737947, |
| "router/selected_tokens_s0": 7376.0, |
| "router/selected_tokens_s1": 4245.0, |
| "step": 3710, |
| "tokens_trained": 12.15335184 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.0550315580455287, |
| "grad_norm": 0.4556179940700531, |
| "loss": 1.1469, |
| "loss_ce": 1.1033775806427002, |
| "loss_region": 0.05993418022990227, |
| "loss_total": 1.1633117198944092, |
| "lr": 0.0010937663417899437, |
| "router/selected_tokens_s0": 7439.25, |
| "router/selected_tokens_s1": 4378.875, |
| "step": 3720, |
| "tokens_trained": 12.18611712 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.0578682362952982, |
| "grad_norm": 0.30534932017326355, |
| "loss": 1.1422, |
| "loss_ce": 1.0742703676223755, |
| "loss_region": 0.06001539155840874, |
| "loss_total": 1.134285807609558, |
| "lr": 0.0010933594498060927, |
| "router/selected_tokens_s0": 7455.875, |
| "router/selected_tokens_s1": 4283.875, |
| "step": 3730, |
| "tokens_trained": 12.21888256 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.0607049145450678, |
| "grad_norm": 0.44344577193260193, |
| "loss": 1.1423, |
| "loss_ce": 1.1271259784698486, |
| "loss_region": 0.059946540743112564, |
| "loss_total": 1.187072515487671, |
| "lr": 0.0010929525578222414, |
| "router/selected_tokens_s0": 7390.625, |
| "router/selected_tokens_s1": 4344.75, |
| "step": 3740, |
| "tokens_trained": 12.251648 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.0635415927948373, |
| "grad_norm": 0.5021528005599976, |
| "loss": 1.1466, |
| "loss_ce": 1.1103929281234741, |
| "loss_region": 0.06003858521580696, |
| "loss_total": 1.1704314947128296, |
| "lr": 0.0010925456658383904, |
| "router/selected_tokens_s0": 7478.625, |
| "router/selected_tokens_s1": 4263.0, |
| "step": 3750, |
| "tokens_trained": 12.284409608 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.0663782710446068, |
| "grad_norm": 0.21326135098934174, |
| "loss": 1.1457, |
| "loss_ce": 1.101629614830017, |
| "loss_region": 0.05999373272061348, |
| "loss_total": 1.1616233587265015, |
| "lr": 0.0010921387738545394, |
| "router/selected_tokens_s0": 7429.5, |
| "router/selected_tokens_s1": 4293.75, |
| "step": 3760, |
| "tokens_trained": 12.317175048 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.0692149492943763, |
| "grad_norm": 0.12464369833469391, |
| "loss": 1.1357, |
| "loss_ce": 1.0160081386566162, |
| "loss_region": 0.05996187403798103, |
| "loss_total": 1.0759700536727905, |
| "lr": 0.0010917318818706883, |
| "router/selected_tokens_s0": 7375.25, |
| "router/selected_tokens_s1": 4324.25, |
| "step": 3770, |
| "tokens_trained": 12.349940488 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.0720516275441458, |
| "grad_norm": 0.6811399459838867, |
| "loss": 1.1444, |
| "loss_ce": 1.106014609336853, |
| "loss_region": 0.06000228598713875, |
| "loss_total": 1.166016936302185, |
| "lr": 0.0010913249898868375, |
| "router/selected_tokens_s0": 7413.0, |
| "router/selected_tokens_s1": 4280.0, |
| "step": 3780, |
| "tokens_trained": 12.382705928 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.0748883057939154, |
| "grad_norm": 0.5335866808891296, |
| "loss": 1.1425, |
| "loss_ce": 1.0999557971954346, |
| "loss_region": 0.059975773096084595, |
| "loss_total": 1.1599315404891968, |
| "lr": 0.0010909180979029865, |
| "router/selected_tokens_s0": 7386.125, |
| "router/selected_tokens_s1": 4300.625, |
| "step": 3790, |
| "tokens_trained": 12.415470568 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.0777249840436849, |
| "grad_norm": 0.4129198491573334, |
| "loss": 1.1451, |
| "loss_ce": 1.1344683170318604, |
| "loss_region": 0.05997394025325775, |
| "loss_total": 1.1944422721862793, |
| "lr": 0.0010905112059191354, |
| "router/selected_tokens_s0": 7444.875, |
| "router/selected_tokens_s1": 4331.875, |
| "step": 3800, |
| "tokens_trained": 12.448236008 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.0805616622934544, |
| "grad_norm": 0.16327404975891113, |
| "loss": 1.1442, |
| "loss_ce": 1.1254371404647827, |
| "loss_region": 0.0599752776324749, |
| "loss_total": 1.1854124069213867, |
| "lr": 0.0010901043139352844, |
| "router/selected_tokens_s0": 7459.75, |
| "router/selected_tokens_s1": 4331.375, |
| "step": 3810, |
| "tokens_trained": 12.481001448 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.083398340543224, |
| "grad_norm": 0.7598224878311157, |
| "loss": 1.1348, |
| "loss_ce": 1.0765974521636963, |
| "loss_region": 0.05999346822500229, |
| "loss_total": 1.1365909576416016, |
| "lr": 0.0010896974219514334, |
| "router/selected_tokens_s0": 7440.25, |
| "router/selected_tokens_s1": 4309.625, |
| "step": 3820, |
| "tokens_trained": 12.513766888 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.0862350187929934, |
| "grad_norm": 0.5273082852363586, |
| "loss": 1.1429, |
| "loss_ce": 1.1588966846466064, |
| "loss_region": 0.059993255883455276, |
| "loss_total": 1.2188899517059326, |
| "lr": 0.0010892905299675823, |
| "router/selected_tokens_s0": 7423.5, |
| "router/selected_tokens_s1": 4291.375, |
| "step": 3830, |
| "tokens_trained": 12.546532328 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.089071697042763, |
| "grad_norm": 0.20225034654140472, |
| "loss": 1.1464, |
| "loss_ce": 1.0285930633544922, |
| "loss_region": 0.06002606824040413, |
| "loss_total": 1.0886191129684448, |
| "lr": 0.0010888836379837313, |
| "router/selected_tokens_s0": 7424.75, |
| "router/selected_tokens_s1": 4250.125, |
| "step": 3840, |
| "tokens_trained": 12.57929404 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.0919083752925325, |
| "grad_norm": 0.1807214766740799, |
| "loss": 1.1429, |
| "loss_ce": 1.0824134349822998, |
| "loss_region": 0.05998450517654419, |
| "loss_total": 1.1423978805541992, |
| "lr": 0.0010884767459998803, |
| "router/selected_tokens_s0": 7402.125, |
| "router/selected_tokens_s1": 4295.5, |
| "step": 3850, |
| "tokens_trained": 12.61205788 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.094745053542302, |
| "grad_norm": 0.8687542080879211, |
| "loss": 1.1394, |
| "loss_ce": 1.0904909372329712, |
| "loss_region": 0.06012667715549469, |
| "loss_total": 1.1506175994873047, |
| "lr": 0.0010880698540160292, |
| "router/selected_tokens_s0": 7367.875, |
| "router/selected_tokens_s1": 4109.25, |
| "step": 3860, |
| "tokens_trained": 12.64482332 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.0975817317920715, |
| "grad_norm": 0.31520599126815796, |
| "loss": 1.1336, |
| "loss_ce": 1.1574056148529053, |
| "loss_region": 0.060040898621082306, |
| "loss_total": 1.2174465656280518, |
| "lr": 0.0010876629620321782, |
| "router/selected_tokens_s0": 7445.5, |
| "router/selected_tokens_s1": 4242.875, |
| "step": 3870, |
| "tokens_trained": 12.677587992 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.100418410041841, |
| "grad_norm": 0.16909047961235046, |
| "loss": 1.1397, |
| "loss_ce": 1.0726521015167236, |
| "loss_region": 0.060116663575172424, |
| "loss_total": 1.1327687501907349, |
| "lr": 0.0010872560700483272, |
| "router/selected_tokens_s0": 7365.125, |
| "router/selected_tokens_s1": 4121.625, |
| "step": 3880, |
| "tokens_trained": 12.710352632 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.1032550882916106, |
| "grad_norm": 0.4796403646469116, |
| "loss": 1.1426, |
| "loss_ce": 1.043395757675171, |
| "loss_region": 0.060001593083143234, |
| "loss_total": 1.1033973693847656, |
| "lr": 0.0010868491780644761, |
| "router/selected_tokens_s0": 7446.25, |
| "router/selected_tokens_s1": 4295.125, |
| "step": 3890, |
| "tokens_trained": 12.743118072 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.10609176654138, |
| "grad_norm": 0.3688628375530243, |
| "loss": 1.1334, |
| "loss_ce": 1.0408189296722412, |
| "loss_region": 0.05999990180134773, |
| "loss_total": 1.1008188724517822, |
| "lr": 0.001086442286080625, |
| "router/selected_tokens_s0": 7405.875, |
| "router/selected_tokens_s1": 4278.0, |
| "step": 3900, |
| "tokens_trained": 12.775882712 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.1089284447911496, |
| "grad_norm": 0.1674785166978836, |
| "loss": 1.1453, |
| "loss_ce": 1.1500557661056519, |
| "loss_region": 0.06002119183540344, |
| "loss_total": 1.210076928138733, |
| "lr": 0.001086035394096774, |
| "router/selected_tokens_s0": 7402.0, |
| "router/selected_tokens_s1": 4250.0, |
| "step": 3910, |
| "tokens_trained": 12.808648152 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.1117651230409191, |
| "grad_norm": 0.18490345776081085, |
| "loss": 1.1435, |
| "loss_ce": 1.1302103996276855, |
| "loss_region": 0.059938620775938034, |
| "loss_total": 1.1901490688323975, |
| "lr": 0.001085628502112923, |
| "router/selected_tokens_s0": 7380.0, |
| "router/selected_tokens_s1": 4363.375, |
| "step": 3920, |
| "tokens_trained": 12.841413592 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.1146018012906886, |
| "grad_norm": 0.23284821212291718, |
| "loss": 1.1423, |
| "loss_ce": 1.0065786838531494, |
| "loss_region": 0.059997957199811935, |
| "loss_total": 1.066576600074768, |
| "lr": 0.001085221610129072, |
| "router/selected_tokens_s0": 7459.875, |
| "router/selected_tokens_s1": 4311.375, |
| "step": 3930, |
| "tokens_trained": 12.874179032 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.1174384795404582, |
| "grad_norm": 0.23733806610107422, |
| "loss": 1.1395, |
| "loss_ce": 1.0488975048065186, |
| "loss_region": 0.060037050396203995, |
| "loss_total": 1.1089345216751099, |
| "lr": 0.001084814718145221, |
| "router/selected_tokens_s0": 7396.625, |
| "router/selected_tokens_s1": 4231.75, |
| "step": 3940, |
| "tokens_trained": 12.906944472 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.1202751577902277, |
| "grad_norm": 0.33361032605171204, |
| "loss": 1.1355, |
| "loss_ce": 1.0286937952041626, |
| "loss_region": 0.060001228004693985, |
| "loss_total": 1.0886950492858887, |
| "lr": 0.00108440782616137, |
| "router/selected_tokens_s0": 7383.375, |
| "router/selected_tokens_s1": 4268.5, |
| "step": 3950, |
| "tokens_trained": 12.939709912 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.1231118360399972, |
| "grad_norm": 0.46005359292030334, |
| "loss": 1.1301, |
| "loss_ce": 1.105696678161621, |
| "loss_region": 0.059990961104631424, |
| "loss_total": 1.1656876802444458, |
| "lr": 0.001084000934177519, |
| "router/selected_tokens_s0": 7330.25, |
| "router/selected_tokens_s1": 4260.25, |
| "step": 3960, |
| "tokens_trained": 12.972475352 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.1259485142897667, |
| "grad_norm": 0.09066780656576157, |
| "loss": 1.1408, |
| "loss_ce": 1.1488341093063354, |
| "loss_region": 0.059959400445222855, |
| "loss_total": 1.2087935209274292, |
| "lr": 0.001083594042193668, |
| "router/selected_tokens_s0": 7336.875, |
| "router/selected_tokens_s1": 4313.5, |
| "step": 3970, |
| "tokens_trained": 13.005240792 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.1287851925395362, |
| "grad_norm": 0.9433469772338867, |
| "loss": 1.1324, |
| "loss_ce": 1.1015199422836304, |
| "loss_region": 0.05999411270022392, |
| "loss_total": 1.1615140438079834, |
| "lr": 0.001083187150209817, |
| "router/selected_tokens_s0": 7323.5, |
| "router/selected_tokens_s1": 4254.75, |
| "step": 3980, |
| "tokens_trained": 13.038005432 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.1316218707893058, |
| "grad_norm": 0.2512190639972687, |
| "loss": 1.1389, |
| "loss_ce": 1.0987569093704224, |
| "loss_region": 0.05999063700437546, |
| "loss_total": 1.1587475538253784, |
| "lr": 0.0010827802582259658, |
| "router/selected_tokens_s0": 7455.125, |
| "router/selected_tokens_s1": 4317.25, |
| "step": 3990, |
| "tokens_trained": 13.070770872 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.1344585490390753, |
| "grad_norm": 0.3653326630592346, |
| "loss": 1.1399, |
| "loss_ce": 1.072330355644226, |
| "loss_region": 0.06002863124012947, |
| "loss_total": 1.1323590278625488, |
| "lr": 0.0010823733662421147, |
| "router/selected_tokens_s0": 7473.0, |
| "router/selected_tokens_s1": 4271.5, |
| "step": 4000, |
| "tokens_trained": 13.103536312 |
| }, |
| { |
| "epoch": 1.1344585490390753, |
| "eval_ppl": 2.9523484878038198, |
| "eval_runtime": 1.0573, |
| "step": 4000, |
| "tokens_trained": 13.103536312 |
| }, |
| { |
| "epoch": 1.1344585490390753, |
| "eval_F": 0.33808435554861055, |
| "eval_F_cds": 0.33309655836121166, |
| "eval_F_dig": 0.32549417923275253, |
| "eval_F_exon": 0.33622677904977566, |
| "eval_F_intron": 0.3384065143924063, |
| "eval_F_nig": 0.33901630794280774, |
| "eval_F_promoter": 0.33740178788048447, |
| "eval_F_utr": 0.33783400757556237, |
| "eval_G": 0.3120454166035921, |
| "eval_G_cds": 0.30918100324801795, |
| "eval_G_dig": 0.3016167339733573, |
| "eval_G_exon": 0.31355957051725586, |
| "eval_G_intron": 0.3124654773125766, |
| "eval_G_nig": 0.31342695299580536, |
| "eval_G_promoter": 0.30978970781894444, |
| "eval_G_utr": 0.30942776240691594, |
| "eval_avg_bp_per_token": 2.957841685331156, |
| "eval_bp_per_token/cds": 3.0021324895095276, |
| "eval_bp_per_token/dig": 3.07225156024964, |
| "eval_bp_per_token/exon": 2.974183088051883, |
| "eval_bp_per_token/intron": 2.955025856388891, |
| "eval_bp_per_token/nig": 2.94971060851946, |
| "eval_bp_per_token/promoter": 2.963825432822612, |
| "eval_bp_per_token/utr": 2.9600335596064373, |
| "eval_ppl_cds": 3.7308104955454358, |
| "eval_ppl_dig": 1.1529258628275645, |
| "eval_ppl_exon": 3.3675201309937965, |
| "eval_ppl_intron": 3.0130073290188872, |
| "eval_ppl_nig": 2.7720357615980986, |
| "eval_ppl_promoter": 3.30537569832896, |
| "eval_ppl_utr": 3.2970389002777467, |
| "step": 4000, |
| "tokens_trained": 13.103536312 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.1372952272888448, |
| "grad_norm": 0.24893344938755035, |
| "loss": 1.1333, |
| "loss_ce": 1.0987017154693604, |
| "loss_region": 0.05996103584766388, |
| "loss_total": 1.1586627960205078, |
| "lr": 0.0010819664742582637, |
| "router/selected_tokens_s0": 7486.375, |
| "router/selected_tokens_s1": 4381.25, |
| "step": 4010, |
| "tokens_trained": 13.136300952 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.1401319055386143, |
| "grad_norm": 0.5850163102149963, |
| "loss": 1.1354, |
| "loss_ce": 1.0224004983901978, |
| "loss_region": 0.059973448514938354, |
| "loss_total": 1.0823739767074585, |
| "lr": 0.0010815595822744127, |
| "router/selected_tokens_s0": 7335.75, |
| "router/selected_tokens_s1": 4307.875, |
| "step": 4020, |
| "tokens_trained": 13.169065592 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.1429685837883838, |
| "grad_norm": 0.35301917791366577, |
| "loss": 1.1386, |
| "loss_ce": 1.0751712322235107, |
| "loss_region": 0.059972189366817474, |
| "loss_total": 1.1351433992385864, |
| "lr": 0.0010811526902905618, |
| "router/selected_tokens_s0": 7411.125, |
| "router/selected_tokens_s1": 4328.75, |
| "step": 4030, |
| "tokens_trained": 13.201831032 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.1458052620381534, |
| "grad_norm": 0.5564679503440857, |
| "loss": 1.1356, |
| "loss_ce": 1.0059022903442383, |
| "loss_region": 0.05994752421975136, |
| "loss_total": 1.065849781036377, |
| "lr": 0.0010807457983067108, |
| "router/selected_tokens_s0": 7381.0, |
| "router/selected_tokens_s1": 4361.5, |
| "step": 4040, |
| "tokens_trained": 13.234596472 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.1486419402879229, |
| "grad_norm": 0.18776975572109222, |
| "loss": 1.1368, |
| "loss_ce": 1.1155309677124023, |
| "loss_region": 0.06004191190004349, |
| "loss_total": 1.1755728721618652, |
| "lr": 0.0010803389063228598, |
| "router/selected_tokens_s0": 7473.375, |
| "router/selected_tokens_s1": 4257.875, |
| "step": 4050, |
| "tokens_trained": 13.267361888 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.1514786185376924, |
| "grad_norm": 0.17018158733844757, |
| "loss": 1.1406, |
| "loss_ce": 1.113221287727356, |
| "loss_region": 0.06000470370054245, |
| "loss_total": 1.173225998878479, |
| "lr": 0.0010799320143390087, |
| "router/selected_tokens_s0": 7405.875, |
| "router/selected_tokens_s1": 4272.375, |
| "step": 4060, |
| "tokens_trained": 13.300127328 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.154315296787462, |
| "grad_norm": 0.42208781838417053, |
| "loss": 1.1383, |
| "loss_ce": 1.0297209024429321, |
| "loss_region": 0.060001879930496216, |
| "loss_total": 1.089722752571106, |
| "lr": 0.0010795251223551577, |
| "router/selected_tokens_s0": 7452.25, |
| "router/selected_tokens_s1": 4312.75, |
| "step": 4070, |
| "tokens_trained": 13.332892768 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.1571519750372314, |
| "grad_norm": 0.35124096274375916, |
| "loss": 1.1331, |
| "loss_ce": 1.1511611938476562, |
| "loss_region": 0.05999987572431564, |
| "loss_total": 1.2111610174179077, |
| "lr": 0.0010791182303713067, |
| "router/selected_tokens_s0": 7421.25, |
| "router/selected_tokens_s1": 4286.75, |
| "step": 4080, |
| "tokens_trained": 13.365658208 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.159988653287001, |
| "grad_norm": 0.2931098937988281, |
| "loss": 1.1368, |
| "loss_ce": 1.0053439140319824, |
| "loss_region": 0.060056962072849274, |
| "loss_total": 1.0654008388519287, |
| "lr": 0.0010787113383874556, |
| "router/selected_tokens_s0": 7453.75, |
| "router/selected_tokens_s1": 4215.25, |
| "step": 4090, |
| "tokens_trained": 13.398422568 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.1628253315367705, |
| "grad_norm": 0.40350601077079773, |
| "loss": 1.1383, |
| "loss_ce": 1.1511709690093994, |
| "loss_region": 0.05996457114815712, |
| "loss_total": 1.2111355066299438, |
| "lr": 0.0010783044464036046, |
| "router/selected_tokens_s0": 7475.75, |
| "router/selected_tokens_s1": 4380.375, |
| "step": 4100, |
| "tokens_trained": 13.431185432 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.16566200978654, |
| "grad_norm": 0.2456795573234558, |
| "loss": 1.1353, |
| "loss_ce": 1.1185240745544434, |
| "loss_region": 0.06002639979124069, |
| "loss_total": 1.1785504817962646, |
| "lr": 0.0010778975544197536, |
| "router/selected_tokens_s0": 7406.5, |
| "router/selected_tokens_s1": 4244.625, |
| "step": 4110, |
| "tokens_trained": 13.463950872 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.1684986880363095, |
| "grad_norm": 0.4379357397556305, |
| "loss": 1.1306, |
| "loss_ce": 0.9611161947250366, |
| "loss_region": 0.06001690775156021, |
| "loss_total": 1.0211330652236938, |
| "lr": 0.0010774906624359025, |
| "router/selected_tokens_s0": 7336.125, |
| "router/selected_tokens_s1": 4239.0, |
| "step": 4120, |
| "tokens_trained": 13.496713976 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.171335366286079, |
| "grad_norm": 0.6094051599502563, |
| "loss": 1.1339, |
| "loss_ce": 1.146083950996399, |
| "loss_region": 0.060058578848838806, |
| "loss_total": 1.206142544746399, |
| "lr": 0.0010770837704520515, |
| "router/selected_tokens_s0": 7396.25, |
| "router/selected_tokens_s1": 4185.5, |
| "step": 4130, |
| "tokens_trained": 13.529479416 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.1741720445358486, |
| "grad_norm": 0.2225402295589447, |
| "loss": 1.1436, |
| "loss_ce": 0.917146623134613, |
| "loss_region": 0.060047075152397156, |
| "loss_total": 0.9771937131881714, |
| "lr": 0.0010766768784682005, |
| "router/selected_tokens_s0": 7424.25, |
| "router/selected_tokens_s1": 4215.5, |
| "step": 4140, |
| "tokens_trained": 13.562244856 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.177008722785618, |
| "grad_norm": 0.13494586944580078, |
| "loss": 1.1371, |
| "loss_ce": 1.123674988746643, |
| "loss_region": 0.06002392992377281, |
| "loss_total": 1.1836988925933838, |
| "lr": 0.0010762699864843494, |
| "router/selected_tokens_s0": 7392.875, |
| "router/selected_tokens_s1": 4235.0, |
| "step": 4150, |
| "tokens_trained": 13.595010296 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.1798454010353876, |
| "grad_norm": 0.40003934502601624, |
| "loss": 1.1226, |
| "loss_ce": 0.9939314126968384, |
| "loss_region": 0.059998590499162674, |
| "loss_total": 1.0539300441741943, |
| "lr": 0.0010758630945004984, |
| "router/selected_tokens_s0": 7431.625, |
| "router/selected_tokens_s1": 4292.125, |
| "step": 4160, |
| "tokens_trained": 13.627775736 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.1826820792851571, |
| "grad_norm": 0.4521540105342865, |
| "loss": 1.1314, |
| "loss_ce": 1.0949397087097168, |
| "loss_region": 0.05998746678233147, |
| "loss_total": 1.154927134513855, |
| "lr": 0.0010754562025166474, |
| "router/selected_tokens_s0": 7429.5, |
| "router/selected_tokens_s1": 4307.875, |
| "step": 4170, |
| "tokens_trained": 13.660541176 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.1855187575349266, |
| "grad_norm": 0.20438899099826813, |
| "loss": 1.1314, |
| "loss_ce": 1.1041476726531982, |
| "loss_region": 0.0599747933447361, |
| "loss_total": 1.164122462272644, |
| "lr": 0.0010750493105327963, |
| "router/selected_tokens_s0": 7337.25, |
| "router/selected_tokens_s1": 4301.25, |
| "step": 4180, |
| "tokens_trained": 13.693306616 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.1883554357846962, |
| "grad_norm": 0.7250345349311829, |
| "loss": 1.1298, |
| "loss_ce": 1.0269466638565063, |
| "loss_region": 0.060086045414209366, |
| "loss_total": 1.087032675743103, |
| "lr": 0.0010746424185489453, |
| "router/selected_tokens_s0": 7419.875, |
| "router/selected_tokens_s1": 4171.125, |
| "step": 4190, |
| "tokens_trained": 13.726072056 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.1911921140344657, |
| "grad_norm": 0.21629612147808075, |
| "loss": 1.1353, |
| "loss_ce": 1.0419249534606934, |
| "loss_region": 0.060025639832019806, |
| "loss_total": 1.1019506454467773, |
| "lr": 0.0010742355265650943, |
| "router/selected_tokens_s0": 7399.125, |
| "router/selected_tokens_s1": 4234.125, |
| "step": 4200, |
| "tokens_trained": 13.758837496 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.1940287922842352, |
| "grad_norm": 0.1624452769756317, |
| "loss": 1.1332, |
| "loss_ce": 1.0706652402877808, |
| "loss_region": 0.059998173266649246, |
| "loss_total": 1.1306633949279785, |
| "lr": 0.0010738286345812434, |
| "router/selected_tokens_s0": 7412.125, |
| "router/selected_tokens_s1": 4284.5, |
| "step": 4210, |
| "tokens_trained": 13.791602936 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.1968654705340047, |
| "grad_norm": 0.5419676899909973, |
| "loss": 1.143, |
| "loss_ce": 1.0858913660049438, |
| "loss_region": 0.05999434366822243, |
| "loss_total": 1.145885705947876, |
| "lr": 0.0010734217425973924, |
| "router/selected_tokens_s0": 7422.5, |
| "router/selected_tokens_s1": 4295.875, |
| "step": 4220, |
| "tokens_trained": 13.824368376 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.1997021487837742, |
| "grad_norm": 0.2358941286802292, |
| "loss": 1.1329, |
| "loss_ce": 1.081803321838379, |
| "loss_region": 0.060014963150024414, |
| "loss_total": 1.1418182849884033, |
| "lr": 0.0010730148506135414, |
| "router/selected_tokens_s0": 7424.625, |
| "router/selected_tokens_s1": 4259.25, |
| "step": 4230, |
| "tokens_trained": 13.857133016 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.2025388270335438, |
| "grad_norm": 0.510456383228302, |
| "loss": 1.1278, |
| "loss_ce": 1.110556721687317, |
| "loss_region": 0.06000852584838867, |
| "loss_total": 1.1705652475357056, |
| "lr": 0.0010726079586296901, |
| "router/selected_tokens_s0": 7484.0, |
| "router/selected_tokens_s1": 4306.625, |
| "step": 4240, |
| "tokens_trained": 13.889896856 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.2053755052833133, |
| "grad_norm": 0.18302984535694122, |
| "loss": 1.1313, |
| "loss_ce": 1.0691109895706177, |
| "loss_region": 0.05997291952371597, |
| "loss_total": 1.1290838718414307, |
| "lr": 0.001072201066645839, |
| "router/selected_tokens_s0": 7450.0, |
| "router/selected_tokens_s1": 4361.5, |
| "step": 4250, |
| "tokens_trained": 13.922662296 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.2082121835330828, |
| "grad_norm": 0.5035632848739624, |
| "loss": 1.1307, |
| "loss_ce": 1.0827662944793701, |
| "loss_region": 0.06004443019628525, |
| "loss_total": 1.1428107023239136, |
| "lr": 0.001071794174661988, |
| "router/selected_tokens_s0": 7423.75, |
| "router/selected_tokens_s1": 4217.75, |
| "step": 4260, |
| "tokens_trained": 13.955427736 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.2110488617828523, |
| "grad_norm": 0.3019254505634308, |
| "loss": 1.1301, |
| "loss_ce": 1.080075740814209, |
| "loss_region": 0.06004132702946663, |
| "loss_total": 1.1401170492172241, |
| "lr": 0.0010713872826781372, |
| "router/selected_tokens_s0": 7457.625, |
| "router/selected_tokens_s1": 4234.875, |
| "step": 4270, |
| "tokens_trained": 13.988193176 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.2138855400326218, |
| "grad_norm": 0.22490078210830688, |
| "loss": 1.1268, |
| "loss_ce": 1.1394598484039307, |
| "loss_region": 0.05999091640114784, |
| "loss_total": 1.1994507312774658, |
| "lr": 0.0010709803906942862, |
| "router/selected_tokens_s0": 7419.125, |
| "router/selected_tokens_s1": 4300.875, |
| "step": 4280, |
| "tokens_trained": 14.020958616 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.2167222182823914, |
| "grad_norm": 0.2757962644100189, |
| "loss": 1.1311, |
| "loss_ce": 1.060826063156128, |
| "loss_region": 0.059974100440740585, |
| "loss_total": 1.1208001375198364, |
| "lr": 0.0010705734987104352, |
| "router/selected_tokens_s0": 7454.125, |
| "router/selected_tokens_s1": 4350.75, |
| "step": 4290, |
| "tokens_trained": 14.053724056 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.2195588965321609, |
| "grad_norm": 0.3286055028438568, |
| "loss": 1.1351, |
| "loss_ce": 1.116142749786377, |
| "loss_region": 0.05997960641980171, |
| "loss_total": 1.1761223077774048, |
| "lr": 0.0010701666067265841, |
| "router/selected_tokens_s0": 7509.0, |
| "router/selected_tokens_s1": 4382.875, |
| "step": 4300, |
| "tokens_trained": 14.086488728 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.2223955747819304, |
| "grad_norm": 0.5284584760665894, |
| "loss": 1.1263, |
| "loss_ce": 1.0240802764892578, |
| "loss_region": 0.06002355366945267, |
| "loss_total": 1.0841038227081299, |
| "lr": 0.001069759714742733, |
| "router/selected_tokens_s0": 7453.75, |
| "router/selected_tokens_s1": 4262.0, |
| "step": 4310, |
| "tokens_trained": 14.119254168 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.2252322530317, |
| "grad_norm": 0.052983351051807404, |
| "loss": 1.1243, |
| "loss_ce": 1.0505942106246948, |
| "loss_region": 0.06003909558057785, |
| "loss_total": 1.1106332540512085, |
| "lr": 0.001069352822758882, |
| "router/selected_tokens_s0": 7451.5, |
| "router/selected_tokens_s1": 4236.625, |
| "step": 4320, |
| "tokens_trained": 14.152019608 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.2280689312814694, |
| "grad_norm": 0.44211697578430176, |
| "loss": 1.1298, |
| "loss_ce": 1.0448384284973145, |
| "loss_region": 0.060010477900505066, |
| "loss_total": 1.104848861694336, |
| "lr": 0.001068945930775031, |
| "router/selected_tokens_s0": 7421.625, |
| "router/selected_tokens_s1": 4270.125, |
| "step": 4330, |
| "tokens_trained": 14.184785048 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.230905609531239, |
| "grad_norm": 0.24782565236091614, |
| "loss": 1.1294, |
| "loss_ce": 1.0347596406936646, |
| "loss_region": 0.05996475741267204, |
| "loss_total": 1.094724416732788, |
| "lr": 0.00106853903879118, |
| "router/selected_tokens_s0": 7470.625, |
| "router/selected_tokens_s1": 4404.375, |
| "step": 4340, |
| "tokens_trained": 14.217550488 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.2337422877810085, |
| "grad_norm": 0.11738014966249466, |
| "loss": 1.1239, |
| "loss_ce": 1.104210376739502, |
| "loss_region": 0.06001203507184982, |
| "loss_total": 1.1642223596572876, |
| "lr": 0.001068132146807329, |
| "router/selected_tokens_s0": 7455.25, |
| "router/selected_tokens_s1": 4282.0, |
| "step": 4350, |
| "tokens_trained": 14.250315928 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.236578966030778, |
| "grad_norm": 0.30317816138267517, |
| "loss": 1.1244, |
| "loss_ce": 1.1028004884719849, |
| "loss_region": 0.06001470610499382, |
| "loss_total": 1.1628152132034302, |
| "lr": 0.001067725254823478, |
| "router/selected_tokens_s0": 7429.75, |
| "router/selected_tokens_s1": 4263.25, |
| "step": 4360, |
| "tokens_trained": 14.283081368 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.2394156442805475, |
| "grad_norm": 0.200590580701828, |
| "loss": 1.1274, |
| "loss_ce": 1.0992459058761597, |
| "loss_region": 0.06001310050487518, |
| "loss_total": 1.1592589616775513, |
| "lr": 0.0010673183628396269, |
| "router/selected_tokens_s0": 7432.875, |
| "router/selected_tokens_s1": 4263.125, |
| "step": 4370, |
| "tokens_trained": 14.315846808 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.242252322530317, |
| "grad_norm": 0.4970727860927582, |
| "loss": 1.1246, |
| "loss_ce": 1.1652504205703735, |
| "loss_region": 0.060001764446496964, |
| "loss_total": 1.2252521514892578, |
| "lr": 0.0010669114708557758, |
| "router/selected_tokens_s0": 7388.75, |
| "router/selected_tokens_s1": 4263.125, |
| "step": 4380, |
| "tokens_trained": 14.348612232 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.2450890007800866, |
| "grad_norm": 0.17079408466815948, |
| "loss": 1.1275, |
| "loss_ce": 1.1308954954147339, |
| "loss_region": 0.059982020407915115, |
| "loss_total": 1.1908775568008423, |
| "lr": 0.0010665045788719248, |
| "router/selected_tokens_s0": 7438.875, |
| "router/selected_tokens_s1": 4336.625, |
| "step": 4390, |
| "tokens_trained": 14.381377672 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.247925679029856, |
| "grad_norm": 0.1912943720817566, |
| "loss": 1.1282, |
| "loss_ce": 1.0140893459320068, |
| "loss_region": 0.05996926501393318, |
| "loss_total": 1.0740586519241333, |
| "lr": 0.0010660976868880738, |
| "router/selected_tokens_s0": 7448.375, |
| "router/selected_tokens_s1": 4391.875, |
| "step": 4400, |
| "tokens_trained": 14.414143112 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.2507623572796256, |
| "grad_norm": 0.5349684953689575, |
| "loss": 1.1317, |
| "loss_ce": 1.0418589115142822, |
| "loss_region": 0.059975508600473404, |
| "loss_total": 1.1018344163894653, |
| "lr": 0.0010656907949042227, |
| "router/selected_tokens_s0": 7420.25, |
| "router/selected_tokens_s1": 4350.875, |
| "step": 4410, |
| "tokens_trained": 14.446908552 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.2535990355293951, |
| "grad_norm": 0.10011377930641174, |
| "loss": 1.1295, |
| "loss_ce": 1.14780592918396, |
| "loss_region": 0.0599743016064167, |
| "loss_total": 1.2077802419662476, |
| "lr": 0.0010652839029203717, |
| "router/selected_tokens_s0": 7343.375, |
| "router/selected_tokens_s1": 4315.875, |
| "step": 4420, |
| "tokens_trained": 14.479673992 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.2564357137791646, |
| "grad_norm": 0.13616910576820374, |
| "loss": 1.127, |
| "loss_ce": 1.0690678358078003, |
| "loss_region": 0.06000702828168869, |
| "loss_total": 1.1290748119354248, |
| "lr": 0.0010648770109365207, |
| "router/selected_tokens_s0": 7325.125, |
| "router/selected_tokens_s1": 4238.75, |
| "step": 4430, |
| "tokens_trained": 14.512439432 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.2592723920289342, |
| "grad_norm": 0.21678487956523895, |
| "loss": 1.1266, |
| "loss_ce": 1.1105207204818726, |
| "loss_region": 0.06007102504372597, |
| "loss_total": 1.1705917119979858, |
| "lr": 0.0010644701189526696, |
| "router/selected_tokens_s0": 7417.75, |
| "router/selected_tokens_s1": 4169.25, |
| "step": 4440, |
| "tokens_trained": 14.545204872 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.2621090702787037, |
| "grad_norm": 0.5876789093017578, |
| "loss": 1.127, |
| "loss_ce": 0.9410568475723267, |
| "loss_region": 0.06004181131720543, |
| "loss_total": 1.0010986328125, |
| "lr": 0.0010640632269688186, |
| "router/selected_tokens_s0": 7412.75, |
| "router/selected_tokens_s1": 4238.875, |
| "step": 4450, |
| "tokens_trained": 14.577970312 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.2649457485284732, |
| "grad_norm": 0.3269062936306, |
| "loss": 1.1279, |
| "loss_ce": 1.0947961807250977, |
| "loss_region": 0.0600116066634655, |
| "loss_total": 1.1548078060150146, |
| "lr": 0.0010636563349849678, |
| "router/selected_tokens_s0": 7457.625, |
| "router/selected_tokens_s1": 4283.0, |
| "step": 4460, |
| "tokens_trained": 14.610735752 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.2677824267782427, |
| "grad_norm": 0.1908155232667923, |
| "loss": 1.1287, |
| "loss_ce": 1.098473072052002, |
| "loss_region": 0.060026198625564575, |
| "loss_total": 1.1584992408752441, |
| "lr": 0.0010632494430011167, |
| "router/selected_tokens_s0": 7424.75, |
| "router/selected_tokens_s1": 4236.25, |
| "step": 4470, |
| "tokens_trained": 14.643501192 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.2706191050280122, |
| "grad_norm": 0.10397963970899582, |
| "loss": 1.1321, |
| "loss_ce": 1.036777138710022, |
| "loss_region": 0.060002975165843964, |
| "loss_total": 1.0967800617218018, |
| "lr": 0.0010628425510172657, |
| "router/selected_tokens_s0": 7400.5, |
| "router/selected_tokens_s1": 4276.0, |
| "step": 4480, |
| "tokens_trained": 14.676266632 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.2734557832777817, |
| "grad_norm": 0.21262697875499725, |
| "loss": 1.1309, |
| "loss_ce": 0.9618175029754639, |
| "loss_region": 0.0600084513425827, |
| "loss_total": 1.021825909614563, |
| "lr": 0.0010624356590334145, |
| "router/selected_tokens_s0": 7394.5, |
| "router/selected_tokens_s1": 4267.75, |
| "step": 4490, |
| "tokens_trained": 14.709032072 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.2762924615275513, |
| "grad_norm": 0.2513585686683655, |
| "loss": 1.117, |
| "loss_ce": 1.0686031579971313, |
| "loss_region": 0.060012537986040115, |
| "loss_total": 1.1286157369613647, |
| "lr": 0.0010620287670495634, |
| "router/selected_tokens_s0": 7436.625, |
| "router/selected_tokens_s1": 4269.125, |
| "step": 4500, |
| "tokens_trained": 14.741797512 |
| }, |
| { |
| "epoch": 1.2762924615275513, |
| "eval_ppl": 2.917957803788485, |
| "eval_runtime": 1.0395, |
| "step": 4500, |
| "tokens_trained": 14.741797512 |
| }, |
| { |
| "epoch": 1.2762924615275513, |
| "eval_F": 0.33461598562387684, |
| "eval_F_cds": 0.33259245680766236, |
| "eval_F_dig": 0.3207281818892101, |
| "eval_F_exon": 0.33861701681553236, |
| "eval_F_intron": 0.33541328126143716, |
| "eval_F_nig": 0.33607782526840685, |
| "eval_F_promoter": 0.3298154265345352, |
| "eval_F_utr": 0.33543765621376487, |
| "eval_G": 0.31760316662922883, |
| "eval_G_cds": 0.313791894490972, |
| "eval_G_dig": 0.29314698658098287, |
| "eval_G_exon": 0.32055345667169877, |
| "eval_G_intron": 0.3188114075381784, |
| "eval_G_nig": 0.31865636741806275, |
| "eval_G_promoter": 0.3128496054748008, |
| "eval_G_utr": 0.31821468602643715, |
| "eval_avg_bp_per_token": 2.988500379429105, |
| "eval_bp_per_token/cds": 3.006682741991044, |
| "eval_bp_per_token/dig": 3.117904993909866, |
| "eval_bp_per_token/exon": 2.9531888544891642, |
| "eval_bp_per_token/intron": 2.981396551261046, |
| "eval_bp_per_token/nig": 2.9755012821847293, |
| "eval_bp_per_token/promoter": 3.0319988682982033, |
| "eval_bp_per_token/utr": 2.981179904747273, |
| "eval_ppl_cds": 3.7123510478966413, |
| "eval_ppl_dig": 1.1330739061422115, |
| "eval_ppl_exon": 3.337367507183785, |
| "eval_ppl_intron": 2.9854555129266296, |
| "eval_ppl_nig": 2.7241976865988637, |
| "eval_ppl_promoter": 3.2884822422677225, |
| "eval_ppl_utr": 3.2793148726546266, |
| "step": 4500, |
| "tokens_trained": 14.741797512 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.2791291397773208, |
| "grad_norm": 0.24554239213466644, |
| "loss": 1.1286, |
| "loss_ce": 1.1632256507873535, |
| "loss_region": 0.05998421087861061, |
| "loss_total": 1.2232098579406738, |
| "lr": 0.0010616218750657124, |
| "router/selected_tokens_s0": 7387.5, |
| "router/selected_tokens_s1": 4308.375, |
| "step": 4510, |
| "tokens_trained": 14.774562152 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.2819658180270903, |
| "grad_norm": 0.07869572937488556, |
| "loss": 1.1184, |
| "loss_ce": 1.1050900220870972, |
| "loss_region": 0.059989046305418015, |
| "loss_total": 1.165079116821289, |
| "lr": 0.0010612149830818616, |
| "router/selected_tokens_s0": 7434.875, |
| "router/selected_tokens_s1": 4323.0, |
| "step": 4520, |
| "tokens_trained": 14.807327592 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.2848024962768598, |
| "grad_norm": 0.17654132843017578, |
| "loss": 1.1229, |
| "loss_ce": 1.0468547344207764, |
| "loss_region": 0.05998006463050842, |
| "loss_total": 1.1068347692489624, |
| "lr": 0.0010608080910980105, |
| "router/selected_tokens_s0": 7451.375, |
| "router/selected_tokens_s1": 4358.75, |
| "step": 4530, |
| "tokens_trained": 14.840092232 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.2876391745266293, |
| "grad_norm": 0.5160551071166992, |
| "loss": 1.1315, |
| "loss_ce": 1.0896406173706055, |
| "loss_region": 0.060032665729522705, |
| "loss_total": 1.1496732234954834, |
| "lr": 0.0010604011991141595, |
| "router/selected_tokens_s0": 7418.5, |
| "router/selected_tokens_s1": 4231.5, |
| "step": 4540, |
| "tokens_trained": 14.872854344 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.2904758527763989, |
| "grad_norm": 0.27445802092552185, |
| "loss": 1.1303, |
| "loss_ce": 1.1501026153564453, |
| "loss_region": 0.059976108372211456, |
| "loss_total": 1.2100787162780762, |
| "lr": 0.0010599943071303085, |
| "router/selected_tokens_s0": 7381.0, |
| "router/selected_tokens_s1": 4362.0, |
| "step": 4550, |
| "tokens_trained": 14.905619784 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.2933125310261684, |
| "grad_norm": 0.10401671379804611, |
| "loss": 1.1242, |
| "loss_ce": 1.12882399559021, |
| "loss_region": 0.059980615973472595, |
| "loss_total": 1.1888046264648438, |
| "lr": 0.0010595874151464574, |
| "router/selected_tokens_s0": 7415.25, |
| "router/selected_tokens_s1": 4340.5, |
| "step": 4560, |
| "tokens_trained": 14.938385224 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.296149209275938, |
| "grad_norm": 0.22361941635608673, |
| "loss": 1.1184, |
| "loss_ce": 1.0420340299606323, |
| "loss_region": 0.05997908487915993, |
| "loss_total": 1.102013111114502, |
| "lr": 0.0010591805231626064, |
| "router/selected_tokens_s0": 7414.625, |
| "router/selected_tokens_s1": 4339.875, |
| "step": 4570, |
| "tokens_trained": 14.971149864 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.2989858875257074, |
| "grad_norm": 0.13880027830600739, |
| "loss": 1.1182, |
| "loss_ce": 1.1432592868804932, |
| "loss_region": 0.05998491123318672, |
| "loss_total": 1.2032442092895508, |
| "lr": 0.0010587736311787554, |
| "router/selected_tokens_s0": 7413.375, |
| "router/selected_tokens_s1": 4324.25, |
| "step": 4580, |
| "tokens_trained": 15.003915304 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.301822565775477, |
| "grad_norm": 0.34181830286979675, |
| "loss": 1.1229, |
| "loss_ce": 1.0234683752059937, |
| "loss_region": 0.05998027324676514, |
| "loss_total": 1.0834486484527588, |
| "lr": 0.0010583667391949043, |
| "router/selected_tokens_s0": 7370.75, |
| "router/selected_tokens_s1": 4327.625, |
| "step": 4590, |
| "tokens_trained": 15.036680744 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.3046592440252465, |
| "grad_norm": 0.44472306966781616, |
| "loss": 1.1253, |
| "loss_ce": 1.101945400238037, |
| "loss_region": 0.05997232720255852, |
| "loss_total": 1.1619176864624023, |
| "lr": 0.0010579598472110533, |
| "router/selected_tokens_s0": 7432.125, |
| "router/selected_tokens_s1": 4372.75, |
| "step": 4600, |
| "tokens_trained": 15.069442824 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.307495922275016, |
| "grad_norm": 0.16355791687965393, |
| "loss": 1.1188, |
| "loss_ce": 1.1986117362976074, |
| "loss_region": 0.06000068411231041, |
| "loss_total": 1.2586123943328857, |
| "lr": 0.0010575529552272023, |
| "router/selected_tokens_s0": 7469.875, |
| "router/selected_tokens_s1": 4313.25, |
| "step": 4610, |
| "tokens_trained": 15.102208264 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.3103326005247855, |
| "grad_norm": 0.22206738591194153, |
| "loss": 1.1225, |
| "loss_ce": 1.048006296157837, |
| "loss_region": 0.05998196825385094, |
| "loss_total": 1.1079882383346558, |
| "lr": 0.0010571460632433512, |
| "router/selected_tokens_s0": 7401.875, |
| "router/selected_tokens_s1": 4326.5, |
| "step": 4620, |
| "tokens_trained": 15.134972904 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.313169278774555, |
| "grad_norm": 0.4577283263206482, |
| "loss": 1.1217, |
| "loss_ce": 0.9944411516189575, |
| "loss_region": 0.06000739336013794, |
| "loss_total": 1.0544486045837402, |
| "lr": 0.0010567391712595002, |
| "router/selected_tokens_s0": 7405.25, |
| "router/selected_tokens_s1": 4267.875, |
| "step": 4630, |
| "tokens_trained": 15.167735632 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.3160059570243245, |
| "grad_norm": 0.11892636120319366, |
| "loss": 1.1252, |
| "loss_ce": 1.0683937072753906, |
| "loss_region": 0.059995830059051514, |
| "loss_total": 1.128389596939087, |
| "lr": 0.0010563322792756492, |
| "router/selected_tokens_s0": 7362.75, |
| "router/selected_tokens_s1": 4270.875, |
| "step": 4640, |
| "tokens_trained": 15.200501072 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.318842635274094, |
| "grad_norm": 0.22857649624347687, |
| "loss": 1.1228, |
| "loss_ce": 1.0523583889007568, |
| "loss_region": 0.05998574197292328, |
| "loss_total": 1.1123441457748413, |
| "lr": 0.0010559253872917981, |
| "router/selected_tokens_s0": 7456.75, |
| "router/selected_tokens_s1": 4359.25, |
| "step": 4650, |
| "tokens_trained": 15.233266512 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.3216793135238636, |
| "grad_norm": 0.23737908899784088, |
| "loss": 1.1186, |
| "loss_ce": 1.13929283618927, |
| "loss_region": 0.0599866583943367, |
| "loss_total": 1.199279546737671, |
| "lr": 0.001055518495307947, |
| "router/selected_tokens_s0": 7427.75, |
| "router/selected_tokens_s1": 4333.625, |
| "step": 4660, |
| "tokens_trained": 15.266028416 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.324515991773633, |
| "grad_norm": 0.37915486097335815, |
| "loss": 1.1254, |
| "loss_ce": 1.09169602394104, |
| "loss_region": 0.06004900857806206, |
| "loss_total": 1.151745080947876, |
| "lr": 0.001055111603324096, |
| "router/selected_tokens_s0": 7402.75, |
| "router/selected_tokens_s1": 4227.625, |
| "step": 4670, |
| "tokens_trained": 15.298793832 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.3273526700234026, |
| "grad_norm": 0.3147301971912384, |
| "loss": 1.1198, |
| "loss_ce": 1.1839306354522705, |
| "loss_region": 0.060013432055711746, |
| "loss_total": 1.2439440488815308, |
| "lr": 0.001054704711340245, |
| "router/selected_tokens_s0": 7460.5, |
| "router/selected_tokens_s1": 4279.875, |
| "step": 4680, |
| "tokens_trained": 15.331559256 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.3301893482731721, |
| "grad_norm": 0.20523282885551453, |
| "loss": 1.1242, |
| "loss_ce": 1.0608105659484863, |
| "loss_region": 0.0599835105240345, |
| "loss_total": 1.1207940578460693, |
| "lr": 0.001054297819356394, |
| "router/selected_tokens_s0": 7370.25, |
| "router/selected_tokens_s1": 4307.375, |
| "step": 4690, |
| "tokens_trained": 15.364323896 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.3330260265229417, |
| "grad_norm": 0.4551692008972168, |
| "loss": 1.122, |
| "loss_ce": 1.038015604019165, |
| "loss_region": 0.0599852129817009, |
| "loss_total": 1.0980007648468018, |
| "lr": 0.0010538909273725432, |
| "router/selected_tokens_s0": 7438.0, |
| "router/selected_tokens_s1": 4342.25, |
| "step": 4700, |
| "tokens_trained": 15.397089336 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.3358627047727112, |
| "grad_norm": 0.17796026170253754, |
| "loss": 1.116, |
| "loss_ce": 1.0065257549285889, |
| "loss_region": 0.05998144671320915, |
| "loss_total": 1.0665072202682495, |
| "lr": 0.0010534840353886921, |
| "router/selected_tokens_s0": 7439.875, |
| "router/selected_tokens_s1": 4383.0, |
| "step": 4710, |
| "tokens_trained": 15.42985476 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.3386993830224807, |
| "grad_norm": 0.22296547889709473, |
| "loss": 1.123, |
| "loss_ce": 1.0840928554534912, |
| "loss_region": 0.060023073107004166, |
| "loss_total": 1.144115924835205, |
| "lr": 0.001053077143404841, |
| "router/selected_tokens_s0": 7459.125, |
| "router/selected_tokens_s1": 4259.125, |
| "step": 4720, |
| "tokens_trained": 15.4626202 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.3415360612722502, |
| "grad_norm": 0.13965415954589844, |
| "loss": 1.1186, |
| "loss_ce": 0.9718788862228394, |
| "loss_region": 0.06004929542541504, |
| "loss_total": 1.0319281816482544, |
| "lr": 0.0010526702514209898, |
| "router/selected_tokens_s0": 7428.875, |
| "router/selected_tokens_s1": 4207.0, |
| "step": 4730, |
| "tokens_trained": 15.49538484 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.3443727395220197, |
| "grad_norm": 0.06705205887556076, |
| "loss": 1.1143, |
| "loss_ce": 1.0242143869400024, |
| "loss_region": 0.06001764535903931, |
| "loss_total": 1.0842320919036865, |
| "lr": 0.0010522633594371388, |
| "router/selected_tokens_s0": 7395.625, |
| "router/selected_tokens_s1": 4246.125, |
| "step": 4740, |
| "tokens_trained": 15.52815028 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.3472094177717893, |
| "grad_norm": 0.22649919986724854, |
| "loss": 1.1177, |
| "loss_ce": 1.1338170766830444, |
| "loss_region": 0.06001260131597519, |
| "loss_total": 1.1938296556472778, |
| "lr": 0.0010518564674532878, |
| "router/selected_tokens_s0": 7391.125, |
| "router/selected_tokens_s1": 4239.0, |
| "step": 4750, |
| "tokens_trained": 15.56091572 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.3500460960215588, |
| "grad_norm": 0.31113359332084656, |
| "loss": 1.123, |
| "loss_ce": 0.9169719815254211, |
| "loss_region": 0.06001540645956993, |
| "loss_total": 0.976987361907959, |
| "lr": 0.0010514495754694367, |
| "router/selected_tokens_s0": 7328.375, |
| "router/selected_tokens_s1": 4216.875, |
| "step": 4760, |
| "tokens_trained": 15.593678736 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.3528827742713283, |
| "grad_norm": 0.1546049863100052, |
| "loss": 1.1181, |
| "loss_ce": 1.0581512451171875, |
| "loss_region": 0.05999620631337166, |
| "loss_total": 1.1181474924087524, |
| "lr": 0.001051042683485586, |
| "router/selected_tokens_s0": 7396.625, |
| "router/selected_tokens_s1": 4291.375, |
| "step": 4770, |
| "tokens_trained": 15.626444176 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.3557194525210978, |
| "grad_norm": 0.2984902858734131, |
| "loss": 1.1199, |
| "loss_ce": 1.0760042667388916, |
| "loss_region": 0.060034509748220444, |
| "loss_total": 1.1360387802124023, |
| "lr": 0.0010506357915017349, |
| "router/selected_tokens_s0": 7484.25, |
| "router/selected_tokens_s1": 4248.375, |
| "step": 4780, |
| "tokens_trained": 15.659209616 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.3585561307708673, |
| "grad_norm": 0.15915615856647491, |
| "loss": 1.1209, |
| "loss_ce": 1.1233313083648682, |
| "loss_region": 0.060023460537195206, |
| "loss_total": 1.1833547353744507, |
| "lr": 0.0010502288995178838, |
| "router/selected_tokens_s0": 7426.25, |
| "router/selected_tokens_s1": 4239.625, |
| "step": 4790, |
| "tokens_trained": 15.691975056 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.3613928090206369, |
| "grad_norm": 0.4409286379814148, |
| "loss": 1.1238, |
| "loss_ce": 1.1545687913894653, |
| "loss_region": 0.06003446504473686, |
| "loss_total": 1.214603304862976, |
| "lr": 0.0010498220075340328, |
| "router/selected_tokens_s0": 7428.375, |
| "router/selected_tokens_s1": 4222.25, |
| "step": 4800, |
| "tokens_trained": 15.724740496 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.3642294872704064, |
| "grad_norm": 0.14395779371261597, |
| "loss": 1.1167, |
| "loss_ce": 1.1078091859817505, |
| "loss_region": 0.06001976132392883, |
| "loss_total": 1.167828917503357, |
| "lr": 0.0010494151155501818, |
| "router/selected_tokens_s0": 7381.5, |
| "router/selected_tokens_s1": 4220.125, |
| "step": 4810, |
| "tokens_trained": 15.757505936 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.367066165520176, |
| "grad_norm": 0.07509951293468475, |
| "loss": 1.1117, |
| "loss_ce": 1.0311144590377808, |
| "loss_region": 0.05999474227428436, |
| "loss_total": 1.0911091566085815, |
| "lr": 0.0010490082235663307, |
| "router/selected_tokens_s0": 7421.625, |
| "router/selected_tokens_s1": 4303.625, |
| "step": 4820, |
| "tokens_trained": 15.790271376 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.3699028437699454, |
| "grad_norm": 0.33459046483039856, |
| "loss": 1.123, |
| "loss_ce": 1.0805909633636475, |
| "loss_region": 0.059987638145685196, |
| "loss_total": 1.1405786275863647, |
| "lr": 0.0010486013315824797, |
| "router/selected_tokens_s0": 7454.625, |
| "router/selected_tokens_s1": 4340.875, |
| "step": 4830, |
| "tokens_trained": 15.823036016 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.372739522019715, |
| "grad_norm": 0.3175514340400696, |
| "loss": 1.116, |
| "loss_ce": 0.9817476868629456, |
| "loss_region": 0.06001373007893562, |
| "loss_total": 1.0417613983154297, |
| "lr": 0.0010481944395986287, |
| "router/selected_tokens_s0": 7536.375, |
| "router/selected_tokens_s1": 4333.25, |
| "step": 4840, |
| "tokens_trained": 15.855801456 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.3755762002694845, |
| "grad_norm": 0.18610598146915436, |
| "loss": 1.1122, |
| "loss_ce": 0.9771859049797058, |
| "loss_region": 0.059994395822286606, |
| "loss_total": 1.0371803045272827, |
| "lr": 0.0010477875476147776, |
| "router/selected_tokens_s0": 7416.5, |
| "router/selected_tokens_s1": 4305.75, |
| "step": 4850, |
| "tokens_trained": 15.888566096 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.378412878519254, |
| "grad_norm": 0.2766318619251251, |
| "loss": 1.1136, |
| "loss_ce": 1.0846664905548096, |
| "loss_region": 0.059982847422361374, |
| "loss_total": 1.1446493864059448, |
| "lr": 0.0010473806556309266, |
| "router/selected_tokens_s0": 7447.5, |
| "router/selected_tokens_s1": 4378.25, |
| "step": 4860, |
| "tokens_trained": 15.921331536 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.3812495567690235, |
| "grad_norm": 0.236831933259964, |
| "loss": 1.1133, |
| "loss_ce": 1.09515380859375, |
| "loss_region": 0.06001046299934387, |
| "loss_total": 1.1551642417907715, |
| "lr": 0.0010469737636470756, |
| "router/selected_tokens_s0": 7429.125, |
| "router/selected_tokens_s1": 4270.375, |
| "step": 4870, |
| "tokens_trained": 15.954096176 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.384086235018793, |
| "grad_norm": 0.38822853565216064, |
| "loss": 1.1163, |
| "loss_ce": 1.102951169013977, |
| "loss_region": 0.06000502035021782, |
| "loss_total": 1.1629562377929688, |
| "lr": 0.0010465668716632245, |
| "router/selected_tokens_s0": 7433.625, |
| "router/selected_tokens_s1": 4280.0, |
| "step": 4880, |
| "tokens_trained": 15.9868564 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.3869229132685625, |
| "grad_norm": 0.14238017797470093, |
| "loss": 1.1136, |
| "loss_ce": 1.0466766357421875, |
| "loss_region": 0.06005201116204262, |
| "loss_total": 1.1067286729812622, |
| "lr": 0.0010461599796793735, |
| "router/selected_tokens_s0": 7479.375, |
| "router/selected_tokens_s1": 4220.5, |
| "step": 4890, |
| "tokens_trained": 16.019621784 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.389759591518332, |
| "grad_norm": 0.24368639290332794, |
| "loss": 1.1171, |
| "loss_ce": 1.1112743616104126, |
| "loss_region": 0.059988003224134445, |
| "loss_total": 1.1712623834609985, |
| "lr": 0.0010457530876955225, |
| "router/selected_tokens_s0": 7440.125, |
| "router/selected_tokens_s1": 4345.0, |
| "step": 4900, |
| "tokens_trained": 16.052387224 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.3925962697681016, |
| "grad_norm": 0.16130021214485168, |
| "loss": 1.113, |
| "loss_ce": 0.9168754816055298, |
| "loss_region": 0.05999680235981941, |
| "loss_total": 0.9768722653388977, |
| "lr": 0.0010453461957116714, |
| "router/selected_tokens_s0": 7493.375, |
| "router/selected_tokens_s1": 4340.75, |
| "step": 4910, |
| "tokens_trained": 16.085152664 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.395432948017871, |
| "grad_norm": 0.4398319125175476, |
| "loss": 1.1189, |
| "loss_ce": 0.9984895586967468, |
| "loss_region": 0.05999951437115669, |
| "loss_total": 1.0584890842437744, |
| "lr": 0.0010449393037278204, |
| "router/selected_tokens_s0": 7526.375, |
| "router/selected_tokens_s1": 4367.125, |
| "step": 4920, |
| "tokens_trained": 16.117918104 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.3982696262676406, |
| "grad_norm": 0.20883530378341675, |
| "loss": 1.1163, |
| "loss_ce": 0.99969083070755, |
| "loss_region": 0.0599936805665493, |
| "loss_total": 1.0596845149993896, |
| "lr": 0.0010445324117439694, |
| "router/selected_tokens_s0": 7413.75, |
| "router/selected_tokens_s1": 4302.0, |
| "step": 4930, |
| "tokens_trained": 16.150683544 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.4011063045174101, |
| "grad_norm": 0.6086301207542419, |
| "loss": 1.1135, |
| "loss_ce": 1.027841567993164, |
| "loss_region": 0.05998702719807625, |
| "loss_total": 1.0878286361694336, |
| "lr": 0.0010441255197601183, |
| "router/selected_tokens_s0": 7387.25, |
| "router/selected_tokens_s1": 4316.875, |
| "step": 4940, |
| "tokens_trained": 16.183448184 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.4039429827671797, |
| "grad_norm": 0.047434039413928986, |
| "loss": 1.1168, |
| "loss_ce": 1.03725266456604, |
| "loss_region": 0.06001438945531845, |
| "loss_total": 1.0972670316696167, |
| "lr": 0.0010437186277762675, |
| "router/selected_tokens_s0": 7409.0, |
| "router/selected_tokens_s1": 4250.125, |
| "step": 4950, |
| "tokens_trained": 16.216210576 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.4067796610169492, |
| "grad_norm": 0.12742455303668976, |
| "loss": 1.1131, |
| "loss_ce": 1.1178278923034668, |
| "loss_region": 0.060008853673934937, |
| "loss_total": 1.1778367757797241, |
| "lr": 0.0010433117357924165, |
| "router/selected_tokens_s0": 7387.75, |
| "router/selected_tokens_s1": 4253.0, |
| "step": 4960, |
| "tokens_trained": 16.248975216 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.4096163392667187, |
| "grad_norm": 0.46089595556259155, |
| "loss": 1.1148, |
| "loss_ce": 1.0517332553863525, |
| "loss_region": 0.0600130595266819, |
| "loss_total": 1.1117463111877441, |
| "lr": 0.0010429048438085654, |
| "router/selected_tokens_s0": 7444.375, |
| "router/selected_tokens_s1": 4266.125, |
| "step": 4970, |
| "tokens_trained": 16.281739056 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.4124530175164882, |
| "grad_norm": 0.16032390296459198, |
| "loss": 1.1141, |
| "loss_ce": 1.0725197792053223, |
| "loss_region": 0.060011573135852814, |
| "loss_total": 1.1325314044952393, |
| "lr": 0.0010424979518247142, |
| "router/selected_tokens_s0": 7395.875, |
| "router/selected_tokens_s1": 4244.375, |
| "step": 4980, |
| "tokens_trained": 16.314504496 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.4152896957662577, |
| "grad_norm": 0.2698926627635956, |
| "loss": 1.114, |
| "loss_ce": 0.9691027998924255, |
| "loss_region": 0.06001393124461174, |
| "loss_total": 1.0291167497634888, |
| "lr": 0.0010420910598408631, |
| "router/selected_tokens_s0": 7416.875, |
| "router/selected_tokens_s1": 4247.125, |
| "step": 4990, |
| "tokens_trained": 16.347269936 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.4181263740160273, |
| "grad_norm": 0.29608723521232605, |
| "loss": 1.1139, |
| "loss_ce": 1.0969948768615723, |
| "loss_region": 0.06001259386539459, |
| "loss_total": 1.1570074558258057, |
| "lr": 0.0010416841678570121, |
| "router/selected_tokens_s0": 7368.25, |
| "router/selected_tokens_s1": 4259.5, |
| "step": 5000, |
| "tokens_trained": 16.380034576 |
| }, |
| { |
| "epoch": 1.4181263740160273, |
| "eval_ppl": 2.8878776092072367, |
| "eval_runtime": 1.0117, |
| "step": 5000, |
| "tokens_trained": 16.380034576 |
| }, |
| { |
| "epoch": 1.4181263740160273, |
| "eval_F": 0.3334707138578535, |
| "eval_F_cds": 0.3258558269556849, |
| "eval_F_dig": 0.3043987811547777, |
| "eval_F_exon": 0.3379041388853944, |
| "eval_F_intron": 0.33496283233277363, |
| "eval_F_nig": 0.33443367424820636, |
| "eval_F_promoter": 0.32823376817274136, |
| "eval_F_utr": 0.33587569893581387, |
| "eval_G": 0.32152487499023363, |
| "eval_G_cds": 0.31537235847921724, |
| "eval_G_dig": 0.28879102493358855, |
| "eval_G_exon": 0.32521681186312745, |
| "eval_G_intron": 0.3229452310232905, |
| "eval_G_nig": 0.3225945309816726, |
| "eval_G_promoter": 0.3164494893582153, |
| "eval_G_utr": 0.3238339528202221, |
| "eval_avg_bp_per_token": 2.998764084651415, |
| "eval_bp_per_token/cds": 3.0688418535967936, |
| "eval_bp_per_token/dig": 3.2851642710472277, |
| "eval_bp_per_token/exon": 2.9594192107222637, |
| "eval_bp_per_token/intron": 2.985405852451521, |
| "eval_bp_per_token/nig": 2.9901295144633995, |
| "eval_bp_per_token/promoter": 3.04660914557007, |
| "eval_bp_per_token/utr": 2.9772919064058305, |
| "eval_ppl_cds": 3.709566657145957, |
| "eval_ppl_dig": 1.123842032296959, |
| "eval_ppl_exon": 3.3107114360355596, |
| "eval_ppl_intron": 2.957223274787988, |
| "eval_ppl_nig": 2.6887659433495315, |
| "eval_ppl_promoter": 3.2651215439300434, |
| "eval_ppl_utr": 3.2607755780543872, |
| "step": 5000, |
| "tokens_trained": 16.380034576 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.4209630522657968, |
| "grad_norm": 0.13509275019168854, |
| "loss": 1.1122, |
| "loss_ce": 1.0823110342025757, |
| "loss_region": 0.06001218780875206, |
| "loss_total": 1.1423232555389404, |
| "lr": 0.001041277275873161, |
| "router/selected_tokens_s0": 7402.875, |
| "router/selected_tokens_s1": 4253.0, |
| "step": 5010, |
| "tokens_trained": 16.412800016 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.4237997305155663, |
| "grad_norm": 0.46688932180404663, |
| "loss": 1.1176, |
| "loss_ce": 1.067285418510437, |
| "loss_region": 0.0600067637860775, |
| "loss_total": 1.1272921562194824, |
| "lr": 0.0010408703838893103, |
| "router/selected_tokens_s0": 7453.125, |
| "router/selected_tokens_s1": 4284.0, |
| "step": 5020, |
| "tokens_trained": 16.445565456 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.4266364087653358, |
| "grad_norm": 0.27795660495758057, |
| "loss": 1.1091, |
| "loss_ce": 1.0628243684768677, |
| "loss_region": 0.059996675699949265, |
| "loss_total": 1.1228210926055908, |
| "lr": 0.0010404634919054592, |
| "router/selected_tokens_s0": 7427.25, |
| "router/selected_tokens_s1": 4300.875, |
| "step": 5030, |
| "tokens_trained": 16.478329936 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.4294730870151053, |
| "grad_norm": 0.14971427619457245, |
| "loss": 1.1043, |
| "loss_ce": 1.1450423002243042, |
| "loss_region": 0.059983156621456146, |
| "loss_total": 1.2050254344940186, |
| "lr": 0.0010400565999216082, |
| "router/selected_tokens_s0": 7437.25, |
| "router/selected_tokens_s1": 4375.0, |
| "step": 5040, |
| "tokens_trained": 16.511090344 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.4323097652648749, |
| "grad_norm": 0.15365780889987946, |
| "loss": 1.1052, |
| "loss_ce": 1.002153754234314, |
| "loss_region": 0.059985172003507614, |
| "loss_total": 1.0621389150619507, |
| "lr": 0.0010396497079377572, |
| "router/selected_tokens_s0": 7341.625, |
| "router/selected_tokens_s1": 4392.375, |
| "step": 5050, |
| "tokens_trained": 16.543855784 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.4351464435146444, |
| "grad_norm": 0.30252763628959656, |
| "loss": 1.1175, |
| "loss_ce": 1.1226723194122314, |
| "loss_region": 0.059989143162965775, |
| "loss_total": 1.1826614141464233, |
| "lr": 0.0010392428159539061, |
| "router/selected_tokens_s0": 7409.625, |
| "router/selected_tokens_s1": 4329.875, |
| "step": 5060, |
| "tokens_trained": 16.576621224 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.437983121764414, |
| "grad_norm": 0.13629955053329468, |
| "loss": 1.1122, |
| "loss_ce": 0.9620881676673889, |
| "loss_region": 0.06000163033604622, |
| "loss_total": 1.0220898389816284, |
| "lr": 0.001038835923970055, |
| "router/selected_tokens_s0": 7431.25, |
| "router/selected_tokens_s1": 4287.375, |
| "step": 5070, |
| "tokens_trained": 16.609386664 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.4408198000141834, |
| "grad_norm": 0.35703352093696594, |
| "loss": 1.1165, |
| "loss_ce": 0.8642436861991882, |
| "loss_region": 0.06000054255127907, |
| "loss_total": 0.924244225025177, |
| "lr": 0.001038429031986204, |
| "router/selected_tokens_s0": 7412.125, |
| "router/selected_tokens_s1": 4284.0, |
| "step": 5080, |
| "tokens_trained": 16.642152104 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.443656478263953, |
| "grad_norm": 0.0827016830444336, |
| "loss": 1.1121, |
| "loss_ce": 1.0974977016448975, |
| "loss_region": 0.060001373291015625, |
| "loss_total": 1.157499074935913, |
| "lr": 0.001038022140002353, |
| "router/selected_tokens_s0": 7450.625, |
| "router/selected_tokens_s1": 4308.0, |
| "step": 5090, |
| "tokens_trained": 16.674914848 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.4464931565137225, |
| "grad_norm": 0.1692710518836975, |
| "loss": 1.1067, |
| "loss_ce": 1.1373043060302734, |
| "loss_region": 0.06000300124287605, |
| "loss_total": 1.1973073482513428, |
| "lr": 0.001037615248018502, |
| "router/selected_tokens_s0": 7456.625, |
| "router/selected_tokens_s1": 4291.75, |
| "step": 5100, |
| "tokens_trained": 16.707680128 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.449329834763492, |
| "grad_norm": 0.2739175856113434, |
| "loss": 1.1093, |
| "loss_ce": 1.1393697261810303, |
| "loss_region": 0.059991370886564255, |
| "loss_total": 1.1993610858917236, |
| "lr": 0.001037208356034651, |
| "router/selected_tokens_s0": 7455.125, |
| "router/selected_tokens_s1": 4355.375, |
| "step": 5110, |
| "tokens_trained": 16.740445568 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.4521665130132615, |
| "grad_norm": 0.30162233114242554, |
| "loss": 1.1123, |
| "loss_ce": 1.1314183473587036, |
| "loss_region": 0.059990085661411285, |
| "loss_total": 1.191408395767212, |
| "lr": 0.0010368014640508, |
| "router/selected_tokens_s0": 7427.5, |
| "router/selected_tokens_s1": 4322.125, |
| "step": 5120, |
| "tokens_trained": 16.773211008 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.455003191263031, |
| "grad_norm": 0.0802861824631691, |
| "loss": 1.1163, |
| "loss_ce": 1.0108295679092407, |
| "loss_region": 0.06001696363091469, |
| "loss_total": 1.0708465576171875, |
| "lr": 0.0010363945720669489, |
| "router/selected_tokens_s0": 7423.125, |
| "router/selected_tokens_s1": 4250.75, |
| "step": 5130, |
| "tokens_trained": 16.805971784 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.4578398695128005, |
| "grad_norm": 0.1599212884902954, |
| "loss": 1.112, |
| "loss_ce": 1.0099071264266968, |
| "loss_region": 0.05999860167503357, |
| "loss_total": 1.0699057579040527, |
| "lr": 0.0010359876800830978, |
| "router/selected_tokens_s0": 7411.0, |
| "router/selected_tokens_s1": 4282.875, |
| "step": 5140, |
| "tokens_trained": 16.838737224 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.46067654776257, |
| "grad_norm": 0.1863497495651245, |
| "loss": 1.1069, |
| "loss_ce": 1.0711653232574463, |
| "loss_region": 0.05999632179737091, |
| "loss_total": 1.1311616897583008, |
| "lr": 0.0010355807880992468, |
| "router/selected_tokens_s0": 7435.375, |
| "router/selected_tokens_s1": 4302.75, |
| "step": 5150, |
| "tokens_trained": 16.871502664 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.4635132260123396, |
| "grad_norm": 0.22350755333900452, |
| "loss": 1.1053, |
| "loss_ce": 1.1011829376220703, |
| "loss_region": 0.05999854579567909, |
| "loss_total": 1.1611814498901367, |
| "lr": 0.0010351738961153958, |
| "router/selected_tokens_s0": 7417.875, |
| "router/selected_tokens_s1": 4287.25, |
| "step": 5160, |
| "tokens_trained": 16.904268104 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.466349904262109, |
| "grad_norm": 0.10094260424375534, |
| "loss": 1.1102, |
| "loss_ce": 1.0678144693374634, |
| "loss_region": 0.059994544833898544, |
| "loss_total": 1.1278090476989746, |
| "lr": 0.0010347670041315447, |
| "router/selected_tokens_s0": 7475.125, |
| "router/selected_tokens_s1": 4293.0, |
| "step": 5170, |
| "tokens_trained": 16.937030128 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.4691865825118786, |
| "grad_norm": 0.16106005012989044, |
| "loss": 1.1087, |
| "loss_ce": 1.0874272584915161, |
| "loss_region": 0.05999764800071716, |
| "loss_total": 1.1474249362945557, |
| "lr": 0.0010343601121476937, |
| "router/selected_tokens_s0": 7448.125, |
| "router/selected_tokens_s1": 4309.875, |
| "step": 5180, |
| "tokens_trained": 16.969795184 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.4720232607616481, |
| "grad_norm": 0.2280910164117813, |
| "loss": 1.1116, |
| "loss_ce": 1.0265172719955444, |
| "loss_region": 0.05999668687582016, |
| "loss_total": 1.0865139961242676, |
| "lr": 0.0010339532201638427, |
| "router/selected_tokens_s0": 7427.25, |
| "router/selected_tokens_s1": 4310.875, |
| "step": 5190, |
| "tokens_trained": 17.002560624 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.4748599390114177, |
| "grad_norm": 0.20116379857063293, |
| "loss": 1.1153, |
| "loss_ce": 1.101824164390564, |
| "loss_region": 0.05999062955379486, |
| "loss_total": 1.16181480884552, |
| "lr": 0.0010335463281799918, |
| "router/selected_tokens_s0": 7408.5, |
| "router/selected_tokens_s1": 4336.5, |
| "step": 5200, |
| "tokens_trained": 17.035326064 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.4776966172611872, |
| "grad_norm": 0.18408538401126862, |
| "loss": 1.1055, |
| "loss_ce": 0.9988734126091003, |
| "loss_region": 0.06001979857683182, |
| "loss_total": 1.0588932037353516, |
| "lr": 0.0010331394361961408, |
| "router/selected_tokens_s0": 7452.5, |
| "router/selected_tokens_s1": 4255.625, |
| "step": 5210, |
| "tokens_trained": 17.068091504 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.4805332955109567, |
| "grad_norm": 0.23415856063365936, |
| "loss": 1.1015, |
| "loss_ce": 1.020708680152893, |
| "loss_region": 0.06000715121626854, |
| "loss_total": 1.0807157754898071, |
| "lr": 0.0010327325442122898, |
| "router/selected_tokens_s0": 7493.25, |
| "router/selected_tokens_s1": 4307.375, |
| "step": 5220, |
| "tokens_trained": 17.100856944 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.4833699737607262, |
| "grad_norm": 0.06807141751050949, |
| "loss": 1.1108, |
| "loss_ce": 1.0629068613052368, |
| "loss_region": 0.0599919855594635, |
| "loss_total": 1.122898817062378, |
| "lr": 0.0010323256522284385, |
| "router/selected_tokens_s0": 7458.75, |
| "router/selected_tokens_s1": 4347.375, |
| "step": 5230, |
| "tokens_trained": 17.133622384 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.4862066520104957, |
| "grad_norm": 0.1424221247434616, |
| "loss": 1.1025, |
| "loss_ce": 1.0725303888320923, |
| "loss_region": 0.05999269708991051, |
| "loss_total": 1.1325230598449707, |
| "lr": 0.0010319187602445875, |
| "router/selected_tokens_s0": 7437.625, |
| "router/selected_tokens_s1": 4324.625, |
| "step": 5240, |
| "tokens_trained": 17.166385424 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.4890433302602653, |
| "grad_norm": 0.3532690703868866, |
| "loss": 1.1109, |
| "loss_ce": 0.8595598340034485, |
| "loss_region": 0.06000366061925888, |
| "loss_total": 0.9195634722709656, |
| "lr": 0.0010315118682607365, |
| "router/selected_tokens_s0": 7354.625, |
| "router/selected_tokens_s1": 4278.875, |
| "step": 5250, |
| "tokens_trained": 17.199150864 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.4918800085100348, |
| "grad_norm": 0.22460265457630157, |
| "loss": 1.1129, |
| "loss_ce": 1.0128638744354248, |
| "loss_region": 0.06000963971018791, |
| "loss_total": 1.0728734731674194, |
| "lr": 0.0010311049762768854, |
| "router/selected_tokens_s0": 7375.25, |
| "router/selected_tokens_s1": 4243.25, |
| "step": 5260, |
| "tokens_trained": 17.231916304 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.4947166867598043, |
| "grad_norm": 0.1373329609632492, |
| "loss": 1.1099, |
| "loss_ce": 1.0595307350158691, |
| "loss_region": 0.05999409779906273, |
| "loss_total": 1.1195248365402222, |
| "lr": 0.0010306980842930346, |
| "router/selected_tokens_s0": 7399.875, |
| "router/selected_tokens_s1": 4312.75, |
| "step": 5270, |
| "tokens_trained": 17.264676432 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.4975533650095738, |
| "grad_norm": 0.21981820464134216, |
| "loss": 1.1031, |
| "loss_ce": 0.9644787311553955, |
| "loss_region": 0.06000012904405594, |
| "loss_total": 1.0244789123535156, |
| "lr": 0.0010302911923091836, |
| "router/selected_tokens_s0": 7390.625, |
| "router/selected_tokens_s1": 4275.375, |
| "step": 5280, |
| "tokens_trained": 17.297440272 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.5003900432593433, |
| "grad_norm": 0.2568603754043579, |
| "loss": 1.1132, |
| "loss_ce": 1.0996148586273193, |
| "loss_region": 0.05999647080898285, |
| "loss_total": 1.1596113443374634, |
| "lr": 0.0010298843003253325, |
| "router/selected_tokens_s0": 7425.0, |
| "router/selected_tokens_s1": 4308.75, |
| "step": 5290, |
| "tokens_trained": 17.330205712 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.5032267215091129, |
| "grad_norm": 0.15437553822994232, |
| "loss": 1.1087, |
| "loss_ce": 1.082763910293579, |
| "loss_region": 0.060007624328136444, |
| "loss_total": 1.1427714824676514, |
| "lr": 0.0010294774083414815, |
| "router/selected_tokens_s0": 7454.5, |
| "router/selected_tokens_s1": 4281.75, |
| "step": 5300, |
| "tokens_trained": 17.362971152 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.5060633997588824, |
| "grad_norm": 0.1503092348575592, |
| "loss": 1.1118, |
| "loss_ce": 1.1156936883926392, |
| "loss_region": 0.060016077011823654, |
| "loss_total": 1.1757097244262695, |
| "lr": 0.0010290705163576305, |
| "router/selected_tokens_s0": 7488.25, |
| "router/selected_tokens_s1": 4275.375, |
| "step": 5310, |
| "tokens_trained": 17.395736592 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.508900078008652, |
| "grad_norm": 0.2845729887485504, |
| "loss": 1.1114, |
| "loss_ce": 1.0718103647232056, |
| "loss_region": 0.06000002846121788, |
| "loss_total": 1.1318104267120361, |
| "lr": 0.0010286636243737794, |
| "router/selected_tokens_s0": 7451.375, |
| "router/selected_tokens_s1": 4305.5, |
| "step": 5320, |
| "tokens_trained": 17.428502032 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.5117367562584214, |
| "grad_norm": 0.2394387274980545, |
| "loss": 1.1064, |
| "loss_ce": 1.067128300666809, |
| "loss_region": 0.06001432612538338, |
| "loss_total": 1.1271426677703857, |
| "lr": 0.0010282567323899284, |
| "router/selected_tokens_s0": 7442.375, |
| "router/selected_tokens_s1": 4261.125, |
| "step": 5330, |
| "tokens_trained": 17.461262816 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.514573434508191, |
| "grad_norm": 0.17551490664482117, |
| "loss": 1.1077, |
| "loss_ce": 0.9975200891494751, |
| "loss_region": 0.06000635772943497, |
| "loss_total": 1.0575264692306519, |
| "lr": 0.0010278498404060774, |
| "router/selected_tokens_s0": 7472.0, |
| "router/selected_tokens_s1": 4289.375, |
| "step": 5340, |
| "tokens_trained": 17.494028256 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.5174101127579604, |
| "grad_norm": 0.14462608098983765, |
| "loss": 1.1143, |
| "loss_ce": 1.0838935375213623, |
| "loss_region": 0.06000163033604622, |
| "loss_total": 1.143895149230957, |
| "lr": 0.0010274429484222263, |
| "router/selected_tokens_s0": 7453.5, |
| "router/selected_tokens_s1": 4294.125, |
| "step": 5350, |
| "tokens_trained": 17.526793696 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.52024679100773, |
| "grad_norm": 0.11604762822389603, |
| "loss": 1.1096, |
| "loss_ce": 0.9876978993415833, |
| "loss_region": 0.05999482050538063, |
| "loss_total": 1.0476927757263184, |
| "lr": 0.0010270360564383753, |
| "router/selected_tokens_s0": 7393.625, |
| "router/selected_tokens_s1": 4336.25, |
| "step": 5360, |
| "tokens_trained": 17.559559136 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.5230834692574995, |
| "grad_norm": 0.19762036204338074, |
| "loss": 1.1052, |
| "loss_ce": 1.0647521018981934, |
| "loss_region": 0.05999292433261871, |
| "loss_total": 1.1247450113296509, |
| "lr": 0.0010266291644545243, |
| "router/selected_tokens_s0": 7445.5, |
| "router/selected_tokens_s1": 4347.625, |
| "step": 5370, |
| "tokens_trained": 17.59232456 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.525920147507269, |
| "grad_norm": 0.12396788597106934, |
| "loss": 1.1071, |
| "loss_ce": 0.9899451732635498, |
| "loss_region": 0.0599982813000679, |
| "loss_total": 1.049943447113037, |
| "lr": 0.0010262222724706732, |
| "router/selected_tokens_s0": 7417.0, |
| "router/selected_tokens_s1": 4315.375, |
| "step": 5380, |
| "tokens_trained": 17.6250892 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.5287568257570385, |
| "grad_norm": 0.26794371008872986, |
| "loss": 1.1092, |
| "loss_ce": 1.0269250869750977, |
| "loss_region": 0.059988874942064285, |
| "loss_total": 1.0869139432907104, |
| "lr": 0.0010258153804868222, |
| "router/selected_tokens_s0": 7418.625, |
| "router/selected_tokens_s1": 4356.25, |
| "step": 5390, |
| "tokens_trained": 17.65785464 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.531593504006808, |
| "grad_norm": 0.13411357998847961, |
| "loss": 1.1132, |
| "loss_ce": 1.1071951389312744, |
| "loss_region": 0.05999772250652313, |
| "loss_total": 1.167192816734314, |
| "lr": 0.0010254084885029712, |
| "router/selected_tokens_s0": 7403.5, |
| "router/selected_tokens_s1": 4374.0, |
| "step": 5400, |
| "tokens_trained": 17.69062008 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.5344301822565776, |
| "grad_norm": 0.10997501015663147, |
| "loss": 1.1052, |
| "loss_ce": 1.0851551294326782, |
| "loss_region": 0.05999771133065224, |
| "loss_total": 1.1451528072357178, |
| "lr": 0.0010250015965191201, |
| "router/selected_tokens_s0": 7394.0, |
| "router/selected_tokens_s1": 4301.125, |
| "step": 5410, |
| "tokens_trained": 17.72338552 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.537266860506347, |
| "grad_norm": 0.1932307481765747, |
| "loss": 1.1071, |
| "loss_ce": 1.1663951873779297, |
| "loss_region": 0.06001700833439827, |
| "loss_total": 1.2264121770858765, |
| "lr": 0.001024594704535269, |
| "router/selected_tokens_s0": 7470.125, |
| "router/selected_tokens_s1": 4251.375, |
| "step": 5420, |
| "tokens_trained": 17.75615016 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.5401035387561166, |
| "grad_norm": 0.18134096264839172, |
| "loss": 1.1095, |
| "loss_ce": 1.0441800355911255, |
| "loss_region": 0.060025617480278015, |
| "loss_total": 1.10420560836792, |
| "lr": 0.001024187812551418, |
| "router/selected_tokens_s0": 7487.75, |
| "router/selected_tokens_s1": 4252.375, |
| "step": 5430, |
| "tokens_trained": 17.78891464 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.5429402170058861, |
| "grad_norm": 0.11615798622369766, |
| "loss": 1.1043, |
| "loss_ce": 1.0413671731948853, |
| "loss_region": 0.06000577658414841, |
| "loss_total": 1.1013729572296143, |
| "lr": 0.001023780920567567, |
| "router/selected_tokens_s0": 7445.75, |
| "router/selected_tokens_s1": 4278.125, |
| "step": 5440, |
| "tokens_trained": 17.821677712 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.5457768952556556, |
| "grad_norm": 0.1413813829421997, |
| "loss": 1.1027, |
| "loss_ce": 1.124322772026062, |
| "loss_region": 0.05999701842665672, |
| "loss_total": 1.1843197345733643, |
| "lr": 0.0010233740285837162, |
| "router/selected_tokens_s0": 7456.5, |
| "router/selected_tokens_s1": 4312.0, |
| "step": 5450, |
| "tokens_trained": 17.854443152 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.5486135735054252, |
| "grad_norm": 0.2357422411441803, |
| "loss": 1.1088, |
| "loss_ce": 1.029032826423645, |
| "loss_region": 0.06000213697552681, |
| "loss_total": 1.089034914970398, |
| "lr": 0.0010229671365998652, |
| "router/selected_tokens_s0": 7351.875, |
| "router/selected_tokens_s1": 4269.75, |
| "step": 5460, |
| "tokens_trained": 17.88720588 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.5514502517551947, |
| "grad_norm": 0.3043672740459442, |
| "loss": 1.1069, |
| "loss_ce": 1.0109539031982422, |
| "loss_region": 0.06000383198261261, |
| "loss_total": 1.0709577798843384, |
| "lr": 0.0010225602446160141, |
| "router/selected_tokens_s0": 7402.125, |
| "router/selected_tokens_s1": 4259.875, |
| "step": 5470, |
| "tokens_trained": 17.91997132 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.5542869300049642, |
| "grad_norm": 0.21447394788265228, |
| "loss": 1.0999, |
| "loss_ce": 0.898987352848053, |
| "loss_region": 0.059998832643032074, |
| "loss_total": 0.9589861631393433, |
| "lr": 0.0010221533526321629, |
| "router/selected_tokens_s0": 7391.75, |
| "router/selected_tokens_s1": 4311.25, |
| "step": 5480, |
| "tokens_trained": 17.952736472 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.5571236082547337, |
| "grad_norm": 0.12158813327550888, |
| "loss": 1.1054, |
| "loss_ce": 1.0969467163085938, |
| "loss_region": 0.06000404804944992, |
| "loss_total": 1.1569507122039795, |
| "lr": 0.0010217464606483118, |
| "router/selected_tokens_s0": 7385.25, |
| "router/selected_tokens_s1": 4268.75, |
| "step": 5490, |
| "tokens_trained": 17.985498936 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.5599602865045032, |
| "grad_norm": 0.16777564585208893, |
| "loss": 1.1088, |
| "loss_ce": 1.074271559715271, |
| "loss_region": 0.05999928340315819, |
| "loss_total": 1.1342707872390747, |
| "lr": 0.0010213395686644608, |
| "router/selected_tokens_s0": 7404.625, |
| "router/selected_tokens_s1": 4339.0, |
| "step": 5500, |
| "tokens_trained": 18.018264376 |
| }, |
| { |
| "epoch": 1.5599602865045032, |
| "eval_ppl": 2.8623211345020083, |
| "eval_runtime": 1.0594, |
| "step": 5500, |
| "tokens_trained": 18.018264376 |
| }, |
| { |
| "epoch": 1.5599602865045032, |
| "eval_F": 0.3352866109331458, |
| "eval_F_cds": 0.332042527840154, |
| "eval_F_dig": 0.3194780842253301, |
| "eval_F_exon": 0.3372331949511469, |
| "eval_F_intron": 0.33585922570081406, |
| "eval_F_nig": 0.3355467358672976, |
| "eval_F_promoter": 0.3334546404643264, |
| "eval_F_utr": 0.3378597747945064, |
| "eval_G": 0.3283424776227635, |
| "eval_G_cds": 0.3256197978724623, |
| "eval_G_dig": 0.2966016937358387, |
| "eval_G_exon": 0.33176564072524845, |
| "eval_G_intron": 0.3290947716744763, |
| "eval_G_nig": 0.32887621721238486, |
| "eval_G_promoter": 0.32587681763864007, |
| "eval_G_utr": 0.33184780954482207, |
| "eval_avg_bp_per_token": 2.9825229144011187, |
| "eval_bp_per_token/cds": 3.0116624111517494, |
| "eval_bp_per_token/dig": 3.1301051601858645, |
| "eval_bp_per_token/exon": 2.965307137527978, |
| "eval_bp_per_token/intron": 2.9774379367229518, |
| "eval_bp_per_token/nig": 2.980210781712033, |
| "eval_bp_per_token/promoter": 2.998908632992864, |
| "eval_bp_per_token/utr": 2.9598078096400244, |
| "eval_ppl_cds": 3.5822509806281193, |
| "eval_ppl_dig": 1.121855248822277, |
| "eval_ppl_exon": 3.2827148915254987, |
| "eval_ppl_intron": 2.9368199473955423, |
| "eval_ppl_nig": 2.662848735088766, |
| "eval_ppl_promoter": 3.249090164143398, |
| "eval_ppl_utr": 3.2504448948002427, |
| "step": 5500, |
| "tokens_trained": 18.018264376 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.5627969647542728, |
| "grad_norm": 0.23786824941635132, |
| "loss": 1.1033, |
| "loss_ce": 1.0256643295288086, |
| "loss_region": 0.059994738548994064, |
| "loss_total": 1.0856590270996094, |
| "lr": 0.0010209326766806098, |
| "router/selected_tokens_s0": 7439.875, |
| "router/selected_tokens_s1": 4329.375, |
| "step": 5510, |
| "tokens_trained": 18.051029816 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.5656336430040423, |
| "grad_norm": 0.06368014961481094, |
| "loss": 1.1021, |
| "loss_ce": 1.013634204864502, |
| "loss_region": 0.05999471992254257, |
| "loss_total": 1.0736289024353027, |
| "lr": 0.001020525784696759, |
| "router/selected_tokens_s0": 7432.125, |
| "router/selected_tokens_s1": 4334.125, |
| "step": 5520, |
| "tokens_trained": 18.083794456 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.5684703212538118, |
| "grad_norm": 0.20794622600078583, |
| "loss": 1.0995, |
| "loss_ce": 0.9896392226219177, |
| "loss_region": 0.06000121310353279, |
| "loss_total": 1.049640417098999, |
| "lr": 0.001020118892712908, |
| "router/selected_tokens_s0": 7468.0, |
| "router/selected_tokens_s1": 4308.75, |
| "step": 5530, |
| "tokens_trained": 18.116559896 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.5713069995035813, |
| "grad_norm": 0.10307029634714127, |
| "loss": 1.1022, |
| "loss_ce": 1.1249936819076538, |
| "loss_region": 0.059996627271175385, |
| "loss_total": 1.1849902868270874, |
| "lr": 0.0010197120007290569, |
| "router/selected_tokens_s0": 7442.5, |
| "router/selected_tokens_s1": 4306.125, |
| "step": 5540, |
| "tokens_trained": 18.149325336 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.5741436777533508, |
| "grad_norm": 0.10810263454914093, |
| "loss": 1.1039, |
| "loss_ce": 1.0132877826690674, |
| "loss_region": 0.0599999763071537, |
| "loss_total": 1.0732877254486084, |
| "lr": 0.0010193051087452058, |
| "router/selected_tokens_s0": 7422.25, |
| "router/selected_tokens_s1": 4291.875, |
| "step": 5550, |
| "tokens_trained": 18.182090776 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.5769803560031204, |
| "grad_norm": 0.1716887205839157, |
| "loss": 1.1052, |
| "loss_ce": 0.8134817481040955, |
| "loss_region": 0.06000108644366264, |
| "loss_total": 0.8734828233718872, |
| "lr": 0.0010188982167613548, |
| "router/selected_tokens_s0": 7408.875, |
| "router/selected_tokens_s1": 4308.625, |
| "step": 5560, |
| "tokens_trained": 18.214852704 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.5798170342528899, |
| "grad_norm": 0.14704060554504395, |
| "loss": 1.1105, |
| "loss_ce": 1.1164242029190063, |
| "loss_region": 0.05999663099646568, |
| "loss_total": 1.17642080783844, |
| "lr": 0.0010184913247775038, |
| "router/selected_tokens_s0": 7460.25, |
| "router/selected_tokens_s1": 4319.5, |
| "step": 5570, |
| "tokens_trained": 18.247618144 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.5826537125026594, |
| "grad_norm": 0.20581626892089844, |
| "loss": 1.1033, |
| "loss_ce": 0.9779660701751709, |
| "loss_region": 0.06000097095966339, |
| "loss_total": 1.0379670858383179, |
| "lr": 0.0010180844327936527, |
| "router/selected_tokens_s0": 7422.875, |
| "router/selected_tokens_s1": 4289.0, |
| "step": 5580, |
| "tokens_trained": 18.280383584 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.585490390752429, |
| "grad_norm": 0.14134176075458527, |
| "loss": 1.0991, |
| "loss_ce": 0.9097084999084473, |
| "loss_region": 0.05999951437115669, |
| "loss_total": 0.9697080254554749, |
| "lr": 0.0010176775408098017, |
| "router/selected_tokens_s0": 7425.875, |
| "router/selected_tokens_s1": 4281.125, |
| "step": 5590, |
| "tokens_trained": 18.313149024 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.5883270690021984, |
| "grad_norm": 0.19416779279708862, |
| "loss": 1.099, |
| "loss_ce": 1.0267307758331299, |
| "loss_region": 0.06000243127346039, |
| "loss_total": 1.0867332220077515, |
| "lr": 0.0010172706488259507, |
| "router/selected_tokens_s0": 7437.625, |
| "router/selected_tokens_s1": 4288.375, |
| "step": 5600, |
| "tokens_trained": 18.345914464 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.591163747251968, |
| "grad_norm": 0.28280109167099, |
| "loss": 1.1031, |
| "loss_ce": 1.0410712957382202, |
| "loss_region": 0.06000087037682533, |
| "loss_total": 1.1010721921920776, |
| "lr": 0.0010168637568420996, |
| "router/selected_tokens_s0": 7408.0, |
| "router/selected_tokens_s1": 4275.625, |
| "step": 5610, |
| "tokens_trained": 18.378679104 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.5940004255017375, |
| "grad_norm": 0.26848140358924866, |
| "loss": 1.0972, |
| "loss_ce": 0.9527778029441833, |
| "loss_region": 0.059995293617248535, |
| "loss_total": 1.012773036956787, |
| "lr": 0.0010164568648582486, |
| "router/selected_tokens_s0": 7462.0, |
| "router/selected_tokens_s1": 4331.25, |
| "step": 5620, |
| "tokens_trained": 18.411444544 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.596837103751507, |
| "grad_norm": 0.22628232836723328, |
| "loss": 1.1009, |
| "loss_ce": 1.0201693773269653, |
| "loss_region": 0.060002438724040985, |
| "loss_total": 1.080171823501587, |
| "lr": 0.0010160499728743976, |
| "router/selected_tokens_s0": 7481.0, |
| "router/selected_tokens_s1": 4304.5, |
| "step": 5630, |
| "tokens_trained": 18.444209984 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.5996737820012765, |
| "grad_norm": 0.260987251996994, |
| "loss": 1.0964, |
| "loss_ce": 0.984906017780304, |
| "loss_region": 0.06002039834856987, |
| "loss_total": 1.044926404953003, |
| "lr": 0.0010156430808905465, |
| "router/selected_tokens_s0": 7382.0, |
| "router/selected_tokens_s1": 4233.75, |
| "step": 5640, |
| "tokens_trained": 18.476975424 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.602510460251046, |
| "grad_norm": 0.26559290289878845, |
| "loss": 1.1014, |
| "loss_ce": 1.1597309112548828, |
| "loss_region": 0.06001560389995575, |
| "loss_total": 1.219746470451355, |
| "lr": 0.0010152361889066955, |
| "router/selected_tokens_s0": 7370.0, |
| "router/selected_tokens_s1": 4235.625, |
| "step": 5650, |
| "tokens_trained": 18.509739264 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.6053471385008156, |
| "grad_norm": 0.20504187047481537, |
| "loss": 1.1017, |
| "loss_ce": 0.9569210410118103, |
| "loss_region": 0.05999953672289848, |
| "loss_total": 1.016920566558838, |
| "lr": 0.0010148292969228445, |
| "router/selected_tokens_s0": 7356.125, |
| "router/selected_tokens_s1": 4291.5, |
| "step": 5660, |
| "tokens_trained": 18.542503904 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.608183816750585, |
| "grad_norm": 0.150313138961792, |
| "loss": 1.0987, |
| "loss_ce": 1.0686254501342773, |
| "loss_region": 0.059998732060194016, |
| "loss_total": 1.1286242008209229, |
| "lr": 0.0010144224049389934, |
| "router/selected_tokens_s0": 7439.125, |
| "router/selected_tokens_s1": 4303.25, |
| "step": 5670, |
| "tokens_trained": 18.575269344 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.6110204950003546, |
| "grad_norm": 0.11161628365516663, |
| "loss": 1.0954, |
| "loss_ce": 0.9945487976074219, |
| "loss_region": 0.06000113487243652, |
| "loss_total": 1.0545499324798584, |
| "lr": 0.0010140155129551424, |
| "router/selected_tokens_s0": 7482.25, |
| "router/selected_tokens_s1": 4331.75, |
| "step": 5680, |
| "tokens_trained": 18.608031832 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.6138571732501241, |
| "grad_norm": 0.17794479429721832, |
| "loss": 1.0983, |
| "loss_ce": 1.112082600593567, |
| "loss_region": 0.05999523028731346, |
| "loss_total": 1.1720777750015259, |
| "lr": 0.0010136086209712914, |
| "router/selected_tokens_s0": 7418.125, |
| "router/selected_tokens_s1": 4316.625, |
| "step": 5690, |
| "tokens_trained": 18.640797272 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.6166938514998936, |
| "grad_norm": 0.16560673713684082, |
| "loss": 1.1002, |
| "loss_ce": 0.9786914587020874, |
| "loss_region": 0.06000278890132904, |
| "loss_total": 1.0386942625045776, |
| "lr": 0.0010132017289874405, |
| "router/selected_tokens_s0": 7344.625, |
| "router/selected_tokens_s1": 4255.5, |
| "step": 5700, |
| "tokens_trained": 18.673562712 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.6195305297496632, |
| "grad_norm": 0.10915427654981613, |
| "loss": 1.0996, |
| "loss_ce": 1.0432522296905518, |
| "loss_region": 0.06000416725873947, |
| "loss_total": 1.103256344795227, |
| "lr": 0.0010127948370035895, |
| "router/selected_tokens_s0": 7388.625, |
| "router/selected_tokens_s1": 4249.625, |
| "step": 5710, |
| "tokens_trained": 18.706328152 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.6223672079994327, |
| "grad_norm": 0.16476505994796753, |
| "loss": 1.104, |
| "loss_ce": 1.1305444240570068, |
| "loss_region": 0.05999914929270744, |
| "loss_total": 1.190543532371521, |
| "lr": 0.0010123879450197385, |
| "router/selected_tokens_s0": 7396.75, |
| "router/selected_tokens_s1": 4338.125, |
| "step": 5720, |
| "tokens_trained": 18.739093568 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.6252038862492022, |
| "grad_norm": 0.10708323866128922, |
| "loss": 1.1039, |
| "loss_ce": 1.016690731048584, |
| "loss_region": 0.05999882519245148, |
| "loss_total": 1.076689600944519, |
| "lr": 0.0010119810530358872, |
| "router/selected_tokens_s0": 7452.875, |
| "router/selected_tokens_s1": 4316.375, |
| "step": 5730, |
| "tokens_trained": 18.771858208 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.6280405644989717, |
| "grad_norm": 0.21779781579971313, |
| "loss": 1.1069, |
| "loss_ce": 1.1287535429000854, |
| "loss_region": 0.05999905243515968, |
| "loss_total": 1.1887526512145996, |
| "lr": 0.0010115741610520362, |
| "router/selected_tokens_s0": 7480.125, |
| "router/selected_tokens_s1": 4319.5, |
| "step": 5740, |
| "tokens_trained": 18.804619848 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.6308772427487412, |
| "grad_norm": 0.23356978595256805, |
| "loss": 1.0974, |
| "loss_ce": 1.1192667484283447, |
| "loss_region": 0.06000344455242157, |
| "loss_total": 1.1792701482772827, |
| "lr": 0.0010111672690681851, |
| "router/selected_tokens_s0": 7453.625, |
| "router/selected_tokens_s1": 4299.375, |
| "step": 5750, |
| "tokens_trained": 18.837385288 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.6337139209985108, |
| "grad_norm": 0.06789640337228775, |
| "loss": 1.0989, |
| "loss_ce": 1.0962039232254028, |
| "loss_region": 0.06000284478068352, |
| "loss_total": 1.156206727027893, |
| "lr": 0.0010107603770843341, |
| "router/selected_tokens_s0": 7391.125, |
| "router/selected_tokens_s1": 4261.5, |
| "step": 5760, |
| "tokens_trained": 18.870150648 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.6365505992482803, |
| "grad_norm": 0.1432223916053772, |
| "loss": 1.0971, |
| "loss_ce": 1.0244016647338867, |
| "loss_region": 0.060005053877830505, |
| "loss_total": 1.0844067335128784, |
| "lr": 0.0010103534851004833, |
| "router/selected_tokens_s0": 7423.875, |
| "router/selected_tokens_s1": 4263.25, |
| "step": 5770, |
| "tokens_trained": 18.902916088 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.6393872774980498, |
| "grad_norm": 0.26096969842910767, |
| "loss": 1.1026, |
| "loss_ce": 1.0609381198883057, |
| "loss_region": 0.0600053071975708, |
| "loss_total": 1.1209434270858765, |
| "lr": 0.0010099465931166323, |
| "router/selected_tokens_s0": 7478.75, |
| "router/selected_tokens_s1": 4297.625, |
| "step": 5780, |
| "tokens_trained": 18.935681528 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.6422239557478193, |
| "grad_norm": 0.12078365683555603, |
| "loss": 1.0929, |
| "loss_ce": 0.9542346000671387, |
| "loss_region": 0.06000049039721489, |
| "loss_total": 1.0142351388931274, |
| "lr": 0.0010095397011327812, |
| "router/selected_tokens_s0": 7443.0, |
| "router/selected_tokens_s1": 4298.875, |
| "step": 5790, |
| "tokens_trained": 18.968446968 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.6450606339975888, |
| "grad_norm": 0.19275307655334473, |
| "loss": 1.0965, |
| "loss_ce": 0.9804484248161316, |
| "loss_region": 0.060001272708177567, |
| "loss_total": 1.0404497385025024, |
| "lr": 0.0010091328091489302, |
| "router/selected_tokens_s0": 7427.5, |
| "router/selected_tokens_s1": 4289.875, |
| "step": 5800, |
| "tokens_trained": 19.001210808 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.6478973122473584, |
| "grad_norm": 0.18818101286888123, |
| "loss": 1.0954, |
| "loss_ce": 1.0286747217178345, |
| "loss_region": 0.060000162571668625, |
| "loss_total": 1.0886749029159546, |
| "lr": 0.0010087259171650792, |
| "router/selected_tokens_s0": 7470.625, |
| "router/selected_tokens_s1": 4308.375, |
| "step": 5810, |
| "tokens_trained": 19.033976248 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.6507339904971279, |
| "grad_norm": 0.10521817207336426, |
| "loss": 1.0971, |
| "loss_ce": 1.0690152645111084, |
| "loss_region": 0.059997450560331345, |
| "loss_total": 1.1290127038955688, |
| "lr": 0.0010083190251812281, |
| "router/selected_tokens_s0": 7459.625, |
| "router/selected_tokens_s1": 4354.125, |
| "step": 5820, |
| "tokens_trained": 19.066741688 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.6535706687468974, |
| "grad_norm": 0.14187674224376678, |
| "loss": 1.0965, |
| "loss_ce": 1.1216542720794678, |
| "loss_region": 0.059998299926519394, |
| "loss_total": 1.181652545928955, |
| "lr": 0.001007912133197377, |
| "router/selected_tokens_s0": 7462.75, |
| "router/selected_tokens_s1": 4328.25, |
| "step": 5830, |
| "tokens_trained": 19.099507128 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.656407346996667, |
| "grad_norm": 0.22437113523483276, |
| "loss": 1.0949, |
| "loss_ce": 1.0522420406341553, |
| "loss_region": 0.06000174209475517, |
| "loss_total": 1.1122437715530396, |
| "lr": 0.001007505241213526, |
| "router/selected_tokens_s0": 7470.0, |
| "router/selected_tokens_s1": 4301.625, |
| "step": 5840, |
| "tokens_trained": 19.132272568 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.6592440252464364, |
| "grad_norm": 0.16007539629936218, |
| "loss": 1.0967, |
| "loss_ce": 1.0500563383102417, |
| "loss_region": 0.060007136315107346, |
| "loss_total": 1.1100634336471558, |
| "lr": 0.001007098349229675, |
| "router/selected_tokens_s0": 7476.625, |
| "router/selected_tokens_s1": 4285.75, |
| "step": 5850, |
| "tokens_trained": 19.165038008 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.662080703496206, |
| "grad_norm": 0.3188250958919525, |
| "loss": 1.0938, |
| "loss_ce": 0.9961692690849304, |
| "loss_region": 0.06000681594014168, |
| "loss_total": 1.0561760663986206, |
| "lr": 0.001006691457245824, |
| "router/selected_tokens_s0": 7489.375, |
| "router/selected_tokens_s1": 4298.25, |
| "step": 5860, |
| "tokens_trained": 19.197803448 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.6649173817459755, |
| "grad_norm": 0.12829625606536865, |
| "loss": 1.0978, |
| "loss_ce": 1.124149203300476, |
| "loss_region": 0.060012202709913254, |
| "loss_total": 1.1841614246368408, |
| "lr": 0.001006284565261973, |
| "router/selected_tokens_s0": 7500.125, |
| "router/selected_tokens_s1": 4290.875, |
| "step": 5870, |
| "tokens_trained": 19.230568888 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.667754059995745, |
| "grad_norm": 0.12101241201162338, |
| "loss": 1.0936, |
| "loss_ce": 1.0956649780273438, |
| "loss_region": 0.06001261621713638, |
| "loss_total": 1.1556775569915771, |
| "lr": 0.001005877673278122, |
| "router/selected_tokens_s0": 7488.875, |
| "router/selected_tokens_s1": 4266.125, |
| "step": 5880, |
| "tokens_trained": 19.263333528 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.6705907382455145, |
| "grad_norm": 0.17701566219329834, |
| "loss": 1.0922, |
| "loss_ce": 1.0158523321151733, |
| "loss_region": 0.060010313987731934, |
| "loss_total": 1.0758626461029053, |
| "lr": 0.0010054707812942709, |
| "router/selected_tokens_s0": 7456.25, |
| "router/selected_tokens_s1": 4267.875, |
| "step": 5890, |
| "tokens_trained": 19.296098168 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.673427416495284, |
| "grad_norm": 0.19043512642383575, |
| "loss": 1.0986, |
| "loss_ce": 1.003629207611084, |
| "loss_region": 0.060002945363521576, |
| "loss_total": 1.0636321306228638, |
| "lr": 0.0010050638893104198, |
| "router/selected_tokens_s0": 7441.375, |
| "router/selected_tokens_s1": 4296.5, |
| "step": 5900, |
| "tokens_trained": 19.328862688 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.6762640947450536, |
| "grad_norm": 0.17591844499111176, |
| "loss": 1.0986, |
| "loss_ce": 1.1180049180984497, |
| "loss_region": 0.059997666627168655, |
| "loss_total": 1.1780025959014893, |
| "lr": 0.0010046569973265688, |
| "router/selected_tokens_s0": 7446.375, |
| "router/selected_tokens_s1": 4344.25, |
| "step": 5910, |
| "tokens_trained": 19.361627328 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.679100772994823, |
| "grad_norm": 0.1346396952867508, |
| "loss": 1.0925, |
| "loss_ce": 0.998043954372406, |
| "loss_region": 0.05999904125928879, |
| "loss_total": 1.0580430030822754, |
| "lr": 0.0010042501053427178, |
| "router/selected_tokens_s0": 7426.125, |
| "router/selected_tokens_s1": 4302.25, |
| "step": 5920, |
| "tokens_trained": 19.394392768 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.6819374512445926, |
| "grad_norm": 0.17575864493846893, |
| "loss": 1.0988, |
| "loss_ce": 1.1197152137756348, |
| "loss_region": 0.060000572353601456, |
| "loss_total": 1.1797157526016235, |
| "lr": 0.0010038432133588667, |
| "router/selected_tokens_s0": 7466.625, |
| "router/selected_tokens_s1": 4301.625, |
| "step": 5930, |
| "tokens_trained": 19.427158208 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.6847741294943621, |
| "grad_norm": 0.2070358544588089, |
| "loss": 1.0926, |
| "loss_ce": 1.0191881656646729, |
| "loss_region": 0.06000307574868202, |
| "loss_total": 1.0791912078857422, |
| "lr": 0.0010034363213750157, |
| "router/selected_tokens_s0": 7439.0, |
| "router/selected_tokens_s1": 4286.375, |
| "step": 5940, |
| "tokens_trained": 19.459922848 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.6876108077441316, |
| "grad_norm": 0.09869208186864853, |
| "loss": 1.0965, |
| "loss_ce": 1.076581358909607, |
| "loss_region": 0.060005977749824524, |
| "loss_total": 1.136587381362915, |
| "lr": 0.0010030294293911649, |
| "router/selected_tokens_s0": 7388.0, |
| "router/selected_tokens_s1": 4243.875, |
| "step": 5950, |
| "tokens_trained": 19.492688288 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.6904474859939012, |
| "grad_norm": 0.13199345767498016, |
| "loss": 1.0979, |
| "loss_ce": 1.0537714958190918, |
| "loss_region": 0.06000440567731857, |
| "loss_total": 1.1137758493423462, |
| "lr": 0.0010026225374073139, |
| "router/selected_tokens_s0": 7365.375, |
| "router/selected_tokens_s1": 4246.0, |
| "step": 5960, |
| "tokens_trained": 19.525453728 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.6932841642436707, |
| "grad_norm": 0.09942718595266342, |
| "loss": 1.0993, |
| "loss_ce": 0.9943310618400574, |
| "loss_region": 0.05999838560819626, |
| "loss_total": 1.0543293952941895, |
| "lr": 0.0010022156454234628, |
| "router/selected_tokens_s0": 7437.0, |
| "router/selected_tokens_s1": 4312.875, |
| "step": 5970, |
| "tokens_trained": 19.558219168 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.6961208424934402, |
| "grad_norm": 0.1292436569929123, |
| "loss": 1.0892, |
| "loss_ce": 1.0568798780441284, |
| "loss_region": 0.05999910086393356, |
| "loss_total": 1.1168789863586426, |
| "lr": 0.0010018087534396116, |
| "router/selected_tokens_s0": 7451.625, |
| "router/selected_tokens_s1": 4347.25, |
| "step": 5980, |
| "tokens_trained": 19.590983808 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.6989575207432097, |
| "grad_norm": 0.12750542163848877, |
| "loss": 1.0923, |
| "loss_ce": 1.069689393043518, |
| "loss_region": 0.059999238699674606, |
| "loss_total": 1.1296886205673218, |
| "lr": 0.0010014018614557605, |
| "router/selected_tokens_s0": 7408.125, |
| "router/selected_tokens_s1": 4289.875, |
| "step": 5990, |
| "tokens_trained": 19.623749248 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.7017941989929792, |
| "grad_norm": 0.14449970424175262, |
| "loss": 1.0972, |
| "loss_ce": 1.070797324180603, |
| "loss_region": 0.06000017747282982, |
| "loss_total": 1.1307975053787231, |
| "lr": 0.0010009949694719095, |
| "router/selected_tokens_s0": 7418.375, |
| "router/selected_tokens_s1": 4301.5, |
| "step": 6000, |
| "tokens_trained": 19.65651468 |
| }, |
| { |
| "epoch": 1.7017941989929792, |
| "eval_ppl": 2.8376622214251768, |
| "eval_runtime": 1.0214, |
| "step": 6000, |
| "tokens_trained": 19.65651468 |
| }, |
| { |
| "epoch": 1.7017941989929792, |
| "eval_F": 0.33497148214704275, |
| "eval_F_cds": 0.33165299482150223, |
| "eval_F_dig": 0.31479021798578016, |
| "eval_F_exon": 0.33605904306621376, |
| "eval_F_intron": 0.3353029212739146, |
| "eval_F_nig": 0.3358393120643159, |
| "eval_F_promoter": 0.333664595114122, |
| "eval_F_utr": 0.33729289597773715, |
| "eval_G": 0.3311586550239114, |
| "eval_G_cds": 0.3296152860203474, |
| "eval_G_dig": 0.29498825591843114, |
| "eval_G_exon": 0.3339356431102445, |
| "eval_G_intron": 0.3316809731709803, |
| "eval_G_nig": 0.3319550251551926, |
| "eval_G_promoter": 0.3294627592210916, |
| "eval_G_utr": 0.33407873737406274, |
| "eval_avg_bp_per_token": 2.9853287616915076, |
| "eval_bp_per_token/cds": 3.015199668370872, |
| "eval_bp_per_token/dig": 3.1767187887813355, |
| "eval_bp_per_token/exon": 2.975667581731969, |
| "eval_bp_per_token/intron": 2.982377833753149, |
| "eval_bp_per_token/nig": 2.977614484299837, |
| "eval_bp_per_token/promoter": 2.9970216038593303, |
| "eval_bp_per_token/utr": 2.9647822765469822, |
| "eval_ppl_cds": 3.545879467798099, |
| "eval_ppl_dig": 1.1176450122381292, |
| "eval_ppl_exon": 3.264747422456715, |
| "eval_ppl_intron": 2.9216169428282455, |
| "eval_ppl_nig": 2.628432349810158, |
| "eval_ppl_promoter": 3.22240321944314, |
| "eval_ppl_utr": 3.223965449989413, |
| "step": 6000, |
| "tokens_trained": 19.65651468 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.7046308772427488, |
| "grad_norm": 0.34348469972610474, |
| "loss": 1.0964, |
| "loss_ce": 1.1062127351760864, |
| "loss_region": 0.05999796837568283, |
| "loss_total": 1.166210651397705, |
| "lr": 0.0010005880774880585, |
| "router/selected_tokens_s0": 7427.375, |
| "router/selected_tokens_s1": 4319.5, |
| "step": 6010, |
| "tokens_trained": 19.68928012 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.7074675554925183, |
| "grad_norm": 0.10299943387508392, |
| "loss": 1.0924, |
| "loss_ce": 1.0347900390625, |
| "loss_region": 0.059998273849487305, |
| "loss_total": 1.0947883129119873, |
| "lr": 0.0010001811855042076, |
| "router/selected_tokens_s0": 7407.0, |
| "router/selected_tokens_s1": 4302.875, |
| "step": 6020, |
| "tokens_trained": 19.722045536 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.7103042337422878, |
| "grad_norm": 0.08903520554304123, |
| "loss": 1.0907, |
| "loss_ce": 1.0642378330230713, |
| "loss_region": 0.06000013276934624, |
| "loss_total": 1.1242380142211914, |
| "lr": 0.0009997742935203566, |
| "router/selected_tokens_s0": 7402.625, |
| "router/selected_tokens_s1": 4288.0, |
| "step": 6030, |
| "tokens_trained": 19.754810976 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.7131409119920573, |
| "grad_norm": 0.15829990804195404, |
| "loss": 1.0956, |
| "loss_ce": 1.1073267459869385, |
| "loss_region": 0.059999532997608185, |
| "loss_total": 1.1673263311386108, |
| "lr": 0.0009993674015365056, |
| "router/selected_tokens_s0": 7419.875, |
| "router/selected_tokens_s1": 4291.25, |
| "step": 6040, |
| "tokens_trained": 19.787576416 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.7159775902418268, |
| "grad_norm": 0.19429846107959747, |
| "loss": 1.0899, |
| "loss_ce": 0.995491623878479, |
| "loss_region": 0.06000322476029396, |
| "loss_total": 1.0554949045181274, |
| "lr": 0.0009989605095526545, |
| "router/selected_tokens_s0": 7405.25, |
| "router/selected_tokens_s1": 4325.25, |
| "step": 6050, |
| "tokens_trained": 19.820338544 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.7188142684915964, |
| "grad_norm": 0.06903360784053802, |
| "loss": 1.0944, |
| "loss_ce": 1.1205158233642578, |
| "loss_region": 0.06000121310353279, |
| "loss_total": 1.1805170774459839, |
| "lr": 0.0009985536175688035, |
| "router/selected_tokens_s0": 7457.0, |
| "router/selected_tokens_s1": 4315.125, |
| "step": 6060, |
| "tokens_trained": 19.853099816 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.7216509467413659, |
| "grad_norm": 0.25323474407196045, |
| "loss": 1.0937, |
| "loss_ce": 1.0416542291641235, |
| "loss_region": 0.0600033737719059, |
| "loss_total": 1.1016576290130615, |
| "lr": 0.0009981467255849525, |
| "router/selected_tokens_s0": 7445.375, |
| "router/selected_tokens_s1": 4345.5, |
| "step": 6070, |
| "tokens_trained": 19.885862632 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.7244876249911354, |
| "grad_norm": 0.137556791305542, |
| "loss": 1.0979, |
| "loss_ce": 0.9344243407249451, |
| "loss_region": 0.06000184267759323, |
| "loss_total": 0.9944261908531189, |
| "lr": 0.0009977398336011014, |
| "router/selected_tokens_s0": 7469.125, |
| "router/selected_tokens_s1": 4310.875, |
| "step": 6080, |
| "tokens_trained": 19.918628072 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.727324303240905, |
| "grad_norm": 0.20417067408561707, |
| "loss": 1.0935, |
| "loss_ce": 1.066884160041809, |
| "loss_region": 0.06000082194805145, |
| "loss_total": 1.126884937286377, |
| "lr": 0.0009973329416172504, |
| "router/selected_tokens_s0": 7440.375, |
| "router/selected_tokens_s1": 4329.125, |
| "step": 6090, |
| "tokens_trained": 19.951393512 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.7301609814906744, |
| "grad_norm": 0.14983755350112915, |
| "loss": 1.0902, |
| "loss_ce": 1.0103222131729126, |
| "loss_region": 0.06000688672065735, |
| "loss_total": 1.0703290700912476, |
| "lr": 0.0009969260496333994, |
| "router/selected_tokens_s0": 7381.125, |
| "router/selected_tokens_s1": 4263.5, |
| "step": 6100, |
| "tokens_trained": 19.984154824 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.732997659740444, |
| "grad_norm": 0.13496913015842438, |
| "loss": 1.0922, |
| "loss_ce": 1.0360006093978882, |
| "loss_region": 0.06000128760933876, |
| "loss_total": 1.0960018634796143, |
| "lr": 0.0009965191576495483, |
| "router/selected_tokens_s0": 7387.75, |
| "router/selected_tokens_s1": 4317.625, |
| "step": 6110, |
| "tokens_trained": 20.016919464 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.7358343379902135, |
| "grad_norm": 0.10311873257160187, |
| "loss": 1.085, |
| "loss_ce": 1.073644995689392, |
| "loss_region": 0.0599980466067791, |
| "loss_total": 1.1336430311203003, |
| "lr": 0.0009961122656656973, |
| "router/selected_tokens_s0": 7395.875, |
| "router/selected_tokens_s1": 4310.0, |
| "step": 6120, |
| "tokens_trained": 20.049684904 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.738671016239983, |
| "grad_norm": 0.17320473492145538, |
| "loss": 1.0961, |
| "loss_ce": 1.0533865690231323, |
| "loss_region": 0.060003116726875305, |
| "loss_total": 1.1133897304534912, |
| "lr": 0.0009957053736818463, |
| "router/selected_tokens_s0": 7374.125, |
| "router/selected_tokens_s1": 4297.125, |
| "step": 6130, |
| "tokens_trained": 20.082450344 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.7415076944897525, |
| "grad_norm": 0.25150588154792786, |
| "loss": 1.091, |
| "loss_ce": 1.1272531747817993, |
| "loss_region": 0.06000062823295593, |
| "loss_total": 1.1872538328170776, |
| "lr": 0.0009952984816979952, |
| "router/selected_tokens_s0": 7418.125, |
| "router/selected_tokens_s1": 4277.875, |
| "step": 6140, |
| "tokens_trained": 20.115215784 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.744344372739522, |
| "grad_norm": 0.14748281240463257, |
| "loss": 1.0968, |
| "loss_ce": 1.0051281452178955, |
| "loss_region": 0.0599982924759388, |
| "loss_total": 1.0651264190673828, |
| "lr": 0.0009948915897141442, |
| "router/selected_tokens_s0": 7441.25, |
| "router/selected_tokens_s1": 4307.0, |
| "step": 6150, |
| "tokens_trained": 20.147981224 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.7471810509892916, |
| "grad_norm": 0.23062776029109955, |
| "loss": 1.0922, |
| "loss_ce": 0.9553651213645935, |
| "loss_region": 0.06000138446688652, |
| "loss_total": 1.015366554260254, |
| "lr": 0.0009944846977302932, |
| "router/selected_tokens_s0": 7438.125, |
| "router/selected_tokens_s1": 4299.0, |
| "step": 6160, |
| "tokens_trained": 20.180746664 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.750017729239061, |
| "grad_norm": 0.12702351808547974, |
| "loss": 1.0923, |
| "loss_ce": 0.978821873664856, |
| "loss_region": 0.060008883476257324, |
| "loss_total": 1.0388307571411133, |
| "lr": 0.0009940778057464421, |
| "router/selected_tokens_s0": 7447.75, |
| "router/selected_tokens_s1": 4285.75, |
| "step": 6170, |
| "tokens_trained": 20.213511184 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.7528544074888306, |
| "grad_norm": 0.16335810720920563, |
| "loss": 1.0928, |
| "loss_ce": 0.9987698793411255, |
| "loss_region": 0.06001174449920654, |
| "loss_total": 1.058781623840332, |
| "lr": 0.000993670913762591, |
| "router/selected_tokens_s0": 7450.625, |
| "router/selected_tokens_s1": 4250.875, |
| "step": 6180, |
| "tokens_trained": 20.246275824 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.7556910857386, |
| "grad_norm": 0.13845063745975494, |
| "loss": 1.0897, |
| "loss_ce": 1.0286316871643066, |
| "loss_region": 0.060003817081451416, |
| "loss_total": 1.0886354446411133, |
| "lr": 0.00099326402177874, |
| "router/selected_tokens_s0": 7433.75, |
| "router/selected_tokens_s1": 4268.75, |
| "step": 6190, |
| "tokens_trained": 20.279039664 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.7585277639883696, |
| "grad_norm": 0.12126460671424866, |
| "loss": 1.0927, |
| "loss_ce": 0.813042163848877, |
| "loss_region": 0.060008224099874496, |
| "loss_total": 0.8730503916740417, |
| "lr": 0.0009928571297948892, |
| "router/selected_tokens_s0": 7334.0, |
| "router/selected_tokens_s1": 4220.125, |
| "step": 6200, |
| "tokens_trained": 20.311804304 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.7613644422381391, |
| "grad_norm": 0.2719525098800659, |
| "loss": 1.096, |
| "loss_ce": 0.9927342534065247, |
| "loss_region": 0.06000946834683418, |
| "loss_total": 1.052743673324585, |
| "lr": 0.0009924502378110382, |
| "router/selected_tokens_s0": 7441.5, |
| "router/selected_tokens_s1": 4231.375, |
| "step": 6210, |
| "tokens_trained": 20.344569744 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.7642011204879087, |
| "grad_norm": 0.11398852616548538, |
| "loss": 1.0905, |
| "loss_ce": 1.0965769290924072, |
| "loss_region": 0.06000201404094696, |
| "loss_total": 1.1565788984298706, |
| "lr": 0.0009920433458271872, |
| "router/selected_tokens_s0": 7465.75, |
| "router/selected_tokens_s1": 4339.5, |
| "step": 6220, |
| "tokens_trained": 20.377335184 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.7670377987376782, |
| "grad_norm": 0.09571532905101776, |
| "loss": 1.087, |
| "loss_ce": 1.0089653730392456, |
| "loss_region": 0.060008108615875244, |
| "loss_total": 1.0689735412597656, |
| "lr": 0.000991636453843336, |
| "router/selected_tokens_s0": 7477.5, |
| "router/selected_tokens_s1": 4277.75, |
| "step": 6230, |
| "tokens_trained": 20.410099824 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.7698744769874477, |
| "grad_norm": 0.16976556181907654, |
| "loss": 1.0848, |
| "loss_ce": 0.9082801938056946, |
| "loss_region": 0.06001298874616623, |
| "loss_total": 0.9682931900024414, |
| "lr": 0.0009912295618594849, |
| "router/selected_tokens_s0": 7406.5, |
| "router/selected_tokens_s1": 4263.375, |
| "step": 6240, |
| "tokens_trained": 20.442865264 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.7727111552372172, |
| "grad_norm": 0.12344443053007126, |
| "loss": 1.0826, |
| "loss_ce": 1.0324137210845947, |
| "loss_region": 0.05999824032187462, |
| "loss_total": 1.092411994934082, |
| "lr": 0.0009908226698756338, |
| "router/selected_tokens_s0": 7416.75, |
| "router/selected_tokens_s1": 4288.875, |
| "step": 6250, |
| "tokens_trained": 20.475630704 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.7755478334869867, |
| "grad_norm": 0.1562611311674118, |
| "loss": 1.0947, |
| "loss_ce": 0.9874376058578491, |
| "loss_region": 0.060002829879522324, |
| "loss_total": 1.0474404096603394, |
| "lr": 0.0009904157778917828, |
| "router/selected_tokens_s0": 7420.375, |
| "router/selected_tokens_s1": 4293.25, |
| "step": 6260, |
| "tokens_trained": 20.508396144 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.7783845117367563, |
| "grad_norm": 0.12798430025577545, |
| "loss": 1.0816, |
| "loss_ce": 1.040648341178894, |
| "loss_region": 0.060009755194187164, |
| "loss_total": 1.1006580591201782, |
| "lr": 0.000990008885907932, |
| "router/selected_tokens_s0": 7448.75, |
| "router/selected_tokens_s1": 4249.125, |
| "step": 6270, |
| "tokens_trained": 20.541161584 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.7812211899865258, |
| "grad_norm": 0.18397857248783112, |
| "loss": 1.0944, |
| "loss_ce": 1.0447720289230347, |
| "loss_region": 0.059998903423547745, |
| "loss_total": 1.1047708988189697, |
| "lr": 0.000989601993924081, |
| "router/selected_tokens_s0": 7452.875, |
| "router/selected_tokens_s1": 4298.0, |
| "step": 6280, |
| "tokens_trained": 20.573927024 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.7840578682362953, |
| "grad_norm": 0.21940062940120697, |
| "loss": 1.0907, |
| "loss_ce": 1.0577622652053833, |
| "loss_region": 0.05999821051955223, |
| "loss_total": 1.117760419845581, |
| "lr": 0.00098919510194023, |
| "router/selected_tokens_s0": 7429.375, |
| "router/selected_tokens_s1": 4307.75, |
| "step": 6290, |
| "tokens_trained": 20.606692464 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.7868945464860648, |
| "grad_norm": 0.10880064219236374, |
| "loss": 1.0928, |
| "loss_ce": 1.0223802328109741, |
| "loss_region": 0.060006335377693176, |
| "loss_total": 1.0823866128921509, |
| "lr": 0.0009887882099563789, |
| "router/selected_tokens_s0": 7338.0, |
| "router/selected_tokens_s1": 4266.0, |
| "step": 6300, |
| "tokens_trained": 20.639457104 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.7897312247358343, |
| "grad_norm": 0.312143474817276, |
| "loss": 1.0911, |
| "loss_ce": 1.0233956575393677, |
| "loss_region": 0.05999905988574028, |
| "loss_total": 1.0833947658538818, |
| "lr": 0.0009883813179725278, |
| "router/selected_tokens_s0": 7434.375, |
| "router/selected_tokens_s1": 4351.375, |
| "step": 6310, |
| "tokens_trained": 20.672222544 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.7925679029856039, |
| "grad_norm": 0.10863707214593887, |
| "loss": 1.0938, |
| "loss_ce": 1.0155285596847534, |
| "loss_region": 0.06000586226582527, |
| "loss_total": 1.075534462928772, |
| "lr": 0.0009879744259886768, |
| "router/selected_tokens_s0": 7493.0, |
| "router/selected_tokens_s1": 4304.25, |
| "step": 6320, |
| "tokens_trained": 20.704987984 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.7954045812353734, |
| "grad_norm": 0.08422418683767319, |
| "loss": 1.0861, |
| "loss_ce": 1.0128514766693115, |
| "loss_region": 0.06000527739524841, |
| "loss_total": 1.0728567838668823, |
| "lr": 0.0009875675340048258, |
| "router/selected_tokens_s0": 7379.5, |
| "router/selected_tokens_s1": 4237.25, |
| "step": 6330, |
| "tokens_trained": 20.737753424 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.798241259485143, |
| "grad_norm": 0.12360615283250809, |
| "loss": 1.0856, |
| "loss_ce": 1.1110481023788452, |
| "loss_region": 0.05999955162405968, |
| "loss_total": 1.1710476875305176, |
| "lr": 0.0009871606420209747, |
| "router/selected_tokens_s0": 7395.5, |
| "router/selected_tokens_s1": 4270.875, |
| "step": 6340, |
| "tokens_trained": 20.770518864 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.8010779377349124, |
| "grad_norm": 0.16334226727485657, |
| "loss": 1.0918, |
| "loss_ce": 0.9591702818870544, |
| "loss_region": 0.06000158563256264, |
| "loss_total": 1.0191718339920044, |
| "lr": 0.0009867537500371237, |
| "router/selected_tokens_s0": 7417.375, |
| "router/selected_tokens_s1": 4286.75, |
| "step": 6350, |
| "tokens_trained": 20.803284304 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.803914615984682, |
| "grad_norm": 0.09888935089111328, |
| "loss": 1.087, |
| "loss_ce": 0.9803853631019592, |
| "loss_region": 0.059998247772455215, |
| "loss_total": 1.0403835773468018, |
| "lr": 0.0009863468580532727, |
| "router/selected_tokens_s0": 7420.0, |
| "router/selected_tokens_s1": 4307.25, |
| "step": 6360, |
| "tokens_trained": 20.836048936 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.8067512942344515, |
| "grad_norm": 0.12638556957244873, |
| "loss": 1.0933, |
| "loss_ce": 1.0328595638275146, |
| "loss_region": 0.05999911203980446, |
| "loss_total": 1.0928586721420288, |
| "lr": 0.0009859399660694216, |
| "router/selected_tokens_s0": 7449.875, |
| "router/selected_tokens_s1": 4304.125, |
| "step": 6370, |
| "tokens_trained": 20.868814376 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.809587972484221, |
| "grad_norm": 0.08907432109117508, |
| "loss": 1.0935, |
| "loss_ce": 1.134492039680481, |
| "loss_region": 0.06000338867306709, |
| "loss_total": 1.194495439529419, |
| "lr": 0.0009855330740855706, |
| "router/selected_tokens_s0": 7486.375, |
| "router/selected_tokens_s1": 4320.375, |
| "step": 6380, |
| "tokens_trained": 20.901579816 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.8124246507339905, |
| "grad_norm": 0.24893128871917725, |
| "loss": 1.0895, |
| "loss_ce": 1.0298666954040527, |
| "loss_region": 0.059999607503414154, |
| "loss_total": 1.089866280555725, |
| "lr": 0.0009851261821017196, |
| "router/selected_tokens_s0": 7462.0, |
| "router/selected_tokens_s1": 4304.125, |
| "step": 6390, |
| "tokens_trained": 20.934345256 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.81526132898376, |
| "grad_norm": 0.11238834261894226, |
| "loss": 1.0971, |
| "loss_ce": 1.033632516860962, |
| "loss_region": 0.060001298785209656, |
| "loss_total": 1.093633770942688, |
| "lr": 0.0009847192901178685, |
| "router/selected_tokens_s0": 7403.625, |
| "router/selected_tokens_s1": 4291.5, |
| "step": 6400, |
| "tokens_trained": 20.967109088 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.8180980072335295, |
| "grad_norm": 0.2275191992521286, |
| "loss": 1.0873, |
| "loss_ce": 1.0130294561386108, |
| "loss_region": 0.05999968945980072, |
| "loss_total": 1.0730291604995728, |
| "lr": 0.0009843123981340175, |
| "router/selected_tokens_s0": 7404.0, |
| "router/selected_tokens_s1": 4286.625, |
| "step": 6410, |
| "tokens_trained": 20.999874528 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.820934685483299, |
| "grad_norm": 0.09575220942497253, |
| "loss": 1.0859, |
| "loss_ce": 1.015393853187561, |
| "loss_region": 0.05999882519245148, |
| "loss_total": 1.075392723083496, |
| "lr": 0.0009839055061501665, |
| "router/selected_tokens_s0": 7406.375, |
| "router/selected_tokens_s1": 4295.375, |
| "step": 6420, |
| "tokens_trained": 21.032639968 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.8237713637330686, |
| "grad_norm": 0.08018634468317032, |
| "loss": 1.0928, |
| "loss_ce": 0.9829267859458923, |
| "loss_region": 0.05999834090471268, |
| "loss_total": 1.0429251194000244, |
| "lr": 0.0009834986141663154, |
| "router/selected_tokens_s0": 7425.0, |
| "router/selected_tokens_s1": 4291.25, |
| "step": 6430, |
| "tokens_trained": 21.065405408 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.826608041982838, |
| "grad_norm": 0.1278838813304901, |
| "loss": 1.0901, |
| "loss_ce": 1.0687657594680786, |
| "loss_region": 0.06000075116753578, |
| "loss_total": 1.1287665367126465, |
| "lr": 0.0009830917221824644, |
| "router/selected_tokens_s0": 7419.0, |
| "router/selected_tokens_s1": 4281.75, |
| "step": 6440, |
| "tokens_trained": 21.098170848 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.8294447202326076, |
| "grad_norm": 0.11121010035276413, |
| "loss": 1.0815, |
| "loss_ce": 1.0862224102020264, |
| "loss_region": 0.06000552698969841, |
| "loss_total": 1.1462279558181763, |
| "lr": 0.0009826848301986136, |
| "router/selected_tokens_s0": 7489.25, |
| "router/selected_tokens_s1": 4284.5, |
| "step": 6450, |
| "tokens_trained": 21.13093628 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.8322813984823771, |
| "grad_norm": 0.16781003773212433, |
| "loss": 1.092, |
| "loss_ce": 1.0084702968597412, |
| "loss_region": 0.0600038543343544, |
| "loss_total": 1.0684741735458374, |
| "lr": 0.0009822779382147625, |
| "router/selected_tokens_s0": 7421.125, |
| "router/selected_tokens_s1": 4249.125, |
| "step": 6460, |
| "tokens_trained": 21.163697856 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.8351180767321467, |
| "grad_norm": 0.1009034812450409, |
| "loss": 1.0869, |
| "loss_ce": 0.990406334400177, |
| "loss_region": 0.059993598610162735, |
| "loss_total": 1.050399899482727, |
| "lr": 0.0009818710462309115, |
| "router/selected_tokens_s0": 7413.125, |
| "router/selected_tokens_s1": 4293.25, |
| "step": 6470, |
| "tokens_trained": 21.196463296 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.8379547549819162, |
| "grad_norm": 0.12884724140167236, |
| "loss": 1.0912, |
| "loss_ce": 1.0650509595870972, |
| "loss_region": 0.060001127421855927, |
| "loss_total": 1.1250520944595337, |
| "lr": 0.0009814641542470603, |
| "router/selected_tokens_s0": 7395.875, |
| "router/selected_tokens_s1": 4298.375, |
| "step": 6480, |
| "tokens_trained": 21.229228736 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.8407914332316857, |
| "grad_norm": 0.19979813694953918, |
| "loss": 1.0833, |
| "loss_ce": 1.0048813819885254, |
| "loss_region": 0.06000029295682907, |
| "loss_total": 1.064881682395935, |
| "lr": 0.0009810572622632092, |
| "router/selected_tokens_s0": 7402.125, |
| "router/selected_tokens_s1": 4293.0, |
| "step": 6490, |
| "tokens_trained": 21.261994176 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.8436281114814552, |
| "grad_norm": 0.16422773897647858, |
| "loss": 1.091, |
| "loss_ce": 1.0990355014801025, |
| "loss_region": 0.059999119490385056, |
| "loss_total": 1.1590346097946167, |
| "lr": 0.0009806503702793582, |
| "router/selected_tokens_s0": 7458.25, |
| "router/selected_tokens_s1": 4287.25, |
| "step": 6500, |
| "tokens_trained": 21.294759616 |
| }, |
| { |
| "epoch": 1.8436281114814552, |
| "eval_ppl": 2.8148460739009615, |
| "eval_runtime": 1.0215, |
| "step": 6500, |
| "tokens_trained": 21.294759616 |
| }, |
| { |
| "epoch": 1.8436281114814552, |
| "eval_F": 0.33702047034924604, |
| "eval_F_cds": 0.33144677145868656, |
| "eval_F_dig": 0.3183842487694351, |
| "eval_F_exon": 0.33819767685662766, |
| "eval_F_intron": 0.33763849896903503, |
| "eval_F_nig": 0.33872373174578946, |
| "eval_F_promoter": 0.333771905268462, |
| "eval_F_utr": 0.3387100930196604, |
| "eval_G": 0.33543778520904954, |
| "eval_G_cds": 0.3324244975453691, |
| "eval_G_dig": 0.30429180868622546, |
| "eval_G_exon": 0.3373760326246488, |
| "eval_G_intron": 0.3360761604092272, |
| "eval_G_nig": 0.33625805801952946, |
| "eval_G_promoter": 0.33342863306109216, |
| "eval_G_utr": 0.3377500124809967, |
| "eval_avg_bp_per_token": 2.9671788154699463, |
| "eval_bp_per_token/cds": 3.017075699965434, |
| "eval_bp_per_token/dig": 3.1408588957055215, |
| "eval_bp_per_token/exon": 2.956850588964662, |
| "eval_bp_per_token/intron": 2.961747558567693, |
| "eval_bp_per_token/nig": 2.9522584521786484, |
| "eval_bp_per_token/promoter": 2.996058039084123, |
| "eval_bp_per_token/utr": 2.9523773297831877, |
| "eval_ppl_cds": 3.428821805678024, |
| "eval_ppl_dig": 1.11498108266459, |
| "eval_ppl_exon": 3.261777228188115, |
| "eval_ppl_intron": 2.903715208272621, |
| "eval_ppl_nig": 2.597753309797117, |
| "eval_ppl_promoter": 3.1453687602347755, |
| "eval_ppl_utr": 3.148085691800301, |
| "step": 6500, |
| "tokens_trained": 21.294759616 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.8464647897312247, |
| "grad_norm": 0.0870455726981163, |
| "loss": 1.0815, |
| "loss_ce": 0.9556036591529846, |
| "loss_region": 0.060003068298101425, |
| "loss_total": 1.0156067609786987, |
| "lr": 0.0009802434782955071, |
| "router/selected_tokens_s0": 7424.0, |
| "router/selected_tokens_s1": 4282.875, |
| "step": 6510, |
| "tokens_trained": 21.327524256 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.8493014679809943, |
| "grad_norm": 0.17482812702655792, |
| "loss": 1.0878, |
| "loss_ce": 1.05632483959198, |
| "loss_region": 0.06000639125704765, |
| "loss_total": 1.1163312196731567, |
| "lr": 0.0009798365863116563, |
| "router/selected_tokens_s0": 7390.0, |
| "router/selected_tokens_s1": 4326.0, |
| "step": 6520, |
| "tokens_trained": 21.360289696 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.8521381462307638, |
| "grad_norm": 0.07318949699401855, |
| "loss": 1.0845, |
| "loss_ce": 1.0725730657577515, |
| "loss_region": 0.05999938026070595, |
| "loss_total": 1.1325724124908447, |
| "lr": 0.0009794296943278053, |
| "router/selected_tokens_s0": 7435.375, |
| "router/selected_tokens_s1": 4311.375, |
| "step": 6530, |
| "tokens_trained": 21.393055136 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.8549748244805333, |
| "grad_norm": 0.1223776638507843, |
| "loss": 1.0863, |
| "loss_ce": 1.1225980520248413, |
| "loss_region": 0.060001373291015625, |
| "loss_total": 1.182599425315857, |
| "lr": 0.0009790228023439543, |
| "router/selected_tokens_s0": 7443.875, |
| "router/selected_tokens_s1": 4267.875, |
| "step": 6540, |
| "tokens_trained": 21.425820576 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.8578115027303028, |
| "grad_norm": 0.14479322731494904, |
| "loss": 1.0845, |
| "loss_ce": 1.024285078048706, |
| "loss_region": 0.06000075861811638, |
| "loss_total": 1.084285855293274, |
| "lr": 0.0009786159103601032, |
| "router/selected_tokens_s0": 7439.625, |
| "router/selected_tokens_s1": 4291.375, |
| "step": 6550, |
| "tokens_trained": 21.458586016 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.8606481809800723, |
| "grad_norm": 0.1520773023366928, |
| "loss": 1.0901, |
| "loss_ce": 0.9158788919448853, |
| "loss_region": 0.06000827997922897, |
| "loss_total": 0.9758871793746948, |
| "lr": 0.0009782090183762522, |
| "router/selected_tokens_s0": 7440.5, |
| "router/selected_tokens_s1": 4345.0, |
| "step": 6560, |
| "tokens_trained": 21.491351456 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.8634848592298419, |
| "grad_norm": 0.12373050302267075, |
| "loss": 1.0838, |
| "loss_ce": 1.0457063913345337, |
| "loss_region": 0.059999339282512665, |
| "loss_total": 1.105705738067627, |
| "lr": 0.0009778021263924012, |
| "router/selected_tokens_s0": 7416.75, |
| "router/selected_tokens_s1": 4279.75, |
| "step": 6570, |
| "tokens_trained": 21.524116896 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.8663215374796114, |
| "grad_norm": 0.0876341238617897, |
| "loss": 1.0833, |
| "loss_ce": 0.98248291015625, |
| "loss_region": 0.06000464782118797, |
| "loss_total": 1.0424875020980835, |
| "lr": 0.0009773952344085501, |
| "router/selected_tokens_s0": 7340.375, |
| "router/selected_tokens_s1": 4247.375, |
| "step": 6580, |
| "tokens_trained": 21.556882336 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.869158215729381, |
| "grad_norm": 0.10975274443626404, |
| "loss": 1.0856, |
| "loss_ce": 1.0620837211608887, |
| "loss_region": 0.06000051274895668, |
| "loss_total": 1.1220842599868774, |
| "lr": 0.000976988342424699, |
| "router/selected_tokens_s0": 7448.0, |
| "router/selected_tokens_s1": 4340.25, |
| "step": 6590, |
| "tokens_trained": 21.589647776 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.8719948939791504, |
| "grad_norm": 0.08602806180715561, |
| "loss": 1.0801, |
| "loss_ce": 0.9247351884841919, |
| "loss_region": 0.06001641973853111, |
| "loss_total": 0.9847515821456909, |
| "lr": 0.000976581450440848, |
| "router/selected_tokens_s0": 7384.875, |
| "router/selected_tokens_s1": 4269.75, |
| "step": 6600, |
| "tokens_trained": 21.622413216 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.87483157222892, |
| "grad_norm": 0.1520455777645111, |
| "loss": 1.087, |
| "loss_ce": 0.9793323278427124, |
| "loss_region": 0.06001337245106697, |
| "loss_total": 1.0393457412719727, |
| "lr": 0.0009761745584569969, |
| "router/selected_tokens_s0": 7422.75, |
| "router/selected_tokens_s1": 4372.5, |
| "step": 6610, |
| "tokens_trained": 21.655178656 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.8776682504786895, |
| "grad_norm": 0.18168233335018158, |
| "loss": 1.0884, |
| "loss_ce": 0.9781649112701416, |
| "loss_region": 0.0600033737719059, |
| "loss_total": 1.0381683111190796, |
| "lr": 0.000975767666473146, |
| "router/selected_tokens_s0": 7411.75, |
| "router/selected_tokens_s1": 4281.125, |
| "step": 6620, |
| "tokens_trained": 21.687944096 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.880504928728459, |
| "grad_norm": 0.08537963032722473, |
| "loss": 1.0892, |
| "loss_ce": 1.0330227613449097, |
| "loss_region": 0.05999727547168732, |
| "loss_total": 1.0930200815200806, |
| "lr": 0.000975360774489295, |
| "router/selected_tokens_s0": 7448.25, |
| "router/selected_tokens_s1": 4309.875, |
| "step": 6630, |
| "tokens_trained": 21.720707792 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.8833416069782285, |
| "grad_norm": 0.07447545975446701, |
| "loss": 1.0896, |
| "loss_ce": 1.0348316431045532, |
| "loss_region": 0.06000027805566788, |
| "loss_total": 1.094831943511963, |
| "lr": 0.0009749538825054439, |
| "router/selected_tokens_s0": 7475.875, |
| "router/selected_tokens_s1": 4340.75, |
| "step": 6640, |
| "tokens_trained": 21.753473232 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.886178285227998, |
| "grad_norm": 0.052135758101940155, |
| "loss": 1.0821, |
| "loss_ce": 1.043008804321289, |
| "loss_region": 0.05999908968806267, |
| "loss_total": 1.1030079126358032, |
| "lr": 0.0009745469905215929, |
| "router/selected_tokens_s0": 7413.75, |
| "router/selected_tokens_s1": 4280.75, |
| "step": 6650, |
| "tokens_trained": 21.786238672 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.8890149634777675, |
| "grad_norm": 0.140451118350029, |
| "loss": 1.0833, |
| "loss_ce": 1.083349585533142, |
| "loss_region": 0.06000323221087456, |
| "loss_total": 1.1433528661727905, |
| "lr": 0.0009741400985377418, |
| "router/selected_tokens_s0": 7392.625, |
| "router/selected_tokens_s1": 4293.625, |
| "step": 6660, |
| "tokens_trained": 21.819004112 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.891851641727537, |
| "grad_norm": 0.10239919275045395, |
| "loss": 1.0823, |
| "loss_ce": 0.984279453754425, |
| "loss_region": 0.05999943986535072, |
| "loss_total": 1.044278860092163, |
| "lr": 0.0009737332065538909, |
| "router/selected_tokens_s0": 7461.5, |
| "router/selected_tokens_s1": 4314.625, |
| "step": 6670, |
| "tokens_trained": 21.851769552 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.8946883199773066, |
| "grad_norm": 0.11570015549659729, |
| "loss": 1.0758, |
| "loss_ce": 1.0949254035949707, |
| "loss_region": 0.06000664457678795, |
| "loss_total": 1.1549320220947266, |
| "lr": 0.0009733263145700399, |
| "router/selected_tokens_s0": 7371.75, |
| "router/selected_tokens_s1": 4303.875, |
| "step": 6680, |
| "tokens_trained": 21.884534992 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.897524998227076, |
| "grad_norm": 0.07118403166532516, |
| "loss": 1.0803, |
| "loss_ce": 0.9520093202590942, |
| "loss_region": 0.060000941157341, |
| "loss_total": 1.0120102167129517, |
| "lr": 0.0009729194225861888, |
| "router/selected_tokens_s0": 7413.75, |
| "router/selected_tokens_s1": 4268.75, |
| "step": 6690, |
| "tokens_trained": 21.917300432 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.9003616764768456, |
| "grad_norm": 0.08157458156347275, |
| "loss": 1.0771, |
| "loss_ce": 1.1682521104812622, |
| "loss_region": 0.06000512093305588, |
| "loss_total": 1.228257179260254, |
| "lr": 0.0009725125306023378, |
| "router/selected_tokens_s0": 7389.625, |
| "router/selected_tokens_s1": 4302.75, |
| "step": 6700, |
| "tokens_trained": 21.950065872 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.9031983547266151, |
| "grad_norm": 0.13799847662448883, |
| "loss": 1.0839, |
| "loss_ce": 1.0058233737945557, |
| "loss_region": 0.06000097095966339, |
| "loss_total": 1.0658243894577026, |
| "lr": 0.0009721056386184868, |
| "router/selected_tokens_s0": 7442.75, |
| "router/selected_tokens_s1": 4334.125, |
| "step": 6710, |
| "tokens_trained": 21.982831312 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.9060350329763847, |
| "grad_norm": 0.07417915761470795, |
| "loss": 1.0874, |
| "loss_ce": 0.9934352040290833, |
| "loss_region": 0.060005929321050644, |
| "loss_total": 1.0534411668777466, |
| "lr": 0.0009716987466346357, |
| "router/selected_tokens_s0": 7397.5, |
| "router/selected_tokens_s1": 4303.125, |
| "step": 6720, |
| "tokens_trained": 22.015596752 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.9088717112261542, |
| "grad_norm": 0.12218568474054337, |
| "loss": 1.0765, |
| "loss_ce": 1.054198145866394, |
| "loss_region": 0.05999940261244774, |
| "loss_total": 1.1141974925994873, |
| "lr": 0.0009712918546507846, |
| "router/selected_tokens_s0": 7443.5, |
| "router/selected_tokens_s1": 4270.25, |
| "step": 6730, |
| "tokens_trained": 22.048362192 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.9117083894759237, |
| "grad_norm": 0.08367050439119339, |
| "loss": 1.075, |
| "loss_ce": 1.1362199783325195, |
| "loss_region": 0.05999961495399475, |
| "loss_total": 1.196219563484192, |
| "lr": 0.0009708849626669337, |
| "router/selected_tokens_s0": 7419.125, |
| "router/selected_tokens_s1": 4273.75, |
| "step": 6740, |
| "tokens_trained": 22.081127632 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.9145450677256932, |
| "grad_norm": 0.07572333514690399, |
| "loss": 1.0818, |
| "loss_ce": 1.1262247562408447, |
| "loss_region": 0.0599992461502552, |
| "loss_total": 1.1862239837646484, |
| "lr": 0.0009704780706830826, |
| "router/selected_tokens_s0": 7456.625, |
| "router/selected_tokens_s1": 4306.5, |
| "step": 6750, |
| "tokens_trained": 22.113893072 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.9173817459754627, |
| "grad_norm": 0.1779952049255371, |
| "loss": 1.0841, |
| "loss_ce": 0.9176620244979858, |
| "loss_region": 0.06000371649861336, |
| "loss_total": 0.9776657223701477, |
| "lr": 0.0009700711786992316, |
| "router/selected_tokens_s0": 7432.5, |
| "router/selected_tokens_s1": 4260.125, |
| "step": 6760, |
| "tokens_trained": 22.146655448 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.9202184242252323, |
| "grad_norm": 0.14154663681983948, |
| "loss": 1.0845, |
| "loss_ce": 0.9990371465682983, |
| "loss_region": 0.06000208854675293, |
| "loss_total": 1.0590392351150513, |
| "lr": 0.0009696642867153806, |
| "router/selected_tokens_s0": 7448.0, |
| "router/selected_tokens_s1": 4277.75, |
| "step": 6770, |
| "tokens_trained": 22.179420888 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.9230551024750018, |
| "grad_norm": 0.12332601845264435, |
| "loss": 1.0903, |
| "loss_ce": 1.0889134407043457, |
| "loss_region": 0.06000123545527458, |
| "loss_total": 1.1489146947860718, |
| "lr": 0.0009692573947315295, |
| "router/selected_tokens_s0": 7410.625, |
| "router/selected_tokens_s1": 4298.125, |
| "step": 6780, |
| "tokens_trained": 22.212186328 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.9258917807247713, |
| "grad_norm": 0.049108777195215225, |
| "loss": 1.0793, |
| "loss_ce": 1.0911067724227905, |
| "loss_region": 0.06000058352947235, |
| "loss_total": 1.1511073112487793, |
| "lr": 0.0009688505027476785, |
| "router/selected_tokens_s0": 7404.375, |
| "router/selected_tokens_s1": 4292.625, |
| "step": 6790, |
| "tokens_trained": 22.244951768 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.9287284589745408, |
| "grad_norm": 0.08588030934333801, |
| "loss": 1.0822, |
| "loss_ce": 1.0566279888153076, |
| "loss_region": 0.06000346690416336, |
| "loss_total": 1.1166315078735352, |
| "lr": 0.0009684436107638276, |
| "router/selected_tokens_s0": 7381.625, |
| "router/selected_tokens_s1": 4274.5, |
| "step": 6800, |
| "tokens_trained": 22.277717208 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.9315651372243103, |
| "grad_norm": 0.09908344596624374, |
| "loss": 1.0782, |
| "loss_ce": 1.0376368761062622, |
| "loss_region": 0.05999937653541565, |
| "loss_total": 1.0976362228393555, |
| "lr": 0.0009680367187799765, |
| "router/selected_tokens_s0": 7475.125, |
| "router/selected_tokens_s1": 4332.375, |
| "step": 6810, |
| "tokens_trained": 22.310482648 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.9344018154740799, |
| "grad_norm": 0.09425711631774902, |
| "loss": 1.0808, |
| "loss_ce": 1.1275286674499512, |
| "loss_region": 0.05999993532896042, |
| "loss_total": 1.1875286102294922, |
| "lr": 0.0009676298267961255, |
| "router/selected_tokens_s0": 7449.5, |
| "router/selected_tokens_s1": 4272.5, |
| "step": 6820, |
| "tokens_trained": 22.343246488 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.9372384937238494, |
| "grad_norm": 0.08792036026716232, |
| "loss": 1.0818, |
| "loss_ce": 1.053713321685791, |
| "loss_region": 0.06000383198261261, |
| "loss_total": 1.1137171983718872, |
| "lr": 0.0009672229348122745, |
| "router/selected_tokens_s0": 7464.25, |
| "router/selected_tokens_s1": 4300.625, |
| "step": 6830, |
| "tokens_trained": 22.376011928 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.940075171973619, |
| "grad_norm": 0.14521412551403046, |
| "loss": 1.0828, |
| "loss_ce": 0.9131529927253723, |
| "loss_region": 0.0600009560585022, |
| "loss_total": 0.9731539487838745, |
| "lr": 0.0009668160428284234, |
| "router/selected_tokens_s0": 7414.875, |
| "router/selected_tokens_s1": 4263.25, |
| "step": 6840, |
| "tokens_trained": 22.408777368 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.9429118502233884, |
| "grad_norm": 0.11343439668416977, |
| "loss": 1.0838, |
| "loss_ce": 1.1041600704193115, |
| "loss_region": 0.06000589206814766, |
| "loss_total": 1.16416597366333, |
| "lr": 0.0009664091508445723, |
| "router/selected_tokens_s0": 7446.625, |
| "router/selected_tokens_s1": 4249.5, |
| "step": 6850, |
| "tokens_trained": 22.441542808 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.945748528473158, |
| "grad_norm": 0.121289923787117, |
| "loss": 1.0837, |
| "loss_ce": 0.8764686584472656, |
| "loss_region": 0.05999890714883804, |
| "loss_total": 0.9364675879478455, |
| "lr": 0.0009660022588607213, |
| "router/selected_tokens_s0": 7444.0, |
| "router/selected_tokens_s1": 4321.375, |
| "step": 6860, |
| "tokens_trained": 22.474308248 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.9485852067229275, |
| "grad_norm": 0.06294595450162888, |
| "loss": 1.074, |
| "loss_ce": 0.9599717855453491, |
| "loss_region": 0.06000233069062233, |
| "loss_total": 1.0199741125106812, |
| "lr": 0.0009655953668768703, |
| "router/selected_tokens_s0": 7448.625, |
| "router/selected_tokens_s1": 4300.625, |
| "step": 6870, |
| "tokens_trained": 22.507073688 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.951421884972697, |
| "grad_norm": 0.06342046707868576, |
| "loss": 1.0798, |
| "loss_ce": 1.0366178750991821, |
| "loss_region": 0.06000163033604622, |
| "loss_total": 1.0966194868087769, |
| "lr": 0.0009651884748930193, |
| "router/selected_tokens_s0": 7455.25, |
| "router/selected_tokens_s1": 4314.5, |
| "step": 6880, |
| "tokens_trained": 22.539837536 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.9542585632224665, |
| "grad_norm": 0.06787163019180298, |
| "loss": 1.0851, |
| "loss_ce": 0.9529252052307129, |
| "loss_region": 0.06001782417297363, |
| "loss_total": 1.0129430294036865, |
| "lr": 0.0009647815829091683, |
| "router/selected_tokens_s0": 7417.75, |
| "router/selected_tokens_s1": 4262.875, |
| "step": 6890, |
| "tokens_trained": 22.572602976 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.957095241472236, |
| "grad_norm": 0.10369361191987991, |
| "loss": 1.0824, |
| "loss_ce": 1.1339085102081299, |
| "loss_region": 0.059998538345098495, |
| "loss_total": 1.1939070224761963, |
| "lr": 0.0009643746909253172, |
| "router/selected_tokens_s0": 7439.625, |
| "router/selected_tokens_s1": 4269.0, |
| "step": 6900, |
| "tokens_trained": 22.605367616 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.9599319197220055, |
| "grad_norm": 0.09577842801809311, |
| "loss": 1.0758, |
| "loss_ce": 0.8998700976371765, |
| "loss_region": 0.06000450626015663, |
| "loss_total": 0.9598746299743652, |
| "lr": 0.0009639677989414662, |
| "router/selected_tokens_s0": 7404.875, |
| "router/selected_tokens_s1": 4287.75, |
| "step": 6910, |
| "tokens_trained": 22.638133056 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.962768597971775, |
| "grad_norm": 0.09918491542339325, |
| "loss": 1.0767, |
| "loss_ce": 0.9360069632530212, |
| "loss_region": 0.06000268831849098, |
| "loss_total": 0.9960096478462219, |
| "lr": 0.0009635609069576153, |
| "router/selected_tokens_s0": 7433.75, |
| "router/selected_tokens_s1": 4320.625, |
| "step": 6920, |
| "tokens_trained": 22.670898496 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.9656052762215446, |
| "grad_norm": 0.09037332236766815, |
| "loss": 1.0824, |
| "loss_ce": 1.1245343685150146, |
| "loss_region": 0.06000065803527832, |
| "loss_total": 1.184535026550293, |
| "lr": 0.0009631540149737642, |
| "router/selected_tokens_s0": 7447.0, |
| "router/selected_tokens_s1": 4299.375, |
| "step": 6930, |
| "tokens_trained": 22.703663936 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.968441954471314, |
| "grad_norm": 0.0935681015253067, |
| "loss": 1.0775, |
| "loss_ce": 1.1298129558563232, |
| "loss_region": 0.060005635023117065, |
| "loss_total": 1.1898186206817627, |
| "lr": 0.0009627471229899132, |
| "router/selected_tokens_s0": 7427.5, |
| "router/selected_tokens_s1": 4319.75, |
| "step": 6940, |
| "tokens_trained": 22.736429376 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.9712786327210836, |
| "grad_norm": 0.07654014229774475, |
| "loss": 1.0788, |
| "loss_ce": 1.045467734336853, |
| "loss_region": 0.0600028894841671, |
| "loss_total": 1.1054706573486328, |
| "lr": 0.0009623402310060622, |
| "router/selected_tokens_s0": 7431.375, |
| "router/selected_tokens_s1": 4300.25, |
| "step": 6950, |
| "tokens_trained": 22.769194816 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.9741153109708531, |
| "grad_norm": 0.2214384526014328, |
| "loss": 1.0807, |
| "loss_ce": 1.138108253479004, |
| "loss_region": 0.06000784412026405, |
| "loss_total": 1.1981160640716553, |
| "lr": 0.0009619333390222111, |
| "router/selected_tokens_s0": 7450.875, |
| "router/selected_tokens_s1": 4360.125, |
| "step": 6960, |
| "tokens_trained": 22.801960256 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.9769519892206227, |
| "grad_norm": 0.10386006534099579, |
| "loss": 1.0887, |
| "loss_ce": 0.9496551752090454, |
| "loss_region": 0.05999850109219551, |
| "loss_total": 1.0096536874771118, |
| "lr": 0.00096152644703836, |
| "router/selected_tokens_s0": 7437.125, |
| "router/selected_tokens_s1": 4279.5, |
| "step": 6970, |
| "tokens_trained": 22.834725696 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.9797886674703922, |
| "grad_norm": 0.07160235196352005, |
| "loss": 1.0747, |
| "loss_ce": 1.0140540599822998, |
| "loss_region": 0.06000373139977455, |
| "loss_total": 1.0740578174591064, |
| "lr": 0.0009611195550545089, |
| "router/selected_tokens_s0": 7451.25, |
| "router/selected_tokens_s1": 4298.75, |
| "step": 6980, |
| "tokens_trained": 22.867491136 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.9826253457201617, |
| "grad_norm": 0.16528116166591644, |
| "loss": 1.0852, |
| "loss_ce": 0.9280127286911011, |
| "loss_region": 0.060000862926244736, |
| "loss_total": 0.9880135655403137, |
| "lr": 0.000960712663070658, |
| "router/selected_tokens_s0": 7405.625, |
| "router/selected_tokens_s1": 4259.5, |
| "step": 6990, |
| "tokens_trained": 22.900255448 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.9854620239699312, |
| "grad_norm": 0.08820237964391708, |
| "loss": 1.0759, |
| "loss_ce": 1.0279951095581055, |
| "loss_region": 0.059999141842126846, |
| "loss_total": 1.0879942178726196, |
| "lr": 0.000960305771086807, |
| "router/selected_tokens_s0": 7447.625, |
| "router/selected_tokens_s1": 4283.875, |
| "step": 7000, |
| "tokens_trained": 22.933020888 |
| }, |
| { |
| "epoch": 1.9854620239699312, |
| "eval_ppl": 2.794549019742632, |
| "eval_runtime": 1.0319, |
| "step": 7000, |
| "tokens_trained": 22.933020888 |
| }, |
| { |
| "epoch": 1.9854620239699312, |
| "eval_F": 0.3364006302575722, |
| "eval_F_cds": 0.3297511571422025, |
| "eval_F_dig": 0.3325259785920775, |
| "eval_F_exon": 0.33740093093470874, |
| "eval_F_intron": 0.3370067443465844, |
| "eval_F_nig": 0.33774423752098914, |
| "eval_F_promoter": 0.3332120262023403, |
| "eval_F_utr": 0.33739596485351336, |
| "eval_G": 0.33825298851472774, |
| "eval_G_cds": 0.33560851824217497, |
| "eval_G_dig": 0.3102000156262208, |
| "eval_G_exon": 0.3403066620015096, |
| "eval_G_intron": 0.33883673270951786, |
| "eval_G_nig": 0.3388791193714859, |
| "eval_G_promoter": 0.33646450986378607, |
| "eval_G_utr": 0.3406638219388544, |
| "eval_avg_bp_per_token": 2.972646035871957, |
| "eval_bp_per_token/cds": 3.032589813077618, |
| "eval_bp_per_token/dig": 3.0072838345864663, |
| "eval_bp_per_token/exon": 2.963832960477256, |
| "eval_bp_per_token/intron": 2.9672996661799154, |
| "eval_bp_per_token/nig": 2.9608203158102877, |
| "eval_bp_per_token/promoter": 3.0010921616399227, |
| "eval_bp_per_token/utr": 2.96387658469528, |
| "eval_ppl_cds": 3.4120921617320685, |
| "eval_ppl_dig": 1.11111283111052, |
| "eval_ppl_exon": 3.2512919962069096, |
| "eval_ppl_intron": 2.89221028657143, |
| "eval_ppl_nig": 2.586823069198443, |
| "eval_ppl_promoter": 3.0910928401544187, |
| "eval_ppl_utr": 3.108658160865457, |
| "step": 7000, |
| "tokens_trained": 22.933020888 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.9882987022197007, |
| "grad_norm": 0.1708163172006607, |
| "loss": 1.0825, |
| "loss_ce": 1.1052597761154175, |
| "loss_region": 0.05999952182173729, |
| "loss_total": 1.1652592420578003, |
| "lr": 0.000959898879102956, |
| "router/selected_tokens_s0": 7447.25, |
| "router/selected_tokens_s1": 4298.5, |
| "step": 7010, |
| "tokens_trained": 22.965785528 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.9911353804694703, |
| "grad_norm": 0.1802079677581787, |
| "loss": 1.0845, |
| "loss_ce": 1.109366774559021, |
| "loss_region": 0.05999884009361267, |
| "loss_total": 1.169365644454956, |
| "lr": 0.0009594919871191049, |
| "router/selected_tokens_s0": 7416.0, |
| "router/selected_tokens_s1": 4261.25, |
| "step": 7020, |
| "tokens_trained": 22.998550968 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.9939720587192398, |
| "grad_norm": 0.08593832701444626, |
| "loss": 1.0785, |
| "loss_ce": 0.929319441318512, |
| "loss_region": 0.06000542640686035, |
| "loss_total": 0.9893248677253723, |
| "lr": 0.0009590850951352539, |
| "router/selected_tokens_s0": 7443.0, |
| "router/selected_tokens_s1": 4242.375, |
| "step": 7030, |
| "tokens_trained": 23.031316408 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.9968087369690093, |
| "grad_norm": 0.09409149736166, |
| "loss": 1.0841, |
| "loss_ce": 0.970834493637085, |
| "loss_region": 0.06000007688999176, |
| "loss_total": 1.0308345556259155, |
| "lr": 0.0009586782031514028, |
| "router/selected_tokens_s0": 7411.25, |
| "router/selected_tokens_s1": 4277.625, |
| "step": 7040, |
| "tokens_trained": 23.064081848 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.9996454152187788, |
| "grad_norm": 0.1600276380777359, |
| "loss": 1.0808, |
| "loss_ce": 0.9412515759468079, |
| "loss_region": 0.060001179575920105, |
| "loss_total": 1.0012527704238892, |
| "lr": 0.0009582713111675519, |
| "router/selected_tokens_s0": 7405.25, |
| "router/selected_tokens_s1": 4291.625, |
| "step": 7050, |
| "tokens_trained": 23.096847288 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.0022693425998157, |
| "grad_norm": 0.09644526243209839, |
| "loss": 1.0706, |
| "loss_ce": 0.971865713596344, |
| "loss_region": 0.060010332614183426, |
| "loss_total": 1.0318760871887207, |
| "lr": 0.0009578644191837009, |
| "router/selected_tokens_s0": 7413.25, |
| "router/selected_tokens_s1": 4330.5, |
| "step": 7060, |
| "tokens_trained": 23.12715532 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.0051060208495852, |
| "grad_norm": 0.08329671621322632, |
| "loss": 1.076, |
| "loss_ce": 0.9211774468421936, |
| "loss_region": 0.06000356748700142, |
| "loss_total": 0.9811810255050659, |
| "lr": 0.0009574575271998498, |
| "router/selected_tokens_s0": 7464.0, |
| "router/selected_tokens_s1": 4346.5, |
| "step": 7070, |
| "tokens_trained": 23.15992076 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.0079426990993547, |
| "grad_norm": 0.10517358034849167, |
| "loss": 1.0705, |
| "loss_ce": 1.066274881362915, |
| "loss_region": 0.0600065216422081, |
| "loss_total": 1.1262813806533813, |
| "lr": 0.0009570506352159988, |
| "router/selected_tokens_s0": 7373.875, |
| "router/selected_tokens_s1": 4275.75, |
| "step": 7080, |
| "tokens_trained": 23.1926862 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.0107793773491243, |
| "grad_norm": 0.18958993256092072, |
| "loss": 1.0769, |
| "loss_ce": 1.0623862743377686, |
| "loss_region": 0.06000087037682533, |
| "loss_total": 1.122387170791626, |
| "lr": 0.0009566437432321478, |
| "router/selected_tokens_s0": 7403.25, |
| "router/selected_tokens_s1": 4238.125, |
| "step": 7090, |
| "tokens_trained": 23.22545164 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.013616055598894, |
| "grad_norm": 0.09266994893550873, |
| "loss": 1.0779, |
| "loss_ce": 1.0237703323364258, |
| "loss_region": 0.06000027060508728, |
| "loss_total": 1.0837706327438354, |
| "lr": 0.0009562368512482966, |
| "router/selected_tokens_s0": 7413.625, |
| "router/selected_tokens_s1": 4262.75, |
| "step": 7100, |
| "tokens_trained": 23.25821708 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.0164527338486633, |
| "grad_norm": 0.08144967257976532, |
| "loss": 1.0765, |
| "loss_ce": 0.977138340473175, |
| "loss_region": 0.06000202149152756, |
| "loss_total": 1.0371403694152832, |
| "lr": 0.0009558299592644456, |
| "router/selected_tokens_s0": 7403.25, |
| "router/selected_tokens_s1": 4263.125, |
| "step": 7110, |
| "tokens_trained": 23.29098252 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.019289412098433, |
| "grad_norm": 0.09503156691789627, |
| "loss": 1.074, |
| "loss_ce": 1.0922651290893555, |
| "loss_region": 0.060002658516168594, |
| "loss_total": 1.1522678136825562, |
| "lr": 0.0009554230672805947, |
| "router/selected_tokens_s0": 7420.375, |
| "router/selected_tokens_s1": 4300.75, |
| "step": 7120, |
| "tokens_trained": 23.32374796 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.0221260903482023, |
| "grad_norm": 0.15637706220149994, |
| "loss": 1.0697, |
| "loss_ce": 0.9792024493217468, |
| "loss_region": 0.05999979004263878, |
| "loss_total": 1.0392022132873535, |
| "lr": 0.0009550161752967436, |
| "router/selected_tokens_s0": 7453.625, |
| "router/selected_tokens_s1": 4296.0, |
| "step": 7130, |
| "tokens_trained": 23.3565134 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.024962768597972, |
| "grad_norm": 0.10303203016519547, |
| "loss": 1.0773, |
| "loss_ce": 1.0813617706298828, |
| "loss_region": 0.05999702960252762, |
| "loss_total": 1.1413588523864746, |
| "lr": 0.0009546092833128926, |
| "router/selected_tokens_s0": 7435.125, |
| "router/selected_tokens_s1": 4272.375, |
| "step": 7140, |
| "tokens_trained": 23.38927804 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.0277994468477414, |
| "grad_norm": 0.09615137428045273, |
| "loss": 1.0687, |
| "loss_ce": 0.9444698095321655, |
| "loss_region": 0.06000514701008797, |
| "loss_total": 1.0044749975204468, |
| "lr": 0.0009542023913290416, |
| "router/selected_tokens_s0": 7447.625, |
| "router/selected_tokens_s1": 4315.75, |
| "step": 7150, |
| "tokens_trained": 23.42204348 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.030636125097511, |
| "grad_norm": 0.14985762536525726, |
| "loss": 1.0683, |
| "loss_ce": 1.0583648681640625, |
| "loss_region": 0.06000368297100067, |
| "loss_total": 1.1183685064315796, |
| "lr": 0.0009537954993451905, |
| "router/selected_tokens_s0": 7450.5, |
| "router/selected_tokens_s1": 4309.0, |
| "step": 7160, |
| "tokens_trained": 23.45480892 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.0334728033472804, |
| "grad_norm": 0.08965207636356354, |
| "loss": 1.0702, |
| "loss_ce": 1.0723220109939575, |
| "loss_region": 0.06000346690416336, |
| "loss_total": 1.132325530052185, |
| "lr": 0.0009533886073613396, |
| "router/selected_tokens_s0": 7471.375, |
| "router/selected_tokens_s1": 4292.875, |
| "step": 7170, |
| "tokens_trained": 23.48757436 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.03630948159705, |
| "grad_norm": 0.07297336310148239, |
| "loss": 1.0677, |
| "loss_ce": 0.9436392784118652, |
| "loss_region": 0.060007572174072266, |
| "loss_total": 1.0036468505859375, |
| "lr": 0.0009529817153774886, |
| "router/selected_tokens_s0": 7418.5, |
| "router/selected_tokens_s1": 4244.0, |
| "step": 7180, |
| "tokens_trained": 23.520339 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.0391461598468195, |
| "grad_norm": 0.12650638818740845, |
| "loss": 1.0704, |
| "loss_ce": 0.9642244577407837, |
| "loss_region": 0.06000884622335434, |
| "loss_total": 1.024233341217041, |
| "lr": 0.0009525748233936375, |
| "router/selected_tokens_s0": 7376.25, |
| "router/selected_tokens_s1": 4291.25, |
| "step": 7190, |
| "tokens_trained": 23.55310444 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.041982838096589, |
| "grad_norm": 0.1184626966714859, |
| "loss": 1.0826, |
| "loss_ce": 1.0093343257904053, |
| "loss_region": 0.06000237911939621, |
| "loss_total": 1.0693366527557373, |
| "lr": 0.0009521679314097865, |
| "router/selected_tokens_s0": 7418.125, |
| "router/selected_tokens_s1": 4302.625, |
| "step": 7200, |
| "tokens_trained": 23.58586988 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.0448195163463585, |
| "grad_norm": 0.05724379047751427, |
| "loss": 1.0768, |
| "loss_ce": 1.079675316810608, |
| "loss_region": 0.060005199164152145, |
| "loss_total": 1.1396805047988892, |
| "lr": 0.0009517610394259355, |
| "router/selected_tokens_s0": 7453.625, |
| "router/selected_tokens_s1": 4327.25, |
| "step": 7210, |
| "tokens_trained": 23.61863532 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.047656194596128, |
| "grad_norm": 0.1011098176240921, |
| "loss": 1.0779, |
| "loss_ce": 1.0079035758972168, |
| "loss_region": 0.059999871999025345, |
| "loss_total": 1.0679033994674683, |
| "lr": 0.0009513541474420843, |
| "router/selected_tokens_s0": 7451.5, |
| "router/selected_tokens_s1": 4284.625, |
| "step": 7220, |
| "tokens_trained": 23.65140076 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.0504928728458975, |
| "grad_norm": 0.10729926079511642, |
| "loss": 1.0713, |
| "loss_ce": 1.114392876625061, |
| "loss_region": 0.05999760329723358, |
| "loss_total": 1.174390435218811, |
| "lr": 0.0009509472554582333, |
| "router/selected_tokens_s0": 7431.25, |
| "router/selected_tokens_s1": 4259.875, |
| "step": 7230, |
| "tokens_trained": 23.6841662 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.053329551095667, |
| "grad_norm": 0.08418073505163193, |
| "loss": 1.0771, |
| "loss_ce": 0.9541222453117371, |
| "loss_region": 0.06000092998147011, |
| "loss_total": 1.0141232013702393, |
| "lr": 0.0009505403634743824, |
| "router/selected_tokens_s0": 7403.625, |
| "router/selected_tokens_s1": 4292.75, |
| "step": 7240, |
| "tokens_trained": 23.71693004 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.0561662293454366, |
| "grad_norm": 0.11302497237920761, |
| "loss": 1.0722, |
| "loss_ce": 1.072424054145813, |
| "loss_region": 0.06000204384326935, |
| "loss_total": 1.132426142692566, |
| "lr": 0.0009501334714905313, |
| "router/selected_tokens_s0": 7415.625, |
| "router/selected_tokens_s1": 4278.375, |
| "step": 7250, |
| "tokens_trained": 23.74969548 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.059002907595206, |
| "grad_norm": 0.11750608682632446, |
| "loss": 1.069, |
| "loss_ce": 1.016484022140503, |
| "loss_region": 0.06000182405114174, |
| "loss_total": 1.0764858722686768, |
| "lr": 0.0009497265795066803, |
| "router/selected_tokens_s0": 7384.25, |
| "router/selected_tokens_s1": 4272.125, |
| "step": 7260, |
| "tokens_trained": 23.78246092 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.0618395858449756, |
| "grad_norm": 0.1328291893005371, |
| "loss": 1.0696, |
| "loss_ce": 1.08993661403656, |
| "loss_region": 0.060007493942976, |
| "loss_total": 1.1499440670013428, |
| "lr": 0.0009493196875228293, |
| "router/selected_tokens_s0": 7411.125, |
| "router/selected_tokens_s1": 4311.625, |
| "step": 7270, |
| "tokens_trained": 23.81522636 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.064676264094745, |
| "grad_norm": 0.04345889016985893, |
| "loss": 1.0711, |
| "loss_ce": 1.0987412929534912, |
| "loss_region": 0.05999920889735222, |
| "loss_total": 1.158740520477295, |
| "lr": 0.0009489127955389782, |
| "router/selected_tokens_s0": 7362.375, |
| "router/selected_tokens_s1": 4233.375, |
| "step": 7280, |
| "tokens_trained": 23.8479918 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.0675129423445147, |
| "grad_norm": 0.11810455471277237, |
| "loss": 1.0728, |
| "loss_ce": 0.9469420313835144, |
| "loss_region": 0.059997860342264175, |
| "loss_total": 1.0069398880004883, |
| "lr": 0.0009485059035551272, |
| "router/selected_tokens_s0": 7399.125, |
| "router/selected_tokens_s1": 4261.125, |
| "step": 7290, |
| "tokens_trained": 23.88075724 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.070349620594284, |
| "grad_norm": 0.047945018857717514, |
| "loss": 1.08, |
| "loss_ce": 1.002561092376709, |
| "loss_region": 0.06000149995088577, |
| "loss_total": 1.0625625848770142, |
| "lr": 0.0009480990115712763, |
| "router/selected_tokens_s0": 7451.75, |
| "router/selected_tokens_s1": 4322.375, |
| "step": 7300, |
| "tokens_trained": 23.91352268 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.0731862988440537, |
| "grad_norm": 0.15315809845924377, |
| "loss": 1.0738, |
| "loss_ce": 1.0678074359893799, |
| "loss_region": 0.06000760942697525, |
| "loss_total": 1.1278150081634521, |
| "lr": 0.0009476921195874252, |
| "router/selected_tokens_s0": 7464.0, |
| "router/selected_tokens_s1": 4323.625, |
| "step": 7310, |
| "tokens_trained": 23.94628812 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.076022977093823, |
| "grad_norm": 0.08653237670660019, |
| "loss": 1.0697, |
| "loss_ce": 1.124541163444519, |
| "loss_region": 0.06001746654510498, |
| "loss_total": 1.184558629989624, |
| "lr": 0.0009472852276035742, |
| "router/selected_tokens_s0": 7398.125, |
| "router/selected_tokens_s1": 4335.625, |
| "step": 7320, |
| "tokens_trained": 23.97905276 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.0788596553435927, |
| "grad_norm": 0.08466868847608566, |
| "loss": 1.0691, |
| "loss_ce": 0.8798490166664124, |
| "loss_region": 0.06001843884587288, |
| "loss_total": 0.9398674368858337, |
| "lr": 0.0009468783356197232, |
| "router/selected_tokens_s0": 7391.25, |
| "router/selected_tokens_s1": 4293.375, |
| "step": 7330, |
| "tokens_trained": 24.0118182 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.0816963335933623, |
| "grad_norm": 0.136433944106102, |
| "loss": 1.0758, |
| "loss_ce": 1.1402568817138672, |
| "loss_region": 0.060009267181158066, |
| "loss_total": 1.2002661228179932, |
| "lr": 0.0009464714436358721, |
| "router/selected_tokens_s0": 7420.0, |
| "router/selected_tokens_s1": 4331.875, |
| "step": 7340, |
| "tokens_trained": 24.04458364 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.0845330118431318, |
| "grad_norm": 0.20771656930446625, |
| "loss": 1.0594, |
| "loss_ce": 0.8919783234596252, |
| "loss_region": 0.06003022566437721, |
| "loss_total": 0.9520085453987122, |
| "lr": 0.000946064551652021, |
| "router/selected_tokens_s0": 7372.25, |
| "router/selected_tokens_s1": 4267.25, |
| "step": 7350, |
| "tokens_trained": 24.07734908 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.0873696900929013, |
| "grad_norm": 0.07634517550468445, |
| "loss": 1.0697, |
| "loss_ce": 0.8688952326774597, |
| "loss_region": 0.06001392379403114, |
| "loss_total": 0.928909182548523, |
| "lr": 0.0009456576596681699, |
| "router/selected_tokens_s0": 7393.25, |
| "router/selected_tokens_s1": 4316.75, |
| "step": 7360, |
| "tokens_trained": 24.110113704 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.090206368342671, |
| "grad_norm": 0.07431330531835556, |
| "loss": 1.0645, |
| "loss_ce": 0.980828583240509, |
| "loss_region": 0.06001806631684303, |
| "loss_total": 1.0408467054367065, |
| "lr": 0.000945250767684319, |
| "router/selected_tokens_s0": 7389.875, |
| "router/selected_tokens_s1": 4340.125, |
| "step": 7370, |
| "tokens_trained": 24.142879144 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.0930430465924403, |
| "grad_norm": 0.1355743110179901, |
| "loss": 1.0741, |
| "loss_ce": 1.0412564277648926, |
| "loss_region": 0.060002874583005905, |
| "loss_total": 1.1012593507766724, |
| "lr": 0.000944843875700468, |
| "router/selected_tokens_s0": 7403.75, |
| "router/selected_tokens_s1": 4279.875, |
| "step": 7380, |
| "tokens_trained": 24.175643784 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.09587972484221, |
| "grad_norm": 0.09457384049892426, |
| "loss": 1.0677, |
| "loss_ce": 1.0146124362945557, |
| "loss_region": 0.060000814497470856, |
| "loss_total": 1.0746132135391235, |
| "lr": 0.000944436983716617, |
| "router/selected_tokens_s0": 7456.375, |
| "router/selected_tokens_s1": 4266.25, |
| "step": 7390, |
| "tokens_trained": 24.208409224 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.0987164030919794, |
| "grad_norm": 0.08905760198831558, |
| "loss": 1.0615, |
| "loss_ce": 0.9676433801651001, |
| "loss_region": 0.0599987618625164, |
| "loss_total": 1.0276421308517456, |
| "lr": 0.0009440300917327659, |
| "router/selected_tokens_s0": 7453.625, |
| "router/selected_tokens_s1": 4254.625, |
| "step": 7400, |
| "tokens_trained": 24.241174664 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.101553081341749, |
| "grad_norm": 0.09464745968580246, |
| "loss": 1.0739, |
| "loss_ce": 1.0466071367263794, |
| "loss_region": 0.05999983474612236, |
| "loss_total": 1.1066069602966309, |
| "lr": 0.0009436231997489149, |
| "router/selected_tokens_s0": 7391.0, |
| "router/selected_tokens_s1": 4242.75, |
| "step": 7410, |
| "tokens_trained": 24.273940104 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.1043897595915184, |
| "grad_norm": 0.09178861975669861, |
| "loss": 1.0623, |
| "loss_ce": 1.0746018886566162, |
| "loss_region": 0.060001909732818604, |
| "loss_total": 1.13460373878479, |
| "lr": 0.000943216307765064, |
| "router/selected_tokens_s0": 7428.125, |
| "router/selected_tokens_s1": 4302.25, |
| "step": 7420, |
| "tokens_trained": 24.306705544 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.107226437841288, |
| "grad_norm": 0.08409807831048965, |
| "loss": 1.0672, |
| "loss_ce": 0.9870949983596802, |
| "loss_region": 0.06000875309109688, |
| "loss_total": 1.047103762626648, |
| "lr": 0.0009428094157812129, |
| "router/selected_tokens_s0": 7369.625, |
| "router/selected_tokens_s1": 4281.375, |
| "step": 7430, |
| "tokens_trained": 24.339470184 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.1100631160910575, |
| "grad_norm": 0.09397881478071213, |
| "loss": 1.0726, |
| "loss_ce": 1.0716584920883179, |
| "loss_region": 0.060008931905031204, |
| "loss_total": 1.1316673755645752, |
| "lr": 0.0009424025237973619, |
| "router/selected_tokens_s0": 7366.625, |
| "router/selected_tokens_s1": 4281.5, |
| "step": 7440, |
| "tokens_trained": 24.372235624 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.112899794340827, |
| "grad_norm": 0.10131512582302094, |
| "loss": 1.0648, |
| "loss_ce": 1.0101292133331299, |
| "loss_region": 0.059999801218509674, |
| "loss_total": 1.0701290369033813, |
| "lr": 0.0009419956318135108, |
| "router/selected_tokens_s0": 7475.125, |
| "router/selected_tokens_s1": 4291.375, |
| "step": 7450, |
| "tokens_trained": 24.404997 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.1157364725905965, |
| "grad_norm": 0.07813931256532669, |
| "loss": 1.0679, |
| "loss_ce": 1.0638779401779175, |
| "loss_region": 0.060002557933330536, |
| "loss_total": 1.1238805055618286, |
| "lr": 0.0009415887398296598, |
| "router/selected_tokens_s0": 7411.375, |
| "router/selected_tokens_s1": 4283.375, |
| "step": 7460, |
| "tokens_trained": 24.43776244 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.118573150840366, |
| "grad_norm": 0.15742971003055573, |
| "loss": 1.0728, |
| "loss_ce": 0.940522313117981, |
| "loss_region": 0.060000792145729065, |
| "loss_total": 1.0005230903625488, |
| "lr": 0.0009411818478458087, |
| "router/selected_tokens_s0": 7412.125, |
| "router/selected_tokens_s1": 4288.625, |
| "step": 7470, |
| "tokens_trained": 24.47052788 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.1214098290901355, |
| "grad_norm": 0.10386386513710022, |
| "loss": 1.0647, |
| "loss_ce": 0.9601840972900391, |
| "loss_region": 0.060003604739904404, |
| "loss_total": 1.0201877355575562, |
| "lr": 0.0009407749558619576, |
| "router/selected_tokens_s0": 7382.5, |
| "router/selected_tokens_s1": 4285.375, |
| "step": 7480, |
| "tokens_trained": 24.50329252 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.124246507339905, |
| "grad_norm": 0.05695318058133125, |
| "loss": 1.0725, |
| "loss_ce": 1.0721138715744019, |
| "loss_region": 0.06000151485204697, |
| "loss_total": 1.132115364074707, |
| "lr": 0.0009403680638781067, |
| "router/selected_tokens_s0": 7455.5, |
| "router/selected_tokens_s1": 4300.25, |
| "step": 7490, |
| "tokens_trained": 24.53605796 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.1270831855896746, |
| "grad_norm": 0.08580978959798813, |
| "loss": 1.0692, |
| "loss_ce": 1.0266975164413452, |
| "loss_region": 0.0600011944770813, |
| "loss_total": 1.0866987705230713, |
| "lr": 0.0009399611718942557, |
| "router/selected_tokens_s0": 7451.875, |
| "router/selected_tokens_s1": 4288.625, |
| "step": 7500, |
| "tokens_trained": 24.5688234 |
| }, |
| { |
| "epoch": 2.1270831855896746, |
| "eval_ppl": 2.7742442040790527, |
| "eval_runtime": 1.0454, |
| "step": 7500, |
| "tokens_trained": 24.5688234 |
| }, |
| { |
| "epoch": 2.1270831855896746, |
| "eval_F": 0.33731541526681774, |
| "eval_F_cds": 0.3319508730122359, |
| "eval_F_dig": 0.32635362137667007, |
| "eval_F_exon": 0.33907829077032753, |
| "eval_F_intron": 0.33807881279680363, |
| "eval_F_nig": 0.3386028850557167, |
| "eval_F_promoter": 0.3338978780583394, |
| "eval_F_utr": 0.3368548532556881, |
| "eval_G": 0.34280952226899236, |
| "eval_G_cds": 0.3411604825913111, |
| "eval_G_dig": 0.30843382539651537, |
| "eval_G_exon": 0.34520221031471465, |
| "eval_G_intron": 0.34351362208587694, |
| "eval_G_nig": 0.3433470244881507, |
| "eval_G_promoter": 0.3409342019623761, |
| "eval_G_utr": 0.34442165879692854, |
| "eval_avg_bp_per_token": 2.964584346698168, |
| "eval_bp_per_token/cds": 3.0124939601021605, |
| "eval_bp_per_token/dig": 3.0641608810150824, |
| "eval_bp_per_token/exon": 2.9491714073707644, |
| "eval_bp_per_token/intron": 2.957890178705261, |
| "eval_bp_per_token/nig": 2.953312107294807, |
| "eval_bp_per_token/promoter": 2.994927688115699, |
| "eval_bp_per_token/utr": 2.968637650118565, |
| "eval_ppl_cds": 3.3837930340528177, |
| "eval_ppl_dig": 1.108579965306159, |
| "eval_ppl_exon": 3.2416552682993873, |
| "eval_ppl_intron": 2.881913272039216, |
| "eval_ppl_nig": 2.5730585876659235, |
| "eval_ppl_promoter": 3.0510066204208863, |
| "eval_ppl_utr": 3.063097120424939, |
| "step": 7500, |
| "tokens_trained": 24.5688234 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.129919863839444, |
| "grad_norm": 0.09488757699728012, |
| "loss": 1.0692, |
| "loss_ce": 1.02474844455719, |
| "loss_region": 0.0600028894841671, |
| "loss_total": 1.0847513675689697, |
| "lr": 0.0009395542799104046, |
| "router/selected_tokens_s0": 7394.25, |
| "router/selected_tokens_s1": 4274.875, |
| "step": 7510, |
| "tokens_trained": 24.60158804 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.1327565420892136, |
| "grad_norm": 0.09648344665765762, |
| "loss": 1.065, |
| "loss_ce": 1.0811760425567627, |
| "loss_region": 0.06000041216611862, |
| "loss_total": 1.141176462173462, |
| "lr": 0.0009391473879265536, |
| "router/selected_tokens_s0": 7421.875, |
| "router/selected_tokens_s1": 4280.375, |
| "step": 7520, |
| "tokens_trained": 24.634352656 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.135593220338983, |
| "grad_norm": 0.07925688475370407, |
| "loss": 1.0667, |
| "loss_ce": 0.9782887101173401, |
| "loss_region": 0.06000470370054245, |
| "loss_total": 1.0382933616638184, |
| "lr": 0.0009387404959427026, |
| "router/selected_tokens_s0": 7414.375, |
| "router/selected_tokens_s1": 4294.75, |
| "step": 7530, |
| "tokens_trained": 24.66711596 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.1384298985887527, |
| "grad_norm": 0.12421741336584091, |
| "loss": 1.0701, |
| "loss_ce": 1.038988471031189, |
| "loss_region": 0.05999799445271492, |
| "loss_total": 1.0989865064620972, |
| "lr": 0.0009383336039588515, |
| "router/selected_tokens_s0": 7395.625, |
| "router/selected_tokens_s1": 4239.375, |
| "step": 7540, |
| "tokens_trained": 24.6998814 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.141266576838522, |
| "grad_norm": 0.06676165759563446, |
| "loss": 1.0715, |
| "loss_ce": 1.0221954584121704, |
| "loss_region": 0.06001207232475281, |
| "loss_total": 1.0822075605392456, |
| "lr": 0.0009379267119750006, |
| "router/selected_tokens_s0": 7363.75, |
| "router/selected_tokens_s1": 4287.875, |
| "step": 7550, |
| "tokens_trained": 24.732646072 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.1441032550882917, |
| "grad_norm": 0.057106681168079376, |
| "loss": 1.0681, |
| "loss_ce": 1.0698057413101196, |
| "loss_region": 0.06000233814120293, |
| "loss_total": 1.1298080682754517, |
| "lr": 0.0009375198199911496, |
| "router/selected_tokens_s0": 7417.0, |
| "router/selected_tokens_s1": 4300.75, |
| "step": 7560, |
| "tokens_trained": 24.765403184 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.146939933338061, |
| "grad_norm": 0.0772121250629425, |
| "loss": 1.0689, |
| "loss_ce": 1.0054962635040283, |
| "loss_region": 0.05999879539012909, |
| "loss_total": 1.0654950141906738, |
| "lr": 0.0009371129280072985, |
| "router/selected_tokens_s0": 7428.0, |
| "router/selected_tokens_s1": 4283.75, |
| "step": 7570, |
| "tokens_trained": 24.798168624 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.1497766115878307, |
| "grad_norm": 0.08018133044242859, |
| "loss": 1.0726, |
| "loss_ce": 0.999915599822998, |
| "loss_region": 0.060004789382219315, |
| "loss_total": 1.0599204301834106, |
| "lr": 0.0009367060360234475, |
| "router/selected_tokens_s0": 7452.875, |
| "router/selected_tokens_s1": 4282.125, |
| "step": 7580, |
| "tokens_trained": 24.830934064 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.1526132898376003, |
| "grad_norm": 0.08076012879610062, |
| "loss": 1.0553, |
| "loss_ce": 1.1044143438339233, |
| "loss_region": 0.05999838933348656, |
| "loss_total": 1.1644127368927002, |
| "lr": 0.0009362991440395965, |
| "router/selected_tokens_s0": 7389.875, |
| "router/selected_tokens_s1": 4254.375, |
| "step": 7590, |
| "tokens_trained": 24.863699504 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.1554499680873698, |
| "grad_norm": 0.10798979550600052, |
| "loss": 1.0623, |
| "loss_ce": 1.0324264764785767, |
| "loss_region": 0.06000140681862831, |
| "loss_total": 1.0924278497695923, |
| "lr": 0.0009358922520557453, |
| "router/selected_tokens_s0": 7404.25, |
| "router/selected_tokens_s1": 4272.75, |
| "step": 7600, |
| "tokens_trained": 24.896464928 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.1582866463371393, |
| "grad_norm": 0.06346011161804199, |
| "loss": 1.0697, |
| "loss_ce": 1.1126902103424072, |
| "loss_region": 0.05999580770730972, |
| "loss_total": 1.172685980796814, |
| "lr": 0.0009354853600718943, |
| "router/selected_tokens_s0": 7429.625, |
| "router/selected_tokens_s1": 4238.875, |
| "step": 7610, |
| "tokens_trained": 24.929227968 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.161123324586909, |
| "grad_norm": 0.07517693191766739, |
| "loss": 1.0654, |
| "loss_ce": 1.0678085088729858, |
| "loss_region": 0.060007259249687195, |
| "loss_total": 1.1278157234191895, |
| "lr": 0.0009350784680880434, |
| "router/selected_tokens_s0": 7375.375, |
| "router/selected_tokens_s1": 4282.875, |
| "step": 7620, |
| "tokens_trained": 24.961993408 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.1639600028366783, |
| "grad_norm": 0.1385023593902588, |
| "loss": 1.0686, |
| "loss_ce": 1.038794994354248, |
| "loss_region": 0.060001399368047714, |
| "loss_total": 1.0987963676452637, |
| "lr": 0.0009346715761041923, |
| "router/selected_tokens_s0": 7449.75, |
| "router/selected_tokens_s1": 4309.625, |
| "step": 7630, |
| "tokens_trained": 24.994758848 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.166796681086448, |
| "grad_norm": 0.12499692291021347, |
| "loss": 1.0615, |
| "loss_ce": 1.134606957435608, |
| "loss_region": 0.06000205874443054, |
| "loss_total": 1.1946090459823608, |
| "lr": 0.0009342646841203413, |
| "router/selected_tokens_s0": 7430.75, |
| "router/selected_tokens_s1": 4298.375, |
| "step": 7640, |
| "tokens_trained": 25.027524288 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.1696333593362174, |
| "grad_norm": 0.16676566004753113, |
| "loss": 1.0724, |
| "loss_ce": 0.8759081959724426, |
| "loss_region": 0.060003675520420074, |
| "loss_total": 0.9359118938446045, |
| "lr": 0.0009338577921364903, |
| "router/selected_tokens_s0": 7386.0, |
| "router/selected_tokens_s1": 4263.125, |
| "step": 7650, |
| "tokens_trained": 25.06028972 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.172470037585987, |
| "grad_norm": 0.1451634168624878, |
| "loss": 1.0642, |
| "loss_ce": 0.8085017204284668, |
| "loss_region": 0.06006048619747162, |
| "loss_total": 0.8685622215270996, |
| "lr": 0.0009334509001526392, |
| "router/selected_tokens_s0": 7334.75, |
| "router/selected_tokens_s1": 4194.0, |
| "step": 7660, |
| "tokens_trained": 25.09305516 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.1753067158357564, |
| "grad_norm": 0.07147655636072159, |
| "loss": 1.0642, |
| "loss_ce": 1.1068609952926636, |
| "loss_region": 0.06000683829188347, |
| "loss_total": 1.1668678522109985, |
| "lr": 0.0009330440081687883, |
| "router/selected_tokens_s0": 7382.125, |
| "router/selected_tokens_s1": 4288.125, |
| "step": 7670, |
| "tokens_trained": 25.125817728 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.178143394085526, |
| "grad_norm": 0.07307510077953339, |
| "loss": 1.068, |
| "loss_ce": 1.133186936378479, |
| "loss_region": 0.06001383066177368, |
| "loss_total": 1.1932008266448975, |
| "lr": 0.0009326371161849373, |
| "router/selected_tokens_s0": 7364.75, |
| "router/selected_tokens_s1": 4293.875, |
| "step": 7680, |
| "tokens_trained": 25.158583168 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.1809800723352954, |
| "grad_norm": 0.09967056661844254, |
| "loss": 1.0636, |
| "loss_ce": 1.1033625602722168, |
| "loss_region": 0.059995364397764206, |
| "loss_total": 1.1633579730987549, |
| "lr": 0.0009322302242010862, |
| "router/selected_tokens_s0": 7423.375, |
| "router/selected_tokens_s1": 4245.5, |
| "step": 7690, |
| "tokens_trained": 25.191341672 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.183816750585065, |
| "grad_norm": 0.10262995958328247, |
| "loss": 1.0618, |
| "loss_ce": 0.9780526161193848, |
| "loss_region": 0.06000777333974838, |
| "loss_total": 1.0380604267120361, |
| "lr": 0.0009318233322172352, |
| "router/selected_tokens_s0": 7457.625, |
| "router/selected_tokens_s1": 4322.5, |
| "step": 7700, |
| "tokens_trained": 25.224107112 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.1866534288348345, |
| "grad_norm": 0.08990202844142914, |
| "loss": 1.0692, |
| "loss_ce": 1.038571834564209, |
| "loss_region": 0.06000371649861336, |
| "loss_total": 1.0985755920410156, |
| "lr": 0.0009314164402333842, |
| "router/selected_tokens_s0": 7422.0, |
| "router/selected_tokens_s1": 4297.0, |
| "step": 7710, |
| "tokens_trained": 25.256872552 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.189490107084604, |
| "grad_norm": 0.07706000655889511, |
| "loss": 1.0667, |
| "loss_ce": 1.0372638702392578, |
| "loss_region": 0.05999591574072838, |
| "loss_total": 1.097259759902954, |
| "lr": 0.000931009548249533, |
| "router/selected_tokens_s0": 7437.625, |
| "router/selected_tokens_s1": 4269.25, |
| "step": 7720, |
| "tokens_trained": 25.289637992 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.1923267853343735, |
| "grad_norm": 0.08618722856044769, |
| "loss": 1.0628, |
| "loss_ce": 1.068077564239502, |
| "loss_region": 0.060001399368047714, |
| "loss_total": 1.1280789375305176, |
| "lr": 0.000930602656265682, |
| "router/selected_tokens_s0": 7391.75, |
| "router/selected_tokens_s1": 4263.75, |
| "step": 7730, |
| "tokens_trained": 25.322402632 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.195163463584143, |
| "grad_norm": 0.14893005788326263, |
| "loss": 1.0646, |
| "loss_ce": 0.9990720152854919, |
| "loss_region": 0.060002293437719345, |
| "loss_total": 1.0590742826461792, |
| "lr": 0.000930195764281831, |
| "router/selected_tokens_s0": 7351.75, |
| "router/selected_tokens_s1": 4226.0, |
| "step": 7740, |
| "tokens_trained": 25.355165584 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.1980001418339126, |
| "grad_norm": 0.10031340271234512, |
| "loss": 1.0704, |
| "loss_ce": 0.943006694316864, |
| "loss_region": 0.060008347034454346, |
| "loss_total": 1.0030150413513184, |
| "lr": 0.00092978887229798, |
| "router/selected_tokens_s0": 7386.125, |
| "router/selected_tokens_s1": 4292.125, |
| "step": 7750, |
| "tokens_trained": 25.387931024 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.200836820083682, |
| "grad_norm": 0.06115954741835594, |
| "loss": 1.0593, |
| "loss_ce": 0.9749414324760437, |
| "loss_region": 0.05999942868947983, |
| "loss_total": 1.0349408388137817, |
| "lr": 0.000929381980314129, |
| "router/selected_tokens_s0": 7455.25, |
| "router/selected_tokens_s1": 4303.625, |
| "step": 7760, |
| "tokens_trained": 25.420695664 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.2036734983334516, |
| "grad_norm": 0.07545716315507889, |
| "loss": 1.0643, |
| "loss_ce": 0.9750471115112305, |
| "loss_region": 0.06000591441988945, |
| "loss_total": 1.035053014755249, |
| "lr": 0.000928975088330278, |
| "router/selected_tokens_s0": 7427.5, |
| "router/selected_tokens_s1": 4313.75, |
| "step": 7770, |
| "tokens_trained": 25.453461104 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.206510176583221, |
| "grad_norm": 0.09267102926969528, |
| "loss": 1.0633, |
| "loss_ce": 1.0577032566070557, |
| "loss_region": 0.060004863888025284, |
| "loss_total": 1.1177080869674683, |
| "lr": 0.0009285681963464269, |
| "router/selected_tokens_s0": 7395.5, |
| "router/selected_tokens_s1": 4290.375, |
| "step": 7780, |
| "tokens_trained": 25.486226544 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.2093468548329906, |
| "grad_norm": 0.09359267354011536, |
| "loss": 1.0679, |
| "loss_ce": 0.8935257792472839, |
| "loss_region": 0.06000054255127907, |
| "loss_total": 0.9535263180732727, |
| "lr": 0.0009281613043625759, |
| "router/selected_tokens_s0": 7415.875, |
| "router/selected_tokens_s1": 4261.25, |
| "step": 7790, |
| "tokens_trained": 25.518991984 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.21218353308276, |
| "grad_norm": 0.14112727344036102, |
| "loss": 1.0657, |
| "loss_ce": 0.8202201128005981, |
| "loss_region": 0.059999506920576096, |
| "loss_total": 0.8802196383476257, |
| "lr": 0.000927754412378725, |
| "router/selected_tokens_s0": 7408.75, |
| "router/selected_tokens_s1": 4247.25, |
| "step": 7800, |
| "tokens_trained": 25.551757424 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.2150202113325297, |
| "grad_norm": 0.09152037650346756, |
| "loss": 1.0671, |
| "loss_ce": 1.0103726387023926, |
| "loss_region": 0.05999613553285599, |
| "loss_total": 1.070368766784668, |
| "lr": 0.0009273475203948739, |
| "router/selected_tokens_s0": 7446.0, |
| "router/selected_tokens_s1": 4284.875, |
| "step": 7810, |
| "tokens_trained": 25.584522064 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.217856889582299, |
| "grad_norm": 0.07887820154428482, |
| "loss": 1.0709, |
| "loss_ce": 1.094946265220642, |
| "loss_region": 0.0599982887506485, |
| "loss_total": 1.1549445390701294, |
| "lr": 0.0009269406284110229, |
| "router/selected_tokens_s0": 7361.75, |
| "router/selected_tokens_s1": 4215.75, |
| "step": 7820, |
| "tokens_trained": 25.617287344 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.2206935678320687, |
| "grad_norm": 0.07482978701591492, |
| "loss": 1.0653, |
| "loss_ce": 1.0106014013290405, |
| "loss_region": 0.059999577701091766, |
| "loss_total": 1.070600986480713, |
| "lr": 0.0009265337364271718, |
| "router/selected_tokens_s0": 7424.25, |
| "router/selected_tokens_s1": 4287.25, |
| "step": 7830, |
| "tokens_trained": 25.650052784 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.2235302460818382, |
| "grad_norm": 0.060630913823843, |
| "loss": 1.0643, |
| "loss_ce": 0.8532955646514893, |
| "loss_region": 0.060004934668540955, |
| "loss_total": 0.9133005142211914, |
| "lr": 0.0009261268444433207, |
| "router/selected_tokens_s0": 7419.125, |
| "router/selected_tokens_s1": 4301.125, |
| "step": 7840, |
| "tokens_trained": 25.682818224 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.2263669243316078, |
| "grad_norm": 0.1081499457359314, |
| "loss": 1.0658, |
| "loss_ce": 1.054540991783142, |
| "loss_region": 0.06000063568353653, |
| "loss_total": 1.1145416498184204, |
| "lr": 0.0009257199524594697, |
| "router/selected_tokens_s0": 7393.875, |
| "router/selected_tokens_s1": 4270.0, |
| "step": 7850, |
| "tokens_trained": 25.715583664 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.2292036025813773, |
| "grad_norm": 0.11165347695350647, |
| "loss": 1.0675, |
| "loss_ce": 0.9684391617774963, |
| "loss_region": 0.0600094199180603, |
| "loss_total": 1.0284485816955566, |
| "lr": 0.0009253130604756186, |
| "router/selected_tokens_s0": 7381.125, |
| "router/selected_tokens_s1": 4288.25, |
| "step": 7860, |
| "tokens_trained": 25.748349104 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.232040280831147, |
| "grad_norm": 0.11471514403820038, |
| "loss": 1.0618, |
| "loss_ce": 0.9423663020133972, |
| "loss_region": 0.06000564247369766, |
| "loss_total": 1.002371907234192, |
| "lr": 0.0009249061684917677, |
| "router/selected_tokens_s0": 7354.375, |
| "router/selected_tokens_s1": 4260.625, |
| "step": 7870, |
| "tokens_trained": 25.781114544 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.2348769590809163, |
| "grad_norm": 0.08616702258586884, |
| "loss": 1.0568, |
| "loss_ce": 1.0355966091156006, |
| "loss_region": 0.06000853329896927, |
| "loss_total": 1.0956051349639893, |
| "lr": 0.0009244992765079167, |
| "router/selected_tokens_s0": 7475.5, |
| "router/selected_tokens_s1": 4341.5, |
| "step": 7880, |
| "tokens_trained": 25.813879984 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.237713637330686, |
| "grad_norm": 0.09955234080553055, |
| "loss": 1.0637, |
| "loss_ce": 0.9721653461456299, |
| "loss_region": 0.06001753732562065, |
| "loss_total": 1.0321829319000244, |
| "lr": 0.0009240923845240656, |
| "router/selected_tokens_s0": 7451.625, |
| "router/selected_tokens_s1": 4340.875, |
| "step": 7890, |
| "tokens_trained": 25.846645424 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.2405503155804554, |
| "grad_norm": 0.07359630614519119, |
| "loss": 1.0617, |
| "loss_ce": 0.9473441243171692, |
| "loss_region": 0.0600101463496685, |
| "loss_total": 1.0073542594909668, |
| "lr": 0.0009236854925402146, |
| "router/selected_tokens_s0": 7386.25, |
| "router/selected_tokens_s1": 4292.875, |
| "step": 7900, |
| "tokens_trained": 25.879410864 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.243386993830225, |
| "grad_norm": 0.09445139020681381, |
| "loss": 1.064, |
| "loss_ce": 1.011965036392212, |
| "loss_region": 0.06000087782740593, |
| "loss_total": 1.0719659328460693, |
| "lr": 0.0009232786005563636, |
| "router/selected_tokens_s0": 7392.125, |
| "router/selected_tokens_s1": 4271.25, |
| "step": 7910, |
| "tokens_trained": 25.912176304 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.2462236720799944, |
| "grad_norm": 0.05465928837656975, |
| "loss": 1.0572, |
| "loss_ce": 1.0671688318252563, |
| "loss_region": 0.05999647080898285, |
| "loss_total": 1.1271653175354004, |
| "lr": 0.0009228717085725126, |
| "router/selected_tokens_s0": 7432.375, |
| "router/selected_tokens_s1": 4276.375, |
| "step": 7920, |
| "tokens_trained": 25.944941744 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.249060350329764, |
| "grad_norm": 0.060297515243291855, |
| "loss": 1.0578, |
| "loss_ce": 0.996216893196106, |
| "loss_region": 0.0600031316280365, |
| "loss_total": 1.0562200546264648, |
| "lr": 0.0009224648165886616, |
| "router/selected_tokens_s0": 7392.5, |
| "router/selected_tokens_s1": 4274.375, |
| "step": 7930, |
| "tokens_trained": 25.977707184 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.2518970285795334, |
| "grad_norm": 0.11160633713006973, |
| "loss": 1.0625, |
| "loss_ce": 1.0593509674072266, |
| "loss_region": 0.059995170682668686, |
| "loss_total": 1.1193461418151855, |
| "lr": 0.0009220579246048106, |
| "router/selected_tokens_s0": 7382.75, |
| "router/selected_tokens_s1": 4234.5, |
| "step": 7940, |
| "tokens_trained": 26.010471824 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.254733706829303, |
| "grad_norm": 0.08213996887207031, |
| "loss": 1.0636, |
| "loss_ce": 0.9958966374397278, |
| "loss_region": 0.06001153588294983, |
| "loss_total": 1.055908203125, |
| "lr": 0.0009216510326209595, |
| "router/selected_tokens_s0": 7389.25, |
| "router/selected_tokens_s1": 4300.75, |
| "step": 7950, |
| "tokens_trained": 26.043237264 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.2575703850790725, |
| "grad_norm": 0.12535226345062256, |
| "loss": 1.0585, |
| "loss_ce": 1.0670008659362793, |
| "loss_region": 0.060003187507390976, |
| "loss_total": 1.1270040273666382, |
| "lr": 0.0009212441406371085, |
| "router/selected_tokens_s0": 7471.375, |
| "router/selected_tokens_s1": 4314.5, |
| "step": 7960, |
| "tokens_trained": 26.076002704 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.260407063328842, |
| "grad_norm": 0.09521625936031342, |
| "loss": 1.0663, |
| "loss_ce": 0.9568244814872742, |
| "loss_region": 0.059986263513565063, |
| "loss_total": 1.0168107748031616, |
| "lr": 0.0009208372486532574, |
| "router/selected_tokens_s0": 7424.375, |
| "router/selected_tokens_s1": 4269.875, |
| "step": 7970, |
| "tokens_trained": 26.108768144 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.2632437415786115, |
| "grad_norm": 0.1134655624628067, |
| "loss": 1.063, |
| "loss_ce": 1.038812279701233, |
| "loss_region": 0.059999361634254456, |
| "loss_total": 1.0988116264343262, |
| "lr": 0.0009204303566694063, |
| "router/selected_tokens_s0": 7409.125, |
| "router/selected_tokens_s1": 4272.125, |
| "step": 7980, |
| "tokens_trained": 26.141533584 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.266080419828381, |
| "grad_norm": 0.09075932949781418, |
| "loss": 1.0647, |
| "loss_ce": 0.9599190354347229, |
| "loss_region": 0.06002267077565193, |
| "loss_total": 1.0199416875839233, |
| "lr": 0.0009200234646855554, |
| "router/selected_tokens_s0": 7373.5, |
| "router/selected_tokens_s1": 4319.625, |
| "step": 7990, |
| "tokens_trained": 26.174299024 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.2689170980781506, |
| "grad_norm": 0.0673384889960289, |
| "loss": 1.054, |
| "loss_ce": 0.9738999009132385, |
| "loss_region": 0.059999220073223114, |
| "loss_total": 1.0338990688323975, |
| "lr": 0.0009196165727017044, |
| "router/selected_tokens_s0": 7463.375, |
| "router/selected_tokens_s1": 4269.375, |
| "step": 8000, |
| "tokens_trained": 26.207064464 |
| }, |
| { |
| "epoch": 2.2689170980781506, |
| "eval_ppl": 2.754127303226834, |
| "eval_runtime": 1.0212, |
| "step": 8000, |
| "tokens_trained": 26.207064464 |
| }, |
| { |
| "epoch": 2.2689170980781506, |
| "eval_F": 0.3358680365653567, |
| "eval_F_cds": 0.32883460886302185, |
| "eval_F_dig": 0.3228377216970076, |
| "eval_F_exon": 0.3363945150333375, |
| "eval_F_intron": 0.3369414292519282, |
| "eval_F_nig": 0.3380240930137892, |
| "eval_F_promoter": 0.3305992572271056, |
| "eval_F_utr": 0.3348192429591074, |
| "eval_G": 0.3438682406930392, |
| "eval_G_cds": 0.34103056366287066, |
| "eval_G_dig": 0.3056676485955934, |
| "eval_G_exon": 0.3462706262842286, |
| "eval_G_intron": 0.3448363649250059, |
| "eval_G_nig": 0.34470574228489287, |
| "eval_G_promoter": 0.3411262685489101, |
| "eval_G_utr": 0.3443868327900745, |
| "eval_avg_bp_per_token": 2.977359829253683, |
| "eval_bp_per_token/cds": 3.041042436067173, |
| "eval_bp_per_token/dig": 3.0975314617618586, |
| "eval_bp_per_token/exon": 2.9727000747943157, |
| "eval_bp_per_token/intron": 2.9678748684012635, |
| "eval_bp_per_token/nig": 2.958369005842451, |
| "eval_bp_per_token/promoter": 3.0248101837477774, |
| "eval_bp_per_token/utr": 2.9866861628443897, |
| "eval_ppl_cds": 3.318432024253968, |
| "eval_ppl_dig": 1.1105163039128156, |
| "eval_ppl_exon": 3.241143234233957, |
| "eval_ppl_intron": 2.8734801903701843, |
| "eval_ppl_nig": 2.573137410563195, |
| "eval_ppl_promoter": 2.988459127366735, |
| "eval_ppl_utr": 3.029647440548638, |
| "step": 8000, |
| "tokens_trained": 26.207064464 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.27175377632792, |
| "grad_norm": 0.12231134623289108, |
| "loss": 1.0562, |
| "loss_ce": 0.9975427985191345, |
| "loss_region": 0.05999762564897537, |
| "loss_total": 1.0575404167175293, |
| "lr": 0.0009192096807178533, |
| "router/selected_tokens_s0": 7380.125, |
| "router/selected_tokens_s1": 4252.25, |
| "step": 8010, |
| "tokens_trained": 26.239829904 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.2745904545776896, |
| "grad_norm": 0.08055052161216736, |
| "loss": 1.0577, |
| "loss_ce": 1.1121338605880737, |
| "loss_region": 0.060004789382219315, |
| "loss_total": 1.1721386909484863, |
| "lr": 0.0009188027887340023, |
| "router/selected_tokens_s0": 7379.375, |
| "router/selected_tokens_s1": 4277.375, |
| "step": 8020, |
| "tokens_trained": 26.272595344 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.277427132827459, |
| "grad_norm": 0.09591927379369736, |
| "loss": 1.0528, |
| "loss_ce": 0.9205632209777832, |
| "loss_region": 0.0599990114569664, |
| "loss_total": 0.9805622100830078, |
| "lr": 0.0009183958967501513, |
| "router/selected_tokens_s0": 7414.0, |
| "router/selected_tokens_s1": 4271.375, |
| "step": 8030, |
| "tokens_trained": 26.305360784 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.2802638110772286, |
| "grad_norm": 0.11075546592473984, |
| "loss": 1.061, |
| "loss_ce": 1.0516383647918701, |
| "loss_region": 0.06000576540827751, |
| "loss_total": 1.1116441488265991, |
| "lr": 0.0009179890047663002, |
| "router/selected_tokens_s0": 7414.5, |
| "router/selected_tokens_s1": 4294.0, |
| "step": 8040, |
| "tokens_trained": 26.338126208 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.283100489326998, |
| "grad_norm": 0.09984137862920761, |
| "loss": 1.0645, |
| "loss_ce": 0.9013442993164062, |
| "loss_region": 0.06000048667192459, |
| "loss_total": 0.9613447785377502, |
| "lr": 0.0009175821127824493, |
| "router/selected_tokens_s0": 7456.875, |
| "router/selected_tokens_s1": 4280.5, |
| "step": 8050, |
| "tokens_trained": 26.370891648 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.2859371675767677, |
| "grad_norm": 0.07570505142211914, |
| "loss": 1.062, |
| "loss_ce": 1.0161606073379517, |
| "loss_region": 0.05999864265322685, |
| "loss_total": 1.0761592388153076, |
| "lr": 0.0009171752207985983, |
| "router/selected_tokens_s0": 7415.0, |
| "router/selected_tokens_s1": 4264.75, |
| "step": 8060, |
| "tokens_trained": 26.403656288 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.288773845826537, |
| "grad_norm": 0.06136245280504227, |
| "loss": 1.0573, |
| "loss_ce": 0.9474148750305176, |
| "loss_region": 0.05999542027711868, |
| "loss_total": 1.0074102878570557, |
| "lr": 0.0009167683288147472, |
| "router/selected_tokens_s0": 7419.625, |
| "router/selected_tokens_s1": 4234.0, |
| "step": 8070, |
| "tokens_trained": 26.436421728 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.2916105240763067, |
| "grad_norm": 0.12230025231838226, |
| "loss": 1.059, |
| "loss_ce": 1.0230987071990967, |
| "loss_region": 0.059994686394929886, |
| "loss_total": 1.0830934047698975, |
| "lr": 0.0009163614368308962, |
| "router/selected_tokens_s0": 7427.0, |
| "router/selected_tokens_s1": 4254.375, |
| "step": 8080, |
| "tokens_trained": 26.469187168 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.2944472023260762, |
| "grad_norm": 0.10080068558454514, |
| "loss": 1.0537, |
| "loss_ce": 0.9321814775466919, |
| "loss_region": 0.0599973164498806, |
| "loss_total": 0.9921787977218628, |
| "lr": 0.000915954544847045, |
| "router/selected_tokens_s0": 7403.25, |
| "router/selected_tokens_s1": 4247.125, |
| "step": 8090, |
| "tokens_trained": 26.501951008 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.2972838805758458, |
| "grad_norm": 0.073105588555336, |
| "loss": 1.0672, |
| "loss_ce": 1.0272918939590454, |
| "loss_region": 0.05999736115336418, |
| "loss_total": 1.0872892141342163, |
| "lr": 0.000915547652863194, |
| "router/selected_tokens_s0": 7454.5, |
| "router/selected_tokens_s1": 4261.5, |
| "step": 8100, |
| "tokens_trained": 26.534716448 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.3001205588256153, |
| "grad_norm": 0.06740695238113403, |
| "loss": 1.0552, |
| "loss_ce": 1.053105354309082, |
| "loss_region": 0.06000041216611862, |
| "loss_total": 1.1131057739257812, |
| "lr": 0.000915140760879343, |
| "router/selected_tokens_s0": 7447.25, |
| "router/selected_tokens_s1": 4282.625, |
| "step": 8110, |
| "tokens_trained": 26.5674814 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.302957237075385, |
| "grad_norm": 0.09442190825939178, |
| "loss": 1.0656, |
| "loss_ce": 1.0914051532745361, |
| "loss_region": 0.059995636343955994, |
| "loss_total": 1.1514008045196533, |
| "lr": 0.000914733868895492, |
| "router/selected_tokens_s0": 7415.0, |
| "router/selected_tokens_s1": 4259.375, |
| "step": 8120, |
| "tokens_trained": 26.60024684 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.3057939153251543, |
| "grad_norm": 0.09779255837202072, |
| "loss": 1.0564, |
| "loss_ce": 1.083017349243164, |
| "loss_region": 0.059998348355293274, |
| "loss_total": 1.143015742301941, |
| "lr": 0.000914326976911641, |
| "router/selected_tokens_s0": 7457.125, |
| "router/selected_tokens_s1": 4291.125, |
| "step": 8130, |
| "tokens_trained": 26.633011272 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.308630593574924, |
| "grad_norm": 0.1056966781616211, |
| "loss": 1.0573, |
| "loss_ce": 0.931369423866272, |
| "loss_region": 0.05998814478516579, |
| "loss_total": 0.9913575649261475, |
| "lr": 0.00091392008492779, |
| "router/selected_tokens_s0": 7414.625, |
| "router/selected_tokens_s1": 4299.375, |
| "step": 8140, |
| "tokens_trained": 26.665774 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.3114672718246934, |
| "grad_norm": 0.09716489166021347, |
| "loss": 1.0589, |
| "loss_ce": 1.0023393630981445, |
| "loss_region": 0.06001187860965729, |
| "loss_total": 1.0623512268066406, |
| "lr": 0.000913513192943939, |
| "router/selected_tokens_s0": 7344.875, |
| "router/selected_tokens_s1": 4269.5, |
| "step": 8150, |
| "tokens_trained": 26.69853944 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.314303950074463, |
| "grad_norm": 0.11967064440250397, |
| "loss": 1.0593, |
| "loss_ce": 1.0250205993652344, |
| "loss_region": 0.06000397726893425, |
| "loss_total": 1.0850245952606201, |
| "lr": 0.0009131063009600879, |
| "router/selected_tokens_s0": 7420.125, |
| "router/selected_tokens_s1": 4292.375, |
| "step": 8160, |
| "tokens_trained": 26.73130488 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.3171406283242324, |
| "grad_norm": 0.07524566352367401, |
| "loss": 1.0629, |
| "loss_ce": 0.9791299104690552, |
| "loss_region": 0.06000060588121414, |
| "loss_total": 1.0391305685043335, |
| "lr": 0.000912699408976237, |
| "router/selected_tokens_s0": 7456.625, |
| "router/selected_tokens_s1": 4279.25, |
| "step": 8170, |
| "tokens_trained": 26.76407032 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.319977306574002, |
| "grad_norm": 0.09677954018115997, |
| "loss": 1.0572, |
| "loss_ce": 1.0635145902633667, |
| "loss_region": 0.06001080572605133, |
| "loss_total": 1.1235253810882568, |
| "lr": 0.000912292516992386, |
| "router/selected_tokens_s0": 7425.125, |
| "router/selected_tokens_s1": 4309.75, |
| "step": 8180, |
| "tokens_trained": 26.79683576 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.3228139848237714, |
| "grad_norm": 0.08558879047632217, |
| "loss": 1.0633, |
| "loss_ce": 1.102909803390503, |
| "loss_region": 0.060004692524671555, |
| "loss_total": 1.162914514541626, |
| "lr": 0.0009118856250085349, |
| "router/selected_tokens_s0": 7407.25, |
| "router/selected_tokens_s1": 4298.25, |
| "step": 8190, |
| "tokens_trained": 26.8296012 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.325650663073541, |
| "grad_norm": 0.1450275331735611, |
| "loss": 1.0604, |
| "loss_ce": 1.0180364847183228, |
| "loss_region": 0.0600145123898983, |
| "loss_total": 1.078050971031189, |
| "lr": 0.0009114787330246839, |
| "router/selected_tokens_s0": 7394.875, |
| "router/selected_tokens_s1": 4304.875, |
| "step": 8200, |
| "tokens_trained": 26.86236568 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.3284873413233105, |
| "grad_norm": 0.0971994549036026, |
| "loss": 1.0549, |
| "loss_ce": 0.9048692584037781, |
| "loss_region": 0.060006506741046906, |
| "loss_total": 0.9648757576942444, |
| "lr": 0.0009110718410408328, |
| "router/selected_tokens_s0": 7393.75, |
| "router/selected_tokens_s1": 4288.0, |
| "step": 8210, |
| "tokens_trained": 26.89513112 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.33132401957308, |
| "grad_norm": 0.09337711334228516, |
| "loss": 1.0525, |
| "loss_ce": 0.9577232599258423, |
| "loss_region": 0.06000382453203201, |
| "loss_total": 1.0177271366119385, |
| "lr": 0.0009106649490569817, |
| "router/selected_tokens_s0": 7372.875, |
| "router/selected_tokens_s1": 4256.25, |
| "step": 8220, |
| "tokens_trained": 26.92789576 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.3341606978228495, |
| "grad_norm": 0.07152654230594635, |
| "loss": 1.0571, |
| "loss_ce": 0.9388102889060974, |
| "loss_region": 0.060004305094480515, |
| "loss_total": 0.998814582824707, |
| "lr": 0.0009102580570731307, |
| "router/selected_tokens_s0": 7412.875, |
| "router/selected_tokens_s1": 4286.625, |
| "step": 8230, |
| "tokens_trained": 26.9606612 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.336997376072619, |
| "grad_norm": 0.1019396185874939, |
| "loss": 1.057, |
| "loss_ce": 1.048132061958313, |
| "loss_region": 0.059996627271175385, |
| "loss_total": 1.1081286668777466, |
| "lr": 0.0009098511650892797, |
| "router/selected_tokens_s0": 7413.375, |
| "router/selected_tokens_s1": 4258.625, |
| "step": 8240, |
| "tokens_trained": 26.99342664 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.3398340543223886, |
| "grad_norm": 0.0851474180817604, |
| "loss": 1.0608, |
| "loss_ce": 0.8752225041389465, |
| "loss_region": 0.06000366061925888, |
| "loss_total": 0.9352261424064636, |
| "lr": 0.0009094442731054287, |
| "router/selected_tokens_s0": 7397.625, |
| "router/selected_tokens_s1": 4267.125, |
| "step": 8250, |
| "tokens_trained": 27.02619208 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.342670732572158, |
| "grad_norm": 0.1310846209526062, |
| "loss": 1.0627, |
| "loss_ce": 0.8542342185974121, |
| "loss_region": 0.06000632792711258, |
| "loss_total": 0.9142405390739441, |
| "lr": 0.0009090373811215777, |
| "router/selected_tokens_s0": 7411.625, |
| "router/selected_tokens_s1": 4283.0, |
| "step": 8260, |
| "tokens_trained": 27.05895752 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.3455074108219276, |
| "grad_norm": 0.12587329745292664, |
| "loss": 1.0562, |
| "loss_ce": 0.9656789302825928, |
| "loss_region": 0.060002002865076065, |
| "loss_total": 1.0256808996200562, |
| "lr": 0.0009086304891377266, |
| "router/selected_tokens_s0": 7434.625, |
| "router/selected_tokens_s1": 4295.125, |
| "step": 8270, |
| "tokens_trained": 27.09172296 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.348344089071697, |
| "grad_norm": 0.07155922055244446, |
| "loss": 1.0604, |
| "loss_ce": 0.9499186873435974, |
| "loss_region": 0.05999379605054855, |
| "loss_total": 1.0099124908447266, |
| "lr": 0.0009082235971538756, |
| "router/selected_tokens_s0": 7428.0, |
| "router/selected_tokens_s1": 4259.125, |
| "step": 8280, |
| "tokens_trained": 27.1244884 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.3511807673214666, |
| "grad_norm": 0.09406653046607971, |
| "loss": 1.0513, |
| "loss_ce": 0.9733015894889832, |
| "loss_region": 0.06000078469514847, |
| "loss_total": 1.0333024263381958, |
| "lr": 0.0009078167051700246, |
| "router/selected_tokens_s0": 7426.125, |
| "router/selected_tokens_s1": 4277.625, |
| "step": 8290, |
| "tokens_trained": 27.15725384 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.354017445571236, |
| "grad_norm": 0.08878457546234131, |
| "loss": 1.0544, |
| "loss_ce": 1.0448791980743408, |
| "loss_region": 0.0600077249109745, |
| "loss_total": 1.1048868894577026, |
| "lr": 0.0009074098131861736, |
| "router/selected_tokens_s0": 7380.625, |
| "router/selected_tokens_s1": 4254.75, |
| "step": 8300, |
| "tokens_trained": 27.19001928 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.3568541238210057, |
| "grad_norm": 0.08128737658262253, |
| "loss": 1.0589, |
| "loss_ce": 1.0410749912261963, |
| "loss_region": 0.05999433994293213, |
| "loss_total": 1.1010693311691284, |
| "lr": 0.0009070029212023226, |
| "router/selected_tokens_s0": 7437.875, |
| "router/selected_tokens_s1": 4243.375, |
| "step": 8310, |
| "tokens_trained": 27.22278472 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.359690802070775, |
| "grad_norm": 0.14990991353988647, |
| "loss": 1.0583, |
| "loss_ce": 1.0307172536849976, |
| "loss_region": 0.06000310927629471, |
| "loss_total": 1.0907204151153564, |
| "lr": 0.0009065960292184716, |
| "router/selected_tokens_s0": 7408.875, |
| "router/selected_tokens_s1": 4284.125, |
| "step": 8320, |
| "tokens_trained": 27.25555016 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.3625274803205447, |
| "grad_norm": 0.08982635289430618, |
| "loss": 1.054, |
| "loss_ce": 1.0846338272094727, |
| "loss_region": 0.06000533327460289, |
| "loss_total": 1.1446391344070435, |
| "lr": 0.0009061891372346205, |
| "router/selected_tokens_s0": 7436.625, |
| "router/selected_tokens_s1": 4314.375, |
| "step": 8330, |
| "tokens_trained": 27.2883156 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.3653641585703142, |
| "grad_norm": 0.11252354085445404, |
| "loss": 1.0536, |
| "loss_ce": 0.9864820837974548, |
| "loss_region": 0.06000399589538574, |
| "loss_total": 1.0464861392974854, |
| "lr": 0.0009057822452507694, |
| "router/selected_tokens_s0": 7402.125, |
| "router/selected_tokens_s1": 4284.25, |
| "step": 8340, |
| "tokens_trained": 27.32108104 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.3682008368200838, |
| "grad_norm": 0.09558703005313873, |
| "loss": 1.0559, |
| "loss_ce": 1.0942038297653198, |
| "loss_region": 0.0599936842918396, |
| "loss_total": 1.1541974544525146, |
| "lr": 0.0009053753532669184, |
| "router/selected_tokens_s0": 7449.5, |
| "router/selected_tokens_s1": 4267.625, |
| "step": 8350, |
| "tokens_trained": 27.35384648 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.3710375150698533, |
| "grad_norm": 0.08678382635116577, |
| "loss": 1.0571, |
| "loss_ce": 0.9340338110923767, |
| "loss_region": 0.06000067666172981, |
| "loss_total": 0.994034469127655, |
| "lr": 0.0009049684612830673, |
| "router/selected_tokens_s0": 7452.375, |
| "router/selected_tokens_s1": 4290.875, |
| "step": 8360, |
| "tokens_trained": 27.386609784 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.373874193319623, |
| "grad_norm": 0.09878090023994446, |
| "loss": 1.0601, |
| "loss_ce": 0.9702523350715637, |
| "loss_region": 0.060000043362379074, |
| "loss_total": 1.0302523374557495, |
| "lr": 0.0009045615692992164, |
| "router/selected_tokens_s0": 7392.0, |
| "router/selected_tokens_s1": 4262.375, |
| "step": 8370, |
| "tokens_trained": 27.419375224 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.3767108715693923, |
| "grad_norm": 0.08482500910758972, |
| "loss": 1.0653, |
| "loss_ce": 0.9229720234870911, |
| "loss_region": 0.059997960925102234, |
| "loss_total": 0.9829699993133545, |
| "lr": 0.0009041546773153654, |
| "router/selected_tokens_s0": 7337.125, |
| "router/selected_tokens_s1": 4221.25, |
| "step": 8380, |
| "tokens_trained": 27.452140664 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.379547549819162, |
| "grad_norm": 0.14045023918151855, |
| "loss": 1.0586, |
| "loss_ce": 1.0012965202331543, |
| "loss_region": 0.06000533327460289, |
| "loss_total": 1.061301827430725, |
| "lr": 0.0009037477853315143, |
| "router/selected_tokens_s0": 7421.125, |
| "router/selected_tokens_s1": 4302.25, |
| "step": 8390, |
| "tokens_trained": 27.484904504 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.3823842280689314, |
| "grad_norm": 0.07333158701658249, |
| "loss": 1.0632, |
| "loss_ce": 1.0295556783676147, |
| "loss_region": 0.060006577521562576, |
| "loss_total": 1.0895622968673706, |
| "lr": 0.0009033408933476633, |
| "router/selected_tokens_s0": 7440.5, |
| "router/selected_tokens_s1": 4315.375, |
| "step": 8400, |
| "tokens_trained": 27.517669944 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.385220906318701, |
| "grad_norm": 0.08430515229701996, |
| "loss": 1.0553, |
| "loss_ce": 1.0218313932418823, |
| "loss_region": 0.05999813973903656, |
| "loss_total": 1.08182954788208, |
| "lr": 0.0009029340013638123, |
| "router/selected_tokens_s0": 7353.875, |
| "router/selected_tokens_s1": 4234.0, |
| "step": 8410, |
| "tokens_trained": 27.550435384 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.3880575845684704, |
| "grad_norm": 0.1045287624001503, |
| "loss": 1.0597, |
| "loss_ce": 0.8894641399383545, |
| "loss_region": 0.06000156328082085, |
| "loss_total": 0.9494656920433044, |
| "lr": 0.0009025271093799613, |
| "router/selected_tokens_s0": 7369.625, |
| "router/selected_tokens_s1": 4292.0, |
| "step": 8420, |
| "tokens_trained": 27.583198336 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.39089426281824, |
| "grad_norm": 0.10882244259119034, |
| "loss": 1.0545, |
| "loss_ce": 0.9567221999168396, |
| "loss_region": 0.05999777466058731, |
| "loss_total": 1.016719937324524, |
| "lr": 0.0009021202173961103, |
| "router/selected_tokens_s0": 7478.125, |
| "router/selected_tokens_s1": 4277.875, |
| "step": 8430, |
| "tokens_trained": 27.615963616 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.3937309410680094, |
| "grad_norm": 0.08376454561948776, |
| "loss": 1.0607, |
| "loss_ce": 0.9995729923248291, |
| "loss_region": 0.05999734625220299, |
| "loss_total": 1.0595703125, |
| "lr": 0.0009017133254122593, |
| "router/selected_tokens_s0": 7459.125, |
| "router/selected_tokens_s1": 4281.75, |
| "step": 8440, |
| "tokens_trained": 27.648725192 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.396567619317779, |
| "grad_norm": 0.10345184803009033, |
| "loss": 1.0598, |
| "loss_ce": 0.9711935520172119, |
| "loss_region": 0.05999723821878433, |
| "loss_total": 1.0311907529830933, |
| "lr": 0.0009013064334284082, |
| "router/selected_tokens_s0": 7377.375, |
| "router/selected_tokens_s1": 4241.875, |
| "step": 8450, |
| "tokens_trained": 27.681490632 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.3994042975675485, |
| "grad_norm": 0.08357074856758118, |
| "loss": 1.0489, |
| "loss_ce": 0.9034456610679626, |
| "loss_region": 0.06000221520662308, |
| "loss_total": 0.9634478688240051, |
| "lr": 0.0009008995414445572, |
| "router/selected_tokens_s0": 7427.25, |
| "router/selected_tokens_s1": 4293.0, |
| "step": 8460, |
| "tokens_trained": 27.714256072 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.402240975817318, |
| "grad_norm": 0.09288746863603592, |
| "loss": 1.0598, |
| "loss_ce": 1.0702778100967407, |
| "loss_region": 0.05999256670475006, |
| "loss_total": 1.1302703619003296, |
| "lr": 0.000900492649460706, |
| "router/selected_tokens_s0": 7414.25, |
| "router/selected_tokens_s1": 4245.5, |
| "step": 8470, |
| "tokens_trained": 27.747021512 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.4050776540670875, |
| "grad_norm": 0.11204234510660172, |
| "loss": 1.0492, |
| "loss_ce": 1.0531384944915771, |
| "loss_region": 0.05999329686164856, |
| "loss_total": 1.1131317615509033, |
| "lr": 0.000900085757476855, |
| "router/selected_tokens_s0": 7426.375, |
| "router/selected_tokens_s1": 4250.5, |
| "step": 8480, |
| "tokens_trained": 27.779786952 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.407914332316857, |
| "grad_norm": 0.07378879934549332, |
| "loss": 1.0592, |
| "loss_ce": 1.143384575843811, |
| "loss_region": 0.06000464782118797, |
| "loss_total": 1.2033891677856445, |
| "lr": 0.0008996788654930041, |
| "router/selected_tokens_s0": 7419.0, |
| "router/selected_tokens_s1": 4290.125, |
| "step": 8490, |
| "tokens_trained": 27.812552392 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.4107510105666266, |
| "grad_norm": 0.07602507621049881, |
| "loss": 1.054, |
| "loss_ce": 0.9728952050209045, |
| "loss_region": 0.05999796837568283, |
| "loss_total": 1.032893180847168, |
| "lr": 0.0008992719735091531, |
| "router/selected_tokens_s0": 7366.375, |
| "router/selected_tokens_s1": 4244.625, |
| "step": 8500, |
| "tokens_trained": 27.845317832 |
| }, |
| { |
| "epoch": 2.4107510105666266, |
| "eval_ppl": 2.7353563129661476, |
| "eval_runtime": 1.0242, |
| "step": 8500, |
| "tokens_trained": 27.845317832 |
| }, |
| { |
| "epoch": 2.4107510105666266, |
| "eval_F": 0.3339355939266089, |
| "eval_F_cds": 0.32580999954172585, |
| "eval_F_dig": 0.31799359324947263, |
| "eval_F_exon": 0.3340042772675808, |
| "eval_F_intron": 0.33525900250336993, |
| "eval_F_nig": 0.33602694245153414, |
| "eval_F_promoter": 0.32815445197170745, |
| "eval_F_utr": 0.33298977041407923, |
| "eval_G": 0.34417926963084683, |
| "eval_G_cds": 0.34153050315636313, |
| "eval_G_dig": 0.30210047537893586, |
| "eval_G_exon": 0.34643828036545476, |
| "eval_G_intron": 0.34527678432674214, |
| "eval_G_nig": 0.34516020310790657, |
| "eval_G_promoter": 0.34088548592254075, |
| "eval_G_utr": 0.3447772665489964, |
| "eval_avg_bp_per_token": 2.9945894303791296, |
| "eval_bp_per_token/cds": 3.069273507278993, |
| "eval_bp_per_token/dig": 3.1447174447174446, |
| "eval_bp_per_token/exon": 2.9939736346516006, |
| "eval_bp_per_token/intron": 2.9827685238368753, |
| "eval_bp_per_token/nig": 2.9759518469095143, |
| "eval_bp_per_token/promoter": 3.0473455227912534, |
| "eval_bp_per_token/utr": 3.003095256519384, |
| "eval_ppl_cds": 3.2651534127504553, |
| "eval_ppl_dig": 1.1102873279172925, |
| "eval_ppl_exon": 3.2467158647548793, |
| "eval_ppl_intron": 2.863752192690005, |
| "eval_ppl_nig": 2.5461023597898556, |
| "eval_ppl_promoter": 2.9017922324048033, |
| "eval_ppl_utr": 2.9731277746153, |
| "step": 8500, |
| "tokens_trained": 27.845317832 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.413587688816396, |
| "grad_norm": 0.0890352874994278, |
| "loss": 1.056, |
| "loss_ce": 0.8717209696769714, |
| "loss_region": 0.059997204691171646, |
| "loss_total": 0.9317181706428528, |
| "lr": 0.000898865081525302, |
| "router/selected_tokens_s0": 7415.0, |
| "router/selected_tokens_s1": 4265.25, |
| "step": 8510, |
| "tokens_trained": 27.878081672 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.4164243670661656, |
| "grad_norm": 0.10223552584648132, |
| "loss": 1.0517, |
| "loss_ce": 1.022995114326477, |
| "loss_region": 0.05999619513750076, |
| "loss_total": 1.082991361618042, |
| "lr": 0.000898458189541451, |
| "router/selected_tokens_s0": 7418.375, |
| "router/selected_tokens_s1": 4260.375, |
| "step": 8520, |
| "tokens_trained": 27.910847112 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.419261045315935, |
| "grad_norm": 0.07929608225822449, |
| "loss": 1.0561, |
| "loss_ce": 0.8792555332183838, |
| "loss_region": 0.059996481984853745, |
| "loss_total": 0.9392520189285278, |
| "lr": 0.0008980512975576, |
| "router/selected_tokens_s0": 7442.875, |
| "router/selected_tokens_s1": 4277.375, |
| "step": 8530, |
| "tokens_trained": 27.943612552 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.4220977235657046, |
| "grad_norm": 0.08173707127571106, |
| "loss": 1.0565, |
| "loss_ce": 1.0021259784698486, |
| "loss_region": 0.0599919818341732, |
| "loss_total": 1.0621179342269897, |
| "lr": 0.0008976444055737489, |
| "router/selected_tokens_s0": 7401.75, |
| "router/selected_tokens_s1": 4235.75, |
| "step": 8540, |
| "tokens_trained": 27.976377992 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.424934401815474, |
| "grad_norm": 0.09709572046995163, |
| "loss": 1.0557, |
| "loss_ce": 0.890242338180542, |
| "loss_region": 0.059995055198669434, |
| "loss_total": 0.9502373933792114, |
| "lr": 0.000897237513589898, |
| "router/selected_tokens_s0": 7385.625, |
| "router/selected_tokens_s1": 4234.375, |
| "step": 8550, |
| "tokens_trained": 28.009141296 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.4277710800652437, |
| "grad_norm": 0.103042833507061, |
| "loss": 1.0581, |
| "loss_ce": 1.0838439464569092, |
| "loss_region": 0.05999678373336792, |
| "loss_total": 1.1438407897949219, |
| "lr": 0.000896830621606047, |
| "router/selected_tokens_s0": 7459.25, |
| "router/selected_tokens_s1": 4277.625, |
| "step": 8560, |
| "tokens_trained": 28.041906736 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.430607758315013, |
| "grad_norm": 0.08488387614488602, |
| "loss": 1.0616, |
| "loss_ce": 1.0874125957489014, |
| "loss_region": 0.059994734823703766, |
| "loss_total": 1.1474072933197021, |
| "lr": 0.0008964237296221959, |
| "router/selected_tokens_s0": 7402.375, |
| "router/selected_tokens_s1": 4249.125, |
| "step": 8570, |
| "tokens_trained": 28.074672176 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.4334444365647827, |
| "grad_norm": 0.07600562274456024, |
| "loss": 1.0523, |
| "loss_ce": 1.0529037714004517, |
| "loss_region": 0.059999555349349976, |
| "loss_total": 1.112903356552124, |
| "lr": 0.0008960168376383449, |
| "router/selected_tokens_s0": 7401.875, |
| "router/selected_tokens_s1": 4268.875, |
| "step": 8580, |
| "tokens_trained": 28.107436008 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.4362811148145522, |
| "grad_norm": 0.07200396806001663, |
| "loss": 1.055, |
| "loss_ce": 0.9904854893684387, |
| "loss_region": 0.05999687686562538, |
| "loss_total": 1.0504823923110962, |
| "lr": 0.0008956099456544937, |
| "router/selected_tokens_s0": 7439.375, |
| "router/selected_tokens_s1": 4280.625, |
| "step": 8590, |
| "tokens_trained": 28.140201448 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.4391177930643217, |
| "grad_norm": 0.1327984780073166, |
| "loss": 1.052, |
| "loss_ce": 1.0562522411346436, |
| "loss_region": 0.05999882519245148, |
| "loss_total": 1.1162511110305786, |
| "lr": 0.0008952030536706427, |
| "router/selected_tokens_s0": 7396.25, |
| "router/selected_tokens_s1": 4266.125, |
| "step": 8600, |
| "tokens_trained": 28.172966888 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.4419544713140913, |
| "grad_norm": 0.10581366717815399, |
| "loss": 1.0577, |
| "loss_ce": 0.9608261585235596, |
| "loss_region": 0.05999242514371872, |
| "loss_total": 1.0208185911178589, |
| "lr": 0.0008947961616867917, |
| "router/selected_tokens_s0": 7394.25, |
| "router/selected_tokens_s1": 4226.0, |
| "step": 8610, |
| "tokens_trained": 28.205730728 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.444791149563861, |
| "grad_norm": 0.09172872453927994, |
| "loss": 1.0561, |
| "loss_ce": 1.1066234111785889, |
| "loss_region": 0.06000348925590515, |
| "loss_total": 1.1666269302368164, |
| "lr": 0.0008943892697029407, |
| "router/selected_tokens_s0": 7435.0, |
| "router/selected_tokens_s1": 4288.0, |
| "step": 8620, |
| "tokens_trained": 28.238496168 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.4476278278136303, |
| "grad_norm": 0.11657129973173141, |
| "loss": 1.0539, |
| "loss_ce": 1.0094045400619507, |
| "loss_region": 0.05999579280614853, |
| "loss_total": 1.0694003105163574, |
| "lr": 0.0008939823777190897, |
| "router/selected_tokens_s0": 7382.375, |
| "router/selected_tokens_s1": 4243.5, |
| "step": 8630, |
| "tokens_trained": 28.271258408 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.4504645060634, |
| "grad_norm": 0.1171703115105629, |
| "loss": 1.0581, |
| "loss_ce": 1.1134108304977417, |
| "loss_region": 0.0599915012717247, |
| "loss_total": 1.1734023094177246, |
| "lr": 0.0008935754857352387, |
| "router/selected_tokens_s0": 7397.0, |
| "router/selected_tokens_s1": 4215.625, |
| "step": 8640, |
| "tokens_trained": 28.304020264 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.4533011843131693, |
| "grad_norm": 0.0931129977107048, |
| "loss": 1.0551, |
| "loss_ce": 1.0483548641204834, |
| "loss_region": 0.05999862402677536, |
| "loss_total": 1.1083534955978394, |
| "lr": 0.0008931685937513876, |
| "router/selected_tokens_s0": 7403.0, |
| "router/selected_tokens_s1": 4250.625, |
| "step": 8650, |
| "tokens_trained": 28.336784904 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.456137862562939, |
| "grad_norm": 0.10282409936189651, |
| "loss": 1.0556, |
| "loss_ce": 1.0182043313980103, |
| "loss_region": 0.05999889597296715, |
| "loss_total": 1.0782032012939453, |
| "lr": 0.0008927617017675366, |
| "router/selected_tokens_s0": 7410.75, |
| "router/selected_tokens_s1": 4262.625, |
| "step": 8660, |
| "tokens_trained": 28.369550344 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.4589745408127084, |
| "grad_norm": 0.08674661070108414, |
| "loss": 1.0529, |
| "loss_ce": 1.028585433959961, |
| "loss_region": 0.05999217554926872, |
| "loss_total": 1.0885776281356812, |
| "lr": 0.0008923548097836857, |
| "router/selected_tokens_s0": 7433.75, |
| "router/selected_tokens_s1": 4253.375, |
| "step": 8670, |
| "tokens_trained": 28.402315784 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.461811219062478, |
| "grad_norm": 0.10035062581300735, |
| "loss": 1.057, |
| "loss_ce": 0.9080606698989868, |
| "loss_region": 0.05999957025051117, |
| "loss_total": 0.9680602550506592, |
| "lr": 0.0008919479177998346, |
| "router/selected_tokens_s0": 7416.5, |
| "router/selected_tokens_s1": 4270.375, |
| "step": 8680, |
| "tokens_trained": 28.435081224 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.4646478973122474, |
| "grad_norm": 0.06010536104440689, |
| "loss": 1.0518, |
| "loss_ce": 1.0172017812728882, |
| "loss_region": 0.06000963971018791, |
| "loss_total": 1.0772113800048828, |
| "lr": 0.0008915410258159836, |
| "router/selected_tokens_s0": 7396.125, |
| "router/selected_tokens_s1": 4298.625, |
| "step": 8690, |
| "tokens_trained": 28.467846664 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.467484575562017, |
| "grad_norm": 0.1024271696805954, |
| "loss": 1.0586, |
| "loss_ce": 1.0059974193572998, |
| "loss_region": 0.05999946594238281, |
| "loss_total": 1.0659968852996826, |
| "lr": 0.0008911341338321326, |
| "router/selected_tokens_s0": 7422.75, |
| "router/selected_tokens_s1": 4281.125, |
| "step": 8700, |
| "tokens_trained": 28.500612104 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.4703212538117865, |
| "grad_norm": 0.09084313362836838, |
| "loss": 1.0477, |
| "loss_ce": 1.0958322286605835, |
| "loss_region": 0.06000053510069847, |
| "loss_total": 1.1558327674865723, |
| "lr": 0.0008907272418482815, |
| "router/selected_tokens_s0": 7403.875, |
| "router/selected_tokens_s1": 4275.0, |
| "step": 8710, |
| "tokens_trained": 28.533377544 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.473157932061556, |
| "grad_norm": 0.09956429153680801, |
| "loss": 1.0578, |
| "loss_ce": 1.1075488328933716, |
| "loss_region": 0.059996794909238815, |
| "loss_total": 1.1675456762313843, |
| "lr": 0.0008903203498644304, |
| "router/selected_tokens_s0": 7410.125, |
| "router/selected_tokens_s1": 4261.375, |
| "step": 8720, |
| "tokens_trained": 28.566141984 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.4759946103113255, |
| "grad_norm": 0.07981985062360764, |
| "loss": 1.0495, |
| "loss_ce": 1.1032137870788574, |
| "loss_region": 0.06000319495797157, |
| "loss_total": 1.1632169485092163, |
| "lr": 0.0008899134578805794, |
| "router/selected_tokens_s0": 7392.125, |
| "router/selected_tokens_s1": 4270.375, |
| "step": 8730, |
| "tokens_trained": 28.598907424 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.478831288561095, |
| "grad_norm": 0.08922786265611649, |
| "loss": 1.0549, |
| "loss_ce": 0.8978429436683655, |
| "loss_region": 0.060004547238349915, |
| "loss_total": 0.9578474760055542, |
| "lr": 0.0008895065658967284, |
| "router/selected_tokens_s0": 7302.125, |
| "router/selected_tokens_s1": 4239.125, |
| "step": 8740, |
| "tokens_trained": 28.631672864 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.4816679668108645, |
| "grad_norm": 0.1105889230966568, |
| "loss": 1.0617, |
| "loss_ce": 1.044762134552002, |
| "loss_region": 0.060012366622686386, |
| "loss_total": 1.1047744750976562, |
| "lr": 0.0008890996739128774, |
| "router/selected_tokens_s0": 7422.625, |
| "router/selected_tokens_s1": 4317.125, |
| "step": 8750, |
| "tokens_trained": 28.664438304 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.484504645060634, |
| "grad_norm": 0.11524499952793121, |
| "loss": 1.0444, |
| "loss_ce": 1.0382664203643799, |
| "loss_region": 0.06000260263681412, |
| "loss_total": 1.098268985748291, |
| "lr": 0.0008886927819290264, |
| "router/selected_tokens_s0": 7386.0, |
| "router/selected_tokens_s1": 4268.75, |
| "step": 8760, |
| "tokens_trained": 28.697203744 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.4873413233104036, |
| "grad_norm": 0.11217573285102844, |
| "loss": 1.0526, |
| "loss_ce": 0.9426054954528809, |
| "loss_region": 0.059998948127031326, |
| "loss_total": 1.0026044845581055, |
| "lr": 0.0008882858899451753, |
| "router/selected_tokens_s0": 7367.375, |
| "router/selected_tokens_s1": 4257.5, |
| "step": 8770, |
| "tokens_trained": 28.729969184 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.490178001560173, |
| "grad_norm": 0.09291733801364899, |
| "loss": 1.0554, |
| "loss_ce": 1.0549249649047852, |
| "loss_region": 0.06003744900226593, |
| "loss_total": 1.1149624586105347, |
| "lr": 0.0008878789979613243, |
| "router/selected_tokens_s0": 7397.75, |
| "router/selected_tokens_s1": 4349.25, |
| "step": 8780, |
| "tokens_trained": 28.762734624 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.4930146798099426, |
| "grad_norm": 0.07984460145235062, |
| "loss": 1.0505, |
| "loss_ce": 1.01552414894104, |
| "loss_region": 0.05999819189310074, |
| "loss_total": 1.0755223035812378, |
| "lr": 0.0008874721059774733, |
| "router/selected_tokens_s0": 7407.125, |
| "router/selected_tokens_s1": 4255.75, |
| "step": 8790, |
| "tokens_trained": 28.795500064 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.495851358059712, |
| "grad_norm": 0.14549370110034943, |
| "loss": 1.0546, |
| "loss_ce": 0.9735129475593567, |
| "loss_region": 0.05998510867357254, |
| "loss_total": 1.0334980487823486, |
| "lr": 0.0008870652139936223, |
| "router/selected_tokens_s0": 7398.625, |
| "router/selected_tokens_s1": 4285.625, |
| "step": 8800, |
| "tokens_trained": 28.828265504 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.4986880363094817, |
| "grad_norm": 0.10017771273851395, |
| "loss": 1.0547, |
| "loss_ce": 0.7661262154579163, |
| "loss_region": 0.05999436974525452, |
| "loss_total": 0.8261206150054932, |
| "lr": 0.0008866583220097713, |
| "router/selected_tokens_s0": 7422.0, |
| "router/selected_tokens_s1": 4255.625, |
| "step": 8810, |
| "tokens_trained": 28.861030944 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.501524714559251, |
| "grad_norm": 0.0791676864027977, |
| "loss": 1.0558, |
| "loss_ce": 0.8253862857818604, |
| "loss_region": 0.05999884754419327, |
| "loss_total": 0.8853851556777954, |
| "lr": 0.0008862514300259203, |
| "router/selected_tokens_s0": 7395.5, |
| "router/selected_tokens_s1": 4261.0, |
| "step": 8820, |
| "tokens_trained": 28.893796384 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.5043613928090207, |
| "grad_norm": 0.0699949711561203, |
| "loss": 1.0561, |
| "loss_ce": 1.0247315168380737, |
| "loss_region": 0.06000084429979324, |
| "loss_total": 1.0847324132919312, |
| "lr": 0.0008858445380420692, |
| "router/selected_tokens_s0": 7448.125, |
| "router/selected_tokens_s1": 4285.25, |
| "step": 8830, |
| "tokens_trained": 28.926561824 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.5071980710587902, |
| "grad_norm": 0.1134558767080307, |
| "loss": 1.0605, |
| "loss_ce": 1.0843263864517212, |
| "loss_region": 0.06000571325421333, |
| "loss_total": 1.1443320512771606, |
| "lr": 0.0008854376460582181, |
| "router/selected_tokens_s0": 7421.875, |
| "router/selected_tokens_s1": 4288.625, |
| "step": 8840, |
| "tokens_trained": 28.959327264 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.5100347493085597, |
| "grad_norm": 0.07717236131429672, |
| "loss": 1.0467, |
| "loss_ce": 0.9929010272026062, |
| "loss_region": 0.06002375856041908, |
| "loss_total": 1.0529247522354126, |
| "lr": 0.000885030754074367, |
| "router/selected_tokens_s0": 7356.125, |
| "router/selected_tokens_s1": 4301.625, |
| "step": 8850, |
| "tokens_trained": 28.992092704 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.5128714275583293, |
| "grad_norm": 0.10177972167730331, |
| "loss": 1.0577, |
| "loss_ce": 1.1059026718139648, |
| "loss_region": 0.06001473218202591, |
| "loss_total": 1.1659173965454102, |
| "lr": 0.000884623862090516, |
| "router/selected_tokens_s0": 7414.25, |
| "router/selected_tokens_s1": 4308.75, |
| "step": 8860, |
| "tokens_trained": 29.024858144 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.515708105808099, |
| "grad_norm": 0.06407687813043594, |
| "loss": 1.0539, |
| "loss_ce": 1.0892680883407593, |
| "loss_region": 0.059991251677274704, |
| "loss_total": 1.149259328842163, |
| "lr": 0.0008842169701066651, |
| "router/selected_tokens_s0": 7435.0, |
| "router/selected_tokens_s1": 4263.0, |
| "step": 8870, |
| "tokens_trained": 29.057623584 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.5185447840578683, |
| "grad_norm": 0.09954580664634705, |
| "loss": 1.0534, |
| "loss_ce": 0.8777329921722412, |
| "loss_region": 0.060006193816661835, |
| "loss_total": 0.9377391934394836, |
| "lr": 0.0008838100781228141, |
| "router/selected_tokens_s0": 7430.375, |
| "router/selected_tokens_s1": 4296.25, |
| "step": 8880, |
| "tokens_trained": 29.090389024 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.521381462307638, |
| "grad_norm": 0.10266491025686264, |
| "loss": 1.0446, |
| "loss_ce": 1.1141995191574097, |
| "loss_region": 0.060007236897945404, |
| "loss_total": 1.1742067337036133, |
| "lr": 0.000883403186138963, |
| "router/selected_tokens_s0": 7403.0, |
| "router/selected_tokens_s1": 4294.625, |
| "step": 8890, |
| "tokens_trained": 29.123153664 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.5242181405574073, |
| "grad_norm": 0.09132856875658035, |
| "loss": 1.0464, |
| "loss_ce": 1.117302417755127, |
| "loss_region": 0.05999713018536568, |
| "loss_total": 1.1772994995117188, |
| "lr": 0.000882996294155112, |
| "router/selected_tokens_s0": 7382.375, |
| "router/selected_tokens_s1": 4240.25, |
| "step": 8900, |
| "tokens_trained": 29.155917496 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.527054818807177, |
| "grad_norm": 0.08423041552305222, |
| "loss": 1.0514, |
| "loss_ce": 1.1031696796417236, |
| "loss_region": 0.06001175567507744, |
| "loss_total": 1.1631814241409302, |
| "lr": 0.000882589402171261, |
| "router/selected_tokens_s0": 7401.75, |
| "router/selected_tokens_s1": 4303.25, |
| "step": 8910, |
| "tokens_trained": 29.188682936 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.5298914970569464, |
| "grad_norm": 0.12287457287311554, |
| "loss": 1.0485, |
| "loss_ce": 0.8693131804466248, |
| "loss_region": 0.06000253185629845, |
| "loss_total": 0.9293156862258911, |
| "lr": 0.00088218251018741, |
| "router/selected_tokens_s0": 7403.0, |
| "router/selected_tokens_s1": 4264.625, |
| "step": 8920, |
| "tokens_trained": 29.221448376 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.532728175306716, |
| "grad_norm": 0.09478288888931274, |
| "loss": 1.0539, |
| "loss_ce": 1.0362886190414429, |
| "loss_region": 0.05999806895852089, |
| "loss_total": 1.096286654472351, |
| "lr": 0.000881775618203559, |
| "router/selected_tokens_s0": 7375.875, |
| "router/selected_tokens_s1": 4248.375, |
| "step": 8930, |
| "tokens_trained": 29.254213816 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.5355648535564854, |
| "grad_norm": 0.06375641375780106, |
| "loss": 1.0485, |
| "loss_ce": 1.0071486234664917, |
| "loss_region": 0.06001626327633858, |
| "loss_total": 1.0671648979187012, |
| "lr": 0.000881368726219708, |
| "router/selected_tokens_s0": 7397.25, |
| "router/selected_tokens_s1": 4307.5, |
| "step": 8940, |
| "tokens_trained": 29.286979256 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.538401531806255, |
| "grad_norm": 0.12006037682294846, |
| "loss": 1.0391, |
| "loss_ce": 0.9393020272254944, |
| "loss_region": 0.060008224099874496, |
| "loss_total": 0.9993102550506592, |
| "lr": 0.0008809618342358569, |
| "router/selected_tokens_s0": 7438.875, |
| "router/selected_tokens_s1": 4308.375, |
| "step": 8950, |
| "tokens_trained": 29.319744696 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.5412382100560245, |
| "grad_norm": 0.08267606049776077, |
| "loss": 1.0516, |
| "loss_ce": 1.027431607246399, |
| "loss_region": 0.05999532714486122, |
| "loss_total": 1.0874269008636475, |
| "lr": 0.0008805549422520058, |
| "router/selected_tokens_s0": 7441.0, |
| "router/selected_tokens_s1": 4274.125, |
| "step": 8960, |
| "tokens_trained": 29.352510136 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.544074888305794, |
| "grad_norm": 0.1047692596912384, |
| "loss": 1.0568, |
| "loss_ce": 0.9955906271934509, |
| "loss_region": 0.05999518185853958, |
| "loss_total": 1.0555858612060547, |
| "lr": 0.0008801480502681547, |
| "router/selected_tokens_s0": 7389.25, |
| "router/selected_tokens_s1": 4252.75, |
| "step": 8970, |
| "tokens_trained": 29.385275576 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.5469115665555635, |
| "grad_norm": 0.07880797237157822, |
| "loss": 1.0427, |
| "loss_ce": 1.063873529434204, |
| "loss_region": 0.06000230833888054, |
| "loss_total": 1.1238758563995361, |
| "lr": 0.0008797411582843037, |
| "router/selected_tokens_s0": 7401.375, |
| "router/selected_tokens_s1": 4278.0, |
| "step": 8980, |
| "tokens_trained": 29.418036064 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.549748244805333, |
| "grad_norm": 0.09025935083627701, |
| "loss": 1.0523, |
| "loss_ce": 0.9206526875495911, |
| "loss_region": 0.06000004708766937, |
| "loss_total": 0.9806527495384216, |
| "lr": 0.0008793342663004528, |
| "router/selected_tokens_s0": 7416.0, |
| "router/selected_tokens_s1": 4278.75, |
| "step": 8990, |
| "tokens_trained": 29.450801504 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 2.5525849230551025, |
| "grad_norm": 0.09820877760648727, |
| "loss": 1.0528, |
| "loss_ce": 1.007702112197876, |
| "loss_region": 0.06000753864645958, |
| "loss_total": 1.0677096843719482, |
| "lr": 0.0008789273743166017, |
| "router/selected_tokens_s0": 7384.0, |
| "router/selected_tokens_s1": 4275.375, |
| "step": 9000, |
| "tokens_trained": 29.483566944 |
| }, |
| { |
| "epoch": 2.5525849230551025, |
| "eval_ppl": 2.7250221948753346, |
| "eval_runtime": 1.0167, |
| "step": 9000, |
| "tokens_trained": 29.483566944 |
| }, |
| { |
| "epoch": 2.5525849230551025, |
| "eval_F": 0.3320773758366539, |
| "eval_F_cds": 0.32461848677879107, |
| "eval_F_dig": 0.3169778888975701, |
| "eval_F_exon": 0.33236885142785255, |
| "eval_F_intron": 0.33329054068511027, |
| "eval_F_nig": 0.3344018724876609, |
| "eval_F_promoter": 0.3263581733012336, |
| "eval_F_utr": 0.3302584452060089, |
| "eval_G": 0.3439454638281474, |
| "eval_G_cds": 0.3421455034714266, |
| "eval_G_dig": 0.3039759270743808, |
| "eval_G_exon": 0.3462285284836667, |
| "eval_G_intron": 0.3449180219900721, |
| "eval_G_nig": 0.34477996088582213, |
| "eval_G_promoter": 0.34113107089701955, |
| "eval_G_utr": 0.3442766678637687, |
| "eval_avg_bp_per_token": 3.011346369142268, |
| "eval_bp_per_token/cds": 3.0805392814286723, |
| "eval_bp_per_token/dig": 3.154794182893764, |
| "eval_bp_per_token/exon": 3.0087055261165783, |
| "eval_bp_per_token/intron": 3.0003851832830457, |
| "eval_bp_per_token/nig": 2.990413877054169, |
| "eval_bp_per_token/promoter": 3.064118143218631, |
| "eval_bp_per_token/utr": 3.0279316532729967, |
| "eval_ppl_cds": 3.2245429468340667, |
| "eval_ppl_dig": 1.1062662889876407, |
| "eval_ppl_exon": 3.2440991246218105, |
| "eval_ppl_intron": 2.8497260833235316, |
| "eval_ppl_nig": 2.5493835432583385, |
| "eval_ppl_promoter": 2.8852114125443373, |
| "eval_ppl_utr": 3.007436285101054, |
| "step": 9000, |
| "tokens_trained": 29.483566944 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 30600, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 9, |
| "save_steps": 3000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 8, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|