{ "run_name": "run_large_20251112_071557", "timestamp": "20251112_071557", "phase": "large", "config": { "arch_layout": [ "m4", [ "T22" ], "m4" ], "d_model": [ 1024, 1536 ], "d_intermediate": [ 0, 4096 ], "vocab_size": 256, "ssm_cfg": { "chunk_size": 256, "d_conv": 4, "d_state": 128, "expand": 2 }, "attn_cfg": { "num_heads": [ 16, 16 ], "rotary_emb_dim": [ 32, 48 ], "window_size": [ 1023, -1 ] }, "tie_embeddings": false }, "training_args": { "data": "datasets/moses/smiles-molecules-moses_all.csv", "max_samples": null, "batch_size": 16, "epochs": 5, "lr": 0.0001, "weight_decay": 0.1, "gradient_accumulation": 8, "concatenate": true, "num_concatenate": 10, "concatenate_separator": " ", "checkpoint_bytes": 1000000, "num_test_samples": 5, "num_visualize": 5, "skip_visualization": false }, "dataset_info": { "train_size": 193691, "test_size": 5, "test_smiles_file": "checkpoints/run_large_20251112_071557/test_smiles.txt" }, "model_info": { "num_parameters": 622923776, "device": "cuda", "dtype": "torch.bfloat16", "use_amp": true }, "training_history": [ { "checkpoint_type": "bytes", "bytes_threshold": 1000000, "cumulative_training_bytes": 1005180, "metrics": { "loss": 2.7717187923543594, "ce_loss": 2.76171875, "lb_loss": 0.9999999957926133 } }, { "checkpoint_type": "bytes", "bytes_threshold": 2000000, "cumulative_training_bytes": 2005824, "metrics": { "loss": 2.068881838019267, "ce_loss": 2.0588818215339235, "lb_loss": 0.999999996131852 } }, { "checkpoint_type": "bytes", "bytes_threshold": 3000000, "cumulative_training_bytes": 3000192, "metrics": { "loss": 1.7394671552985377, "ce_loss": 1.7294671474358974, "lb_loss": 0.9999999945920836 } }, { "checkpoint_type": "bytes", "bytes_threshold": 4000000, "cumulative_training_bytes": 4000199, "metrics": { "loss": 1.532599624444747, "ce_loss": 1.5225996209319526, "lb_loss": 0.9999999955913724 } }, { "checkpoint_type": "bytes", "bytes_threshold": 5000000, "cumulative_training_bytes": 5000601, "metrics": { "loss": 1.3878337656957864, "ce_loss": 1.3778337647928993, "lb_loss": 0.9999999956971795 } }, { "checkpoint_type": "bytes", "bytes_threshold": 6000000, "cumulative_training_bytes": 6001219, "metrics": { "loss": 1.2813033152378992, "ce_loss": 1.2713033160749507, "lb_loss": 0.9999999946508652 } }, { "checkpoint_type": "bytes", "bytes_threshold": 7000000, "cumulative_training_bytes": 7000167, "metrics": { "loss": 1.1998906361280814, "ce_loss": 1.189890638207946, "lb_loss": 0.9999999956669489 } }, { "checkpoint_type": "bytes", "bytes_threshold": 8000000, "cumulative_training_bytes": 8005274, "metrics": { "loss": 1.1353377880031588, "ce_loss": 1.1253377910199556, "lb_loss": 0.9999999970484027 } }, { "checkpoint_type": "bytes", "bytes_threshold": 9000000, "cumulative_training_bytes": 9004340, "metrics": { "loss": 1.082989381114694, "ce_loss": 1.0729893848554533, "lb_loss": 0.9999999974153045 } }, { "checkpoint_type": "bytes", "bytes_threshold": 10000000, "cumulative_training_bytes": 10005356, "metrics": { "loss": 1.0403814267858396, "ce_loss": 1.0303814311058546, "lb_loss": 0.999999997638373 } }, { "checkpoint_type": "bytes", "bytes_threshold": 11000000, "cumulative_training_bytes": 11004976, "metrics": { "loss": 1.0042456269264222, "ce_loss": 0.9942456317204301, "lb_loss": 0.9999999972761319 } }, { "checkpoint_type": "bytes", "bytes_threshold": 12000000, "cumulative_training_bytes": 12005836, "metrics": { "loss": 0.9732034663486622, "ce_loss": 0.9632034715377032, "lb_loss": 0.9999999970036108 } }, { "checkpoint_type": "bytes", "bytes_threshold": 13000000, "cumulative_training_bytes": 13000402, "metrics": { "loss": 0.9467905753842798, "ce_loss": 0.9367905809057806, "lb_loss": 0.9999999964188379 } }, { "checkpoint_type": "bytes", "bytes_threshold": 14000000, "cumulative_training_bytes": 14000972, "metrics": { "loss": 0.9233459631266074, "ce_loss": 0.9133459689349113, "lb_loss": 0.9999999965234823 } }, { "checkpoint_type": "bytes", "bytes_threshold": 15000000, "cumulative_training_bytes": 15000856, "metrics": { "loss": 0.9026359037064472, "ce_loss": 0.8926359097633136, "lb_loss": 0.9999999959087936 } }, { "checkpoint_type": "bytes", "bytes_threshold": 16000000, "cumulative_training_bytes": 16000794, "metrics": { "loss": 0.8842589041361442, "ce_loss": 0.874258910410503, "lb_loss": 0.9999999957015882 } }, { "checkpoint_type": "bytes", "bytes_threshold": 17000000, "cumulative_training_bytes": 17000571, "metrics": { "loss": 0.8677488623120169, "ce_loss": 0.8577488687782805, "lb_loss": 0.9999999955602527 } }, { "checkpoint_type": "bytes", "bytes_threshold": 18000000, "cumulative_training_bytes": 18004713, "metrics": { "loss": 0.8526691315482018, "ce_loss": 0.8426691381860006, "lb_loss": 0.9999999956319962 } }, { "checkpoint_type": "bytes", "bytes_threshold": 19000000, "cumulative_training_bytes": 19004995, "metrics": { "loss": 0.8389714284525119, "ce_loss": 0.8289714352428393, "lb_loss": 0.9999999954535685 } }, { "checkpoint_type": "bytes", "bytes_threshold": 20000000, "cumulative_training_bytes": 20004594, "metrics": { "loss": 0.8264131751930668, "ce_loss": 0.8164131821206744, "lb_loss": 0.9999999955926764 } }, { "checkpoint_type": "bytes", "bytes_threshold": 21000000, "cumulative_training_bytes": 21004699, "metrics": { "loss": 0.8155721760467745, "ce_loss": 0.8055721830985916, "lb_loss": 0.9999999949629877 } }, { "checkpoint_type": "bytes", "bytes_threshold": 22000000, "cumulative_training_bytes": 22004049, "metrics": { "loss": 0.8053173244432212, "ce_loss": 0.7953173316079591, "lb_loss": 0.9999999948072318 } }, { "checkpoint_type": "bytes", "bytes_threshold": 23000000, "cumulative_training_bytes": 23003811, "metrics": { "loss": 0.795621416021767, "ce_loss": 0.785621423289609, "lb_loss": 0.9999999948336509 } }, { "checkpoint_type": "bytes", "bytes_threshold": 24000000, "cumulative_training_bytes": 24003387, "metrics": { "loss": 0.7866196854525316, "ce_loss": 0.7766196928148879, "lb_loss": 0.999999995078246 } }, { "checkpoint_type": "bytes", "bytes_threshold": 25000000, "cumulative_training_bytes": 25003311, "metrics": { "loss": 0.7783351499098937, "ce_loss": 0.7683351573592049, "lb_loss": 0.9999999949365671 } }, { "checkpoint_type": "bytes", "bytes_threshold": 26000000, "cumulative_training_bytes": 26003789, "metrics": { "loss": 0.7705019833691698, "ce_loss": 0.7605019908987486, "lb_loss": 0.9999999948464698 } }, { "checkpoint_type": "bytes", "bytes_threshold": 27000000, "cumulative_training_bytes": 27005374, "metrics": { "loss": 0.7630905858996038, "ce_loss": 0.7530905935035057, "lb_loss": 0.999999994763045 } }, { "checkpoint_type": "bytes", "bytes_threshold": 28000000, "cumulative_training_bytes": 28004376, "metrics": { "loss": 0.7561506895592821, "ce_loss": 0.7461506972321994, "lb_loss": 0.9999999949374462 } }, { "checkpoint_type": "bytes", "bytes_threshold": 29000000, "cumulative_training_bytes": 29003450, "metrics": { "loss": 0.7496948872036664, "ce_loss": 0.7396948949408405, "lb_loss": 0.9999999950268667 } }, { "checkpoint_type": "bytes", "bytes_threshold": 30000000, "cumulative_training_bytes": 30002714, "metrics": { "loss": 0.7436015993809234, "ce_loss": 0.7336016071780714, "lb_loss": 0.999999995180851 } }, { "checkpoint_type": "bytes", "bytes_threshold": 31000000, "cumulative_training_bytes": 31003719, "metrics": { "loss": 0.7378081684349147, "ce_loss": 0.727808176288168, "lb_loss": 0.9999999952907781 } }, { "checkpoint_type": "bytes", "bytes_threshold": 32000000, "cumulative_training_bytes": 32002795, "metrics": { "loss": 0.7323312628928164, "ce_loss": 0.7223312707986689, "lb_loss": 0.9999999951954843 } }, { "checkpoint_type": "bytes", "bytes_threshold": 33000000, "cumulative_training_bytes": 33002982, "metrics": { "loss": 0.7271596202716984, "ce_loss": 0.7171596282269631, "lb_loss": 0.9999999955120203 } }, { "checkpoint_type": "bytes", "bytes_threshold": 34000000, "cumulative_training_bytes": 34002814, "metrics": { "loss": 0.7221670737365277, "ce_loss": 0.7121670817382982, "lb_loss": 0.9999999956336253 } }, { "checkpoint_type": "bytes", "bytes_threshold": 35000000, "cumulative_training_bytes": 35001491, "metrics": { "loss": 0.7178196230395731, "ce_loss": 0.7078196310851927, "lb_loss": 0.9999999956173056 } }, { "checkpoint_type": "bytes", "bytes_threshold": 36000000, "cumulative_training_bytes": 36002685, "metrics": { "loss": 0.7134658659885629, "ce_loss": 0.7034658740755957, "lb_loss": 0.9999999959055478 } }, { "checkpoint_type": "bytes", "bytes_threshold": 37000000, "cumulative_training_bytes": 37002731, "metrics": { "loss": 0.7092018776668845, "ce_loss": 0.6992018857930924, "lb_loss": 0.9999999959590071 } }, { "checkpoint_type": "bytes", "bytes_threshold": 38000000, "cumulative_training_bytes": 38001457, "metrics": { "loss": 0.7051239603872, "ce_loss": 0.6951239685505216, "lb_loss": 0.9999999958890149 } }, { "checkpoint_type": "bytes", "bytes_threshold": 39000000, "cumulative_training_bytes": 39001008, "metrics": { "loss": 0.7012219511840528, "ce_loss": 0.6912219593825849, "lb_loss": 0.9999999958406953 } }, { "checkpoint_type": "bytes", "bytes_threshold": 40000000, "cumulative_training_bytes": 40002237, "metrics": { "loss": 0.6974433710758123, "ce_loss": 0.6874433793077948, "lb_loss": 0.9999999959270306 } }, { "checkpoint_type": "bytes", "bytes_threshold": 41000000, "cumulative_training_bytes": 41003402, "metrics": { "loss": 0.6939161623711194, "ce_loss": 0.6839161706349206, "lb_loss": 0.9999999961123666 } }, { "checkpoint_type": "bytes", "bytes_threshold": 42000000, "cumulative_training_bytes": 42003174, "metrics": { "loss": 0.6905112212804828, "ce_loss": 0.680511229574588, "lb_loss": 0.9999999960034074 } }, { "checkpoint_type": "bytes", "bytes_threshold": 43000000, "cumulative_training_bytes": 43003031, "metrics": { "loss": 0.6871942206602144, "ce_loss": 0.6771942289832141, "lb_loss": 0.9999999960389319 } }, { "checkpoint_type": "bytes", "bytes_threshold": 44000000, "cumulative_training_bytes": 44004167, "metrics": { "loss": 0.6840069627062967, "ce_loss": 0.6740069710568778, "lb_loss": 0.9999999959686519 } }, { "checkpoint_type": "bytes", "bytes_threshold": 45000000, "cumulative_training_bytes": 45004421, "metrics": { "loss": 0.6809336089975044, "ce_loss": 0.6709336173744412, "lb_loss": 0.9999999959798602 } }, { "checkpoint_type": "bytes", "bytes_threshold": 46000000, "cumulative_training_bytes": 46004726, "metrics": { "loss": 0.6780240068711652, "ce_loss": 0.6680240152733119, "lb_loss": 0.9999999960059138 } }, { "checkpoint_type": "bytes", "bytes_threshold": 47000000, "cumulative_training_bytes": 47003808, "metrics": { "loss": 0.6751821455578549, "ce_loss": 0.6651821539841389, "lb_loss": 0.9999999959183123 } }, { "checkpoint_type": "bytes", "bytes_threshold": 48000000, "cumulative_training_bytes": 48003562, "metrics": { "loss": 0.6724254583939221, "ce_loss": 0.6624254668433378, "lb_loss": 0.9999999958784414 } }, { "checkpoint_type": "bytes", "bytes_threshold": 49000000, "cumulative_training_bytes": 49002795, "metrics": { "loss": 0.6697642958932843, "ce_loss": 0.6597643043648878, "lb_loss": 0.9999999957538348 } }, { "checkpoint_type": "bytes", "bytes_threshold": 50000000, "cumulative_training_bytes": 50002223, "metrics": { "loss": 0.6672086428501326, "ce_loss": 0.6572086513430363, "lb_loss": 0.9999999955707352 } }, { "checkpoint_type": "bytes", "bytes_threshold": 51000000, "cumulative_training_bytes": 51002866, "metrics": { "loss": 0.6648764597290906, "ce_loss": 0.6548764682424594, "lb_loss": 0.9999999957958673 } }, { "checkpoint_type": "bytes", "bytes_threshold": 52000000, "cumulative_training_bytes": 52002541, "metrics": { "loss": 0.662538409517926, "ce_loss": 0.6525384180509728, "lb_loss": 0.9999999958292347 } }, { "checkpoint_type": "bytes", "bytes_threshold": 53000000, "cumulative_training_bytes": 53002612, "metrics": { "loss": 0.6602227322941887, "ce_loss": 0.650222740846171, "lb_loss": 0.9999999958347279 } }, { "checkpoint_type": "bytes", "bytes_threshold": 54000000, "cumulative_training_bytes": 54002019, "metrics": { "loss": 0.6579624243212044, "ce_loss": 0.6479624328914211, "lb_loss": 0.9999999960620576 } }, { "checkpoint_type": "bytes", "bytes_threshold": 55000000, "cumulative_training_bytes": 55003666, "metrics": { "loss": 0.6557700134647078, "ce_loss": 0.6457700220524957, "lb_loss": 0.9999999959477048 } }, { "checkpoint_type": "bytes", "bytes_threshold": 56000000, "cumulative_training_bytes": 56004562, "metrics": { "loss": 0.653625767280107, "ce_loss": 0.6436257758848389, "lb_loss": 0.9999999958311384 } }, { "checkpoint_type": "bytes", "bytes_threshold": 57000000, "cumulative_training_bytes": 57003399, "metrics": { "loss": 0.6515486406938451, "ce_loss": 0.6415486493149263, "lb_loss": 0.999999995693914 } }, { "checkpoint_type": "bytes", "bytes_threshold": 58000000, "cumulative_training_bytes": 58004263, "metrics": { "loss": 0.6495315760820968, "ce_loss": 0.6395315847189635, "lb_loss": 0.9999999955675012 } }, { "checkpoint_type": "bytes", "bytes_threshold": 59000000, "cumulative_training_bytes": 59005055, "metrics": { "loss": 0.6475570260450345, "ce_loss": 0.6375570346971521, "lb_loss": 0.999999995678484 } }, { "checkpoint_type": "bytes", "bytes_threshold": 60000000, "cumulative_training_bytes": 60005248, "metrics": { "loss": 0.6456633104091682, "ce_loss": 0.635663319076028, "lb_loss": 0.9999999955741744 } }, { "checkpoint_type": "bytes", "bytes_threshold": 61000000, "cumulative_training_bytes": 61004559, "metrics": { "loss": 0.6438197430768832, "ce_loss": 0.633819751758002, "lb_loss": 0.9999999955889094 } }, { "checkpoint_type": "bytes", "bytes_threshold": 62000000, "cumulative_training_bytes": 62004749, "metrics": { "loss": 0.642214755655211, "ce_loss": 0.6322147643501288, "lb_loss": 0.999999995517849 } }, { "checkpoint_type": "bytes", "bytes_threshold": 63000000, "cumulative_training_bytes": 63005338, "metrics": { "loss": 0.6405543280803199, "ce_loss": 0.6305543367885988, "lb_loss": 0.999999995768115 } }, { "checkpoint_type": "bytes", "bytes_threshold": 64000000, "cumulative_training_bytes": 64005014, "metrics": { "loss": 0.638879157175052, "ce_loss": 0.6288791658962744, "lb_loss": 0.9999999957625986 } }, { "checkpoint_type": "bytes", "bytes_threshold": 65000000, "cumulative_training_bytes": 65000026, "metrics": { "loss": 0.6372423956814168, "ce_loss": 0.6272424044151115, "lb_loss": 0.9999999956700495 } }, { "checkpoint_type": "bytes", "bytes_threshold": 66000000, "cumulative_training_bytes": 66005586, "metrics": { "loss": 0.6356362670272628, "ce_loss": 0.6256362757731959, "lb_loss": 0.9999999957306938 } }, { "checkpoint_type": "bytes", "bytes_threshold": 67000000, "cumulative_training_bytes": 67004497, "metrics": { "loss": 0.6340658579854652, "ce_loss": 0.6240658667432003, "lb_loss": 0.9999999956786106 } }, { "checkpoint_type": "bytes", "bytes_threshold": 68000000, "cumulative_training_bytes": 68004301, "metrics": { "loss": 0.6325890467972416, "ce_loss": 0.6225890555664317, "lb_loss": 0.9999999957162241 } }, { "checkpoint_type": "bytes", "bytes_threshold": 69000000, "cumulative_training_bytes": 69003767, "metrics": { "loss": 0.6311319261365108, "ce_loss": 0.6211319349168238, "lb_loss": 0.9999999957016372 } }, { "checkpoint_type": "bytes", "bytes_threshold": 70000000, "cumulative_training_bytes": 70004275, "metrics": { "loss": 0.6296715307554119, "ce_loss": 0.6196715395465303, "lb_loss": 0.9999999958637974 } }, { "checkpoint_type": "bytes", "bytes_threshold": 71000000, "cumulative_training_bytes": 71003661, "metrics": { "loss": 0.6282464104692141, "ce_loss": 0.6182464192708333, "lb_loss": 0.9999999958972137 } }, { "epoch": 1, "checkpoint_type": "epoch", "metrics": { "loss": 0.6273426957392759, "ce_loss": 0.6173427045473319, "lb_loss": 0.9999999960463795, "training_bytes": 71629728 }, "cumulative_training_bytes": 71629728, "training_bytes_this_epoch": 71629728 }, { "checkpoint_type": "bytes", "bytes_threshold": 72000000, "cumulative_training_bytes": 72002782, "metrics": { "loss": 0.5245709230029394, "ce_loss": 0.5145709325396826, "lb_loss": 0.9999999943233672 } }, { "checkpoint_type": "bytes", "bytes_threshold": 73000000, "cumulative_training_bytes": 73002022, "metrics": { "loss": 0.5237897264546362, "ce_loss": 0.5137897359913793, "lb_loss": 0.9999999958893349 } }, { "checkpoint_type": "bytes", "bytes_threshold": 74000000, "cumulative_training_bytes": 74002053, "metrics": { "loss": 0.523175060600414, "ce_loss": 0.5131750701371571, "lb_loss": 0.9999999982163199 } }, { "checkpoint_type": "bytes", "bytes_threshold": 75000000, "cumulative_training_bytes": 75001463, "metrics": { "loss": 0.5227672602000989, "ce_loss": 0.5127672697368421, "lb_loss": 0.9999999993725827 } }, { "checkpoint_type": "bytes", "bytes_threshold": 76000000, "cumulative_training_bytes": 76001540, "metrics": { "loss": 0.5225935501723231, "ce_loss": 0.5125935597090663, "lb_loss": 0.999999997419014 } }, { "checkpoint_type": "bytes", "bytes_threshold": 77000000, "cumulative_training_bytes": 77001527, "metrics": { "loss": 0.5224307276603934, "ce_loss": 0.5124307371971366, "lb_loss": 0.9999999961926549 } }, { "checkpoint_type": "bytes", "bytes_threshold": 78000000, "cumulative_training_bytes": 78001457, "metrics": { "loss": 0.522277294316553, "ce_loss": 0.5122773038532962, "lb_loss": 0.9999999968454366 } }, { "checkpoint_type": "bytes", "bytes_threshold": 79000000, "cumulative_training_bytes": 79001702, "metrics": { "loss": 0.522033811189581, "ce_loss": 0.5120338207263242, "lb_loss": 0.9999999971297924 } }, { "checkpoint_type": "bytes", "bytes_threshold": 80000000, "cumulative_training_bytes": 80002593, "metrics": { "loss": 0.5217684312759777, "ce_loss": 0.5117684408127209, "lb_loss": 0.999999997514718 } }, { "checkpoint_type": "bytes", "bytes_threshold": 81000000, "cumulative_training_bytes": 81002708, "metrics": { "loss": 0.5216262629537871, "ce_loss": 0.5116262724905303, "lb_loss": 0.9999999968015184 } }, { "checkpoint_type": "bytes", "bytes_threshold": 82000000, "cumulative_training_bytes": 82002370, "metrics": { "loss": 0.5214791957541867, "ce_loss": 0.5114792052909298, "lb_loss": 0.9999999966678521 } }, { "checkpoint_type": "bytes", "bytes_threshold": 83000000, "cumulative_training_bytes": 83002802, "metrics": { "loss": 0.5213061656713733, "ce_loss": 0.5113061752081165, "lb_loss": 0.9999999966197157 } }, { "checkpoint_type": "bytes", "bytes_threshold": 84000000, "cumulative_training_bytes": 84001825, "metrics": { "loss": 0.5217131360873601, "ce_loss": 0.5117131456241033, "lb_loss": 0.9999999977195736 } }, { "checkpoint_type": "bytes", "bytes_threshold": 85000000, "cumulative_training_bytes": 85001219, "metrics": { "loss": 0.5217455311159117, "ce_loss": 0.5117455406526549, "lb_loss": 0.9999999976791112 } }, { "checkpoint_type": "bytes", "bytes_threshold": 86000000, "cumulative_training_bytes": 86001317, "metrics": { "loss": 0.5215764172335327, "ce_loss": 0.5115764267702758, "lb_loss": 0.9999999972025815 } }, { "checkpoint_type": "bytes", "bytes_threshold": 87000000, "cumulative_training_bytes": 87002232, "metrics": { "loss": 0.5213811911464747, "ce_loss": 0.5113812006832179, "lb_loss": 0.9999999972010136 } }, { "checkpoint_type": "bytes", "bytes_threshold": 88000000, "cumulative_training_bytes": 88002341, "metrics": { "loss": 0.5211851074672322, "ce_loss": 0.5111851170039754, "lb_loss": 0.999999997091931 } }, { "checkpoint_type": "bytes", "bytes_threshold": 89000000, "cumulative_training_bytes": 89002058, "metrics": { "loss": 0.5209484002895511, "ce_loss": 0.5109484098262943, "lb_loss": 0.9999999970157075 } }, { "checkpoint_type": "bytes", "bytes_threshold": 90000000, "cumulative_training_bytes": 90002506, "metrics": { "loss": 0.5207305409866063, "ce_loss": 0.5107305505233495, "lb_loss": 0.999999997312512 } }, { "checkpoint_type": "bytes", "bytes_threshold": 91000000, "cumulative_training_bytes": 91001385, "metrics": { "loss": 0.5205339798493289, "ce_loss": 0.5105339893860721, "lb_loss": 0.999999997414826 } }, { "checkpoint_type": "bytes", "bytes_threshold": 92000000, "cumulative_training_bytes": 92001245, "metrics": { "loss": 0.5203743006578546, "ce_loss": 0.5103743101945978, "lb_loss": 0.9999999980091391 } }, { "checkpoint_type": "bytes", "bytes_threshold": 93000000, "cumulative_training_bytes": 93002978, "metrics": { "loss": 0.5201673795482513, "ce_loss": 0.5101673890849945, "lb_loss": 0.999999998036281 } }, { "checkpoint_type": "bytes", "bytes_threshold": 94000000, "cumulative_training_bytes": 94003884, "metrics": { "loss": 0.519991870091662, "ce_loss": 0.5099918796284052, "lb_loss": 0.9999999981082895 } }, { "checkpoint_type": "bytes", "bytes_threshold": 95000000, "cumulative_training_bytes": 95004709, "metrics": { "loss": 0.5197606708430037, "ce_loss": 0.5097606803797469, "lb_loss": 0.999999998113777 } }, { "checkpoint_type": "bytes", "bytes_threshold": 96000000, "cumulative_training_bytes": 96004703, "metrics": { "loss": 0.5195778423387121, "ce_loss": 0.5095778518754552, "lb_loss": 0.999999998133285 } }, { "checkpoint_type": "bytes", "bytes_threshold": 97000000, "cumulative_training_bytes": 97004722, "metrics": { "loss": 0.5193511229842457, "ce_loss": 0.5093511325209888, "lb_loss": 0.9999999983041589 } }, { "checkpoint_type": "bytes", "bytes_threshold": 98000000, "cumulative_training_bytes": 98005936, "metrics": { "loss": 0.5191288888253838, "ce_loss": 0.5091288983621269, "lb_loss": 0.999999998542314 } }, { "checkpoint_type": "bytes", "bytes_threshold": 99000000, "cumulative_training_bytes": 99005325, "metrics": { "loss": 0.5189959339079174, "ce_loss": 0.5089959434446606, "lb_loss": 0.9999999980415248 } }, { "checkpoint_type": "bytes", "bytes_threshold": 100000000, "cumulative_training_bytes": 100005645, "metrics": { "loss": 0.5188124741702432, "ce_loss": 0.5088124837069864, "lb_loss": 0.9999999982224267 } }, { "checkpoint_type": "bytes", "bytes_threshold": 101000000, "cumulative_training_bytes": 101005293, "metrics": { "loss": 0.5186718025099933, "ce_loss": 0.5086718120467365, "lb_loss": 0.9999999980307893 } }, { "checkpoint_type": "bytes", "bytes_threshold": 102000000, "cumulative_training_bytes": 102004390, "metrics": { "loss": 0.5184943529218581, "ce_loss": 0.5084943624586012, "lb_loss": 0.9999999979678915 } }, { "checkpoint_type": "bytes", "bytes_threshold": 103000000, "cumulative_training_bytes": 103003700, "metrics": { "loss": 0.5183400036658219, "ce_loss": 0.508340013202565, "lb_loss": 0.9999999980888741 } }, { "checkpoint_type": "bytes", "bytes_threshold": 104000000, "cumulative_training_bytes": 104004107, "metrics": { "loss": 0.5181776973495664, "ce_loss": 0.5081777068863096, "lb_loss": 0.9999999981152251 } }, { "checkpoint_type": "bytes", "bytes_threshold": 105000000, "cumulative_training_bytes": 105003741, "metrics": { "loss": 0.5180538606981859, "ce_loss": 0.5080538702349291, "lb_loss": 0.9999999981928379 } }, { "checkpoint_type": "bytes", "bytes_threshold": 106000000, "cumulative_training_bytes": 106003973, "metrics": { "loss": 0.5178760367922292, "ce_loss": 0.5078760463289723, "lb_loss": 0.9999999980915022 } }, { "checkpoint_type": "bytes", "bytes_threshold": 107000000, "cumulative_training_bytes": 107004552, "metrics": { "loss": 0.5179294557944712, "ce_loss": 0.5079294653312144, "lb_loss": 0.999999998235192 } }, { "checkpoint_type": "bytes", "bytes_threshold": 108000000, "cumulative_training_bytes": 108004388, "metrics": { "loss": 0.518020289999616, "ce_loss": 0.5080202995363592, "lb_loss": 0.9999999982255328 } }, { "checkpoint_type": "bytes", "bytes_threshold": 109000000, "cumulative_training_bytes": 109004031, "metrics": { "loss": 0.5179760755052137, "ce_loss": 0.5079760850419569, "lb_loss": 0.9999999984334434 } }, { "checkpoint_type": "bytes", "bytes_threshold": 110000000, "cumulative_training_bytes": 110003930, "metrics": { "loss": 0.5178546550738967, "ce_loss": 0.5078546646106399, "lb_loss": 0.9999999985202239 } }, { "checkpoint_type": "bytes", "bytes_threshold": 111000000, "cumulative_training_bytes": 111002483, "metrics": { "loss": 0.5177637650725144, "ce_loss": 0.5077637746092576, "lb_loss": 0.9999999984951037 } }, { "checkpoint_type": "bytes", "bytes_threshold": 112000000, "cumulative_training_bytes": 112000941, "metrics": { "loss": 0.5176624922952955, "ce_loss": 0.5076625018320386, "lb_loss": 0.999999998462492 } }, { "checkpoint_type": "bytes", "bytes_threshold": 113000000, "cumulative_training_bytes": 113001291, "metrics": { "loss": 0.5176278486404593, "ce_loss": 0.5076278581772025, "lb_loss": 0.999999998840642 } }, { "checkpoint_type": "bytes", "bytes_threshold": 114000000, "cumulative_training_bytes": 114005949, "metrics": { "loss": 0.5175975976260605, "ce_loss": 0.5075976071628037, "lb_loss": 0.999999998751648 } }, { "checkpoint_type": "bytes", "bytes_threshold": 115000000, "cumulative_training_bytes": 115005446, "metrics": { "loss": 0.5174949178265088, "ce_loss": 0.507494927363252, "lb_loss": 0.9999999990487323 } }, { "checkpoint_type": "bytes", "bytes_threshold": 116000000, "cumulative_training_bytes": 116005050, "metrics": { "loss": 0.5173872300465902, "ce_loss": 0.5073872395833333, "lb_loss": 0.9999999989589056 } }, { "checkpoint_type": "bytes", "bytes_threshold": 117000000, "cumulative_training_bytes": 117004462, "metrics": { "loss": 0.5172662063160408, "ce_loss": 0.507266215852784, "lb_loss": 0.9999999990906581 } }, { "checkpoint_type": "bytes", "bytes_threshold": 118000000, "cumulative_training_bytes": 118004814, "metrics": { "loss": 0.5171274763493247, "ce_loss": 0.5071274858860678, "lb_loss": 0.9999999993155884 } }, { "checkpoint_type": "bytes", "bytes_threshold": 119000000, "cumulative_training_bytes": 119005101, "metrics": { "loss": 0.5169919194004368, "ce_loss": 0.5069919289371799, "lb_loss": 0.9999999993077018 } }, { "checkpoint_type": "bytes", "bytes_threshold": 120000000, "cumulative_training_bytes": 120004824, "metrics": { "loss": 0.5170007587179046, "ce_loss": 0.5070007682546478, "lb_loss": 0.9999999993511726 } }, { "checkpoint_type": "bytes", "bytes_threshold": 121000000, "cumulative_training_bytes": 121005427, "metrics": { "loss": 0.51693738748243, "ce_loss": 0.5069373970191732, "lb_loss": 0.9999999995357337 } }, { "checkpoint_type": "bytes", "bytes_threshold": 122000000, "cumulative_training_bytes": 122001139, "metrics": { "loss": 0.5168287009648427, "ce_loss": 0.5068287105015858, "lb_loss": 0.999999999698931 } }, { "checkpoint_type": "bytes", "bytes_threshold": 123000000, "cumulative_training_bytes": 123001524, "metrics": { "loss": 0.5166915019957379, "ce_loss": 0.506691511532481, "lb_loss": 0.9999999996018119 } }, { "checkpoint_type": "bytes", "bytes_threshold": 124000000, "cumulative_training_bytes": 124002811, "metrics": { "loss": 0.5165513776652679, "ce_loss": 0.5065513872020111, "lb_loss": 0.9999999995420725 } }, { "checkpoint_type": "bytes", "bytes_threshold": 125000000, "cumulative_training_bytes": 125003361, "metrics": { "loss": 0.516395933506494, "ce_loss": 0.5063959430432372, "lb_loss": 0.9999999994845719 } }, { "checkpoint_type": "bytes", "bytes_threshold": 126000000, "cumulative_training_bytes": 126002775, "metrics": { "loss": 0.5162708629058512, "ce_loss": 0.5062708724425944, "lb_loss": 0.9999999993059422 } }, { "checkpoint_type": "bytes", "bytes_threshold": 127000000, "cumulative_training_bytes": 127003533, "metrics": { "loss": 0.5161498922852273, "ce_loss": 0.5061499018219705, "lb_loss": 0.9999999994267559 } }, { "checkpoint_type": "bytes", "bytes_threshold": 128000000, "cumulative_training_bytes": 128003350, "metrics": { "loss": 0.5160164026995326, "ce_loss": 0.5060164122362758, "lb_loss": 0.9999999997184623 } }, { "checkpoint_type": "bytes", "bytes_threshold": 129000000, "cumulative_training_bytes": 129003039, "metrics": { "loss": 0.5158909909402577, "ce_loss": 0.5058910004770009, "lb_loss": 0.999999999569686 } }, { "checkpoint_type": "bytes", "bytes_threshold": 130000000, "cumulative_training_bytes": 130003634, "metrics": { "loss": 0.515758194974154, "ce_loss": 0.5057582045108971, "lb_loss": 0.9999999994441331 } }, { "checkpoint_type": "bytes", "bytes_threshold": 131000000, "cumulative_training_bytes": 131003813, "metrics": { "loss": 0.5156362957751962, "ce_loss": 0.5056363053119394, "lb_loss": 0.9999999995544799 } }, { "checkpoint_type": "bytes", "bytes_threshold": 132000000, "cumulative_training_bytes": 132004994, "metrics": { "loss": 0.5154975693003636, "ce_loss": 0.5054975788371068, "lb_loss": 0.9999999996085944 } }, { "checkpoint_type": "bytes", "bytes_threshold": 133000000, "cumulative_training_bytes": 133003835, "metrics": { "loss": 0.5153652466819225, "ce_loss": 0.5053652562186657, "lb_loss": 0.9999999995804918 } }, { "checkpoint_type": "bytes", "bytes_threshold": 134000000, "cumulative_training_bytes": 134003355, "metrics": { "loss": 0.5152516195188492, "ce_loss": 0.5052516290555924, "lb_loss": 0.9999999994910902 } }, { "checkpoint_type": "bytes", "bytes_threshold": 135000000, "cumulative_training_bytes": 135002497, "metrics": { "loss": 0.515110941093509, "ce_loss": 0.5051109506302521, "lb_loss": 0.9999999995269473 } }, { "checkpoint_type": "bytes", "bytes_threshold": 136000000, "cumulative_training_bytes": 136002211, "metrics": { "loss": 0.5149785831073418, "ce_loss": 0.504978592644085, "lb_loss": 0.9999999994904649 } }, { "checkpoint_type": "bytes", "bytes_threshold": 137000000, "cumulative_training_bytes": 137002818, "metrics": { "loss": 0.5148538101591293, "ce_loss": 0.5048538196958725, "lb_loss": 0.9999999994011481 } }, { "checkpoint_type": "bytes", "bytes_threshold": 138000000, "cumulative_training_bytes": 138002176, "metrics": { "loss": 0.514731752242253, "ce_loss": 0.5047317617789961, "lb_loss": 0.9999999992188747 } }, { "checkpoint_type": "bytes", "bytes_threshold": 139000000, "cumulative_training_bytes": 139002858, "metrics": { "loss": 0.5145917039710735, "ce_loss": 0.5045917135078166, "lb_loss": 0.9999999994450999 } }, { "checkpoint_type": "bytes", "bytes_threshold": 140000000, "cumulative_training_bytes": 140002848, "metrics": { "loss": 0.5144674152793538, "ce_loss": 0.504467424816097, "lb_loss": 0.999999999458374 } }, { "checkpoint_type": "bytes", "bytes_threshold": 141000000, "cumulative_training_bytes": 141001755, "metrics": { "loss": 0.5143557057694663, "ce_loss": 0.5043557153062095, "lb_loss": 0.9999999994610975 } }, { "checkpoint_type": "bytes", "bytes_threshold": 142000000, "cumulative_training_bytes": 142001377, "metrics": { "loss": 0.5142396167139925, "ce_loss": 0.5042396262507357, "lb_loss": 0.9999999993935792 } }, { "checkpoint_type": "bytes", "bytes_threshold": 143000000, "cumulative_training_bytes": 143001209, "metrics": { "loss": 0.5141645089407066, "ce_loss": 0.5041645184774498, "lb_loss": 0.9999999995503215 } }, { "epoch": 2, "checkpoint_type": "epoch", "metrics": { "loss": 0.5141700236596057, "ce_loss": 0.5041700331963489, "lb_loss": 0.9999999995568793, "training_bytes": 71629742 }, "cumulative_training_bytes": 143259470, "training_bytes_this_epoch": 71629742 }, { "checkpoint_type": "bytes", "bytes_threshold": 144000000, "cumulative_training_bytes": 144004798, "metrics": { "loss": 0.5033345638759552, "ce_loss": 0.49333457341269843, "lb_loss": 1.000000001892211 } }, { "checkpoint_type": "bytes", "bytes_threshold": 145000000, "cumulative_training_bytes": 145004453, "metrics": { "loss": 0.5023530095310534, "ce_loss": 0.4923530190677966, "lb_loss": 1.0000000127291275 } }, { "checkpoint_type": "bytes", "bytes_threshold": 146000000, "cumulative_training_bytes": 146003901, "metrics": { "loss": 0.5018044422412741, "ce_loss": 0.49180445177801724, "lb_loss": 1.0000000069367474 } }, { "checkpoint_type": "bytes", "bytes_threshold": 147000000, "cumulative_training_bytes": 147004659, "metrics": { "loss": 0.5011353330777908, "ce_loss": 0.49113534261453395, "lb_loss": 1.0000000053672429 } }, { "checkpoint_type": "bytes", "bytes_threshold": 148000000, "cumulative_training_bytes": 148005351, "metrics": { "loss": 0.5010307144345786, "ce_loss": 0.4910307239713217, "lb_loss": 1.0000000031957603 } }, { "checkpoint_type": "bytes", "bytes_threshold": 149000000, "cumulative_training_bytes": 149005165, "metrics": { "loss": 0.5008639515858109, "ce_loss": 0.4908639611225541, "lb_loss": 1.0000000036217034 } }, { "checkpoint_type": "bytes", "bytes_threshold": 150000000, "cumulative_training_bytes": 150005636, "metrics": { "loss": 0.5008031702878183, "ce_loss": 0.4908031798245614, "lb_loss": 1.0000000014639738 } }, { "checkpoint_type": "bytes", "bytes_threshold": 151000000, "cumulative_training_bytes": 151000050, "metrics": { "loss": 0.5007569906543884, "ce_loss": 0.4907570001911315, "lb_loss": 1.000000002096188 } }, { "checkpoint_type": "bytes", "bytes_threshold": 152000000, "cumulative_training_bytes": 152000047, "metrics": { "loss": 0.500738184437529, "ce_loss": 0.49073819397427215, "lb_loss": 1.0000000011703012 } }, { "checkpoint_type": "bytes", "bytes_threshold": 153000000, "cumulative_training_bytes": 153000782, "metrics": { "loss": 0.5005915286619202, "ce_loss": 0.4905915381986634, "lb_loss": 1.0000000017019552 } }, { "checkpoint_type": "bytes", "bytes_threshold": 154000000, "cumulative_training_bytes": 154000682, "metrics": { "loss": 0.50057656587648, "ce_loss": 0.49057657541322314, "lb_loss": 1.0000000016420012 } }, { "checkpoint_type": "bytes", "bytes_threshold": 155000000, "cumulative_training_bytes": 155000553, "metrics": { "loss": 0.5005808876406762, "ce_loss": 0.4905808971774194, "lb_loss": 1.000000001982816 } }, { "checkpoint_type": "bytes", "bytes_threshold": 156000000, "cumulative_training_bytes": 156000853, "metrics": { "loss": 0.5005047004609345, "ce_loss": 0.49050470999767765, "lb_loss": 1.000000002159388 } }, { "checkpoint_type": "bytes", "bytes_threshold": 157000000, "cumulative_training_bytes": 157005344, "metrics": { "loss": 0.5004092470818535, "ce_loss": 0.49040925661859663, "lb_loss": 1.0000000026171647 } }, { "checkpoint_type": "bytes", "bytes_threshold": 158000000, "cumulative_training_bytes": 158005059, "metrics": { "loss": 0.5003464428609294, "ce_loss": 0.49034645239767255, "lb_loss": 1.0000000022244109 } }, { "checkpoint_type": "bytes", "bytes_threshold": 159000000, "cumulative_training_bytes": 159004522, "metrics": { "loss": 0.5003165714008743, "ce_loss": 0.49031658093761743, "lb_loss": 1.0000000018591453 } }, { "checkpoint_type": "bytes", "bytes_threshold": 160000000, "cumulative_training_bytes": 160005391, "metrics": { "loss": 0.5002716335727975, "ce_loss": 0.49027164310954063, "lb_loss": 1.0000000016217518 } }, { "checkpoint_type": "bytes", "bytes_threshold": 161000000, "cumulative_training_bytes": 161005358, "metrics": { "loss": 0.5002122226656577, "ce_loss": 0.4902122322024008, "lb_loss": 1.0000000014111137 } }, { "checkpoint_type": "bytes", "bytes_threshold": 162000000, "cumulative_training_bytes": 162005279, "metrics": { "loss": 0.5003447219578907, "ce_loss": 0.4903447314946338, "lb_loss": 1.0000000011100612 } }, { "checkpoint_type": "bytes", "bytes_threshold": 163000000, "cumulative_training_bytes": 163004202, "metrics": { "loss": 0.5003701535963104, "ce_loss": 0.49037016313305365, "lb_loss": 1.0000000013574926 } }, { "checkpoint_type": "bytes", "bytes_threshold": 164000000, "cumulative_training_bytes": 164003475, "metrics": { "loss": 0.5003257267011347, "ce_loss": 0.4903257362378779, "lb_loss": 1.0000000012580559 } }, { "checkpoint_type": "bytes", "bytes_threshold": 165000000, "cumulative_training_bytes": 165003971, "metrics": { "loss": 0.5002354283884269, "ce_loss": 0.49023543792517005, "lb_loss": 1.0000000012650783 } }, { "checkpoint_type": "bytes", "bytes_threshold": 166000000, "cumulative_training_bytes": 166003094, "metrics": { "loss": 0.5001840638568572, "ce_loss": 0.49018407339360043, "lb_loss": 1.0000000017211539 } }, { "checkpoint_type": "bytes", "bytes_threshold": 167000000, "cumulative_training_bytes": 167003137, "metrics": { "loss": 0.5001199910862322, "ce_loss": 0.49012000062297534, "lb_loss": 1.0000000016635235 } }, { "checkpoint_type": "bytes", "bytes_threshold": 168000000, "cumulative_training_bytes": 168003662, "metrics": { "loss": 0.50006763528033, "ce_loss": 0.49006764481707316, "lb_loss": 1.0000000019241098 } }, { "checkpoint_type": "bytes", "bytes_threshold": 169000000, "cumulative_training_bytes": 169003714, "metrics": { "loss": 0.500026528902696, "ce_loss": 0.4900265384394392, "lb_loss": 1.0000000021370545 } }, { "checkpoint_type": "bytes", "bytes_threshold": 170000000, "cumulative_training_bytes": 170005211, "metrics": { "loss": 0.4999621378636993, "ce_loss": 0.48996214740044247, "lb_loss": 1.0000000022153939 } }, { "checkpoint_type": "bytes", "bytes_threshold": 171000000, "cumulative_training_bytes": 171005841, "metrics": { "loss": 0.49993529449929863, "ce_loss": 0.4899353040360418, "lb_loss": 1.0000000024406253 } }, { "checkpoint_type": "bytes", "bytes_threshold": 172000000, "cumulative_training_bytes": 172005501, "metrics": { "loss": 0.49990348405629925, "ce_loss": 0.4899034935930424, "lb_loss": 1.0000000021226025 } }, { "checkpoint_type": "bytes", "bytes_threshold": 173000000, "cumulative_training_bytes": 173005085, "metrics": { "loss": 0.49985011230530974, "ce_loss": 0.4898501218420529, "lb_loss": 1.000000002193527 } }, { "checkpoint_type": "bytes", "bytes_threshold": 174000000, "cumulative_training_bytes": 174000244, "metrics": { "loss": 0.4997937374555571, "ce_loss": 0.48979374699230027, "lb_loss": 1.0000000023864806 } }, { "checkpoint_type": "bytes", "bytes_threshold": 175000000, "cumulative_training_bytes": 175005818, "metrics": { "loss": 0.49977711942411424, "ce_loss": 0.4897771289608574, "lb_loss": 1.000000002455289 } }, { "checkpoint_type": "bytes", "bytes_threshold": 176000000, "cumulative_training_bytes": 176000495, "metrics": { "loss": 0.4997098139089463, "ce_loss": 0.4897098234456895, "lb_loss": 1.0000000023053306 } }, { "checkpoint_type": "bytes", "bytes_threshold": 177000000, "cumulative_training_bytes": 177000876, "metrics": { "loss": 0.49965548414968264, "ce_loss": 0.4896554936864258, "lb_loss": 1.00000000232063 } }, { "checkpoint_type": "bytes", "bytes_threshold": 178000000, "cumulative_training_bytes": 178000666, "metrics": { "loss": 0.49961792182077686, "ce_loss": 0.48961793135752, "lb_loss": 1.0000000020812387 } }, { "checkpoint_type": "bytes", "bytes_threshold": 179000000, "cumulative_training_bytes": 179000024, "metrics": { "loss": 0.4995814913945482, "ce_loss": 0.4895815009312914, "lb_loss": 1.00000000217103 } }, { "checkpoint_type": "bytes", "bytes_threshold": 180000000, "cumulative_training_bytes": 180005268, "metrics": { "loss": 0.49955155799546297, "ce_loss": 0.48955156753220613, "lb_loss": 1.0000000022171776 } }, { "checkpoint_type": "bytes", "bytes_threshold": 181000000, "cumulative_training_bytes": 181005588, "metrics": { "loss": 0.49950381894342616, "ce_loss": 0.4895038284801693, "lb_loss": 1.0000000026256317 } }, { "checkpoint_type": "bytes", "bytes_threshold": 182000000, "cumulative_training_bytes": 182000786, "metrics": { "loss": 0.4994432111177551, "ce_loss": 0.48944322065449825, "lb_loss": 1.0000000024125906 } }, { "checkpoint_type": "bytes", "bytes_threshold": 183000000, "cumulative_training_bytes": 183000940, "metrics": { "loss": 0.49941193581763443, "ce_loss": 0.4894119453543776, "lb_loss": 1.0000000023341307 } }, { "checkpoint_type": "bytes", "bytes_threshold": 184000000, "cumulative_training_bytes": 184001325, "metrics": { "loss": 0.49934361564481095, "ce_loss": 0.4893436251815541, "lb_loss": 1.00000000238938 } }, { "checkpoint_type": "bytes", "bytes_threshold": 185000000, "cumulative_training_bytes": 185001001, "metrics": { "loss": 0.4993054266519441, "ce_loss": 0.48930543618868727, "lb_loss": 1.0000000024081832 } }, { "checkpoint_type": "bytes", "bytes_threshold": 186000000, "cumulative_training_bytes": 186000105, "metrics": { "loss": 0.49926686148291627, "ce_loss": 0.48926687101965943, "lb_loss": 1.0000000024673665 } }, { "checkpoint_type": "bytes", "bytes_threshold": 187000000, "cumulative_training_bytes": 187005589, "metrics": { "loss": 0.4993321712423722, "ce_loss": 0.4893321807791154, "lb_loss": 1.0000000023138824 } }, { "checkpoint_type": "bytes", "bytes_threshold": 188000000, "cumulative_training_bytes": 188000312, "metrics": { "loss": 0.49934911756284683, "ce_loss": 0.48934912709959, "lb_loss": 1.000000002364951 } }, { "checkpoint_type": "bytes", "bytes_threshold": 189000000, "cumulative_training_bytes": 189005692, "metrics": { "loss": 0.49930618266025595, "ce_loss": 0.4893061921969991, "lb_loss": 1.0000000023283666 } }, { "checkpoint_type": "bytes", "bytes_threshold": 190000000, "cumulative_training_bytes": 190000372, "metrics": { "loss": 0.4992524868235556, "ce_loss": 0.48925249636029877, "lb_loss": 1.0000000023543043 } }, { "checkpoint_type": "bytes", "bytes_threshold": 191000000, "cumulative_training_bytes": 191000957, "metrics": { "loss": 0.4992190696960283, "ce_loss": 0.48921907923277147, "lb_loss": 1.0000000023862543 } }, { "checkpoint_type": "bytes", "bytes_threshold": 192000000, "cumulative_training_bytes": 192000891, "metrics": { "loss": 0.49916283798055683, "ce_loss": 0.4891628475173, "lb_loss": 1.0000000025326727 } }, { "checkpoint_type": "bytes", "bytes_threshold": 193000000, "cumulative_training_bytes": 193001130, "metrics": { "loss": 0.4991221111954719, "ce_loss": 0.48912212073221506, "lb_loss": 1.0000000026519316 } }, { "checkpoint_type": "bytes", "bytes_threshold": 194000000, "cumulative_training_bytes": 194000556, "metrics": { "loss": 0.49906408485101195, "ce_loss": 0.4890640943877551, "lb_loss": 1.000000002599666 } }, { "checkpoint_type": "bytes", "bytes_threshold": 195000000, "cumulative_training_bytes": 195001954, "metrics": { "loss": 0.49900763141705373, "ce_loss": 0.4890076409537969, "lb_loss": 1.000000002699387 } }, { "checkpoint_type": "bytes", "bytes_threshold": 196000000, "cumulative_training_bytes": 196000689, "metrics": { "loss": 0.4989594545185693, "ce_loss": 0.4889594640553125, "lb_loss": 1.000000002741827 } }, { "checkpoint_type": "bytes", "bytes_threshold": 197000000, "cumulative_training_bytes": 197001752, "metrics": { "loss": 0.49890855444971355, "ce_loss": 0.4889085639864567, "lb_loss": 1.0000000027695617 } }, { "checkpoint_type": "bytes", "bytes_threshold": 198000000, "cumulative_training_bytes": 198002474, "metrics": { "loss": 0.49885149131451617, "ce_loss": 0.48885150085125934, "lb_loss": 1.000000002751182 } }, { "checkpoint_type": "bytes", "bytes_threshold": 199000000, "cumulative_training_bytes": 199002221, "metrics": { "loss": 0.49881285537833114, "ce_loss": 0.4888128649150743, "lb_loss": 1.0000000026512044 } }, { "checkpoint_type": "bytes", "bytes_threshold": 200000000, "cumulative_training_bytes": 200000993, "metrics": { "loss": 0.49877435920608715, "ce_loss": 0.4887743687428303, "lb_loss": 1.0000000026977178 } }, { "checkpoint_type": "bytes", "bytes_threshold": 201000000, "cumulative_training_bytes": 201000542, "metrics": { "loss": 0.4987275889721726, "ce_loss": 0.4887275985089158, "lb_loss": 1.0000000027365117 } }, { "checkpoint_type": "bytes", "bytes_threshold": 202000000, "cumulative_training_bytes": 202000266, "metrics": { "loss": 0.49867375425141036, "ce_loss": 0.4886737637881535, "lb_loss": 1.0000000025758429 } }, { "checkpoint_type": "bytes", "bytes_threshold": 203000000, "cumulative_training_bytes": 203004847, "metrics": { "loss": 0.4986276846422209, "ce_loss": 0.48862769417896407, "lb_loss": 1.0000000024321198 } }, { "checkpoint_type": "bytes", "bytes_threshold": 204000000, "cumulative_training_bytes": 204000174, "metrics": { "loss": 0.4986164967954536, "ce_loss": 0.4886165063321968, "lb_loss": 1.0000000022994096 } }, { "checkpoint_type": "bytes", "bytes_threshold": 205000000, "cumulative_training_bytes": 205000569, "metrics": { "loss": 0.4985904758954976, "ce_loss": 0.48859048543224076, "lb_loss": 1.000000002336429 } }, { "checkpoint_type": "bytes", "bytes_threshold": 206000000, "cumulative_training_bytes": 206000959, "metrics": { "loss": 0.4985317591843735, "ce_loss": 0.48853176872111664, "lb_loss": 1.0000000022767028 } }, { "checkpoint_type": "bytes", "bytes_threshold": 207000000, "cumulative_training_bytes": 207001002, "metrics": { "loss": 0.4984893900640738, "ce_loss": 0.48848939960081694, "lb_loss": 1.0000000022465174 } }, { "checkpoint_type": "bytes", "bytes_threshold": 208000000, "cumulative_training_bytes": 208000719, "metrics": { "loss": 0.49843654790773173, "ce_loss": 0.4884365574444749, "lb_loss": 1.000000002075621 } }, { "checkpoint_type": "bytes", "bytes_threshold": 209000000, "cumulative_training_bytes": 209005520, "metrics": { "loss": 0.49840692531385533, "ce_loss": 0.4884069348505985, "lb_loss": 1.0000000021457887 } }, { "checkpoint_type": "bytes", "bytes_threshold": 210000000, "cumulative_training_bytes": 210005103, "metrics": { "loss": 0.49835361690385965, "ce_loss": 0.4883536264406028, "lb_loss": 1.0000000021611968 } }, { "checkpoint_type": "bytes", "bytes_threshold": 211000000, "cumulative_training_bytes": 211004535, "metrics": { "loss": 0.49829659387840225, "ce_loss": 0.4882966034151454, "lb_loss": 1.0000000022542415 } }, { "checkpoint_type": "bytes", "bytes_threshold": 212000000, "cumulative_training_bytes": 212003237, "metrics": { "loss": 0.49826476548907883, "ce_loss": 0.488264775025822, "lb_loss": 1.0000000022368416 } }, { "checkpoint_type": "bytes", "bytes_threshold": 213000000, "cumulative_training_bytes": 213003872, "metrics": { "loss": 0.49821131437943567, "ce_loss": 0.48821132391617883, "lb_loss": 1.0000000022199405 } }, { "checkpoint_type": "bytes", "bytes_threshold": 214000000, "cumulative_training_bytes": 214003977, "metrics": { "loss": 0.4981701560704833, "ce_loss": 0.4881701656072265, "lb_loss": 1.000000002153664 } }, { "epoch": 3, "checkpoint_type": "epoch", "metrics": { "loss": 0.4981207118514115, "ce_loss": 0.48812072138815465, "lb_loss": 1.000000002058049, "training_bytes": 71629748 }, "cumulative_training_bytes": 214889218, "training_bytes_this_epoch": 71629748 }, { "checkpoint_type": "bytes", "bytes_threshold": 215000000, "cumulative_training_bytes": 215001580, "metrics": { "loss": 0.4880016352000989, "ce_loss": 0.4780016447368421, "lb_loss": 1.0000000313708657 } }, { "checkpoint_type": "bytes", "bytes_threshold": 216000000, "cumulative_training_bytes": 216001732, "metrics": { "loss": 0.4874559412611292, "ce_loss": 0.47745595079787234, "lb_loss": 1.0000000231443567 } }, { "checkpoint_type": "bytes", "bytes_threshold": 217000000, "cumulative_training_bytes": 217001957, "metrics": { "loss": 0.48738313121955934, "ce_loss": 0.4773831407563025, "lb_loss": 1.0000000128559037 } }, { "checkpoint_type": "bytes", "bytes_threshold": 218000000, "cumulative_training_bytes": 218001275, "metrics": { "loss": 0.4878064008720021, "ce_loss": 0.4778064104087453, "lb_loss": 1.0000000087253949 } }, { "checkpoint_type": "bytes", "bytes_threshold": 219000000, "cumulative_training_bytes": 219001184, "metrics": { "loss": 0.48801539064311295, "ce_loss": 0.4780154001798561, "lb_loss": 1.0000000052314868 } }, { "checkpoint_type": "bytes", "bytes_threshold": 220000000, "cumulative_training_bytes": 220001433, "metrics": { "loss": 0.4881019327375624, "ce_loss": 0.4781019422743056, "lb_loss": 1.000000007312607 } }, { "checkpoint_type": "bytes", "bytes_threshold": 221000000, "cumulative_training_bytes": 221002027, "metrics": { "loss": 0.4882320058311174, "ce_loss": 0.4782320153678606, "lb_loss": 1.00000000657786 } }, { "checkpoint_type": "bytes", "bytes_threshold": 222000000, "cumulative_training_bytes": 222000843, "metrics": { "loss": 0.4884359955589307, "ce_loss": 0.47843600509567386, "lb_loss": 1.0000000053059044 } }, { "checkpoint_type": "bytes", "bytes_threshold": 223000000, "cumulative_training_bytes": 223001364, "metrics": { "loss": 0.4885213138585887, "ce_loss": 0.4785213233953319, "lb_loss": 1.0000000031736973 } }, { "checkpoint_type": "bytes", "bytes_threshold": 224000000, "cumulative_training_bytes": 224001492, "metrics": { "loss": 0.48859297950546465, "ce_loss": 0.4785929890422078, "lb_loss": 1.0000000030189367 } }, { "checkpoint_type": "bytes", "bytes_threshold": 225000000, "cumulative_training_bytes": 225001545, "metrics": { "loss": 0.4886264715925722, "ce_loss": 0.4786264811293154, "lb_loss": 1.0000000019182302 } }, { "checkpoint_type": "bytes", "bytes_threshold": 226000000, "cumulative_training_bytes": 226001650, "metrics": { "loss": 0.4886456157960577, "ce_loss": 0.47864562533280086, "lb_loss": 1.0000000023486388 } }, { "checkpoint_type": "bytes", "bytes_threshold": 227000000, "cumulative_training_bytes": 227001584, "metrics": { "loss": 0.4887551046547566, "ce_loss": 0.47875511419149974, "lb_loss": 1.0000000021256175 } }, { "checkpoint_type": "bytes", "bytes_threshold": 228000000, "cumulative_training_bytes": 228001190, "metrics": { "loss": 0.4888778602173182, "ce_loss": 0.4788778697540614, "lb_loss": 1.000000001533152 } }, { "checkpoint_type": "bytes", "bytes_threshold": 229000000, "cumulative_training_bytes": 229001032, "metrics": { "loss": 0.4889627459664015, "ce_loss": 0.47896275550314465, "lb_loss": 1.0000000022742233 } }, { "checkpoint_type": "bytes", "bytes_threshold": 230000000, "cumulative_training_bytes": 230000968, "metrics": { "loss": 0.48901880898322553, "ce_loss": 0.4790188185199687, "lb_loss": 1.000000001867021 } }, { "checkpoint_type": "bytes", "bytes_threshold": 231000000, "cumulative_training_bytes": 231000807, "metrics": { "loss": 0.48906002305598545, "ce_loss": 0.4790600325927286, "lb_loss": 1.0000000024953837 } }, { "checkpoint_type": "bytes", "bytes_threshold": 232000000, "cumulative_training_bytes": 232001935, "metrics": { "loss": 0.48905724971291104, "ce_loss": 0.4790572592496542, "lb_loss": 1.000000002040408 } }, { "checkpoint_type": "bytes", "bytes_threshold": 233000000, "cumulative_training_bytes": 233002684, "metrics": { "loss": 0.4890720104322212, "ce_loss": 0.47907201996896437, "lb_loss": 1.0000000021419506 } }, { "checkpoint_type": "bytes", "bytes_threshold": 234000000, "cumulative_training_bytes": 234002126, "metrics": { "loss": 0.4890822031180556, "ce_loss": 0.4790822126547988, "lb_loss": 1.0000000019191588 } }, { "checkpoint_type": "bytes", "bytes_threshold": 235000000, "cumulative_training_bytes": 235001865, "metrics": { "loss": 0.48909425532056783, "ce_loss": 0.479094264857311, "lb_loss": 1.0000000020517044 } }, { "checkpoint_type": "bytes", "bytes_threshold": 236000000, "cumulative_training_bytes": 236002486, "metrics": { "loss": 0.4891166612171806, "ce_loss": 0.4791166707539238, "lb_loss": 1.0000000017707658 } }, { "checkpoint_type": "bytes", "bytes_threshold": 237000000, "cumulative_training_bytes": 237003585, "metrics": { "loss": 0.4891103856666285, "ce_loss": 0.4791103952033717, "lb_loss": 1.0000000018661341 } }, { "checkpoint_type": "bytes", "bytes_threshold": 238000000, "cumulative_training_bytes": 238002820, "metrics": { "loss": 0.48916915728993376, "ce_loss": 0.47916916682667693, "lb_loss": 1.0000000021668867 } }, { "checkpoint_type": "bytes", "bytes_threshold": 239000000, "cumulative_training_bytes": 239002121, "metrics": { "loss": 0.48922257481908504, "ce_loss": 0.4792225843558282, "lb_loss": 1.0000000026328433 } }, { "checkpoint_type": "bytes", "bytes_threshold": 240000000, "cumulative_training_bytes": 240002063, "metrics": { "loss": 0.48926897682635767, "ce_loss": 0.47926898636310084, "lb_loss": 1.000000002373512 } }, { "checkpoint_type": "bytes", "bytes_threshold": 241000000, "cumulative_training_bytes": 241003283, "metrics": { "loss": 0.48924765007689835, "ce_loss": 0.4792476596136415, "lb_loss": 1.0000000022420963 } }, { "checkpoint_type": "bytes", "bytes_threshold": 242000000, "cumulative_training_bytes": 242002834, "metrics": { "loss": 0.4892743589158976, "ce_loss": 0.47927436845264076, "lb_loss": 1.000000002783805 } }, { "checkpoint_type": "bytes", "bytes_threshold": 243000000, "cumulative_training_bytes": 243002432, "metrics": { "loss": 0.48929176785222755, "ce_loss": 0.4792917773889707, "lb_loss": 1.0000000026471438 } }, { "checkpoint_type": "bytes", "bytes_threshold": 244000000, "cumulative_training_bytes": 244001977, "metrics": { "loss": 0.48928654407098043, "ce_loss": 0.4792865536077236, "lb_loss": 1.0000000030771503 } }, { "checkpoint_type": "bytes", "bytes_threshold": 245000000, "cumulative_training_bytes": 245003684, "metrics": { "loss": 0.4892567206656542, "ce_loss": 0.47925673020239734, "lb_loss": 1.0000000031506484 } }, { "checkpoint_type": "bytes", "bytes_threshold": 246000000, "cumulative_training_bytes": 246003783, "metrics": { "loss": 0.4892511018649305, "ce_loss": 0.47925111140167365, "lb_loss": 1.0000000031400698 } }, { "checkpoint_type": "bytes", "bytes_threshold": 247000000, "cumulative_training_bytes": 247004190, "metrics": { "loss": 0.48922172035546246, "ce_loss": 0.47922172989220563, "lb_loss": 1.0000000033827585 } }, { "checkpoint_type": "bytes", "bytes_threshold": 248000000, "cumulative_training_bytes": 248001380, "metrics": { "loss": 0.489257635432878, "ce_loss": 0.47925764496962114, "lb_loss": 1.0000000028438956 } }, { "checkpoint_type": "bytes", "bytes_threshold": 249000000, "cumulative_training_bytes": 249000805, "metrics": { "loss": 0.48925180637826116, "ce_loss": 0.4792518159150043, "lb_loss": 1.0000000031430722 } }, { "checkpoint_type": "bytes", "bytes_threshold": 250000000, "cumulative_training_bytes": 250001009, "metrics": { "loss": 0.4892397265181945, "ce_loss": 0.47923973605493764, "lb_loss": 1.000000003174093 } }, { "checkpoint_type": "bytes", "bytes_threshold": 251000000, "cumulative_training_bytes": 251000542, "metrics": { "loss": 0.4892279956451346, "ce_loss": 0.47922800518187775, "lb_loss": 1.0000000031740963 } }, { "checkpoint_type": "bytes", "bytes_threshold": 252000000, "cumulative_training_bytes": 252000674, "metrics": { "loss": 0.4892209452025744, "ce_loss": 0.4792209547393176, "lb_loss": 1.0000000032216159 } }, { "checkpoint_type": "bytes", "bytes_threshold": 253000000, "cumulative_training_bytes": 253005686, "metrics": { "loss": 0.4892111244278641, "ce_loss": 0.47921113396460724, "lb_loss": 1.0000000030718321 } }, { "checkpoint_type": "bytes", "bytes_threshold": 254000000, "cumulative_training_bytes": 254000399, "metrics": { "loss": 0.489192856386101, "ce_loss": 0.47919286592284416, "lb_loss": 1.0000000032642786 } }, { "checkpoint_type": "bytes", "bytes_threshold": 255000000, "cumulative_training_bytes": 255000527, "metrics": { "loss": 0.4891710748783623, "ce_loss": 0.4791710844151055, "lb_loss": 1.000000002980672 } }, { "checkpoint_type": "bytes", "bytes_threshold": 256000000, "cumulative_training_bytes": 256001300, "metrics": { "loss": 0.48916581381170243, "ce_loss": 0.4791658233484456, "lb_loss": 1.0000000028566993 } }, { "checkpoint_type": "bytes", "bytes_threshold": 257000000, "cumulative_training_bytes": 257002023, "metrics": { "loss": 0.4891462577633833, "ce_loss": 0.47914626730012644, "lb_loss": 1.0000000031154879 } }, { "checkpoint_type": "bytes", "bytes_threshold": 258000000, "cumulative_training_bytes": 258002271, "metrics": { "loss": 0.4892640542499711, "ce_loss": 0.47926406378671427, "lb_loss": 1.000000003092308 } }, { "checkpoint_type": "bytes", "bytes_threshold": 259000000, "cumulative_training_bytes": 259002781, "metrics": { "loss": 0.4893791298831093, "ce_loss": 0.47937913941985244, "lb_loss": 1.000000003134141 } }, { "checkpoint_type": "bytes", "bytes_threshold": 260000000, "cumulative_training_bytes": 260002747, "metrics": { "loss": 0.4893935482249305, "ce_loss": 0.47939355776167364, "lb_loss": 1.0000000030177587 } }, { "checkpoint_type": "bytes", "bytes_threshold": 261000000, "cumulative_training_bytes": 261003435, "metrics": { "loss": 0.48941761685873997, "ce_loss": 0.47941762639548313, "lb_loss": 1.0000000031435274 } }, { "checkpoint_type": "bytes", "bytes_threshold": 262000000, "cumulative_training_bytes": 262003338, "metrics": { "loss": 0.4894158878508479, "ce_loss": 0.47941589738759105, "lb_loss": 1.000000002949539 } }, { "checkpoint_type": "bytes", "bytes_threshold": 263000000, "cumulative_training_bytes": 263003784, "metrics": { "loss": 0.4895384178553365, "ce_loss": 0.4795384273920797, "lb_loss": 1.0000000027929368 } }, { "checkpoint_type": "bytes", "bytes_threshold": 264000000, "cumulative_training_bytes": 264003117, "metrics": { "loss": 0.4896001881289195, "ce_loss": 0.47960019766566264, "lb_loss": 1.0000000026283493 } }, { "checkpoint_type": "bytes", "bytes_threshold": 265000000, "cumulative_training_bytes": 265003414, "metrics": { "loss": 0.4895905376057766, "ce_loss": 0.47959054714251975, "lb_loss": 1.0000000025266345 } }, { "checkpoint_type": "bytes", "bytes_threshold": 266000000, "cumulative_training_bytes": 266003442, "metrics": { "loss": 0.48961065880372917, "ce_loss": 0.47961066834047233, "lb_loss": 1.000000002311595 } }, { "checkpoint_type": "bytes", "bytes_threshold": 267000000, "cumulative_training_bytes": 267003434, "metrics": { "loss": 0.4895792225371753, "ce_loss": 0.47957923207391845, "lb_loss": 1.000000002152183 } }, { "checkpoint_type": "bytes", "bytes_threshold": 268000000, "cumulative_training_bytes": 268003818, "metrics": { "loss": 0.48956441921771315, "ce_loss": 0.4795644287544563, "lb_loss": 1.0000000020120552 } }, { "checkpoint_type": "bytes", "bytes_threshold": 269000000, "cumulative_training_bytes": 269002978, "metrics": { "loss": 0.48955592950098237, "ce_loss": 0.47955593903772553, "lb_loss": 1.000000002202993 } }, { "checkpoint_type": "bytes", "bytes_threshold": 270000000, "cumulative_training_bytes": 270002483, "metrics": { "loss": 0.48952992356128133, "ce_loss": 0.4795299330980245, "lb_loss": 1.0000000023806022 } }, { "checkpoint_type": "bytes", "bytes_threshold": 271000000, "cumulative_training_bytes": 271002540, "metrics": { "loss": 0.4895038147409116, "ce_loss": 0.47950382427765476, "lb_loss": 1.0000000024261724 } }, { "checkpoint_type": "bytes", "bytes_threshold": 272000000, "cumulative_training_bytes": 272002993, "metrics": { "loss": 0.48948651203909604, "ce_loss": 0.4794865215758392, "lb_loss": 1.0000000023651656 } }, { "checkpoint_type": "bytes", "bytes_threshold": 273000000, "cumulative_training_bytes": 273003373, "metrics": { "loss": 0.48946483302002297, "ce_loss": 0.47946484255676614, "lb_loss": 1.000000002154531 } }, { "checkpoint_type": "bytes", "bytes_threshold": 274000000, "cumulative_training_bytes": 274002590, "metrics": { "loss": 0.4894640248101037, "ce_loss": 0.47946403434684687, "lb_loss": 1.0000000023090088 } }, { "checkpoint_type": "bytes", "bytes_threshold": 275000000, "cumulative_training_bytes": 275002224, "metrics": { "loss": 0.48944997785128713, "ce_loss": 0.4794499873880303, "lb_loss": 1.0000000022060582 } }, { "checkpoint_type": "bytes", "bytes_threshold": 276000000, "cumulative_training_bytes": 276003651, "metrics": { "loss": 0.4894235311415101, "ce_loss": 0.47942354067825327, "lb_loss": 1.0000000022276716 } }, { "checkpoint_type": "bytes", "bytes_threshold": 277000000, "cumulative_training_bytes": 277002727, "metrics": { "loss": 0.48940091304828115, "ce_loss": 0.4794009225850243, "lb_loss": 1.000000002265624 } }, { "checkpoint_type": "bytes", "bytes_threshold": 278000000, "cumulative_training_bytes": 278002905, "metrics": { "loss": 0.48939109742697334, "ce_loss": 0.4793911069637165, "lb_loss": 1.0000000021459012 } }, { "checkpoint_type": "bytes", "bytes_threshold": 279000000, "cumulative_training_bytes": 279002296, "metrics": { "loss": 0.48937239470875754, "ce_loss": 0.4793724042455007, "lb_loss": 1.0000000020409159 } }, { "checkpoint_type": "bytes", "bytes_threshold": 280000000, "cumulative_training_bytes": 280002829, "metrics": { "loss": 0.48934805423552147, "ce_loss": 0.47934806377226463, "lb_loss": 1.000000002193737 } }, { "checkpoint_type": "bytes", "bytes_threshold": 281000000, "cumulative_training_bytes": 281001549, "metrics": { "loss": 0.4893414064325578, "ce_loss": 0.47934141596930097, "lb_loss": 1.0000000022459103 } }, { "checkpoint_type": "bytes", "bytes_threshold": 282000000, "cumulative_training_bytes": 282001567, "metrics": { "loss": 0.489328068624075, "ce_loss": 0.4793280781608182, "lb_loss": 1.0000000024594404 } }, { "checkpoint_type": "bytes", "bytes_threshold": 283000000, "cumulative_training_bytes": 283001333, "metrics": { "loss": 0.48931800692577093, "ce_loss": 0.4793180164625141, "lb_loss": 1.000000002594208 } }, { "checkpoint_type": "bytes", "bytes_threshold": 284000000, "cumulative_training_bytes": 284000025, "metrics": { "loss": 0.48929820322010614, "ce_loss": 0.4792982127568493, "lb_loss": 1.0000000026281157 } }, { "checkpoint_type": "bytes", "bytes_threshold": 285000000, "cumulative_training_bytes": 285005735, "metrics": { "loss": 0.4892746146721176, "ce_loss": 0.47927462420886074, "lb_loss": 1.0000000025853828 } }, { "checkpoint_type": "bytes", "bytes_threshold": 286000000, "cumulative_training_bytes": 286000791, "metrics": { "loss": 0.48924303995152446, "ce_loss": 0.4792430494882676, "lb_loss": 1.0000000023756552 } }, { "epoch": 4, "checkpoint_type": "epoch", "metrics": { "loss": 0.4892372682077637, "ce_loss": 0.47923727774450686, "lb_loss": 1.0000000023534628, "training_bytes": 71629711 }, "cumulative_training_bytes": 286518929, "training_bytes_this_epoch": 71629711 }, { "checkpoint_type": "bytes", "bytes_threshold": 287000000, "cumulative_training_bytes": 287004047, "metrics": { "loss": 0.4806078410730129, "ce_loss": 0.4706078506097561, "lb_loss": 1.0000000225334633 } }, { "checkpoint_type": "bytes", "bytes_threshold": 288000000, "cumulative_training_bytes": 288004715, "metrics": { "loss": 0.4802595771166433, "ce_loss": 0.47025958665338646, "lb_loss": 1.0000000116359664 } }, { "checkpoint_type": "bytes", "bytes_threshold": 289000000, "cumulative_training_bytes": 289004562, "metrics": { "loss": 0.48041944730849495, "ce_loss": 0.4704194568452381, "lb_loss": 1.0000000120628447 } }, { "checkpoint_type": "bytes", "bytes_threshold": 290000000, "cumulative_training_bytes": 290004822, "metrics": { "loss": 0.48065669143948775, "ce_loss": 0.4706567009762309, "lb_loss": 1.000000007690922 } }, { "checkpoint_type": "bytes", "bytes_threshold": 291000000, "cumulative_training_bytes": 291005873, "metrics": { "loss": 0.4806928087350246, "ce_loss": 0.4706928182717678, "lb_loss": 1.0000000058189231 } }, { "checkpoint_type": "bytes", "bytes_threshold": 292000000, "cumulative_training_bytes": 292005364, "metrics": { "loss": 0.48091802242118564, "ce_loss": 0.4709180319579288, "lb_loss": 1.0000000037936074 } }, { "checkpoint_type": "bytes", "bytes_threshold": 293000000, "cumulative_training_bytes": 293004963, "metrics": { "loss": 0.4810613071831473, "ce_loss": 0.4710613167198905, "lb_loss": 1.000000003317412 } }, { "checkpoint_type": "bytes", "bytes_threshold": 294000000, "cumulative_training_bytes": 294005549, "metrics": { "loss": 0.48113079579922524, "ce_loss": 0.4711308053359684, "lb_loss": 1.0000000031569258 } }, { "checkpoint_type": "bytes", "bytes_threshold": 295000000, "cumulative_training_bytes": 295000539, "metrics": { "loss": 0.48119787078426174, "ce_loss": 0.4711978803210049, "lb_loss": 1.0000000028700073 } }, { "checkpoint_type": "bytes", "bytes_threshold": 296000000, "cumulative_training_bytes": 296005546, "metrics": { "loss": 0.48143782187311335, "ce_loss": 0.4714378314098565, "lb_loss": 1.0000000027143725 } }, { "checkpoint_type": "bytes", "bytes_threshold": 297000000, "cumulative_training_bytes": 297000097, "metrics": { "loss": 0.4815578144327656, "ce_loss": 0.47155782396950874, "lb_loss": 1.000000002692474 } }, { "checkpoint_type": "bytes", "bytes_threshold": 298000000, "cumulative_training_bytes": 298005559, "metrics": { "loss": 0.48169829352868704, "ce_loss": 0.4716983030654302, "lb_loss": 1.0000000024873654 } }, { "checkpoint_type": "bytes", "bytes_threshold": 299000000, "cumulative_training_bytes": 299000334, "metrics": { "loss": 0.48176441524988556, "ce_loss": 0.4717644247866287, "lb_loss": 1.0000000026848939 } }, { "checkpoint_type": "bytes", "bytes_threshold": 300000000, "cumulative_training_bytes": 300001103, "metrics": { "loss": 0.48185715683726915, "ce_loss": 0.4718571663740123, "lb_loss": 1.0000000018315738 } }, { "checkpoint_type": "bytes", "bytes_threshold": 301000000, "cumulative_training_bytes": 301002320, "metrics": { "loss": 0.48188760156869204, "ce_loss": 0.4718876111054352, "lb_loss": 1.0000000009743302 } }, { "checkpoint_type": "bytes", "bytes_threshold": 302000000, "cumulative_training_bytes": 302002159, "metrics": { "loss": 0.4819753344999541, "ce_loss": 0.4719753440366973, "lb_loss": 1.0000000013442945 } }, { "checkpoint_type": "bytes", "bytes_threshold": 303000000, "cumulative_training_bytes": 303002056, "metrics": { "loss": 0.4820222638025746, "ce_loss": 0.4720222733393178, "lb_loss": 1.0000000014981418 } }, { "checkpoint_type": "bytes", "bytes_threshold": 304000000, "cumulative_training_bytes": 304001329, "metrics": { "loss": 0.4820512609862765, "ce_loss": 0.47205127052301965, "lb_loss": 1.0000000014931427 } }, { "checkpoint_type": "bytes", "bytes_threshold": 305000000, "cumulative_training_bytes": 305002202, "metrics": { "loss": 0.4821052628375764, "ce_loss": 0.47210527237431954, "lb_loss": 1.0000000020230844 } }, { "checkpoint_type": "bytes", "bytes_threshold": 306000000, "cumulative_training_bytes": 306001200, "metrics": { "loss": 0.48219109769290447, "ce_loss": 0.47219110722964763, "lb_loss": 1.0000000016476376 } }, { "checkpoint_type": "bytes", "bytes_threshold": 307000000, "cumulative_training_bytes": 307001550, "metrics": { "loss": 0.48222227538813406, "ce_loss": 0.4722222849248772, "lb_loss": 1.0000000016360708 } }, { "checkpoint_type": "bytes", "bytes_threshold": 308000000, "cumulative_training_bytes": 308001611, "metrics": { "loss": 0.4822451695266177, "ce_loss": 0.4722451790633609, "lb_loss": 1.0000000020853417 } }, { "checkpoint_type": "bytes", "bytes_threshold": 309000000, "cumulative_training_bytes": 309000993, "metrics": { "loss": 0.48228710549352793, "ce_loss": 0.4722871150302711, "lb_loss": 1.000000001819989 } }, { "checkpoint_type": "bytes", "bytes_threshold": 310000000, "cumulative_training_bytes": 310002159, "metrics": { "loss": 0.48228461511673465, "ce_loss": 0.4722846246534778, "lb_loss": 1.0000000019978372 } }, { "checkpoint_type": "bytes", "bytes_threshold": 311000000, "cumulative_training_bytes": 311002120, "metrics": { "loss": 0.48233567679997424, "ce_loss": 0.4723356863367174, "lb_loss": 1.0000000019162238 } }, { "checkpoint_type": "bytes", "bytes_threshold": 312000000, "cumulative_training_bytes": 312002300, "metrics": { "loss": 0.48235052605313145, "ce_loss": 0.4723505355898746, "lb_loss": 1.0000000015641721 } }, { "checkpoint_type": "bytes", "bytes_threshold": 313000000, "cumulative_training_bytes": 313000722, "metrics": { "loss": 0.48237080051912273, "ce_loss": 0.4723708100558659, "lb_loss": 1.0000000015850175 } }, { "checkpoint_type": "bytes", "bytes_threshold": 314000000, "cumulative_training_bytes": 314001059, "metrics": { "loss": 0.48239464620184425, "ce_loss": 0.4723946557385874, "lb_loss": 1.000000001732693 } }, { "checkpoint_type": "bytes", "bytes_threshold": 315000000, "cumulative_training_bytes": 315000576, "metrics": { "loss": 0.48241925209841163, "ce_loss": 0.4724192616351548, "lb_loss": 1.0000000016842368 } }, { "checkpoint_type": "bytes", "bytes_threshold": 316000000, "cumulative_training_bytes": 316000807, "metrics": { "loss": 0.4824112177565126, "ce_loss": 0.47241122729325574, "lb_loss": 1.000000001806564 } }, { "checkpoint_type": "bytes", "bytes_threshold": 317000000, "cumulative_training_bytes": 317000909, "metrics": { "loss": 0.4824116732675667, "ce_loss": 0.47241168280430984, "lb_loss": 1.0000000018282924 } }, { "checkpoint_type": "bytes", "bytes_threshold": 318000000, "cumulative_training_bytes": 318000322, "metrics": { "loss": 0.48244807845667786, "ce_loss": 0.47244808799342103, "lb_loss": 1.0000000013444656 } }, { "checkpoint_type": "bytes", "bytes_threshold": 319000000, "cumulative_training_bytes": 319001281, "metrics": { "loss": 0.4824737020910579, "ce_loss": 0.47247371162780105, "lb_loss": 1.0000000015745443 } }, { "checkpoint_type": "bytes", "bytes_threshold": 320000000, "cumulative_training_bytes": 320000836, "metrics": { "loss": 0.48250469895123843, "ce_loss": 0.4725047084879816, "lb_loss": 1.000000001948897 } }, { "checkpoint_type": "bytes", "bytes_threshold": 321000000, "cumulative_training_bytes": 321001661, "metrics": { "loss": 0.4825023903903205, "ce_loss": 0.47250239992706367, "lb_loss": 1.0000000019230604 } }, { "checkpoint_type": "bytes", "bytes_threshold": 322000000, "cumulative_training_bytes": 322001730, "metrics": { "loss": 0.4825008634410754, "ce_loss": 0.47250087297781856, "lb_loss": 1.0000000020378506 } }, { "checkpoint_type": "bytes", "bytes_threshold": 323000000, "cumulative_training_bytes": 323000981, "metrics": { "loss": 0.482539654899591, "ce_loss": 0.47253966443633416, "lb_loss": 1.0000000021560156 } }, { "checkpoint_type": "bytes", "bytes_threshold": 324000000, "cumulative_training_bytes": 324002180, "metrics": { "loss": 0.4825421488544788, "ce_loss": 0.472542158391222, "lb_loss": 1.0000000021737723 } }, { "checkpoint_type": "bytes", "bytes_threshold": 325000000, "cumulative_training_bytes": 325001770, "metrics": { "loss": 0.48258986484815614, "ce_loss": 0.4725898743848993, "lb_loss": 1.000000002218103 } }, { "checkpoint_type": "bytes", "bytes_threshold": 326000000, "cumulative_training_bytes": 326001016, "metrics": { "loss": 0.4826266742724595, "ce_loss": 0.47262668380920264, "lb_loss": 1.0000000019296467 } }, { "checkpoint_type": "bytes", "bytes_threshold": 327000000, "cumulative_training_bytes": 327000727, "metrics": { "loss": 0.4826471043720421, "ce_loss": 0.4726471139087853, "lb_loss": 1.000000001925541 } }, { "checkpoint_type": "bytes", "bytes_threshold": 328000000, "cumulative_training_bytes": 328001475, "metrics": { "loss": 0.4826450956701042, "ce_loss": 0.4726451052068474, "lb_loss": 1.0000000018791193 } }, { "checkpoint_type": "bytes", "bytes_threshold": 329000000, "cumulative_training_bytes": 329000597, "metrics": { "loss": 0.4826633140459285, "ce_loss": 0.47266332358267166, "lb_loss": 1.0000000018099753 } }, { "checkpoint_type": "bytes", "bytes_threshold": 330000000, "cumulative_training_bytes": 330000395, "metrics": { "loss": 0.48268707368318065, "ce_loss": 0.4726870832199238, "lb_loss": 1.0000000017034534 } }, { "checkpoint_type": "bytes", "bytes_threshold": 331000000, "cumulative_training_bytes": 331000366, "metrics": { "loss": 0.48268638047922063, "ce_loss": 0.4726863900159638, "lb_loss": 1.0000000016889437 } }, { "checkpoint_type": "bytes", "bytes_threshold": 332000000, "cumulative_training_bytes": 332000600, "metrics": { "loss": 0.4826902918228717, "ce_loss": 0.4726903013596149, "lb_loss": 1.0000000017681316 } }, { "checkpoint_type": "bytes", "bytes_threshold": 333000000, "cumulative_training_bytes": 333000847, "metrics": { "loss": 0.4826878186777699, "ce_loss": 0.47268782821451305, "lb_loss": 1.0000000019273814 } }, { "checkpoint_type": "bytes", "bytes_threshold": 334000000, "cumulative_training_bytes": 334000366, "metrics": { "loss": 0.4826988372821751, "ce_loss": 0.4726988468189183, "lb_loss": 1.0000000019016437 } }, { "checkpoint_type": "bytes", "bytes_threshold": 335000000, "cumulative_training_bytes": 335005476, "metrics": { "loss": 0.4827082030273281, "ce_loss": 0.47270821256407125, "lb_loss": 1.0000000017312551 } }, { "checkpoint_type": "bytes", "bytes_threshold": 336000000, "cumulative_training_bytes": 336005120, "metrics": { "loss": 0.4827010808315457, "ce_loss": 0.4727010903682889, "lb_loss": 1.0000000015109631 } }, { "checkpoint_type": "bytes", "bytes_threshold": 337000000, "cumulative_training_bytes": 337004532, "metrics": { "loss": 0.48272331332132057, "ce_loss": 0.47272332285806373, "lb_loss": 1.0000000015718524 } }, { "checkpoint_type": "bytes", "bytes_threshold": 338000000, "cumulative_training_bytes": 338005247, "metrics": { "loss": 0.4827238063321225, "ce_loss": 0.47272381586886564, "lb_loss": 1.0000000015961248 } }, { "checkpoint_type": "bytes", "bytes_threshold": 339000000, "cumulative_training_bytes": 339000096, "metrics": { "loss": 0.4827229669121237, "ce_loss": 0.47272297644886685, "lb_loss": 1.0000000014314792 } }, { "checkpoint_type": "bytes", "bytes_threshold": 340000000, "cumulative_training_bytes": 340005646, "metrics": { "loss": 0.48274137501907055, "ce_loss": 0.4727413845558137, "lb_loss": 1.0000000013122385 } }, { "checkpoint_type": "bytes", "bytes_threshold": 341000000, "cumulative_training_bytes": 341005395, "metrics": { "loss": 0.4827476606277877, "ce_loss": 0.47274767016453084, "lb_loss": 1.00000000132052 } }, { "checkpoint_type": "bytes", "bytes_threshold": 342000000, "cumulative_training_bytes": 342000175, "metrics": { "loss": 0.482743314508692, "ce_loss": 0.4727433240454352, "lb_loss": 1.00000000118243 } }, { "checkpoint_type": "bytes", "bytes_threshold": 343000000, "cumulative_training_bytes": 343000194, "metrics": { "loss": 0.48274443290694463, "ce_loss": 0.4727444424436878, "lb_loss": 1.0000000013987889 } }, { "checkpoint_type": "bytes", "bytes_threshold": 344000000, "cumulative_training_bytes": 344000175, "metrics": { "loss": 0.48273867625180944, "ce_loss": 0.4727386857885526, "lb_loss": 1.0000000012946861 } }, { "checkpoint_type": "bytes", "bytes_threshold": 345000000, "cumulative_training_bytes": 345000308, "metrics": { "loss": 0.4827433929599684, "ce_loss": 0.47274340249671154, "lb_loss": 1.0000000012122365 } }, { "checkpoint_type": "bytes", "bytes_threshold": 346000000, "cumulative_training_bytes": 346005463, "metrics": { "loss": 0.4828590717076615, "ce_loss": 0.4728590812444047, "lb_loss": 1.0000000011858081 } }, { "checkpoint_type": "bytes", "bytes_threshold": 347000000, "cumulative_training_bytes": 347004643, "metrics": { "loss": 0.48290444134126503, "ce_loss": 0.4729044508780082, "lb_loss": 1.000000001125386 } }, { "checkpoint_type": "bytes", "bytes_threshold": 348000000, "cumulative_training_bytes": 348004692, "metrics": { "loss": 0.48292070460289693, "ce_loss": 0.4729207141396401, "lb_loss": 1.0000000011644445 } }, { "checkpoint_type": "bytes", "bytes_threshold": 349000000, "cumulative_training_bytes": 349004119, "metrics": { "loss": 0.48292387037566215, "ce_loss": 0.4729238799124053, "lb_loss": 1.0000000013490067 } }, { "checkpoint_type": "bytes", "bytes_threshold": 350000000, "cumulative_training_bytes": 350003796, "metrics": { "loss": 0.48291546779106, "ce_loss": 0.47291547732780315, "lb_loss": 1.0000000011722043 } }, { "checkpoint_type": "bytes", "bytes_threshold": 351000000, "cumulative_training_bytes": 351004186, "metrics": { "loss": 0.48290248690755855, "ce_loss": 0.4729024964443017, "lb_loss": 1.0000000011923116 } }, { "checkpoint_type": "bytes", "bytes_threshold": 352000000, "cumulative_training_bytes": 352003413, "metrics": { "loss": 0.48291019789300293, "ce_loss": 0.4729102074297461, "lb_loss": 1.000000001454166 } }, { "checkpoint_type": "bytes", "bytes_threshold": 353000000, "cumulative_training_bytes": 353003199, "metrics": { "loss": 0.4829023801036983, "ce_loss": 0.47290238964044146, "lb_loss": 1.000000001453513 } }, { "checkpoint_type": "bytes", "bytes_threshold": 354000000, "cumulative_training_bytes": 354003719, "metrics": { "loss": 0.48289616401652297, "ce_loss": 0.47289617355326613, "lb_loss": 1.0000000015103676 } }, { "checkpoint_type": "bytes", "bytes_threshold": 355000000, "cumulative_training_bytes": 355004300, "metrics": { "loss": 0.48289350448174656, "ce_loss": 0.4728935140184897, "lb_loss": 1.0000000015655617 } }, { "checkpoint_type": "bytes", "bytes_threshold": 356000000, "cumulative_training_bytes": 356004592, "metrics": { "loss": 0.48288726240185853, "ce_loss": 0.4728872719386017, "lb_loss": 1.0000000014871975 } }, { "checkpoint_type": "bytes", "bytes_threshold": 357000000, "cumulative_training_bytes": 357003308, "metrics": { "loss": 0.4828915270964838, "ce_loss": 0.47289153663322697, "lb_loss": 1.0000000015261432 } }, { "checkpoint_type": "bytes", "bytes_threshold": 358000000, "cumulative_training_bytes": 358002795, "metrics": { "loss": 0.4828872656784708, "ce_loss": 0.47288727521521395, "lb_loss": 1.000000001662674 } }, { "epoch": 5, "checkpoint_type": "epoch", "metrics": { "loss": 0.48288743393137185, "ce_loss": 0.472887443468115, "lb_loss": 1.0000000016887818, "training_bytes": 71629698 }, "cumulative_training_bytes": 358148627, "training_bytes_this_epoch": 71629698 } ] }