{ "run_name": "run_large_20260116_074355", "timestamp": "20260116_074355", "phase": "large", "config": { "arch_layout": [ "m4", [ "T1m4", [ "T22" ], "m4T1" ], "m4" ], "d_model": [ 1024, 1024, 1536 ], "d_intermediate": [ 0, 2816, 4096 ], "vocab_size": 256, "ssm_cfg": { "chunk_size": 256, "d_conv": 4, "d_state": 128, "expand": 2 }, "attn_cfg": { "num_heads": [ 16, 16, 16 ], "rotary_emb_dim": [ 32, 32, 48 ], "window_size": [ 1023, 1023, -1 ] }, "tie_embeddings": false }, "training_args": { "data": "datasets/moses/smiles-molecules-moses_all.csv", "max_samples": null, "batch_size": 16, "epochs": 5, "lr": 0.0001, "weight_decay": 0.1, "gradient_accumulation": 8, "concatenate": true, "num_concatenate": 10, "concatenate_separator": " ", "checkpoint_bytes": 1000000, "num_test_samples": 5, "num_visualize": 5, "skip_visualization": false }, "dataset_info": { "train_size": 193691, "test_size": 5, "test_smiles_file": "checkpoints/run_large_20260116_074355/test_smiles.txt" }, "model_info": { "num_parameters": 622923776, "device": "cuda", "dtype": "torch.bfloat16", "use_amp": true }, "training_history": [ { "checkpoint_type": "bytes", "bytes_threshold": 1000000, "cumulative_training_bytes": 1000870, "metrics": { "loss": 2.529138337930984, "ce_loss": 2.5191383136094676, "lb_loss": 0.9999999887139134 } }, { "checkpoint_type": "bytes", "bytes_threshold": 2000000, "cumulative_training_bytes": 2000188, "metrics": { "loss": 1.9090615828361737, "ce_loss": 1.899061575443787, "lb_loss": 0.9999999883612232 } }, { "checkpoint_type": "bytes", "bytes_threshold": 3000000, "cumulative_training_bytes": 3005690, "metrics": { "loss": 1.6126774746601975, "ce_loss": 1.6026774729330708, "lb_loss": 0.9999999873281464 } }, { "checkpoint_type": "bytes", "bytes_threshold": 4000000, "cumulative_training_bytes": 4005451, "metrics": { "loss": 1.4273629051191592, "ce_loss": 1.4173629062038404, "lb_loss": 0.999999986705611 } }, { "checkpoint_type": "bytes", "bytes_threshold": 5000000, "cumulative_training_bytes": 5005919, "metrics": { "loss": 1.2988270140708762, "ce_loss": 1.2888270168439717, "lb_loss": 0.9999999867545234 } }, { "checkpoint_type": "bytes", "bytes_threshold": 6000000, "cumulative_training_bytes": 6004942, "metrics": { "loss": 1.204308031815026, "ce_loss": 1.1943080357142857, "lb_loss": 0.9999999881377948 } }, { "checkpoint_type": "bytes", "bytes_threshold": 7000000, "cumulative_training_bytes": 7000756, "metrics": { "loss": 1.1318895240406415, "ce_loss": 1.1218895287404902, "lb_loss": 0.9999999886131448 } }, { "checkpoint_type": "bytes", "bytes_threshold": 8000000, "cumulative_training_bytes": 8000777, "metrics": { "loss": 1.0758717356348884, "ce_loss": 1.0658717409393492, "lb_loss": 0.9999999891106899 } }, { "checkpoint_type": "bytes", "bytes_threshold": 9000000, "cumulative_training_bytes": 9000410, "metrics": { "loss": 1.0291768310102956, "ce_loss": 1.0191768367850098, "lb_loss": 0.9999999889098524 } }, { "checkpoint_type": "bytes", "bytes_threshold": 10000000, "cumulative_training_bytes": 10001418, "metrics": { "loss": 0.9909772497662426, "ce_loss": 0.9809772559171598, "lb_loss": 0.9999999883612232 } }, { "checkpoint_type": "bytes", "bytes_threshold": 11000000, "cumulative_training_bytes": 11000495, "metrics": { "loss": 0.9587417565859281, "ce_loss": 0.9487417630446476, "lb_loss": 0.9999999891627919 } }, { "checkpoint_type": "bytes", "bytes_threshold": 12000000, "cumulative_training_bytes": 12002164, "metrics": { "loss": 0.9310948989800447, "ce_loss": 0.9210949056952663, "lb_loss": 0.9999999888314768 } }, { "checkpoint_type": "bytes", "bytes_threshold": 13000000, "cumulative_training_bytes": 13003659, "metrics": { "loss": 0.90723912597625, "ce_loss": 0.8972391329085116, "lb_loss": 0.9999999889309535 } }, { "checkpoint_type": "bytes", "bytes_threshold": 14000000, "cumulative_training_bytes": 14003026, "metrics": { "loss": 0.8862365868166152, "ce_loss": 0.8762365939349113, "lb_loss": 0.9999999888650664 } }, { "checkpoint_type": "bytes", "bytes_threshold": 15000000, "cumulative_training_bytes": 15002454, "metrics": { "loss": 0.8678279019906912, "ce_loss": 0.857827909270217, "lb_loss": 0.9999999884787866 } }, { "checkpoint_type": "bytes", "bytes_threshold": 16000000, "cumulative_training_bytes": 16002028, "metrics": { "loss": 0.8511597905638655, "ce_loss": 0.8411597979844675, "lb_loss": 0.9999999888241291 } }, { "checkpoint_type": "bytes", "bytes_threshold": 17000000, "cumulative_training_bytes": 17002370, "metrics": { "loss": 0.8362065383129074, "ce_loss": 0.8262065458579881, "lb_loss": 0.9999999890043642 } }, { "checkpoint_type": "bytes", "bytes_threshold": 18000000, "cumulative_training_bytes": 18002503, "metrics": { "loss": 0.8229468611805631, "ce_loss": 0.8129468688362919, "lb_loss": 0.9999999889490402 } }, { "checkpoint_type": "bytes", "bytes_threshold": 19000000, "cumulative_training_bytes": 19002715, "metrics": { "loss": 0.8110695578011721, "ce_loss": 0.8010695655559016, "lb_loss": 0.9999999891222916 } }, { "checkpoint_type": "bytes", "bytes_threshold": 20000000, "cumulative_training_bytes": 20001653, "metrics": { "loss": 0.7999523775111994, "ce_loss": 0.7899523853550295, "lb_loss": 0.9999999889255275 } }, { "checkpoint_type": "bytes", "bytes_threshold": 21000000, "cumulative_training_bytes": 21000685, "metrics": { "loss": 0.7896056028109734, "ce_loss": 0.7796056107354185, "lb_loss": 0.9999999889322454 } }, { "checkpoint_type": "bytes", "bytes_threshold": 22000000, "cumulative_training_bytes": 22000998, "metrics": { "loss": 0.7800996339468625, "ce_loss": 0.7700996419445939, "lb_loss": 0.999999989082635 } }, { "checkpoint_type": "bytes", "bytes_threshold": 23000000, "cumulative_training_bytes": 23001073, "metrics": { "loss": 0.7714011769752316, "ce_loss": 0.7614011850398765, "lb_loss": 0.9999999893119533 } }, { "checkpoint_type": "bytes", "bytes_threshold": 24000000, "cumulative_training_bytes": 24001251, "metrics": { "loss": 0.7632320863020255, "ce_loss": 0.7532320944280079, "lb_loss": 0.9999999893164259 } }, { "checkpoint_type": "bytes", "bytes_threshold": 25000000, "cumulative_training_bytes": 25001805, "metrics": { "loss": 0.7556249918175872, "ce_loss": 0.745625, "lb_loss": 0.9999999893910786 } }, { "checkpoint_type": "bytes", "bytes_threshold": 26000000, "cumulative_training_bytes": 26001634, "metrics": { "loss": 0.7484830439389158, "ce_loss": 0.7384830521734183, "lb_loss": 0.9999999890937337 } }, { "checkpoint_type": "bytes", "bytes_threshold": 27000000, "cumulative_training_bytes": 27003072, "metrics": { "loss": 0.7417793857562759, "ce_loss": 0.7317793940390094, "lb_loss": 0.9999999890927288 } }, { "checkpoint_type": "bytes", "bytes_threshold": 28000000, "cumulative_training_bytes": 28002976, "metrics": { "loss": 0.7355289699057855, "ce_loss": 0.7255289782333052, "lb_loss": 0.9999999888902585 } }, { "checkpoint_type": "bytes", "bytes_threshold": 29000000, "cumulative_training_bytes": 29002890, "metrics": { "loss": 0.7296044090966063, "ce_loss": 0.7196044174658233, "lb_loss": 0.9999999887139134 } }, { "checkpoint_type": "bytes", "bytes_threshold": 30000000, "cumulative_training_bytes": 30002588, "metrics": { "loss": 0.7242658939587294, "ce_loss": 0.7142659023668639, "lb_loss": 0.9999999886551317 } }, { "checkpoint_type": "bytes", "bytes_threshold": 31000000, "cumulative_training_bytes": 31003334, "metrics": { "loss": 0.7192434659780585, "ce_loss": 0.7092434744225997, "lb_loss": 0.9999999885205026 } }, { "checkpoint_type": "bytes", "bytes_threshold": 32000000, "cumulative_training_bytes": 32004404, "metrics": { "loss": 0.7142980222165938, "ce_loss": 0.7042980306952663, "lb_loss": 0.9999999885596115 } }, { "checkpoint_type": "bytes", "bytes_threshold": 33000000, "cumulative_training_bytes": 33005122, "metrics": { "loss": 0.7095906652385923, "ce_loss": 0.6995906737493276, "lb_loss": 0.9999999883719108 } }, { "checkpoint_type": "bytes", "bytes_threshold": 34000000, "cumulative_training_bytes": 34004602, "metrics": { "loss": 0.7051187425468012, "ce_loss": 0.6951187510877131, "lb_loss": 0.9999999883301035 } }, { "checkpoint_type": "bytes", "bytes_threshold": 35000000, "cumulative_training_bytes": 35004371, "metrics": { "loss": 0.7008211294272545, "ce_loss": 0.6908211379966188, "lb_loss": 0.9999999883813769 } }, { "checkpoint_type": "bytes", "bytes_threshold": 36000000, "cumulative_training_bytes": 36004728, "metrics": { "loss": 0.696762272592127, "ce_loss": 0.6867622811883629, "lb_loss": 0.9999999884298019 } }, { "checkpoint_type": "bytes", "bytes_threshold": 37000000, "cumulative_training_bytes": 37005490, "metrics": { "loss": 0.6928459753060596, "ce_loss": 0.6828459839277147, "lb_loss": 0.999999988513738 } }, { "checkpoint_type": "bytes", "bytes_threshold": 38000000, "cumulative_training_bytes": 38005107, "metrics": { "loss": 0.6892191306410902, "ce_loss": 0.6792191392868265, "lb_loss": 0.9999999885654123 } }, { "checkpoint_type": "bytes", "bytes_threshold": 39000000, "cumulative_training_bytes": 39003704, "metrics": { "loss": 0.685711899615441, "ce_loss": 0.6757119082840237, "lb_loss": 0.9999999885782633 } }, { "checkpoint_type": "bytes", "bytes_threshold": 40000000, "cumulative_training_bytes": 40003473, "metrics": { "loss": 0.6823355358733229, "ce_loss": 0.6723355445636094, "lb_loss": 0.9999999885728373 } }, { "checkpoint_type": "bytes", "bytes_threshold": 41000000, "cumulative_training_bytes": 41003986, "metrics": { "loss": 0.6790900475742463, "ce_loss": 0.6690900562851783, "lb_loss": 0.9999999886106871 } }, { "checkpoint_type": "bytes", "bytes_threshold": 42000000, "cumulative_training_bytes": 42002642, "metrics": { "loss": 0.6761168771879745, "ce_loss": 0.6661168859185687, "lb_loss": 0.9999999885711579 } }, { "checkpoint_type": "bytes", "bytes_threshold": 43000000, "cumulative_training_bytes": 43002853, "metrics": { "loss": 0.6731352473742306, "ce_loss": 0.6631352561235723, "lb_loss": 0.999999988599084 } }, { "checkpoint_type": "bytes", "bytes_threshold": 44000000, "cumulative_training_bytes": 44002051, "metrics": { "loss": 0.670238190533462, "ce_loss": 0.6602381993006993, "lb_loss": 0.9999999885936781 } }, { "checkpoint_type": "bytes", "bytes_threshold": 45000000, "cumulative_training_bytes": 45001206, "metrics": { "loss": 0.667434964095347, "ce_loss": 0.6574349728796844, "lb_loss": 0.99999998859635 } }, { "checkpoint_type": "bytes", "bytes_threshold": 46000000, "cumulative_training_bytes": 46001844, "metrics": { "loss": 0.6648008498306411, "ce_loss": 0.6548008586313352, "lb_loss": 0.999999988568237 } }, { "checkpoint_type": "bytes", "bytes_threshold": 47000000, "cumulative_training_bytes": 47001995, "metrics": { "loss": 0.6622222702343818, "ce_loss": 0.6522222790507365, "lb_loss": 0.9999999885713365 } }, { "checkpoint_type": "bytes", "bytes_threshold": 48000000, "cumulative_training_bytes": 48001607, "metrics": { "loss": 0.6597010513264282, "ce_loss": 0.6497010601577909, "lb_loss": 0.9999999884347003 } }, { "checkpoint_type": "bytes", "bytes_threshold": 49000000, "cumulative_training_bytes": 49001375, "metrics": { "loss": 0.6572643425308871, "ce_loss": 0.6472643513766453, "lb_loss": 0.9999999883828165 } }, { "checkpoint_type": "bytes", "bytes_threshold": 50000000, "cumulative_training_bytes": 50001945, "metrics": { "loss": 0.6550795029747416, "ce_loss": 0.6450795118343196, "lb_loss": 0.9999999882695237 } }, { "checkpoint_type": "bytes", "bytes_threshold": 51000000, "cumulative_training_bytes": 51000374, "metrics": { "loss": 0.652884262069249, "ce_loss": 0.6428842709421047, "lb_loss": 0.9999999882851528 } }, { "checkpoint_type": "bytes", "bytes_threshold": 52000000, "cumulative_training_bytes": 52000635, "metrics": { "loss": 0.6506681074377727, "ce_loss": 0.6406681163233955, "lb_loss": 0.9999999883205282 } }, { "checkpoint_type": "bytes", "bytes_threshold": 53000000, "cumulative_training_bytes": 53001343, "metrics": { "loss": 0.6485172646032642, "ce_loss": 0.6385172735011723, "lb_loss": 0.9999999883479141 } }, { "checkpoint_type": "bytes", "bytes_threshold": 54000000, "cumulative_training_bytes": 54001159, "metrics": { "loss": 0.6464495068967486, "ce_loss": 0.636449515806487, "lb_loss": 0.999999988335098 } }, { "checkpoint_type": "bytes", "bytes_threshold": 55000000, "cumulative_training_bytes": 55001589, "metrics": { "loss": 0.6445485554952144, "ce_loss": 0.6345485644163529, "lb_loss": 0.9999999882842726 } }, { "checkpoint_type": "bytes", "bytes_threshold": 56000000, "cumulative_training_bytes": 56001216, "metrics": { "loss": 0.6427827731895931, "ce_loss": 0.6327827821217245, "lb_loss": 0.9999999883549252 } }, { "checkpoint_type": "bytes", "bytes_threshold": 57000000, "cumulative_training_bytes": 57001016, "metrics": { "loss": 0.6409422922614553, "ce_loss": 0.6309423012041939, "lb_loss": 0.9999999883426606 } }, { "checkpoint_type": "bytes", "bytes_threshold": 58000000, "cumulative_training_bytes": 58002267, "metrics": { "loss": 0.6390827834618332, "ce_loss": 0.6290827924148134, "lb_loss": 0.999999988324738 } }, { "checkpoint_type": "bytes", "bytes_threshold": 59000000, "cumulative_training_bytes": 59002956, "metrics": { "loss": 0.6373783727190031, "ce_loss": 0.6273783816818774, "lb_loss": 0.9999999883731788 } }, { "checkpoint_type": "bytes", "bytes_threshold": 60000000, "cumulative_training_bytes": 60002317, "metrics": { "loss": 0.6357118993115848, "ce_loss": 0.6257119082840237, "lb_loss": 0.9999999883494669 } }, { "checkpoint_type": "bytes", "bytes_threshold": 61000000, "cumulative_training_bytes": 61002300, "metrics": { "loss": 0.6340526998164479, "ce_loss": 0.6240527087981376, "lb_loss": 0.9999999883149687 } }, { "checkpoint_type": "bytes", "bytes_threshold": 62000000, "cumulative_training_bytes": 62001732, "metrics": { "loss": 0.6324138432473804, "ce_loss": 0.6224138522380225, "lb_loss": 0.9999999883839774 } }, { "checkpoint_type": "bytes", "bytes_threshold": 63000000, "cumulative_training_bytes": 63001056, "metrics": { "loss": 0.6308046337404284, "ce_loss": 0.6208046427397389, "lb_loss": 0.9999999883724198 } }, { "checkpoint_type": "bytes", "bytes_threshold": 64000000, "cumulative_training_bytes": 64001869, "metrics": { "loss": 0.629222959280014, "ce_loss": 0.6192229682877219, "lb_loss": 0.99999998833918 } }, { "checkpoint_type": "bytes", "bytes_threshold": 65000000, "cumulative_training_bytes": 65000902, "metrics": { "loss": 0.6279146900055459, "ce_loss": 0.6179146990213928, "lb_loss": 0.9999999883557972 } }, { "checkpoint_type": "bytes", "bytes_threshold": 66000000, "cumulative_training_bytes": 66000374, "metrics": { "loss": 0.6265192889859434, "ce_loss": 0.6165192980096826, "lb_loss": 0.9999999883986297 } }, { "checkpoint_type": "bytes", "bytes_threshold": 67000000, "cumulative_training_bytes": 67001396, "metrics": { "loss": 0.6250934401428511, "ce_loss": 0.6150934491742471, "lb_loss": 0.9999999883717513 } }, { "checkpoint_type": "bytes", "bytes_threshold": 68000000, "cumulative_training_bytes": 68001292, "metrics": { "loss": 0.6237620441829789, "ce_loss": 0.6137620532218064, "lb_loss": 0.9999999883871563 } }, { "checkpoint_type": "bytes", "bytes_threshold": 69000000, "cumulative_training_bytes": 69000668, "metrics": { "loss": 0.6224395933143885, "ce_loss": 0.6124396023604322, "lb_loss": 0.9999999883612232 } }, { "checkpoint_type": "bytes", "bytes_threshold": 70000000, "cumulative_training_bytes": 70004965, "metrics": { "loss": 0.6211014537353425, "ce_loss": 0.6111014627884371, "lb_loss": 0.999999988337017 } }, { "checkpoint_type": "bytes", "bytes_threshold": 71000000, "cumulative_training_bytes": 71004801, "metrics": { "loss": 0.6197786367734274, "ce_loss": 0.6097786458333333, "lb_loss": 0.9999999884068966 } }, { "epoch": 1, "checkpoint_type": "epoch", "metrics": { "loss": 0.6189756860974089, "ce_loss": 0.6089756951614902, "lb_loss": 0.9999999884247052, "training_bytes": 71629753 }, "cumulative_training_bytes": 71629753, "training_bytes_this_epoch": 71629753 }, { "checkpoint_type": "bytes", "bytes_threshold": 72000000, "cumulative_training_bytes": 72002906, "metrics": { "loss": 0.5295622424473838, "ce_loss": 0.519562251984127, "lb_loss": 0.9999999914850507 } }, { "checkpoint_type": "bytes", "bytes_threshold": 73000000, "cumulative_training_bytes": 73002868, "metrics": { "loss": 0.5258186192348085, "ce_loss": 0.5158186287715517, "lb_loss": 0.999999987411088 } }, { "checkpoint_type": "bytes", "bytes_threshold": 74000000, "cumulative_training_bytes": 74003132, "metrics": { "loss": 0.5239981980692419, "ce_loss": 0.513998207605985, "lb_loss": 0.999999988554719 } }, { "checkpoint_type": "bytes", "bytes_threshold": 75000000, "cumulative_training_bytes": 75003207, "metrics": { "loss": 0.5230722218229059, "ce_loss": 0.5130722313596491, "lb_loss": 0.9999999898567534 } }, { "checkpoint_type": "bytes", "bytes_threshold": 76000000, "cumulative_training_bytes": 76002408, "metrics": { "loss": 0.5233044982440417, "ce_loss": 0.5133045077807848, "lb_loss": 0.9999999891114655 } }, { "checkpoint_type": "bytes", "bytes_threshold": 77000000, "cumulative_training_bytes": 77002547, "metrics": { "loss": 0.5234610656284551, "ce_loss": 0.5134610751651982, "lb_loss": 0.9999999891031156 } }, { "checkpoint_type": "bytes", "bytes_threshold": 78000000, "cumulative_training_bytes": 78001228, "metrics": { "loss": 0.5231586487733776, "ce_loss": 0.5131586583101208, "lb_loss": 0.9999999895954752 } }, { "checkpoint_type": "bytes", "bytes_threshold": 79000000, "cumulative_training_bytes": 79000992, "metrics": { "loss": 0.5228191367313146, "ce_loss": 0.5128191462680578, "lb_loss": 0.9999999892367214 } }, { "checkpoint_type": "bytes", "bytes_threshold": 80000000, "cumulative_training_bytes": 80000829, "metrics": { "loss": 0.5224226936434688, "ce_loss": 0.512422703180212, "lb_loss": 0.9999999890479098 } }, { "checkpoint_type": "bytes", "bytes_threshold": 81000000, "cumulative_training_bytes": 81001739, "metrics": { "loss": 0.5220134354601003, "ce_loss": 0.5120134449968434, "lb_loss": 0.9999999892756795 } }, { "checkpoint_type": "bytes", "bytes_threshold": 82000000, "cumulative_training_bytes": 82001573, "metrics": { "loss": 0.5230133960536733, "ce_loss": 0.5130134055904164, "lb_loss": 0.9999999893915292 } }, { "checkpoint_type": "bytes", "bytes_threshold": 83000000, "cumulative_training_bytes": 83002964, "metrics": { "loss": 0.523225756267107, "ce_loss": 0.5132257658038502, "lb_loss": 0.999999989052841 } }, { "checkpoint_type": "bytes", "bytes_threshold": 84000000, "cumulative_training_bytes": 84002697, "metrics": { "loss": 0.5232898338396318, "ce_loss": 0.513289843376375, "lb_loss": 0.999999988939932 } }, { "checkpoint_type": "bytes", "bytes_threshold": 85000000, "cumulative_training_bytes": 85002372, "metrics": { "loss": 0.5232414865915754, "ce_loss": 0.5132414961283186, "lb_loss": 0.9999999892395155 } }, { "checkpoint_type": "bytes", "bytes_threshold": 86000000, "cumulative_training_bytes": 86002248, "metrics": { "loss": 0.5230535232956982, "ce_loss": 0.5130535328324414, "lb_loss": 0.9999999892520237 } }, { "checkpoint_type": "bytes", "bytes_threshold": 87000000, "cumulative_training_bytes": 87002691, "metrics": { "loss": 0.522905801423611, "ce_loss": 0.5129058109603541, "lb_loss": 0.9999999892170196 } }, { "checkpoint_type": "bytes", "bytes_threshold": 88000000, "cumulative_training_bytes": 88003238, "metrics": { "loss": 0.5226462454325377, "ce_loss": 0.5126462549692808, "lb_loss": 0.9999999895309515 } }, { "checkpoint_type": "bytes", "bytes_threshold": 89000000, "cumulative_training_bytes": 89001864, "metrics": { "loss": 0.5224724498041969, "ce_loss": 0.5124724593409401, "lb_loss": 0.9999999894636203 } }, { "checkpoint_type": "bytes", "bytes_threshold": 90000000, "cumulative_training_bytes": 90001197, "metrics": { "loss": 0.5223119117595532, "ce_loss": 0.5123119212962963, "lb_loss": 0.9999999893460296 } }, { "checkpoint_type": "bytes", "bytes_threshold": 91000000, "cumulative_training_bytes": 91001396, "metrics": { "loss": 0.5221548230457248, "ce_loss": 0.512154832582468, "lb_loss": 0.9999999894772497 } }, { "checkpoint_type": "bytes", "bytes_threshold": 92000000, "cumulative_training_bytes": 92000962, "metrics": { "loss": 0.5219621010862601, "ce_loss": 0.5119621106230032, "lb_loss": 0.9999999897513943 } }, { "checkpoint_type": "bytes", "bytes_threshold": 93000000, "cumulative_training_bytes": 93001493, "metrics": { "loss": 0.5217127924081627, "ce_loss": 0.5117128019449059, "lb_loss": 0.9999999896863502 } }, { "checkpoint_type": "bytes", "bytes_threshold": 94000000, "cumulative_training_bytes": 94001023, "metrics": { "loss": 0.521506949601051, "ce_loss": 0.5115069591377942, "lb_loss": 0.9999999895010068 } }, { "checkpoint_type": "bytes", "bytes_threshold": 95000000, "cumulative_training_bytes": 95001477, "metrics": { "loss": 0.521244056918953, "ce_loss": 0.5112440664556962, "lb_loss": 0.9999999895729597 } }, { "checkpoint_type": "bytes", "bytes_threshold": 96000000, "cumulative_training_bytes": 96001379, "metrics": { "loss": 0.5210496801634268, "ce_loss": 0.51104968970017, "lb_loss": 0.9999999896100668 } }, { "checkpoint_type": "bytes", "bytes_threshold": 97000000, "cumulative_training_bytes": 97000217, "metrics": { "loss": 0.5208391965325199, "ce_loss": 0.510839206069263, "lb_loss": 0.9999999898249534 } }, { "checkpoint_type": "bytes", "bytes_threshold": 98000000, "cumulative_training_bytes": 98005873, "metrics": { "loss": 0.5206186290063255, "ce_loss": 0.5106186385430687, "lb_loss": 0.9999999899054494 } }, { "checkpoint_type": "bytes", "bytes_threshold": 99000000, "cumulative_training_bytes": 99005280, "metrics": { "loss": 0.5203992097197946, "ce_loss": 0.5103992192565378, "lb_loss": 0.9999999899392203 } }, { "checkpoint_type": "bytes", "bytes_threshold": 100000000, "cumulative_training_bytes": 100005594, "metrics": { "loss": 0.5202429012222226, "ce_loss": 0.5102429107589658, "lb_loss": 0.9999999899457553 } }, { "checkpoint_type": "bytes", "bytes_threshold": 101000000, "cumulative_training_bytes": 101004874, "metrics": { "loss": 0.5200043963041431, "ce_loss": 0.5100044058408862, "lb_loss": 0.9999999900238752 } }, { "checkpoint_type": "bytes", "bytes_threshold": 102000000, "cumulative_training_bytes": 102004781, "metrics": { "loss": 0.5197838760787613, "ce_loss": 0.5097838856155045, "lb_loss": 0.9999999899575345 } }, { "checkpoint_type": "bytes", "bytes_threshold": 103000000, "cumulative_training_bytes": 103004802, "metrics": { "loss": 0.5195229020227514, "ce_loss": 0.5095229115594946, "lb_loss": 0.9999999899403814 } }, { "checkpoint_type": "bytes", "bytes_threshold": 104000000, "cumulative_training_bytes": 104005501, "metrics": { "loss": 0.5193112426333957, "ce_loss": 0.5093112521701388, "lb_loss": 0.9999999898807173 } }, { "checkpoint_type": "bytes", "bytes_threshold": 105000000, "cumulative_training_bytes": 105000121, "metrics": { "loss": 0.5191852187433987, "ce_loss": 0.5091852282801419, "lb_loss": 0.9999999897382783 } }, { "checkpoint_type": "bytes", "bytes_threshold": 106000000, "cumulative_training_bytes": 106000572, "metrics": { "loss": 0.5189939814685934, "ce_loss": 0.5089939910053366, "lb_loss": 0.9999999898316057 } }, { "checkpoint_type": "bytes", "bytes_threshold": 107000000, "cumulative_training_bytes": 107000443, "metrics": { "loss": 0.5187420054557292, "ce_loss": 0.5087420149924724, "lb_loss": 0.9999999897800668 } }, { "checkpoint_type": "bytes", "bytes_threshold": 108000000, "cumulative_training_bytes": 108005744, "metrics": { "loss": 0.5185298223191449, "ce_loss": 0.5085298318558881, "lb_loss": 0.9999999897912019 } }, { "checkpoint_type": "bytes", "bytes_threshold": 109000000, "cumulative_training_bytes": 109005847, "metrics": { "loss": 0.5183260476304248, "ce_loss": 0.5083260571671679, "lb_loss": 0.9999999897057674 } }, { "checkpoint_type": "bytes", "bytes_threshold": 110000000, "cumulative_training_bytes": 110000442, "metrics": { "loss": 0.5181242074062021, "ce_loss": 0.5081242169429453, "lb_loss": 0.9999999897702437 } }, { "checkpoint_type": "bytes", "bytes_threshold": 111000000, "cumulative_training_bytes": 111004375, "metrics": { "loss": 0.517925187683392, "ce_loss": 0.5079251972201352, "lb_loss": 0.9999999897897378 } }, { "checkpoint_type": "bytes", "bytes_threshold": 112000000, "cumulative_training_bytes": 112003987, "metrics": { "loss": 0.5178511293663929, "ce_loss": 0.507851138903136, "lb_loss": 0.9999999898941129 } }, { "checkpoint_type": "bytes", "bytes_threshold": 113000000, "cumulative_training_bytes": 113002857, "metrics": { "loss": 0.5178904143693057, "ce_loss": 0.5078904239060489, "lb_loss": 0.9999999897803562 } }, { "checkpoint_type": "bytes", "bytes_threshold": 114000000, "cumulative_training_bytes": 114002857, "metrics": { "loss": 0.5177966734777779, "ce_loss": 0.5077966830145211, "lb_loss": 0.9999999896802905 } }, { "checkpoint_type": "bytes", "bytes_threshold": 115000000, "cumulative_training_bytes": 115002430, "metrics": { "loss": 0.5176577004448419, "ce_loss": 0.5076577099815851, "lb_loss": 0.9999999896986652 } }, { "checkpoint_type": "bytes", "bytes_threshold": 116000000, "cumulative_training_bytes": 116002108, "metrics": { "loss": 0.5175234279632568, "ce_loss": 0.5075234375, "lb_loss": 0.9999999896844228 } }, { "checkpoint_type": "bytes", "bytes_threshold": 117000000, "cumulative_training_bytes": 117001640, "metrics": { "loss": 0.5173624745713544, "ce_loss": 0.5073624841080976, "lb_loss": 0.9999999896474916 } }, { "checkpoint_type": "bytes", "bytes_threshold": 118000000, "cumulative_training_bytes": 118002958, "metrics": { "loss": 0.5171740742537646, "ce_loss": 0.5071740837905078, "lb_loss": 0.9999999896805941 } }, { "checkpoint_type": "bytes", "bytes_threshold": 119000000, "cumulative_training_bytes": 119002472, "metrics": { "loss": 0.5171307139708127, "ce_loss": 0.5071307235075558, "lb_loss": 0.9999999897718519 } }, { "checkpoint_type": "bytes", "bytes_threshold": 120000000, "cumulative_training_bytes": 120002108, "metrics": { "loss": 0.5170375470312607, "ce_loss": 0.5070375565680039, "lb_loss": 0.9999999898447566 } }, { "checkpoint_type": "bytes", "bytes_threshold": 121000000, "cumulative_training_bytes": 121001481, "metrics": { "loss": 0.516899705861699, "ce_loss": 0.5068997153984421, "lb_loss": 0.99999998980757 } }, { "checkpoint_type": "bytes", "bytes_threshold": 122000000, "cumulative_training_bytes": 122002988, "metrics": { "loss": 0.5167299444361251, "ce_loss": 0.5067299539728682, "lb_loss": 0.9999999897998629 } }, { "checkpoint_type": "bytes", "bytes_threshold": 123000000, "cumulative_training_bytes": 123002164, "metrics": { "loss": 0.5165764635284417, "ce_loss": 0.5065764730651848, "lb_loss": 0.9999999898679655 } }, { "checkpoint_type": "bytes", "bytes_threshold": 124000000, "cumulative_training_bytes": 124003331, "metrics": { "loss": 0.5166918485885392, "ce_loss": 0.5066918581252824, "lb_loss": 0.9999999899469346 } }, { "checkpoint_type": "bytes", "bytes_threshold": 125000000, "cumulative_training_bytes": 125004265, "metrics": { "loss": 0.516667159430112, "ce_loss": 0.5066671689668552, "lb_loss": 0.9999999899568718 } }, { "checkpoint_type": "bytes", "bytes_threshold": 126000000, "cumulative_training_bytes": 126004045, "metrics": { "loss": 0.5165791951014506, "ce_loss": 0.5065792046381937, "lb_loss": 0.9999999899923866 } }, { "checkpoint_type": "bytes", "bytes_threshold": 127000000, "cumulative_training_bytes": 127003554, "metrics": { "loss": 0.5164666518987735, "ce_loss": 0.5064666614355167, "lb_loss": 0.9999999899438258 } }, { "checkpoint_type": "bytes", "bytes_threshold": 128000000, "cumulative_training_bytes": 128002625, "metrics": { "loss": 0.5163453918460235, "ce_loss": 0.5063454013827666, "lb_loss": 0.9999999898281746 } }, { "checkpoint_type": "bytes", "bytes_threshold": 129000000, "cumulative_training_bytes": 129002471, "metrics": { "loss": 0.516234602327751, "ce_loss": 0.5062346118644941, "lb_loss": 0.9999999898394887 } }, { "checkpoint_type": "bytes", "bytes_threshold": 130000000, "cumulative_training_bytes": 130003807, "metrics": { "loss": 0.5160949440918804, "ce_loss": 0.5060949536286236, "lb_loss": 0.999999989862498 } }, { "checkpoint_type": "bytes", "bytes_threshold": 131000000, "cumulative_training_bytes": 131005142, "metrics": { "loss": 0.5159886006401377, "ce_loss": 0.5059886101768809, "lb_loss": 0.9999999899381895 } }, { "checkpoint_type": "bytes", "bytes_threshold": 132000000, "cumulative_training_bytes": 132000275, "metrics": { "loss": 0.5158503684158198, "ce_loss": 0.5058503779525629, "lb_loss": 0.9999999899286085 } }, { "checkpoint_type": "bytes", "bytes_threshold": 133000000, "cumulative_training_bytes": 133000355, "metrics": { "loss": 0.515711167490349, "ce_loss": 0.5057111770270921, "lb_loss": 0.9999999899203098 } }, { "checkpoint_type": "bytes", "bytes_threshold": 134000000, "cumulative_training_bytes": 134000325, "metrics": { "loss": 0.5156527688998378, "ce_loss": 0.505652778436581, "lb_loss": 0.9999999898783498 } }, { "checkpoint_type": "bytes", "bytes_threshold": 135000000, "cumulative_training_bytes": 135000221, "metrics": { "loss": 0.5155272795038731, "ce_loss": 0.5055272890406163, "lb_loss": 0.9999999898154529 } }, { "checkpoint_type": "bytes", "bytes_threshold": 136000000, "cumulative_training_bytes": 136000822, "metrics": { "loss": 0.5154010211186479, "ce_loss": 0.5054010306553911, "lb_loss": 0.9999999898092987 } }, { "checkpoint_type": "bytes", "bytes_threshold": 137000000, "cumulative_training_bytes": 137000781, "metrics": { "loss": 0.5152766808144517, "ce_loss": 0.5052766903511948, "lb_loss": 0.999999989819518 } }, { "checkpoint_type": "bytes", "bytes_threshold": 138000000, "cumulative_training_bytes": 138000341, "metrics": { "loss": 0.5152090202840646, "ce_loss": 0.5052090298208077, "lb_loss": 0.9999999898347433 } }, { "checkpoint_type": "bytes", "bytes_threshold": 139000000, "cumulative_training_bytes": 139000421, "metrics": { "loss": 0.5150922501462007, "ce_loss": 0.5050922596829439, "lb_loss": 0.9999999898495164 } }, { "checkpoint_type": "bytes", "bytes_threshold": 140000000, "cumulative_training_bytes": 140000775, "metrics": { "loss": 0.514972472613841, "ce_loss": 0.5049724821505842, "lb_loss": 0.9999999898071157 } }, { "checkpoint_type": "bytes", "bytes_threshold": 141000000, "cumulative_training_bytes": 141000713, "metrics": { "loss": 0.514886301395959, "ce_loss": 0.5048863109327022, "lb_loss": 0.9999999897913573 } }, { "checkpoint_type": "bytes", "bytes_threshold": 142000000, "cumulative_training_bytes": 142001597, "metrics": { "loss": 0.5147740040636941, "ce_loss": 0.5047740136004373, "lb_loss": 0.9999999898061173 } }, { "checkpoint_type": "bytes", "bytes_threshold": 143000000, "cumulative_training_bytes": 143001061, "metrics": { "loss": 0.514646232276389, "ce_loss": 0.5046462418131321, "lb_loss": 0.9999999898204636 } }, { "epoch": 2, "checkpoint_type": "epoch", "metrics": { "loss": 0.5146146637141241, "ce_loss": 0.5046146732508673, "lb_loss": 0.9999999898131496, "training_bytes": 71629738 }, "cumulative_training_bytes": 143259491, "training_bytes_this_epoch": 71629738 }, { "checkpoint_type": "bytes", "bytes_threshold": 144000000, "cumulative_training_bytes": 144000161, "metrics": { "loss": 0.49798436546325686, "ce_loss": 0.487984375, "lb_loss": 0.9999999852180481 } }, { "checkpoint_type": "bytes", "bytes_threshold": 145000000, "cumulative_training_bytes": 145005821, "metrics": { "loss": 0.5014327235141043, "ce_loss": 0.49143273305084745, "lb_loss": 0.9999999876749718 } }, { "checkpoint_type": "bytes", "bytes_threshold": 146000000, "cumulative_training_bytes": 146004728, "metrics": { "loss": 0.5015055804417051, "ce_loss": 0.4915055899784483, "lb_loss": 0.9999999870257131 } }, { "checkpoint_type": "bytes", "bytes_threshold": 147000000, "cumulative_training_bytes": 147005290, "metrics": { "loss": 0.5011692736386123, "ce_loss": 0.49116928317535546, "lb_loss": 0.9999999863464873 } }, { "checkpoint_type": "bytes", "bytes_threshold": 148000000, "cumulative_training_bytes": 148004934, "metrics": { "loss": 0.5008042298647531, "ce_loss": 0.49080423940149626, "lb_loss": 0.9999999863251189 } }, { "checkpoint_type": "bytes", "bytes_threshold": 149000000, "cumulative_training_bytes": 149004071, "metrics": { "loss": 0.5005541871676853, "ce_loss": 0.4905541967044284, "lb_loss": 0.9999999872319607 } }, { "checkpoint_type": "bytes", "bytes_threshold": 150000000, "cumulative_training_bytes": 150002971, "metrics": { "loss": 0.5009830633799235, "ce_loss": 0.49098307291666665, "lb_loss": 0.9999999876607928 } }, { "checkpoint_type": "bytes", "bytes_threshold": 151000000, "cumulative_training_bytes": 151001320, "metrics": { "loss": 0.5009520531064959, "ce_loss": 0.4909520626432391, "lb_loss": 0.9999999877512228 } }, { "checkpoint_type": "bytes", "bytes_threshold": 152000000, "cumulative_training_bytes": 152002938, "metrics": { "loss": 0.5011527835112948, "ce_loss": 0.4911527930480379, "lb_loss": 0.9999999878613004 } }, { "checkpoint_type": "bytes", "bytes_threshold": 153000000, "cumulative_training_bytes": 153003165, "metrics": { "loss": 0.5009091244948294, "ce_loss": 0.49090913403157255, "lb_loss": 0.9999999879849775 } }, { "checkpoint_type": "bytes", "bytes_threshold": 154000000, "cumulative_training_bytes": 154003230, "metrics": { "loss": 0.5006602671703053, "ce_loss": 0.4906602767070485, "lb_loss": 0.9999999882497451 } }, { "checkpoint_type": "bytes", "bytes_threshold": 155000000, "cumulative_training_bytes": 155002454, "metrics": { "loss": 0.5005551305766069, "ce_loss": 0.4905551401133501, "lb_loss": 0.9999999881691536 } }, { "checkpoint_type": "bytes", "bytes_threshold": 156000000, "cumulative_training_bytes": 156001254, "metrics": { "loss": 0.5004347557255595, "ce_loss": 0.4904347652623027, "lb_loss": 0.9999999879905219 } }, { "checkpoint_type": "bytes", "bytes_threshold": 157000000, "cumulative_training_bytes": 157000532, "metrics": { "loss": 0.5002469771184441, "ce_loss": 0.49024698665518723, "lb_loss": 0.9999999877865644 } }, { "checkpoint_type": "bytes", "bytes_threshold": 158000000, "cumulative_training_bytes": 158000997, "metrics": { "loss": 0.5000509660852472, "ce_loss": 0.49005097562199035, "lb_loss": 0.9999999877537809 } }, { "checkpoint_type": "bytes", "bytes_threshold": 159000000, "cumulative_training_bytes": 159001808, "metrics": { "loss": 0.4999657282028284, "ce_loss": 0.4899657377395716, "lb_loss": 0.9999999877251615 } }, { "checkpoint_type": "bytes", "bytes_threshold": 160000000, "cumulative_training_bytes": 160002107, "metrics": { "loss": 0.49982096550742644, "ce_loss": 0.4898209750441696, "lb_loss": 0.9999999881422562 } }, { "checkpoint_type": "bytes", "bytes_threshold": 161000000, "cumulative_training_bytes": 161000944, "metrics": { "loss": 0.49971531223718146, "ce_loss": 0.48971532177392463, "lb_loss": 0.999999988075096 } }, { "checkpoint_type": "bytes", "bytes_threshold": 162000000, "cumulative_training_bytes": 162001675, "metrics": { "loss": 0.49959996974829474, "ce_loss": 0.4895999792850379, "lb_loss": 0.9999999881656182 } }, { "checkpoint_type": "bytes", "bytes_threshold": 163000000, "cumulative_training_bytes": 163002334, "metrics": { "loss": 0.4994611925159988, "ce_loss": 0.48946120205274196, "lb_loss": 0.9999999879433218 } }, { "checkpoint_type": "bytes", "bytes_threshold": 164000000, "cumulative_training_bytes": 164002386, "metrics": { "loss": 0.4994817494122015, "ce_loss": 0.4894817589489447, "lb_loss": 0.9999999877594569 } }, { "checkpoint_type": "bytes", "bytes_threshold": 165000000, "cumulative_training_bytes": 165002682, "metrics": { "loss": 0.4994616188646174, "ce_loss": 0.48946162840136054, "lb_loss": 0.9999999877222541 } }, { "checkpoint_type": "bytes", "bytes_threshold": 166000000, "cumulative_training_bytes": 166002837, "metrics": { "loss": 0.49945240412740877, "ce_loss": 0.48945241366415193, "lb_loss": 0.9999999876418049 } }, { "checkpoint_type": "bytes", "bytes_threshold": 167000000, "cumulative_training_bytes": 167002163, "metrics": { "loss": 0.49971019012685014, "ce_loss": 0.4897101996635933, "lb_loss": 0.9999999878503365 } }, { "checkpoint_type": "bytes", "bytes_threshold": 168000000, "cumulative_training_bytes": 168003307, "metrics": { "loss": 0.4998584052468532, "ce_loss": 0.4898584147835964, "lb_loss": 0.9999999878709821 } }, { "checkpoint_type": "bytes", "bytes_threshold": 169000000, "cumulative_training_bytes": 169003303, "metrics": { "loss": 0.4998855771100047, "ce_loss": 0.48988558664674786, "lb_loss": 0.9999999880818109 } }, { "checkpoint_type": "bytes", "bytes_threshold": 170000000, "cumulative_training_bytes": 170003861, "metrics": { "loss": 0.4998251598493188, "ce_loss": 0.48982516938606196, "lb_loss": 0.9999999881054448 } }, { "checkpoint_type": "bytes", "bytes_threshold": 171000000, "cumulative_training_bytes": 171002515, "metrics": { "loss": 0.49974243954621694, "ce_loss": 0.4897424490829601, "lb_loss": 0.9999999880765287 } }, { "checkpoint_type": "bytes", "bytes_threshold": 172000000, "cumulative_training_bytes": 172003023, "metrics": { "loss": 0.4996642684583165, "ce_loss": 0.4896642779950597, "lb_loss": 0.999999988098702 } }, { "checkpoint_type": "bytes", "bytes_threshold": 173000000, "cumulative_training_bytes": 173004076, "metrics": { "loss": 0.4995929074614665, "ce_loss": 0.48959291699820967, "lb_loss": 0.9999999882260966 } }, { "checkpoint_type": "bytes", "bytes_threshold": 174000000, "cumulative_training_bytes": 174003533, "metrics": { "loss": 0.4995156635771906, "ce_loss": 0.4895156731139338, "lb_loss": 0.9999999882649054 } }, { "checkpoint_type": "bytes", "bytes_threshold": 175000000, "cumulative_training_bytes": 175003279, "metrics": { "loss": 0.4994341848947573, "ce_loss": 0.4894341944315005, "lb_loss": 0.9999999882901592 } }, { "checkpoint_type": "bytes", "bytes_threshold": 176000000, "cumulative_training_bytes": 176004414, "metrics": { "loss": 0.4994536801542579, "ce_loss": 0.48945368969100106, "lb_loss": 0.9999999883246413 } }, { "checkpoint_type": "bytes", "bytes_threshold": 177000000, "cumulative_training_bytes": 177003681, "metrics": { "loss": 0.4994922259095132, "ce_loss": 0.48949223544625636, "lb_loss": 0.9999999883884341 } }, { "checkpoint_type": "bytes", "bytes_threshold": 178000000, "cumulative_training_bytes": 178003552, "metrics": { "loss": 0.49946003388968735, "ce_loss": 0.4894600434264305, "lb_loss": 0.9999999885196095 } }, { "checkpoint_type": "bytes", "bytes_threshold": 179000000, "cumulative_training_bytes": 179003814, "metrics": { "loss": 0.4993792064457101, "ce_loss": 0.48937921598245326, "lb_loss": 0.9999999884757118 } }, { "checkpoint_type": "bytes", "bytes_threshold": 180000000, "cumulative_training_bytes": 180004777, "metrics": { "loss": 0.49927101250431966, "ce_loss": 0.4892710220410628, "lb_loss": 0.9999999886165687 } }, { "checkpoint_type": "bytes", "bytes_threshold": 181000000, "cumulative_training_bytes": 181004979, "metrics": { "loss": 0.4991893716358544, "ce_loss": 0.4891893811725976, "lb_loss": 0.9999999887219304 } }, { "checkpoint_type": "bytes", "bytes_threshold": 182000000, "cumulative_training_bytes": 182000028, "metrics": { "loss": 0.49910491208766494, "ce_loss": 0.4891049216244081, "lb_loss": 0.9999999886926885 } }, { "checkpoint_type": "bytes", "bytes_threshold": 183000000, "cumulative_training_bytes": 183000406, "metrics": { "loss": 0.49903300164923065, "ce_loss": 0.4890330111859738, "lb_loss": 0.9999999886843475 } }, { "checkpoint_type": "bytes", "bytes_threshold": 184000000, "cumulative_training_bytes": 184005785, "metrics": { "loss": 0.49893956214856033, "ce_loss": 0.4889395716853035, "lb_loss": 0.9999999886434369 } }, { "checkpoint_type": "bytes", "bytes_threshold": 185000000, "cumulative_training_bytes": 185005572, "metrics": { "loss": 0.4988476605553901, "ce_loss": 0.48884767009213326, "lb_loss": 0.9999999887465079 } }, { "checkpoint_type": "bytes", "bytes_threshold": 186000000, "cumulative_training_bytes": 186004863, "metrics": { "loss": 0.49877141427624133, "ce_loss": 0.4887714238129845, "lb_loss": 0.9999999888200036 } }, { "checkpoint_type": "bytes", "bytes_threshold": 187000000, "cumulative_training_bytes": 187005080, "metrics": { "loss": 0.49868518042335425, "ce_loss": 0.4886851899600974, "lb_loss": 0.9999999888417654 } }, { "checkpoint_type": "bytes", "bytes_threshold": 188000000, "cumulative_training_bytes": 188005375, "metrics": { "loss": 0.498808651366457, "ce_loss": 0.4888086609032002, "lb_loss": 0.99999998895714 } }, { "checkpoint_type": "bytes", "bytes_threshold": 189000000, "cumulative_training_bytes": 189005528, "metrics": { "loss": 0.49888453402165805, "ce_loss": 0.4888845435584012, "lb_loss": 0.9999999890674705 } }, { "checkpoint_type": "bytes", "bytes_threshold": 190000000, "cumulative_training_bytes": 190005472, "metrics": { "loss": 0.4988654473461682, "ce_loss": 0.4888654568829114, "lb_loss": 0.9999999890900866 } }, { "checkpoint_type": "bytes", "bytes_threshold": 191000000, "cumulative_training_bytes": 191005752, "metrics": { "loss": 0.4988207764187656, "ce_loss": 0.48882078595550876, "lb_loss": 0.9999999891930109 } }, { "checkpoint_type": "bytes", "bytes_threshold": 192000000, "cumulative_training_bytes": 192004576, "metrics": { "loss": 0.4987819687953763, "ce_loss": 0.48878197833211945, "lb_loss": 0.9999999892121236 } }, { "checkpoint_type": "bytes", "bytes_threshold": 193000000, "cumulative_training_bytes": 193005339, "metrics": { "loss": 0.49873961034847153, "ce_loss": 0.4887396198852147, "lb_loss": 0.9999999891808389 } }, { "checkpoint_type": "bytes", "bytes_threshold": 194000000, "cumulative_training_bytes": 194004395, "metrics": { "loss": 0.4987082588138865, "ce_loss": 0.48870826835062964, "lb_loss": 0.9999999892411392 } }, { "checkpoint_type": "bytes", "bytes_threshold": 195000000, "cumulative_training_bytes": 195004180, "metrics": { "loss": 0.498664495180238, "ce_loss": 0.48866450471698114, "lb_loss": 0.9999999892241346 } }, { "checkpoint_type": "bytes", "bytes_threshold": 196000000, "cumulative_training_bytes": 196003177, "metrics": { "loss": 0.49861976167988237, "ce_loss": 0.48861977121662553, "lb_loss": 0.9999999891944014 } }, { "checkpoint_type": "bytes", "bytes_threshold": 197000000, "cumulative_training_bytes": 197004413, "metrics": { "loss": 0.49855583482910515, "ce_loss": 0.4885558443658483, "lb_loss": 0.9999999892248347 } }, { "checkpoint_type": "bytes", "bytes_threshold": 198000000, "cumulative_training_bytes": 198004837, "metrics": { "loss": 0.49850712067834546, "ce_loss": 0.4885071302150886, "lb_loss": 0.9999999891768478 } }, { "checkpoint_type": "bytes", "bytes_threshold": 199000000, "cumulative_training_bytes": 199004820, "metrics": { "loss": 0.4984497884544467, "ce_loss": 0.48844979799118987, "lb_loss": 0.999999989181197 } }, { "checkpoint_type": "bytes", "bytes_threshold": 200000000, "cumulative_training_bytes": 200005030, "metrics": { "loss": 0.49839834649818904, "ce_loss": 0.4883983560349322, "lb_loss": 0.9999999892413305 } }, { "checkpoint_type": "bytes", "bytes_threshold": 201000000, "cumulative_training_bytes": 201005297, "metrics": { "loss": 0.4983296733201069, "ce_loss": 0.4883296828568501, "lb_loss": 0.999999989268843 } }, { "checkpoint_type": "bytes", "bytes_threshold": 202000000, "cumulative_training_bytes": 202003321, "metrics": { "loss": 0.49828891289224553, "ce_loss": 0.4882889224289887, "lb_loss": 0.9999999892593967 } }, { "checkpoint_type": "bytes", "bytes_threshold": 203000000, "cumulative_training_bytes": 203002998, "metrics": { "loss": 0.4982311404954446, "ce_loss": 0.48823115003218776, "lb_loss": 0.9999999893270083 } }, { "checkpoint_type": "bytes", "bytes_threshold": 204000000, "cumulative_training_bytes": 204002425, "metrics": { "loss": 0.4981811680153706, "ce_loss": 0.48818117755211377, "lb_loss": 0.9999999893517516 } }, { "checkpoint_type": "bytes", "bytes_threshold": 205000000, "cumulative_training_bytes": 205003606, "metrics": { "loss": 0.49810324124188643, "ce_loss": 0.4881032507786296, "lb_loss": 0.9999999894042534 } }, { "checkpoint_type": "bytes", "bytes_threshold": 206000000, "cumulative_training_bytes": 206003279, "metrics": { "loss": 0.49805855769024665, "ce_loss": 0.4880585672269898, "lb_loss": 0.9999999893820093 } }, { "checkpoint_type": "bytes", "bytes_threshold": 207000000, "cumulative_training_bytes": 207003222, "metrics": { "loss": 0.4979902569280299, "ce_loss": 0.48799026646477306, "lb_loss": 0.9999999893936597 } }, { "checkpoint_type": "bytes", "bytes_threshold": 208000000, "cumulative_training_bytes": 208003619, "metrics": { "loss": 0.49792424448445954, "ce_loss": 0.4879242540212027, "lb_loss": 0.9999999893286877 } }, { "checkpoint_type": "bytes", "bytes_threshold": 209000000, "cumulative_training_bytes": 209003539, "metrics": { "loss": 0.4978634042536447, "ce_loss": 0.4878634137903879, "lb_loss": 0.9999999893193369 } }, { "checkpoint_type": "bytes", "bytes_threshold": 210000000, "cumulative_training_bytes": 210003625, "metrics": { "loss": 0.49781200564499445, "ce_loss": 0.4878120151817376, "lb_loss": 0.9999999893102662 } }, { "checkpoint_type": "bytes", "bytes_threshold": 211000000, "cumulative_training_bytes": 211003650, "metrics": { "loss": 0.49774608864868786, "ce_loss": 0.487746098185431, "lb_loss": 0.9999999893274939 } }, { "checkpoint_type": "bytes", "bytes_threshold": 212000000, "cumulative_training_bytes": 212004804, "metrics": { "loss": 0.4976889819527559, "ce_loss": 0.48768899148949907, "lb_loss": 0.9999999893955241 } }, { "checkpoint_type": "bytes", "bytes_threshold": 213000000, "cumulative_training_bytes": 213005524, "metrics": { "loss": 0.4976399751921955, "ce_loss": 0.48763998472893866, "lb_loss": 0.9999999895425125 } }, { "checkpoint_type": "bytes", "bytes_threshold": 214000000, "cumulative_training_bytes": 214003509, "metrics": { "loss": 0.49759431418147365, "ce_loss": 0.4875943237182168, "lb_loss": 0.9999999895308 } }, { "epoch": 3, "checkpoint_type": "epoch", "metrics": { "loss": 0.49754748757212847, "ce_loss": 0.48754749710887163, "lb_loss": 0.9999999895226594, "training_bytes": 71629674 }, "cumulative_training_bytes": 214889165, "training_bytes_this_epoch": 71629674 }, { "checkpoint_type": "bytes", "bytes_threshold": 215000000, "cumulative_training_bytes": 215001733, "metrics": { "loss": 0.48779604309483576, "ce_loss": 0.4777960526315789, "lb_loss": 0.9999999874516537 } }, { "checkpoint_type": "bytes", "bytes_threshold": 216000000, "cumulative_training_bytes": 216002545, "metrics": { "loss": 0.48633393328240576, "ce_loss": 0.4763339428191489, "lb_loss": 0.9999999930249884 } }, { "checkpoint_type": "bytes", "bytes_threshold": 217000000, "cumulative_training_bytes": 217001927, "metrics": { "loss": 0.4868852758274025, "ce_loss": 0.47688528536414565, "lb_loss": 0.9999999886467343 } }, { "checkpoint_type": "bytes", "bytes_threshold": 218000000, "cumulative_training_bytes": 218001529, "metrics": { "loss": 0.48702292297276256, "ce_loss": 0.4770229325095057, "lb_loss": 0.9999999900281202 } }, { "checkpoint_type": "bytes", "bytes_threshold": 219000000, "cumulative_training_bytes": 219000917, "metrics": { "loss": 0.4870795767941921, "ce_loss": 0.4770795863309353, "lb_loss": 0.9999999893655023 } }, { "checkpoint_type": "bytes", "bytes_threshold": 220000000, "cumulative_training_bytes": 220000410, "metrics": { "loss": 0.4870146027317754, "ce_loss": 0.47701461226851855, "lb_loss": 0.9999999893070372 } }, { "checkpoint_type": "bytes", "bytes_threshold": 221000000, "cumulative_training_bytes": 221000777, "metrics": { "loss": 0.4872185740789393, "ce_loss": 0.4772185836156825, "lb_loss": 0.9999999897870067 } }, { "checkpoint_type": "bytes", "bytes_threshold": 222000000, "cumulative_training_bytes": 222000038, "metrics": { "loss": 0.4873976870106778, "ce_loss": 0.477397696547421, "lb_loss": 0.9999999891898398 } }, { "checkpoint_type": "bytes", "bytes_threshold": 223000000, "cumulative_training_bytes": 223000213, "metrics": { "loss": 0.4873916069840446, "ce_loss": 0.47739161652078776, "lb_loss": 0.9999999892615994 } }, { "checkpoint_type": "bytes", "bytes_threshold": 224000000, "cumulative_training_bytes": 224005748, "metrics": { "loss": 0.4875181402036851, "ce_loss": 0.4775181497404283, "lb_loss": 0.9999999899820876 } }, { "checkpoint_type": "bytes", "bytes_threshold": 225000000, "cumulative_training_bytes": 225000345, "metrics": { "loss": 0.48744476811100407, "ce_loss": 0.47744477764774723, "lb_loss": 0.9999999900949567 } }, { "checkpoint_type": "bytes", "bytes_threshold": 226000000, "cumulative_training_bytes": 226001108, "metrics": { "loss": 0.4874496127342898, "ce_loss": 0.477449622271033, "lb_loss": 0.9999999902880611 } }, { "checkpoint_type": "bytes", "bytes_threshold": 227000000, "cumulative_training_bytes": 227000927, "metrics": { "loss": 0.4873878217407361, "ce_loss": 0.47738783127747925, "lb_loss": 0.9999999899833913 } }, { "checkpoint_type": "bytes", "bytes_threshold": 228000000, "cumulative_training_bytes": 228001827, "metrics": { "loss": 0.48735308302869007, "ce_loss": 0.47735309256543323, "lb_loss": 0.9999999899134739 } }, { "checkpoint_type": "bytes", "bytes_threshold": 229000000, "cumulative_training_bytes": 229005282, "metrics": { "loss": 0.48745310236602296, "ce_loss": 0.4774531119027661, "lb_loss": 0.9999999899826225 } }, { "checkpoint_type": "bytes", "bytes_threshold": 230000000, "cumulative_training_bytes": 230004719, "metrics": { "loss": 0.48747751614818835, "ce_loss": 0.4774775256849315, "lb_loss": 0.9999999900620045 } }, { "checkpoint_type": "bytes", "bytes_threshold": 231000000, "cumulative_training_bytes": 231005169, "metrics": { "loss": 0.4874888625263993, "ce_loss": 0.47748887206314244, "lb_loss": 0.9999999900221299 } }, { "checkpoint_type": "bytes", "bytes_threshold": 232000000, "cumulative_training_bytes": 232004714, "metrics": { "loss": 0.48755626854828965, "ce_loss": 0.4775562780850328, "lb_loss": 0.9999999898220897 } }, { "checkpoint_type": "bytes", "bytes_threshold": 233000000, "cumulative_training_bytes": 233003723, "metrics": { "loss": 0.48759518457658146, "ce_loss": 0.47759519411332463, "lb_loss": 0.9999999893716081 } }, { "checkpoint_type": "bytes", "bytes_threshold": 234000000, "cumulative_training_bytes": 234003904, "metrics": { "loss": 0.4876028272397966, "ce_loss": 0.47760283677653975, "lb_loss": 0.9999999895032365 } }, { "checkpoint_type": "bytes", "bytes_threshold": 235000000, "cumulative_training_bytes": 235004334, "metrics": { "loss": 0.48761258237502153, "ce_loss": 0.4776125919117647, "lb_loss": 0.9999999895165949 } }, { "checkpoint_type": "bytes", "bytes_threshold": 236000000, "cumulative_training_bytes": 236003979, "metrics": { "loss": 0.4876520594951986, "ce_loss": 0.47765206903194174, "lb_loss": 0.9999999894117835 } }, { "checkpoint_type": "bytes", "bytes_threshold": 237000000, "cumulative_training_bytes": 237005117, "metrics": { "loss": 0.4876482565815019, "ce_loss": 0.47764826611824507, "lb_loss": 0.9999999893483407 } }, { "checkpoint_type": "bytes", "bytes_threshold": 238000000, "cumulative_training_bytes": 238005855, "metrics": { "loss": 0.4875992913974775, "ce_loss": 0.47759930093422065, "lb_loss": 0.9999999891378277 } }, { "checkpoint_type": "bytes", "bytes_threshold": 239000000, "cumulative_training_bytes": 239000123, "metrics": { "loss": 0.48757476040190717, "ce_loss": 0.47757476993865033, "lb_loss": 0.9999999890444469 } }, { "checkpoint_type": "bytes", "bytes_threshold": 240000000, "cumulative_training_bytes": 240005576, "metrics": { "loss": 0.4875802319237986, "ce_loss": 0.4775802414605418, "lb_loss": 0.999999988767087 } }, { "checkpoint_type": "bytes", "bytes_threshold": 241000000, "cumulative_training_bytes": 241005599, "metrics": { "loss": 0.4875744516945663, "ce_loss": 0.4775744612313095, "lb_loss": 0.9999999888460723 } }, { "checkpoint_type": "bytes", "bytes_threshold": 242000000, "cumulative_training_bytes": 242000904, "metrics": { "loss": 0.487592761769455, "ce_loss": 0.47759277130619815, "lb_loss": 0.9999999890078732 } }, { "checkpoint_type": "bytes", "bytes_threshold": 243000000, "cumulative_training_bytes": 243001310, "metrics": { "loss": 0.487603389747618, "ce_loss": 0.4776033992843612, "lb_loss": 0.9999999890475995 } }, { "checkpoint_type": "bytes", "bytes_threshold": 244000000, "cumulative_training_bytes": 244002100, "metrics": { "loss": 0.48759343876102107, "ce_loss": 0.47759344829776423, "lb_loss": 0.9999999889271046 } }, { "checkpoint_type": "bytes", "bytes_threshold": 245000000, "cumulative_training_bytes": 245002388, "metrics": { "loss": 0.487588754218415, "ce_loss": 0.4775887637551582, "lb_loss": 0.9999999890020119 } }, { "checkpoint_type": "bytes", "bytes_threshold": 246000000, "cumulative_training_bytes": 246002524, "metrics": { "loss": 0.48757954186112673, "ce_loss": 0.4775795513978699, "lb_loss": 0.9999999888680561 } }, { "checkpoint_type": "bytes", "bytes_threshold": 247000000, "cumulative_training_bytes": 247002019, "metrics": { "loss": 0.4875975351472443, "ce_loss": 0.47759754468398746, "lb_loss": 0.9999999887314602 } }, { "checkpoint_type": "bytes", "bytes_threshold": 248000000, "cumulative_training_bytes": 248002122, "metrics": { "loss": 0.4875781433849866, "ce_loss": 0.4775781529217298, "lb_loss": 0.9999999888054893 } }, { "checkpoint_type": "bytes", "bytes_threshold": 249000000, "cumulative_training_bytes": 249002522, "metrics": { "loss": 0.4875829262828579, "ce_loss": 0.47758293581960104, "lb_loss": 0.9999999887614486 } }, { "checkpoint_type": "bytes", "bytes_threshold": 250000000, "cumulative_training_bytes": 250002149, "metrics": { "loss": 0.48756439685017966, "ce_loss": 0.4775644063869228, "lb_loss": 0.9999999885391136 } }, { "checkpoint_type": "bytes", "bytes_threshold": 251000000, "cumulative_training_bytes": 251002689, "metrics": { "loss": 0.4875350526253083, "ce_loss": 0.47753506216205144, "lb_loss": 0.9999999884853553 } }, { "checkpoint_type": "bytes", "bytes_threshold": 252000000, "cumulative_training_bytes": 252003095, "metrics": { "loss": 0.4875272196166369, "ce_loss": 0.4775272291533801, "lb_loss": 0.9999999884059843 } }, { "checkpoint_type": "bytes", "bytes_threshold": 253000000, "cumulative_training_bytes": 253004268, "metrics": { "loss": 0.4875543662395338, "ce_loss": 0.47755437577627696, "lb_loss": 0.9999999883492862 } }, { "checkpoint_type": "bytes", "bytes_threshold": 254000000, "cumulative_training_bytes": 254003193, "metrics": { "loss": 0.4876513354897319, "ce_loss": 0.47765134502647505, "lb_loss": 0.9999999883766434 } }, { "checkpoint_type": "bytes", "bytes_threshold": 255000000, "cumulative_training_bytes": 255003102, "metrics": { "loss": 0.48769247368349583, "ce_loss": 0.477692483220239, "lb_loss": 0.9999999884202216 } }, { "checkpoint_type": "bytes", "bytes_threshold": 256000000, "cumulative_training_bytes": 256003674, "metrics": { "loss": 0.48770237559207086, "ce_loss": 0.477702385128814, "lb_loss": 0.9999999885560454 } }, { "checkpoint_type": "bytes", "bytes_threshold": 257000000, "cumulative_training_bytes": 257003033, "metrics": { "loss": 0.4877074163449486, "ce_loss": 0.47770742588169174, "lb_loss": 0.9999999885095444 } }, { "checkpoint_type": "bytes", "bytes_threshold": 258000000, "cumulative_training_bytes": 258002592, "metrics": { "loss": 0.48769506706564497, "ce_loss": 0.47769507660238814, "lb_loss": 0.9999999883997548 } }, { "checkpoint_type": "bytes", "bytes_threshold": 259000000, "cumulative_training_bytes": 259002520, "metrics": { "loss": 0.48769663910510797, "ce_loss": 0.47769664864185113, "lb_loss": 0.9999999883189288 } }, { "checkpoint_type": "bytes", "bytes_threshold": 260000000, "cumulative_training_bytes": 260002567, "metrics": { "loss": 0.4876953234577479, "ce_loss": 0.4776953329944911, "lb_loss": 0.9999999883980466 } }, { "checkpoint_type": "bytes", "bytes_threshold": 261000000, "cumulative_training_bytes": 261002603, "metrics": { "loss": 0.48768529297833446, "ce_loss": 0.4776853025150776, "lb_loss": 0.9999999883590056 } }, { "checkpoint_type": "bytes", "bytes_threshold": 262000000, "cumulative_training_bytes": 262003247, "metrics": { "loss": 0.4876651401586851, "ce_loss": 0.4776651496954283, "lb_loss": 0.9999999884039695 } }, { "checkpoint_type": "bytes", "bytes_threshold": 263000000, "cumulative_training_bytes": 263001525, "metrics": { "loss": 0.4876595168898956, "ce_loss": 0.4776595264266388, "lb_loss": 0.999999988483717 } }, { "checkpoint_type": "bytes", "bytes_threshold": 264000000, "cumulative_training_bytes": 264000697, "metrics": { "loss": 0.4876635352674737, "ce_loss": 0.47766354480421686, "lb_loss": 0.9999999885458544 } }, { "checkpoint_type": "bytes", "bytes_threshold": 265000000, "cumulative_training_bytes": 265000767, "metrics": { "loss": 0.4876318777138177, "ce_loss": 0.4776318872505609, "lb_loss": 0.9999999885773599 } }, { "checkpoint_type": "bytes", "bytes_threshold": 266000000, "cumulative_training_bytes": 266000971, "metrics": { "loss": 0.487606885505512, "ce_loss": 0.4776068950422552, "lb_loss": 0.9999999885455302 } }, { "checkpoint_type": "bytes", "bytes_threshold": 267000000, "cumulative_training_bytes": 267001053, "metrics": { "loss": 0.48759194501645314, "ce_loss": 0.4775919545531963, "lb_loss": 0.9999999885216898 } }, { "checkpoint_type": "bytes", "bytes_threshold": 268000000, "cumulative_training_bytes": 268001478, "metrics": { "loss": 0.4875803958498433, "ce_loss": 0.47758040538658647, "lb_loss": 0.9999999885053877 } }, { "checkpoint_type": "bytes", "bytes_threshold": 269000000, "cumulative_training_bytes": 269001884, "metrics": { "loss": 0.48757717574209775, "ce_loss": 0.4775771852788409, "lb_loss": 0.9999999885353121 } }, { "checkpoint_type": "bytes", "bytes_threshold": 270000000, "cumulative_training_bytes": 270002027, "metrics": { "loss": 0.48756191001983834, "ce_loss": 0.4775619195565815, "lb_loss": 0.9999999885449523 } }, { "checkpoint_type": "bytes", "bytes_threshold": 271000000, "cumulative_training_bytes": 271000479, "metrics": { "loss": 0.48755995797090207, "ce_loss": 0.47755996750764523, "lb_loss": 0.9999999886422447 } }, { "checkpoint_type": "bytes", "bytes_threshold": 272000000, "cumulative_training_bytes": 272000318, "metrics": { "loss": 0.48754512359628627, "ce_loss": 0.47754513313302943, "lb_loss": 0.9999999886373242 } }, { "checkpoint_type": "bytes", "bytes_threshold": 273000000, "cumulative_training_bytes": 273000319, "metrics": { "loss": 0.48753497608081103, "ce_loss": 0.4775349856175542, "lb_loss": 0.9999999886447113 } }, { "checkpoint_type": "bytes", "bytes_threshold": 274000000, "cumulative_training_bytes": 274000061, "metrics": { "loss": 0.48750523007787144, "ce_loss": 0.4775052396146146, "lb_loss": 0.999999988687647 } }, { "checkpoint_type": "bytes", "bytes_threshold": 275000000, "cumulative_training_bytes": 275001198, "metrics": { "loss": 0.48749050841531905, "ce_loss": 0.4774905179520622, "lb_loss": 0.9999999888112947 } }, { "checkpoint_type": "bytes", "bytes_threshold": 276000000, "cumulative_training_bytes": 276001015, "metrics": { "loss": 0.4874637873019478, "ce_loss": 0.47746379683869095, "lb_loss": 0.9999999888731841 } }, { "checkpoint_type": "bytes", "bytes_threshold": 277000000, "cumulative_training_bytes": 277005682, "metrics": { "loss": 0.48745421537514483, "ce_loss": 0.477454224911888, "lb_loss": 0.9999999888943908 } }, { "checkpoint_type": "bytes", "bytes_threshold": 278000000, "cumulative_training_bytes": 278005146, "metrics": { "loss": 0.487448876337917, "ce_loss": 0.47744888587466017, "lb_loss": 0.9999999889306458 } }, { "checkpoint_type": "bytes", "bytes_threshold": 279000000, "cumulative_training_bytes": 279005553, "metrics": { "loss": 0.4874283830896873, "ce_loss": 0.47742839262643044, "lb_loss": 0.9999999888832607 } }, { "checkpoint_type": "bytes", "bytes_threshold": 280000000, "cumulative_training_bytes": 280005819, "metrics": { "loss": 0.4874081643046925, "ce_loss": 0.4774081738414357, "lb_loss": 0.999999988875244 } }, { "checkpoint_type": "bytes", "bytes_threshold": 281000000, "cumulative_training_bytes": 281005718, "metrics": { "loss": 0.48739380087134704, "ce_loss": 0.4773938104080902, "lb_loss": 0.9999999888781381 } }, { "checkpoint_type": "bytes", "bytes_threshold": 282000000, "cumulative_training_bytes": 282000131, "metrics": { "loss": 0.4873868256223998, "ce_loss": 0.477386835159143, "lb_loss": 0.9999999888747105 } }, { "checkpoint_type": "bytes", "bytes_threshold": 283000000, "cumulative_training_bytes": 283000083, "metrics": { "loss": 0.4873677663949743, "ce_loss": 0.4773677759317175, "lb_loss": 0.9999999888153911 } }, { "checkpoint_type": "bytes", "bytes_threshold": 284000000, "cumulative_training_bytes": 284005053, "metrics": { "loss": 0.4873527026668353, "ce_loss": 0.47735271220357844, "lb_loss": 0.9999999888608048 } }, { "checkpoint_type": "bytes", "bytes_threshold": 285000000, "cumulative_training_bytes": 285005105, "metrics": { "loss": 0.4873396199463792, "ce_loss": 0.47733962948312236, "lb_loss": 0.9999999888687697 } }, { "checkpoint_type": "bytes", "bytes_threshold": 286000000, "cumulative_training_bytes": 286004749, "metrics": { "loss": 0.4873238175807375, "ce_loss": 0.47732382711748067, "lb_loss": 0.9999999888616331 } }, { "epoch": 4, "checkpoint_type": "epoch", "metrics": { "loss": 0.48730673014812387, "ce_loss": 0.47730673968486703, "lb_loss": 0.9999999889466027, "training_bytes": 71629719 }, "cumulative_training_bytes": 286518884, "training_bytes_this_epoch": 71629719 }, { "checkpoint_type": "bytes", "bytes_threshold": 287000000, "cumulative_training_bytes": 287004957, "metrics": { "loss": 0.4769635956461837, "ce_loss": 0.46696360518292684, "lb_loss": 0.9999999905504832 } }, { "checkpoint_type": "bytes", "bytes_threshold": 288000000, "cumulative_training_bytes": 288004914, "metrics": { "loss": 0.4783764844871612, "ce_loss": 0.46837649402390436, "lb_loss": 0.9999999943007511 } }, { "checkpoint_type": "bytes", "bytes_threshold": 289000000, "cumulative_training_bytes": 289004460, "metrics": { "loss": 0.4789918059394473, "ce_loss": 0.46899181547619045, "lb_loss": 0.9999999903497242 } }, { "checkpoint_type": "bytes", "bytes_threshold": 290000000, "cumulative_training_bytes": 290004004, "metrics": { "loss": 0.4792706027298103, "ce_loss": 0.4692706122665535, "lb_loss": 0.999999990791133 } }, { "checkpoint_type": "bytes", "bytes_threshold": 291000000, "cumulative_training_bytes": 291005649, "metrics": { "loss": 0.47934778070072387, "ce_loss": 0.46934779023746703, "lb_loss": 0.9999999906425425 } }, { "checkpoint_type": "bytes", "bytes_threshold": 292000000, "cumulative_training_bytes": 292000020, "metrics": { "loss": 0.4794418101176845, "ce_loss": 0.46944181965442766, "lb_loss": 0.9999999907310272 } }, { "checkpoint_type": "bytes", "bytes_threshold": 293000000, "cumulative_training_bytes": 293000330, "metrics": { "loss": 0.47953124046325685, "ce_loss": 0.46953125, "lb_loss": 0.9999999902564096 } }, { "checkpoint_type": "bytes", "bytes_threshold": 294000000, "cumulative_training_bytes": 294005832, "metrics": { "loss": 0.47959454332886947, "ce_loss": 0.46959455286561264, "lb_loss": 0.9999999900109212 } }, { "checkpoint_type": "bytes", "bytes_threshold": 295000000, "cumulative_training_bytes": 295000237, "metrics": { "loss": 0.4795432345316448, "ce_loss": 0.469543244068388, "lb_loss": 0.9999999901421488 } }, { "checkpoint_type": "bytes", "bytes_threshold": 296000000, "cumulative_training_bytes": 296000199, "metrics": { "loss": 0.47965827893079743, "ce_loss": 0.4696582884675406, "lb_loss": 0.9999999900286861 } }, { "checkpoint_type": "bytes", "bytes_threshold": 297000000, "cumulative_training_bytes": 297005707, "metrics": { "loss": 0.47973316625332457, "ce_loss": 0.46973317579006774, "lb_loss": 0.9999999901443787 } }, { "checkpoint_type": "bytes", "bytes_threshold": 298000000, "cumulative_training_bytes": 298005767, "metrics": { "loss": 0.47985183068736814, "ce_loss": 0.4698518402241113, "lb_loss": 0.999999989989122 } }, { "checkpoint_type": "bytes", "bytes_threshold": 299000000, "cumulative_training_bytes": 299000563, "metrics": { "loss": 0.4799011210346177, "ce_loss": 0.46990113057136085, "lb_loss": 0.9999999897408791 } }, { "checkpoint_type": "bytes", "bytes_threshold": 300000000, "cumulative_training_bytes": 300005848, "metrics": { "loss": 0.47993609182350255, "ce_loss": 0.4699361013602457, "lb_loss": 0.9999999892507639 } }, { "checkpoint_type": "bytes", "bytes_threshold": 301000000, "cumulative_training_bytes": 301000438, "metrics": { "loss": 0.4799799723288065, "ce_loss": 0.4699799818655497, "lb_loss": 0.9999999894772348 } }, { "checkpoint_type": "bytes", "bytes_threshold": 302000000, "cumulative_training_bytes": 302002460, "metrics": { "loss": 0.47996920037342505, "ce_loss": 0.4699692099101682, "lb_loss": 0.9999999895418455 } }, { "checkpoint_type": "bytes", "bytes_threshold": 303000000, "cumulative_training_bytes": 303002560, "metrics": { "loss": 0.4800305672137057, "ce_loss": 0.47003057675044885, "lb_loss": 0.9999999895344088 } }, { "checkpoint_type": "bytes", "bytes_threshold": 304000000, "cumulative_training_bytes": 304002632, "metrics": { "loss": 0.4800855735709075, "ce_loss": 0.47008558310765064, "lb_loss": 0.9999999897497768 } }, { "checkpoint_type": "bytes", "bytes_threshold": 305000000, "cumulative_training_bytes": 305003714, "metrics": { "loss": 0.4801058582186203, "ce_loss": 0.47010586775536345, "lb_loss": 0.9999999895028644 } }, { "checkpoint_type": "bytes", "bytes_threshold": 306000000, "cumulative_training_bytes": 306003953, "metrics": { "loss": 0.4801525383065132, "ce_loss": 0.4701525478432564, "lb_loss": 0.9999999896615273 } }, { "checkpoint_type": "bytes", "bytes_threshold": 307000000, "cumulative_training_bytes": 307005112, "metrics": { "loss": 0.48019917422517533, "ce_loss": 0.4701991837619185, "lb_loss": 0.9999999893052631 } }, { "checkpoint_type": "bytes", "bytes_threshold": 308000000, "cumulative_training_bytes": 308003803, "metrics": { "loss": 0.4802888162208326, "ce_loss": 0.47028882575757575, "lb_loss": 0.9999999895732922 } }, { "checkpoint_type": "bytes", "bytes_threshold": 309000000, "cumulative_training_bytes": 309001801, "metrics": { "loss": 0.48038025089363323, "ce_loss": 0.4703802604303764, "lb_loss": 0.9999999895350624 } }, { "checkpoint_type": "bytes", "bytes_threshold": 310000000, "cumulative_training_bytes": 310001190, "metrics": { "loss": 0.4804092607190532, "ce_loss": 0.4704092702557964, "lb_loss": 0.9999999891696197 } }, { "checkpoint_type": "bytes", "bytes_threshold": 311000000, "cumulative_training_bytes": 311001090, "metrics": { "loss": 0.4804406215062832, "ce_loss": 0.47044063104302636, "lb_loss": 0.9999999890357422 } }, { "checkpoint_type": "bytes", "bytes_threshold": 312000000, "cumulative_training_bytes": 312001765, "metrics": { "loss": 0.4804813137621421, "ce_loss": 0.47048132329888526, "lb_loss": 0.9999999891061645 } }, { "checkpoint_type": "bytes", "bytes_threshold": 313000000, "cumulative_training_bytes": 313002485, "metrics": { "loss": 0.4805145679772233, "ce_loss": 0.47051457751396647, "lb_loss": 0.9999999891313095 } }, { "checkpoint_type": "bytes", "bytes_threshold": 314000000, "cumulative_training_bytes": 314002181, "metrics": { "loss": 0.4805563366895704, "ce_loss": 0.47055634622631354, "lb_loss": 0.9999999891931286 } }, { "checkpoint_type": "bytes", "bytes_threshold": 315000000, "cumulative_training_bytes": 315002170, "metrics": { "loss": 0.48057691104293687, "ce_loss": 0.47057692057968004, "lb_loss": 0.9999999892629904 } }, { "checkpoint_type": "bytes", "bytes_threshold": 316000000, "cumulative_training_bytes": 316003419, "metrics": { "loss": 0.4805855045640196, "ce_loss": 0.47058551410076277, "lb_loss": 0.9999999891486526 } }, { "checkpoint_type": "bytes", "bytes_threshold": 317000000, "cumulative_training_bytes": 317003347, "metrics": { "loss": 0.48059125914894896, "ce_loss": 0.4705912686856921, "lb_loss": 0.9999999891343891 } }, { "checkpoint_type": "bytes", "bytes_threshold": 318000000, "cumulative_training_bytes": 318002949, "metrics": { "loss": 0.48062565129502377, "ce_loss": 0.47062566083176693, "lb_loss": 0.999999989210663 } }, { "checkpoint_type": "bytes", "bytes_threshold": 319000000, "cumulative_training_bytes": 319002955, "metrics": { "loss": 0.4806742936150149, "ce_loss": 0.47067430315175807, "lb_loss": 0.9999999891845097 } }, { "checkpoint_type": "bytes", "bytes_threshold": 320000000, "cumulative_training_bytes": 320003241, "metrics": { "loss": 0.4806913787630094, "ce_loss": 0.47069138829975254, "lb_loss": 0.9999999892757991 } }, { "checkpoint_type": "bytes", "bytes_threshold": 321000000, "cumulative_training_bytes": 321002538, "metrics": { "loss": 0.4807014395365364, "ce_loss": 0.47070144907327954, "lb_loss": 0.9999999892185867 } }, { "checkpoint_type": "bytes", "bytes_threshold": 322000000, "cumulative_training_bytes": 322002968, "metrics": { "loss": 0.4807102816990807, "ce_loss": 0.47071029123582386, "lb_loss": 0.9999999893733547 } }, { "checkpoint_type": "bytes", "bytes_threshold": 323000000, "cumulative_training_bytes": 323002242, "metrics": { "loss": 0.48073701398312707, "ce_loss": 0.47073702351987023, "lb_loss": 0.9999999893552776 } }, { "checkpoint_type": "bytes", "bytes_threshold": 324000000, "cumulative_training_bytes": 324001740, "metrics": { "loss": 0.48075183566376206, "ce_loss": 0.4707518452005052, "lb_loss": 0.9999999893193445 } }, { "checkpoint_type": "bytes", "bytes_threshold": 325000000, "cumulative_training_bytes": 325002302, "metrics": { "loss": 0.48077099278910645, "ce_loss": 0.4707710023258496, "lb_loss": 0.9999999893127763 } }, { "checkpoint_type": "bytes", "bytes_threshold": 326000000, "cumulative_training_bytes": 326001646, "metrics": { "loss": 0.4807988396651453, "ce_loss": 0.47079884920188847, "lb_loss": 0.9999999894673447 } }, { "checkpoint_type": "bytes", "bytes_threshold": 327000000, "cumulative_training_bytes": 327000938, "metrics": { "loss": 0.4808235976661511, "ce_loss": 0.4708236072028943, "lb_loss": 0.9999999893267519 } }, { "checkpoint_type": "bytes", "bytes_threshold": 328000000, "cumulative_training_bytes": 328001021, "metrics": { "loss": 0.48082542956810703, "ce_loss": 0.4708254391048502, "lb_loss": 0.9999999892609606 } }, { "checkpoint_type": "bytes", "bytes_threshold": 329000000, "cumulative_training_bytes": 329000730, "metrics": { "loss": 0.4808347929252989, "ce_loss": 0.47083480246204207, "lb_loss": 0.9999999892314774 } }, { "checkpoint_type": "bytes", "bytes_threshold": 330000000, "cumulative_training_bytes": 330005619, "metrics": { "loss": 0.48085167946856366, "ce_loss": 0.47085168900530683, "lb_loss": 0.9999999891967088 } }, { "checkpoint_type": "bytes", "bytes_threshold": 331000000, "cumulative_training_bytes": 331004616, "metrics": { "loss": 0.4808701624671142, "ce_loss": 0.4708701720038574, "lb_loss": 0.9999999892492819 } }, { "checkpoint_type": "bytes", "bytes_threshold": 332000000, "cumulative_training_bytes": 332005466, "metrics": { "loss": 0.4808840215384487, "ce_loss": 0.4708840310751919, "lb_loss": 0.999999989369329 } }, { "checkpoint_type": "bytes", "bytes_threshold": 333000000, "cumulative_training_bytes": 333005711, "metrics": { "loss": 0.4808831132356601, "ce_loss": 0.47088312277240324, "lb_loss": 0.999999989431101 } }, { "checkpoint_type": "bytes", "bytes_threshold": 334000000, "cumulative_training_bytes": 334000923, "metrics": { "loss": 0.480881291871532, "ce_loss": 0.47088130140827517, "lb_loss": 0.999999989399822 } }, { "checkpoint_type": "bytes", "bytes_threshold": 335000000, "cumulative_training_bytes": 335005467, "metrics": { "loss": 0.48089165852830446, "ce_loss": 0.4708916680650476, "lb_loss": 0.9999999894233398 } }, { "checkpoint_type": "bytes", "bytes_threshold": 336000000, "cumulative_training_bytes": 336004881, "metrics": { "loss": 0.4808885490696182, "ce_loss": 0.47088855860636136, "lb_loss": 0.9999999894375124 } }, { "checkpoint_type": "bytes", "bytes_threshold": 337000000, "cumulative_training_bytes": 337004670, "metrics": { "loss": 0.4809070810486999, "ce_loss": 0.47090709058544306, "lb_loss": 0.9999999893812634 } }, { "checkpoint_type": "bytes", "bytes_threshold": 338000000, "cumulative_training_bytes": 338005568, "metrics": { "loss": 0.48091389403755863, "ce_loss": 0.4709139035743018, "lb_loss": 0.9999999893751518 } }, { "checkpoint_type": "bytes", "bytes_threshold": 339000000, "cumulative_training_bytes": 339000221, "metrics": { "loss": 0.480916728187352, "ce_loss": 0.47091673772409515, "lb_loss": 0.9999999893008689 } }, { "checkpoint_type": "bytes", "bytes_threshold": 340000000, "cumulative_training_bytes": 340000243, "metrics": { "loss": 0.4809181364164544, "ce_loss": 0.4709181459531976, "lb_loss": 0.999999989316273 } }, { "checkpoint_type": "bytes", "bytes_threshold": 341000000, "cumulative_training_bytes": 341005321, "metrics": { "loss": 0.480930711371706, "ce_loss": 0.4709307209084492, "lb_loss": 0.9999999892610658 } }, { "checkpoint_type": "bytes", "bytes_threshold": 342000000, "cumulative_training_bytes": 342004954, "metrics": { "loss": 0.4809334829848522, "ce_loss": 0.4709334925215954, "lb_loss": 0.9999999893020564 } }, { "checkpoint_type": "bytes", "bytes_threshold": 343000000, "cumulative_training_bytes": 343000349, "metrics": { "loss": 0.48092697284932284, "ce_loss": 0.470926982386066, "lb_loss": 0.9999999892842776 } }, { "checkpoint_type": "bytes", "bytes_threshold": 344000000, "cumulative_training_bytes": 344005952, "metrics": { "loss": 0.4809306952368029, "ce_loss": 0.47093070477354604, "lb_loss": 0.9999999892754586 } }, { "checkpoint_type": "bytes", "bytes_threshold": 345000000, "cumulative_training_bytes": 345000555, "metrics": { "loss": 0.48093492924702697, "ce_loss": 0.47093493878377013, "lb_loss": 0.9999999892527089 } }, { "checkpoint_type": "bytes", "bytes_threshold": 346000000, "cumulative_training_bytes": 346000337, "metrics": { "loss": 0.4809374438307459, "ce_loss": 0.47093745336748905, "lb_loss": 0.9999999893088763 } }, { "checkpoint_type": "bytes", "bytes_threshold": 347000000, "cumulative_training_bytes": 347005484, "metrics": { "loss": 0.4809448199364519, "ce_loss": 0.47094482947319505, "lb_loss": 0.999999989370058 } }, { "checkpoint_type": "bytes", "bytes_threshold": 348000000, "cumulative_training_bytes": 348000477, "metrics": { "loss": 0.4809486189521885, "ce_loss": 0.47094862848893165, "lb_loss": 0.999999989364099 } }, { "checkpoint_type": "bytes", "bytes_threshold": 349000000, "cumulative_training_bytes": 349000951, "metrics": { "loss": 0.48095005379785294, "ce_loss": 0.4709500633345961, "lb_loss": 0.9999999893762722 } }, { "checkpoint_type": "bytes", "bytes_threshold": 350000000, "cumulative_training_bytes": 350001130, "metrics": { "loss": 0.4809361505117281, "ce_loss": 0.4709361600484713, "lb_loss": 0.999999989365838 } }, { "checkpoint_type": "bytes", "bytes_threshold": 351000000, "cumulative_training_bytes": 351000377, "metrics": { "loss": 0.4809302063483628, "ce_loss": 0.470930215885106, "lb_loss": 0.999999989306499 } }, { "checkpoint_type": "bytes", "bytes_threshold": 352000000, "cumulative_training_bytes": 352004390, "metrics": { "loss": 0.48093818933151383, "ce_loss": 0.470938198868257, "lb_loss": 0.9999999893145735 } }, { "checkpoint_type": "bytes", "bytes_threshold": 353000000, "cumulative_training_bytes": 353003368, "metrics": { "loss": 0.48094108521450285, "ce_loss": 0.470941094751246, "lb_loss": 0.9999999893214534 } }, { "checkpoint_type": "bytes", "bytes_threshold": 354000000, "cumulative_training_bytes": 354002832, "metrics": { "loss": 0.4809529716118759, "ce_loss": 0.47095298114861905, "lb_loss": 0.9999999893124508 } }, { "checkpoint_type": "bytes", "bytes_threshold": 355000000, "cumulative_training_bytes": 355002761, "metrics": { "loss": 0.48095117955086697, "ce_loss": 0.47095118908761013, "lb_loss": 0.9999999892213132 } }, { "checkpoint_type": "bytes", "bytes_threshold": 356000000, "cumulative_training_bytes": 356002307, "metrics": { "loss": 0.48095010436089797, "ce_loss": 0.47095011389764113, "lb_loss": 0.9999999893053746 } }, { "checkpoint_type": "bytes", "bytes_threshold": 357000000, "cumulative_training_bytes": 357002171, "metrics": { "loss": 0.48094610834858254, "ce_loss": 0.4709461178853257, "lb_loss": 0.9999999892319346 } }, { "checkpoint_type": "bytes", "bytes_threshold": 358000000, "cumulative_training_bytes": 358002343, "metrics": { "loss": 0.4809467508721634, "ce_loss": 0.47094676040890654, "lb_loss": 0.9999999893085618 } }, { "epoch": 5, "checkpoint_type": "epoch", "metrics": { "loss": 0.4809443117192456, "ce_loss": 0.4709443212559888, "lb_loss": 0.9999999892863285, "training_bytes": 71629726 }, "cumulative_training_bytes": 358148610, "training_bytes_this_epoch": 71629726 } ] }