MOSES-340M / metadata.json
jordiferrero's picture
Add files using upload-large-folder tool
17fb1ec verified
{
"run_name": "run_large_20251112_071557",
"timestamp": "20251112_071557",
"phase": "large",
"config": {
"arch_layout": [
"m4",
[
"T22"
],
"m4"
],
"d_model": [
1024,
1536
],
"d_intermediate": [
0,
4096
],
"vocab_size": 256,
"ssm_cfg": {
"chunk_size": 256,
"d_conv": 4,
"d_state": 128,
"expand": 2
},
"attn_cfg": {
"num_heads": [
16,
16
],
"rotary_emb_dim": [
32,
48
],
"window_size": [
1023,
-1
]
},
"tie_embeddings": false
},
"training_args": {
"data": "datasets/moses/smiles-molecules-moses_all.csv",
"max_samples": null,
"batch_size": 16,
"epochs": 5,
"lr": 0.0001,
"weight_decay": 0.1,
"gradient_accumulation": 8,
"concatenate": true,
"num_concatenate": 10,
"concatenate_separator": " ",
"checkpoint_bytes": 1000000,
"num_test_samples": 5,
"num_visualize": 5,
"skip_visualization": false
},
"dataset_info": {
"train_size": 193691,
"test_size": 5,
"test_smiles_file": "checkpoints/run_large_20251112_071557/test_smiles.txt"
},
"model_info": {
"num_parameters": 622923776,
"device": "cuda",
"dtype": "torch.bfloat16",
"use_amp": true
},
"training_history": [
{
"checkpoint_type": "bytes",
"bytes_threshold": 1000000,
"cumulative_training_bytes": 1005180,
"metrics": {
"loss": 2.7717187923543594,
"ce_loss": 2.76171875,
"lb_loss": 0.9999999957926133
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 2000000,
"cumulative_training_bytes": 2005824,
"metrics": {
"loss": 2.068881838019267,
"ce_loss": 2.0588818215339235,
"lb_loss": 0.999999996131852
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 3000000,
"cumulative_training_bytes": 3000192,
"metrics": {
"loss": 1.7394671552985377,
"ce_loss": 1.7294671474358974,
"lb_loss": 0.9999999945920836
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 4000000,
"cumulative_training_bytes": 4000199,
"metrics": {
"loss": 1.532599624444747,
"ce_loss": 1.5225996209319526,
"lb_loss": 0.9999999955913724
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 5000000,
"cumulative_training_bytes": 5000601,
"metrics": {
"loss": 1.3878337656957864,
"ce_loss": 1.3778337647928993,
"lb_loss": 0.9999999956971795
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 6000000,
"cumulative_training_bytes": 6001219,
"metrics": {
"loss": 1.2813033152378992,
"ce_loss": 1.2713033160749507,
"lb_loss": 0.9999999946508652
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 7000000,
"cumulative_training_bytes": 7000167,
"metrics": {
"loss": 1.1998906361280814,
"ce_loss": 1.189890638207946,
"lb_loss": 0.9999999956669489
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 8000000,
"cumulative_training_bytes": 8005274,
"metrics": {
"loss": 1.1353377880031588,
"ce_loss": 1.1253377910199556,
"lb_loss": 0.9999999970484027
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 9000000,
"cumulative_training_bytes": 9004340,
"metrics": {
"loss": 1.082989381114694,
"ce_loss": 1.0729893848554533,
"lb_loss": 0.9999999974153045
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 10000000,
"cumulative_training_bytes": 10005356,
"metrics": {
"loss": 1.0403814267858396,
"ce_loss": 1.0303814311058546,
"lb_loss": 0.999999997638373
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 11000000,
"cumulative_training_bytes": 11004976,
"metrics": {
"loss": 1.0042456269264222,
"ce_loss": 0.9942456317204301,
"lb_loss": 0.9999999972761319
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 12000000,
"cumulative_training_bytes": 12005836,
"metrics": {
"loss": 0.9732034663486622,
"ce_loss": 0.9632034715377032,
"lb_loss": 0.9999999970036108
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 13000000,
"cumulative_training_bytes": 13000402,
"metrics": {
"loss": 0.9467905753842798,
"ce_loss": 0.9367905809057806,
"lb_loss": 0.9999999964188379
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 14000000,
"cumulative_training_bytes": 14000972,
"metrics": {
"loss": 0.9233459631266074,
"ce_loss": 0.9133459689349113,
"lb_loss": 0.9999999965234823
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 15000000,
"cumulative_training_bytes": 15000856,
"metrics": {
"loss": 0.9026359037064472,
"ce_loss": 0.8926359097633136,
"lb_loss": 0.9999999959087936
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 16000000,
"cumulative_training_bytes": 16000794,
"metrics": {
"loss": 0.8842589041361442,
"ce_loss": 0.874258910410503,
"lb_loss": 0.9999999957015882
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 17000000,
"cumulative_training_bytes": 17000571,
"metrics": {
"loss": 0.8677488623120169,
"ce_loss": 0.8577488687782805,
"lb_loss": 0.9999999955602527
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 18000000,
"cumulative_training_bytes": 18004713,
"metrics": {
"loss": 0.8526691315482018,
"ce_loss": 0.8426691381860006,
"lb_loss": 0.9999999956319962
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 19000000,
"cumulative_training_bytes": 19004995,
"metrics": {
"loss": 0.8389714284525119,
"ce_loss": 0.8289714352428393,
"lb_loss": 0.9999999954535685
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 20000000,
"cumulative_training_bytes": 20004594,
"metrics": {
"loss": 0.8264131751930668,
"ce_loss": 0.8164131821206744,
"lb_loss": 0.9999999955926764
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 21000000,
"cumulative_training_bytes": 21004699,
"metrics": {
"loss": 0.8155721760467745,
"ce_loss": 0.8055721830985916,
"lb_loss": 0.9999999949629877
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 22000000,
"cumulative_training_bytes": 22004049,
"metrics": {
"loss": 0.8053173244432212,
"ce_loss": 0.7953173316079591,
"lb_loss": 0.9999999948072318
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 23000000,
"cumulative_training_bytes": 23003811,
"metrics": {
"loss": 0.795621416021767,
"ce_loss": 0.785621423289609,
"lb_loss": 0.9999999948336509
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 24000000,
"cumulative_training_bytes": 24003387,
"metrics": {
"loss": 0.7866196854525316,
"ce_loss": 0.7766196928148879,
"lb_loss": 0.999999995078246
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 25000000,
"cumulative_training_bytes": 25003311,
"metrics": {
"loss": 0.7783351499098937,
"ce_loss": 0.7683351573592049,
"lb_loss": 0.9999999949365671
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 26000000,
"cumulative_training_bytes": 26003789,
"metrics": {
"loss": 0.7705019833691698,
"ce_loss": 0.7605019908987486,
"lb_loss": 0.9999999948464698
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 27000000,
"cumulative_training_bytes": 27005374,
"metrics": {
"loss": 0.7630905858996038,
"ce_loss": 0.7530905935035057,
"lb_loss": 0.999999994763045
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 28000000,
"cumulative_training_bytes": 28004376,
"metrics": {
"loss": 0.7561506895592821,
"ce_loss": 0.7461506972321994,
"lb_loss": 0.9999999949374462
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 29000000,
"cumulative_training_bytes": 29003450,
"metrics": {
"loss": 0.7496948872036664,
"ce_loss": 0.7396948949408405,
"lb_loss": 0.9999999950268667
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 30000000,
"cumulative_training_bytes": 30002714,
"metrics": {
"loss": 0.7436015993809234,
"ce_loss": 0.7336016071780714,
"lb_loss": 0.999999995180851
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 31000000,
"cumulative_training_bytes": 31003719,
"metrics": {
"loss": 0.7378081684349147,
"ce_loss": 0.727808176288168,
"lb_loss": 0.9999999952907781
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 32000000,
"cumulative_training_bytes": 32002795,
"metrics": {
"loss": 0.7323312628928164,
"ce_loss": 0.7223312707986689,
"lb_loss": 0.9999999951954843
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 33000000,
"cumulative_training_bytes": 33002982,
"metrics": {
"loss": 0.7271596202716984,
"ce_loss": 0.7171596282269631,
"lb_loss": 0.9999999955120203
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 34000000,
"cumulative_training_bytes": 34002814,
"metrics": {
"loss": 0.7221670737365277,
"ce_loss": 0.7121670817382982,
"lb_loss": 0.9999999956336253
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 35000000,
"cumulative_training_bytes": 35001491,
"metrics": {
"loss": 0.7178196230395731,
"ce_loss": 0.7078196310851927,
"lb_loss": 0.9999999956173056
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 36000000,
"cumulative_training_bytes": 36002685,
"metrics": {
"loss": 0.7134658659885629,
"ce_loss": 0.7034658740755957,
"lb_loss": 0.9999999959055478
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 37000000,
"cumulative_training_bytes": 37002731,
"metrics": {
"loss": 0.7092018776668845,
"ce_loss": 0.6992018857930924,
"lb_loss": 0.9999999959590071
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 38000000,
"cumulative_training_bytes": 38001457,
"metrics": {
"loss": 0.7051239603872,
"ce_loss": 0.6951239685505216,
"lb_loss": 0.9999999958890149
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 39000000,
"cumulative_training_bytes": 39001008,
"metrics": {
"loss": 0.7012219511840528,
"ce_loss": 0.6912219593825849,
"lb_loss": 0.9999999958406953
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 40000000,
"cumulative_training_bytes": 40002237,
"metrics": {
"loss": 0.6974433710758123,
"ce_loss": 0.6874433793077948,
"lb_loss": 0.9999999959270306
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 41000000,
"cumulative_training_bytes": 41003402,
"metrics": {
"loss": 0.6939161623711194,
"ce_loss": 0.6839161706349206,
"lb_loss": 0.9999999961123666
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 42000000,
"cumulative_training_bytes": 42003174,
"metrics": {
"loss": 0.6905112212804828,
"ce_loss": 0.680511229574588,
"lb_loss": 0.9999999960034074
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 43000000,
"cumulative_training_bytes": 43003031,
"metrics": {
"loss": 0.6871942206602144,
"ce_loss": 0.6771942289832141,
"lb_loss": 0.9999999960389319
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 44000000,
"cumulative_training_bytes": 44004167,
"metrics": {
"loss": 0.6840069627062967,
"ce_loss": 0.6740069710568778,
"lb_loss": 0.9999999959686519
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 45000000,
"cumulative_training_bytes": 45004421,
"metrics": {
"loss": 0.6809336089975044,
"ce_loss": 0.6709336173744412,
"lb_loss": 0.9999999959798602
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 46000000,
"cumulative_training_bytes": 46004726,
"metrics": {
"loss": 0.6780240068711652,
"ce_loss": 0.6680240152733119,
"lb_loss": 0.9999999960059138
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 47000000,
"cumulative_training_bytes": 47003808,
"metrics": {
"loss": 0.6751821455578549,
"ce_loss": 0.6651821539841389,
"lb_loss": 0.9999999959183123
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 48000000,
"cumulative_training_bytes": 48003562,
"metrics": {
"loss": 0.6724254583939221,
"ce_loss": 0.6624254668433378,
"lb_loss": 0.9999999958784414
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 49000000,
"cumulative_training_bytes": 49002795,
"metrics": {
"loss": 0.6697642958932843,
"ce_loss": 0.6597643043648878,
"lb_loss": 0.9999999957538348
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 50000000,
"cumulative_training_bytes": 50002223,
"metrics": {
"loss": 0.6672086428501326,
"ce_loss": 0.6572086513430363,
"lb_loss": 0.9999999955707352
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 51000000,
"cumulative_training_bytes": 51002866,
"metrics": {
"loss": 0.6648764597290906,
"ce_loss": 0.6548764682424594,
"lb_loss": 0.9999999957958673
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 52000000,
"cumulative_training_bytes": 52002541,
"metrics": {
"loss": 0.662538409517926,
"ce_loss": 0.6525384180509728,
"lb_loss": 0.9999999958292347
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 53000000,
"cumulative_training_bytes": 53002612,
"metrics": {
"loss": 0.6602227322941887,
"ce_loss": 0.650222740846171,
"lb_loss": 0.9999999958347279
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 54000000,
"cumulative_training_bytes": 54002019,
"metrics": {
"loss": 0.6579624243212044,
"ce_loss": 0.6479624328914211,
"lb_loss": 0.9999999960620576
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 55000000,
"cumulative_training_bytes": 55003666,
"metrics": {
"loss": 0.6557700134647078,
"ce_loss": 0.6457700220524957,
"lb_loss": 0.9999999959477048
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 56000000,
"cumulative_training_bytes": 56004562,
"metrics": {
"loss": 0.653625767280107,
"ce_loss": 0.6436257758848389,
"lb_loss": 0.9999999958311384
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 57000000,
"cumulative_training_bytes": 57003399,
"metrics": {
"loss": 0.6515486406938451,
"ce_loss": 0.6415486493149263,
"lb_loss": 0.999999995693914
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 58000000,
"cumulative_training_bytes": 58004263,
"metrics": {
"loss": 0.6495315760820968,
"ce_loss": 0.6395315847189635,
"lb_loss": 0.9999999955675012
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 59000000,
"cumulative_training_bytes": 59005055,
"metrics": {
"loss": 0.6475570260450345,
"ce_loss": 0.6375570346971521,
"lb_loss": 0.999999995678484
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 60000000,
"cumulative_training_bytes": 60005248,
"metrics": {
"loss": 0.6456633104091682,
"ce_loss": 0.635663319076028,
"lb_loss": 0.9999999955741744
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 61000000,
"cumulative_training_bytes": 61004559,
"metrics": {
"loss": 0.6438197430768832,
"ce_loss": 0.633819751758002,
"lb_loss": 0.9999999955889094
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 62000000,
"cumulative_training_bytes": 62004749,
"metrics": {
"loss": 0.642214755655211,
"ce_loss": 0.6322147643501288,
"lb_loss": 0.999999995517849
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 63000000,
"cumulative_training_bytes": 63005338,
"metrics": {
"loss": 0.6405543280803199,
"ce_loss": 0.6305543367885988,
"lb_loss": 0.999999995768115
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 64000000,
"cumulative_training_bytes": 64005014,
"metrics": {
"loss": 0.638879157175052,
"ce_loss": 0.6288791658962744,
"lb_loss": 0.9999999957625986
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 65000000,
"cumulative_training_bytes": 65000026,
"metrics": {
"loss": 0.6372423956814168,
"ce_loss": 0.6272424044151115,
"lb_loss": 0.9999999956700495
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 66000000,
"cumulative_training_bytes": 66005586,
"metrics": {
"loss": 0.6356362670272628,
"ce_loss": 0.6256362757731959,
"lb_loss": 0.9999999957306938
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 67000000,
"cumulative_training_bytes": 67004497,
"metrics": {
"loss": 0.6340658579854652,
"ce_loss": 0.6240658667432003,
"lb_loss": 0.9999999956786106
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 68000000,
"cumulative_training_bytes": 68004301,
"metrics": {
"loss": 0.6325890467972416,
"ce_loss": 0.6225890555664317,
"lb_loss": 0.9999999957162241
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 69000000,
"cumulative_training_bytes": 69003767,
"metrics": {
"loss": 0.6311319261365108,
"ce_loss": 0.6211319349168238,
"lb_loss": 0.9999999957016372
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 70000000,
"cumulative_training_bytes": 70004275,
"metrics": {
"loss": 0.6296715307554119,
"ce_loss": 0.6196715395465303,
"lb_loss": 0.9999999958637974
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 71000000,
"cumulative_training_bytes": 71003661,
"metrics": {
"loss": 0.6282464104692141,
"ce_loss": 0.6182464192708333,
"lb_loss": 0.9999999958972137
}
},
{
"epoch": 1,
"checkpoint_type": "epoch",
"metrics": {
"loss": 0.6273426957392759,
"ce_loss": 0.6173427045473319,
"lb_loss": 0.9999999960463795,
"training_bytes": 71629728
},
"cumulative_training_bytes": 71629728,
"training_bytes_this_epoch": 71629728
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 72000000,
"cumulative_training_bytes": 72002782,
"metrics": {
"loss": 0.5245709230029394,
"ce_loss": 0.5145709325396826,
"lb_loss": 0.9999999943233672
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 73000000,
"cumulative_training_bytes": 73002022,
"metrics": {
"loss": 0.5237897264546362,
"ce_loss": 0.5137897359913793,
"lb_loss": 0.9999999958893349
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 74000000,
"cumulative_training_bytes": 74002053,
"metrics": {
"loss": 0.523175060600414,
"ce_loss": 0.5131750701371571,
"lb_loss": 0.9999999982163199
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 75000000,
"cumulative_training_bytes": 75001463,
"metrics": {
"loss": 0.5227672602000989,
"ce_loss": 0.5127672697368421,
"lb_loss": 0.9999999993725827
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 76000000,
"cumulative_training_bytes": 76001540,
"metrics": {
"loss": 0.5225935501723231,
"ce_loss": 0.5125935597090663,
"lb_loss": 0.999999997419014
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 77000000,
"cumulative_training_bytes": 77001527,
"metrics": {
"loss": 0.5224307276603934,
"ce_loss": 0.5124307371971366,
"lb_loss": 0.9999999961926549
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 78000000,
"cumulative_training_bytes": 78001457,
"metrics": {
"loss": 0.522277294316553,
"ce_loss": 0.5122773038532962,
"lb_loss": 0.9999999968454366
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 79000000,
"cumulative_training_bytes": 79001702,
"metrics": {
"loss": 0.522033811189581,
"ce_loss": 0.5120338207263242,
"lb_loss": 0.9999999971297924
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 80000000,
"cumulative_training_bytes": 80002593,
"metrics": {
"loss": 0.5217684312759777,
"ce_loss": 0.5117684408127209,
"lb_loss": 0.999999997514718
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 81000000,
"cumulative_training_bytes": 81002708,
"metrics": {
"loss": 0.5216262629537871,
"ce_loss": 0.5116262724905303,
"lb_loss": 0.9999999968015184
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 82000000,
"cumulative_training_bytes": 82002370,
"metrics": {
"loss": 0.5214791957541867,
"ce_loss": 0.5114792052909298,
"lb_loss": 0.9999999966678521
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 83000000,
"cumulative_training_bytes": 83002802,
"metrics": {
"loss": 0.5213061656713733,
"ce_loss": 0.5113061752081165,
"lb_loss": 0.9999999966197157
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 84000000,
"cumulative_training_bytes": 84001825,
"metrics": {
"loss": 0.5217131360873601,
"ce_loss": 0.5117131456241033,
"lb_loss": 0.9999999977195736
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 85000000,
"cumulative_training_bytes": 85001219,
"metrics": {
"loss": 0.5217455311159117,
"ce_loss": 0.5117455406526549,
"lb_loss": 0.9999999976791112
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 86000000,
"cumulative_training_bytes": 86001317,
"metrics": {
"loss": 0.5215764172335327,
"ce_loss": 0.5115764267702758,
"lb_loss": 0.9999999972025815
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 87000000,
"cumulative_training_bytes": 87002232,
"metrics": {
"loss": 0.5213811911464747,
"ce_loss": 0.5113812006832179,
"lb_loss": 0.9999999972010136
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 88000000,
"cumulative_training_bytes": 88002341,
"metrics": {
"loss": 0.5211851074672322,
"ce_loss": 0.5111851170039754,
"lb_loss": 0.999999997091931
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 89000000,
"cumulative_training_bytes": 89002058,
"metrics": {
"loss": 0.5209484002895511,
"ce_loss": 0.5109484098262943,
"lb_loss": 0.9999999970157075
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 90000000,
"cumulative_training_bytes": 90002506,
"metrics": {
"loss": 0.5207305409866063,
"ce_loss": 0.5107305505233495,
"lb_loss": 0.999999997312512
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 91000000,
"cumulative_training_bytes": 91001385,
"metrics": {
"loss": 0.5205339798493289,
"ce_loss": 0.5105339893860721,
"lb_loss": 0.999999997414826
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 92000000,
"cumulative_training_bytes": 92001245,
"metrics": {
"loss": 0.5203743006578546,
"ce_loss": 0.5103743101945978,
"lb_loss": 0.9999999980091391
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 93000000,
"cumulative_training_bytes": 93002978,
"metrics": {
"loss": 0.5201673795482513,
"ce_loss": 0.5101673890849945,
"lb_loss": 0.999999998036281
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 94000000,
"cumulative_training_bytes": 94003884,
"metrics": {
"loss": 0.519991870091662,
"ce_loss": 0.5099918796284052,
"lb_loss": 0.9999999981082895
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 95000000,
"cumulative_training_bytes": 95004709,
"metrics": {
"loss": 0.5197606708430037,
"ce_loss": 0.5097606803797469,
"lb_loss": 0.999999998113777
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 96000000,
"cumulative_training_bytes": 96004703,
"metrics": {
"loss": 0.5195778423387121,
"ce_loss": 0.5095778518754552,
"lb_loss": 0.999999998133285
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 97000000,
"cumulative_training_bytes": 97004722,
"metrics": {
"loss": 0.5193511229842457,
"ce_loss": 0.5093511325209888,
"lb_loss": 0.9999999983041589
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 98000000,
"cumulative_training_bytes": 98005936,
"metrics": {
"loss": 0.5191288888253838,
"ce_loss": 0.5091288983621269,
"lb_loss": 0.999999998542314
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 99000000,
"cumulative_training_bytes": 99005325,
"metrics": {
"loss": 0.5189959339079174,
"ce_loss": 0.5089959434446606,
"lb_loss": 0.9999999980415248
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 100000000,
"cumulative_training_bytes": 100005645,
"metrics": {
"loss": 0.5188124741702432,
"ce_loss": 0.5088124837069864,
"lb_loss": 0.9999999982224267
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 101000000,
"cumulative_training_bytes": 101005293,
"metrics": {
"loss": 0.5186718025099933,
"ce_loss": 0.5086718120467365,
"lb_loss": 0.9999999980307893
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 102000000,
"cumulative_training_bytes": 102004390,
"metrics": {
"loss": 0.5184943529218581,
"ce_loss": 0.5084943624586012,
"lb_loss": 0.9999999979678915
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 103000000,
"cumulative_training_bytes": 103003700,
"metrics": {
"loss": 0.5183400036658219,
"ce_loss": 0.508340013202565,
"lb_loss": 0.9999999980888741
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 104000000,
"cumulative_training_bytes": 104004107,
"metrics": {
"loss": 0.5181776973495664,
"ce_loss": 0.5081777068863096,
"lb_loss": 0.9999999981152251
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 105000000,
"cumulative_training_bytes": 105003741,
"metrics": {
"loss": 0.5180538606981859,
"ce_loss": 0.5080538702349291,
"lb_loss": 0.9999999981928379
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 106000000,
"cumulative_training_bytes": 106003973,
"metrics": {
"loss": 0.5178760367922292,
"ce_loss": 0.5078760463289723,
"lb_loss": 0.9999999980915022
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 107000000,
"cumulative_training_bytes": 107004552,
"metrics": {
"loss": 0.5179294557944712,
"ce_loss": 0.5079294653312144,
"lb_loss": 0.999999998235192
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 108000000,
"cumulative_training_bytes": 108004388,
"metrics": {
"loss": 0.518020289999616,
"ce_loss": 0.5080202995363592,
"lb_loss": 0.9999999982255328
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 109000000,
"cumulative_training_bytes": 109004031,
"metrics": {
"loss": 0.5179760755052137,
"ce_loss": 0.5079760850419569,
"lb_loss": 0.9999999984334434
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 110000000,
"cumulative_training_bytes": 110003930,
"metrics": {
"loss": 0.5178546550738967,
"ce_loss": 0.5078546646106399,
"lb_loss": 0.9999999985202239
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 111000000,
"cumulative_training_bytes": 111002483,
"metrics": {
"loss": 0.5177637650725144,
"ce_loss": 0.5077637746092576,
"lb_loss": 0.9999999984951037
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 112000000,
"cumulative_training_bytes": 112000941,
"metrics": {
"loss": 0.5176624922952955,
"ce_loss": 0.5076625018320386,
"lb_loss": 0.999999998462492
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 113000000,
"cumulative_training_bytes": 113001291,
"metrics": {
"loss": 0.5176278486404593,
"ce_loss": 0.5076278581772025,
"lb_loss": 0.999999998840642
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 114000000,
"cumulative_training_bytes": 114005949,
"metrics": {
"loss": 0.5175975976260605,
"ce_loss": 0.5075976071628037,
"lb_loss": 0.999999998751648
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 115000000,
"cumulative_training_bytes": 115005446,
"metrics": {
"loss": 0.5174949178265088,
"ce_loss": 0.507494927363252,
"lb_loss": 0.9999999990487323
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 116000000,
"cumulative_training_bytes": 116005050,
"metrics": {
"loss": 0.5173872300465902,
"ce_loss": 0.5073872395833333,
"lb_loss": 0.9999999989589056
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 117000000,
"cumulative_training_bytes": 117004462,
"metrics": {
"loss": 0.5172662063160408,
"ce_loss": 0.507266215852784,
"lb_loss": 0.9999999990906581
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 118000000,
"cumulative_training_bytes": 118004814,
"metrics": {
"loss": 0.5171274763493247,
"ce_loss": 0.5071274858860678,
"lb_loss": 0.9999999993155884
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 119000000,
"cumulative_training_bytes": 119005101,
"metrics": {
"loss": 0.5169919194004368,
"ce_loss": 0.5069919289371799,
"lb_loss": 0.9999999993077018
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 120000000,
"cumulative_training_bytes": 120004824,
"metrics": {
"loss": 0.5170007587179046,
"ce_loss": 0.5070007682546478,
"lb_loss": 0.9999999993511726
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 121000000,
"cumulative_training_bytes": 121005427,
"metrics": {
"loss": 0.51693738748243,
"ce_loss": 0.5069373970191732,
"lb_loss": 0.9999999995357337
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 122000000,
"cumulative_training_bytes": 122001139,
"metrics": {
"loss": 0.5168287009648427,
"ce_loss": 0.5068287105015858,
"lb_loss": 0.999999999698931
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 123000000,
"cumulative_training_bytes": 123001524,
"metrics": {
"loss": 0.5166915019957379,
"ce_loss": 0.506691511532481,
"lb_loss": 0.9999999996018119
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 124000000,
"cumulative_training_bytes": 124002811,
"metrics": {
"loss": 0.5165513776652679,
"ce_loss": 0.5065513872020111,
"lb_loss": 0.9999999995420725
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 125000000,
"cumulative_training_bytes": 125003361,
"metrics": {
"loss": 0.516395933506494,
"ce_loss": 0.5063959430432372,
"lb_loss": 0.9999999994845719
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 126000000,
"cumulative_training_bytes": 126002775,
"metrics": {
"loss": 0.5162708629058512,
"ce_loss": 0.5062708724425944,
"lb_loss": 0.9999999993059422
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 127000000,
"cumulative_training_bytes": 127003533,
"metrics": {
"loss": 0.5161498922852273,
"ce_loss": 0.5061499018219705,
"lb_loss": 0.9999999994267559
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 128000000,
"cumulative_training_bytes": 128003350,
"metrics": {
"loss": 0.5160164026995326,
"ce_loss": 0.5060164122362758,
"lb_loss": 0.9999999997184623
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 129000000,
"cumulative_training_bytes": 129003039,
"metrics": {
"loss": 0.5158909909402577,
"ce_loss": 0.5058910004770009,
"lb_loss": 0.999999999569686
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 130000000,
"cumulative_training_bytes": 130003634,
"metrics": {
"loss": 0.515758194974154,
"ce_loss": 0.5057582045108971,
"lb_loss": 0.9999999994441331
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 131000000,
"cumulative_training_bytes": 131003813,
"metrics": {
"loss": 0.5156362957751962,
"ce_loss": 0.5056363053119394,
"lb_loss": 0.9999999995544799
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 132000000,
"cumulative_training_bytes": 132004994,
"metrics": {
"loss": 0.5154975693003636,
"ce_loss": 0.5054975788371068,
"lb_loss": 0.9999999996085944
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 133000000,
"cumulative_training_bytes": 133003835,
"metrics": {
"loss": 0.5153652466819225,
"ce_loss": 0.5053652562186657,
"lb_loss": 0.9999999995804918
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 134000000,
"cumulative_training_bytes": 134003355,
"metrics": {
"loss": 0.5152516195188492,
"ce_loss": 0.5052516290555924,
"lb_loss": 0.9999999994910902
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 135000000,
"cumulative_training_bytes": 135002497,
"metrics": {
"loss": 0.515110941093509,
"ce_loss": 0.5051109506302521,
"lb_loss": 0.9999999995269473
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 136000000,
"cumulative_training_bytes": 136002211,
"metrics": {
"loss": 0.5149785831073418,
"ce_loss": 0.504978592644085,
"lb_loss": 0.9999999994904649
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 137000000,
"cumulative_training_bytes": 137002818,
"metrics": {
"loss": 0.5148538101591293,
"ce_loss": 0.5048538196958725,
"lb_loss": 0.9999999994011481
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 138000000,
"cumulative_training_bytes": 138002176,
"metrics": {
"loss": 0.514731752242253,
"ce_loss": 0.5047317617789961,
"lb_loss": 0.9999999992188747
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 139000000,
"cumulative_training_bytes": 139002858,
"metrics": {
"loss": 0.5145917039710735,
"ce_loss": 0.5045917135078166,
"lb_loss": 0.9999999994450999
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 140000000,
"cumulative_training_bytes": 140002848,
"metrics": {
"loss": 0.5144674152793538,
"ce_loss": 0.504467424816097,
"lb_loss": 0.999999999458374
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 141000000,
"cumulative_training_bytes": 141001755,
"metrics": {
"loss": 0.5143557057694663,
"ce_loss": 0.5043557153062095,
"lb_loss": 0.9999999994610975
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 142000000,
"cumulative_training_bytes": 142001377,
"metrics": {
"loss": 0.5142396167139925,
"ce_loss": 0.5042396262507357,
"lb_loss": 0.9999999993935792
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 143000000,
"cumulative_training_bytes": 143001209,
"metrics": {
"loss": 0.5141645089407066,
"ce_loss": 0.5041645184774498,
"lb_loss": 0.9999999995503215
}
},
{
"epoch": 2,
"checkpoint_type": "epoch",
"metrics": {
"loss": 0.5141700236596057,
"ce_loss": 0.5041700331963489,
"lb_loss": 0.9999999995568793,
"training_bytes": 71629742
},
"cumulative_training_bytes": 143259470,
"training_bytes_this_epoch": 71629742
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 144000000,
"cumulative_training_bytes": 144004798,
"metrics": {
"loss": 0.5033345638759552,
"ce_loss": 0.49333457341269843,
"lb_loss": 1.000000001892211
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 145000000,
"cumulative_training_bytes": 145004453,
"metrics": {
"loss": 0.5023530095310534,
"ce_loss": 0.4923530190677966,
"lb_loss": 1.0000000127291275
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 146000000,
"cumulative_training_bytes": 146003901,
"metrics": {
"loss": 0.5018044422412741,
"ce_loss": 0.49180445177801724,
"lb_loss": 1.0000000069367474
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 147000000,
"cumulative_training_bytes": 147004659,
"metrics": {
"loss": 0.5011353330777908,
"ce_loss": 0.49113534261453395,
"lb_loss": 1.0000000053672429
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 148000000,
"cumulative_training_bytes": 148005351,
"metrics": {
"loss": 0.5010307144345786,
"ce_loss": 0.4910307239713217,
"lb_loss": 1.0000000031957603
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 149000000,
"cumulative_training_bytes": 149005165,
"metrics": {
"loss": 0.5008639515858109,
"ce_loss": 0.4908639611225541,
"lb_loss": 1.0000000036217034
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 150000000,
"cumulative_training_bytes": 150005636,
"metrics": {
"loss": 0.5008031702878183,
"ce_loss": 0.4908031798245614,
"lb_loss": 1.0000000014639738
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 151000000,
"cumulative_training_bytes": 151000050,
"metrics": {
"loss": 0.5007569906543884,
"ce_loss": 0.4907570001911315,
"lb_loss": 1.000000002096188
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 152000000,
"cumulative_training_bytes": 152000047,
"metrics": {
"loss": 0.500738184437529,
"ce_loss": 0.49073819397427215,
"lb_loss": 1.0000000011703012
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 153000000,
"cumulative_training_bytes": 153000782,
"metrics": {
"loss": 0.5005915286619202,
"ce_loss": 0.4905915381986634,
"lb_loss": 1.0000000017019552
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 154000000,
"cumulative_training_bytes": 154000682,
"metrics": {
"loss": 0.50057656587648,
"ce_loss": 0.49057657541322314,
"lb_loss": 1.0000000016420012
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 155000000,
"cumulative_training_bytes": 155000553,
"metrics": {
"loss": 0.5005808876406762,
"ce_loss": 0.4905808971774194,
"lb_loss": 1.000000001982816
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 156000000,
"cumulative_training_bytes": 156000853,
"metrics": {
"loss": 0.5005047004609345,
"ce_loss": 0.49050470999767765,
"lb_loss": 1.000000002159388
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 157000000,
"cumulative_training_bytes": 157005344,
"metrics": {
"loss": 0.5004092470818535,
"ce_loss": 0.49040925661859663,
"lb_loss": 1.0000000026171647
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 158000000,
"cumulative_training_bytes": 158005059,
"metrics": {
"loss": 0.5003464428609294,
"ce_loss": 0.49034645239767255,
"lb_loss": 1.0000000022244109
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 159000000,
"cumulative_training_bytes": 159004522,
"metrics": {
"loss": 0.5003165714008743,
"ce_loss": 0.49031658093761743,
"lb_loss": 1.0000000018591453
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 160000000,
"cumulative_training_bytes": 160005391,
"metrics": {
"loss": 0.5002716335727975,
"ce_loss": 0.49027164310954063,
"lb_loss": 1.0000000016217518
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 161000000,
"cumulative_training_bytes": 161005358,
"metrics": {
"loss": 0.5002122226656577,
"ce_loss": 0.4902122322024008,
"lb_loss": 1.0000000014111137
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 162000000,
"cumulative_training_bytes": 162005279,
"metrics": {
"loss": 0.5003447219578907,
"ce_loss": 0.4903447314946338,
"lb_loss": 1.0000000011100612
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 163000000,
"cumulative_training_bytes": 163004202,
"metrics": {
"loss": 0.5003701535963104,
"ce_loss": 0.49037016313305365,
"lb_loss": 1.0000000013574926
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 164000000,
"cumulative_training_bytes": 164003475,
"metrics": {
"loss": 0.5003257267011347,
"ce_loss": 0.4903257362378779,
"lb_loss": 1.0000000012580559
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 165000000,
"cumulative_training_bytes": 165003971,
"metrics": {
"loss": 0.5002354283884269,
"ce_loss": 0.49023543792517005,
"lb_loss": 1.0000000012650783
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 166000000,
"cumulative_training_bytes": 166003094,
"metrics": {
"loss": 0.5001840638568572,
"ce_loss": 0.49018407339360043,
"lb_loss": 1.0000000017211539
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 167000000,
"cumulative_training_bytes": 167003137,
"metrics": {
"loss": 0.5001199910862322,
"ce_loss": 0.49012000062297534,
"lb_loss": 1.0000000016635235
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 168000000,
"cumulative_training_bytes": 168003662,
"metrics": {
"loss": 0.50006763528033,
"ce_loss": 0.49006764481707316,
"lb_loss": 1.0000000019241098
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 169000000,
"cumulative_training_bytes": 169003714,
"metrics": {
"loss": 0.500026528902696,
"ce_loss": 0.4900265384394392,
"lb_loss": 1.0000000021370545
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 170000000,
"cumulative_training_bytes": 170005211,
"metrics": {
"loss": 0.4999621378636993,
"ce_loss": 0.48996214740044247,
"lb_loss": 1.0000000022153939
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 171000000,
"cumulative_training_bytes": 171005841,
"metrics": {
"loss": 0.49993529449929863,
"ce_loss": 0.4899353040360418,
"lb_loss": 1.0000000024406253
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 172000000,
"cumulative_training_bytes": 172005501,
"metrics": {
"loss": 0.49990348405629925,
"ce_loss": 0.4899034935930424,
"lb_loss": 1.0000000021226025
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 173000000,
"cumulative_training_bytes": 173005085,
"metrics": {
"loss": 0.49985011230530974,
"ce_loss": 0.4898501218420529,
"lb_loss": 1.000000002193527
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 174000000,
"cumulative_training_bytes": 174000244,
"metrics": {
"loss": 0.4997937374555571,
"ce_loss": 0.48979374699230027,
"lb_loss": 1.0000000023864806
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 175000000,
"cumulative_training_bytes": 175005818,
"metrics": {
"loss": 0.49977711942411424,
"ce_loss": 0.4897771289608574,
"lb_loss": 1.000000002455289
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 176000000,
"cumulative_training_bytes": 176000495,
"metrics": {
"loss": 0.4997098139089463,
"ce_loss": 0.4897098234456895,
"lb_loss": 1.0000000023053306
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 177000000,
"cumulative_training_bytes": 177000876,
"metrics": {
"loss": 0.49965548414968264,
"ce_loss": 0.4896554936864258,
"lb_loss": 1.00000000232063
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 178000000,
"cumulative_training_bytes": 178000666,
"metrics": {
"loss": 0.49961792182077686,
"ce_loss": 0.48961793135752,
"lb_loss": 1.0000000020812387
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 179000000,
"cumulative_training_bytes": 179000024,
"metrics": {
"loss": 0.4995814913945482,
"ce_loss": 0.4895815009312914,
"lb_loss": 1.00000000217103
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 180000000,
"cumulative_training_bytes": 180005268,
"metrics": {
"loss": 0.49955155799546297,
"ce_loss": 0.48955156753220613,
"lb_loss": 1.0000000022171776
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 181000000,
"cumulative_training_bytes": 181005588,
"metrics": {
"loss": 0.49950381894342616,
"ce_loss": 0.4895038284801693,
"lb_loss": 1.0000000026256317
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 182000000,
"cumulative_training_bytes": 182000786,
"metrics": {
"loss": 0.4994432111177551,
"ce_loss": 0.48944322065449825,
"lb_loss": 1.0000000024125906
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 183000000,
"cumulative_training_bytes": 183000940,
"metrics": {
"loss": 0.49941193581763443,
"ce_loss": 0.4894119453543776,
"lb_loss": 1.0000000023341307
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 184000000,
"cumulative_training_bytes": 184001325,
"metrics": {
"loss": 0.49934361564481095,
"ce_loss": 0.4893436251815541,
"lb_loss": 1.00000000238938
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 185000000,
"cumulative_training_bytes": 185001001,
"metrics": {
"loss": 0.4993054266519441,
"ce_loss": 0.48930543618868727,
"lb_loss": 1.0000000024081832
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 186000000,
"cumulative_training_bytes": 186000105,
"metrics": {
"loss": 0.49926686148291627,
"ce_loss": 0.48926687101965943,
"lb_loss": 1.0000000024673665
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 187000000,
"cumulative_training_bytes": 187005589,
"metrics": {
"loss": 0.4993321712423722,
"ce_loss": 0.4893321807791154,
"lb_loss": 1.0000000023138824
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 188000000,
"cumulative_training_bytes": 188000312,
"metrics": {
"loss": 0.49934911756284683,
"ce_loss": 0.48934912709959,
"lb_loss": 1.000000002364951
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 189000000,
"cumulative_training_bytes": 189005692,
"metrics": {
"loss": 0.49930618266025595,
"ce_loss": 0.4893061921969991,
"lb_loss": 1.0000000023283666
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 190000000,
"cumulative_training_bytes": 190000372,
"metrics": {
"loss": 0.4992524868235556,
"ce_loss": 0.48925249636029877,
"lb_loss": 1.0000000023543043
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 191000000,
"cumulative_training_bytes": 191000957,
"metrics": {
"loss": 0.4992190696960283,
"ce_loss": 0.48921907923277147,
"lb_loss": 1.0000000023862543
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 192000000,
"cumulative_training_bytes": 192000891,
"metrics": {
"loss": 0.49916283798055683,
"ce_loss": 0.4891628475173,
"lb_loss": 1.0000000025326727
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 193000000,
"cumulative_training_bytes": 193001130,
"metrics": {
"loss": 0.4991221111954719,
"ce_loss": 0.48912212073221506,
"lb_loss": 1.0000000026519316
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 194000000,
"cumulative_training_bytes": 194000556,
"metrics": {
"loss": 0.49906408485101195,
"ce_loss": 0.4890640943877551,
"lb_loss": 1.000000002599666
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 195000000,
"cumulative_training_bytes": 195001954,
"metrics": {
"loss": 0.49900763141705373,
"ce_loss": 0.4890076409537969,
"lb_loss": 1.000000002699387
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 196000000,
"cumulative_training_bytes": 196000689,
"metrics": {
"loss": 0.4989594545185693,
"ce_loss": 0.4889594640553125,
"lb_loss": 1.000000002741827
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 197000000,
"cumulative_training_bytes": 197001752,
"metrics": {
"loss": 0.49890855444971355,
"ce_loss": 0.4889085639864567,
"lb_loss": 1.0000000027695617
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 198000000,
"cumulative_training_bytes": 198002474,
"metrics": {
"loss": 0.49885149131451617,
"ce_loss": 0.48885150085125934,
"lb_loss": 1.000000002751182
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 199000000,
"cumulative_training_bytes": 199002221,
"metrics": {
"loss": 0.49881285537833114,
"ce_loss": 0.4888128649150743,
"lb_loss": 1.0000000026512044
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 200000000,
"cumulative_training_bytes": 200000993,
"metrics": {
"loss": 0.49877435920608715,
"ce_loss": 0.4887743687428303,
"lb_loss": 1.0000000026977178
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 201000000,
"cumulative_training_bytes": 201000542,
"metrics": {
"loss": 0.4987275889721726,
"ce_loss": 0.4887275985089158,
"lb_loss": 1.0000000027365117
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 202000000,
"cumulative_training_bytes": 202000266,
"metrics": {
"loss": 0.49867375425141036,
"ce_loss": 0.4886737637881535,
"lb_loss": 1.0000000025758429
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 203000000,
"cumulative_training_bytes": 203004847,
"metrics": {
"loss": 0.4986276846422209,
"ce_loss": 0.48862769417896407,
"lb_loss": 1.0000000024321198
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 204000000,
"cumulative_training_bytes": 204000174,
"metrics": {
"loss": 0.4986164967954536,
"ce_loss": 0.4886165063321968,
"lb_loss": 1.0000000022994096
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 205000000,
"cumulative_training_bytes": 205000569,
"metrics": {
"loss": 0.4985904758954976,
"ce_loss": 0.48859048543224076,
"lb_loss": 1.000000002336429
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 206000000,
"cumulative_training_bytes": 206000959,
"metrics": {
"loss": 0.4985317591843735,
"ce_loss": 0.48853176872111664,
"lb_loss": 1.0000000022767028
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 207000000,
"cumulative_training_bytes": 207001002,
"metrics": {
"loss": 0.4984893900640738,
"ce_loss": 0.48848939960081694,
"lb_loss": 1.0000000022465174
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 208000000,
"cumulative_training_bytes": 208000719,
"metrics": {
"loss": 0.49843654790773173,
"ce_loss": 0.4884365574444749,
"lb_loss": 1.000000002075621
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 209000000,
"cumulative_training_bytes": 209005520,
"metrics": {
"loss": 0.49840692531385533,
"ce_loss": 0.4884069348505985,
"lb_loss": 1.0000000021457887
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 210000000,
"cumulative_training_bytes": 210005103,
"metrics": {
"loss": 0.49835361690385965,
"ce_loss": 0.4883536264406028,
"lb_loss": 1.0000000021611968
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 211000000,
"cumulative_training_bytes": 211004535,
"metrics": {
"loss": 0.49829659387840225,
"ce_loss": 0.4882966034151454,
"lb_loss": 1.0000000022542415
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 212000000,
"cumulative_training_bytes": 212003237,
"metrics": {
"loss": 0.49826476548907883,
"ce_loss": 0.488264775025822,
"lb_loss": 1.0000000022368416
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 213000000,
"cumulative_training_bytes": 213003872,
"metrics": {
"loss": 0.49821131437943567,
"ce_loss": 0.48821132391617883,
"lb_loss": 1.0000000022199405
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 214000000,
"cumulative_training_bytes": 214003977,
"metrics": {
"loss": 0.4981701560704833,
"ce_loss": 0.4881701656072265,
"lb_loss": 1.000000002153664
}
},
{
"epoch": 3,
"checkpoint_type": "epoch",
"metrics": {
"loss": 0.4981207118514115,
"ce_loss": 0.48812072138815465,
"lb_loss": 1.000000002058049,
"training_bytes": 71629748
},
"cumulative_training_bytes": 214889218,
"training_bytes_this_epoch": 71629748
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 215000000,
"cumulative_training_bytes": 215001580,
"metrics": {
"loss": 0.4880016352000989,
"ce_loss": 0.4780016447368421,
"lb_loss": 1.0000000313708657
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 216000000,
"cumulative_training_bytes": 216001732,
"metrics": {
"loss": 0.4874559412611292,
"ce_loss": 0.47745595079787234,
"lb_loss": 1.0000000231443567
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 217000000,
"cumulative_training_bytes": 217001957,
"metrics": {
"loss": 0.48738313121955934,
"ce_loss": 0.4773831407563025,
"lb_loss": 1.0000000128559037
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 218000000,
"cumulative_training_bytes": 218001275,
"metrics": {
"loss": 0.4878064008720021,
"ce_loss": 0.4778064104087453,
"lb_loss": 1.0000000087253949
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 219000000,
"cumulative_training_bytes": 219001184,
"metrics": {
"loss": 0.48801539064311295,
"ce_loss": 0.4780154001798561,
"lb_loss": 1.0000000052314868
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 220000000,
"cumulative_training_bytes": 220001433,
"metrics": {
"loss": 0.4881019327375624,
"ce_loss": 0.4781019422743056,
"lb_loss": 1.000000007312607
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 221000000,
"cumulative_training_bytes": 221002027,
"metrics": {
"loss": 0.4882320058311174,
"ce_loss": 0.4782320153678606,
"lb_loss": 1.00000000657786
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 222000000,
"cumulative_training_bytes": 222000843,
"metrics": {
"loss": 0.4884359955589307,
"ce_loss": 0.47843600509567386,
"lb_loss": 1.0000000053059044
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 223000000,
"cumulative_training_bytes": 223001364,
"metrics": {
"loss": 0.4885213138585887,
"ce_loss": 0.4785213233953319,
"lb_loss": 1.0000000031736973
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 224000000,
"cumulative_training_bytes": 224001492,
"metrics": {
"loss": 0.48859297950546465,
"ce_loss": 0.4785929890422078,
"lb_loss": 1.0000000030189367
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 225000000,
"cumulative_training_bytes": 225001545,
"metrics": {
"loss": 0.4886264715925722,
"ce_loss": 0.4786264811293154,
"lb_loss": 1.0000000019182302
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 226000000,
"cumulative_training_bytes": 226001650,
"metrics": {
"loss": 0.4886456157960577,
"ce_loss": 0.47864562533280086,
"lb_loss": 1.0000000023486388
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 227000000,
"cumulative_training_bytes": 227001584,
"metrics": {
"loss": 0.4887551046547566,
"ce_loss": 0.47875511419149974,
"lb_loss": 1.0000000021256175
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 228000000,
"cumulative_training_bytes": 228001190,
"metrics": {
"loss": 0.4888778602173182,
"ce_loss": 0.4788778697540614,
"lb_loss": 1.000000001533152
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 229000000,
"cumulative_training_bytes": 229001032,
"metrics": {
"loss": 0.4889627459664015,
"ce_loss": 0.47896275550314465,
"lb_loss": 1.0000000022742233
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 230000000,
"cumulative_training_bytes": 230000968,
"metrics": {
"loss": 0.48901880898322553,
"ce_loss": 0.4790188185199687,
"lb_loss": 1.000000001867021
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 231000000,
"cumulative_training_bytes": 231000807,
"metrics": {
"loss": 0.48906002305598545,
"ce_loss": 0.4790600325927286,
"lb_loss": 1.0000000024953837
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 232000000,
"cumulative_training_bytes": 232001935,
"metrics": {
"loss": 0.48905724971291104,
"ce_loss": 0.4790572592496542,
"lb_loss": 1.000000002040408
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 233000000,
"cumulative_training_bytes": 233002684,
"metrics": {
"loss": 0.4890720104322212,
"ce_loss": 0.47907201996896437,
"lb_loss": 1.0000000021419506
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 234000000,
"cumulative_training_bytes": 234002126,
"metrics": {
"loss": 0.4890822031180556,
"ce_loss": 0.4790822126547988,
"lb_loss": 1.0000000019191588
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 235000000,
"cumulative_training_bytes": 235001865,
"metrics": {
"loss": 0.48909425532056783,
"ce_loss": 0.479094264857311,
"lb_loss": 1.0000000020517044
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 236000000,
"cumulative_training_bytes": 236002486,
"metrics": {
"loss": 0.4891166612171806,
"ce_loss": 0.4791166707539238,
"lb_loss": 1.0000000017707658
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 237000000,
"cumulative_training_bytes": 237003585,
"metrics": {
"loss": 0.4891103856666285,
"ce_loss": 0.4791103952033717,
"lb_loss": 1.0000000018661341
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 238000000,
"cumulative_training_bytes": 238002820,
"metrics": {
"loss": 0.48916915728993376,
"ce_loss": 0.47916916682667693,
"lb_loss": 1.0000000021668867
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 239000000,
"cumulative_training_bytes": 239002121,
"metrics": {
"loss": 0.48922257481908504,
"ce_loss": 0.4792225843558282,
"lb_loss": 1.0000000026328433
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 240000000,
"cumulative_training_bytes": 240002063,
"metrics": {
"loss": 0.48926897682635767,
"ce_loss": 0.47926898636310084,
"lb_loss": 1.000000002373512
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 241000000,
"cumulative_training_bytes": 241003283,
"metrics": {
"loss": 0.48924765007689835,
"ce_loss": 0.4792476596136415,
"lb_loss": 1.0000000022420963
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 242000000,
"cumulative_training_bytes": 242002834,
"metrics": {
"loss": 0.4892743589158976,
"ce_loss": 0.47927436845264076,
"lb_loss": 1.000000002783805
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 243000000,
"cumulative_training_bytes": 243002432,
"metrics": {
"loss": 0.48929176785222755,
"ce_loss": 0.4792917773889707,
"lb_loss": 1.0000000026471438
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 244000000,
"cumulative_training_bytes": 244001977,
"metrics": {
"loss": 0.48928654407098043,
"ce_loss": 0.4792865536077236,
"lb_loss": 1.0000000030771503
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 245000000,
"cumulative_training_bytes": 245003684,
"metrics": {
"loss": 0.4892567206656542,
"ce_loss": 0.47925673020239734,
"lb_loss": 1.0000000031506484
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 246000000,
"cumulative_training_bytes": 246003783,
"metrics": {
"loss": 0.4892511018649305,
"ce_loss": 0.47925111140167365,
"lb_loss": 1.0000000031400698
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 247000000,
"cumulative_training_bytes": 247004190,
"metrics": {
"loss": 0.48922172035546246,
"ce_loss": 0.47922172989220563,
"lb_loss": 1.0000000033827585
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 248000000,
"cumulative_training_bytes": 248001380,
"metrics": {
"loss": 0.489257635432878,
"ce_loss": 0.47925764496962114,
"lb_loss": 1.0000000028438956
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 249000000,
"cumulative_training_bytes": 249000805,
"metrics": {
"loss": 0.48925180637826116,
"ce_loss": 0.4792518159150043,
"lb_loss": 1.0000000031430722
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 250000000,
"cumulative_training_bytes": 250001009,
"metrics": {
"loss": 0.4892397265181945,
"ce_loss": 0.47923973605493764,
"lb_loss": 1.000000003174093
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 251000000,
"cumulative_training_bytes": 251000542,
"metrics": {
"loss": 0.4892279956451346,
"ce_loss": 0.47922800518187775,
"lb_loss": 1.0000000031740963
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 252000000,
"cumulative_training_bytes": 252000674,
"metrics": {
"loss": 0.4892209452025744,
"ce_loss": 0.4792209547393176,
"lb_loss": 1.0000000032216159
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 253000000,
"cumulative_training_bytes": 253005686,
"metrics": {
"loss": 0.4892111244278641,
"ce_loss": 0.47921113396460724,
"lb_loss": 1.0000000030718321
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 254000000,
"cumulative_training_bytes": 254000399,
"metrics": {
"loss": 0.489192856386101,
"ce_loss": 0.47919286592284416,
"lb_loss": 1.0000000032642786
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 255000000,
"cumulative_training_bytes": 255000527,
"metrics": {
"loss": 0.4891710748783623,
"ce_loss": 0.4791710844151055,
"lb_loss": 1.000000002980672
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 256000000,
"cumulative_training_bytes": 256001300,
"metrics": {
"loss": 0.48916581381170243,
"ce_loss": 0.4791658233484456,
"lb_loss": 1.0000000028566993
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 257000000,
"cumulative_training_bytes": 257002023,
"metrics": {
"loss": 0.4891462577633833,
"ce_loss": 0.47914626730012644,
"lb_loss": 1.0000000031154879
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 258000000,
"cumulative_training_bytes": 258002271,
"metrics": {
"loss": 0.4892640542499711,
"ce_loss": 0.47926406378671427,
"lb_loss": 1.000000003092308
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 259000000,
"cumulative_training_bytes": 259002781,
"metrics": {
"loss": 0.4893791298831093,
"ce_loss": 0.47937913941985244,
"lb_loss": 1.000000003134141
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 260000000,
"cumulative_training_bytes": 260002747,
"metrics": {
"loss": 0.4893935482249305,
"ce_loss": 0.47939355776167364,
"lb_loss": 1.0000000030177587
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 261000000,
"cumulative_training_bytes": 261003435,
"metrics": {
"loss": 0.48941761685873997,
"ce_loss": 0.47941762639548313,
"lb_loss": 1.0000000031435274
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 262000000,
"cumulative_training_bytes": 262003338,
"metrics": {
"loss": 0.4894158878508479,
"ce_loss": 0.47941589738759105,
"lb_loss": 1.000000002949539
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 263000000,
"cumulative_training_bytes": 263003784,
"metrics": {
"loss": 0.4895384178553365,
"ce_loss": 0.4795384273920797,
"lb_loss": 1.0000000027929368
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 264000000,
"cumulative_training_bytes": 264003117,
"metrics": {
"loss": 0.4896001881289195,
"ce_loss": 0.47960019766566264,
"lb_loss": 1.0000000026283493
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 265000000,
"cumulative_training_bytes": 265003414,
"metrics": {
"loss": 0.4895905376057766,
"ce_loss": 0.47959054714251975,
"lb_loss": 1.0000000025266345
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 266000000,
"cumulative_training_bytes": 266003442,
"metrics": {
"loss": 0.48961065880372917,
"ce_loss": 0.47961066834047233,
"lb_loss": 1.000000002311595
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 267000000,
"cumulative_training_bytes": 267003434,
"metrics": {
"loss": 0.4895792225371753,
"ce_loss": 0.47957923207391845,
"lb_loss": 1.000000002152183
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 268000000,
"cumulative_training_bytes": 268003818,
"metrics": {
"loss": 0.48956441921771315,
"ce_loss": 0.4795644287544563,
"lb_loss": 1.0000000020120552
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 269000000,
"cumulative_training_bytes": 269002978,
"metrics": {
"loss": 0.48955592950098237,
"ce_loss": 0.47955593903772553,
"lb_loss": 1.000000002202993
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 270000000,
"cumulative_training_bytes": 270002483,
"metrics": {
"loss": 0.48952992356128133,
"ce_loss": 0.4795299330980245,
"lb_loss": 1.0000000023806022
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 271000000,
"cumulative_training_bytes": 271002540,
"metrics": {
"loss": 0.4895038147409116,
"ce_loss": 0.47950382427765476,
"lb_loss": 1.0000000024261724
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 272000000,
"cumulative_training_bytes": 272002993,
"metrics": {
"loss": 0.48948651203909604,
"ce_loss": 0.4794865215758392,
"lb_loss": 1.0000000023651656
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 273000000,
"cumulative_training_bytes": 273003373,
"metrics": {
"loss": 0.48946483302002297,
"ce_loss": 0.47946484255676614,
"lb_loss": 1.000000002154531
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 274000000,
"cumulative_training_bytes": 274002590,
"metrics": {
"loss": 0.4894640248101037,
"ce_loss": 0.47946403434684687,
"lb_loss": 1.0000000023090088
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 275000000,
"cumulative_training_bytes": 275002224,
"metrics": {
"loss": 0.48944997785128713,
"ce_loss": 0.4794499873880303,
"lb_loss": 1.0000000022060582
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 276000000,
"cumulative_training_bytes": 276003651,
"metrics": {
"loss": 0.4894235311415101,
"ce_loss": 0.47942354067825327,
"lb_loss": 1.0000000022276716
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 277000000,
"cumulative_training_bytes": 277002727,
"metrics": {
"loss": 0.48940091304828115,
"ce_loss": 0.4794009225850243,
"lb_loss": 1.000000002265624
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 278000000,
"cumulative_training_bytes": 278002905,
"metrics": {
"loss": 0.48939109742697334,
"ce_loss": 0.4793911069637165,
"lb_loss": 1.0000000021459012
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 279000000,
"cumulative_training_bytes": 279002296,
"metrics": {
"loss": 0.48937239470875754,
"ce_loss": 0.4793724042455007,
"lb_loss": 1.0000000020409159
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 280000000,
"cumulative_training_bytes": 280002829,
"metrics": {
"loss": 0.48934805423552147,
"ce_loss": 0.47934806377226463,
"lb_loss": 1.000000002193737
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 281000000,
"cumulative_training_bytes": 281001549,
"metrics": {
"loss": 0.4893414064325578,
"ce_loss": 0.47934141596930097,
"lb_loss": 1.0000000022459103
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 282000000,
"cumulative_training_bytes": 282001567,
"metrics": {
"loss": 0.489328068624075,
"ce_loss": 0.4793280781608182,
"lb_loss": 1.0000000024594404
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 283000000,
"cumulative_training_bytes": 283001333,
"metrics": {
"loss": 0.48931800692577093,
"ce_loss": 0.4793180164625141,
"lb_loss": 1.000000002594208
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 284000000,
"cumulative_training_bytes": 284000025,
"metrics": {
"loss": 0.48929820322010614,
"ce_loss": 0.4792982127568493,
"lb_loss": 1.0000000026281157
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 285000000,
"cumulative_training_bytes": 285005735,
"metrics": {
"loss": 0.4892746146721176,
"ce_loss": 0.47927462420886074,
"lb_loss": 1.0000000025853828
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 286000000,
"cumulative_training_bytes": 286000791,
"metrics": {
"loss": 0.48924303995152446,
"ce_loss": 0.4792430494882676,
"lb_loss": 1.0000000023756552
}
},
{
"epoch": 4,
"checkpoint_type": "epoch",
"metrics": {
"loss": 0.4892372682077637,
"ce_loss": 0.47923727774450686,
"lb_loss": 1.0000000023534628,
"training_bytes": 71629711
},
"cumulative_training_bytes": 286518929,
"training_bytes_this_epoch": 71629711
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 287000000,
"cumulative_training_bytes": 287004047,
"metrics": {
"loss": 0.4806078410730129,
"ce_loss": 0.4706078506097561,
"lb_loss": 1.0000000225334633
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 288000000,
"cumulative_training_bytes": 288004715,
"metrics": {
"loss": 0.4802595771166433,
"ce_loss": 0.47025958665338646,
"lb_loss": 1.0000000116359664
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 289000000,
"cumulative_training_bytes": 289004562,
"metrics": {
"loss": 0.48041944730849495,
"ce_loss": 0.4704194568452381,
"lb_loss": 1.0000000120628447
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 290000000,
"cumulative_training_bytes": 290004822,
"metrics": {
"loss": 0.48065669143948775,
"ce_loss": 0.4706567009762309,
"lb_loss": 1.000000007690922
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 291000000,
"cumulative_training_bytes": 291005873,
"metrics": {
"loss": 0.4806928087350246,
"ce_loss": 0.4706928182717678,
"lb_loss": 1.0000000058189231
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 292000000,
"cumulative_training_bytes": 292005364,
"metrics": {
"loss": 0.48091802242118564,
"ce_loss": 0.4709180319579288,
"lb_loss": 1.0000000037936074
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 293000000,
"cumulative_training_bytes": 293004963,
"metrics": {
"loss": 0.4810613071831473,
"ce_loss": 0.4710613167198905,
"lb_loss": 1.000000003317412
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 294000000,
"cumulative_training_bytes": 294005549,
"metrics": {
"loss": 0.48113079579922524,
"ce_loss": 0.4711308053359684,
"lb_loss": 1.0000000031569258
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 295000000,
"cumulative_training_bytes": 295000539,
"metrics": {
"loss": 0.48119787078426174,
"ce_loss": 0.4711978803210049,
"lb_loss": 1.0000000028700073
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 296000000,
"cumulative_training_bytes": 296005546,
"metrics": {
"loss": 0.48143782187311335,
"ce_loss": 0.4714378314098565,
"lb_loss": 1.0000000027143725
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 297000000,
"cumulative_training_bytes": 297000097,
"metrics": {
"loss": 0.4815578144327656,
"ce_loss": 0.47155782396950874,
"lb_loss": 1.000000002692474
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 298000000,
"cumulative_training_bytes": 298005559,
"metrics": {
"loss": 0.48169829352868704,
"ce_loss": 0.4716983030654302,
"lb_loss": 1.0000000024873654
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 299000000,
"cumulative_training_bytes": 299000334,
"metrics": {
"loss": 0.48176441524988556,
"ce_loss": 0.4717644247866287,
"lb_loss": 1.0000000026848939
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 300000000,
"cumulative_training_bytes": 300001103,
"metrics": {
"loss": 0.48185715683726915,
"ce_loss": 0.4718571663740123,
"lb_loss": 1.0000000018315738
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 301000000,
"cumulative_training_bytes": 301002320,
"metrics": {
"loss": 0.48188760156869204,
"ce_loss": 0.4718876111054352,
"lb_loss": 1.0000000009743302
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 302000000,
"cumulative_training_bytes": 302002159,
"metrics": {
"loss": 0.4819753344999541,
"ce_loss": 0.4719753440366973,
"lb_loss": 1.0000000013442945
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 303000000,
"cumulative_training_bytes": 303002056,
"metrics": {
"loss": 0.4820222638025746,
"ce_loss": 0.4720222733393178,
"lb_loss": 1.0000000014981418
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 304000000,
"cumulative_training_bytes": 304001329,
"metrics": {
"loss": 0.4820512609862765,
"ce_loss": 0.47205127052301965,
"lb_loss": 1.0000000014931427
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 305000000,
"cumulative_training_bytes": 305002202,
"metrics": {
"loss": 0.4821052628375764,
"ce_loss": 0.47210527237431954,
"lb_loss": 1.0000000020230844
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 306000000,
"cumulative_training_bytes": 306001200,
"metrics": {
"loss": 0.48219109769290447,
"ce_loss": 0.47219110722964763,
"lb_loss": 1.0000000016476376
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 307000000,
"cumulative_training_bytes": 307001550,
"metrics": {
"loss": 0.48222227538813406,
"ce_loss": 0.4722222849248772,
"lb_loss": 1.0000000016360708
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 308000000,
"cumulative_training_bytes": 308001611,
"metrics": {
"loss": 0.4822451695266177,
"ce_loss": 0.4722451790633609,
"lb_loss": 1.0000000020853417
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 309000000,
"cumulative_training_bytes": 309000993,
"metrics": {
"loss": 0.48228710549352793,
"ce_loss": 0.4722871150302711,
"lb_loss": 1.000000001819989
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 310000000,
"cumulative_training_bytes": 310002159,
"metrics": {
"loss": 0.48228461511673465,
"ce_loss": 0.4722846246534778,
"lb_loss": 1.0000000019978372
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 311000000,
"cumulative_training_bytes": 311002120,
"metrics": {
"loss": 0.48233567679997424,
"ce_loss": 0.4723356863367174,
"lb_loss": 1.0000000019162238
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 312000000,
"cumulative_training_bytes": 312002300,
"metrics": {
"loss": 0.48235052605313145,
"ce_loss": 0.4723505355898746,
"lb_loss": 1.0000000015641721
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 313000000,
"cumulative_training_bytes": 313000722,
"metrics": {
"loss": 0.48237080051912273,
"ce_loss": 0.4723708100558659,
"lb_loss": 1.0000000015850175
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 314000000,
"cumulative_training_bytes": 314001059,
"metrics": {
"loss": 0.48239464620184425,
"ce_loss": 0.4723946557385874,
"lb_loss": 1.000000001732693
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 315000000,
"cumulative_training_bytes": 315000576,
"metrics": {
"loss": 0.48241925209841163,
"ce_loss": 0.4724192616351548,
"lb_loss": 1.0000000016842368
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 316000000,
"cumulative_training_bytes": 316000807,
"metrics": {
"loss": 0.4824112177565126,
"ce_loss": 0.47241122729325574,
"lb_loss": 1.000000001806564
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 317000000,
"cumulative_training_bytes": 317000909,
"metrics": {
"loss": 0.4824116732675667,
"ce_loss": 0.47241168280430984,
"lb_loss": 1.0000000018282924
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 318000000,
"cumulative_training_bytes": 318000322,
"metrics": {
"loss": 0.48244807845667786,
"ce_loss": 0.47244808799342103,
"lb_loss": 1.0000000013444656
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 319000000,
"cumulative_training_bytes": 319001281,
"metrics": {
"loss": 0.4824737020910579,
"ce_loss": 0.47247371162780105,
"lb_loss": 1.0000000015745443
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 320000000,
"cumulative_training_bytes": 320000836,
"metrics": {
"loss": 0.48250469895123843,
"ce_loss": 0.4725047084879816,
"lb_loss": 1.000000001948897
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 321000000,
"cumulative_training_bytes": 321001661,
"metrics": {
"loss": 0.4825023903903205,
"ce_loss": 0.47250239992706367,
"lb_loss": 1.0000000019230604
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 322000000,
"cumulative_training_bytes": 322001730,
"metrics": {
"loss": 0.4825008634410754,
"ce_loss": 0.47250087297781856,
"lb_loss": 1.0000000020378506
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 323000000,
"cumulative_training_bytes": 323000981,
"metrics": {
"loss": 0.482539654899591,
"ce_loss": 0.47253966443633416,
"lb_loss": 1.0000000021560156
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 324000000,
"cumulative_training_bytes": 324002180,
"metrics": {
"loss": 0.4825421488544788,
"ce_loss": 0.472542158391222,
"lb_loss": 1.0000000021737723
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 325000000,
"cumulative_training_bytes": 325001770,
"metrics": {
"loss": 0.48258986484815614,
"ce_loss": 0.4725898743848993,
"lb_loss": 1.000000002218103
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 326000000,
"cumulative_training_bytes": 326001016,
"metrics": {
"loss": 0.4826266742724595,
"ce_loss": 0.47262668380920264,
"lb_loss": 1.0000000019296467
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 327000000,
"cumulative_training_bytes": 327000727,
"metrics": {
"loss": 0.4826471043720421,
"ce_loss": 0.4726471139087853,
"lb_loss": 1.000000001925541
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 328000000,
"cumulative_training_bytes": 328001475,
"metrics": {
"loss": 0.4826450956701042,
"ce_loss": 0.4726451052068474,
"lb_loss": 1.0000000018791193
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 329000000,
"cumulative_training_bytes": 329000597,
"metrics": {
"loss": 0.4826633140459285,
"ce_loss": 0.47266332358267166,
"lb_loss": 1.0000000018099753
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 330000000,
"cumulative_training_bytes": 330000395,
"metrics": {
"loss": 0.48268707368318065,
"ce_loss": 0.4726870832199238,
"lb_loss": 1.0000000017034534
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 331000000,
"cumulative_training_bytes": 331000366,
"metrics": {
"loss": 0.48268638047922063,
"ce_loss": 0.4726863900159638,
"lb_loss": 1.0000000016889437
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 332000000,
"cumulative_training_bytes": 332000600,
"metrics": {
"loss": 0.4826902918228717,
"ce_loss": 0.4726903013596149,
"lb_loss": 1.0000000017681316
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 333000000,
"cumulative_training_bytes": 333000847,
"metrics": {
"loss": 0.4826878186777699,
"ce_loss": 0.47268782821451305,
"lb_loss": 1.0000000019273814
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 334000000,
"cumulative_training_bytes": 334000366,
"metrics": {
"loss": 0.4826988372821751,
"ce_loss": 0.4726988468189183,
"lb_loss": 1.0000000019016437
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 335000000,
"cumulative_training_bytes": 335005476,
"metrics": {
"loss": 0.4827082030273281,
"ce_loss": 0.47270821256407125,
"lb_loss": 1.0000000017312551
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 336000000,
"cumulative_training_bytes": 336005120,
"metrics": {
"loss": 0.4827010808315457,
"ce_loss": 0.4727010903682889,
"lb_loss": 1.0000000015109631
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 337000000,
"cumulative_training_bytes": 337004532,
"metrics": {
"loss": 0.48272331332132057,
"ce_loss": 0.47272332285806373,
"lb_loss": 1.0000000015718524
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 338000000,
"cumulative_training_bytes": 338005247,
"metrics": {
"loss": 0.4827238063321225,
"ce_loss": 0.47272381586886564,
"lb_loss": 1.0000000015961248
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 339000000,
"cumulative_training_bytes": 339000096,
"metrics": {
"loss": 0.4827229669121237,
"ce_loss": 0.47272297644886685,
"lb_loss": 1.0000000014314792
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 340000000,
"cumulative_training_bytes": 340005646,
"metrics": {
"loss": 0.48274137501907055,
"ce_loss": 0.4727413845558137,
"lb_loss": 1.0000000013122385
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 341000000,
"cumulative_training_bytes": 341005395,
"metrics": {
"loss": 0.4827476606277877,
"ce_loss": 0.47274767016453084,
"lb_loss": 1.00000000132052
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 342000000,
"cumulative_training_bytes": 342000175,
"metrics": {
"loss": 0.482743314508692,
"ce_loss": 0.4727433240454352,
"lb_loss": 1.00000000118243
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 343000000,
"cumulative_training_bytes": 343000194,
"metrics": {
"loss": 0.48274443290694463,
"ce_loss": 0.4727444424436878,
"lb_loss": 1.0000000013987889
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 344000000,
"cumulative_training_bytes": 344000175,
"metrics": {
"loss": 0.48273867625180944,
"ce_loss": 0.4727386857885526,
"lb_loss": 1.0000000012946861
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 345000000,
"cumulative_training_bytes": 345000308,
"metrics": {
"loss": 0.4827433929599684,
"ce_loss": 0.47274340249671154,
"lb_loss": 1.0000000012122365
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 346000000,
"cumulative_training_bytes": 346005463,
"metrics": {
"loss": 0.4828590717076615,
"ce_loss": 0.4728590812444047,
"lb_loss": 1.0000000011858081
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 347000000,
"cumulative_training_bytes": 347004643,
"metrics": {
"loss": 0.48290444134126503,
"ce_loss": 0.4729044508780082,
"lb_loss": 1.000000001125386
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 348000000,
"cumulative_training_bytes": 348004692,
"metrics": {
"loss": 0.48292070460289693,
"ce_loss": 0.4729207141396401,
"lb_loss": 1.0000000011644445
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 349000000,
"cumulative_training_bytes": 349004119,
"metrics": {
"loss": 0.48292387037566215,
"ce_loss": 0.4729238799124053,
"lb_loss": 1.0000000013490067
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 350000000,
"cumulative_training_bytes": 350003796,
"metrics": {
"loss": 0.48291546779106,
"ce_loss": 0.47291547732780315,
"lb_loss": 1.0000000011722043
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 351000000,
"cumulative_training_bytes": 351004186,
"metrics": {
"loss": 0.48290248690755855,
"ce_loss": 0.4729024964443017,
"lb_loss": 1.0000000011923116
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 352000000,
"cumulative_training_bytes": 352003413,
"metrics": {
"loss": 0.48291019789300293,
"ce_loss": 0.4729102074297461,
"lb_loss": 1.000000001454166
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 353000000,
"cumulative_training_bytes": 353003199,
"metrics": {
"loss": 0.4829023801036983,
"ce_loss": 0.47290238964044146,
"lb_loss": 1.000000001453513
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 354000000,
"cumulative_training_bytes": 354003719,
"metrics": {
"loss": 0.48289616401652297,
"ce_loss": 0.47289617355326613,
"lb_loss": 1.0000000015103676
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 355000000,
"cumulative_training_bytes": 355004300,
"metrics": {
"loss": 0.48289350448174656,
"ce_loss": 0.4728935140184897,
"lb_loss": 1.0000000015655617
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 356000000,
"cumulative_training_bytes": 356004592,
"metrics": {
"loss": 0.48288726240185853,
"ce_loss": 0.4728872719386017,
"lb_loss": 1.0000000014871975
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 357000000,
"cumulative_training_bytes": 357003308,
"metrics": {
"loss": 0.4828915270964838,
"ce_loss": 0.47289153663322697,
"lb_loss": 1.0000000015261432
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 358000000,
"cumulative_training_bytes": 358002795,
"metrics": {
"loss": 0.4828872656784708,
"ce_loss": 0.47288727521521395,
"lb_loss": 1.000000001662674
}
},
{
"epoch": 5,
"checkpoint_type": "epoch",
"metrics": {
"loss": 0.48288743393137185,
"ce_loss": 0.472887443468115,
"lb_loss": 1.0000000016887818,
"training_bytes": 71629698
},
"cumulative_training_bytes": 358148627,
"training_bytes_this_epoch": 71629698
}
]
}