MOSES-nocat / metadata.json
jordiferrero's picture
Add files using upload-large-folder tool
58494f5 verified
{
"run_name": "run_large_20251113_074900",
"timestamp": "20251113_074900",
"phase": "large",
"config": {
"arch_layout": [
"m4",
[
"T22"
],
"m4"
],
"d_model": [
1024,
1536
],
"d_intermediate": [
0,
4096
],
"vocab_size": 256,
"ssm_cfg": {
"chunk_size": 256,
"d_conv": 4,
"d_state": 128,
"expand": 2
},
"attn_cfg": {
"num_heads": [
16,
16
],
"rotary_emb_dim": [
32,
48
],
"window_size": [
1023,
-1
]
},
"tie_embeddings": false
},
"training_args": {
"data": "datasets/moses/smiles-molecules-moses_all.csv",
"max_samples": null,
"batch_size": 128,
"epochs": 5,
"lr": 0.0001,
"weight_decay": 0.1,
"gradient_accumulation": 8,
"concatenate": false,
"num_concatenate": 0,
"concatenate_separator": " ",
"checkpoint_bytes": 1000000,
"num_test_samples": 50,
"num_visualize": 15,
"skip_visualization": false
},
"dataset_info": {
"train_size": 1936912,
"test_size": 50,
"test_smiles_file": "checkpoints/run_large_20251113_074900/test_smiles.txt"
},
"model_info": {
"num_parameters": 622923776,
"device": "cuda",
"dtype": "torch.bfloat16",
"use_amp": true
},
"training_history": [
{
"checkpoint_type": "bytes",
"bytes_threshold": 1000000,
"cumulative_training_bytes": 1002806,
"metrics": {
"loss": 2.281135283806474,
"ce_loss": 2.271135265700483,
"lb_loss": 1.0000000002879452
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 2000000,
"cumulative_training_bytes": 2002946,
"metrics": {
"loss": 1.706012413519058,
"ce_loss": 1.6960124092009685,
"lb_loss": 0.9999999955260436
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 3000000,
"cumulative_training_bytes": 3001506,
"metrics": {
"loss": 1.4319949007573536,
"ce_loss": 1.4219949010500808,
"lb_loss": 0.9999999944150737
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 4000000,
"cumulative_training_bytes": 4000430,
"metrics": {
"loss": 1.2650426110354336,
"ce_loss": 1.2550426136363637,
"lb_loss": 0.9999999934254271
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 5000000,
"cumulative_training_bytes": 5003932,
"metrics": {
"loss": 1.1526008317636889,
"ce_loss": 1.142600835755814,
"lb_loss": 0.9999999931269838
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 6000000,
"cumulative_training_bytes": 6003600,
"metrics": {
"loss": 1.0721529130981888,
"ce_loss": 1.062152918012924,
"lb_loss": 0.999999994848387
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 7000000,
"cumulative_training_bytes": 7002600,
"metrics": {
"loss": 1.0112524831724299,
"ce_loss": 1.0012524887465375,
"lb_loss": 0.9999999941798788
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 8000000,
"cumulative_training_bytes": 8002304,
"metrics": {
"loss": 0.9639488575675271,
"ce_loss": 0.9539488636363637,
"lb_loss": 0.9999999943646518
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 9000000,
"cumulative_training_bytes": 9000790,
"metrics": {
"loss": 0.9256936054805229,
"ce_loss": 0.9156936119342672,
"lb_loss": 0.9999999935128565
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 10000000,
"cumulative_training_bytes": 10003453,
"metrics": {
"loss": 0.8938084639106858,
"ce_loss": 0.8838084706737761,
"lb_loss": 0.9999999943371254
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 11000000,
"cumulative_training_bytes": 11002426,
"metrics": {
"loss": 0.8670801356690946,
"ce_loss": 0.8570801426840018,
"lb_loss": 0.999999994536022
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 12000000,
"cumulative_training_bytes": 12000566,
"metrics": {
"loss": 0.8441871771186289,
"ce_loss": 0.8341871843434343,
"lb_loss": 0.9999999944128172
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 13000000,
"cumulative_training_bytes": 13004187,
"metrics": {
"loss": 0.8242754283722971,
"ce_loss": 0.8142754357755406,
"lb_loss": 0.999999994221772
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 14000000,
"cumulative_training_bytes": 14003783,
"metrics": {
"loss": 0.8069831988157659,
"ce_loss": 0.7969832063711911,
"lb_loss": 0.9999999938083818
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 15000000,
"cumulative_training_bytes": 15002839,
"metrics": {
"loss": 0.791671675408973,
"ce_loss": 0.7816716830963154,
"lb_loss": 0.9999999934693036
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 16000000,
"cumulative_training_bytes": 16002416,
"metrics": {
"loss": 0.7779427005305435,
"ce_loss": 0.7679427083333333,
"lb_loss": 0.9999999930461247
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 17000000,
"cumulative_training_bytes": 17002552,
"metrics": {
"loss": 0.7657100694626995,
"ce_loss": 0.7557100773673702,
"lb_loss": 0.9999999931316952
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 18000000,
"cumulative_training_bytes": 18001467,
"metrics": {
"loss": 0.7545878573738295,
"ce_loss": 0.7445878653690733,
"lb_loss": 0.9999999934004555
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 19000000,
"cumulative_training_bytes": 19000835,
"metrics": {
"loss": 0.7444627280135493,
"ce_loss": 0.7344627360898418,
"lb_loss": 0.9999999935496761
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 20000000,
"cumulative_training_bytes": 20000123,
"metrics": {
"loss": 0.7352657759802418,
"ce_loss": 0.725265784129486,
"lb_loss": 0.9999999934238328
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 21000000,
"cumulative_training_bytes": 21002597,
"metrics": {
"loss": 0.726655715058511,
"ce_loss": 0.7166557232740707,
"lb_loss": 0.999999993339033
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 22000000,
"cumulative_training_bytes": 22000619,
"metrics": {
"loss": 0.7188806190663086,
"ce_loss": 0.7088806273418559,
"lb_loss": 0.9999999929451853
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 23000000,
"cumulative_training_bytes": 23000348,
"metrics": {
"loss": 0.7116252486272977,
"ce_loss": 0.7016252569576218,
"lb_loss": 0.9999999927991858
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 24000000,
"cumulative_training_bytes": 24004833,
"metrics": {
"loss": 0.7047766645990237,
"ce_loss": 0.694776672979798,
"lb_loss": 0.9999999928354013
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 25000000,
"cumulative_training_bytes": 25004271,
"metrics": {
"loss": 0.6984826149729632,
"ce_loss": 0.6884826233999224,
"lb_loss": 0.9999999924742777
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 26000000,
"cumulative_training_bytes": 26003036,
"metrics": {
"loss": 0.6926022446542389,
"ce_loss": 0.6826022531238344,
"lb_loss": 0.9999999924410372
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 27000000,
"cumulative_training_bytes": 27002303,
"metrics": {
"loss": 0.68704403782713,
"ce_loss": 0.6770440463362069,
"lb_loss": 0.9999999920034717
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 28000000,
"cumulative_training_bytes": 28001005,
"metrics": {
"loss": 0.6819027289412697,
"ce_loss": 0.6719027374870108,
"lb_loss": 0.9999999921752128
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 29000000,
"cumulative_training_bytes": 29003421,
"metrics": {
"loss": 0.6770648373905342,
"ce_loss": 0.6670648459705735,
"lb_loss": 0.9999999924460257
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 30000000,
"cumulative_training_bytes": 30003416,
"metrics": {
"loss": 0.6724512500756774,
"ce_loss": 0.6624512586875707,
"lb_loss": 0.9999999922158472
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 31000000,
"cumulative_training_bytes": 31002007,
"metrics": {
"loss": 0.6681099358288201,
"ce_loss": 0.6581099444705146,
"lb_loss": 0.9999999920937371
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 32000000,
"cumulative_training_bytes": 32004458,
"metrics": {
"loss": 0.6640003464438698,
"ce_loss": 0.6540003551136364,
"lb_loss": 0.9999999920256211
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 33000000,
"cumulative_training_bytes": 33002297,
"metrics": {
"loss": 0.6601016364516562,
"ce_loss": 0.6501016451476638,
"lb_loss": 0.9999999920743162
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 34000000,
"cumulative_training_bytes": 34004030,
"metrics": {
"loss": 0.6564515032034601,
"ce_loss": 0.6464515119242835,
"lb_loss": 0.9999999919427915
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 35000000,
"cumulative_training_bytes": 35002326,
"metrics": {
"loss": 0.6529157669000273,
"ce_loss": 0.6429157756441335,
"lb_loss": 0.999999991933268
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 36000000,
"cumulative_training_bytes": 36001349,
"metrics": {
"loss": 0.6495351869409735,
"ce_loss": 0.6395351957070707,
"lb_loss": 0.9999999918600525
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 37000000,
"cumulative_training_bytes": 37004270,
"metrics": {
"loss": 0.6462952058520207,
"ce_loss": 0.63629521463902,
"lb_loss": 0.999999991823105
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 38000000,
"cumulative_training_bytes": 38003714,
"metrics": {
"loss": 0.6431952418950054,
"ce_loss": 0.6331952507017096,
"lb_loss": 0.9999999918174792
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 39000000,
"cumulative_training_bytes": 39003133,
"metrics": {
"loss": 0.6402474970796226,
"ce_loss": 0.6302475059050224,
"lb_loss": 0.9999999917750926
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 40000000,
"cumulative_training_bytes": 40001052,
"metrics": {
"loss": 0.6374116862325957,
"ce_loss": 0.6274116950757576,
"lb_loss": 0.999999991792621
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 41000000,
"cumulative_training_bytes": 41003634,
"metrics": {
"loss": 0.634700220979048,
"ce_loss": 0.6247002298391865,
"lb_loss": 0.9999999917045446
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 42000000,
"cumulative_training_bytes": 42001934,
"metrics": {
"loss": 0.6320584671857601,
"ce_loss": 0.6220584760619877,
"lb_loss": 0.9999999916747524
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 43000000,
"cumulative_training_bytes": 43004796,
"metrics": {
"loss": 0.6295534807504097,
"ce_loss": 0.6195534896420518,
"lb_loss": 0.9999999916808849
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 44000000,
"cumulative_training_bytes": 44003115,
"metrics": {
"loss": 0.6271674778031694,
"ce_loss": 0.6171674867094535,
"lb_loss": 0.9999999917974657
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 45000000,
"cumulative_training_bytes": 45002725,
"metrics": {
"loss": 0.6248585546974785,
"ce_loss": 0.6148585636177548,
"lb_loss": 0.9999999917547551
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 46000000,
"cumulative_training_bytes": 46002129,
"metrics": {
"loss": 0.6225959593288815,
"ce_loss": 0.6125959682625421,
"lb_loss": 0.9999999915694104
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 47000000,
"cumulative_training_bytes": 47000416,
"metrics": {
"loss": 0.6204234810989125,
"ce_loss": 0.6104234900453889,
"lb_loss": 0.9999999917178196
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 48000000,
"cumulative_training_bytes": 48003247,
"metrics": {
"loss": 0.6183409256689982,
"ce_loss": 0.6083409346278154,
"lb_loss": 0.9999999918066941
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 49000000,
"cumulative_training_bytes": 49003796,
"metrics": {
"loss": 0.6163010463005029,
"ce_loss": 0.6063010552710992,
"lb_loss": 0.9999999916847186
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 50000000,
"cumulative_training_bytes": 50003148,
"metrics": {
"loss": 0.614361976497587,
"ce_loss": 0.6043619854794919,
"lb_loss": 0.9999999918392555
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 51000000,
"cumulative_training_bytes": 51000399,
"metrics": {
"loss": 0.6125098095498663,
"ce_loss": 0.6025098185426372,
"lb_loss": 0.99999999184608
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 52000000,
"cumulative_training_bytes": 52003702,
"metrics": {
"loss": 0.6106979154326813,
"ce_loss": 0.60069792443595,
"lb_loss": 0.9999999918200599
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 53000000,
"cumulative_training_bytes": 53002141,
"metrics": {
"loss": 0.6089451033860565,
"ce_loss": 0.598945112399378,
"lb_loss": 0.9999999917342992
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 54000000,
"cumulative_training_bytes": 54001738,
"metrics": {
"loss": 0.6072190762593643,
"ce_loss": 0.5972190852823667,
"lb_loss": 0.9999999917854974
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 55000000,
"cumulative_training_bytes": 55000564,
"metrics": {
"loss": 0.6055407572868681,
"ce_loss": 0.5955407663191996,
"lb_loss": 0.9999999920344992
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 56000000,
"cumulative_training_bytes": 56004377,
"metrics": {
"loss": 0.603901492993085,
"ce_loss": 0.5939015020344559,
"lb_loss": 0.9999999920224413
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 57000000,
"cumulative_training_bytes": 57002312,
"metrics": {
"loss": 0.6023297931422605,
"ce_loss": 0.592329802192311,
"lb_loss": 0.9999999919442221
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 58000000,
"cumulative_training_bytes": 58000186,
"metrics": {
"loss": 0.6008043852197599,
"ce_loss": 0.5908043942781911,
"lb_loss": 0.9999999920480638
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 59000000,
"cumulative_training_bytes": 59003287,
"metrics": {
"loss": 0.5993142948056599,
"ce_loss": 0.5893143038722268,
"lb_loss": 0.999999991938435
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 60000000,
"cumulative_training_bytes": 60002539,
"metrics": {
"loss": 0.5978776750521423,
"ce_loss": 0.5878776841265352,
"lb_loss": 0.9999999918992395
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 61000000,
"cumulative_training_bytes": 61001369,
"metrics": {
"loss": 0.5964548781080705,
"ce_loss": 0.5864548871900334,
"lb_loss": 0.9999999918518527
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 62000000,
"cumulative_training_bytes": 62000171,
"metrics": {
"loss": 0.595078989532466,
"ce_loss": 0.5850789986217547,
"lb_loss": 0.9999999919225172
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 63000000,
"cumulative_training_bytes": 63004361,
"metrics": {
"loss": 0.5937321655380584,
"ce_loss": 0.5837321746344748,
"lb_loss": 0.9999999919915575
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 64000000,
"cumulative_training_bytes": 64002292,
"metrics": {
"loss": 0.5924299737105143,
"ce_loss": 0.582429982813802,
"lb_loss": 0.999999992053316
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 65000000,
"cumulative_training_bytes": 65000997,
"metrics": {
"loss": 0.59116503060811,
"ce_loss": 0.5811650397180578,
"lb_loss": 0.9999999920153695
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 66000000,
"cumulative_training_bytes": 66004775,
"metrics": {
"loss": 0.589915283734672,
"ce_loss": 0.5799152928511091,
"lb_loss": 0.9999999919879169
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 67000000,
"cumulative_training_bytes": 67002689,
"metrics": {
"loss": 0.5887356604232457,
"ce_loss": 0.5787356695459479,
"lb_loss": 0.9999999919995213
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 68000000,
"cumulative_training_bytes": 68000588,
"metrics": {
"loss": 0.5875624195750525,
"ce_loss": 0.5775624287038358,
"lb_loss": 0.9999999919300427
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 69000000,
"cumulative_training_bytes": 69004376,
"metrics": {
"loss": 0.5864199688653537,
"ce_loss": 0.5764199780000703,
"lb_loss": 0.9999999920055317
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 70000000,
"cumulative_training_bytes": 70002900,
"metrics": {
"loss": 0.5852980388978469,
"ce_loss": 0.5752980480382991,
"lb_loss": 0.9999999920370275
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 71000000,
"cumulative_training_bytes": 71001777,
"metrics": {
"loss": 0.5841887338123211,
"ce_loss": 0.5741887429583475,
"lb_loss": 0.9999999920513574
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 72000000,
"cumulative_training_bytes": 72001154,
"metrics": {
"loss": 0.5831138857462038,
"ce_loss": 0.57311389489765,
"lb_loss": 0.9999999919047493
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 73000000,
"cumulative_training_bytes": 73000028,
"metrics": {
"loss": 0.5820674106895334,
"ce_loss": 0.5720674198462509,
"lb_loss": 0.9999999919165382
}
},
{
"epoch": 1,
"checkpoint_type": "epoch",
"metrics": {
"loss": 0.5816875383369134,
"ce_loss": 0.5716875474955395,
"lb_loss": 0.999999991905931,
"training_bytes": 73364866
},
"cumulative_training_bytes": 73364866,
"training_bytes_this_epoch": 73364866
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 74000000,
"cumulative_training_bytes": 74000512,
"metrics": {
"loss": 0.5005325524861576,
"ce_loss": 0.49053256202290074,
"lb_loss": 0.9999999849850895
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 75000000,
"cumulative_training_bytes": 75004045,
"metrics": {
"loss": 0.5011473647236119,
"ce_loss": 0.49114737426035504,
"lb_loss": 0.9999999903010194
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 76000000,
"cumulative_training_bytes": 76003847,
"metrics": {
"loss": 0.5011714346268598,
"ce_loss": 0.4911714441636029,
"lb_loss": 0.9999999864136472
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 77000000,
"cumulative_training_bytes": 77002822,
"metrics": {
"loss": 0.5010182196299235,
"ce_loss": 0.49101822916666665,
"lb_loss": 0.9999999865690867
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 78000000,
"cumulative_training_bytes": 78000023,
"metrics": {
"loss": 0.5009371635804116,
"ce_loss": 0.4909371731171548,
"lb_loss": 0.9999999881538886
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 79000000,
"cumulative_training_bytes": 79002990,
"metrics": {
"loss": 0.5009800124967908,
"ce_loss": 0.49098002203353397,
"lb_loss": 0.9999999891860877
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 80000000,
"cumulative_training_bytes": 80001116,
"metrics": {
"loss": 0.5009120384910143,
"ce_loss": 0.4909120480277575,
"lb_loss": 0.9999999892459114
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 81000000,
"cumulative_training_bytes": 81003932,
"metrics": {
"loss": 0.5009258906853381,
"ce_loss": 0.4909259002220812,
"lb_loss": 0.999999988805219
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 82000000,
"cumulative_training_bytes": 82002652,
"metrics": {
"loss": 0.5008152612264443,
"ce_loss": 0.49081527076318743,
"lb_loss": 0.9999999887279656
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 83000000,
"cumulative_training_bytes": 83002121,
"metrics": {
"loss": 0.5007128221408222,
"ce_loss": 0.4907128316775654,
"lb_loss": 0.9999999895062245
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 84000000,
"cumulative_training_bytes": 84000595,
"metrics": {
"loss": 0.5006207174459368,
"ce_loss": 0.49062072698268006,
"lb_loss": 0.9999999896764973
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 85000000,
"cumulative_training_bytes": 85000337,
"metrics": {
"loss": 0.5004435125986735,
"ce_loss": 0.49044352213541664,
"lb_loss": 0.9999999905377627
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 86000000,
"cumulative_training_bytes": 86003615,
"metrics": {
"loss": 0.500366971188228,
"ce_loss": 0.49036698072497126,
"lb_loss": 0.9999999906946335
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 87000000,
"cumulative_training_bytes": 87002716,
"metrics": {
"loss": 0.5002281165741704,
"ce_loss": 0.4902281261109136,
"lb_loss": 0.999999990316629
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 88000000,
"cumulative_training_bytes": 88001724,
"metrics": {
"loss": 0.5001107990919418,
"ce_loss": 0.490110808628685,
"lb_loss": 0.9999999901481558
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 89000000,
"cumulative_training_bytes": 89000615,
"metrics": {
"loss": 0.5000072579051174,
"ce_loss": 0.49000726744186046,
"lb_loss": 0.9999999906111133
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 90000000,
"cumulative_training_bytes": 90004256,
"metrics": {
"loss": 0.4999731520672778,
"ce_loss": 0.489973161604021,
"lb_loss": 0.9999999905695448
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 91000000,
"cumulative_training_bytes": 91004345,
"metrics": {
"loss": 0.4998607054852469,
"ce_loss": 0.4898607150219901,
"lb_loss": 0.99999999072671
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 92000000,
"cumulative_training_bytes": 92001910,
"metrics": {
"loss": 0.499761327124287,
"ce_loss": 0.48976133666103017,
"lb_loss": 0.999999990898042
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 93000000,
"cumulative_training_bytes": 93001551,
"metrics": {
"loss": 0.49959730913609635,
"ce_loss": 0.4895973186728395,
"lb_loss": 0.9999999908900555
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 94000000,
"cumulative_training_bytes": 94001433,
"metrics": {
"loss": 0.4994959777459166,
"ce_loss": 0.48949598728265975,
"lb_loss": 0.9999999912889829
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 95000000,
"cumulative_training_bytes": 95001556,
"metrics": {
"loss": 0.4993510381996979,
"ce_loss": 0.48935104773644106,
"lb_loss": 0.9999999910098777
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 96000000,
"cumulative_training_bytes": 96000636,
"metrics": {
"loss": 0.4992381378229398,
"ce_loss": 0.48923814735968296,
"lb_loss": 0.9999999908575566
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 97000000,
"cumulative_training_bytes": 97003789,
"metrics": {
"loss": 0.49909694559146195,
"ce_loss": 0.4890969551282051,
"lb_loss": 0.9999999909156408
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 98000000,
"cumulative_training_bytes": 98003981,
"metrics": {
"loss": 0.4989720032314127,
"ce_loss": 0.4889720127681559,
"lb_loss": 0.9999999909202922
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 99000000,
"cumulative_training_bytes": 99001943,
"metrics": {
"loss": 0.49889595580276885,
"ce_loss": 0.488895965339512,
"lb_loss": 0.9999999909471289
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 100000000,
"cumulative_training_bytes": 100000629,
"metrics": {
"loss": 0.49879610028484794,
"ce_loss": 0.4887961098215911,
"lb_loss": 0.9999999908091873
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 101000000,
"cumulative_training_bytes": 101003165,
"metrics": {
"loss": 0.4987198369544849,
"ce_loss": 0.48871984649122807,
"lb_loss": 0.9999999910802172
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 102000000,
"cumulative_training_bytes": 102000414,
"metrics": {
"loss": 0.4985755650907543,
"ce_loss": 0.48857557462749746,
"lb_loss": 0.9999999911894929
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 103000000,
"cumulative_training_bytes": 103004635,
"metrics": {
"loss": 0.4984361996690478,
"ce_loss": 0.48843620920579095,
"lb_loss": 0.9999999914585852
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 104000000,
"cumulative_training_bytes": 104002500,
"metrics": {
"loss": 0.49834367617302106,
"ce_loss": 0.4883436857097642,
"lb_loss": 0.9999999916710078
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 105000000,
"cumulative_training_bytes": 105001110,
"metrics": {
"loss": 0.4982617840456323,
"ce_loss": 0.4882617935823755,
"lb_loss": 0.9999999917969393
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 106000000,
"cumulative_training_bytes": 106004479,
"metrics": {
"loss": 0.49815445561105837,
"ce_loss": 0.48815446514780153,
"lb_loss": 0.9999999917215772
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 107000000,
"cumulative_training_bytes": 107002481,
"metrics": {
"loss": 0.49805969111546206,
"ce_loss": 0.4880597006522052,
"lb_loss": 0.9999999917611914
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 108000000,
"cumulative_training_bytes": 108000820,
"metrics": {
"loss": 0.49798761556824006,
"ce_loss": 0.4879876251049832,
"lb_loss": 0.999999992090537
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 109000000,
"cumulative_training_bytes": 109004357,
"metrics": {
"loss": 0.4978611764413551,
"ce_loss": 0.48786118597809824,
"lb_loss": 0.999999992159204
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 110000000,
"cumulative_training_bytes": 110001924,
"metrics": {
"loss": 0.4977405579834368,
"ce_loss": 0.48774056752017997,
"lb_loss": 0.9999999922230807
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 111000000,
"cumulative_training_bytes": 111000809,
"metrics": {
"loss": 0.49763489521979426,
"ce_loss": 0.4876349047565374,
"lb_loss": 0.9999999922374989
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 112000000,
"cumulative_training_bytes": 112000343,
"metrics": {
"loss": 0.49752072510687584,
"ce_loss": 0.487520734643619,
"lb_loss": 0.9999999922511718
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 113000000,
"cumulative_training_bytes": 113004390,
"metrics": {
"loss": 0.49740453195665213,
"ce_loss": 0.4874045414933953,
"lb_loss": 0.9999999924327763
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 114000000,
"cumulative_training_bytes": 114003370,
"metrics": {
"loss": 0.4973142315453375,
"ce_loss": 0.48731424108208066,
"lb_loss": 0.9999999925903078
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 115000000,
"cumulative_training_bytes": 115002294,
"metrics": {
"loss": 0.4972011996359397,
"ce_loss": 0.48720120917268284,
"lb_loss": 0.9999999926569965
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 116000000,
"cumulative_training_bytes": 116000382,
"metrics": {
"loss": 0.4970865782645987,
"ce_loss": 0.48708658780134184,
"lb_loss": 0.9999999925036688
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 117000000,
"cumulative_training_bytes": 117000510,
"metrics": {
"loss": 0.49695767275492353,
"ce_loss": 0.4869576822916667,
"lb_loss": 0.9999999926090241
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 118000000,
"cumulative_training_bytes": 118003216,
"metrics": {
"loss": 0.4968769081617471,
"ce_loss": 0.4868769176984903,
"lb_loss": 0.9999999924773979
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 119000000,
"cumulative_training_bytes": 119001261,
"metrics": {
"loss": 0.49676820662707283,
"ce_loss": 0.486768216163816,
"lb_loss": 0.9999999922367688
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 120000000,
"cumulative_training_bytes": 120000540,
"metrics": {
"loss": 0.4966631457418721,
"ce_loss": 0.48666315527861526,
"lb_loss": 0.9999999924773845
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 121000000,
"cumulative_training_bytes": 121003783,
"metrics": {
"loss": 0.4965640567796623,
"ce_loss": 0.4865640663164055,
"lb_loss": 0.9999999925388039
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 122000000,
"cumulative_training_bytes": 122001344,
"metrics": {
"loss": 0.4964622565243613,
"ce_loss": 0.4864622660611045,
"lb_loss": 0.9999999927217215
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 123000000,
"cumulative_training_bytes": 123004762,
"metrics": {
"loss": 0.4963449017705134,
"ce_loss": 0.48634491130725654,
"lb_loss": 0.9999999927349744
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 124000000,
"cumulative_training_bytes": 124003071,
"metrics": {
"loss": 0.49625051403913045,
"ce_loss": 0.4862505235758736,
"lb_loss": 0.9999999929239101
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 125000000,
"cumulative_training_bytes": 125001438,
"metrics": {
"loss": 0.49615721014920183,
"ce_loss": 0.486157219685945,
"lb_loss": 0.9999999929096719
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 126000000,
"cumulative_training_bytes": 126004765,
"metrics": {
"loss": 0.4960651325244099,
"ce_loss": 0.4860651420611531,
"lb_loss": 0.9999999929844597
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 127000000,
"cumulative_training_bytes": 127003693,
"metrics": {
"loss": 0.49597558558030314,
"ce_loss": 0.4859755951170463,
"lb_loss": 0.99999999303966
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 128000000,
"cumulative_training_bytes": 128002776,
"metrics": {
"loss": 0.49587631449830566,
"ce_loss": 0.4858763240350488,
"lb_loss": 0.9999999929817779
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 129000000,
"cumulative_training_bytes": 129001342,
"metrics": {
"loss": 0.49577192754934957,
"ce_loss": 0.48577193708609273,
"lb_loss": 0.9999999930973708
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 130000000,
"cumulative_training_bytes": 130004919,
"metrics": {
"loss": 0.49565188584543607,
"ce_loss": 0.48565189538217923,
"lb_loss": 0.9999999932349775
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 131000000,
"cumulative_training_bytes": 131003386,
"metrics": {
"loss": 0.4955454855112003,
"ce_loss": 0.4855454950479435,
"lb_loss": 0.999999993322114
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 132000000,
"cumulative_training_bytes": 132001655,
"metrics": {
"loss": 0.4954483614326657,
"ce_loss": 0.48544837096940885,
"lb_loss": 0.9999999934161384
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 133000000,
"cumulative_training_bytes": 133001280,
"metrics": {
"loss": 0.49533400802280486,
"ce_loss": 0.485334017559548,
"lb_loss": 0.9999999933519573
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 134000000,
"cumulative_training_bytes": 134000603,
"metrics": {
"loss": 0.4952326351822142,
"ce_loss": 0.4852326447189574,
"lb_loss": 0.9999999933518446
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 135000000,
"cumulative_training_bytes": 135004408,
"metrics": {
"loss": 0.4951358704479194,
"ce_loss": 0.48513587998466257,
"lb_loss": 0.9999999933053774
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 136000000,
"cumulative_training_bytes": 136002901,
"metrics": {
"loss": 0.4950331875414302,
"ce_loss": 0.4850331970781734,
"lb_loss": 0.9999999932783307
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 137000000,
"cumulative_training_bytes": 137002147,
"metrics": {
"loss": 0.4949309004891596,
"ce_loss": 0.4849309100259028,
"lb_loss": 0.9999999933247884
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 138000000,
"cumulative_training_bytes": 138000664,
"metrics": {
"loss": 0.4948386598770732,
"ce_loss": 0.48483866941381637,
"lb_loss": 0.9999999932938068
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 139000000,
"cumulative_training_bytes": 139003345,
"metrics": {
"loss": 0.4947454480302854,
"ce_loss": 0.4847454575670286,
"lb_loss": 0.9999999931806194
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 140000000,
"cumulative_training_bytes": 140001624,
"metrics": {
"loss": 0.4946506588881386,
"ce_loss": 0.48465066842488175,
"lb_loss": 0.9999999931006919
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 141000000,
"cumulative_training_bytes": 141001168,
"metrics": {
"loss": 0.494550829112816,
"ce_loss": 0.48455083864955917,
"lb_loss": 0.9999999930231249
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 142000000,
"cumulative_training_bytes": 142003856,
"metrics": {
"loss": 0.4944707291798835,
"ce_loss": 0.48447073871662666,
"lb_loss": 0.999999993099872
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 143000000,
"cumulative_training_bytes": 143002536,
"metrics": {
"loss": 0.4943706393075899,
"ce_loss": 0.48437064884433306,
"lb_loss": 0.999999993173932
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 144000000,
"cumulative_training_bytes": 144001676,
"metrics": {
"loss": 0.49428343353978393,
"ce_loss": 0.4842834430765271,
"lb_loss": 0.9999999932131705
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 145000000,
"cumulative_training_bytes": 145000756,
"metrics": {
"loss": 0.4941936364093857,
"ce_loss": 0.48419364594612885,
"lb_loss": 0.9999999931867728
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 146000000,
"cumulative_training_bytes": 146001319,
"metrics": {
"loss": 0.4941026584565154,
"ce_loss": 0.4841026679932586,
"lb_loss": 0.9999999931093816
}
},
{
"epoch": 2,
"checkpoint_type": "epoch",
"metrics": {
"loss": 0.49404458670987017,
"ce_loss": 0.48404459624661333,
"lb_loss": 0.9999999931190567,
"training_bytes": 73364908
},
"cumulative_training_bytes": 146729774,
"training_bytes_this_epoch": 73364908
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 147000000,
"cumulative_training_bytes": 147001010,
"metrics": {
"loss": 0.4805636065346854,
"ce_loss": 0.47056361607142855,
"lb_loss": 0.9999999914850507
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 148000000,
"cumulative_training_bytes": 148004362,
"metrics": {
"loss": 0.4808664947408234,
"ce_loss": 0.47086650427756654,
"lb_loss": 0.9999999929743575
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 149000000,
"cumulative_training_bytes": 149002455,
"metrics": {
"loss": 0.48084887106027174,
"ce_loss": 0.4708488805970149,
"lb_loss": 0.9999999895787188
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 150000000,
"cumulative_training_bytes": 150001378,
"metrics": {
"loss": 0.48101561546325683,
"ce_loss": 0.471015625,
"lb_loss": 0.9999999924059267
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 151000000,
"cumulative_training_bytes": 151001036,
"metrics": {
"loss": 0.4810201380228482,
"ce_loss": 0.4710201475595914,
"lb_loss": 0.9999999941816125
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 152000000,
"cumulative_training_bytes": 152004640,
"metrics": {
"loss": 0.4810783021590289,
"ce_loss": 0.4710783116957721,
"lb_loss": 0.9999999939190114
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 153000000,
"cumulative_training_bytes": 153004013,
"metrics": {
"loss": 0.4810306487901502,
"ce_loss": 0.47103065832689334,
"lb_loss": 0.9999999938276488
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 154000000,
"cumulative_training_bytes": 154004012,
"metrics": {
"loss": 0.48104295921325685,
"ce_loss": 0.47104296875,
"lb_loss": 0.9999999943574269
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 155000000,
"cumulative_training_bytes": 155002759,
"metrics": {
"loss": 0.48113014689643385,
"ce_loss": 0.471130156433177,
"lb_loss": 0.9999999966459285
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 156000000,
"cumulative_training_bytes": 156001894,
"metrics": {
"loss": 0.48115360088428194,
"ce_loss": 0.4711536104210251,
"lb_loss": 0.9999999970072981
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 157000000,
"cumulative_training_bytes": 157004920,
"metrics": {
"loss": 0.4811667406992172,
"ce_loss": 0.4711667502359604,
"lb_loss": 0.9999999969902327
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 158000000,
"cumulative_training_bytes": 158004293,
"metrics": {
"loss": 0.48113322299013855,
"ce_loss": 0.4711332325268817,
"lb_loss": 0.9999999964878123
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 159000000,
"cumulative_training_bytes": 159003028,
"metrics": {
"loss": 0.4811722976047029,
"ce_loss": 0.4711723071414461,
"lb_loss": 0.9999999965381735
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 160000000,
"cumulative_training_bytes": 160002161,
"metrics": {
"loss": 0.4812026359236149,
"ce_loss": 0.47120264546035806,
"lb_loss": 0.9999999964502897
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 161000000,
"cumulative_training_bytes": 161000072,
"metrics": {
"loss": 0.4811928899280886,
"ce_loss": 0.4711928994648318,
"lb_loss": 0.9999999957873713
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 162000000,
"cumulative_training_bytes": 162004719,
"metrics": {
"loss": 0.4811979071299235,
"ce_loss": 0.47119791666666666,
"lb_loss": 0.9999999959506686
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 163000000,
"cumulative_training_bytes": 163003159,
"metrics": {
"loss": 0.481205364234115,
"ce_loss": 0.47120537377085814,
"lb_loss": 0.9999999952046322
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 164000000,
"cumulative_training_bytes": 164002582,
"metrics": {
"loss": 0.4811905742293994,
"ce_loss": 0.4711905837661426,
"lb_loss": 0.99999999494649
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 165000000,
"cumulative_training_bytes": 165001639,
"metrics": {
"loss": 0.4812484146936178,
"ce_loss": 0.47124842423036095,
"lb_loss": 0.9999999941945582
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 166000000,
"cumulative_training_bytes": 166000287,
"metrics": {
"loss": 0.48126094054503843,
"ce_loss": 0.4712609500817816,
"lb_loss": 0.9999999941955215
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 167000000,
"cumulative_training_bytes": 167003168,
"metrics": {
"loss": 0.48131460568688755,
"ce_loss": 0.4713146152236307,
"lb_loss": 0.9999999943688508
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 168000000,
"cumulative_training_bytes": 168002141,
"metrics": {
"loss": 0.4813228442072732,
"ce_loss": 0.4713228537440164,
"lb_loss": 0.9999999944702324
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 169000000,
"cumulative_training_bytes": 169001802,
"metrics": {
"loss": 0.4812920721636705,
"ce_loss": 0.47129208170041365,
"lb_loss": 0.9999999943678607
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 170000000,
"cumulative_training_bytes": 170004652,
"metrics": {
"loss": 0.4812715562184652,
"ce_loss": 0.47127156575520834,
"lb_loss": 0.9999999946479996
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 171000000,
"cumulative_training_bytes": 171003991,
"metrics": {
"loss": 0.48125830860648494,
"ce_loss": 0.4712583181432281,
"lb_loss": 0.9999999945110385
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 172000000,
"cumulative_training_bytes": 172002412,
"metrics": {
"loss": 0.48124760714111564,
"ce_loss": 0.4712476166778588,
"lb_loss": 0.9999999945678806
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 173000000,
"cumulative_training_bytes": 173002030,
"metrics": {
"loss": 0.4812279862642904,
"ce_loss": 0.4712279958010336,
"lb_loss": 0.9999999947194114
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 174000000,
"cumulative_training_bytes": 174004724,
"metrics": {
"loss": 0.48122082379659015,
"ce_loss": 0.4712208333333333,
"lb_loss": 0.9999999946170383
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 175000000,
"cumulative_training_bytes": 175000065,
"metrics": {
"loss": 0.48117782869224485,
"ce_loss": 0.471177838228988,
"lb_loss": 0.9999999944996056
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 176000000,
"cumulative_training_bytes": 176003706,
"metrics": {
"loss": 0.48119810987273176,
"ce_loss": 0.4711981194094749,
"lb_loss": 0.9999999942439113
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 177000000,
"cumulative_training_bytes": 177001652,
"metrics": {
"loss": 0.4812033632407676,
"ce_loss": 0.4712033727775108,
"lb_loss": 0.9999999942047062
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 178000000,
"cumulative_training_bytes": 178000949,
"metrics": {
"loss": 0.4811637610090779,
"ce_loss": 0.47116377054582104,
"lb_loss": 0.9999999945654316
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 179000000,
"cumulative_training_bytes": 179003699,
"metrics": {
"loss": 0.4811420990870549,
"ce_loss": 0.4711421086237981,
"lb_loss": 0.9999999944120646
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 180000000,
"cumulative_training_bytes": 180003641,
"metrics": {
"loss": 0.4811195278612458,
"ce_loss": 0.47111953739798895,
"lb_loss": 0.999999994232369
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 181000000,
"cumulative_training_bytes": 181001865,
"metrics": {
"loss": 0.48110407533521493,
"ce_loss": 0.4711040848719581,
"lb_loss": 0.9999999940125498
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 182000000,
"cumulative_training_bytes": 182004543,
"metrics": {
"loss": 0.4810891764598204,
"ce_loss": 0.4710891859965636,
"lb_loss": 0.999999994043632
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 183000000,
"cumulative_training_bytes": 183004012,
"metrics": {
"loss": 0.4810652307386211,
"ce_loss": 0.4710652402753643,
"lb_loss": 0.9999999939526901
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 184000000,
"cumulative_training_bytes": 184003290,
"metrics": {
"loss": 0.48101411130038446,
"ce_loss": 0.4710141208371276,
"lb_loss": 0.9999999940837331
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 185000000,
"cumulative_training_bytes": 185001996,
"metrics": {
"loss": 0.48102430591682327,
"ce_loss": 0.47102431545356643,
"lb_loss": 0.9999999941173169
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 186000000,
"cumulative_training_bytes": 186000889,
"metrics": {
"loss": 0.4810144482666894,
"ce_loss": 0.47101445780343254,
"lb_loss": 0.9999999941197542
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 187000000,
"cumulative_training_bytes": 187000547,
"metrics": {
"loss": 0.4809928505625946,
"ce_loss": 0.4709928600993378,
"lb_loss": 0.9999999940359471
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 188000000,
"cumulative_training_bytes": 188003407,
"metrics": {
"loss": 0.48096928381382076,
"ce_loss": 0.4709692933505639,
"lb_loss": 0.9999999940269312
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 189000000,
"cumulative_training_bytes": 189001532,
"metrics": {
"loss": 0.4809352142817932,
"ce_loss": 0.47093522381853636,
"lb_loss": 0.999999993874081
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 190000000,
"cumulative_training_bytes": 190004203,
"metrics": {
"loss": 0.48090532134560976,
"ce_loss": 0.4709053308823529,
"lb_loss": 0.9999999939426989
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 191000000,
"cumulative_training_bytes": 191003779,
"metrics": {
"loss": 0.48086076009418444,
"ce_loss": 0.4708607696309276,
"lb_loss": 0.999999993929217
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 192000000,
"cumulative_training_bytes": 192002266,
"metrics": {
"loss": 0.48084745050127764,
"ce_loss": 0.4708474600380208,
"lb_loss": 0.9999999939993182
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 193000000,
"cumulative_training_bytes": 193001653,
"metrics": {
"loss": 0.48082509623450276,
"ce_loss": 0.4708251057712459,
"lb_loss": 0.9999999939414749
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 194000000,
"cumulative_training_bytes": 194000835,
"metrics": {
"loss": 0.4807908648478091,
"ce_loss": 0.47079087438455225,
"lb_loss": 0.9999999939716709
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 195000000,
"cumulative_training_bytes": 195004664,
"metrics": {
"loss": 0.48074686263832717,
"ce_loss": 0.47074687217507033,
"lb_loss": 0.99999999400122
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 196000000,
"cumulative_training_bytes": 196004836,
"metrics": {
"loss": 0.48070388425876953,
"ce_loss": 0.4707038937955127,
"lb_loss": 0.9999999940700358
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 197000000,
"cumulative_training_bytes": 197003791,
"metrics": {
"loss": 0.4806869147736349,
"ce_loss": 0.47068692431037806,
"lb_loss": 0.9999999940268879
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 198000000,
"cumulative_training_bytes": 198002877,
"metrics": {
"loss": 0.4806610015517758,
"ce_loss": 0.470661011088519,
"lb_loss": 0.9999999939910581
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 199000000,
"cumulative_training_bytes": 199001231,
"metrics": {
"loss": 0.48063136801427725,
"ce_loss": 0.4706313775510204,
"lb_loss": 0.9999999939455393
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 200000000,
"cumulative_training_bytes": 200001409,
"metrics": {
"loss": 0.4805955566952794,
"ce_loss": 0.4705955662320226,
"lb_loss": 0.9999999939180041
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 201000000,
"cumulative_training_bytes": 201003734,
"metrics": {
"loss": 0.4805725930385271,
"ce_loss": 0.4705726025752703,
"lb_loss": 0.9999999940358079
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 202000000,
"cumulative_training_bytes": 202002044,
"metrics": {
"loss": 0.4805435962620111,
"ce_loss": 0.47054360579875426,
"lb_loss": 0.9999999941383624
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 203000000,
"cumulative_training_bytes": 203001126,
"metrics": {
"loss": 0.48050098680104225,
"ce_loss": 0.4705009963377854,
"lb_loss": 0.999999994103737
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 204000000,
"cumulative_training_bytes": 204004123,
"metrics": {
"loss": 0.48048055313257615,
"ce_loss": 0.4704805626693193,
"lb_loss": 0.9999999941515592
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 205000000,
"cumulative_training_bytes": 205003381,
"metrics": {
"loss": 0.4804562526845083,
"ce_loss": 0.4704562622212515,
"lb_loss": 0.9999999940881398
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 206000000,
"cumulative_training_bytes": 206002109,
"metrics": {
"loss": 0.48041871080847937,
"ce_loss": 0.47041872034522253,
"lb_loss": 0.9999999941975191
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 207000000,
"cumulative_training_bytes": 207004847,
"metrics": {
"loss": 0.48040474973443376,
"ce_loss": 0.4704047592711769,
"lb_loss": 0.9999999941119376
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 208000000,
"cumulative_training_bytes": 208002759,
"metrics": {
"loss": 0.4803970944535235,
"ce_loss": 0.4703971039902667,
"lb_loss": 0.9999999939768037
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 209000000,
"cumulative_training_bytes": 209002658,
"metrics": {
"loss": 0.4803597255816092,
"ce_loss": 0.4703597351183524,
"lb_loss": 0.9999999939481073
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 210000000,
"cumulative_training_bytes": 210001152,
"metrics": {
"loss": 0.48035077764043516,
"ce_loss": 0.4703507871771783,
"lb_loss": 0.9999999939705624
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 211000000,
"cumulative_training_bytes": 211004753,
"metrics": {
"loss": 0.48031134121951813,
"ce_loss": 0.4703113507562613,
"lb_loss": 0.9999999939972691
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 212000000,
"cumulative_training_bytes": 212002762,
"metrics": {
"loss": 0.48029571871685955,
"ce_loss": 0.4702957282536027,
"lb_loss": 0.9999999940359934
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 213000000,
"cumulative_training_bytes": 213001000,
"metrics": {
"loss": 0.4802748512237924,
"ce_loss": 0.47027486076053554,
"lb_loss": 0.9999999941389638
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 214000000,
"cumulative_training_bytes": 214004278,
"metrics": {
"loss": 0.48026237785064424,
"ce_loss": 0.4702623873873874,
"lb_loss": 0.9999999940760501
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 215000000,
"cumulative_training_bytes": 215002179,
"metrics": {
"loss": 0.4802387263928783,
"ce_loss": 0.47023873592962145,
"lb_loss": 0.9999999940484248
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 216000000,
"cumulative_training_bytes": 216004742,
"metrics": {
"loss": 0.48021305715383633,
"ce_loss": 0.4702130666905795,
"lb_loss": 0.9999999940762461
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 217000000,
"cumulative_training_bytes": 217002756,
"metrics": {
"loss": 0.480171780298706,
"ce_loss": 0.47017178983544916,
"lb_loss": 0.9999999941522144
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 218000000,
"cumulative_training_bytes": 218000797,
"metrics": {
"loss": 0.4801445482863861,
"ce_loss": 0.47014455782312925,
"lb_loss": 0.9999999941895609
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 219000000,
"cumulative_training_bytes": 219003956,
"metrics": {
"loss": 0.4801110330942356,
"ce_loss": 0.47011104263097875,
"lb_loss": 0.9999999942022717
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 220000000,
"cumulative_training_bytes": 220002155,
"metrics": {
"loss": 0.48008201318376237,
"ce_loss": 0.47008202272050553,
"lb_loss": 0.9999999942024199
}
},
{
"epoch": 3,
"checkpoint_type": "epoch",
"metrics": {
"loss": 0.48007934931477336,
"ce_loss": 0.4700793588515165,
"lb_loss": 0.9999999941943272,
"training_bytes": 73364838
},
"cumulative_training_bytes": 220094612,
"training_bytes_this_epoch": 73364838
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 221000000,
"cumulative_training_bytes": 221000512,
"metrics": {
"loss": 0.4711359363188718,
"ce_loss": 0.46113594585561496,
"lb_loss": 0.9999999980875515
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 222000000,
"cumulative_training_bytes": 222003838,
"metrics": {
"loss": 0.47099697650386596,
"ce_loss": 0.4609969860406091,
"lb_loss": 0.9999999947051712
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 223000000,
"cumulative_training_bytes": 223001840,
"metrics": {
"loss": 0.4709537665049235,
"ce_loss": 0.46095377604166665,
"lb_loss": 0.9999999970197677
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 224000000,
"cumulative_training_bytes": 224000184,
"metrics": {
"loss": 0.471029573279634,
"ce_loss": 0.4610295828163772,
"lb_loss": 0.9999999985949277
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 225000000,
"cumulative_training_bytes": 225003512,
"metrics": {
"loss": 0.4712440520377879,
"ce_loss": 0.4612440615745311,
"lb_loss": 0.9999999992939232
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 226000000,
"cumulative_training_bytes": 226001385,
"metrics": {
"loss": 0.4713188224977113,
"ce_loss": 0.46131883203445445,
"lb_loss": 0.9999999968217376
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 227000000,
"cumulative_training_bytes": 227000179,
"metrics": {
"loss": 0.47135689835799366,
"ce_loss": 0.4613569078947368,
"lb_loss": 0.9999999961936683
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 228000000,
"cumulative_training_bytes": 228003643,
"metrics": {
"loss": 0.47145688767526667,
"ce_loss": 0.46145689721200983,
"lb_loss": 0.9999999966399342
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 229000000,
"cumulative_training_bytes": 229002271,
"metrics": {
"loss": 0.4714496810236486,
"ce_loss": 0.46144969056039176,
"lb_loss": 0.9999999964976596
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 230000000,
"cumulative_training_bytes": 230001193,
"metrics": {
"loss": 0.471488837393296,
"ce_loss": 0.46148884693003916,
"lb_loss": 0.9999999964423842
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 231000000,
"cumulative_training_bytes": 231004533,
"metrics": {
"loss": 0.4715379180509956,
"ce_loss": 0.46153792758773876,
"lb_loss": 0.9999999968754562
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 232000000,
"cumulative_training_bytes": 232002563,
"metrics": {
"loss": 0.47164020210346846,
"ce_loss": 0.46164021164021163,
"lb_loss": 0.999999997040388
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 233000000,
"cumulative_training_bytes": 233001313,
"metrics": {
"loss": 0.4716305821361821,
"ce_loss": 0.46163059167292525,
"lb_loss": 0.9999999968664475
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 234000000,
"cumulative_training_bytes": 234004559,
"metrics": {
"loss": 0.4716438820137795,
"ce_loss": 0.46164389155052266,
"lb_loss": 0.9999999965317158
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 235000000,
"cumulative_training_bytes": 235002620,
"metrics": {
"loss": 0.47168165902632575,
"ce_loss": 0.4616816685630689,
"lb_loss": 0.9999999968996284
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 236000000,
"cumulative_training_bytes": 236000507,
"metrics": {
"loss": 0.47173076152510934,
"ce_loss": 0.4617307710618525,
"lb_loss": 0.9999999962769799
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 237000000,
"cumulative_training_bytes": 237000271,
"metrics": {
"loss": 0.47172198820551603,
"ce_loss": 0.4617219977422592,
"lb_loss": 0.9999999962747097
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 238000000,
"cumulative_training_bytes": 238002987,
"metrics": {
"loss": 0.4717779420870728,
"ce_loss": 0.46177795162381596,
"lb_loss": 0.9999999964834067
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 239000000,
"cumulative_training_bytes": 239002206,
"metrics": {
"loss": 0.4717871323819956,
"ce_loss": 0.46178714191873876,
"lb_loss": 0.9999999965774313
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 240000000,
"cumulative_training_bytes": 240000715,
"metrics": {
"loss": 0.4718072897388838,
"ce_loss": 0.46180729927562697,
"lb_loss": 0.9999999965749462
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 241000000,
"cumulative_training_bytes": 241004242,
"metrics": {
"loss": 0.4718284848999745,
"ce_loss": 0.46182849443671764,
"lb_loss": 0.999999996628759
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 242000000,
"cumulative_training_bytes": 242003525,
"metrics": {
"loss": 0.4718596074433453,
"ce_loss": 0.4618596169800885,
"lb_loss": 0.9999999967032829
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 243000000,
"cumulative_training_bytes": 243001106,
"metrics": {
"loss": 0.47192603786063303,
"ce_loss": 0.4619260473973762,
"lb_loss": 0.9999999964307841
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 244000000,
"cumulative_training_bytes": 244003951,
"metrics": {
"loss": 0.4719613666542157,
"ce_loss": 0.46196137619095884,
"lb_loss": 0.9999999957347578
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 245000000,
"cumulative_training_bytes": 245002710,
"metrics": {
"loss": 0.4719913950409957,
"ce_loss": 0.46199140457773885,
"lb_loss": 0.9999999953838006
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 246000000,
"cumulative_training_bytes": 246001859,
"metrics": {
"loss": 0.47201764715175076,
"ce_loss": 0.4620176566884939,
"lb_loss": 0.9999999954167429
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 247000000,
"cumulative_training_bytes": 247000039,
"metrics": {
"loss": 0.47202893232057985,
"ce_loss": 0.462028941857323,
"lb_loss": 0.9999999953506015
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 248000000,
"cumulative_training_bytes": 248004378,
"metrics": {
"loss": 0.4720249720106691,
"ce_loss": 0.4620249815474123,
"lb_loss": 0.9999999952175502
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 249000000,
"cumulative_training_bytes": 249002307,
"metrics": {
"loss": 0.47205487183481953,
"ce_loss": 0.4620548813715627,
"lb_loss": 0.9999999950029641
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 250000000,
"cumulative_training_bytes": 250002323,
"metrics": {
"loss": 0.47208340821042055,
"ce_loss": 0.4620834177471637,
"lb_loss": 0.9999999950152355
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 251000000,
"cumulative_training_bytes": 251001550,
"metrics": {
"loss": 0.4721088753440598,
"ce_loss": 0.462108884880803,
"lb_loss": 0.9999999951949832
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 252000000,
"cumulative_training_bytes": 252003554,
"metrics": {
"loss": 0.47214146756336317,
"ce_loss": 0.46214147710010633,
"lb_loss": 0.9999999949748477
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 253000000,
"cumulative_training_bytes": 253002278,
"metrics": {
"loss": 0.4721357178347401,
"ce_loss": 0.4621357273714833,
"lb_loss": 0.9999999954872901
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 254000000,
"cumulative_training_bytes": 254000659,
"metrics": {
"loss": 0.4721559947520345,
"ce_loss": 0.4621560042887777,
"lb_loss": 0.9999999952026569
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 255000000,
"cumulative_training_bytes": 255004377,
"metrics": {
"loss": 0.4721760244642288,
"ce_loss": 0.46217603400097196,
"lb_loss": 0.999999995059154
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 256000000,
"cumulative_training_bytes": 256003159,
"metrics": {
"loss": 0.4721671577148767,
"ce_loss": 0.4621671672516199,
"lb_loss": 0.9999999949229845
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 257000000,
"cumulative_training_bytes": 257002287,
"metrics": {
"loss": 0.4721662097632306,
"ce_loss": 0.46216621929997376,
"lb_loss": 0.9999999948098398
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 258000000,
"cumulative_training_bytes": 258004823,
"metrics": {
"loss": 0.4721808876471208,
"ce_loss": 0.46218089718386396,
"lb_loss": 0.9999999949395878
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 259000000,
"cumulative_training_bytes": 259003094,
"metrics": {
"loss": 0.4721888822503504,
"ce_loss": 0.46218889178709355,
"lb_loss": 0.9999999949951999
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 260000000,
"cumulative_training_bytes": 260003317,
"metrics": {
"loss": 0.4721786844387214,
"ce_loss": 0.46217869397546457,
"lb_loss": 0.9999999949394331
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 261000000,
"cumulative_training_bytes": 261002561,
"metrics": {
"loss": 0.47218055651077434,
"ce_loss": 0.4621805660475175,
"lb_loss": 0.99999999493583
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 262000000,
"cumulative_training_bytes": 262000974,
"metrics": {
"loss": 0.472164943109295,
"ce_loss": 0.4621649526460382,
"lb_loss": 0.9999999951530288
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 263000000,
"cumulative_training_bytes": 263000368,
"metrics": {
"loss": 0.4721608691775264,
"ce_loss": 0.4621608787142696,
"lb_loss": 0.9999999952254329
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 264000000,
"cumulative_training_bytes": 264000070,
"metrics": {
"loss": 0.4721552553826562,
"ce_loss": 0.46215526491939934,
"lb_loss": 0.9999999952748002
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 265000000,
"cumulative_training_bytes": 265003237,
"metrics": {
"loss": 0.4721514470424883,
"ce_loss": 0.46215145657923146,
"lb_loss": 0.9999999955090628
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 266000000,
"cumulative_training_bytes": 266001732,
"metrics": {
"loss": 0.472134939644883,
"ce_loss": 0.4621349491816262,
"lb_loss": 0.9999999954745787
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 267000000,
"cumulative_training_bytes": 267000618,
"metrics": {
"loss": 0.4721354851795652,
"ce_loss": 0.4621354947163084,
"lb_loss": 0.9999999952875617
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 268000000,
"cumulative_training_bytes": 268000768,
"metrics": {
"loss": 0.4721304739180231,
"ce_loss": 0.46213048345476626,
"lb_loss": 0.9999999954340502
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 269000000,
"cumulative_training_bytes": 269004654,
"metrics": {
"loss": 0.4721118046916244,
"ce_loss": 0.46211181422836756,
"lb_loss": 0.9999999954804685
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 270000000,
"cumulative_training_bytes": 270004377,
"metrics": {
"loss": 0.47210367398681197,
"ce_loss": 0.46210368352355513,
"lb_loss": 0.9999999955477443
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 271000000,
"cumulative_training_bytes": 271003354,
"metrics": {
"loss": 0.4720871205580097,
"ce_loss": 0.4620871300947529,
"lb_loss": 0.9999999955669719
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 272000000,
"cumulative_training_bytes": 272002318,
"metrics": {
"loss": 0.47207904797236305,
"ce_loss": 0.4620790575091062,
"lb_loss": 0.9999999955075233
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 273000000,
"cumulative_training_bytes": 273000768,
"metrics": {
"loss": 0.4720791549345296,
"ce_loss": 0.4620791644712728,
"lb_loss": 0.9999999955923258
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 274000000,
"cumulative_training_bytes": 274000695,
"metrics": {
"loss": 0.47207574480267583,
"ce_loss": 0.462075754339419,
"lb_loss": 0.9999999954434798
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 275000000,
"cumulative_training_bytes": 275004246,
"metrics": {
"loss": 0.4720716687366985,
"ce_loss": 0.46207167827344164,
"lb_loss": 0.9999999954899187
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 276000000,
"cumulative_training_bytes": 276003111,
"metrics": {
"loss": 0.472073087335005,
"ce_loss": 0.4620730968717482,
"lb_loss": 0.9999999956066642
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 277000000,
"cumulative_training_bytes": 277000979,
"metrics": {
"loss": 0.4720668020474279,
"ce_loss": 0.46206681158417107,
"lb_loss": 0.9999999956583769
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 278000000,
"cumulative_training_bytes": 278004701,
"metrics": {
"loss": 0.4720647272778236,
"ce_loss": 0.4620647368145668,
"lb_loss": 0.9999999954741388
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 279000000,
"cumulative_training_bytes": 279002393,
"metrics": {
"loss": 0.4720607262051711,
"ce_loss": 0.46206073574191425,
"lb_loss": 0.9999999954478553
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 280000000,
"cumulative_training_bytes": 280000517,
"metrics": {
"loss": 0.4720587552322137,
"ce_loss": 0.46205876476895685,
"lb_loss": 0.9999999954948015
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 281000000,
"cumulative_training_bytes": 281003892,
"metrics": {
"loss": 0.4720639106916873,
"ce_loss": 0.46206392022843046,
"lb_loss": 0.9999999954409373
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 282000000,
"cumulative_training_bytes": 282002709,
"metrics": {
"loss": 0.4720607285212052,
"ce_loss": 0.46206073805794834,
"lb_loss": 0.9999999953744555
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 283000000,
"cumulative_training_bytes": 283002118,
"metrics": {
"loss": 0.4720594536067525,
"ce_loss": 0.46205946314349566,
"lb_loss": 0.9999999952274025
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 284000000,
"cumulative_training_bytes": 284001597,
"metrics": {
"loss": 0.4720484395794759,
"ce_loss": 0.46204844911621906,
"lb_loss": 0.9999999951392055
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 285000000,
"cumulative_training_bytes": 285004657,
"metrics": {
"loss": 0.47204512196112824,
"ce_loss": 0.4620451314978714,
"lb_loss": 0.9999999952588732
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 286000000,
"cumulative_training_bytes": 286003945,
"metrics": {
"loss": 0.47203695832828074,
"ce_loss": 0.4620369678650239,
"lb_loss": 0.9999999952649491
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 287000000,
"cumulative_training_bytes": 287003359,
"metrics": {
"loss": 0.47202351910248586,
"ce_loss": 0.462023528639229,
"lb_loss": 0.9999999952794815
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 288000000,
"cumulative_training_bytes": 288002717,
"metrics": {
"loss": 0.4720050370471078,
"ce_loss": 0.46200504658385094,
"lb_loss": 0.999999995314863
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 289000000,
"cumulative_training_bytes": 289000675,
"metrics": {
"loss": 0.4720141621282818,
"ce_loss": 0.46201417166502495,
"lb_loss": 0.9999999954834164
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 290000000,
"cumulative_training_bytes": 290004466,
"metrics": {
"loss": 0.47201130287657167,
"ce_loss": 0.46201131241331483,
"lb_loss": 0.9999999954862502
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 291000000,
"cumulative_training_bytes": 291003703,
"metrics": {
"loss": 0.47201100146250474,
"ce_loss": 0.4620110109992479,
"lb_loss": 0.9999999954479428
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 292000000,
"cumulative_training_bytes": 292003073,
"metrics": {
"loss": 0.47199688168662524,
"ce_loss": 0.4619968912233684,
"lb_loss": 0.9999999953785503
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 293000000,
"cumulative_training_bytes": 293002336,
"metrics": {
"loss": 0.471982888995309,
"ce_loss": 0.46198289853205216,
"lb_loss": 0.999999995291241
}
},
{
"epoch": 4,
"checkpoint_type": "epoch",
"metrics": {
"loss": 0.4719807145017819,
"ce_loss": 0.4619807240385251,
"lb_loss": 0.9999999953011074,
"training_bytes": 73364897
},
"cumulative_training_bytes": 293459509,
"training_bytes_this_epoch": 73364897
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 294000000,
"cumulative_training_bytes": 294002959,
"metrics": {
"loss": 0.46361327171325684,
"ce_loss": 0.45361328125,
"lb_loss": 1.0000000010643686
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 295000000,
"cumulative_training_bytes": 295000958,
"metrics": {
"loss": 0.4638804444726908,
"ce_loss": 0.45388045400943394,
"lb_loss": 0.9999999983130761
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 296000000,
"cumulative_training_bytes": 296004583,
"metrics": {
"loss": 0.4637797523680187,
"ce_loss": 0.4537797619047619,
"lb_loss": 0.9999999964804877
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 297000000,
"cumulative_training_bytes": 297004491,
"metrics": {
"loss": 0.4639212028435578,
"ce_loss": 0.453921212380301,
"lb_loss": 0.99999999673846
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 298000000,
"cumulative_training_bytes": 298003144,
"metrics": {
"loss": 0.4639024884621896,
"ce_loss": 0.4539024979989328,
"lb_loss": 0.9999999972010626
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 299000000,
"cumulative_training_bytes": 299002426,
"metrics": {
"loss": 0.46409215472397425,
"ce_loss": 0.4540921642607174,
"lb_loss": 0.9999999960889341
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 300000000,
"cumulative_training_bytes": 300000259,
"metrics": {
"loss": 0.4642571948739314,
"ce_loss": 0.4542572044106746,
"lb_loss": 0.9999999966419918
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 301000000,
"cumulative_training_bytes": 301004717,
"metrics": {
"loss": 0.4644429732717401,
"ce_loss": 0.4544429828084833,
"lb_loss": 0.9999999972036381
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 302000000,
"cumulative_training_bytes": 302003675,
"metrics": {
"loss": 0.4645393988060491,
"ce_loss": 0.4545394083427923,
"lb_loss": 0.9999999968201836
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 303000000,
"cumulative_training_bytes": 303002992,
"metrics": {
"loss": 0.46463151675898856,
"ce_loss": 0.4546315262957317,
"lb_loss": 0.9999999958506929
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 304000000,
"cumulative_training_bytes": 304002454,
"metrics": {
"loss": 0.46469988840483917,
"ce_loss": 0.45469989794158233,
"lb_loss": 0.9999999959148611
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 305000000,
"cumulative_training_bytes": 305000421,
"metrics": {
"loss": 0.46478842927628206,
"ce_loss": 0.4547884388130252,
"lb_loss": 0.9999999954169538
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 306000000,
"cumulative_training_bytes": 306000366,
"metrics": {
"loss": 0.4648084845177812,
"ce_loss": 0.45480849405452434,
"lb_loss": 0.9999999952288625
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 307000000,
"cumulative_training_bytes": 307003767,
"metrics": {
"loss": 0.46488371204399437,
"ce_loss": 0.45488372158073753,
"lb_loss": 0.9999999947501816
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 308000000,
"cumulative_training_bytes": 308003273,
"metrics": {
"loss": 0.46492637217382704,
"ce_loss": 0.4549263817105702,
"lb_loss": 0.9999999948722913
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 309000000,
"cumulative_training_bytes": 309001955,
"metrics": {
"loss": 0.4650177849250353,
"ce_loss": 0.4550177944617785,
"lb_loss": 0.9999999948857169
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 310000000,
"cumulative_training_bytes": 310000618,
"metrics": {
"loss": 0.46512678612435326,
"ce_loss": 0.45512679566109643,
"lb_loss": 0.9999999947577269
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 311000000,
"cumulative_training_bytes": 311003605,
"metrics": {
"loss": 0.4651898614209683,
"ce_loss": 0.45518987095771146,
"lb_loss": 0.9999999944975259
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 312000000,
"cumulative_training_bytes": 312002374,
"metrics": {
"loss": 0.46525381499254553,
"ce_loss": 0.4552538245292887,
"lb_loss": 0.9999999948718807
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 313000000,
"cumulative_training_bytes": 313000646,
"metrics": {
"loss": 0.46529959886896405,
"ce_loss": 0.4552996084057072,
"lb_loss": 0.9999999948529984
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 314000000,
"cumulative_training_bytes": 314003740,
"metrics": {
"loss": 0.4653735964934669,
"ce_loss": 0.45537360603021004,
"lb_loss": 0.9999999949075097
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 315000000,
"cumulative_training_bytes": 315002206,
"metrics": {
"loss": 0.4654086916505177,
"ce_loss": 0.45540870118726084,
"lb_loss": 0.9999999946338379
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 316000000,
"cumulative_training_bytes": 316001950,
"metrics": {
"loss": 0.4654431973357025,
"ce_loss": 0.45544320687244566,
"lb_loss": 0.9999999943715984
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 317000000,
"cumulative_training_bytes": 317000791,
"metrics": {
"loss": 0.46548764816150606,
"ce_loss": 0.4554876576982492,
"lb_loss": 0.9999999944999216
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 318000000,
"cumulative_training_bytes": 318004641,
"metrics": {
"loss": 0.4654948232862517,
"ce_loss": 0.45549483282299486,
"lb_loss": 0.9999999947837105
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 319000000,
"cumulative_training_bytes": 319003086,
"metrics": {
"loss": 0.4655434100484884,
"ce_loss": 0.4555434195852316,
"lb_loss": 0.9999999944898516
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 320000000,
"cumulative_training_bytes": 320001778,
"metrics": {
"loss": 0.465581560727232,
"ce_loss": 0.45558157026397517,
"lb_loss": 0.9999999946863233
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 321000000,
"cumulative_training_bytes": 321000981,
"metrics": {
"loss": 0.46562760379952445,
"ce_loss": 0.4556276133362676,
"lb_loss": 0.9999999947950874
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 322000000,
"cumulative_training_bytes": 322004196,
"metrics": {
"loss": 0.4656789493981982,
"ce_loss": 0.45567895893494137,
"lb_loss": 0.9999999946541104
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 323000000,
"cumulative_training_bytes": 323002832,
"metrics": {
"loss": 0.46569165251397077,
"ce_loss": 0.45569166205071393,
"lb_loss": 0.9999999947468088
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 324000000,
"cumulative_training_bytes": 324001797,
"metrics": {
"loss": 0.4657140671024059,
"ce_loss": 0.4557140766391491,
"lb_loss": 0.999999994833444
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 325000000,
"cumulative_training_bytes": 325000041,
"metrics": {
"loss": 0.4657587817776304,
"ce_loss": 0.4557587913143736,
"lb_loss": 0.9999999947404972
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 326000000,
"cumulative_training_bytes": 326004479,
"metrics": {
"loss": 0.4657744545760399,
"ce_loss": 0.45577446411278305,
"lb_loss": 0.9999999946984546
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 327000000,
"cumulative_training_bytes": 327002957,
"metrics": {
"loss": 0.46581329000792293,
"ce_loss": 0.4558132995446661,
"lb_loss": 0.9999999949338637
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 328000000,
"cumulative_training_bytes": 328000993,
"metrics": {
"loss": 0.46584905699891094,
"ce_loss": 0.4558490665356541,
"lb_loss": 0.9999999950636244
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 329000000,
"cumulative_training_bytes": 329004548,
"metrics": {
"loss": 0.46587311273511606,
"ce_loss": 0.4558731222718592,
"lb_loss": 0.9999999950647906
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 330000000,
"cumulative_training_bytes": 330003193,
"metrics": {
"loss": 0.46590269337887313,
"ce_loss": 0.4559027029156163,
"lb_loss": 0.9999999950652384
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 331000000,
"cumulative_training_bytes": 331001539,
"metrics": {
"loss": 0.4659443212297556,
"ce_loss": 0.45594433076649876,
"lb_loss": 0.9999999952273176
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 332000000,
"cumulative_training_bytes": 332000431,
"metrics": {
"loss": 0.4659673291222077,
"ce_loss": 0.45596733865895084,
"lb_loss": 0.999999995358501
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 333000000,
"cumulative_training_bytes": 333002833,
"metrics": {
"loss": 0.4660065443039876,
"ce_loss": 0.45600655384073074,
"lb_loss": 0.9999999952570605
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 334000000,
"cumulative_training_bytes": 334001068,
"metrics": {
"loss": 0.4660331893839696,
"ce_loss": 0.45603319892071276,
"lb_loss": 0.9999999951315508
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 335000000,
"cumulative_training_bytes": 335003509,
"metrics": {
"loss": 0.46606596045683835,
"ce_loss": 0.4560659699935815,
"lb_loss": 0.9999999950961286
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 336000000,
"cumulative_training_bytes": 336002128,
"metrics": {
"loss": 0.46609774687351324,
"ce_loss": 0.4560977564102564,
"lb_loss": 0.9999999952791763
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 337000000,
"cumulative_training_bytes": 337000277,
"metrics": {
"loss": 0.4661180713423349,
"ce_loss": 0.45611808087907807,
"lb_loss": 0.9999999952879081
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 338000000,
"cumulative_training_bytes": 338003542,
"metrics": {
"loss": 0.46613396786584715,
"ce_loss": 0.4561339774025903,
"lb_loss": 0.9999999951345795
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 339000000,
"cumulative_training_bytes": 339002579,
"metrics": {
"loss": 0.46615904976972905,
"ce_loss": 0.4561590593064722,
"lb_loss": 0.9999999952349278
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 340000000,
"cumulative_training_bytes": 340001843,
"metrics": {
"loss": 0.466160675684611,
"ce_loss": 0.45616068522135417,
"lb_loss": 0.9999999953371783
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 341000000,
"cumulative_training_bytes": 341000740,
"metrics": {
"loss": 0.46616024152128255,
"ce_loss": 0.4561602510580257,
"lb_loss": 0.999999995264938
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 342000000,
"cumulative_training_bytes": 342004048,
"metrics": {
"loss": 0.46617922308584747,
"ce_loss": 0.45617923262259064,
"lb_loss": 0.9999999950532847
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 343000000,
"cumulative_training_bytes": 343002454,
"metrics": {
"loss": 0.46619792751678457,
"ce_loss": 0.45619793705352774,
"lb_loss": 0.9999999951296723
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 344000000,
"cumulative_training_bytes": 344004748,
"metrics": {
"loss": 0.46621765693409895,
"ce_loss": 0.4562176664708421,
"lb_loss": 0.9999999949462396
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 345000000,
"cumulative_training_bytes": 345002981,
"metrics": {
"loss": 0.46624444264770004,
"ce_loss": 0.4562444521844432,
"lb_loss": 0.9999999950441586
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 346000000,
"cumulative_training_bytes": 346003277,
"metrics": {
"loss": 0.46624570144544913,
"ce_loss": 0.4562457109821923,
"lb_loss": 0.9999999948028797
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 347000000,
"cumulative_training_bytes": 347001483,
"metrics": {
"loss": 0.46624762068102216,
"ce_loss": 0.4562476302177653,
"lb_loss": 0.9999999947594974
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 348000000,
"cumulative_training_bytes": 348004310,
"metrics": {
"loss": 0.46627123857453584,
"ce_loss": 0.456271248111279,
"lb_loss": 0.999999994612219
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 349000000,
"cumulative_training_bytes": 349004164,
"metrics": {
"loss": 0.46626359431897824,
"ce_loss": 0.4562636038557214,
"lb_loss": 0.9999999945946386
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 350000000,
"cumulative_training_bytes": 350004442,
"metrics": {
"loss": 0.4662550478562946,
"ce_loss": 0.4562550573930378,
"lb_loss": 0.9999999947105541
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 351000000,
"cumulative_training_bytes": 351002177,
"metrics": {
"loss": 0.466267367664369,
"ce_loss": 0.45626737720111216,
"lb_loss": 0.999999994822446
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 352000000,
"cumulative_training_bytes": 352000011,
"metrics": {
"loss": 0.46628234035973715,
"ce_loss": 0.4562823498964803,
"lb_loss": 0.9999999949601373
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 353000000,
"cumulative_training_bytes": 353004120,
"metrics": {
"loss": 0.46630100515956036,
"ce_loss": 0.4563010146963035,
"lb_loss": 0.9999999949043415
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 354000000,
"cumulative_training_bytes": 354003159,
"metrics": {
"loss": 0.46630914099777,
"ce_loss": 0.45630915053451315,
"lb_loss": 0.9999999947640699
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 355000000,
"cumulative_training_bytes": 355002650,
"metrics": {
"loss": 0.4663111660186373,
"ce_loss": 0.4563111755553805,
"lb_loss": 0.9999999947222609
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 356000000,
"cumulative_training_bytes": 356000221,
"metrics": {
"loss": 0.46632584438767544,
"ce_loss": 0.4563258539244186,
"lb_loss": 0.9999999946402025
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 357000000,
"cumulative_training_bytes": 357003562,
"metrics": {
"loss": 0.4663275968949346,
"ce_loss": 0.45632760643167775,
"lb_loss": 0.9999999947339453
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 358000000,
"cumulative_training_bytes": 358002372,
"metrics": {
"loss": 0.46632777510045353,
"ce_loss": 0.4563277846371967,
"lb_loss": 0.999999994712455
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 359000000,
"cumulative_training_bytes": 359001228,
"metrics": {
"loss": 0.4663366162306953,
"ce_loss": 0.45633662576743844,
"lb_loss": 0.9999999946960287
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 360000000,
"cumulative_training_bytes": 360004692,
"metrics": {
"loss": 0.46634837241174876,
"ce_loss": 0.4563483819484919,
"lb_loss": 0.9999999946848255
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 361000000,
"cumulative_training_bytes": 361003194,
"metrics": {
"loss": 0.46635145987719595,
"ce_loss": 0.4563514694139391,
"lb_loss": 0.9999999946350686
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 362000000,
"cumulative_training_bytes": 362001994,
"metrics": {
"loss": 0.46635404292824484,
"ce_loss": 0.456354052464988,
"lb_loss": 0.999999994725887
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 363000000,
"cumulative_training_bytes": 363000593,
"metrics": {
"loss": 0.4663513775850499,
"ce_loss": 0.4563513871217931,
"lb_loss": 0.9999999945730852
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 364000000,
"cumulative_training_bytes": 364004905,
"metrics": {
"loss": 0.4663431971157206,
"ce_loss": 0.45634320665246375,
"lb_loss": 0.9999999945724587
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 365000000,
"cumulative_training_bytes": 365004297,
"metrics": {
"loss": 0.4663444636370049,
"ce_loss": 0.45634447317374804,
"lb_loss": 0.9999999944583877
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 366000000,
"cumulative_training_bytes": 366004177,
"metrics": {
"loss": 0.46634177937590804,
"ce_loss": 0.4563417889126512,
"lb_loss": 0.9999999944191601
}
},
{
"epoch": 5,
"checkpoint_type": "epoch",
"metrics": {
"loss": 0.46634681425067503,
"ce_loss": 0.4563468237874182,
"lb_loss": 0.9999999944030793,
"training_bytes": 73364767
},
"cumulative_training_bytes": 366824276,
"training_bytes_this_epoch": 73364767
}
]
}