PI1M-1B / metadata.json
jordiferrero's picture
Add files using upload-large-folder tool
8b36b54 verified
{
"run_name": "run_large_20251112_150502",
"timestamp": "20251112_150502",
"phase": "large",
"config": {
"arch_layout": [
"m4",
[
"T22"
],
"m4"
],
"d_model": [
1024,
1536
],
"d_intermediate": [
0,
4096
],
"vocab_size": 256,
"ssm_cfg": {
"chunk_size": 256,
"d_conv": 4,
"d_state": 128,
"expand": 2
},
"attn_cfg": {
"num_heads": [
16,
16
],
"rotary_emb_dim": [
32,
48
],
"window_size": [
1023,
-1
]
},
"tie_embeddings": false
},
"training_args": {
"data": "datasets/PI1M/PI1M_v2.csv",
"max_samples": null,
"batch_size": 16,
"epochs": 22,
"lr": 0.0001,
"weight_decay": 0.1,
"gradient_accumulation": 8,
"concatenate": true,
"num_concatenate": 10,
"concatenate_separator": " ",
"checkpoint_bytes": 1000000,
"num_test_samples": 5,
"num_visualize": 5,
"skip_visualization": false
},
"dataset_info": {
"train_size": 99574,
"test_size": 5,
"test_smiles_file": "checkpoints/run_large_20251112_150502/test_smiles.txt"
},
"model_info": {
"num_parameters": 622923776,
"device": "cuda",
"dtype": "torch.bfloat16",
"use_amp": true
},
"training_history": [
{
"checkpoint_type": "bytes",
"bytes_threshold": 1000000,
"cumulative_training_bytes": 1006216,
"metrics": {
"loss": 2.65240060560631,
"ce_loss": 2.6424005681818183,
"lb_loss": 0.9999999900658926
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 2000000,
"cumulative_training_bytes": 2003501,
"metrics": {
"loss": 1.91590709904678,
"ce_loss": 1.9059070849236641,
"lb_loss": 0.9999999920375474
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 3000000,
"cumulative_training_bytes": 3003412,
"metrics": {
"loss": 1.5786327193408218,
"ce_loss": 1.5686327131043256,
"lb_loss": 0.9999999945400325
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 4000000,
"cumulative_training_bytes": 4003745,
"metrics": {
"loss": 1.373363253724484,
"ce_loss": 1.3633632514312977,
"lb_loss": 0.9999999956775257
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 5000000,
"cumulative_training_bytes": 5000057,
"metrics": {
"loss": 1.2337313646205703,
"ce_loss": 1.223731364678899,
"lb_loss": 0.9999999960810402
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 6000000,
"cumulative_training_bytes": 6004365,
"metrics": {
"loss": 1.1314371002707513,
"ce_loss": 1.121437101910828,
"lb_loss": 0.9999999965831732
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 7000000,
"cumulative_training_bytes": 7005346,
"metrics": {
"loss": 1.0538173757786313,
"ce_loss": 1.043817378548035,
"lb_loss": 0.999999995575201
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 8000000,
"cumulative_training_bytes": 8001141,
"metrics": {
"loss": 0.9934003011218449,
"ce_loss": 0.9834003047323135,
"lb_loss": 0.9999999950424435
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 9000000,
"cumulative_training_bytes": 9001157,
"metrics": {
"loss": 0.944607644944041,
"ce_loss": 0.9346076492141037,
"lb_loss": 0.999999994632037
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 10000000,
"cumulative_training_bytes": 10003863,
"metrics": {
"loss": 0.9043117426221888,
"ce_loss": 0.8943117474197247,
"lb_loss": 0.9999999956253471
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 11000000,
"cumulative_training_bytes": 11001271,
"metrics": {
"loss": 0.8709831310744411,
"ce_loss": 0.8609831363004172,
"lb_loss": 0.9999999957306827
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 12000000,
"cumulative_training_bytes": 12005542,
"metrics": {
"loss": 0.8426262691113658,
"ce_loss": 0.8326262746972594,
"lb_loss": 0.9999999964670288
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 13000000,
"cumulative_training_bytes": 13006694,
"metrics": {
"loss": 0.8181169518302469,
"ce_loss": 0.8081169577205882,
"lb_loss": 0.9999999966340907
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 14000000,
"cumulative_training_bytes": 14004830,
"metrics": {
"loss": 0.797012246241335,
"ce_loss": 0.7870122523907104,
"lb_loss": 0.9999999969057698
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 15000000,
"cumulative_training_bytes": 15004637,
"metrics": {
"loss": 0.778617926120515,
"ce_loss": 0.7686179324961754,
"lb_loss": 0.9999999971732626
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 16000000,
"cumulative_training_bytes": 16000188,
"metrics": {
"loss": 0.7623379522393846,
"ce_loss": 0.7523379588115734,
"lb_loss": 0.9999999962372964
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 17000000,
"cumulative_training_bytes": 17001433,
"metrics": {
"loss": 0.7477327509037983,
"ce_loss": 0.7377327576507651,
"lb_loss": 0.9999999957885107
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 18000000,
"cumulative_training_bytes": 18004044,
"metrics": {
"loss": 0.7345919368928714,
"ce_loss": 0.7245919437951551,
"lb_loss": 0.9999999957190034
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 19000000,
"cumulative_training_bytes": 19005595,
"metrics": {
"loss": 0.7230352938847073,
"ce_loss": 0.7130353009259259,
"lb_loss": 0.9999999958487908
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 20000000,
"cumulative_training_bytes": 20004400,
"metrics": {
"loss": 0.7124804242391204,
"ce_loss": 0.7024804314053538,
"lb_loss": 0.9999999958516079
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 21000000,
"cumulative_training_bytes": 21006730,
"metrics": {
"loss": 0.7027192320688224,
"ce_loss": 0.6927192393481427,
"lb_loss": 0.9999999955936844
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 22000000,
"cumulative_training_bytes": 22003387,
"metrics": {
"loss": 0.6940317696092523,
"ce_loss": 0.6840317769906119,
"lb_loss": 0.9999999956270584
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 23000000,
"cumulative_training_bytes": 23005816,
"metrics": {
"loss": 0.6859377783162276,
"ce_loss": 0.6759377857914866,
"lb_loss": 0.9999999957184559
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 24000000,
"cumulative_training_bytes": 24002853,
"metrics": {
"loss": 0.6783933619005801,
"ce_loss": 0.6683933694612687,
"lb_loss": 0.9999999955348768
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 25000000,
"cumulative_training_bytes": 25001613,
"metrics": {
"loss": 0.6714792498981766,
"ce_loss": 0.6614792575374961,
"lb_loss": 0.9999999954023966
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 26000000,
"cumulative_training_bytes": 26001520,
"metrics": {
"loss": 0.665006937754656,
"ce_loss": 0.6550069454665882,
"lb_loss": 0.9999999952274173
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 27000000,
"cumulative_training_bytes": 27002890,
"metrics": {
"loss": 0.6590575319028495,
"ce_loss": 0.6490575396825397,
"lb_loss": 0.9999999950836305
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 28000000,
"cumulative_training_bytes": 28003403,
"metrics": {
"loss": 0.6535597506296638,
"ce_loss": 0.6435597584722602,
"lb_loss": 0.9999999951619077
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 29000000,
"cumulative_training_bytes": 29006503,
"metrics": {
"loss": 0.648446149750561,
"ce_loss": 0.6384461576517151,
"lb_loss": 0.9999999951718666
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 30000000,
"cumulative_training_bytes": 30000438,
"metrics": {
"loss": 0.6435842753673087,
"ce_loss": 0.6335842833227041,
"lb_loss": 0.9999999954080095
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 31000000,
"cumulative_training_bytes": 31000470,
"metrics": {
"loss": 0.6390364181845314,
"ce_loss": 0.6290364261910639,
"lb_loss": 0.9999999954240818
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 32000000,
"cumulative_training_bytes": 32001821,
"metrics": {
"loss": 0.6348851022396517,
"ce_loss": 0.6248851102941176,
"lb_loss": 0.9999999954533999
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 33000000,
"cumulative_training_bytes": 33002772,
"metrics": {
"loss": 0.6308636427589162,
"ce_loss": 0.6208636508580705,
"lb_loss": 0.9999999956319416
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 34000000,
"cumulative_training_bytes": 34007477,
"metrics": {
"loss": 0.6269874756813264,
"ce_loss": 0.6169874838228674,
"lb_loss": 0.999999995760732
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 35000000,
"cumulative_training_bytes": 35006427,
"metrics": {
"loss": 0.6233431712141606,
"ce_loss": 0.6133431793953641,
"lb_loss": 0.9999999954902237
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 36000000,
"cumulative_training_bytes": 36003597,
"metrics": {
"loss": 0.6199277473362917,
"ce_loss": 0.6099277555549649,
"lb_loss": 0.9999999953360601
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 37000000,
"cumulative_training_bytes": 37005705,
"metrics": {
"loss": 0.6167014650337747,
"ce_loss": 0.6067014732881671,
"lb_loss": 0.9999999952651668
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 38000000,
"cumulative_training_bytes": 38004996,
"metrics": {
"loss": 0.613597062496674,
"ce_loss": 0.6035970707846495,
"lb_loss": 0.999999995257084
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 39000000,
"cumulative_training_bytes": 39003622,
"metrics": {
"loss": 0.6106150688542822,
"ce_loss": 0.6006150771741264,
"lb_loss": 0.9999999950270958
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 40000000,
"cumulative_training_bytes": 40003412,
"metrics": {
"loss": 0.6077792466508302,
"ce_loss": 0.5977792550009571,
"lb_loss": 0.9999999950253398
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 41000000,
"cumulative_training_bytes": 41001660,
"metrics": {
"loss": 0.6051997902903125,
"ce_loss": 0.5951997986694678,
"lb_loss": 0.9999999949466837
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 42000000,
"cumulative_training_bytes": 42003749,
"metrics": {
"loss": 0.6027762772521528,
"ce_loss": 0.59277628565895,
"lb_loss": 0.9999999948935139
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 43000000,
"cumulative_training_bytes": 43005510,
"metrics": {
"loss": 0.600378182215773,
"ce_loss": 0.5903781906489229,
"lb_loss": 0.9999999950232191
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 44000000,
"cumulative_training_bytes": 44002690,
"metrics": {
"loss": 0.5980829723362137,
"ce_loss": 0.5880829807943275,
"lb_loss": 0.9999999951150534
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 45000000,
"cumulative_training_bytes": 45005962,
"metrics": {
"loss": 0.5958610677767789,
"ce_loss": 0.5858610762589316,
"lb_loss": 0.9999999951529397
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 46000000,
"cumulative_training_bytes": 46002930,
"metrics": {
"loss": 0.5937242965882373,
"ce_loss": 0.583724305093209,
"lb_loss": 0.999999995138769
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 47000000,
"cumulative_training_bytes": 47006814,
"metrics": {
"loss": 0.5916914568989765,
"ce_loss": 0.5816914654259652,
"lb_loss": 0.9999999951551201
}
},
{
"epoch": 1,
"checkpoint_type": "epoch",
"metrics": {
"loss": 0.590403846496175,
"ce_loss": 0.5804038550369537,
"lb_loss": 0.999999995077637,
"training_bytes": 47653393
},
"cumulative_training_bytes": 47653393,
"training_bytes_this_epoch": 47653393
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 48000000,
"cumulative_training_bytes": 48005678,
"metrics": {
"loss": 0.492634161658909,
"ce_loss": 0.48263417119565216,
"lb_loss": 0.9999999961127406
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 49000000,
"cumulative_training_bytes": 49006971,
"metrics": {
"loss": 0.49733226447455625,
"ce_loss": 0.4873322740112994,
"lb_loss": 0.9999999989897518
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 50000000,
"cumulative_training_bytes": 50003595,
"metrics": {
"loss": 0.4957809859844295,
"ce_loss": 0.48578099552117265,
"lb_loss": 0.9999999959228093
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 51000000,
"cumulative_training_bytes": 51000980,
"metrics": {
"loss": 0.49479064406737583,
"ce_loss": 0.484790653604119,
"lb_loss": 0.9999999971357035
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 52000000,
"cumulative_training_bytes": 52001887,
"metrics": {
"loss": 0.4952002545477639,
"ce_loss": 0.48520026408450706,
"lb_loss": 0.9999999968518674
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 53000000,
"cumulative_training_bytes": 53005646,
"metrics": {
"loss": 0.49493103034165453,
"ce_loss": 0.4849310398783977,
"lb_loss": 0.9999999980387599
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 54000000,
"cumulative_training_bytes": 54005477,
"metrics": {
"loss": 0.49478444076446165,
"ce_loss": 0.4847844503012048,
"lb_loss": 0.9999999974865511
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 55000000,
"cumulative_training_bytes": 55002218,
"metrics": {
"loss": 0.4945906480153402,
"ce_loss": 0.48459065755208336,
"lb_loss": 0.999999997826914
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 56000000,
"cumulative_training_bytes": 56003171,
"metrics": {
"loss": 0.4947634672047784,
"ce_loss": 0.48476347674152154,
"lb_loss": 0.9999999967766507
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 57000000,
"cumulative_training_bytes": 57006709,
"metrics": {
"loss": 0.4947378050090834,
"ce_loss": 0.48473781454582654,
"lb_loss": 0.9999999970734217
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 58000000,
"cumulative_training_bytes": 58004016,
"metrics": {
"loss": 0.49519697872139296,
"ce_loss": 0.4851969882581361,
"lb_loss": 0.9999999964290117
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 59000000,
"cumulative_training_bytes": 59006803,
"metrics": {
"loss": 0.49550630157249487,
"ce_loss": 0.48550631110923803,
"lb_loss": 0.9999999969052208
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 60000000,
"cumulative_training_bytes": 60002534,
"metrics": {
"loss": 0.4955277337986567,
"ce_loss": 0.4855277433353999,
"lb_loss": 0.9999999967851183
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 61000000,
"cumulative_training_bytes": 61006527,
"metrics": {
"loss": 0.49546354407564214,
"ce_loss": 0.4854635536123853,
"lb_loss": 0.9999999964797716
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 62000000,
"cumulative_training_bytes": 62003543,
"metrics": {
"loss": 0.49532445517777124,
"ce_loss": 0.4853244647145144,
"lb_loss": 0.999999995547145
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 63000000,
"cumulative_training_bytes": 63006878,
"metrics": {
"loss": 0.495203972695177,
"ce_loss": 0.4852039822319202,
"lb_loss": 0.9999999965812797
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 64000000,
"cumulative_training_bytes": 64003765,
"metrics": {
"loss": 0.495035485193936,
"ce_loss": 0.48503549473067914,
"lb_loss": 0.9999999969290347
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 65000000,
"cumulative_training_bytes": 65007467,
"metrics": {
"loss": 0.4949671338326302,
"ce_loss": 0.48496714336937335,
"lb_loss": 0.9999999965015809
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 66000000,
"cumulative_training_bytes": 66003916,
"metrics": {
"loss": 0.49481925303629526,
"ce_loss": 0.4848192625730384,
"lb_loss": 0.9999999961689836
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 67000000,
"cumulative_training_bytes": 67000894,
"metrics": {
"loss": 0.4946804078722038,
"ce_loss": 0.48468041740894696,
"lb_loss": 0.9999999959178133
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 68000000,
"cumulative_training_bytes": 68004976,
"metrics": {
"loss": 0.494755765303302,
"ce_loss": 0.4847557748400452,
"lb_loss": 0.9999999962985449
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 69000000,
"cumulative_training_bytes": 69000639,
"metrics": {
"loss": 0.4949356291428406,
"ce_loss": 0.4849356386795838,
"lb_loss": 0.9999999967278398
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 70000000,
"cumulative_training_bytes": 70003667,
"metrics": {
"loss": 0.4949633375760053,
"ce_loss": 0.48496334711274847,
"lb_loss": 0.9999999967726066
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 71000000,
"cumulative_training_bytes": 71005669,
"metrics": {
"loss": 0.4949598389791637,
"ce_loss": 0.48495984851590684,
"lb_loss": 0.9999999968330756
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 72000000,
"cumulative_training_bytes": 72002076,
"metrics": {
"loss": 0.4950010472027976,
"ce_loss": 0.48500105673954075,
"lb_loss": 0.9999999969625818
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 73000000,
"cumulative_training_bytes": 73005205,
"metrics": {
"loss": 0.49494027558410275,
"ce_loss": 0.4849402851208459,
"lb_loss": 0.9999999970467789
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 74000000,
"cumulative_training_bytes": 74001335,
"metrics": {
"loss": 0.4949183452961057,
"ce_loss": 0.48491835483284884,
"lb_loss": 0.9999999970370947
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 75000000,
"cumulative_training_bytes": 75003123,
"metrics": {
"loss": 0.49487051867664245,
"ce_loss": 0.4848705282133856,
"lb_loss": 0.9999999971290958
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 76000000,
"cumulative_training_bytes": 76000007,
"metrics": {
"loss": 0.4947565385245916,
"ce_loss": 0.4847565480613348,
"lb_loss": 0.999999997036678
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 77000000,
"cumulative_training_bytes": 77005449,
"metrics": {
"loss": 0.4946735897979816,
"ce_loss": 0.48467359933472476,
"lb_loss": 0.9999999965789143
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 78000000,
"cumulative_training_bytes": 78003357,
"metrics": {
"loss": 0.4945139713489495,
"ce_loss": 0.48451398088569264,
"lb_loss": 0.9999999965858556
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 79000000,
"cumulative_training_bytes": 79005000,
"metrics": {
"loss": 0.4944432114879271,
"ce_loss": 0.48444322102467025,
"lb_loss": 0.9999999965495113
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 80000000,
"cumulative_training_bytes": 80002503,
"metrics": {
"loss": 0.49434447288513184,
"ce_loss": 0.484344482421875,
"lb_loss": 0.9999999965145958
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 81000000,
"cumulative_training_bytes": 81006707,
"metrics": {
"loss": 0.4942619738157253,
"ce_loss": 0.48426198335246845,
"lb_loss": 0.9999999964552002
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 82000000,
"cumulative_training_bytes": 82000170,
"metrics": {
"loss": 0.4941502329922488,
"ce_loss": 0.48415024252899197,
"lb_loss": 0.9999999965571804
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 83000000,
"cumulative_training_bytes": 83002106,
"metrics": {
"loss": 0.4940558883505808,
"ce_loss": 0.48405589788732395,
"lb_loss": 0.9999999969907081
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 84000000,
"cumulative_training_bytes": 84002576,
"metrics": {
"loss": 0.4939967936922918,
"ce_loss": 0.483996803229035,
"lb_loss": 0.9999999966467678
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 85000000,
"cumulative_training_bytes": 85005146,
"metrics": {
"loss": 0.493969307538303,
"ce_loss": 0.48396931707504615,
"lb_loss": 0.9999999965168498
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 86000000,
"cumulative_training_bytes": 86000950,
"metrics": {
"loss": 0.49385267483014317,
"ce_loss": 0.48385268436688633,
"lb_loss": 0.9999999967263277
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 87000000,
"cumulative_training_bytes": 87002861,
"metrics": {
"loss": 0.4937762804836928,
"ce_loss": 0.483776290020436,
"lb_loss": 0.9999999968097942
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 88000000,
"cumulative_training_bytes": 88007699,
"metrics": {
"loss": 0.49367403024784595,
"ce_loss": 0.4836740397845891,
"lb_loss": 0.9999999967759872
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 89000000,
"cumulative_training_bytes": 89003123,
"metrics": {
"loss": 0.49364641225432926,
"ce_loss": 0.4836464217910724,
"lb_loss": 0.9999999966438577
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 90000000,
"cumulative_training_bytes": 90001615,
"metrics": {
"loss": 0.49368486275077944,
"ce_loss": 0.4836848722875226,
"lb_loss": 0.9999999966694693
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 91000000,
"cumulative_training_bytes": 91003122,
"metrics": {
"loss": 0.49359353345477774,
"ce_loss": 0.4835935429915209,
"lb_loss": 0.9999999963990834
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 92000000,
"cumulative_training_bytes": 92003002,
"metrics": {
"loss": 0.4935082090351788,
"ce_loss": 0.48350821857192194,
"lb_loss": 0.9999999964799191
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 93000000,
"cumulative_training_bytes": 93004006,
"metrics": {
"loss": 0.49344559055613085,
"ce_loss": 0.483445600092874,
"lb_loss": 0.9999999965275916
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 94000000,
"cumulative_training_bytes": 94007117,
"metrics": {
"loss": 0.4933724529818427,
"ce_loss": 0.48337246251858584,
"lb_loss": 0.9999999964254938
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 95000000,
"cumulative_training_bytes": 95006117,
"metrics": {
"loss": 0.4932983081985414,
"ce_loss": 0.4832983177352846,
"lb_loss": 0.9999999962602519
}
},
{
"epoch": 2,
"checkpoint_type": "epoch",
"metrics": {
"loss": 0.49327259198558976,
"ce_loss": 0.4832726015223329,
"lb_loss": 0.9999999961789439,
"training_bytes": 47653382
},
"cumulative_training_bytes": 95306775,
"training_bytes_this_epoch": 47653382
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 96000000,
"cumulative_training_bytes": 96002802,
"metrics": {
"loss": 0.48830098634237773,
"ce_loss": 0.4783009958791209,
"lb_loss": 0.9999999927950429
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 97000000,
"cumulative_training_bytes": 97003339,
"metrics": {
"loss": 0.48807572244523884,
"ce_loss": 0.478075731981982,
"lb_loss": 0.9999999940932334
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 98000000,
"cumulative_training_bytes": 98005141,
"metrics": {
"loss": 0.4878903941459764,
"ce_loss": 0.47789040368271957,
"lb_loss": 0.9999999947655978
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 99000000,
"cumulative_training_bytes": 99004099,
"metrics": {
"loss": 0.487553206042967,
"ce_loss": 0.47755321557971014,
"lb_loss": 0.9999999979021139
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 100000000,
"cumulative_training_bytes": 100001082,
"metrics": {
"loss": 0.4875374598759811,
"ce_loss": 0.4775374694127243,
"lb_loss": 0.9999999975691417
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 101000000,
"cumulative_training_bytes": 101002438,
"metrics": {
"loss": 0.48744717208288046,
"ce_loss": 0.47744718161962363,
"lb_loss": 0.9999999971960181
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 102000000,
"cumulative_training_bytes": 102006394,
"metrics": {
"loss": 0.48746427617754257,
"ce_loss": 0.47746428571428573,
"lb_loss": 0.9999999976158143
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 103000000,
"cumulative_training_bytes": 103004065,
"metrics": {
"loss": 0.487491439468232,
"ce_loss": 0.47749144900497514,
"lb_loss": 0.9999999987545298
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 104000000,
"cumulative_training_bytes": 104001074,
"metrics": {
"loss": 0.4877171573134771,
"ce_loss": 0.47771716685022025,
"lb_loss": 0.9999999984245468
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 105000000,
"cumulative_training_bytes": 105004888,
"metrics": {
"loss": 0.4877519528447734,
"ce_loss": 0.47775196238151657,
"lb_loss": 0.999999998210919
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 106000000,
"cumulative_training_bytes": 106007608,
"metrics": {
"loss": 0.4879144388347672,
"ce_loss": 0.47791444837151037,
"lb_loss": 0.999999997525362
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 107000000,
"cumulative_training_bytes": 107001146,
"metrics": {
"loss": 0.4879950379419733,
"ce_loss": 0.47799504747871646,
"lb_loss": 0.9999999976970045
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 108000000,
"cumulative_training_bytes": 108007350,
"metrics": {
"loss": 0.4880941454512014,
"ce_loss": 0.47809415498794455,
"lb_loss": 0.9999999979161728
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 109000000,
"cumulative_training_bytes": 109006285,
"metrics": {
"loss": 0.4880014053598471,
"ce_loss": 0.47800141489659026,
"lb_loss": 0.9999999974678854
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 110000000,
"cumulative_training_bytes": 110005030,
"metrics": {
"loss": 0.4881331284840902,
"ce_loss": 0.47813313802083335,
"lb_loss": 0.9999999972991646
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 111000000,
"cumulative_training_bytes": 111001255,
"metrics": {
"loss": 0.48820883192667147,
"ce_loss": 0.47820884146341464,
"lb_loss": 0.9999999975285879
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 112000000,
"cumulative_training_bytes": 112000565,
"metrics": {
"loss": 0.48811692710316507,
"ce_loss": 0.47811693663990823,
"lb_loss": 0.9999999980860894
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 113000000,
"cumulative_training_bytes": 113002042,
"metrics": {
"loss": 0.4881750229167402,
"ce_loss": 0.47817503245348336,
"lb_loss": 0.9999999979108714
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 114000000,
"cumulative_training_bytes": 114007396,
"metrics": {
"loss": 0.4890242917531831,
"ce_loss": 0.4790243012899263,
"lb_loss": 0.999999997876493
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 115000000,
"cumulative_training_bytes": 115004141,
"metrics": {
"loss": 0.48968308777184605,
"ce_loss": 0.4796830973085892,
"lb_loss": 0.9999999972201488
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 116000000,
"cumulative_training_bytes": 116007191,
"metrics": {
"loss": 0.48999201616591953,
"ce_loss": 0.4799920257026627,
"lb_loss": 0.9999999972446078
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 117000000,
"cumulative_training_bytes": 117006642,
"metrics": {
"loss": 0.49022899178600815,
"ce_loss": 0.4802290013227513,
"lb_loss": 0.9999999971827082
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 118000000,
"cumulative_training_bytes": 118004076,
"metrics": {
"loss": 0.49022830517657895,
"ce_loss": 0.4802283147133221,
"lb_loss": 0.9999999977886979
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 119000000,
"cumulative_training_bytes": 119004633,
"metrics": {
"loss": 0.4901273379996058,
"ce_loss": 0.48012734753634895,
"lb_loss": 0.9999999980934217
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 120000000,
"cumulative_training_bytes": 120007595,
"metrics": {
"loss": 0.4901242494139698,
"ce_loss": 0.480124258950713,
"lb_loss": 0.9999999980969378
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 121000000,
"cumulative_training_bytes": 121001712,
"metrics": {
"loss": 0.4901183884817312,
"ce_loss": 0.4801183980184744,
"lb_loss": 0.9999999977088798
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 122000000,
"cumulative_training_bytes": 122006493,
"metrics": {
"loss": 0.4901035447362709,
"ce_loss": 0.48010355427301404,
"lb_loss": 0.9999999979146066
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 123000000,
"cumulative_training_bytes": 123005264,
"metrics": {
"loss": 0.4900173131754493,
"ce_loss": 0.48001732271219244,
"lb_loss": 0.999999997643499
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 124000000,
"cumulative_training_bytes": 124002472,
"metrics": {
"loss": 0.4899558301349942,
"ce_loss": 0.4799558396717374,
"lb_loss": 0.9999999973275739
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 125000000,
"cumulative_training_bytes": 125002266,
"metrics": {
"loss": 0.4899716454722821,
"ce_loss": 0.4799716550090253,
"lb_loss": 0.9999999974024794
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 126000000,
"cumulative_training_bytes": 126000850,
"metrics": {
"loss": 0.4900195558162127,
"ce_loss": 0.48001956535295587,
"lb_loss": 0.9999999972346062
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 127000000,
"cumulative_training_bytes": 127007464,
"metrics": {
"loss": 0.49008660731108294,
"ce_loss": 0.4800866168478261,
"lb_loss": 0.9999999973796992
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 128000000,
"cumulative_training_bytes": 128005894,
"metrics": {
"loss": 0.49008085990119593,
"ce_loss": 0.4800808694379391,
"lb_loss": 0.9999999971523776
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 129000000,
"cumulative_training_bytes": 129003149,
"metrics": {
"loss": 0.4900173022530296,
"ce_loss": 0.48001731178977275,
"lb_loss": 0.9999999971552329
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 130000000,
"cumulative_training_bytes": 130006682,
"metrics": {
"loss": 0.48996655758420143,
"ce_loss": 0.4799665671209446,
"lb_loss": 0.9999999974084937
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 131000000,
"cumulative_training_bytes": 131001772,
"metrics": {
"loss": 0.4899122611669685,
"ce_loss": 0.4799122707037117,
"lb_loss": 0.9999999974551975
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 132000000,
"cumulative_training_bytes": 132002042,
"metrics": {
"loss": 0.48989445976104484,
"ce_loss": 0.479894469297788,
"lb_loss": 0.9999999973630667
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 133000000,
"cumulative_training_bytes": 133001074,
"metrics": {
"loss": 0.48980843977248073,
"ce_loss": 0.4798084493092239,
"lb_loss": 0.9999999974206035
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 134000000,
"cumulative_training_bytes": 134006568,
"metrics": {
"loss": 0.48973665680008643,
"ce_loss": 0.4797366663368296,
"lb_loss": 0.9999999974284954
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 135000000,
"cumulative_training_bytes": 135007246,
"metrics": {
"loss": 0.4897220016997538,
"ce_loss": 0.47972201123649694,
"lb_loss": 0.9999999975164732
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 136000000,
"cumulative_training_bytes": 136007771,
"metrics": {
"loss": 0.4896772011876218,
"ce_loss": 0.479677210724365,
"lb_loss": 0.9999999976561861
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 137000000,
"cumulative_training_bytes": 137003574,
"metrics": {
"loss": 0.4896326092649097,
"ce_loss": 0.47963261880165287,
"lb_loss": 0.9999999975807848
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 138000000,
"cumulative_training_bytes": 138005969,
"metrics": {
"loss": 0.48960041247276187,
"ce_loss": 0.47960042200950503,
"lb_loss": 0.9999999974772783
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 139000000,
"cumulative_training_bytes": 139005934,
"metrics": {
"loss": 0.48958851661535074,
"ce_loss": 0.4795885261520939,
"lb_loss": 0.999999997409856
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 140000000,
"cumulative_training_bytes": 140005424,
"metrics": {
"loss": 0.48959421686784743,
"ce_loss": 0.4795942264045906,
"lb_loss": 0.9999999973352497
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 141000000,
"cumulative_training_bytes": 141006781,
"metrics": {
"loss": 0.4895374967457162,
"ce_loss": 0.4795375062824594,
"lb_loss": 0.9999999973438037
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 142000000,
"cumulative_training_bytes": 142002589,
"metrics": {
"loss": 0.48945935367853804,
"ce_loss": 0.4794593632152812,
"lb_loss": 0.9999999972538276
}
},
{
"epoch": 3,
"checkpoint_type": "epoch",
"metrics": {
"loss": 0.48934782746518485,
"ce_loss": 0.479347837001928,
"lb_loss": 0.9999999973377103,
"training_bytes": 47653416
},
"cumulative_training_bytes": 142960191,
"training_bytes_this_epoch": 47653416
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 143000000,
"cumulative_training_bytes": 143005811,
"metrics": {
"loss": 0.4911197821299235,
"ce_loss": 0.4811197916666667,
"lb_loss": 0.9999999503294627
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 144000000,
"cumulative_training_bytes": 144005584,
"metrics": {
"loss": 0.4834604684044333,
"ce_loss": 0.47346047794117646,
"lb_loss": 0.9999999938642278
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 145000000,
"cumulative_training_bytes": 145002597,
"metrics": {
"loss": 0.4844184585083696,
"ce_loss": 0.4744184680451128,
"lb_loss": 0.9999999959666029
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 146000000,
"cumulative_training_bytes": 146003208,
"metrics": {
"loss": 0.4850127817101082,
"ce_loss": 0.47501279124685136,
"lb_loss": 0.9999999965468341
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 147000000,
"cumulative_training_bytes": 147007482,
"metrics": {
"loss": 0.48537507924166595,
"ce_loss": 0.4753750887784091,
"lb_loss": 0.999999994807171
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 148000000,
"cumulative_training_bytes": 148004641,
"metrics": {
"loss": 0.48546422868514133,
"ce_loss": 0.4754642382218845,
"lb_loss": 0.9999999916662199
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 149000000,
"cumulative_training_bytes": 149002815,
"metrics": {
"loss": 0.48531080502543955,
"ce_loss": 0.4753108145621827,
"lb_loss": 0.9999999919064759
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 150000000,
"cumulative_training_bytes": 150002243,
"metrics": {
"loss": 0.48544034682887166,
"ce_loss": 0.4754403563656148,
"lb_loss": 0.9999999926061703
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 151000000,
"cumulative_training_bytes": 151002455,
"metrics": {
"loss": 0.48702938034420923,
"ce_loss": 0.4770293898809524,
"lb_loss": 0.9999999929609753
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 152000000,
"cumulative_training_bytes": 152006519,
"metrics": {
"loss": 0.487319925899328,
"ce_loss": 0.47731993543607115,
"lb_loss": 0.9999999927323718
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 153000000,
"cumulative_training_bytes": 153001757,
"metrics": {
"loss": 0.4873133485868266,
"ce_loss": 0.4773133581235698,
"lb_loss": 0.9999999942259421
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 154000000,
"cumulative_training_bytes": 154001996,
"metrics": {
"loss": 0.4871629306957343,
"ce_loss": 0.47716294023247746,
"lb_loss": 0.9999999934645845
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 155000000,
"cumulative_training_bytes": 155007554,
"metrics": {
"loss": 0.487115378901552,
"ce_loss": 0.47711538843829515,
"lb_loss": 0.9999999934404558
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 156000000,
"cumulative_training_bytes": 156006842,
"metrics": {
"loss": 0.4871095489629632,
"ce_loss": 0.4771095584997064,
"lb_loss": 0.999999993630038
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 157000000,
"cumulative_training_bytes": 157000485,
"metrics": {
"loss": 0.4868759286455712,
"ce_loss": 0.4768759381823144,
"lb_loss": 0.9999999938833661
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 158000000,
"cumulative_training_bytes": 158002706,
"metrics": {
"loss": 0.4868062579747189,
"ce_loss": 0.47680626751146205,
"lb_loss": 0.9999999946255619
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 159000000,
"cumulative_training_bytes": 159005682,
"metrics": {
"loss": 0.48680872971827116,
"ce_loss": 0.4768087392550143,
"lb_loss": 0.9999999952748945
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 160000000,
"cumulative_training_bytes": 160001107,
"metrics": {
"loss": 0.4867460350338504,
"ce_loss": 0.47674604457059355,
"lb_loss": 0.9999999951758831
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 161000000,
"cumulative_training_bytes": 161002648,
"metrics": {
"loss": 0.4867391425333205,
"ce_loss": 0.4767391520700637,
"lb_loss": 0.9999999951405131
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 162000000,
"cumulative_training_bytes": 162004313,
"metrics": {
"loss": 0.4866803379089527,
"ce_loss": 0.4766803474456959,
"lb_loss": 0.9999999951088706
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 163000000,
"cumulative_training_bytes": 163005127,
"metrics": {
"loss": 0.48664981015183156,
"ce_loss": 0.4766498196885747,
"lb_loss": 0.9999999953081556
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 164000000,
"cumulative_training_bytes": 164000917,
"metrics": {
"loss": 0.4866300357399223,
"ce_loss": 0.47663004527666547,
"lb_loss": 0.9999999955301941
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 165000000,
"cumulative_training_bytes": 165003825,
"metrics": {
"loss": 0.48657877781558484,
"ce_loss": 0.476578787352328,
"lb_loss": 0.9999999957129391
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 166000000,
"cumulative_training_bytes": 166000316,
"metrics": {
"loss": 0.4866410308836623,
"ce_loss": 0.47664104042040545,
"lb_loss": 0.9999999958599632
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 167000000,
"cumulative_training_bytes": 167004464,
"metrics": {
"loss": 0.4865475621193078,
"ce_loss": 0.47654757165605094,
"lb_loss": 0.9999999961845435
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 168000000,
"cumulative_training_bytes": 168000312,
"metrics": {
"loss": 0.4865374044540825,
"ce_loss": 0.47653741399082566,
"lb_loss": 0.9999999960081293
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 169000000,
"cumulative_training_bytes": 169004041,
"metrics": {
"loss": 0.48646658576610896,
"ce_loss": 0.4764665953028521,
"lb_loss": 0.9999999961969398
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 170000000,
"cumulative_training_bytes": 170004115,
"metrics": {
"loss": 0.4864624011017053,
"ce_loss": 0.47646241063844846,
"lb_loss": 0.9999999961523616
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 171000000,
"cumulative_training_bytes": 171002131,
"metrics": {
"loss": 0.4865150353376767,
"ce_loss": 0.47651504487441987,
"lb_loss": 0.9999999958831627
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 172000000,
"cumulative_training_bytes": 172000593,
"metrics": {
"loss": 0.48652182180880243,
"ce_loss": 0.4765218313455456,
"lb_loss": 0.9999999957268151
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 173000000,
"cumulative_training_bytes": 173003618,
"metrics": {
"loss": 0.48656199285179186,
"ce_loss": 0.476562002388535,
"lb_loss": 0.999999995732763
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 174000000,
"cumulative_training_bytes": 174000074,
"metrics": {
"loss": 0.4865022831574615,
"ce_loss": 0.4765022926942047,
"lb_loss": 0.9999999959283633
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 175000000,
"cumulative_training_bytes": 175004955,
"metrics": {
"loss": 0.4864411783514556,
"ce_loss": 0.4764411878881988,
"lb_loss": 0.9999999958849158
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 176000000,
"cumulative_training_bytes": 176002879,
"metrics": {
"loss": 0.4863869083270196,
"ce_loss": 0.47638691786376275,
"lb_loss": 0.9999999958569524
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 177000000,
"cumulative_training_bytes": 177007060,
"metrics": {
"loss": 0.48635870139197285,
"ce_loss": 0.476358710928716,
"lb_loss": 0.9999999955903017
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 178000000,
"cumulative_training_bytes": 178007425,
"metrics": {
"loss": 0.4863939705036675,
"ce_loss": 0.47639398004041067,
"lb_loss": 0.9999999956123273
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 179000000,
"cumulative_training_bytes": 179000051,
"metrics": {
"loss": 0.4864388644543359,
"ce_loss": 0.47643887399107904,
"lb_loss": 0.9999999957081618
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 180000000,
"cumulative_training_bytes": 180003883,
"metrics": {
"loss": 0.48640507803300265,
"ce_loss": 0.4764050875697458,
"lb_loss": 0.9999999957381263
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 181000000,
"cumulative_training_bytes": 181006010,
"metrics": {
"loss": 0.48639232886868944,
"ce_loss": 0.4763923384054326,
"lb_loss": 0.9999999955626321
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 182000000,
"cumulative_training_bytes": 182004597,
"metrics": {
"loss": 0.4863602845809039,
"ce_loss": 0.4763602941176471,
"lb_loss": 0.9999999954653721
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 183000000,
"cumulative_training_bytes": 183005222,
"metrics": {
"loss": 0.48625701125675586,
"ce_loss": 0.476257020793499,
"lb_loss": 0.999999995703451
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 184000000,
"cumulative_training_bytes": 184003606,
"metrics": {
"loss": 0.4863026811115777,
"ce_loss": 0.47630269064832087,
"lb_loss": 0.9999999957186962
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 185000000,
"cumulative_training_bytes": 185005739,
"metrics": {
"loss": 0.48644190962643297,
"ce_loss": 0.47644191916317613,
"lb_loss": 0.9999999957339964
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 186000000,
"cumulative_training_bytes": 186001058,
"metrics": {
"loss": 0.48650585296548066,
"ce_loss": 0.4765058625022238,
"lb_loss": 0.999999995779639
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 187000000,
"cumulative_training_bytes": 187002105,
"metrics": {
"loss": 0.4865529829006699,
"ce_loss": 0.47655299243741306,
"lb_loss": 0.9999999958343068
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 188000000,
"cumulative_training_bytes": 188006850,
"metrics": {
"loss": 0.4865126912536699,
"ce_loss": 0.4765127007904131,
"lb_loss": 0.9999999957649598
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 189000000,
"cumulative_training_bytes": 189001113,
"metrics": {
"loss": 0.4865072715625417,
"ce_loss": 0.47650728109928486,
"lb_loss": 0.9999999957573943
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 190000000,
"cumulative_training_bytes": 190003438,
"metrics": {
"loss": 0.4864817460378011,
"ce_loss": 0.47648175557454425,
"lb_loss": 0.9999999960709829
}
},
{
"epoch": 4,
"checkpoint_type": "epoch",
"metrics": {
"loss": 0.48646113130917584,
"ce_loss": 0.476461140845919,
"lb_loss": 0.9999999960640249,
"training_bytes": 47653407
},
"cumulative_training_bytes": 190613598,
"training_bytes_this_epoch": 47653407
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 191000000,
"cumulative_training_bytes": 191004593,
"metrics": {
"loss": 0.4838434340907078,
"ce_loss": 0.47384344362745096,
"lb_loss": 0.9999999871440962
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 192000000,
"cumulative_training_bytes": 192004632,
"metrics": {
"loss": 0.4837293860676524,
"ce_loss": 0.4737293956043956,
"lb_loss": 0.9999999944325332
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 193000000,
"cumulative_training_bytes": 193001112,
"metrics": {
"loss": 0.48330102211389786,
"ce_loss": 0.473301031650641,
"lb_loss": 0.9999999961791894
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 194000000,
"cumulative_training_bytes": 194004855,
"metrics": {
"loss": 0.4841243908582907,
"ce_loss": 0.47412440039503384,
"lb_loss": 0.9999999960981159
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 195000000,
"cumulative_training_bytes": 195000605,
"metrics": {
"loss": 0.48411852776692177,
"ce_loss": 0.47411853730366493,
"lb_loss": 0.9999999941747643
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 196000000,
"cumulative_training_bytes": 196000298,
"metrics": {
"loss": 0.4837925535855897,
"ce_loss": 0.47379256312233287,
"lb_loss": 0.9999999937258268
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 197000000,
"cumulative_training_bytes": 197005504,
"metrics": {
"loss": 0.4837077430112185,
"ce_loss": 0.47370775254796166,
"lb_loss": 0.9999999942825281
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 198000000,
"cumulative_training_bytes": 198005978,
"metrics": {
"loss": 0.48402241598139156,
"ce_loss": 0.4740224255181347,
"lb_loss": 0.9999999936380535
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 199000000,
"cumulative_training_bytes": 199002040,
"metrics": {
"loss": 0.4838441685454486,
"ce_loss": 0.47384417808219176,
"lb_loss": 0.9999999936312846
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 200000000,
"cumulative_training_bytes": 200003598,
"metrics": {
"loss": 0.48375069142165816,
"ce_loss": 0.4737507009584013,
"lb_loss": 0.999999993242214
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 201000000,
"cumulative_training_bytes": 201005092,
"metrics": {
"loss": 0.48378752730924063,
"ce_loss": 0.4737875368459838,
"lb_loss": 0.9999999931918055
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 202000000,
"cumulative_training_bytes": 202001683,
"metrics": {
"loss": 0.4836702367645346,
"ce_loss": 0.47367024630127774,
"lb_loss": 0.9999999932258339
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 203000000,
"cumulative_training_bytes": 203001980,
"metrics": {
"loss": 0.4837692078612791,
"ce_loss": 0.47376921739802225,
"lb_loss": 0.9999999935164292
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 204000000,
"cumulative_training_bytes": 204006015,
"metrics": {
"loss": 0.4837763001402151,
"ce_loss": 0.4737763096769583,
"lb_loss": 0.9999999933204629
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 205000000,
"cumulative_training_bytes": 205004293,
"metrics": {
"loss": 0.483796516208334,
"ce_loss": 0.4737965257450772,
"lb_loss": 0.9999999935288199
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 206000000,
"cumulative_training_bytes": 206000909,
"metrics": {
"loss": 0.48376647876589496,
"ce_loss": 0.4737664883026381,
"lb_loss": 0.9999999937398805
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 207000000,
"cumulative_training_bytes": 207000656,
"metrics": {
"loss": 0.4837419186423125,
"ce_loss": 0.47374192817905564,
"lb_loss": 0.9999999936744954
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 208000000,
"cumulative_training_bytes": 208006392,
"metrics": {
"loss": 0.48362419888836694,
"ce_loss": 0.4736242084251101,
"lb_loss": 0.9999999941183082
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 209000000,
"cumulative_training_bytes": 209002552,
"metrics": {
"loss": 0.4836824448903402,
"ce_loss": 0.47368245442708334,
"lb_loss": 0.9999999945114056
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 210000000,
"cumulative_training_bytes": 210001732,
"metrics": {
"loss": 0.48379524184610945,
"ce_loss": 0.4737952513828526,
"lb_loss": 0.9999999946306365
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 211000000,
"cumulative_training_bytes": 211005793,
"metrics": {
"loss": 0.48375900037122077,
"ce_loss": 0.47375900990796393,
"lb_loss": 0.9999999952083418
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 212000000,
"cumulative_training_bytes": 212001830,
"metrics": {
"loss": 0.48375242487407344,
"ce_loss": 0.4737524344108166,
"lb_loss": 0.9999999954314491
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 213000000,
"cumulative_training_bytes": 213002388,
"metrics": {
"loss": 0.48380419419230236,
"ce_loss": 0.4738042037290455,
"lb_loss": 0.9999999950856245
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 214000000,
"cumulative_training_bytes": 214006969,
"metrics": {
"loss": 0.4837882087834926,
"ce_loss": 0.47378821832023577,
"lb_loss": 0.9999999953159415
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 215000000,
"cumulative_training_bytes": 215004172,
"metrics": {
"loss": 0.483773889254086,
"ce_loss": 0.47377389879082915,
"lb_loss": 0.9999999951327866
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 216000000,
"cumulative_training_bytes": 216001255,
"metrics": {
"loss": 0.4837848567502212,
"ce_loss": 0.47378486628696437,
"lb_loss": 0.9999999949999725
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 217000000,
"cumulative_training_bytes": 217004188,
"metrics": {
"loss": 0.4841484393021538,
"ce_loss": 0.47414844883889695,
"lb_loss": 0.9999999951381988
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 218000000,
"cumulative_training_bytes": 218001725,
"metrics": {
"loss": 0.4844258503625857,
"ce_loss": 0.47442585989932884,
"lb_loss": 0.9999999952662978
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 219000000,
"cumulative_training_bytes": 219003579,
"metrics": {
"loss": 0.48454192548483765,
"ce_loss": 0.4745419350215808,
"lb_loss": 0.9999999954175011
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 220000000,
"cumulative_training_bytes": 220002944,
"metrics": {
"loss": 0.4846480412058311,
"ce_loss": 0.47464805074257427,
"lb_loss": 0.9999999953409605
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 221000000,
"cumulative_training_bytes": 221001782,
"metrics": {
"loss": 0.48471910264516666,
"ce_loss": 0.4747191121819098,
"lb_loss": 0.9999999951493322
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 222000000,
"cumulative_training_bytes": 222000475,
"metrics": {
"loss": 0.4846779797594266,
"ce_loss": 0.4746779892961698,
"lb_loss": 0.9999999951286762
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 223000000,
"cumulative_training_bytes": 223000890,
"metrics": {
"loss": 0.484692938897063,
"ce_loss": 0.47469294843380616,
"lb_loss": 0.9999999951386283
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 224000000,
"cumulative_training_bytes": 224000466,
"metrics": {
"loss": 0.484730315864633,
"ce_loss": 0.47473032540137616,
"lb_loss": 0.9999999953656021
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 225000000,
"cumulative_training_bytes": 225004782,
"metrics": {
"loss": 0.4847559259453321,
"ce_loss": 0.4747559354820753,
"lb_loss": 0.999999995487513
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 226000000,
"cumulative_training_bytes": 226004131,
"metrics": {
"loss": 0.48467487820400557,
"ce_loss": 0.47467488774074873,
"lb_loss": 0.9999999957305481
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 227000000,
"cumulative_training_bytes": 227007102,
"metrics": {
"loss": 0.48469074567159015,
"ce_loss": 0.4746907552083333,
"lb_loss": 0.9999999959736762
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 228000000,
"cumulative_training_bytes": 228004925,
"metrics": {
"loss": 0.48469536216098363,
"ce_loss": 0.4746953716977268,
"lb_loss": 0.9999999959108016
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 229000000,
"cumulative_training_bytes": 229001388,
"metrics": {
"loss": 0.4846923528211264,
"ce_loss": 0.47469236235786955,
"lb_loss": 0.9999999959217248
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 230000000,
"cumulative_training_bytes": 230000104,
"metrics": {
"loss": 0.4846553168778009,
"ce_loss": 0.47465532641454405,
"lb_loss": 0.9999999958393803
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 231000000,
"cumulative_training_bytes": 231004256,
"metrics": {
"loss": 0.4846512128513873,
"ce_loss": 0.47465122238813046,
"lb_loss": 0.9999999958523124
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 232000000,
"cumulative_training_bytes": 232007293,
"metrics": {
"loss": 0.484631408132082,
"ce_loss": 0.47463141766882516,
"lb_loss": 0.9999999958756453
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 233000000,
"cumulative_training_bytes": 233002626,
"metrics": {
"loss": 0.48464676948313035,
"ce_loss": 0.4746467790198735,
"lb_loss": 0.9999999958325207
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 234000000,
"cumulative_training_bytes": 234002992,
"metrics": {
"loss": 0.4846627954623744,
"ce_loss": 0.47466280499911756,
"lb_loss": 0.9999999956448424
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 235000000,
"cumulative_training_bytes": 235001903,
"metrics": {
"loss": 0.48463665597990335,
"ce_loss": 0.4746366655166465,
"lb_loss": 0.999999995661004
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 236000000,
"cumulative_training_bytes": 236005937,
"metrics": {
"loss": 0.4846179318009761,
"ce_loss": 0.4746179413377193,
"lb_loss": 0.999999995596013
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 237000000,
"cumulative_training_bytes": 237000271,
"metrics": {
"loss": 0.4846171031654688,
"ce_loss": 0.47461711270221196,
"lb_loss": 0.9999999953855103
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 238000000,
"cumulative_training_bytes": 238005016,
"metrics": {
"loss": 0.48460431618227445,
"ce_loss": 0.4746043257190176,
"lb_loss": 0.9999999954639218
}
},
{
"epoch": 5,
"checkpoint_type": "epoch",
"metrics": {
"loss": 0.4846178382098828,
"ce_loss": 0.47461784774662596,
"lb_loss": 0.9999999954128174,
"training_bytes": 47653413
},
"cumulative_training_bytes": 238267011,
"training_bytes_this_epoch": 47653413
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 239000000,
"cumulative_training_bytes": 239004387,
"metrics": {
"loss": 0.4854953190715043,
"ce_loss": 0.47549532860824745,
"lb_loss": 0.9999999821800547
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 240000000,
"cumulative_training_bytes": 240001325,
"metrics": {
"loss": 0.4832241094059881,
"ce_loss": 0.47322411894273125,
"lb_loss": 0.9999999915975831
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 241000000,
"cumulative_training_bytes": 241000019,
"metrics": {
"loss": 0.483070870351525,
"ce_loss": 0.4730708798882682,
"lb_loss": 0.9999999950051973
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 242000000,
"cumulative_training_bytes": 242004419,
"metrics": {
"loss": 0.48300372896018934,
"ce_loss": 0.4730037384969325,
"lb_loss": 0.9999999974402913
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 243000000,
"cumulative_training_bytes": 243002073,
"metrics": {
"loss": 0.48269094866196444,
"ce_loss": 0.4726909581987076,
"lb_loss": 0.9999999974964123
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 244000000,
"cumulative_training_bytes": 244003061,
"metrics": {
"loss": 0.48260155296325685,
"ce_loss": 0.4726015625,
"lb_loss": 0.9999999978542328
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 245000000,
"cumulative_training_bytes": 245004215,
"metrics": {
"loss": 0.48258308141104345,
"ce_loss": 0.4725830909477866,
"lb_loss": 0.9999999963465939
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 246000000,
"cumulative_training_bytes": 246005003,
"metrics": {
"loss": 0.48249219340298016,
"ce_loss": 0.4724922029397233,
"lb_loss": 0.9999999967017192
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 247000000,
"cumulative_training_bytes": 247006730,
"metrics": {
"loss": 0.4825161212594073,
"ce_loss": 0.47251613079615046,
"lb_loss": 0.9999999975490653
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 248000000,
"cumulative_training_bytes": 248005797,
"metrics": {
"loss": 0.4824859365551657,
"ce_loss": 0.47248594609190886,
"lb_loss": 0.9999999967692691
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 249000000,
"cumulative_training_bytes": 249006395,
"metrics": {
"loss": 0.48267154272465285,
"ce_loss": 0.472671552261396,
"lb_loss": 0.9999999966461774
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 250000000,
"cumulative_training_bytes": 250006518,
"metrics": {
"loss": 0.4824703498178853,
"ce_loss": 0.47247035935462844,
"lb_loss": 0.9999999973578123
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 251000000,
"cumulative_training_bytes": 251001307,
"metrics": {
"loss": 0.482704364336454,
"ce_loss": 0.47270437387319714,
"lb_loss": 0.9999999973851328
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 252000000,
"cumulative_training_bytes": 252005393,
"metrics": {
"loss": 0.48272043788665514,
"ce_loss": 0.4727204474233983,
"lb_loss": 0.9999999973767315
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 253000000,
"cumulative_training_bytes": 253007637,
"metrics": {
"loss": 0.4827677896584801,
"ce_loss": 0.47276779919522327,
"lb_loss": 0.9999999973075784
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 254000000,
"cumulative_training_bytes": 254001252,
"metrics": {
"loss": 0.48291178065051366,
"ce_loss": 0.4729117901872568,
"lb_loss": 0.9999999969559885
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 255000000,
"cumulative_training_bytes": 255003892,
"metrics": {
"loss": 0.4829045114966359,
"ce_loss": 0.47290452103337904,
"lb_loss": 0.9999999969475445
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 256000000,
"cumulative_training_bytes": 256002122,
"metrics": {
"loss": 0.48277088224141823,
"ce_loss": 0.4727708917781614,
"lb_loss": 0.9999999973760579
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 257000000,
"cumulative_training_bytes": 257002914,
"metrics": {
"loss": 0.4828110224281261,
"ce_loss": 0.4728110319648693,
"lb_loss": 0.9999999978816977
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 258000000,
"cumulative_training_bytes": 258007111,
"metrics": {
"loss": 0.48270698081804553,
"ce_loss": 0.4727069903547887,
"lb_loss": 0.9999999975501774
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 259000000,
"cumulative_training_bytes": 259001323,
"metrics": {
"loss": 0.4827247332188862,
"ce_loss": 0.47272474275562937,
"lb_loss": 0.9999999972056884
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 260000000,
"cumulative_training_bytes": 260005225,
"metrics": {
"loss": 0.48267412118508785,
"ce_loss": 0.472674130721831,
"lb_loss": 0.9999999975654441
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 261000000,
"cumulative_training_bytes": 261001447,
"metrics": {
"loss": 0.48262796289591675,
"ce_loss": 0.4726279724326599,
"lb_loss": 0.999999997290698
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 262000000,
"cumulative_training_bytes": 262002819,
"metrics": {
"loss": 0.48269655004081247,
"ce_loss": 0.47269655957755563,
"lb_loss": 0.9999999973474876
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 263000000,
"cumulative_training_bytes": 263000855,
"metrics": {
"loss": 0.4826931146895026,
"ce_loss": 0.47269312422624576,
"lb_loss": 0.9999999976571371
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 264000000,
"cumulative_training_bytes": 264003342,
"metrics": {
"loss": 0.48269632542012775,
"ce_loss": 0.4726963349568709,
"lb_loss": 0.9999999976243241
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 265000000,
"cumulative_training_bytes": 265002459,
"metrics": {
"loss": 0.48277645857805784,
"ce_loss": 0.472776468114801,
"lb_loss": 0.9999999977475484
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 266000000,
"cumulative_training_bytes": 266001589,
"metrics": {
"loss": 0.4827284785946397,
"ce_loss": 0.47272848813138285,
"lb_loss": 0.9999999978448224
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 267000000,
"cumulative_training_bytes": 267007878,
"metrics": {
"loss": 0.48272075498110445,
"ce_loss": 0.4727207645178476,
"lb_loss": 0.9999999976659876
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 268000000,
"cumulative_training_bytes": 268004701,
"metrics": {
"loss": 0.48267082352839585,
"ce_loss": 0.472670833065139,
"lb_loss": 0.9999999977287622
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 269000000,
"cumulative_training_bytes": 269000341,
"metrics": {
"loss": 0.482630938395494,
"ce_loss": 0.47263094793223714,
"lb_loss": 0.9999999978171692
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 270000000,
"cumulative_training_bytes": 270005290,
"metrics": {
"loss": 0.4826039372364776,
"ce_loss": 0.47260394677322076,
"lb_loss": 0.9999999979149159
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 271000000,
"cumulative_training_bytes": 271001402,
"metrics": {
"loss": 0.4826388793521457,
"ce_loss": 0.4726388888888889,
"lb_loss": 0.9999999976436994
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 272000000,
"cumulative_training_bytes": 272007043,
"metrics": {
"loss": 0.4826473747120085,
"ce_loss": 0.4726473842487517,
"lb_loss": 0.9999999977002293
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 273000000,
"cumulative_training_bytes": 273004999,
"metrics": {
"loss": 0.4826480611873036,
"ce_loss": 0.47264807072404674,
"lb_loss": 0.9999999978585944
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 274000000,
"cumulative_training_bytes": 274003293,
"metrics": {
"loss": 0.4827403403186553,
"ce_loss": 0.4727403498553985,
"lb_loss": 0.9999999979697647
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 275000000,
"cumulative_training_bytes": 275006178,
"metrics": {
"loss": 0.4827058927345634,
"ce_loss": 0.47270590227130654,
"lb_loss": 0.999999997876142
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 276000000,
"cumulative_training_bytes": 276000051,
"metrics": {
"loss": 0.4826930918783512,
"ce_loss": 0.47269310141509435,
"lb_loss": 0.9999999979442504
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 277000000,
"cumulative_training_bytes": 277003305,
"metrics": {
"loss": 0.48266627628341496,
"ce_loss": 0.4726662858201581,
"lb_loss": 0.9999999977972196
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 278000000,
"cumulative_training_bytes": 278006257,
"metrics": {
"loss": 0.48266338925443386,
"ce_loss": 0.472663398791177,
"lb_loss": 0.9999999979102205
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 279000000,
"cumulative_training_bytes": 279003019,
"metrics": {
"loss": 0.48267459345141694,
"ce_loss": 0.4726746029881601,
"lb_loss": 0.9999999979052681
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 280000000,
"cumulative_training_bytes": 280004197,
"metrics": {
"loss": 0.4827149918388988,
"ce_loss": 0.472715001375642,
"lb_loss": 0.9999999978790717
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 281000000,
"cumulative_training_bytes": 281006273,
"metrics": {
"loss": 0.4827076661080715,
"ce_loss": 0.47270767564481464,
"lb_loss": 0.9999999981530354
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 282000000,
"cumulative_training_bytes": 282004302,
"metrics": {
"loss": 0.4826935047070867,
"ce_loss": 0.4726935142438299,
"lb_loss": 0.999999998111598
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 283000000,
"cumulative_training_bytes": 283002020,
"metrics": {
"loss": 0.4826445411007718,
"ce_loss": 0.472644550637515,
"lb_loss": 0.9999999981536127
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 284000000,
"cumulative_training_bytes": 284001833,
"metrics": {
"loss": 0.4826856697701378,
"ce_loss": 0.472685679306881,
"lb_loss": 0.9999999981439036
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 285000000,
"cumulative_training_bytes": 285006144,
"metrics": {
"loss": 0.48266743957449537,
"ce_loss": 0.47266744911123854,
"lb_loss": 0.9999999983399754
}
},
{
"epoch": 6,
"checkpoint_type": "epoch",
"metrics": {
"loss": 0.4826675374587581,
"ce_loss": 0.4726675469955013,
"lb_loss": 0.9999999983528279,
"training_bytes": 47653391
},
"cumulative_training_bytes": 285920402,
"training_bytes_this_epoch": 47653391
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 286000000,
"cumulative_training_bytes": 286004101,
"metrics": {
"loss": 0.4783948768268932,
"ce_loss": 0.46839488636363635,
"lb_loss": 0.9999999837441877
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 287000000,
"cumulative_training_bytes": 287000512,
"metrics": {
"loss": 0.4791101411724767,
"ce_loss": 0.46911015070921985,
"lb_loss": 1.0
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 288000000,
"cumulative_training_bytes": 288007385,
"metrics": {
"loss": 0.47894315667204806,
"ce_loss": 0.46894316620879123,
"lb_loss": 0.9999999986900078
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 289000000,
"cumulative_training_bytes": 289001067,
"metrics": {
"loss": 0.47977744083546525,
"ce_loss": 0.4697774503722084,
"lb_loss": 1.000000001331121
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 290000000,
"cumulative_training_bytes": 290005553,
"metrics": {
"loss": 0.47966071728909954,
"ce_loss": 0.4696607268258427,
"lb_loss": 1.0000000011161918
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 291000000,
"cumulative_training_bytes": 291003954,
"metrics": {
"loss": 0.479647133723799,
"ce_loss": 0.4696471432605422,
"lb_loss": 1.0000000008078942
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 292000000,
"cumulative_training_bytes": 292001941,
"metrics": {
"loss": 0.47993072094184624,
"ce_loss": 0.4699307304785894,
"lb_loss": 0.9999999990991741
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 293000000,
"cumulative_training_bytes": 293006561,
"metrics": {
"loss": 0.4797550580308244,
"ce_loss": 0.4697550675675676,
"lb_loss": 0.9999999986468134
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 294000000,
"cumulative_training_bytes": 294002191,
"metrics": {
"loss": 0.4799922149893232,
"ce_loss": 0.46999222452606637,
"lb_loss": 0.9999999979660974
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 295000000,
"cumulative_training_bytes": 295005131,
"metrics": {
"loss": 0.4803671647887206,
"ce_loss": 0.47036717432546377,
"lb_loss": 0.9999999961302212
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 296000000,
"cumulative_training_bytes": 296006078,
"metrics": {
"loss": 0.48054443332582325,
"ce_loss": 0.4705444428625664,
"lb_loss": 0.9999999972392686
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 297000000,
"cumulative_training_bytes": 297002743,
"metrics": {
"loss": 0.4806437253457724,
"ce_loss": 0.47064373488251554,
"lb_loss": 0.9999999974461037
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 298000000,
"cumulative_training_bytes": 298000425,
"metrics": {
"loss": 0.4805099084245758,
"ce_loss": 0.47050991796131897,
"lb_loss": 0.9999999969763022
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 299000000,
"cumulative_training_bytes": 299006190,
"metrics": {
"loss": 0.4805110048075191,
"ce_loss": 0.4705110143442623,
"lb_loss": 0.9999999977665707
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 300000000,
"cumulative_training_bytes": 300006090,
"metrics": {
"loss": 0.4805480549684227,
"ce_loss": 0.47054806450516584,
"lb_loss": 0.9999999978284333
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 301000000,
"cumulative_training_bytes": 301000170,
"metrics": {
"loss": 0.4807665994652883,
"ce_loss": 0.4707666090020315,
"lb_loss": 0.9999999979718074
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 302000000,
"cumulative_training_bytes": 302000958,
"metrics": {
"loss": 0.4808426243918283,
"ce_loss": 0.47084263392857145,
"lb_loss": 0.9999999977009637
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 303000000,
"cumulative_training_bytes": 303002933,
"metrics": {
"loss": 0.4808974649702044,
"ce_loss": 0.4708974745069476,
"lb_loss": 0.9999999980229746
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 304000000,
"cumulative_training_bytes": 304005876,
"metrics": {
"loss": 0.48091397358137705,
"ce_loss": 0.4709139831181202,
"lb_loss": 0.9999999976531617
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 305000000,
"cumulative_training_bytes": 305007296,
"metrics": {
"loss": 0.4807987339423901,
"ce_loss": 0.47079874347913325,
"lb_loss": 0.9999999981582834
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 306000000,
"cumulative_training_bytes": 306001572,
"metrics": {
"loss": 0.4808528401486115,
"ce_loss": 0.4708528496853547,
"lb_loss": 0.9999999983177942
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 307000000,
"cumulative_training_bytes": 307002658,
"metrics": {
"loss": 0.4808918300473469,
"ce_loss": 0.4708918395840901,
"lb_loss": 0.999999998073079
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 308000000,
"cumulative_training_bytes": 308001046,
"metrics": {
"loss": 0.4807877983023134,
"ce_loss": 0.47078780783905655,
"lb_loss": 0.9999999978498498
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 309000000,
"cumulative_training_bytes": 309007041,
"metrics": {
"loss": 0.48085081446823197,
"ce_loss": 0.47085082400497513,
"lb_loss": 0.9999999974695208
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 310000000,
"cumulative_training_bytes": 310000806,
"metrics": {
"loss": 0.4808527825936225,
"ce_loss": 0.4708527921303657,
"lb_loss": 0.9999999975172628
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 311000000,
"cumulative_training_bytes": 311006686,
"metrics": {
"loss": 0.4808801844879821,
"ce_loss": 0.47088019402472525,
"lb_loss": 0.9999999978712627
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 312000000,
"cumulative_training_bytes": 312003061,
"metrics": {
"loss": 0.48086826519901726,
"ce_loss": 0.47086827473576043,
"lb_loss": 0.9999999976900138
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 313000000,
"cumulative_training_bytes": 313000859,
"metrics": {
"loss": 0.4808953347788677,
"ce_loss": 0.47089534431561086,
"lb_loss": 0.9999999976232311
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 314000000,
"cumulative_training_bytes": 314006832,
"metrics": {
"loss": 0.4809533795717628,
"ce_loss": 0.470953389108506,
"lb_loss": 0.9999999977575134
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 315000000,
"cumulative_training_bytes": 315007663,
"metrics": {
"loss": 0.4810331776506746,
"ce_loss": 0.47103318718741777,
"lb_loss": 0.9999999979289779
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 316000000,
"cumulative_training_bytes": 316001449,
"metrics": {
"loss": 0.4811241632807677,
"ce_loss": 0.47112417281751084,
"lb_loss": 0.9999999979064798
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 317000000,
"cumulative_training_bytes": 317001561,
"metrics": {
"loss": 0.48121448784626175,
"ce_loss": 0.4712144973830049,
"lb_loss": 0.9999999978419007
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 318000000,
"cumulative_training_bytes": 318006954,
"metrics": {
"loss": 0.48121900889561187,
"ce_loss": 0.47121901843235503,
"lb_loss": 0.999999997795581
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 319000000,
"cumulative_training_bytes": 319003501,
"metrics": {
"loss": 0.4812446206125278,
"ce_loss": 0.471244630149271,
"lb_loss": 0.999999997834314
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 320000000,
"cumulative_training_bytes": 320000438,
"metrics": {
"loss": 0.48125381685058555,
"ce_loss": 0.4712538263873287,
"lb_loss": 0.999999997991306
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 321000000,
"cumulative_training_bytes": 321001768,
"metrics": {
"loss": 0.48127089427163744,
"ce_loss": 0.4712709038083806,
"lb_loss": 0.9999999977365325
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 322000000,
"cumulative_training_bytes": 322002583,
"metrics": {
"loss": 0.4812634008441183,
"ce_loss": 0.47126341038086145,
"lb_loss": 0.9999999978753278
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 323000000,
"cumulative_training_bytes": 323002023,
"metrics": {
"loss": 0.48125308707762515,
"ce_loss": 0.4712530966143683,
"lb_loss": 0.9999999980927498
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 324000000,
"cumulative_training_bytes": 324002741,
"metrics": {
"loss": 0.4813194785285835,
"ce_loss": 0.47131948806532664,
"lb_loss": 0.99999999827476
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 325000000,
"cumulative_training_bytes": 325006173,
"metrics": {
"loss": 0.48130787273411463,
"ce_loss": 0.4713078822708578,
"lb_loss": 0.9999999981555946
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 326000000,
"cumulative_training_bytes": 326000306,
"metrics": {
"loss": 0.4813103896229207,
"ce_loss": 0.4713103991596639,
"lb_loss": 0.9999999982583059
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 327000000,
"cumulative_training_bytes": 327005036,
"metrics": {
"loss": 0.48133414187465984,
"ce_loss": 0.471334151411403,
"lb_loss": 0.9999999982786063
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 328000000,
"cumulative_training_bytes": 328006990,
"metrics": {
"loss": 0.4813546307870109,
"ce_loss": 0.47135464032375407,
"lb_loss": 0.9999999983087805
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 329000000,
"cumulative_training_bytes": 329005070,
"metrics": {
"loss": 0.4813430519415795,
"ce_loss": 0.47134306147832267,
"lb_loss": 0.9999999982843013
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 330000000,
"cumulative_training_bytes": 330001490,
"metrics": {
"loss": 0.48131808865273235,
"ce_loss": 0.4713180981894755,
"lb_loss": 0.9999999983954985
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 331000000,
"cumulative_training_bytes": 331007691,
"metrics": {
"loss": 0.48127986607881124,
"ce_loss": 0.4712798756155544,
"lb_loss": 0.9999999983502196
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 332000000,
"cumulative_training_bytes": 332005409,
"metrics": {
"loss": 0.4814225170457456,
"ce_loss": 0.4714225265824888,
"lb_loss": 0.9999999983561437
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 333000000,
"cumulative_training_bytes": 333001906,
"metrics": {
"loss": 0.4816718959560199,
"ce_loss": 0.47167190549276306,
"lb_loss": 0.9999999982551901
}
},
{
"epoch": 7,
"checkpoint_type": "epoch",
"metrics": {
"loss": 0.4817229831126784,
"ce_loss": 0.4717229926494216,
"lb_loss": 0.9999999982283324,
"training_bytes": 47653415
},
"cumulative_training_bytes": 333573817,
"training_bytes_this_epoch": 47653415
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 334000000,
"cumulative_training_bytes": 334004035,
"metrics": {
"loss": 0.4830050127846854,
"ce_loss": 0.47300502232142855,
"lb_loss": 0.9999999872275761
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 335000000,
"cumulative_training_bytes": 335002482,
"metrics": {
"loss": 0.4816376752750848,
"ce_loss": 0.47163768481182794,
"lb_loss": 0.9999999945522636
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 336000000,
"cumulative_training_bytes": 336003192,
"metrics": {
"loss": 0.48123298533707387,
"ce_loss": 0.47123299487381703,
"lb_loss": 0.9999999947352364
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 337000000,
"cumulative_training_bytes": 337002345,
"metrics": {
"loss": 0.48121870550799956,
"ce_loss": 0.4712187150447427,
"lb_loss": 0.9999999915993454
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 338000000,
"cumulative_training_bytes": 338006062,
"metrics": {
"loss": 0.48133054421973354,
"ce_loss": 0.4713305537564767,
"lb_loss": 0.9999999932056881
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 339000000,
"cumulative_training_bytes": 339006251,
"metrics": {
"loss": 0.48139633729424275,
"ce_loss": 0.4713963468309859,
"lb_loss": 0.9999999942074359
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 340000000,
"cumulative_training_bytes": 340006632,
"metrics": {
"loss": 0.48136963969036745,
"ce_loss": 0.4713696492271106,
"lb_loss": 0.9999999957475878
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 341000000,
"cumulative_training_bytes": 341002987,
"metrics": {
"loss": 0.4813286786712898,
"ce_loss": 0.471328688208033,
"lb_loss": 0.9999999964396814
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 342000000,
"cumulative_training_bytes": 342001745,
"metrics": {
"loss": 0.4810277493188427,
"ce_loss": 0.47102775885558584,
"lb_loss": 0.9999999965352432
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 343000000,
"cumulative_training_bytes": 343001355,
"metrics": {
"loss": 0.4808982693015996,
"ce_loss": 0.4708982788383428,
"lb_loss": 0.9999999960780047
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 344000000,
"cumulative_training_bytes": 344002997,
"metrics": {
"loss": 0.4810716566710395,
"ce_loss": 0.4710716662077827,
"lb_loss": 0.9999999964552304
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 345000000,
"cumulative_training_bytes": 345008016,
"metrics": {
"loss": 0.48147442988762096,
"ce_loss": 0.4714744394243641,
"lb_loss": 0.9999999960901906
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 346000000,
"cumulative_training_bytes": 346005271,
"metrics": {
"loss": 0.481584667572247,
"ce_loss": 0.47158467710899016,
"lb_loss": 0.9999999966233822
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 347000000,
"cumulative_training_bytes": 347007029,
"metrics": {
"loss": 0.4815711710188124,
"ce_loss": 0.4715711805555556,
"lb_loss": 0.9999999961282453
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 348000000,
"cumulative_training_bytes": 348004167,
"metrics": {
"loss": 0.48147504019800486,
"ce_loss": 0.471475049734748,
"lb_loss": 0.9999999969960524
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 349000000,
"cumulative_training_bytes": 349004042,
"metrics": {
"loss": 0.4814087606245472,
"ce_loss": 0.47140877016129035,
"lb_loss": 0.9999999970419531
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 350000000,
"cumulative_training_bytes": 350007816,
"metrics": {
"loss": 0.481473079932036,
"ce_loss": 0.47147308946877914,
"lb_loss": 0.9999999975835955
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 351000000,
"cumulative_training_bytes": 351001302,
"metrics": {
"loss": 0.4814454059399704,
"ce_loss": 0.47144541547671354,
"lb_loss": 0.9999999974073551
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 352000000,
"cumulative_training_bytes": 352004241,
"metrics": {
"loss": 0.481444767415895,
"ce_loss": 0.47144477695263814,
"lb_loss": 0.9999999979446674
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 353000000,
"cumulative_training_bytes": 353002044,
"metrics": {
"loss": 0.48147728557752,
"ce_loss": 0.4714772951142632,
"lb_loss": 0.9999999978863601
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 354000000,
"cumulative_training_bytes": 354002090,
"metrics": {
"loss": 0.48136562600486105,
"ce_loss": 0.4713656355416042,
"lb_loss": 0.9999999975648777
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 355000000,
"cumulative_training_bytes": 355007491,
"metrics": {
"loss": 0.48128996009526825,
"ce_loss": 0.4712899696320114,
"lb_loss": 0.9999999974658975
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 356000000,
"cumulative_training_bytes": 356005562,
"metrics": {
"loss": 0.4812372408046703,
"ce_loss": 0.47123725034141345,
"lb_loss": 0.9999999976190702
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 357000000,
"cumulative_training_bytes": 357002412,
"metrics": {
"loss": 0.48114877785292665,
"ce_loss": 0.4711487873896698,
"lb_loss": 0.999999997993044
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 358000000,
"cumulative_training_bytes": 358000907,
"metrics": {
"loss": 0.48121007079241046,
"ce_loss": 0.4712100803291536,
"lb_loss": 0.9999999978512432
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 359000000,
"cumulative_training_bytes": 359004191,
"metrics": {
"loss": 0.4811665489170358,
"ce_loss": 0.47116655845377897,
"lb_loss": 0.9999999974155167
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 360000000,
"cumulative_training_bytes": 360005688,
"metrics": {
"loss": 0.4811812123998154,
"ce_loss": 0.47118122193655854,
"lb_loss": 0.9999999971682614
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 361000000,
"cumulative_training_bytes": 361001697,
"metrics": {
"loss": 0.4811540463328827,
"ce_loss": 0.4711540558696259,
"lb_loss": 0.9999999971379121
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 362000000,
"cumulative_training_bytes": 362004914,
"metrics": {
"loss": 0.4811149918098768,
"ce_loss": 0.47111500134662,
"lb_loss": 0.9999999970623081
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 363000000,
"cumulative_training_bytes": 363004386,
"metrics": {
"loss": 0.48118021857850135,
"ce_loss": 0.4711802281152445,
"lb_loss": 0.9999999971469161
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 364000000,
"cumulative_training_bytes": 364001806,
"metrics": {
"loss": 0.4812275201877158,
"ce_loss": 0.47122752972445897,
"lb_loss": 0.9999999971802533
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 365000000,
"cumulative_training_bytes": 365007448,
"metrics": {
"loss": 0.48119603568250163,
"ce_loss": 0.4711960452192448,
"lb_loss": 0.9999999972121579
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 366000000,
"cumulative_training_bytes": 366007819,
"metrics": {
"loss": 0.4811812530045694,
"ce_loss": 0.47118126254131254,
"lb_loss": 0.9999999974109408
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 367000000,
"cumulative_training_bytes": 367003761,
"metrics": {
"loss": 0.4811495695974758,
"ce_loss": 0.47114957913421895,
"lb_loss": 0.9999999974880315
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 368000000,
"cumulative_training_bytes": 368001579,
"metrics": {
"loss": 0.48113743278591636,
"ce_loss": 0.4711374423226595,
"lb_loss": 0.9999999974816806
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 369000000,
"cumulative_training_bytes": 369002487,
"metrics": {
"loss": 0.4811002396259621,
"ce_loss": 0.47110024916270526,
"lb_loss": 0.9999999975400848
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 370000000,
"cumulative_training_bytes": 370006595,
"metrics": {
"loss": 0.4810897184785962,
"ce_loss": 0.4710897280153394,
"lb_loss": 0.9999999973698308
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 371000000,
"cumulative_training_bytes": 371003993,
"metrics": {
"loss": 0.4811078026436618,
"ce_loss": 0.471107812180405,
"lb_loss": 0.9999999973422351
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 372000000,
"cumulative_training_bytes": 372005909,
"metrics": {
"loss": 0.4811544351843724,
"ce_loss": 0.47115444472111556,
"lb_loss": 0.9999999974828317
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 373000000,
"cumulative_training_bytes": 373004014,
"metrics": {
"loss": 0.4811229425263636,
"ce_loss": 0.4711229520631068,
"lb_loss": 0.9999999972570289
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 374000000,
"cumulative_training_bytes": 374004738,
"metrics": {
"loss": 0.4811517313740692,
"ce_loss": 0.47115174091081236,
"lb_loss": 0.9999999972347778
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 375000000,
"cumulative_training_bytes": 375000531,
"metrics": {
"loss": 0.4811597229526303,
"ce_loss": 0.4711597324893735,
"lb_loss": 0.9999999972461354
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 376000000,
"cumulative_training_bytes": 376004266,
"metrics": {
"loss": 0.48114152894214535,
"ce_loss": 0.4711415384788885,
"lb_loss": 0.9999999971499042
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 377000000,
"cumulative_training_bytes": 377004301,
"metrics": {
"loss": 0.48106536643293246,
"ce_loss": 0.4710653759696756,
"lb_loss": 0.9999999970260729
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 378000000,
"cumulative_training_bytes": 378002475,
"metrics": {
"loss": 0.48104782418438746,
"ce_loss": 0.4710478337211306,
"lb_loss": 0.9999999969180638
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 379000000,
"cumulative_training_bytes": 379003430,
"metrics": {
"loss": 0.4810510765495538,
"ce_loss": 0.471051086086297,
"lb_loss": 0.9999999969760663
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 380000000,
"cumulative_training_bytes": 380004459,
"metrics": {
"loss": 0.4810564429904666,
"ce_loss": 0.4710564525272098,
"lb_loss": 0.9999999969922458
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 381000000,
"cumulative_training_bytes": 381002688,
"metrics": {
"loss": 0.4810578564424302,
"ce_loss": 0.4710578659791734,
"lb_loss": 0.9999999970938646
}
},
{
"epoch": 8,
"checkpoint_type": "epoch",
"metrics": {
"loss": 0.4810454771880319,
"ce_loss": 0.4710454867247751,
"lb_loss": 0.9999999970791426,
"training_bytes": 47653397
},
"cumulative_training_bytes": 381227214,
"training_bytes_this_epoch": 47653397
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 382000000,
"cumulative_training_bytes": 382003673,
"metrics": {
"loss": 0.47660348675038555,
"ce_loss": 0.4666034962871287,
"lb_loss": 0.99999999763942
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 383000000,
"cumulative_training_bytes": 383005458,
"metrics": {
"loss": 0.47849743119601545,
"ce_loss": 0.4684974407327586,
"lb_loss": 0.9999999971739177
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 384000000,
"cumulative_training_bytes": 384002363,
"metrics": {
"loss": 0.4783237560019309,
"ce_loss": 0.46832376553867405,
"lb_loss": 0.9999999990120777
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 385000000,
"cumulative_training_bytes": 385005560,
"metrics": {
"loss": 0.4780487670859749,
"ce_loss": 0.46804877662271804,
"lb_loss": 0.999999998549177
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 386000000,
"cumulative_training_bytes": 386002393,
"metrics": {
"loss": 0.47821703500579293,
"ce_loss": 0.4682170445425361,
"lb_loss": 0.999999998373549
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 387000000,
"cumulative_training_bytes": 387003159,
"metrics": {
"loss": 0.4784002930163072,
"ce_loss": 0.4684003025530504,
"lb_loss": 0.9999999972332061
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 388000000,
"cumulative_training_bytes": 388000670,
"metrics": {
"loss": 0.47836776124945596,
"ce_loss": 0.4683677707861991,
"lb_loss": 0.999999998044644
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 389000000,
"cumulative_training_bytes": 389001849,
"metrics": {
"loss": 0.47809766595586767,
"ce_loss": 0.46809767549261083,
"lb_loss": 0.9999999978272198
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 390000000,
"cumulative_training_bytes": 390002629,
"metrics": {
"loss": 0.47807849722590956,
"ce_loss": 0.4680785067626527,
"lb_loss": 0.99999999849168
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 391000000,
"cumulative_training_bytes": 391006180,
"metrics": {
"loss": 0.47816267644602894,
"ce_loss": 0.4681626859827721,
"lb_loss": 0.9999999978062504
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 392000000,
"cumulative_training_bytes": 392004730,
"metrics": {
"loss": 0.478326905857433,
"ce_loss": 0.46832691539417615,
"lb_loss": 0.9999999977563593
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 393000000,
"cumulative_training_bytes": 393003485,
"metrics": {
"loss": 0.47875887407274353,
"ce_loss": 0.4687588836094867,
"lb_loss": 0.999999997250208
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 394000000,
"cumulative_training_bytes": 394000207,
"metrics": {
"loss": 0.4788389284500753,
"ce_loss": 0.46883893798681847,
"lb_loss": 0.9999999963930083
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 395000000,
"cumulative_training_bytes": 395003542,
"metrics": {
"loss": 0.4789453029632568,
"ce_loss": 0.4689453125,
"lb_loss": 0.9999999956621064
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 396000000,
"cumulative_training_bytes": 396000470,
"metrics": {
"loss": 0.47899286611092523,
"ce_loss": 0.4689928756476684,
"lb_loss": 0.9999999959542961
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 397000000,
"cumulative_training_bytes": 397005545,
"metrics": {
"loss": 0.4790248115452559,
"ce_loss": 0.46902482108199905,
"lb_loss": 0.9999999956619618
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 398000000,
"cumulative_training_bytes": 398006179,
"metrics": {
"loss": 0.47905739380495393,
"ce_loss": 0.4690574033416971,
"lb_loss": 0.9999999957308717
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 399000000,
"cumulative_training_bytes": 399000874,
"metrics": {
"loss": 0.4790856049658408,
"ce_loss": 0.46908561450258396,
"lb_loss": 0.9999999954051544
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 400000000,
"cumulative_training_bytes": 400002954,
"metrics": {
"loss": 0.4790493687755275,
"ce_loss": 0.46904937831227067,
"lb_loss": 0.9999999951159667
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 401000000,
"cumulative_training_bytes": 401003161,
"metrics": {
"loss": 0.4790388382178058,
"ce_loss": 0.46903884775454896,
"lb_loss": 0.9999999944848974
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 402000000,
"cumulative_training_bytes": 402007261,
"metrics": {
"loss": 0.47903641041535705,
"ce_loss": 0.4690364199521002,
"lb_loss": 0.9999999945973682
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 403000000,
"cumulative_training_bytes": 403002393,
"metrics": {
"loss": 0.47906452325158316,
"ce_loss": 0.4690645327883263,
"lb_loss": 0.99999999488624
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 404000000,
"cumulative_training_bytes": 404005905,
"metrics": {
"loss": 0.4790848118918283,
"ce_loss": 0.46908482142857144,
"lb_loss": 0.9999999945704676
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 405000000,
"cumulative_training_bytes": 405004571,
"metrics": {
"loss": 0.4790582130880556,
"ce_loss": 0.46905822262479874,
"lb_loss": 0.9999999946442203
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 406000000,
"cumulative_training_bytes": 406003780,
"metrics": {
"loss": 0.47909593810622436,
"ce_loss": 0.4690959476429675,
"lb_loss": 0.9999999946936204
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 407000000,
"cumulative_training_bytes": 407001187,
"metrics": {
"loss": 0.4790645799877739,
"ce_loss": 0.4690645895245171,
"lb_loss": 0.9999999947037774
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 408000000,
"cumulative_training_bytes": 408003413,
"metrics": {
"loss": 0.47919748849672367,
"ce_loss": 0.46919749803346683,
"lb_loss": 0.9999999948169874
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 409000000,
"cumulative_training_bytes": 409005684,
"metrics": {
"loss": 0.4793487980177096,
"ce_loss": 0.46934880755445274,
"lb_loss": 0.9999999948891524
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 410000000,
"cumulative_training_bytes": 410004022,
"metrics": {
"loss": 0.4794526581056198,
"ce_loss": 0.469452667642363,
"lb_loss": 0.9999999954638296
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 411000000,
"cumulative_training_bytes": 411000093,
"metrics": {
"loss": 0.479534154075654,
"ce_loss": 0.46953416361239714,
"lb_loss": 0.9999999953702153
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 412000000,
"cumulative_training_bytes": 412003081,
"metrics": {
"loss": 0.479515883720286,
"ce_loss": 0.46951589325702914,
"lb_loss": 0.9999999951948483
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 413000000,
"cumulative_training_bytes": 413002872,
"metrics": {
"loss": 0.47955900778253396,
"ce_loss": 0.4695590173192771,
"lb_loss": 0.9999999952316284
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 414000000,
"cumulative_training_bytes": 414004353,
"metrics": {
"loss": 0.47949838415484564,
"ce_loss": 0.4694983936915888,
"lb_loss": 0.9999999954460937
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 415000000,
"cumulative_training_bytes": 415000550,
"metrics": {
"loss": 0.47950582459950714,
"ce_loss": 0.4695058341362503,
"lb_loss": 0.9999999952840578
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 416000000,
"cumulative_training_bytes": 416004238,
"metrics": {
"loss": 0.47945263384722864,
"ce_loss": 0.4694526433839718,
"lb_loss": 0.9999999952625986
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 417000000,
"cumulative_training_bytes": 417004582,
"metrics": {
"loss": 0.47940799961351366,
"ce_loss": 0.4694080091502568,
"lb_loss": 0.9999999954964812
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 418000000,
"cumulative_training_bytes": 418000528,
"metrics": {
"loss": 0.4794532293480965,
"ce_loss": 0.46945323888483964,
"lb_loss": 0.9999999955563385
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 419000000,
"cumulative_training_bytes": 419005587,
"metrics": {
"loss": 0.4794745432201999,
"ce_loss": 0.46947455275694305,
"lb_loss": 0.9999999953964384
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 420000000,
"cumulative_training_bytes": 420003878,
"metrics": {
"loss": 0.47951866702822527,
"ce_loss": 0.46951867656496843,
"lb_loss": 0.9999999954449057
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 421000000,
"cumulative_training_bytes": 421005882,
"metrics": {
"loss": 0.4795383837500711,
"ce_loss": 0.46953839328681424,
"lb_loss": 0.999999995399141
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 422000000,
"cumulative_training_bytes": 422001156,
"metrics": {
"loss": 0.4795866248529282,
"ce_loss": 0.46958663438967135,
"lb_loss": 0.9999999953547554
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 423000000,
"cumulative_training_bytes": 423005818,
"metrics": {
"loss": 0.47955937749129934,
"ce_loss": 0.4695593870280425,
"lb_loss": 0.9999999953570429
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 424000000,
"cumulative_training_bytes": 424005361,
"metrics": {
"loss": 0.47958130087134704,
"ce_loss": 0.4695813104080902,
"lb_loss": 0.9999999956472714
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 425000000,
"cumulative_training_bytes": 425000747,
"metrics": {
"loss": 0.4796109096516423,
"ce_loss": 0.4696109191883855,
"lb_loss": 0.9999999956836937
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 426000000,
"cumulative_training_bytes": 426002610,
"metrics": {
"loss": 0.479592292318592,
"ce_loss": 0.4695923018553352,
"lb_loss": 0.9999999958517287
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 427000000,
"cumulative_training_bytes": 427005445,
"metrics": {
"loss": 0.4796241437497596,
"ce_loss": 0.46962415328650275,
"lb_loss": 0.9999999959326484
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 428000000,
"cumulative_training_bytes": 428003339,
"metrics": {
"loss": 0.47960202485923,
"ce_loss": 0.46960203439597314,
"lb_loss": 0.9999999960387157
}
},
{
"epoch": 9,
"checkpoint_type": "epoch",
"metrics": {
"loss": 0.4795953822932697,
"ce_loss": 0.46959539183001286,
"lb_loss": 0.9999999961119078,
"training_bytes": 47653406
},
"cumulative_training_bytes": 428880620,
"training_bytes_this_epoch": 47653406
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 429000000,
"cumulative_training_bytes": 429002185,
"metrics": {
"loss": 0.47997069358825684,
"ce_loss": 0.469970703125,
"lb_loss": 0.9999999925494194
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 430000000,
"cumulative_training_bytes": 430003863,
"metrics": {
"loss": 0.476517847606114,
"ce_loss": 0.46651785714285715,
"lb_loss": 0.9999999862138916
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 431000000,
"cumulative_training_bytes": 431005314,
"metrics": {
"loss": 0.47611537940210574,
"ce_loss": 0.4661153889388489,
"lb_loss": 0.9999999903517661
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 432000000,
"cumulative_training_bytes": 432001200,
"metrics": {
"loss": 0.4762607135024725,
"ce_loss": 0.4662607230392157,
"lb_loss": 0.9999999903580722
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 433000000,
"cumulative_training_bytes": 433004578,
"metrics": {
"loss": 0.4760105424113088,
"ce_loss": 0.46601055194805197,
"lb_loss": 0.9999999920379695
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 434000000,
"cumulative_training_bytes": 434007002,
"metrics": {
"loss": 0.4758844354259434,
"ce_loss": 0.46588444496268655,
"lb_loss": 0.9999999925271789
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 435000000,
"cumulative_training_bytes": 435000122,
"metrics": {
"loss": 0.4758044139613795,
"ce_loss": 0.4658044234981227,
"lb_loss": 0.9999999929130898
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 436000000,
"cumulative_training_bytes": 436000104,
"metrics": {
"loss": 0.4760492176137945,
"ce_loss": 0.46604922715053765,
"lb_loss": 0.9999999934627164
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 437000000,
"cumulative_training_bytes": 437004132,
"metrics": {
"loss": 0.4761286426781485,
"ce_loss": 0.46612865221489164,
"lb_loss": 0.9999999932586641
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 438000000,
"cumulative_training_bytes": 438006173,
"metrics": {
"loss": 0.47627581365956556,
"ce_loss": 0.4662758231963087,
"lb_loss": 0.999999993699509
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 439000000,
"cumulative_training_bytes": 439002702,
"metrics": {
"loss": 0.4764555856788393,
"ce_loss": 0.46645559521558244,
"lb_loss": 0.999999995310981
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 440000000,
"cumulative_training_bytes": 440005689,
"metrics": {
"loss": 0.4764527498404076,
"ce_loss": 0.46645275937715075,
"lb_loss": 0.9999999952414736
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 441000000,
"cumulative_training_bytes": 441000190,
"metrics": {
"loss": 0.47656120729838003,
"ce_loss": 0.4665612168351232,
"lb_loss": 0.9999999955192971
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 442000000,
"cumulative_training_bytes": 442005265,
"metrics": {
"loss": 0.4766282175636069,
"ce_loss": 0.46662822710035007,
"lb_loss": 0.9999999957226539
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 443000000,
"cumulative_training_bytes": 443003341,
"metrics": {
"loss": 0.47682650638466606,
"ce_loss": 0.4668265159214092,
"lb_loss": 0.9999999960586631
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 444000000,
"cumulative_training_bytes": 444007542,
"metrics": {
"loss": 0.47688483516214347,
"ce_loss": 0.46688484469888664,
"lb_loss": 0.9999999959278203
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 445000000,
"cumulative_training_bytes": 445007323,
"metrics": {
"loss": 0.47694339620874593,
"ce_loss": 0.4669434057454891,
"lb_loss": 0.999999996235794
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 446000000,
"cumulative_training_bytes": 446000020,
"metrics": {
"loss": 0.4770030092467989,
"ce_loss": 0.46700301878354206,
"lb_loss": 0.9999999962413887
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 447000000,
"cumulative_training_bytes": 447002709,
"metrics": {
"loss": 0.4770996947302615,
"ce_loss": 0.46709970426700465,
"lb_loss": 0.9999999963990435
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 448000000,
"cumulative_training_bytes": 448004545,
"metrics": {
"loss": 0.47733323205653144,
"ce_loss": 0.4673332415932746,
"lb_loss": 0.9999999962538314
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 449000000,
"cumulative_training_bytes": 449006756,
"metrics": {
"loss": 0.4773651958645501,
"ce_loss": 0.4673652054012933,
"lb_loss": 0.9999999961684348
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 450000000,
"cumulative_training_bytes": 450004303,
"metrics": {
"loss": 0.47728603160135036,
"ce_loss": 0.4672860411380935,
"lb_loss": 0.9999999964785947
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 451000000,
"cumulative_training_bytes": 451006142,
"metrics": {
"loss": 0.47733279215010804,
"ce_loss": 0.4673328016868512,
"lb_loss": 0.9999999963082244
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 452000000,
"cumulative_training_bytes": 452005227,
"metrics": {
"loss": 0.477412995262992,
"ce_loss": 0.4674130047997352,
"lb_loss": 0.9999999966853425
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 453000000,
"cumulative_training_bytes": 453006680,
"metrics": {
"loss": 0.47739544374688625,
"ce_loss": 0.4673954532836294,
"lb_loss": 0.9999999969365633
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 454000000,
"cumulative_training_bytes": 454003038,
"metrics": {
"loss": 0.4773985136579552,
"ce_loss": 0.4673985231946984,
"lb_loss": 0.999999997039745
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 455000000,
"cumulative_training_bytes": 455007108,
"metrics": {
"loss": 0.4773937331383228,
"ce_loss": 0.46739374267506595,
"lb_loss": 0.9999999970835114
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 456000000,
"cumulative_training_bytes": 456006436,
"metrics": {
"loss": 0.47737679419596923,
"ce_loss": 0.4673768037327124,
"lb_loss": 0.9999999970559377
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 457000000,
"cumulative_training_bytes": 457003387,
"metrics": {
"loss": 0.47746527769440306,
"ce_loss": 0.4674652872311462,
"lb_loss": 0.9999999969816324
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 458000000,
"cumulative_training_bytes": 458006870,
"metrics": {
"loss": 0.4774874419945397,
"ce_loss": 0.46748745153128285,
"lb_loss": 0.9999999967565296
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 459000000,
"cumulative_training_bytes": 459003680,
"metrics": {
"loss": 0.47746660761501075,
"ce_loss": 0.4674666171517539,
"lb_loss": 0.9999999966970482
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 460000000,
"cumulative_training_bytes": 460005767,
"metrics": {
"loss": 0.47751757579536014,
"ce_loss": 0.4675175853321033,
"lb_loss": 0.999999996686187
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 461000000,
"cumulative_training_bytes": 461002393,
"metrics": {
"loss": 0.4775231765478814,
"ce_loss": 0.46752318608462456,
"lb_loss": 0.9999999966183777
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 462000000,
"cumulative_training_bytes": 462006702,
"metrics": {
"loss": 0.4775458794195675,
"ce_loss": 0.46754588895631066,
"lb_loss": 0.9999999969136754
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 463000000,
"cumulative_training_bytes": 463005861,
"metrics": {
"loss": 0.47760887816799097,
"ce_loss": 0.46760888770473413,
"lb_loss": 0.9999999970980014
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 464000000,
"cumulative_training_bytes": 464001234,
"metrics": {
"loss": 0.4776744304567166,
"ce_loss": 0.46767443999345976,
"lb_loss": 0.9999999970632986
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 465000000,
"cumulative_training_bytes": 465004728,
"metrics": {
"loss": 0.4777479740137972,
"ce_loss": 0.46774798355054037,
"lb_loss": 0.9999999972338595
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 466000000,
"cumulative_training_bytes": 466001776,
"metrics": {
"loss": 0.477748253636076,
"ce_loss": 0.46774826317281915,
"lb_loss": 0.9999999973694795
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 467000000,
"cumulative_training_bytes": 467002535,
"metrics": {
"loss": 0.477785585970285,
"ce_loss": 0.46778559550702814,
"lb_loss": 0.9999999974626135
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 468000000,
"cumulative_training_bytes": 468004269,
"metrics": {
"loss": 0.4778305581114666,
"ce_loss": 0.46783056764820974,
"lb_loss": 0.9999999974810011
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 469000000,
"cumulative_training_bytes": 469006908,
"metrics": {
"loss": 0.4778490640515819,
"ce_loss": 0.46784907358832506,
"lb_loss": 0.999999997464358
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 470000000,
"cumulative_training_bytes": 470004722,
"metrics": {
"loss": 0.47785450519240796,
"ce_loss": 0.4678545147291511,
"lb_loss": 0.9999999974369559
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 471000000,
"cumulative_training_bytes": 471002302,
"metrics": {
"loss": 0.4778841831431914,
"ce_loss": 0.4678841926799346,
"lb_loss": 0.9999999974975149
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 472000000,
"cumulative_training_bytes": 472007065,
"metrics": {
"loss": 0.4779119462372671,
"ce_loss": 0.4679119557740103,
"lb_loss": 0.9999999975768749
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 473000000,
"cumulative_training_bytes": 473006577,
"metrics": {
"loss": 0.4779428524080868,
"ce_loss": 0.46794286194483,
"lb_loss": 0.9999999976112642
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 474000000,
"cumulative_training_bytes": 474004871,
"metrics": {
"loss": 0.4779470685299348,
"ce_loss": 0.46794707806667796,
"lb_loss": 0.9999999978662063
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 475000000,
"cumulative_training_bytes": 475004074,
"metrics": {
"loss": 0.47796439586249323,
"ce_loss": 0.4679644053992364,
"lb_loss": 0.9999999978924653
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 476000000,
"cumulative_training_bytes": 476004262,
"metrics": {
"loss": 0.4779159292835363,
"ce_loss": 0.4679159388202795,
"lb_loss": 0.9999999980532119
}
},
{
"epoch": 10,
"checkpoint_type": "epoch",
"metrics": {
"loss": 0.477952924669555,
"ce_loss": 0.4679529342062982,
"lb_loss": 0.9999999979314583,
"training_bytes": 47653389
},
"cumulative_training_bytes": 476534009,
"training_bytes_this_epoch": 47653389
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 477000000,
"cumulative_training_bytes": 477001318,
"metrics": {
"loss": 0.47401126095506013,
"ce_loss": 0.4640112704918033,
"lb_loss": 1.0000000039085013
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 478000000,
"cumulative_training_bytes": 478000704,
"metrics": {
"loss": 0.47289061546325684,
"ce_loss": 0.462890625,
"lb_loss": 0.9999999971914042
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 479000000,
"cumulative_training_bytes": 479001718,
"metrics": {
"loss": 0.4746253786620146,
"ce_loss": 0.46462538819875776,
"lb_loss": 0.999999995927633
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 480000000,
"cumulative_training_bytes": 480006675,
"metrics": {
"loss": 0.47415389498864313,
"ce_loss": 0.4641539045253863,
"lb_loss": 0.9999999944737415
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 481000000,
"cumulative_training_bytes": 481002129,
"metrics": {
"loss": 0.4743378710378709,
"ce_loss": 0.46433788057461406,
"lb_loss": 0.9999999941724447
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 482000000,
"cumulative_training_bytes": 482003899,
"metrics": {
"loss": 0.47443615555429325,
"ce_loss": 0.4644361650910364,
"lb_loss": 0.999999993655528
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 483000000,
"cumulative_training_bytes": 483006727,
"metrics": {
"loss": 0.47455712803722133,
"ce_loss": 0.4645571375739645,
"lb_loss": 0.9999999937221143
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 484000000,
"cumulative_training_bytes": 484005792,
"metrics": {
"loss": 0.47493579348579784,
"ce_loss": 0.464935803022541,
"lb_loss": 0.999999994320459
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 485000000,
"cumulative_training_bytes": 485004003,
"metrics": {
"loss": 0.47497618564860944,
"ce_loss": 0.4649761951853526,
"lb_loss": 0.999999995473065
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 486000000,
"cumulative_training_bytes": 486002749,
"metrics": {
"loss": 0.4750080810781436,
"ce_loss": 0.46500809061488674,
"lb_loss": 0.9999999949364986
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 487000000,
"cumulative_training_bytes": 487001523,
"metrics": {
"loss": 0.47491094168946474,
"ce_loss": 0.4649109512262079,
"lb_loss": 0.9999999950256738
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 488000000,
"cumulative_training_bytes": 488005452,
"metrics": {
"loss": 0.4753404965547121,
"ce_loss": 0.4653405060914553,
"lb_loss": 0.9999999949069462
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 489000000,
"cumulative_training_bytes": 489002785,
"metrics": {
"loss": 0.4756307490627654,
"ce_loss": 0.4656307585995086,
"lb_loss": 0.9999999955333129
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 490000000,
"cumulative_training_bytes": 490003743,
"metrics": {
"loss": 0.4758719347497833,
"ce_loss": 0.46587194428652645,
"lb_loss": 0.9999999956287668
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 491000000,
"cumulative_training_bytes": 491003982,
"metrics": {
"loss": 0.4757608510376348,
"ce_loss": 0.465760860574378,
"lb_loss": 0.9999999958980392
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 492000000,
"cumulative_training_bytes": 492005024,
"metrics": {
"loss": 0.4758957206612766,
"ce_loss": 0.4658957301980198,
"lb_loss": 0.9999999958984923
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 493000000,
"cumulative_training_bytes": 493004726,
"metrics": {
"loss": 0.4758511895911638,
"ce_loss": 0.46585119912790696,
"lb_loss": 0.9999999962019366
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 494000000,
"cumulative_training_bytes": 494006508,
"metrics": {
"loss": 0.47593461343563737,
"ce_loss": 0.46593462297238053,
"lb_loss": 0.9999999958974445
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 495000000,
"cumulative_training_bytes": 495003897,
"metrics": {
"loss": 0.4759487020974335,
"ce_loss": 0.46594871163417667,
"lb_loss": 0.9999999958219888
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 496000000,
"cumulative_training_bytes": 496001530,
"metrics": {
"loss": 0.4761391648338312,
"ce_loss": 0.46613917437057434,
"lb_loss": 0.9999999956152366
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 497000000,
"cumulative_training_bytes": 497005237,
"metrics": {
"loss": 0.47611367359082885,
"ce_loss": 0.466113683127572,
"lb_loss": 0.999999995807829
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 498000000,
"cumulative_training_bytes": 498004778,
"metrics": {
"loss": 0.4762298741116163,
"ce_loss": 0.4662298836483595,
"lb_loss": 0.9999999961099679
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 499000000,
"cumulative_training_bytes": 499000975,
"metrics": {
"loss": 0.47627962683851244,
"ce_loss": 0.4662796363752556,
"lb_loss": 0.9999999963635885
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 500000000,
"cumulative_training_bytes": 500006091,
"metrics": {
"loss": 0.47628707753503496,
"ce_loss": 0.46628708707177813,
"lb_loss": 0.999999996227308
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 501000000,
"cumulative_training_bytes": 501000599,
"metrics": {
"loss": 0.4763518341643523,
"ce_loss": 0.46635184370109545,
"lb_loss": 0.9999999964181246
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 502000000,
"cumulative_training_bytes": 502000922,
"metrics": {
"loss": 0.4764045962209237,
"ce_loss": 0.46640460575766685,
"lb_loss": 0.9999999965412819
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 503000000,
"cumulative_training_bytes": 503003452,
"metrics": {
"loss": 0.47639347198625365,
"ce_loss": 0.4663934815229968,
"lb_loss": 0.999999996551655
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 504000000,
"cumulative_training_bytes": 504004414,
"metrics": {
"loss": 0.4764996507893438,
"ce_loss": 0.466499660326087,
"lb_loss": 0.9999999965446583
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 505000000,
"cumulative_training_bytes": 505006535,
"metrics": {
"loss": 0.47654373122018073,
"ce_loss": 0.4665437407569239,
"lb_loss": 0.9999999965541816
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 506000000,
"cumulative_training_bytes": 506002066,
"metrics": {
"loss": 0.476615203018466,
"ce_loss": 0.46661521255520916,
"lb_loss": 0.9999999968099359
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 507000000,
"cumulative_training_bytes": 507007439,
"metrics": {
"loss": 0.47662020089039253,
"ce_loss": 0.4666202104271357,
"lb_loss": 0.9999999967951272
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 508000000,
"cumulative_training_bytes": 508001710,
"metrics": {
"loss": 0.47666380585255125,
"ce_loss": 0.4666638153892944,
"lb_loss": 0.9999999968529908
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 509000000,
"cumulative_training_bytes": 509001905,
"metrics": {
"loss": 0.47671074559176424,
"ce_loss": 0.4667107551285074,
"lb_loss": 0.9999999967674915
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 510000000,
"cumulative_training_bytes": 510006108,
"metrics": {
"loss": 0.4766892023514087,
"ce_loss": 0.4666892118881519,
"lb_loss": 0.999999996632583
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 511000000,
"cumulative_training_bytes": 511002126,
"metrics": {
"loss": 0.47665283225579347,
"ce_loss": 0.46665284179253663,
"lb_loss": 0.9999999965179872
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 512000000,
"cumulative_training_bytes": 512004051,
"metrics": {
"loss": 0.47662187693238467,
"ce_loss": 0.46662188646912783,
"lb_loss": 0.9999999966157122
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 513000000,
"cumulative_training_bytes": 513002597,
"metrics": {
"loss": 0.47660819736161886,
"ce_loss": 0.466608206898362,
"lb_loss": 0.9999999966830678
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 514000000,
"cumulative_training_bytes": 514004929,
"metrics": {
"loss": 0.4766356019235062,
"ce_loss": 0.46663561146024934,
"lb_loss": 0.9999999970276858
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 515000000,
"cumulative_training_bytes": 515000236,
"metrics": {
"loss": 0.47663549837685426,
"ce_loss": 0.4666355079135974,
"lb_loss": 0.9999999970452804
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 516000000,
"cumulative_training_bytes": 516005322,
"metrics": {
"loss": 0.4766574106708626,
"ce_loss": 0.46665742020760576,
"lb_loss": 0.9999999971088163
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 517000000,
"cumulative_training_bytes": 517005666,
"metrics": {
"loss": 0.47670004072342714,
"ce_loss": 0.4667000502601703,
"lb_loss": 0.9999999968872504
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 518000000,
"cumulative_training_bytes": 518006360,
"metrics": {
"loss": 0.47669515147433716,
"ce_loss": 0.46669516101108033,
"lb_loss": 0.9999999968519061
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 519000000,
"cumulative_training_bytes": 519004375,
"metrics": {
"loss": 0.4766806289889557,
"ce_loss": 0.46668063852569885,
"lb_loss": 0.9999999968289685
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 520000000,
"cumulative_training_bytes": 520002998,
"metrics": {
"loss": 0.476721162943877,
"ce_loss": 0.4667211724806202,
"lb_loss": 0.9999999969336582
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 521000000,
"cumulative_training_bytes": 521004681,
"metrics": {
"loss": 0.47674204208199283,
"ce_loss": 0.466742051618736,
"lb_loss": 0.9999999968796605
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 522000000,
"cumulative_training_bytes": 522005267,
"metrics": {
"loss": 0.4767827219174502,
"ce_loss": 0.46678273145419336,
"lb_loss": 0.9999999969083478
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 523000000,
"cumulative_training_bytes": 523004469,
"metrics": {
"loss": 0.47677980619743615,
"ce_loss": 0.4667798157341793,
"lb_loss": 0.9999999967781273
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 524000000,
"cumulative_training_bytes": 524001588,
"metrics": {
"loss": 0.4768444411811146,
"ce_loss": 0.46684445071785774,
"lb_loss": 0.9999999967404462
}
},
{
"epoch": 11,
"checkpoint_type": "epoch",
"metrics": {
"loss": 0.4768498985810268,
"ce_loss": 0.46684990811776994,
"lb_loss": 0.9999999967918451,
"training_bytes": 47653417
},
"cumulative_training_bytes": 524187426,
"training_bytes_this_epoch": 47653417
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 525000000,
"cumulative_training_bytes": 525004346,
"metrics": {
"loss": 0.47418661429503256,
"ce_loss": 0.4641866238317757,
"lb_loss": 1.0000000077987385
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 526000000,
"cumulative_training_bytes": 526005081,
"metrics": {
"loss": 0.4731121884674585,
"ce_loss": 0.46311219800420167,
"lb_loss": 1.000000004007035
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 527000000,
"cumulative_training_bytes": 527001768,
"metrics": {
"loss": 0.47275793034097424,
"ce_loss": 0.4627579398777174,
"lb_loss": 1.0000000037252903
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 528000000,
"cumulative_training_bytes": 528002785,
"metrics": {
"loss": 0.4726709869492006,
"ce_loss": 0.4626709964859438,
"lb_loss": 0.9999999992818718
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 529000000,
"cumulative_training_bytes": 529007587,
"metrics": {
"loss": 0.47280677702923457,
"ce_loss": 0.46280678656597773,
"lb_loss": 0.9999999995261952
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 530000000,
"cumulative_training_bytes": 530006938,
"metrics": {
"loss": 0.4731630250027305,
"ce_loss": 0.46316303453947366,
"lb_loss": 0.9999999993725827
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 531000000,
"cumulative_training_bytes": 531005656,
"metrics": {
"loss": 0.47286867023853774,
"ce_loss": 0.4628686797752809,
"lb_loss": 0.9999999978569116
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 532000000,
"cumulative_training_bytes": 532005754,
"metrics": {
"loss": 0.47314695116355066,
"ce_loss": 0.4631469607002938,
"lb_loss": 0.9999999973145801
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 533000000,
"cumulative_training_bytes": 533000212,
"metrics": {
"loss": 0.4732809010844558,
"ce_loss": 0.46328091062119897,
"lb_loss": 0.999999997307175
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 534000000,
"cumulative_training_bytes": 534000062,
"metrics": {
"loss": 0.47332667615802654,
"ce_loss": 0.4633266856947697,
"lb_loss": 0.9999999969290347
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 535000000,
"cumulative_training_bytes": 535000930,
"metrics": {
"loss": 0.47340379490730783,
"ce_loss": 0.463403804444051,
"lb_loss": 0.9999999974250118
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 536000000,
"cumulative_training_bytes": 536002691,
"metrics": {
"loss": 0.47355009577757956,
"ce_loss": 0.4635501053143227,
"lb_loss": 0.9999999983389503
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 537000000,
"cumulative_training_bytes": 537004758,
"metrics": {
"loss": 0.4735789928527431,
"ce_loss": 0.46357900238948624,
"lb_loss": 0.9999999973651471
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 538000000,
"cumulative_training_bytes": 538000966,
"metrics": {
"loss": 0.4737231837143655,
"ce_loss": 0.46372319325110867,
"lb_loss": 0.9999999968611745
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 539000000,
"cumulative_training_bytes": 539001127,
"metrics": {
"loss": 0.4737859239257891,
"ce_loss": 0.4637859334625323,
"lb_loss": 0.9999999970120669
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 540000000,
"cumulative_training_bytes": 540006347,
"metrics": {
"loss": 0.47382085505667404,
"ce_loss": 0.4638208645934172,
"lb_loss": 0.9999999970861234
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 541000000,
"cumulative_training_bytes": 541005694,
"metrics": {
"loss": 0.4739520771155099,
"ce_loss": 0.46395208665225307,
"lb_loss": 0.9999999976668187
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 542000000,
"cumulative_training_bytes": 542006701,
"metrics": {
"loss": 0.4740450398618823,
"ce_loss": 0.46404504939862545,
"lb_loss": 0.9999999976188866
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 543000000,
"cumulative_training_bytes": 543005643,
"metrics": {
"loss": 0.4742385020533341,
"ce_loss": 0.4642385115900773,
"lb_loss": 0.9999999976730192
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 544000000,
"cumulative_training_bytes": 544004689,
"metrics": {
"loss": 0.4743254720681236,
"ce_loss": 0.46432548160486675,
"lb_loss": 0.9999999975366176
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 545000000,
"cumulative_training_bytes": 545005233,
"metrics": {
"loss": 0.474323673342992,
"ce_loss": 0.4643236828797352,
"lb_loss": 0.9999999969748287
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 546000000,
"cumulative_training_bytes": 546004328,
"metrics": {
"loss": 0.47431586884163524,
"ce_loss": 0.4643158783783784,
"lb_loss": 0.9999999970291823
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 547000000,
"cumulative_training_bytes": 547003640,
"metrics": {
"loss": 0.4743756198690977,
"ce_loss": 0.4643756294058409,
"lb_loss": 0.9999999968386929
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 548000000,
"cumulative_training_bytes": 548000417,
"metrics": {
"loss": 0.47438953995666305,
"ce_loss": 0.4643895494934062,
"lb_loss": 0.999999996817507
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 549000000,
"cumulative_training_bytes": 549005076,
"metrics": {
"loss": 0.4745037520373309,
"ce_loss": 0.4645037615740741,
"lb_loss": 0.9999999967254238
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 550000000,
"cumulative_training_bytes": 550005242,
"metrics": {
"loss": 0.4746403744368552,
"ce_loss": 0.46464038397359836,
"lb_loss": 0.9999999965697712
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 551000000,
"cumulative_training_bytes": 551001002,
"metrics": {
"loss": 0.4746668936694836,
"ce_loss": 0.4646669032062268,
"lb_loss": 0.9999999966120182
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 552000000,
"cumulative_training_bytes": 552003787,
"metrics": {
"loss": 0.47473511401777224,
"ce_loss": 0.4647351235545154,
"lb_loss": 0.999999996668573
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 553000000,
"cumulative_training_bytes": 553002274,
"metrics": {
"loss": 0.4748042938780854,
"ce_loss": 0.4648043034148286,
"lb_loss": 0.9999999963410383
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 554000000,
"cumulative_training_bytes": 554003365,
"metrics": {
"loss": 0.47482116968128457,
"ce_loss": 0.46482117921802774,
"lb_loss": 0.9999999963110633
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 555000000,
"cumulative_training_bytes": 555006363,
"metrics": {
"loss": 0.47514410925207673,
"ce_loss": 0.4651441187888199,
"lb_loss": 0.9999999963274654
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 556000000,
"cumulative_training_bytes": 556003044,
"metrics": {
"loss": 0.47538666690730014,
"ce_loss": 0.4653866764440433,
"lb_loss": 0.9999999961698098
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 557000000,
"cumulative_training_bytes": 557004116,
"metrics": {
"loss": 0.47563164366554334,
"ce_loss": 0.4656316532022865,
"lb_loss": 0.9999999961060895
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 558000000,
"cumulative_training_bytes": 558000464,
"metrics": {
"loss": 0.4758357828941898,
"ce_loss": 0.465835792430933,
"lb_loss": 0.9999999960857457
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 559000000,
"cumulative_training_bytes": 559004482,
"metrics": {
"loss": 0.47595539375113893,
"ce_loss": 0.4659554032878821,
"lb_loss": 0.9999999961854076
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 560000000,
"cumulative_training_bytes": 560002449,
"metrics": {
"loss": 0.47605645694818305,
"ce_loss": 0.4660564664849262,
"lb_loss": 0.9999999963806459
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 561000000,
"cumulative_training_bytes": 561007064,
"metrics": {
"loss": 0.47612497096450473,
"ce_loss": 0.4661249805012479,
"lb_loss": 0.9999999961693354
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 562000000,
"cumulative_training_bytes": 562002742,
"metrics": {
"loss": 0.4761972426402516,
"ce_loss": 0.46619725217699476,
"lb_loss": 0.9999999964391717
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 563000000,
"cumulative_training_bytes": 563003402,
"metrics": {
"loss": 0.47623431614386447,
"ce_loss": 0.46623432568060763,
"lb_loss": 0.9999999965076781
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 564000000,
"cumulative_training_bytes": 564003696,
"metrics": {
"loss": 0.47626276089594916,
"ce_loss": 0.4662627704326923,
"lb_loss": 0.9999999963664091
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 565000000,
"cumulative_training_bytes": 565007276,
"metrics": {
"loss": 0.47630263068085205,
"ce_loss": 0.4663026402175952,
"lb_loss": 0.9999999963550714
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 566000000,
"cumulative_training_bytes": 566006079,
"metrics": {
"loss": 0.47630903127537916,
"ce_loss": 0.4663090408121223,
"lb_loss": 0.9999999962126329
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 567000000,
"cumulative_training_bytes": 567007618,
"metrics": {
"loss": 0.4764510868956603,
"ce_loss": 0.46645109643240346,
"lb_loss": 0.9999999961627911
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 568000000,
"cumulative_training_bytes": 568004347,
"metrics": {
"loss": 0.47650604713050604,
"ce_loss": 0.4665060566672492,
"lb_loss": 0.9999999959478841
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 569000000,
"cumulative_training_bytes": 569001858,
"metrics": {
"loss": 0.47657124815293556,
"ce_loss": 0.4665712576896787,
"lb_loss": 0.9999999959869736
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 570000000,
"cumulative_training_bytes": 570002921,
"metrics": {
"loss": 0.4766179731851355,
"ce_loss": 0.46661798272187865,
"lb_loss": 0.999999996034991
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 571000000,
"cumulative_training_bytes": 571001763,
"metrics": {
"loss": 0.4765936612263846,
"ce_loss": 0.46659367076312774,
"lb_loss": 0.9999999959145516
}
},
{
"epoch": 12,
"checkpoint_type": "epoch",
"metrics": {
"loss": 0.47662866575245993,
"ce_loss": 0.4666286752892031,
"lb_loss": 0.9999999958629167,
"training_bytes": 47653410
},
"cumulative_training_bytes": 571840836,
"training_bytes_this_epoch": 47653410
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 572000000,
"cumulative_training_bytes": 572003805,
"metrics": {
"loss": 0.46656621070135207,
"ce_loss": 0.45656622023809523,
"lb_loss": 0.9999999886467343
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 573000000,
"cumulative_training_bytes": 573001015,
"metrics": {
"loss": 0.47032956413875354,
"ce_loss": 0.4603295736754967,
"lb_loss": 0.999999998421069
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 574000000,
"cumulative_training_bytes": 574004305,
"metrics": {
"loss": 0.4710136761902072,
"ce_loss": 0.46101368572695034,
"lb_loss": 0.999999999365908
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 575000000,
"cumulative_training_bytes": 575005268,
"metrics": {
"loss": 0.4715617338046612,
"ce_loss": 0.4615617433414044,
"lb_loss": 0.9999999987011094
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 576000000,
"cumulative_training_bytes": 576005624,
"metrics": {
"loss": 0.47210074873531566,
"ce_loss": 0.4621007582720588,
"lb_loss": 1.0000000005478369
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 577000000,
"cumulative_training_bytes": 577007614,
"metrics": {
"loss": 0.47244212009288644,
"ce_loss": 0.4624421296296296,
"lb_loss": 1.0000000013245476
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 578000000,
"cumulative_training_bytes": 578000160,
"metrics": {
"loss": 0.47264556499741833,
"ce_loss": 0.4626455745341615,
"lb_loss": 1.0000000017029897
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 579000000,
"cumulative_training_bytes": 579000407,
"metrics": {
"loss": 0.47249581265577023,
"ce_loss": 0.4624958221925134,
"lb_loss": 1.0000000014662105
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 580000000,
"cumulative_training_bytes": 580003744,
"metrics": {
"loss": 0.472489363704345,
"ce_loss": 0.4624893732410882,
"lb_loss": 1.0000000013419432
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 581000000,
"cumulative_training_bytes": 581005566,
"metrics": {
"loss": 0.4733132214574089,
"ce_loss": 0.46331323099415206,
"lb_loss": 1.0000000010456955
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 582000000,
"cumulative_training_bytes": 582005047,
"metrics": {
"loss": 0.47375311075338494,
"ce_loss": 0.4637531202901281,
"lb_loss": 1.0000000007635863
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 583000000,
"cumulative_training_bytes": 583001026,
"metrics": {
"loss": 0.4739308522511772,
"ce_loss": 0.46393086178792037,
"lb_loss": 1.000000001022729
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 584000000,
"cumulative_training_bytes": 584004132,
"metrics": {
"loss": 0.47396557216980595,
"ce_loss": 0.4639655817065491,
"lb_loss": 1.000000000638085
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 585000000,
"cumulative_training_bytes": 585007650,
"metrics": {
"loss": 0.4742566674254661,
"ce_loss": 0.4642566769622093,
"lb_loss": 1.0000000008316927
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 586000000,
"cumulative_training_bytes": 586004555,
"metrics": {
"loss": 0.4743739347200136,
"ce_loss": 0.46437394425675677,
"lb_loss": 1.000000000483281
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 587000000,
"cumulative_training_bytes": 587000297,
"metrics": {
"loss": 0.474306137874873,
"ce_loss": 0.4643061474116162,
"lb_loss": 1.000000000752584
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 588000000,
"cumulative_training_bytes": 588002607,
"metrics": {
"loss": 0.4741220741913478,
"ce_loss": 0.46412208372809094,
"lb_loss": 1.0000000010447048
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 589000000,
"cumulative_training_bytes": 589003684,
"metrics": {
"loss": 0.47414856037851105,
"ce_loss": 0.4641485699152542,
"lb_loss": 1.0000000009304917
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 590000000,
"cumulative_training_bytes": 590003539,
"metrics": {
"loss": 0.4741836451198097,
"ce_loss": 0.46418365465655287,
"lb_loss": 1.0000000006279461
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 591000000,
"cumulative_training_bytes": 591000699,
"metrics": {
"loss": 0.4741765723150347,
"ce_loss": 0.46417658185177785,
"lb_loss": 1.0000000000476266
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 592000000,
"cumulative_training_bytes": 592006371,
"metrics": {
"loss": 0.4741393104139022,
"ce_loss": 0.4641393199506454,
"lb_loss": 1.0000000001810316
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 593000000,
"cumulative_training_bytes": 593001411,
"metrics": {
"loss": 0.47416184262497896,
"ce_loss": 0.4641618521617221,
"lb_loss": 0.9999999998490475
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 594000000,
"cumulative_training_bytes": 594006305,
"metrics": {
"loss": 0.47413737712128795,
"ce_loss": 0.4641373866580311,
"lb_loss": 0.9999999995882235
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 595000000,
"cumulative_training_bytes": 595002552,
"metrics": {
"loss": 0.47414900905829815,
"ce_loss": 0.4641490185950413,
"lb_loss": 0.9999999991724314
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 596000000,
"cumulative_training_bytes": 596006850,
"metrics": {
"loss": 0.47415680486439754,
"ce_loss": 0.4641568144011407,
"lb_loss": 0.9999999985079953
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 597000000,
"cumulative_training_bytes": 597001688,
"metrics": {
"loss": 0.4742202382949671,
"ce_loss": 0.4642202478317103,
"lb_loss": 0.999999998893523
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 598000000,
"cumulative_training_bytes": 598005915,
"metrics": {
"loss": 0.47454308382000254,
"ce_loss": 0.4645430933567457,
"lb_loss": 0.999999998848725
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 599000000,
"cumulative_training_bytes": 599003238,
"metrics": {
"loss": 0.47462788903951847,
"ce_loss": 0.46462789857626163,
"lb_loss": 0.9999999990085497
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 600000000,
"cumulative_training_bytes": 600001112,
"metrics": {
"loss": 0.4747784238849534,
"ce_loss": 0.46477843342169656,
"lb_loss": 0.9999999986711308
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 601000000,
"cumulative_training_bytes": 601004350,
"metrics": {
"loss": 0.47477400431203604,
"ce_loss": 0.4647740138487792,
"lb_loss": 0.9999999984664597
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 602000000,
"cumulative_training_bytes": 602000426,
"metrics": {
"loss": 0.47477283493520406,
"ce_loss": 0.4647728444719472,
"lb_loss": 0.9999999983354885
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 603000000,
"cumulative_training_bytes": 603006064,
"metrics": {
"loss": 0.47476264018684405,
"ce_loss": 0.4647626497235872,
"lb_loss": 0.9999999979204276
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 604000000,
"cumulative_training_bytes": 604000130,
"metrics": {
"loss": 0.4748116534096854,
"ce_loss": 0.46481166294642856,
"lb_loss": 0.9999999979706038
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 605000000,
"cumulative_training_bytes": 605006216,
"metrics": {
"loss": 0.4747801545419915,
"ce_loss": 0.4647801640787347,
"lb_loss": 0.9999999979081261
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 606000000,
"cumulative_training_bytes": 606000518,
"metrics": {
"loss": 0.4748008338840145,
"ce_loss": 0.46480084342075767,
"lb_loss": 0.9999999976484157
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 607000000,
"cumulative_training_bytes": 607002557,
"metrics": {
"loss": 0.47481949653359656,
"ce_loss": 0.4648195060703397,
"lb_loss": 0.9999999974948396
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 608000000,
"cumulative_training_bytes": 608005994,
"metrics": {
"loss": 0.47476268763666357,
"ce_loss": 0.46476269717340674,
"lb_loss": 0.9999999975138439
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 609000000,
"cumulative_training_bytes": 609006940,
"metrics": {
"loss": 0.4747737273297587,
"ce_loss": 0.46477373686650186,
"lb_loss": 0.999999997568661
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 610000000,
"cumulative_training_bytes": 610003956,
"metrics": {
"loss": 0.4748123964311605,
"ce_loss": 0.4648124059679037,
"lb_loss": 0.9999999975847266
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 611000000,
"cumulative_training_bytes": 611004009,
"metrics": {
"loss": 0.47486817361415595,
"ce_loss": 0.4648681831508991,
"lb_loss": 0.999999997518415
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 612000000,
"cumulative_training_bytes": 612000579,
"metrics": {
"loss": 0.47488283274785464,
"ce_loss": 0.4648828422845978,
"lb_loss": 0.9999999974549294
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 613000000,
"cumulative_training_bytes": 613001932,
"metrics": {
"loss": 0.4748709832333889,
"ce_loss": 0.46487099277013205,
"lb_loss": 0.9999999975058499
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 614000000,
"cumulative_training_bytes": 614003917,
"metrics": {
"loss": 0.47490614968620526,
"ce_loss": 0.4649061592229484,
"lb_loss": 0.9999999975976341
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 615000000,
"cumulative_training_bytes": 615001118,
"metrics": {
"loss": 0.47489570370376766,
"ce_loss": 0.4648957132405108,
"lb_loss": 0.9999999976635994
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 616000000,
"cumulative_training_bytes": 616000487,
"metrics": {
"loss": 0.4748908077959545,
"ce_loss": 0.46489081733269766,
"lb_loss": 0.9999999977575923
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 617000000,
"cumulative_training_bytes": 617005273,
"metrics": {
"loss": 0.474879167548356,
"ce_loss": 0.4648791770850992,
"lb_loss": 0.9999999978275982
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 618000000,
"cumulative_training_bytes": 618005046,
"metrics": {
"loss": 0.4748712766425569,
"ce_loss": 0.46487128617930007,
"lb_loss": 0.9999999979436447
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 619000000,
"cumulative_training_bytes": 619006994,
"metrics": {
"loss": 0.4749004952319257,
"ce_loss": 0.46490050476866884,
"lb_loss": 0.999999997909967
}
},
{
"epoch": 13,
"checkpoint_type": "epoch",
"metrics": {
"loss": 0.47487888667148304,
"ce_loss": 0.4648788962082262,
"lb_loss": 0.9999999979889178,
"training_bytes": 47653395
},
"cumulative_training_bytes": 619494231,
"training_bytes_this_epoch": 47653395
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 620000000,
"cumulative_training_bytes": 620000323,
"metrics": {
"loss": 0.46723839008446894,
"ce_loss": 0.4572383996212121,
"lb_loss": 0.9999999918720939
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 621000000,
"cumulative_training_bytes": 621005489,
"metrics": {
"loss": 0.4674079853871147,
"ce_loss": 0.45740799492385786,
"lb_loss": 0.9999999990923151
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 622000000,
"cumulative_training_bytes": 622007499,
"metrics": {
"loss": 0.46797802971630564,
"ce_loss": 0.4579780392530488,
"lb_loss": 1.0000000014537718
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 623000000,
"cumulative_training_bytes": 623001614,
"metrics": {
"loss": 0.4684726296553966,
"ce_loss": 0.45847263919213976,
"lb_loss": 1.0
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 624000000,
"cumulative_training_bytes": 624004859,
"metrics": {
"loss": 0.46862292127819744,
"ce_loss": 0.4586229308149406,
"lb_loss": 0.9999999988868402
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 625000000,
"cumulative_training_bytes": 625002853,
"metrics": {
"loss": 0.4686638247817547,
"ce_loss": 0.4586638343184979,
"lb_loss": 0.9999999982591133
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 626000000,
"cumulative_training_bytes": 626004267,
"metrics": {
"loss": 0.4690142367867862,
"ce_loss": 0.4590142463235294,
"lb_loss": 0.9999999991585227
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 627000000,
"cumulative_training_bytes": 627004393,
"metrics": {
"loss": 0.4692574043663181,
"ce_loss": 0.45925741390306124,
"lb_loss": 0.9999999992093261
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 628000000,
"cumulative_training_bytes": 628005962,
"metrics": {
"loss": 0.4694238625379643,
"ce_loss": 0.4594238720747075,
"lb_loss": 0.9999999987660605
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 629000000,
"cumulative_training_bytes": 629001986,
"metrics": {
"loss": 0.4696217672561658,
"ce_loss": 0.45962177679290894,
"lb_loss": 0.9999999979827598
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 630000000,
"cumulative_training_bytes": 630004200,
"metrics": {
"loss": 0.4695898199672685,
"ce_loss": 0.45958982950401167,
"lb_loss": 0.9999999979131853
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 631000000,
"cumulative_training_bytes": 631006243,
"metrics": {
"loss": 0.4696956566583301,
"ce_loss": 0.45969566619507324,
"lb_loss": 0.9999999982936087
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 632000000,
"cumulative_training_bytes": 632002297,
"metrics": {
"loss": 0.46990467987808526,
"ce_loss": 0.4599046894148284,
"lb_loss": 0.999999998393012
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 633000000,
"cumulative_training_bytes": 633003176,
"metrics": {
"loss": 0.46981904374078237,
"ce_loss": 0.45981905327752554,
"lb_loss": 0.9999999986807144
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 634000000,
"cumulative_training_bytes": 634003223,
"metrics": {
"loss": 0.46992636394450354,
"ce_loss": 0.4599263734812467,
"lb_loss": 0.9999999983311959
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 635000000,
"cumulative_training_bytes": 635006003,
"metrics": {
"loss": 0.4700912006287707,
"ce_loss": 0.46009121016551385,
"lb_loss": 0.999999997732432
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 636000000,
"cumulative_training_bytes": 636004090,
"metrics": {
"loss": 0.4703039710317023,
"ce_loss": 0.46030398056844546,
"lb_loss": 0.9999999974000758
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 637000000,
"cumulative_training_bytes": 637000138,
"metrics": {
"loss": 0.47044514910658286,
"ce_loss": 0.460445158643326,
"lb_loss": 0.999999997443652
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 638000000,
"cumulative_training_bytes": 638002271,
"metrics": {
"loss": 0.4705825973030747,
"ce_loss": 0.4605826068398179,
"lb_loss": 0.9999999972368707
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 639000000,
"cumulative_training_bytes": 639000648,
"metrics": {
"loss": 0.47067206184974547,
"ce_loss": 0.46067207138648864,
"lb_loss": 0.9999999969565578
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 640000000,
"cumulative_training_bytes": 640003100,
"metrics": {
"loss": 0.47072955597502375,
"ce_loss": 0.4607295655117669,
"lb_loss": 0.9999999970164279
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 641000000,
"cumulative_training_bytes": 641002993,
"metrics": {
"loss": 0.47074683985762805,
"ce_loss": 0.4607468493943712,
"lb_loss": 0.9999999968785598
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 642000000,
"cumulative_training_bytes": 642000407,
"metrics": {
"loss": 0.4708656697278125,
"ce_loss": 0.4608656792645557,
"lb_loss": 0.9999999967123077
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 643000000,
"cumulative_training_bytes": 643000364,
"metrics": {
"loss": 0.47096550138242244,
"ce_loss": 0.4609655109191656,
"lb_loss": 0.9999999967166933
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 644000000,
"cumulative_training_bytes": 644000345,
"metrics": {
"loss": 0.47106452298358203,
"ce_loss": 0.4610645325203252,
"lb_loss": 0.9999999966078658
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 645000000,
"cumulative_training_bytes": 645004285,
"metrics": {
"loss": 0.4711627833440018,
"ce_loss": 0.46116279288074497,
"lb_loss": 0.9999999969383015
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 646000000,
"cumulative_training_bytes": 646004246,
"metrics": {
"loss": 0.47127110227684066,
"ce_loss": 0.4612711118135838,
"lb_loss": 0.9999999970886748
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 647000000,
"cumulative_training_bytes": 647002840,
"metrics": {
"loss": 0.4712862239242596,
"ce_loss": 0.4612862334610028,
"lb_loss": 0.9999999970280693
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 648000000,
"cumulative_training_bytes": 648003976,
"metrics": {
"loss": 0.4713705267774197,
"ce_loss": 0.46137053631416286,
"lb_loss": 0.9999999969564949
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 649000000,
"cumulative_training_bytes": 649000496,
"metrics": {
"loss": 0.47139901831316594,
"ce_loss": 0.4613990278499091,
"lb_loss": 0.9999999971211467
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 650000000,
"cumulative_training_bytes": 650001399,
"metrics": {
"loss": 0.4714765372676265,
"ce_loss": 0.46147654680436967,
"lb_loss": 0.9999999974403831
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 651000000,
"cumulative_training_bytes": 651002904,
"metrics": {
"loss": 0.4715733364090385,
"ce_loss": 0.46157334594578164,
"lb_loss": 0.9999999976088582
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 652000000,
"cumulative_training_bytes": 652005210,
"metrics": {
"loss": 0.47166369952310605,
"ce_loss": 0.4616637090598492,
"lb_loss": 0.999999997626488
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 653000000,
"cumulative_training_bytes": 653002239,
"metrics": {
"loss": 0.4716997182581814,
"ce_loss": 0.4616997277949246,
"lb_loss": 0.9999999975743881
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 654000000,
"cumulative_training_bytes": 654006342,
"metrics": {
"loss": 0.4717738008128684,
"ce_loss": 0.46177381034961157,
"lb_loss": 0.9999999973670757
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 655000000,
"cumulative_training_bytes": 655002681,
"metrics": {
"loss": 0.47183420027986955,
"ce_loss": 0.4618342098166127,
"lb_loss": 0.9999999973637643
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 656000000,
"cumulative_training_bytes": 656006752,
"metrics": {
"loss": 0.471994152803303,
"ce_loss": 0.46199416234004614,
"lb_loss": 0.999999997549295
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 657000000,
"cumulative_training_bytes": 657006557,
"metrics": {
"loss": 0.4720288957562336,
"ce_loss": 0.46202890529297674,
"lb_loss": 0.9999999973592879
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 658000000,
"cumulative_training_bytes": 658003676,
"metrics": {
"loss": 0.47203408391492746,
"ce_loss": 0.4620340934516706,
"lb_loss": 0.9999999974157096
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 659000000,
"cumulative_training_bytes": 659005764,
"metrics": {
"loss": 0.47206756957258034,
"ce_loss": 0.4620675791093235,
"lb_loss": 0.9999999975159918
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 660000000,
"cumulative_training_bytes": 660004372,
"metrics": {
"loss": 0.472095185088895,
"ce_loss": 0.46209519462563814,
"lb_loss": 0.9999999974530819
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 661000000,
"cumulative_training_bytes": 661000728,
"metrics": {
"loss": 0.47210958171625556,
"ce_loss": 0.4621095912529987,
"lb_loss": 0.9999999973271952
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 662000000,
"cumulative_training_bytes": 662006203,
"metrics": {
"loss": 0.47222807999667427,
"ce_loss": 0.46222808953341743,
"lb_loss": 0.9999999972082133
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 663000000,
"cumulative_training_bytes": 663003652,
"metrics": {
"loss": 0.47226043096668935,
"ce_loss": 0.4622604405034325,
"lb_loss": 0.9999999970307842
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 664000000,
"cumulative_training_bytes": 664002530,
"metrics": {
"loss": 0.47231932976126095,
"ce_loss": 0.4623193392980041,
"lb_loss": 0.9999999970874537
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 665000000,
"cumulative_training_bytes": 665004691,
"metrics": {
"loss": 0.47233488024956005,
"ce_loss": 0.4623348897863032,
"lb_loss": 0.9999999971215661
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 666000000,
"cumulative_training_bytes": 666005317,
"metrics": {
"loss": 0.47233496844317124,
"ce_loss": 0.4623349779799144,
"lb_loss": 0.99999999716402
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 667000000,
"cumulative_training_bytes": 667007057,
"metrics": {
"loss": 0.4723920256163592,
"ce_loss": 0.46239203515310234,
"lb_loss": 0.9999999972623169
}
},
{
"epoch": 14,
"checkpoint_type": "epoch",
"metrics": {
"loss": 0.4724123759870358,
"ce_loss": 0.46241238552377895,
"lb_loss": 0.9999999972610977,
"training_bytes": 47653399
},
"cumulative_training_bytes": 667147630,
"training_bytes_this_epoch": 47653399
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 668000000,
"cumulative_training_bytes": 668001125,
"metrics": {
"loss": 0.46842632974897114,
"ce_loss": 0.4584263392857143,
"lb_loss": 0.9999999936137881
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 669000000,
"cumulative_training_bytes": 669005783,
"metrics": {
"loss": 0.4678189205044091,
"ce_loss": 0.45781893004115226,
"lb_loss": 0.9999999923961153
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 670000000,
"cumulative_training_bytes": 670007110,
"metrics": {
"loss": 0.4679608026290322,
"ce_loss": 0.4579608121657754,
"lb_loss": 0.9999999939439131
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 671000000,
"cumulative_training_bytes": 671002720,
"metrics": {
"loss": 0.46782954155452666,
"ce_loss": 0.4578295510912698,
"lb_loss": 0.9999999952694726
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 672000000,
"cumulative_training_bytes": 672007653,
"metrics": {
"loss": 0.4680907186472191,
"ce_loss": 0.45809072818396224,
"lb_loss": 0.9999999966261521
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 673000000,
"cumulative_training_bytes": 673000283,
"metrics": {
"loss": 0.4684463636682177,
"ce_loss": 0.45844637320496084,
"lb_loss": 0.9999999954090417
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 674000000,
"cumulative_training_bytes": 674004321,
"metrics": {
"loss": 0.46855324088131706,
"ce_loss": 0.4585532504180602,
"lb_loss": 0.999999994484743
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 675000000,
"cumulative_training_bytes": 675006849,
"metrics": {
"loss": 0.4686119797628677,
"ce_loss": 0.4586119892996109,
"lb_loss": 0.9999999941439016
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 676000000,
"cumulative_training_bytes": 676003287,
"metrics": {
"loss": 0.4687279967240513,
"ce_loss": 0.45872800626079446,
"lb_loss": 0.9999999935659926
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 677000000,
"cumulative_training_bytes": 677005219,
"metrics": {
"loss": 0.46872374676659273,
"ce_loss": 0.4587237563033359,
"lb_loss": 0.9999999942198754
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 678000000,
"cumulative_training_bytes": 678005845,
"metrics": {
"loss": 0.4687448702201279,
"ce_loss": 0.45874487975687106,
"lb_loss": 0.9999999951274569
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 679000000,
"cumulative_training_bytes": 679002372,
"metrics": {
"loss": 0.46877505703846667,
"ce_loss": 0.45877506657520983,
"lb_loss": 0.9999999955363855
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 680000000,
"cumulative_training_bytes": 680006963,
"metrics": {
"loss": 0.4687262739453997,
"ce_loss": 0.45872628348214284,
"lb_loss": 0.9999999950329462
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 681000000,
"cumulative_training_bytes": 681000974,
"metrics": {
"loss": 0.468684216900515,
"ce_loss": 0.45868422643725815,
"lb_loss": 0.9999999951894538
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 682000000,
"cumulative_training_bytes": 682006359,
"metrics": {
"loss": 0.46886864708355563,
"ce_loss": 0.4588686566202988,
"lb_loss": 0.9999999951481021
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 683000000,
"cumulative_training_bytes": 683003602,
"metrics": {
"loss": 0.4691238743918283,
"ce_loss": 0.45912388392857145,
"lb_loss": 0.9999999956274584
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 684000000,
"cumulative_training_bytes": 684002920,
"metrics": {
"loss": 0.46929555333207207,
"ce_loss": 0.45929556286881523,
"lb_loss": 0.9999999955898515
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 685000000,
"cumulative_training_bytes": 685007066,
"metrics": {
"loss": 0.4694569631613296,
"ce_loss": 0.4594569726980728,
"lb_loss": 0.9999999959412683
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 686000000,
"cumulative_training_bytes": 686003334,
"metrics": {
"loss": 0.46950572702512294,
"ce_loss": 0.4595057365618661,
"lb_loss": 0.9999999965180248
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 687000000,
"cumulative_training_bytes": 687002972,
"metrics": {
"loss": 0.4696742790322091,
"ce_loss": 0.45967428856895226,
"lb_loss": 0.9999999965100517
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 688000000,
"cumulative_training_bytes": 688007217,
"metrics": {
"loss": 0.46969413696123996,
"ce_loss": 0.4596941464979831,
"lb_loss": 0.9999999968744172
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 689000000,
"cumulative_training_bytes": 689000339,
"metrics": {
"loss": 0.46979924995748157,
"ce_loss": 0.45979925949422473,
"lb_loss": 0.9999999969331876
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 690000000,
"cumulative_training_bytes": 690005975,
"metrics": {
"loss": 0.46974848966841076,
"ce_loss": 0.4597484992051539,
"lb_loss": 0.999999997187331
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 691000000,
"cumulative_training_bytes": 691000262,
"metrics": {
"loss": 0.4697124794350246,
"ce_loss": 0.45971248897176775,
"lb_loss": 0.999999997131634
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 692000000,
"cumulative_training_bytes": 692001292,
"metrics": {
"loss": 0.46974805658086766,
"ce_loss": 0.4597480661176108,
"lb_loss": 0.9999999971555665
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 693000000,
"cumulative_training_bytes": 693001879,
"metrics": {
"loss": 0.46981497825550306,
"ce_loss": 0.4598149877922462,
"lb_loss": 0.9999999971600037
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 694000000,
"cumulative_training_bytes": 694001241,
"metrics": {
"loss": 0.4697958947451035,
"ce_loss": 0.4597959042818467,
"lb_loss": 0.9999999970953565
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 695000000,
"cumulative_training_bytes": 695005882,
"metrics": {
"loss": 0.469869174014081,
"ce_loss": 0.4598691835508242,
"lb_loss": 0.9999999976092643
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 696000000,
"cumulative_training_bytes": 696001244,
"metrics": {
"loss": 0.46987648250569714,
"ce_loss": 0.4598764920424403,
"lb_loss": 0.9999999974071189
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 697000000,
"cumulative_training_bytes": 697002626,
"metrics": {
"loss": 0.4699041015886093,
"ce_loss": 0.45990411112535245,
"lb_loss": 0.9999999974483528
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 698000000,
"cumulative_training_bytes": 698000141,
"metrics": {
"loss": 0.46995632496462125,
"ce_loss": 0.4599563345013644,
"lb_loss": 0.9999999972940585
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 699000000,
"cumulative_training_bytes": 699000609,
"metrics": {
"loss": 0.46998604024095453,
"ce_loss": 0.4599860497776977,
"lb_loss": 0.9999999971923792
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 700000000,
"cumulative_training_bytes": 700001376,
"metrics": {
"loss": 0.46995432803025755,
"ce_loss": 0.4599543375670007,
"lb_loss": 0.9999999970968607
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 701000000,
"cumulative_training_bytes": 701000514,
"metrics": {
"loss": 0.46995761458675833,
"ce_loss": 0.4599576241235015,
"lb_loss": 0.9999999969934775
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 702000000,
"cumulative_training_bytes": 702001728,
"metrics": {
"loss": 0.4700420221251637,
"ce_loss": 0.4600420316619069,
"lb_loss": 0.9999999970669068
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 703000000,
"cumulative_training_bytes": 703001158,
"metrics": {
"loss": 0.4700812518074806,
"ce_loss": 0.4600812613442238,
"lb_loss": 0.999999997110772
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 704000000,
"cumulative_training_bytes": 704004853,
"metrics": {
"loss": 0.47010860920546704,
"ce_loss": 0.4601086187422102,
"lb_loss": 0.9999999968055674
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 705000000,
"cumulative_training_bytes": 705003445,
"metrics": {
"loss": 0.470210351307544,
"ce_loss": 0.4602103608442872,
"lb_loss": 0.9999999965647475
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 706000000,
"cumulative_training_bytes": 706001098,
"metrics": {
"loss": 0.47026130511842923,
"ce_loss": 0.4602613146551724,
"lb_loss": 0.9999999964413384
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 707000000,
"cumulative_training_bytes": 707007487,
"metrics": {
"loss": 0.4703792403672138,
"ce_loss": 0.46037924990395696,
"lb_loss": 0.9999999965079875
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 708000000,
"cumulative_training_bytes": 708003669,
"metrics": {
"loss": 0.47049825695501096,
"ce_loss": 0.4604982664917541,
"lb_loss": 0.9999999966154034
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 709000000,
"cumulative_training_bytes": 709000496,
"metrics": {
"loss": 0.4705623017969561,
"ce_loss": 0.46056231133369924,
"lb_loss": 0.9999999967613283
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 710000000,
"cumulative_training_bytes": 710001059,
"metrics": {
"loss": 0.4706237760850185,
"ce_loss": 0.46062378562176165,
"lb_loss": 0.999999996901027
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 711000000,
"cumulative_training_bytes": 711002755,
"metrics": {
"loss": 0.47068721222477916,
"ce_loss": 0.4606872217615223,
"lb_loss": 0.9999999968678425
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 712000000,
"cumulative_training_bytes": 712004688,
"metrics": {
"loss": 0.47075681223318344,
"ce_loss": 0.4607568217699266,
"lb_loss": 0.9999999968870078
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 713000000,
"cumulative_training_bytes": 713007034,
"metrics": {
"loss": 0.4708357584098345,
"ce_loss": 0.46083576794657766,
"lb_loss": 0.9999999969550882
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 714000000,
"cumulative_training_bytes": 714005137,
"metrics": {
"loss": 0.47085419542649215,
"ce_loss": 0.4608542049632353,
"lb_loss": 0.9999999971366396
}
},
{
"epoch": 15,
"checkpoint_type": "epoch",
"metrics": {
"loss": 0.4708282861734111,
"ce_loss": 0.46082829571015427,
"lb_loss": 0.999999997136602,
"training_bytes": 47653389
},
"cumulative_training_bytes": 714801019,
"training_bytes_this_epoch": 47653389
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 715000000,
"cumulative_training_bytes": 715000925,
"metrics": {
"loss": 0.4613972260401799,
"ce_loss": 0.4513972355769231,
"lb_loss": 0.9999999954150274
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 716000000,
"cumulative_training_bytes": 716000809,
"metrics": {
"loss": 0.46277442956582093,
"ce_loss": 0.4527744391025641,
"lb_loss": 0.9999999912121357
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 717000000,
"cumulative_training_bytes": 717007272,
"metrics": {
"loss": 0.4638031588660346,
"ce_loss": 0.4538031684027778,
"lb_loss": 0.9999999913076559
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 718000000,
"cumulative_training_bytes": 718001268,
"metrics": {
"loss": 0.4635652632450314,
"ce_loss": 0.45356527278177455,
"lb_loss": 0.9999999925672866
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 719000000,
"cumulative_training_bytes": 719000209,
"metrics": {
"loss": 0.4637926949879369,
"ce_loss": 0.45379270452468007,
"lb_loss": 0.9999999930261476
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 720000000,
"cumulative_training_bytes": 720006860,
"metrics": {
"loss": 0.46441365025707126,
"ce_loss": 0.45441365979381443,
"lb_loss": 0.9999999941185402
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 721000000,
"cumulative_training_bytes": 721002611,
"metrics": {
"loss": 0.4646387226944064,
"ce_loss": 0.4546387322311496,
"lb_loss": 0.9999999942531986
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 722000000,
"cumulative_training_bytes": 722004482,
"metrics": {
"loss": 0.4649596812877249,
"ce_loss": 0.4549596908244681,
"lb_loss": 0.9999999962588574
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 723000000,
"cumulative_training_bytes": 723006477,
"metrics": {
"loss": 0.46530060029052106,
"ce_loss": 0.4553006098272642,
"lb_loss": 0.9999999972729902
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 724000000,
"cumulative_training_bytes": 724002028,
"metrics": {
"loss": 0.46535132528840256,
"ce_loss": 0.4553513348251457,
"lb_loss": 0.9999999976177993
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 725000000,
"cumulative_training_bytes": 725004555,
"metrics": {
"loss": 0.4655385368221157,
"ce_loss": 0.4555385463588589,
"lb_loss": 0.9999999973598543
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 726000000,
"cumulative_training_bytes": 726004006,
"metrics": {
"loss": 0.4657723225719376,
"ce_loss": 0.4557723321086808,
"lb_loss": 0.9999999966592065
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 727000000,
"cumulative_training_bytes": 727000132,
"metrics": {
"loss": 0.4658971283320578,
"ce_loss": 0.455897137868801,
"lb_loss": 0.9999999966699226
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 728000000,
"cumulative_training_bytes": 728001078,
"metrics": {
"loss": 0.4661883605328624,
"ce_loss": 0.45618837006960555,
"lb_loss": 0.9999999971304029
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 729000000,
"cumulative_training_bytes": 729008228,
"metrics": {
"loss": 0.4664945520203689,
"ce_loss": 0.45649456155711204,
"lb_loss": 0.9999999964995117
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 730000000,
"cumulative_training_bytes": 730005608,
"metrics": {
"loss": 0.46660442462740587,
"ce_loss": 0.45660443416414903,
"lb_loss": 0.9999999962184364
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 731000000,
"cumulative_training_bytes": 731002789,
"metrics": {
"loss": 0.46665003033802055,
"ce_loss": 0.4566500398747637,
"lb_loss": 0.9999999959437292
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 732000000,
"cumulative_training_bytes": 732003542,
"metrics": {
"loss": 0.4667574378931634,
"ce_loss": 0.45675744742990654,
"lb_loss": 0.9999999955701043
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 733000000,
"cumulative_training_bytes": 733005914,
"metrics": {
"loss": 0.4668850435540895,
"ce_loss": 0.45688505309083266,
"lb_loss": 0.9999999954130993
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 734000000,
"cumulative_training_bytes": 734004082,
"metrics": {
"loss": 0.4669946388384562,
"ce_loss": 0.4569946483751994,
"lb_loss": 0.999999995555794
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 735000000,
"cumulative_training_bytes": 735003332,
"metrics": {
"loss": 0.46714373572661416,
"ce_loss": 0.4571437452633573,
"lb_loss": 0.9999999958667488
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 736000000,
"cumulative_training_bytes": 736001573,
"metrics": {
"loss": 0.467144097090198,
"ce_loss": 0.45714410662694116,
"lb_loss": 0.9999999956733356
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 737000000,
"cumulative_training_bytes": 737001233,
"metrics": {
"loss": 0.4672433902477396,
"ce_loss": 0.45724339978448275,
"lb_loss": 0.9999999959098882
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 738000000,
"cumulative_training_bytes": 738001247,
"metrics": {
"loss": 0.4672973706595617,
"ce_loss": 0.45729738019630484,
"lb_loss": 0.9999999956343678
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 739000000,
"cumulative_training_bytes": 739005713,
"metrics": {
"loss": 0.46737158577476856,
"ce_loss": 0.4573715953115117,
"lb_loss": 0.9999999955513295
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 740000000,
"cumulative_training_bytes": 740004035,
"metrics": {
"loss": 0.46742578120596645,
"ce_loss": 0.4574257907427096,
"lb_loss": 0.9999999952924643
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 741000000,
"cumulative_training_bytes": 741006421,
"metrics": {
"loss": 0.4675053996438002,
"ce_loss": 0.4575054091805434,
"lb_loss": 0.9999999957860578
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 742000000,
"cumulative_training_bytes": 742000099,
"metrics": {
"loss": 0.467600741589066,
"ce_loss": 0.45760075112580917,
"lb_loss": 0.9999999958563616
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 743000000,
"cumulative_training_bytes": 743007078,
"metrics": {
"loss": 0.4677033062705838,
"ce_loss": 0.457703315807327,
"lb_loss": 0.9999999960209653
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 744000000,
"cumulative_training_bytes": 744007281,
"metrics": {
"loss": 0.4678138212088019,
"ce_loss": 0.45781383074554505,
"lb_loss": 0.9999999962512802
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 745000000,
"cumulative_training_bytes": 745003239,
"metrics": {
"loss": 0.46785139457501557,
"ce_loss": 0.45785140411175873,
"lb_loss": 0.9999999964805164
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 746000000,
"cumulative_training_bytes": 746000194,
"metrics": {
"loss": 0.46787027919608315,
"ce_loss": 0.4578702887328263,
"lb_loss": 0.9999999962856772
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 747000000,
"cumulative_training_bytes": 747000581,
"metrics": {
"loss": 0.4678921757342982,
"ce_loss": 0.45789218527104136,
"lb_loss": 0.9999999963154523
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 748000000,
"cumulative_training_bytes": 748000684,
"metrics": {
"loss": 0.46796389480381484,
"ce_loss": 0.457963904340558,
"lb_loss": 0.9999999963167985
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 749000000,
"cumulative_training_bytes": 749004115,
"metrics": {
"loss": 0.468031408323597,
"ce_loss": 0.4580314178603402,
"lb_loss": 0.9999999968650198
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 750000000,
"cumulative_training_bytes": 750006305,
"metrics": {
"loss": 0.4680848832388602,
"ce_loss": 0.4580848927756034,
"lb_loss": 0.9999999969413577
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 751000000,
"cumulative_training_bytes": 751001387,
"metrics": {
"loss": 0.4680606794437983,
"ce_loss": 0.4580606889805415,
"lb_loss": 0.9999999969491701
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 752000000,
"cumulative_training_bytes": 752007732,
"metrics": {
"loss": 0.4681460488480305,
"ce_loss": 0.4581460583847737,
"lb_loss": 0.9999999967254238
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 753000000,
"cumulative_training_bytes": 753007023,
"metrics": {
"loss": 0.4681992013851005,
"ce_loss": 0.4581992109218437,
"lb_loss": 0.9999999967748989
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 754000000,
"cumulative_training_bytes": 754007126,
"metrics": {
"loss": 0.4682459842877052,
"ce_loss": 0.45824599382444836,
"lb_loss": 0.9999999969039571
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 755000000,
"cumulative_training_bytes": 755004818,
"metrics": {
"loss": 0.46830369154400336,
"ce_loss": 0.45830370108074653,
"lb_loss": 0.9999999969692553
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 756000000,
"cumulative_training_bytes": 756003553,
"metrics": {
"loss": 0.4682925505125042,
"ce_loss": 0.45829256004924734,
"lb_loss": 0.9999999967987842
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 757000000,
"cumulative_training_bytes": 757001249,
"metrics": {
"loss": 0.46836309380203384,
"ce_loss": 0.458363103338777,
"lb_loss": 0.9999999967877736
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 758000000,
"cumulative_training_bytes": 758007153,
"metrics": {
"loss": 0.46841179035691155,
"ce_loss": 0.4584117998936547,
"lb_loss": 0.9999999969046153
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 759000000,
"cumulative_training_bytes": 759008102,
"metrics": {
"loss": 0.4684474506226194,
"ce_loss": 0.45844746015936255,
"lb_loss": 0.9999999969232316
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 760000000,
"cumulative_training_bytes": 760000290,
"metrics": {
"loss": 0.46847780365146624,
"ce_loss": 0.4584778131882094,
"lb_loss": 0.9999999969707957
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 761000000,
"cumulative_training_bytes": 761003802,
"metrics": {
"loss": 0.46849883554943517,
"ce_loss": 0.45849884508617833,
"lb_loss": 0.9999999969871699
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 762000000,
"cumulative_training_bytes": 762007779,
"metrics": {
"loss": 0.4685566738574174,
"ce_loss": 0.45855668339416056,
"lb_loss": 0.9999999969931801
}
},
{
"epoch": 16,
"checkpoint_type": "epoch",
"metrics": {
"loss": 0.46860215044879666,
"ce_loss": 0.4586021599855398,
"lb_loss": 0.9999999970504129,
"training_bytes": 47653395
},
"cumulative_training_bytes": 762454414,
"training_bytes_this_epoch": 47653395
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 763000000,
"cumulative_training_bytes": 763000457,
"metrics": {
"loss": 0.4623547439843836,
"ce_loss": 0.45235475352112675,
"lb_loss": 0.9999999974814939
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 764000000,
"cumulative_training_bytes": 764004913,
"metrics": {
"loss": 0.46177133947315785,
"ce_loss": 0.451771349009901,
"lb_loss": 0.99999999527884
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 765000000,
"cumulative_training_bytes": 765007589,
"metrics": {
"loss": 0.46142407079358716,
"ce_loss": 0.4514240803303303,
"lb_loss": 0.9999999946302122
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 766000000,
"cumulative_training_bytes": 766002665,
"metrics": {
"loss": 0.46156417782826764,
"ce_loss": 0.4515641873650108,
"lb_loss": 0.9999999921471202
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 767000000,
"cumulative_training_bytes": 767006783,
"metrics": {
"loss": 0.4617538572561861,
"ce_loss": 0.4517538667929293,
"lb_loss": 0.9999999925745056
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 768000000,
"cumulative_training_bytes": 768006387,
"metrics": {
"loss": 0.46248921460118786,
"ce_loss": 0.452489224137931,
"lb_loss": 0.9999999929296559
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 769000000,
"cumulative_training_bytes": 769007261,
"metrics": {
"loss": 0.46278501893872415,
"ce_loss": 0.4527850284754673,
"lb_loss": 0.9999999917834719
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 770000000,
"cumulative_training_bytes": 770004386,
"metrics": {
"loss": 0.46281795762857125,
"ce_loss": 0.4528179671653144,
"lb_loss": 0.9999999905696505
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 771000000,
"cumulative_training_bytes": 771004582,
"metrics": {
"loss": 0.46292215921885216,
"ce_loss": 0.4529221687555953,
"lb_loss": 0.9999999913554588
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 772000000,
"cumulative_training_bytes": 772000420,
"metrics": {
"loss": 0.4632846208719107,
"ce_loss": 0.45328463040865385,
"lb_loss": 0.9999999921673384
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 773000000,
"cumulative_training_bytes": 773003781,
"metrics": {
"loss": 0.46348049037261146,
"ce_loss": 0.45348049990935463,
"lb_loss": 0.9999999921333972
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 774000000,
"cumulative_training_bytes": 774007887,
"metrics": {
"loss": 0.4635867557778264,
"ce_loss": 0.45358676531456954,
"lb_loss": 0.9999999925000778
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 775000000,
"cumulative_training_bytes": 775006162,
"metrics": {
"loss": 0.4637526105089885,
"ce_loss": 0.4537526200457317,
"lb_loss": 0.999999993857814
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 776000000,
"cumulative_training_bytes": 776001976,
"metrics": {
"loss": 0.46378485981353934,
"ce_loss": 0.4537848693502825,
"lb_loss": 0.9999999951844835
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 777000000,
"cumulative_training_bytes": 777005637,
"metrics": {
"loss": 0.46390480207556617,
"ce_loss": 0.45390481161230933,
"lb_loss": 0.9999999949519475
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 778000000,
"cumulative_training_bytes": 778003374,
"metrics": {
"loss": 0.4639972131983627,
"ce_loss": 0.45399722273510584,
"lb_loss": 0.9999999953337575
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 779000000,
"cumulative_training_bytes": 779004889,
"metrics": {
"loss": 0.4640907123064576,
"ce_loss": 0.45409072184320076,
"lb_loss": 0.9999999953132332
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 780000000,
"cumulative_training_bytes": 780004536,
"metrics": {
"loss": 0.4642408185269289,
"ce_loss": 0.45424082806367205,
"lb_loss": 0.9999999955290018
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 781000000,
"cumulative_training_bytes": 781001322,
"metrics": {
"loss": 0.4643268515342432,
"ce_loss": 0.45432686107098635,
"lb_loss": 0.9999999953752896
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 782000000,
"cumulative_training_bytes": 782005205,
"metrics": {
"loss": 0.4643638559683469,
"ce_loss": 0.45436386550509006,
"lb_loss": 0.9999999956125007
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 783000000,
"cumulative_training_bytes": 783003504,
"metrics": {
"loss": 0.4644428404539424,
"ce_loss": 0.45444284999068557,
"lb_loss": 0.9999999955585213
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 784000000,
"cumulative_training_bytes": 784007012,
"metrics": {
"loss": 0.4645556631986032,
"ce_loss": 0.45455567273534636,
"lb_loss": 0.9999999957228639
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 785000000,
"cumulative_training_bytes": 785003493,
"metrics": {
"loss": 0.4646576464479767,
"ce_loss": 0.45465765598471986,
"lb_loss": 0.9999999954259254
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 786000000,
"cumulative_training_bytes": 786001543,
"metrics": {
"loss": 0.4646982882274845,
"ce_loss": 0.45469829776422765,
"lb_loss": 0.9999999956386846
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 787000000,
"cumulative_training_bytes": 787006273,
"metrics": {
"loss": 0.46475888979263924,
"ce_loss": 0.4547588993293824,
"lb_loss": 0.9999999954264683
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 788000000,
"cumulative_training_bytes": 788002324,
"metrics": {
"loss": 0.46486149188711773,
"ce_loss": 0.4548615014238609,
"lb_loss": 0.9999999956582948
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 789000000,
"cumulative_training_bytes": 789000949,
"metrics": {
"loss": 0.46496146893267404,
"ce_loss": 0.4549614784694172,
"lb_loss": 0.9999999959587157
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 790000000,
"cumulative_training_bytes": 790004342,
"metrics": {
"loss": 0.46504716519219763,
"ce_loss": 0.4550471747289408,
"lb_loss": 0.9999999959567603
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 791000000,
"cumulative_training_bytes": 791006479,
"metrics": {
"loss": 0.46513784085220533,
"ce_loss": 0.4551378503889485,
"lb_loss": 0.9999999957790702
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 792000000,
"cumulative_training_bytes": 792007573,
"metrics": {
"loss": 0.4652086949527619,
"ce_loss": 0.45520870448950507,
"lb_loss": 0.9999999958760196
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 793000000,
"cumulative_training_bytes": 793000192,
"metrics": {
"loss": 0.46531999156002296,
"ce_loss": 0.4553200010967661,
"lb_loss": 0.9999999958012271
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 794000000,
"cumulative_training_bytes": 794002455,
"metrics": {
"loss": 0.4653909947108297,
"ce_loss": 0.45539100424757284,
"lb_loss": 0.9999999960070675
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 795000000,
"cumulative_training_bytes": 795002572,
"metrics": {
"loss": 0.46549575616544453,
"ce_loss": 0.4554957657021877,
"lb_loss": 0.9999999956814325
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 796000000,
"cumulative_training_bytes": 796000398,
"metrics": {
"loss": 0.4655890219343822,
"ce_loss": 0.45558903147112534,
"lb_loss": 0.9999999957279483
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 797000000,
"cumulative_training_bytes": 797004240,
"metrics": {
"loss": 0.4656447468074501,
"ce_loss": 0.45564475634419327,
"lb_loss": 0.999999996076556
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 798000000,
"cumulative_training_bytes": 798001241,
"metrics": {
"loss": 0.4657365903932439,
"ce_loss": 0.45573659992998705,
"lb_loss": 0.9999999960323491
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 799000000,
"cumulative_training_bytes": 799006684,
"metrics": {
"loss": 0.46579053755104227,
"ce_loss": 0.45579054708778544,
"lb_loss": 0.9999999962786121
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 800000000,
"cumulative_training_bytes": 800005908,
"metrics": {
"loss": 0.46589497096386856,
"ce_loss": 0.4558949805006117,
"lb_loss": 0.9999999962564783
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 801000000,
"cumulative_training_bytes": 801004990,
"metrics": {
"loss": 0.4659691438892747,
"ce_loss": 0.45596915342601785,
"lb_loss": 0.9999999963183626
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 802000000,
"cumulative_training_bytes": 802006894,
"metrics": {
"loss": 0.4660316155116502,
"ce_loss": 0.45603162504839334,
"lb_loss": 0.9999999962848053
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 803000000,
"cumulative_training_bytes": 803004532,
"metrics": {
"loss": 0.4660554155482266,
"ce_loss": 0.45605542508496977,
"lb_loss": 0.9999999960833806
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 804000000,
"cumulative_training_bytes": 804005297,
"metrics": {
"loss": 0.4661466299510033,
"ce_loss": 0.4561466394877465,
"lb_loss": 0.9999999963426669
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 805000000,
"cumulative_training_bytes": 805002675,
"metrics": {
"loss": 0.4661778685899439,
"ce_loss": 0.4561778781266871,
"lb_loss": 0.9999999964389523
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 806000000,
"cumulative_training_bytes": 806004255,
"metrics": {
"loss": 0.46626139055995164,
"ce_loss": 0.4562614000966948,
"lb_loss": 0.9999999964371344
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 807000000,
"cumulative_training_bytes": 807001576,
"metrics": {
"loss": 0.4663353263819574,
"ce_loss": 0.45633533591870057,
"lb_loss": 0.9999999966601729
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 808000000,
"cumulative_training_bytes": 808007129,
"metrics": {
"loss": 0.4663545905010783,
"ce_loss": 0.45635460003782147,
"lb_loss": 0.9999999965934477
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 809000000,
"cumulative_training_bytes": 809006953,
"metrics": {
"loss": 0.4663725949212269,
"ce_loss": 0.45637260445797007,
"lb_loss": 0.9999999967741523
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 810000000,
"cumulative_training_bytes": 810005884,
"metrics": {
"loss": 0.46644152880866746,
"ce_loss": 0.4564415383454106,
"lb_loss": 0.9999999967942107
}
},
{
"epoch": 17,
"checkpoint_type": "epoch",
"metrics": {
"loss": 0.4664472485569265,
"ce_loss": 0.45644725809366965,
"lb_loss": 0.9999999967535388,
"training_bytes": 47653413
},
"cumulative_training_bytes": 810107827,
"training_bytes_this_epoch": 47653413
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 811000000,
"cumulative_training_bytes": 811000418,
"metrics": {
"loss": 0.4582590152477396,
"ce_loss": 0.44825902478448276,
"lb_loss": 0.9999999866403383
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 812000000,
"cumulative_training_bytes": 812004574,
"metrics": {
"loss": 0.458675330685031,
"ce_loss": 0.4486753402217742,
"lb_loss": 0.9999999891846411
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 813000000,
"cumulative_training_bytes": 813005709,
"metrics": {
"loss": 0.45849726948700353,
"ce_loss": 0.4484972790237467,
"lb_loss": 0.9999999976409771
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 814000000,
"cumulative_training_bytes": 814001225,
"metrics": {
"loss": 0.45856258255559473,
"ce_loss": 0.4485625920923379,
"lb_loss": 0.9999999977750722
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 815000000,
"cumulative_training_bytes": 815003952,
"metrics": {
"loss": 0.4582559323646653,
"ce_loss": 0.44825594190140844,
"lb_loss": 0.9999999988806639
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 816000000,
"cumulative_training_bytes": 816003923,
"metrics": {
"loss": 0.4589549417619581,
"ce_loss": 0.4489549512987013,
"lb_loss": 0.9999999982196015
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 817000000,
"cumulative_training_bytes": 817007266,
"metrics": {
"loss": 0.4592534241480515,
"ce_loss": 0.44925343368479465,
"lb_loss": 0.9999999976846142
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 818000000,
"cumulative_training_bytes": 818004841,
"metrics": {
"loss": 0.4594157579220347,
"ce_loss": 0.44941576745877787,
"lb_loss": 0.999999997109377
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 819000000,
"cumulative_training_bytes": 819004493,
"metrics": {
"loss": 0.45983224358287816,
"ce_loss": 0.44983225311962133,
"lb_loss": 0.9999999966145382
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 820000000,
"cumulative_training_bytes": 820003619,
"metrics": {
"loss": 0.46002989910594827,
"ce_loss": 0.45002990864269143,
"lb_loss": 0.9999999966809479
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 821000000,
"cumulative_training_bytes": 821007131,
"metrics": {
"loss": 0.460265253367049,
"ce_loss": 0.45026526290379215,
"lb_loss": 0.9999999963165669
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 822000000,
"cumulative_training_bytes": 822001154,
"metrics": {
"loss": 0.4605283648197562,
"ce_loss": 0.45052837435649934,
"lb_loss": 0.9999999952439023
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 823000000,
"cumulative_training_bytes": 823004363,
"metrics": {
"loss": 0.46066069150183253,
"ce_loss": 0.4506607010385757,
"lb_loss": 0.9999999947646959
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 824000000,
"cumulative_training_bytes": 824005319,
"metrics": {
"loss": 0.46092772483825684,
"ce_loss": 0.450927734375,
"lb_loss": 0.999999995011065
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 825000000,
"cumulative_training_bytes": 825005826,
"metrics": {
"loss": 0.46111766774999885,
"ce_loss": 0.451117677286742,
"lb_loss": 0.9999999947011287
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 826000000,
"cumulative_training_bytes": 826000335,
"metrics": {
"loss": 0.46198096348830503,
"ce_loss": 0.4519809730250482,
"lb_loss": 0.9999999948606785
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 827000000,
"cumulative_training_bytes": 827007343,
"metrics": {
"loss": 0.46243060498997784,
"ce_loss": 0.452430614526721,
"lb_loss": 0.9999999950869359
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 828000000,
"cumulative_training_bytes": 828002571,
"metrics": {
"loss": 0.4624639199134066,
"ce_loss": 0.45246392945014974,
"lb_loss": 0.9999999954601512
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 829000000,
"cumulative_training_bytes": 829005468,
"metrics": {
"loss": 0.4625939747116361,
"ce_loss": 0.45259398424837927,
"lb_loss": 0.9999999954113118
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 830000000,
"cumulative_training_bytes": 830003520,
"metrics": {
"loss": 0.4626671563119866,
"ce_loss": 0.4526671658487298,
"lb_loss": 0.9999999956409228
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 831000000,
"cumulative_training_bytes": 831006123,
"metrics": {
"loss": 0.46278288941891826,
"ce_loss": 0.45278289895566143,
"lb_loss": 0.9999999959156949
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 832000000,
"cumulative_training_bytes": 832004095,
"metrics": {
"loss": 0.4628742745223684,
"ce_loss": 0.45287428405911156,
"lb_loss": 0.9999999959763216
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 833000000,
"cumulative_training_bytes": 833003109,
"metrics": {
"loss": 0.46298912089803945,
"ce_loss": 0.4529891304347826,
"lb_loss": 0.9999999960330019
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 834000000,
"cumulative_training_bytes": 834000963,
"metrics": {
"loss": 0.46311497764685183,
"ce_loss": 0.453114987183595,
"lb_loss": 0.9999999959130427
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 835000000,
"cumulative_training_bytes": 835000614,
"metrics": {
"loss": 0.4632120762950527,
"ce_loss": 0.45321208583179584,
"lb_loss": 0.9999999960226913
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 836000000,
"cumulative_training_bytes": 836003119,
"metrics": {
"loss": 0.46328433497995797,
"ce_loss": 0.45328434451670113,
"lb_loss": 0.9999999961943236
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 837000000,
"cumulative_training_bytes": 837001175,
"metrics": {
"loss": 0.46331735631865106,
"ce_loss": 0.4533173658553942,
"lb_loss": 0.9999999961315517
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 838000000,
"cumulative_training_bytes": 838002124,
"metrics": {
"loss": 0.4633801187563414,
"ce_loss": 0.45338012829308455,
"lb_loss": 0.999999996156122
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 839000000,
"cumulative_training_bytes": 839006609,
"metrics": {
"loss": 0.46342972970166746,
"ce_loss": 0.4534297392384106,
"lb_loss": 0.9999999963684587
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 840000000,
"cumulative_training_bytes": 840000100,
"metrics": {
"loss": 0.46349160778079845,
"ce_loss": 0.4534916173175416,
"lb_loss": 0.9999999965046188
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 841000000,
"cumulative_training_bytes": 841000215,
"metrics": {
"loss": 0.46359052719516963,
"ce_loss": 0.4535905367319128,
"lb_loss": 0.9999999966623762
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 842000000,
"cumulative_training_bytes": 842004728,
"metrics": {
"loss": 0.46364198035406556,
"ce_loss": 0.45364198989080873,
"lb_loss": 0.9999999965670471
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 843000000,
"cumulative_training_bytes": 843006127,
"metrics": {
"loss": 0.4637348307959697,
"ce_loss": 0.4537348403327129,
"lb_loss": 0.9999999966439451
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 844000000,
"cumulative_training_bytes": 844003435,
"metrics": {
"loss": 0.46380382049374463,
"ce_loss": 0.4538038300304878,
"lb_loss": 0.999999996850161
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 845000000,
"cumulative_training_bytes": 845006695,
"metrics": {
"loss": 0.4638635707440202,
"ce_loss": 0.45386358028076335,
"lb_loss": 0.999999997019114
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 846000000,
"cumulative_training_bytes": 846002322,
"metrics": {
"loss": 0.4639164038776309,
"ce_loss": 0.45391641341437405,
"lb_loss": 0.9999999970381995
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 847000000,
"cumulative_training_bytes": 847004506,
"metrics": {
"loss": 0.46396459108566346,
"ce_loss": 0.4539646006224066,
"lb_loss": 0.9999999970939644
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 848000000,
"cumulative_training_bytes": 848000971,
"metrics": {
"loss": 0.4640376325087114,
"ce_loss": 0.45403764204545455,
"lb_loss": 0.9999999971100778
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 849000000,
"cumulative_training_bytes": 849004995,
"metrics": {
"loss": 0.4640902132294446,
"ce_loss": 0.45409022276618777,
"lb_loss": 0.9999999971142014
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 850000000,
"cumulative_training_bytes": 850005350,
"metrics": {
"loss": 0.4640859984271793,
"ce_loss": 0.45408600796392246,
"lb_loss": 0.9999999969002382
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 851000000,
"cumulative_training_bytes": 851000453,
"metrics": {
"loss": 0.464119288698606,
"ce_loss": 0.4541192982353492,
"lb_loss": 0.9999999969198873
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 852000000,
"cumulative_training_bytes": 852004187,
"metrics": {
"loss": 0.4641575910891706,
"ce_loss": 0.45415760062591376,
"lb_loss": 0.9999999968846988
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 853000000,
"cumulative_training_bytes": 853003588,
"metrics": {
"loss": 0.4642327954338083,
"ce_loss": 0.45423280497055146,
"lb_loss": 0.9999999971702953
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 854000000,
"cumulative_training_bytes": 854000038,
"metrics": {
"loss": 0.464275811368542,
"ce_loss": 0.4542758209052852,
"lb_loss": 0.9999999970785095
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 855000000,
"cumulative_training_bytes": 855002152,
"metrics": {
"loss": 0.464321046376456,
"ce_loss": 0.45432105591319916,
"lb_loss": 0.9999999970218006
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 856000000,
"cumulative_training_bytes": 856002523,
"metrics": {
"loss": 0.4643201960855458,
"ce_loss": 0.454320205622289,
"lb_loss": 0.9999999968079595
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 857000000,
"cumulative_training_bytes": 857006768,
"metrics": {
"loss": 0.4643526690346854,
"ce_loss": 0.4543526785714286,
"lb_loss": 0.9999999967789164
}
},
{
"epoch": 18,
"checkpoint_type": "epoch",
"metrics": {
"loss": 0.4643767603258854,
"ce_loss": 0.45437676986262854,
"lb_loss": 0.9999999966960793,
"training_bytes": 47653406
},
"cumulative_training_bytes": 857761233,
"training_bytes_this_epoch": 47653406
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 858000000,
"cumulative_training_bytes": 858005351,
"metrics": {
"loss": 0.45714354515075684,
"ce_loss": 0.4471435546875,
"lb_loss": 1.0000000167638063
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 859000000,
"cumulative_training_bytes": 859002822,
"metrics": {
"loss": 0.4560117574385655,
"ce_loss": 0.44601176697530864,
"lb_loss": 1.0000000051510187
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 860000000,
"cumulative_training_bytes": 860005316,
"metrics": {
"loss": 0.45533248833015105,
"ce_loss": 0.4453324978668942,
"lb_loss": 0.9999999993897135
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 861000000,
"cumulative_training_bytes": 861001866,
"metrics": {
"loss": 0.4553494289975358,
"ce_loss": 0.44534943853427894,
"lb_loss": 1.0000000015500026
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 862000000,
"cumulative_training_bytes": 862004590,
"metrics": {
"loss": 0.4559224013387081,
"ce_loss": 0.44592241087545126,
"lb_loss": 0.9999999984937454
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 863000000,
"cumulative_training_bytes": 863001335,
"metrics": {
"loss": 0.45603777372349075,
"ce_loss": 0.4460377832602339,
"lb_loss": 1.0000000004357064
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 864000000,
"cumulative_training_bytes": 864007195,
"metrics": {
"loss": 0.4564063362046784,
"ce_loss": 0.4464063457414216,
"lb_loss": 0.9999999999269551
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 865000000,
"cumulative_training_bytes": 865006139,
"metrics": {
"loss": 0.45668132965458874,
"ce_loss": 0.4466813391913319,
"lb_loss": 0.9999999995589508
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 866000000,
"cumulative_training_bytes": 866001721,
"metrics": {
"loss": 0.45680093144838696,
"ce_loss": 0.4468009409851301,
"lb_loss": 0.9999999989475016
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 867000000,
"cumulative_training_bytes": 867002974,
"metrics": {
"loss": 0.4570681993489238,
"ce_loss": 0.44706820888566695,
"lb_loss": 0.999999999061733
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 868000000,
"cumulative_training_bytes": 868004509,
"metrics": {
"loss": 0.45706402140940494,
"ce_loss": 0.4470640309461481,
"lb_loss": 0.9999999983950881
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 869000000,
"cumulative_training_bytes": 869007120,
"metrics": {
"loss": 0.45723368426434674,
"ce_loss": 0.4472336938010899,
"lb_loss": 0.9999999979292664
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 870000000,
"cumulative_training_bytes": 870003043,
"metrics": {
"loss": 0.4574648391021805,
"ce_loss": 0.44746484863892366,
"lb_loss": 0.9999999979858255
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 871000000,
"cumulative_training_bytes": 871006700,
"metrics": {
"loss": 0.4577389291561429,
"ce_loss": 0.44773893869288606,
"lb_loss": 0.9999999981039587
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 872000000,
"cumulative_training_bytes": 872007461,
"metrics": {
"loss": 0.4579849114982031,
"ce_loss": 0.44798492103494625,
"lb_loss": 0.9999999977247689
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 873000000,
"cumulative_training_bytes": 873006742,
"metrics": {
"loss": 0.45822795379826436,
"ce_loss": 0.44822796333500753,
"lb_loss": 0.999999997754722
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 874000000,
"cumulative_training_bytes": 874001226,
"metrics": {
"loss": 0.45845995922799043,
"ce_loss": 0.4484599687647336,
"lb_loss": 0.9999999979766457
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 875000000,
"cumulative_training_bytes": 875004724,
"metrics": {
"loss": 0.45866020915775063,
"ce_loss": 0.4486602186944938,
"lb_loss": 0.999999998014943
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 876000000,
"cumulative_training_bytes": 876004731,
"metrics": {
"loss": 0.45884581898822535,
"ce_loss": 0.4488458285249685,
"lb_loss": 0.9999999977488804
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 877000000,
"cumulative_training_bytes": 877006989,
"metrics": {
"loss": 0.45900198554082244,
"ce_loss": 0.4490019950775656,
"lb_loss": 0.999999997818764
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 878000000,
"cumulative_training_bytes": 878005540,
"metrics": {
"loss": 0.45909980962929314,
"ce_loss": 0.4490998191660363,
"lb_loss": 0.9999999976329471
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 879000000,
"cumulative_training_bytes": 879001520,
"metrics": {
"loss": 0.45923634252706363,
"ce_loss": 0.4492363520638068,
"lb_loss": 0.9999999974000858
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 880000000,
"cumulative_training_bytes": 880003773,
"metrics": {
"loss": 0.45938951283847196,
"ce_loss": 0.4493895223752151,
"lb_loss": 0.9999999969017895
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 881000000,
"cumulative_training_bytes": 881006052,
"metrics": {
"loss": 0.4595082352755098,
"ce_loss": 0.449508244812253,
"lb_loss": 0.999999996976576
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 882000000,
"cumulative_training_bytes": 882002195,
"metrics": {
"loss": 0.4596030730201109,
"ce_loss": 0.44960308255685405,
"lb_loss": 0.9999999969124568
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 883000000,
"cumulative_training_bytes": 883005745,
"metrics": {
"loss": 0.4596205058607083,
"ce_loss": 0.44962051539745146,
"lb_loss": 0.9999999968895634
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 884000000,
"cumulative_training_bytes": 884007789,
"metrics": {
"loss": 0.4597436392449901,
"ce_loss": 0.4497436487817333,
"lb_loss": 0.9999999968693213
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 885000000,
"cumulative_training_bytes": 885004439,
"metrics": {
"loss": 0.45980132849390065,
"ce_loss": 0.4498013380306438,
"lb_loss": 0.9999999972518522
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 886000000,
"cumulative_training_bytes": 886001168,
"metrics": {
"loss": 0.4598883227795031,
"ce_loss": 0.44988833231624625,
"lb_loss": 0.9999999972194199
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 887000000,
"cumulative_training_bytes": 887003211,
"metrics": {
"loss": 0.45996510226917614,
"ce_loss": 0.4499651118059193,
"lb_loss": 0.9999999972523789
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 888000000,
"cumulative_training_bytes": 888000146,
"metrics": {
"loss": 0.46011987840957236,
"ce_loss": 0.4501198879463155,
"lb_loss": 0.9999999971624023
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 889000000,
"cumulative_training_bytes": 889004224,
"metrics": {
"loss": 0.4601905158921784,
"ce_loss": 0.4501905254289216,
"lb_loss": 0.9999999969613318
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 890000000,
"cumulative_training_bytes": 890005885,
"metrics": {
"loss": 0.46025722226686644,
"ce_loss": 0.4502572318036096,
"lb_loss": 0.9999999972964885
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 891000000,
"cumulative_training_bytes": 891002684,
"metrics": {
"loss": 0.46033590456714996,
"ce_loss": 0.4503359141038931,
"lb_loss": 0.9999999971577606
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 892000000,
"cumulative_training_bytes": 892004908,
"metrics": {
"loss": 0.4604197900495717,
"ce_loss": 0.45041979958631484,
"lb_loss": 0.999999997360975
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 893000000,
"cumulative_training_bytes": 893001735,
"metrics": {
"loss": 0.4605195505729917,
"ce_loss": 0.4505195601097349,
"lb_loss": 0.9999999973060048
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 894000000,
"cumulative_training_bytes": 894003726,
"metrics": {
"loss": 0.4606263274852302,
"ce_loss": 0.45062633702197336,
"lb_loss": 0.9999999972042614
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 895000000,
"cumulative_training_bytes": 895004512,
"metrics": {
"loss": 0.46070968477349533,
"ce_loss": 0.4507096943102385,
"lb_loss": 0.9999999971202526
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 896000000,
"cumulative_training_bytes": 896007365,
"metrics": {
"loss": 0.46081095768047403,
"ce_loss": 0.4508109672172172,
"lb_loss": 0.9999999972196432
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 897000000,
"cumulative_training_bytes": 897002215,
"metrics": {
"loss": 0.4608837557071593,
"ce_loss": 0.45088376524390245,
"lb_loss": 0.999999997197128
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 898000000,
"cumulative_training_bytes": 898006553,
"metrics": {
"loss": 0.4609418456049996,
"ce_loss": 0.4509418551417428,
"lb_loss": 0.9999999970855415
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 899000000,
"cumulative_training_bytes": 899002574,
"metrics": {
"loss": 0.4610199235072598,
"ce_loss": 0.45101993304400295,
"lb_loss": 0.9999999969566882
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 900000000,
"cumulative_training_bytes": 900001892,
"metrics": {
"loss": 0.4611180545152779,
"ce_loss": 0.45111806405202104,
"lb_loss": 0.9999999968668938
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 901000000,
"cumulative_training_bytes": 901002457,
"metrics": {
"loss": 0.4611618370239863,
"ce_loss": 0.45116184656072944,
"lb_loss": 0.999999996834031
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 902000000,
"cumulative_training_bytes": 902005917,
"metrics": {
"loss": 0.4612114078797649,
"ce_loss": 0.45121141741650805,
"lb_loss": 0.999999996926426
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 903000000,
"cumulative_training_bytes": 903002754,
"metrics": {
"loss": 0.4612944935517659,
"ce_loss": 0.45129450308850905,
"lb_loss": 0.999999996913349
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 904000000,
"cumulative_training_bytes": 904007438,
"metrics": {
"loss": 0.46135489009074027,
"ce_loss": 0.45135489962748343,
"lb_loss": 0.999999997118451
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 905000000,
"cumulative_training_bytes": 905006302,
"metrics": {
"loss": 0.4613947179754124,
"ce_loss": 0.4513947275121556,
"lb_loss": 0.9999999970922208
}
},
{
"epoch": 19,
"checkpoint_type": "epoch",
"metrics": {
"loss": 0.4614244788348828,
"ce_loss": 0.45142448837162596,
"lb_loss": 0.999999997069566,
"training_bytes": 47653396
},
"cumulative_training_bytes": 905414629,
"training_bytes_this_epoch": 47653396
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 906000000,
"cumulative_training_bytes": 906007139,
"metrics": {
"loss": 0.4505945521515685,
"ce_loss": 0.4405945616883117,
"lb_loss": 0.9999999984518274
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 907000000,
"cumulative_training_bytes": 907003717,
"metrics": {
"loss": 0.45137793430383655,
"ce_loss": 0.4413779438405797,
"lb_loss": 0.9999999956808229
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 908000000,
"cumulative_training_bytes": 908005720,
"metrics": {
"loss": 0.45178184142479527,
"ce_loss": 0.44178185096153844,
"lb_loss": 0.9999999961204077
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 909000000,
"cumulative_training_bytes": 909006677,
"metrics": {
"loss": 0.45235156962103934,
"ce_loss": 0.4423515791577825,
"lb_loss": 0.999999996949869
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 910000000,
"cumulative_training_bytes": 910005425,
"metrics": {
"loss": 0.45257681120616167,
"ce_loss": 0.44257682074290483,
"lb_loss": 0.9999999954226817
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 911000000,
"cumulative_training_bytes": 911000290,
"metrics": {
"loss": 0.45285567890632955,
"ce_loss": 0.4428556884430727,
"lb_loss": 0.9999999962389388
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 912000000,
"cumulative_training_bytes": 912003476,
"metrics": {
"loss": 0.4531254446783731,
"ce_loss": 0.4431254542151163,
"lb_loss": 0.9999999967425368
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 913000000,
"cumulative_training_bytes": 913000462,
"metrics": {
"loss": 0.4532784784683073,
"ce_loss": 0.4432784880050505,
"lb_loss": 0.9999999971100778
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 914000000,
"cumulative_training_bytes": 914002700,
"metrics": {
"loss": 0.4536259382777082,
"ce_loss": 0.44362594781445136,
"lb_loss": 0.999999997447794
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 915000000,
"cumulative_training_bytes": 915005428,
"metrics": {
"loss": 0.4537899265654933,
"ce_loss": 0.44378993610223644,
"lb_loss": 0.999999997191155
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 916000000,
"cumulative_training_bytes": 916000562,
"metrics": {
"loss": 0.4539811997613755,
"ce_loss": 0.4439812092981187,
"lb_loss": 0.999999998059183
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 917000000,
"cumulative_training_bytes": 917005115,
"metrics": {
"loss": 0.45415455713543135,
"ce_loss": 0.4441545666721745,
"lb_loss": 0.9999999975575096
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 918000000,
"cumulative_training_bytes": 918000290,
"metrics": {
"loss": 0.45433057856429154,
"ce_loss": 0.4443305881010347,
"lb_loss": 0.9999999979684357
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 919000000,
"cumulative_training_bytes": 919001157,
"metrics": {
"loss": 0.45467502745592875,
"ce_loss": 0.4446750369926719,
"lb_loss": 0.9999999979168613
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 920000000,
"cumulative_training_bytes": 920005392,
"metrics": {
"loss": 0.4549423707126007,
"ce_loss": 0.44494238024934385,
"lb_loss": 0.9999999978723801
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 921000000,
"cumulative_training_bytes": 921006037,
"metrics": {
"loss": 0.455245339800192,
"ce_loss": 0.44524534933693516,
"lb_loss": 0.9999999982142027
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 922000000,
"cumulative_training_bytes": 922000214,
"metrics": {
"loss": 0.45546488110268435,
"ce_loss": 0.4454648906394275,
"lb_loss": 0.9999999985965665
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 923000000,
"cumulative_training_bytes": 923002745,
"metrics": {
"loss": 0.4556951227009582,
"ce_loss": 0.44569513223770135,
"lb_loss": 0.9999999987804012
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 924000000,
"cumulative_training_bytes": 924007204,
"metrics": {
"loss": 0.4558257086469471,
"ce_loss": 0.44582571818369027,
"lb_loss": 0.9999999989689476
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 925000000,
"cumulative_training_bytes": 925003699,
"metrics": {
"loss": 0.4559782938252584,
"ce_loss": 0.44597830336200156,
"lb_loss": 0.9999999988116353
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 926000000,
"cumulative_training_bytes": 926005686,
"metrics": {
"loss": 0.4561811916532903,
"ce_loss": 0.44618120119003346,
"lb_loss": 0.9999999985813696
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 927000000,
"cumulative_training_bytes": 927002982,
"metrics": {
"loss": 0.4563115703674782,
"ce_loss": 0.4463115799042214,
"lb_loss": 0.9999999980547615
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 928000000,
"cumulative_training_bytes": 928004103,
"metrics": {
"loss": 0.45646053071749415,
"ce_loss": 0.4464605402542373,
"lb_loss": 0.999999998282578
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 929000000,
"cumulative_training_bytes": 929006434,
"metrics": {
"loss": 0.4565663954454055,
"ce_loss": 0.44656640498214867,
"lb_loss": 0.9999999982201794
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 930000000,
"cumulative_training_bytes": 930003003,
"metrics": {
"loss": 0.45671574782077784,
"ce_loss": 0.446715757357521,
"lb_loss": 0.999999998180861
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 931000000,
"cumulative_training_bytes": 931005136,
"metrics": {
"loss": 0.45679983430676374,
"ce_loss": 0.4467998438435069,
"lb_loss": 0.9999999983770129
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 932000000,
"cumulative_training_bytes": 932004010,
"metrics": {
"loss": 0.4569366266567495,
"ce_loss": 0.44693663619349266,
"lb_loss": 0.9999999983867445
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 933000000,
"cumulative_training_bytes": 933006103,
"metrics": {
"loss": 0.4570423392423911,
"ce_loss": 0.4470423487791343,
"lb_loss": 0.9999999979161528
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 934000000,
"cumulative_training_bytes": 934001056,
"metrics": {
"loss": 0.4571594332524909,
"ce_loss": 0.4471594427892341,
"lb_loss": 0.9999999978131129
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 935000000,
"cumulative_training_bytes": 935003223,
"metrics": {
"loss": 0.45730806371681443,
"ce_loss": 0.4473080732535576,
"lb_loss": 0.9999999977175971
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 936000000,
"cumulative_training_bytes": 936001763,
"metrics": {
"loss": 0.45738783855462106,
"ce_loss": 0.4473878480913642,
"lb_loss": 0.9999999975233114
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 937000000,
"cumulative_training_bytes": 937006395,
"metrics": {
"loss": 0.45747957874488554,
"ce_loss": 0.4474795882816287,
"lb_loss": 0.9999999975441615
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 938000000,
"cumulative_training_bytes": 938001495,
"metrics": {
"loss": 0.45755059916273993,
"ce_loss": 0.4475506086994831,
"lb_loss": 0.9999999975771608
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 939000000,
"cumulative_training_bytes": 939003006,
"metrics": {
"loss": 0.45756441776033846,
"ce_loss": 0.4475644272970816,
"lb_loss": 0.9999999975810244
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 940000000,
"cumulative_training_bytes": 940003165,
"metrics": {
"loss": 0.45759776781046674,
"ce_loss": 0.4475977773472099,
"lb_loss": 0.9999999973602903
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 941000000,
"cumulative_training_bytes": 941001772,
"metrics": {
"loss": 0.45767297192580525,
"ce_loss": 0.4476729814625484,
"lb_loss": 0.9999999972288843
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 942000000,
"cumulative_training_bytes": 942003089,
"metrics": {
"loss": 0.45774275527380737,
"ce_loss": 0.44774276481055053,
"lb_loss": 0.9999999972424898
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 943000000,
"cumulative_training_bytes": 943006108,
"metrics": {
"loss": 0.45780045653905144,
"ce_loss": 0.4478004660757946,
"lb_loss": 0.9999999973889571
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 944000000,
"cumulative_training_bytes": 944003340,
"metrics": {
"loss": 0.4579041221871552,
"ce_loss": 0.44790413172389837,
"lb_loss": 0.999999997290698
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 945000000,
"cumulative_training_bytes": 945007101,
"metrics": {
"loss": 0.45798618218312526,
"ce_loss": 0.4479861917198684,
"lb_loss": 0.9999999974977349
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 946000000,
"cumulative_training_bytes": 946006954,
"metrics": {
"loss": 0.45804723397740776,
"ce_loss": 0.44804724351415093,
"lb_loss": 0.9999999973683987
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 947000000,
"cumulative_training_bytes": 947006874,
"metrics": {
"loss": 0.45816000358238773,
"ce_loss": 0.4481600131191309,
"lb_loss": 0.9999999972123771
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 948000000,
"cumulative_training_bytes": 948002304,
"metrics": {
"loss": 0.4582202268191281,
"ce_loss": 0.44822023635587127,
"lb_loss": 0.9999999972346704
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 949000000,
"cumulative_training_bytes": 949000281,
"metrics": {
"loss": 0.45827323714661655,
"ce_loss": 0.4482732466833597,
"lb_loss": 0.9999999969312667
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 950000000,
"cumulative_training_bytes": 950005460,
"metrics": {
"loss": 0.4582935053636347,
"ce_loss": 0.4482935149003779,
"lb_loss": 0.9999999967238945
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 951000000,
"cumulative_training_bytes": 951004754,
"metrics": {
"loss": 0.45839260763107137,
"ce_loss": 0.44839261716781453,
"lb_loss": 0.9999999967359123
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 952000000,
"cumulative_training_bytes": 952003496,
"metrics": {
"loss": 0.4585031724775566,
"ce_loss": 0.4485031820142998,
"lb_loss": 0.9999999967180216
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 953000000,
"cumulative_training_bytes": 953005419,
"metrics": {
"loss": 0.4585650799644636,
"ce_loss": 0.44856508950120677,
"lb_loss": 0.9999999965953904
}
},
{
"epoch": 20,
"checkpoint_type": "epoch",
"metrics": {
"loss": 0.45858579490975426,
"ce_loss": 0.4485858044464974,
"lb_loss": 0.9999999965715838,
"training_bytes": 47653413
},
"cumulative_training_bytes": 953068042,
"training_bytes_this_epoch": 47653413
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 954000000,
"cumulative_training_bytes": 954000428,
"metrics": {
"loss": 0.4476760918976831,
"ce_loss": 0.43767610143442626,
"lb_loss": 1.0000000063513146
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 955000000,
"cumulative_training_bytes": 955002200,
"metrics": {
"loss": 0.44804038077946234,
"ce_loss": 0.4380403903162055,
"lb_loss": 0.999999999528817
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 956000000,
"cumulative_training_bytes": 956005963,
"metrics": {
"loss": 0.4483036200205485,
"ce_loss": 0.4383036295572917,
"lb_loss": 0.9999999996895591
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 957000000,
"cumulative_training_bytes": 957000742,
"metrics": {
"loss": 0.448704544208393,
"ce_loss": 0.43870455374513617,
"lb_loss": 0.9999999988403766
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 958000000,
"cumulative_training_bytes": 958002438,
"metrics": {
"loss": 0.44925628891286923,
"ce_loss": 0.4392562984496124,
"lb_loss": 0.9999999988910764
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 959000000,
"cumulative_training_bytes": 959002429,
"metrics": {
"loss": 0.44934474852777295,
"ce_loss": 0.4393447580645161,
"lb_loss": 0.9999999976158143
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 960000000,
"cumulative_training_bytes": 960003553,
"metrics": {
"loss": 0.4496341999003429,
"ce_loss": 0.4396342094370861,
"lb_loss": 0.9999999972368707
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 961000000,
"cumulative_training_bytes": 961003867,
"metrics": {
"loss": 0.44992774178919703,
"ce_loss": 0.4399277513259402,
"lb_loss": 0.9999999952868073
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 962000000,
"cumulative_training_bytes": 962003623,
"metrics": {
"loss": 0.45012032169185273,
"ce_loss": 0.4401203312285959,
"lb_loss": 0.9999999951520194
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 963000000,
"cumulative_training_bytes": 963004917,
"metrics": {
"loss": 0.4505898071491691,
"ce_loss": 0.44058981668591224,
"lb_loss": 0.9999999956409228
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 964000000,
"cumulative_training_bytes": 964007286,
"metrics": {
"loss": 0.4508954231555645,
"ce_loss": 0.4408954326923077,
"lb_loss": 0.999999996415385
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 965000000,
"cumulative_training_bytes": 965001346,
"metrics": {
"loss": 0.4511721158638979,
"ce_loss": 0.44117212540064105,
"lb_loss": 0.9999999975164732
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 966000000,
"cumulative_training_bytes": 966000376,
"metrics": {
"loss": 0.451245598821245,
"ce_loss": 0.44124560835798815,
"lb_loss": 0.9999999979543968
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 967000000,
"cumulative_training_bytes": 967005281,
"metrics": {
"loss": 0.45132150851652425,
"ce_loss": 0.4413215180532674,
"lb_loss": 0.9999999973814544
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 968000000,
"cumulative_training_bytes": 968007618,
"metrics": {
"loss": 0.4515903465083388,
"ce_loss": 0.44159035604508196,
"lb_loss": 0.9999999970075537
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 969000000,
"cumulative_training_bytes": 969006307,
"metrics": {
"loss": 0.4518647513249947,
"ce_loss": 0.44186476086173787,
"lb_loss": 0.9999999977966598
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 970000000,
"cumulative_training_bytes": 970004567,
"metrics": {
"loss": 0.4520443377858958,
"ce_loss": 0.442044347322639,
"lb_loss": 0.9999999973604812
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 971000000,
"cumulative_training_bytes": 971001385,
"metrics": {
"loss": 0.4521723189417033,
"ce_loss": 0.44217232847844645,
"lb_loss": 0.9999999969981442
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 972000000,
"cumulative_training_bytes": 972005775,
"metrics": {
"loss": 0.45231254098872165,
"ce_loss": 0.4423125505254648,
"lb_loss": 0.9999999970848172
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 973000000,
"cumulative_training_bytes": 973005099,
"metrics": {
"loss": 0.4524783973730457,
"ce_loss": 0.44247840690978885,
"lb_loss": 0.9999999974831052
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 974000000,
"cumulative_training_bytes": 974000735,
"metrics": {
"loss": 0.4526559548142623,
"ce_loss": 0.44265596435100546,
"lb_loss": 0.9999999972758389
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 975000000,
"cumulative_training_bytes": 975000397,
"metrics": {
"loss": 0.45285711384950944,
"ce_loss": 0.4428571233862526,
"lb_loss": 0.9999999974211529
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 976000000,
"cumulative_training_bytes": 976004243,
"metrics": {
"loss": 0.45301266459572265,
"ce_loss": 0.4430126741324658,
"lb_loss": 0.9999999969372322
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 977000000,
"cumulative_training_bytes": 977001859,
"metrics": {
"loss": 0.4531138975627132,
"ce_loss": 0.44311390709945636,
"lb_loss": 0.9999999971026844
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 978000000,
"cumulative_training_bytes": 978005532,
"metrics": {
"loss": 0.4532364708576706,
"ce_loss": 0.44323648039441377,
"lb_loss": 0.9999999968166948
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 979000000,
"cumulative_training_bytes": 979005081,
"metrics": {
"loss": 0.45334783579973853,
"ce_loss": 0.4433478453364817,
"lb_loss": 0.9999999970971764
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 980000000,
"cumulative_training_bytes": 980007499,
"metrics": {
"loss": 0.4534764752600741,
"ce_loss": 0.4434764847968173,
"lb_loss": 0.9999999970527967
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 981000000,
"cumulative_training_bytes": 981004891,
"metrics": {
"loss": 0.4536125384216016,
"ce_loss": 0.44361254795834476,
"lb_loss": 0.999999996733097
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 982000000,
"cumulative_training_bytes": 982006800,
"metrics": {
"loss": 0.45371898847912984,
"ce_loss": 0.443718998015873,
"lb_loss": 0.9999999963101887
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 983000000,
"cumulative_training_bytes": 983004665,
"metrics": {
"loss": 0.4538384055847402,
"ce_loss": 0.44383841512148337,
"lb_loss": 0.9999999963109145
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 984000000,
"cumulative_training_bytes": 984006393,
"metrics": {
"loss": 0.4539954239883744,
"ce_loss": 0.44399543352511756,
"lb_loss": 0.9999999966517559
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 985000000,
"cumulative_training_bytes": 985007069,
"metrics": {
"loss": 0.45418143821014084,
"ce_loss": 0.444181447746884,
"lb_loss": 0.9999999966140218
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 986000000,
"cumulative_training_bytes": 986006787,
"metrics": {
"loss": 0.45425647727171803,
"ce_loss": 0.4442564868084612,
"lb_loss": 0.9999999965916452
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 987000000,
"cumulative_training_bytes": 987002711,
"metrics": {
"loss": 0.45432535147408715,
"ce_loss": 0.4443253610108303,
"lb_loss": 0.9999999966378247
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 988000000,
"cumulative_training_bytes": 988007659,
"metrics": {
"loss": 0.4544178967201054,
"ce_loss": 0.44441790625684857,
"lb_loss": 0.9999999963816597
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 989000000,
"cumulative_training_bytes": 989002984,
"metrics": {
"loss": 0.45448096611848804,
"ce_loss": 0.4444809756552312,
"lb_loss": 0.9999999967232052
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 990000000,
"cumulative_training_bytes": 990002219,
"metrics": {
"loss": 0.4545410405099083,
"ce_loss": 0.44454105004665145,
"lb_loss": 0.999999996885679
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 991000000,
"cumulative_training_bytes": 991003464,
"metrics": {
"loss": 0.45466355147456083,
"ce_loss": 0.444663561011304,
"lb_loss": 0.9999999970522532
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 992000000,
"cumulative_training_bytes": 992005888,
"metrics": {
"loss": 0.45475785673660984,
"ce_loss": 0.444757866273353,
"lb_loss": 0.9999999972219664
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 993000000,
"cumulative_training_bytes": 993002370,
"metrics": {
"loss": 0.4548222429800354,
"ce_loss": 0.44482225251677854,
"lb_loss": 0.9999999970283399
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 994000000,
"cumulative_training_bytes": 994003864,
"metrics": {
"loss": 0.454938013634787,
"ce_loss": 0.44493802317153014,
"lb_loss": 0.9999999967555272
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 995000000,
"cumulative_training_bytes": 995006555,
"metrics": {
"loss": 0.45501115932851155,
"ce_loss": 0.4450111688652547,
"lb_loss": 0.9999999965828267
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 996000000,
"cumulative_training_bytes": 996000608,
"metrics": {
"loss": 0.4550933865529661,
"ce_loss": 0.44509339608970927,
"lb_loss": 0.9999999965132292
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 997000000,
"cumulative_training_bytes": 997007326,
"metrics": {
"loss": 0.4551511483144245,
"ce_loss": 0.44515115785116766,
"lb_loss": 0.9999999964058545
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 998000000,
"cumulative_training_bytes": 998006581,
"metrics": {
"loss": 0.4552532544349726,
"ce_loss": 0.4452532639717158,
"lb_loss": 0.9999999961610913
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 999000000,
"cumulative_training_bytes": 999000135,
"metrics": {
"loss": 0.4552769968612228,
"ce_loss": 0.44527700639796597,
"lb_loss": 0.9999999962337178
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 1000000000,
"cumulative_training_bytes": 1000002883,
"metrics": {
"loss": 0.4553679389458804,
"ce_loss": 0.44536794848262357,
"lb_loss": 0.9999999961586172
}
},
{
"epoch": 21,
"checkpoint_type": "epoch",
"metrics": {
"loss": 0.4554653137079548,
"ce_loss": 0.44546532324469795,
"lb_loss": 0.9999999964087819,
"training_bytes": 47653407
},
"cumulative_training_bytes": 1000721449,
"training_bytes_this_epoch": 47653407
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 1001000000,
"cumulative_training_bytes": 1001004765,
"metrics": {
"loss": 0.4417461897875812,
"ce_loss": 0.43174619932432434,
"lb_loss": 0.9999999967781273
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 1002000000,
"cumulative_training_bytes": 1002000171,
"metrics": {
"loss": 0.4442837666608616,
"ce_loss": 0.4342837761976048,
"lb_loss": 1.0000000014276562
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 1003000000,
"cumulative_training_bytes": 1003000999,
"metrics": {
"loss": 0.4452322681478206,
"ce_loss": 0.43523227768456374,
"lb_loss": 0.999999996599735
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 1004000000,
"cumulative_training_bytes": 1004003831,
"metrics": {
"loss": 0.4486882553233967,
"ce_loss": 0.43868826486013984,
"lb_loss": 0.9999999962486588
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 1005000000,
"cumulative_training_bytes": 1005001152,
"metrics": {
"loss": 0.44896745468508154,
"ce_loss": 0.4389674642218247,
"lb_loss": 0.999999996054791
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 1006000000,
"cumulative_training_bytes": 1006005315,
"metrics": {
"loss": 0.44883037995601044,
"ce_loss": 0.4388303894927536,
"lb_loss": 0.9999999951625216
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 1007000000,
"cumulative_training_bytes": 1007001983,
"metrics": {
"loss": 0.44876475799374466,
"ce_loss": 0.4387647675304878,
"lb_loss": 0.9999999939668469
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 1008000000,
"cumulative_training_bytes": 1008002750,
"metrics": {
"loss": 0.448804127555791,
"ce_loss": 0.4388041370925342,
"lb_loss": 0.9999999941084788
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 1009000000,
"cumulative_training_bytes": 1009004840,
"metrics": {
"loss": 0.4488484048578964,
"ce_loss": 0.43884841439463956,
"lb_loss": 0.9999999951523024
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 1010000000,
"cumulative_training_bytes": 1010007632,
"metrics": {
"loss": 0.44896201643192957,
"ce_loss": 0.43896202596867273,
"lb_loss": 0.999999996216358
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 1011000000,
"cumulative_training_bytes": 1011001073,
"metrics": {
"loss": 0.44882147891333135,
"ce_loss": 0.4388214884500745,
"lb_loss": 0.999999996446817
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 1012000000,
"cumulative_training_bytes": 1012002820,
"metrics": {
"loss": 0.44899433661057525,
"ce_loss": 0.4389943461473184,
"lb_loss": 0.9999999968032811
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 1013000000,
"cumulative_training_bytes": 1013000900,
"metrics": {
"loss": 0.4489474717483473,
"ce_loss": 0.4389474812850905,
"lb_loss": 0.9999999971368948
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 1014000000,
"cumulative_training_bytes": 1014005352,
"metrics": {
"loss": 0.449001441227386,
"ce_loss": 0.43900145076412916,
"lb_loss": 0.9999999966657148
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 1015000000,
"cumulative_training_bytes": 1015002241,
"metrics": {
"loss": 0.4491251497309607,
"ce_loss": 0.43912515926770385,
"lb_loss": 0.9999999973139537
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 1016000000,
"cumulative_training_bytes": 1016006378,
"metrics": {
"loss": 0.4492504603880689,
"ce_loss": 0.43925046992481204,
"lb_loss": 0.9999999975799618
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 1017000000,
"cumulative_training_bytes": 1017004553,
"metrics": {
"loss": 0.44930973311031563,
"ce_loss": 0.4393097426470588,
"lb_loss": 0.9999999975597157
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 1018000000,
"cumulative_training_bytes": 1018005638,
"metrics": {
"loss": 0.4494938055674235,
"ce_loss": 0.4394938151041667,
"lb_loss": 0.9999999973579501
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 1019000000,
"cumulative_training_bytes": 1019003819,
"metrics": {
"loss": 0.44971085923158527,
"ce_loss": 0.43971086876832843,
"lb_loss": 0.9999999975029474
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 1020000000,
"cumulative_training_bytes": 1020001116,
"metrics": {
"loss": 0.44981704697895014,
"ce_loss": 0.4398170565156933,
"lb_loss": 0.9999999973714281
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 1021000000,
"cumulative_training_bytes": 1021000064,
"metrics": {
"loss": 0.4500520344587251,
"ce_loss": 0.44005204399546827,
"lb_loss": 0.9999999973439018
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 1022000000,
"cumulative_training_bytes": 1022006171,
"metrics": {
"loss": 0.4501109510156138,
"ce_loss": 0.440110960552357,
"lb_loss": 0.999999997533453
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 1023000000,
"cumulative_training_bytes": 1023007389,
"metrics": {
"loss": 0.4502652396041503,
"ce_loss": 0.4402652491408935,
"lb_loss": 0.9999999975420765
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 1024000000,
"cumulative_training_bytes": 1024005719,
"metrics": {
"loss": 0.45038213478891476,
"ce_loss": 0.4403821443256579,
"lb_loss": 0.9999999971178017
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 1025000000,
"cumulative_training_bytes": 1025000620,
"metrics": {
"loss": 0.45051531972193193,
"ce_loss": 0.4405153292586751,
"lb_loss": 0.9999999971607882
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 1026000000,
"cumulative_training_bytes": 1026003600,
"metrics": {
"loss": 0.45060333509367184,
"ce_loss": 0.440603344630415,
"lb_loss": 0.9999999973637449
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 1027000000,
"cumulative_training_bytes": 1027002396,
"metrics": {
"loss": 0.45068954434696423,
"ce_loss": 0.4406895538837074,
"lb_loss": 0.9999999975852388
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 1028000000,
"cumulative_training_bytes": 1028006189,
"metrics": {
"loss": 0.45083050176308836,
"ce_loss": 0.44083051129983153,
"lb_loss": 0.9999999974063111
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 1029000000,
"cumulative_training_bytes": 1029000329,
"metrics": {
"loss": 0.45092642346027745,
"ce_loss": 0.4409264329970206,
"lb_loss": 0.999999997368484
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 1030000000,
"cumulative_training_bytes": 1030002716,
"metrics": {
"loss": 0.45103073774941954,
"ce_loss": 0.4410307472861627,
"lb_loss": 0.9999999974430652
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 1031000000,
"cumulative_training_bytes": 1031006184,
"metrics": {
"loss": 0.4510980082933024,
"ce_loss": 0.44109801783004554,
"lb_loss": 0.9999999974674809
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 1032000000,
"cumulative_training_bytes": 1032004553,
"metrics": {
"loss": 0.45115468714543117,
"ce_loss": 0.44115469668217433,
"lb_loss": 0.9999999971978227
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 1033000000,
"cumulative_training_bytes": 1033007149,
"metrics": {
"loss": 0.4512519366376341,
"ce_loss": 0.44125194617437724,
"lb_loss": 0.9999999973839006
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 1034000000,
"cumulative_training_bytes": 1034001070,
"metrics": {
"loss": 0.4513257780064099,
"ce_loss": 0.44132578754315305,
"lb_loss": 0.9999999975444807
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 1035000000,
"cumulative_training_bytes": 1035006816,
"metrics": {
"loss": 0.4513804955179932,
"ce_loss": 0.44138050505473636,
"lb_loss": 0.9999999975364479
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 1036000000,
"cumulative_training_bytes": 1036006156,
"metrics": {
"loss": 0.4515215541163934,
"ce_loss": 0.44152156365313655,
"lb_loss": 0.9999999976582503
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 1037000000,
"cumulative_training_bytes": 1037000394,
"metrics": {
"loss": 0.45160786134672737,
"ce_loss": 0.44160787088347053,
"lb_loss": 0.9999999976470195
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 1038000000,
"cumulative_training_bytes": 1038006020,
"metrics": {
"loss": 0.45169471558137514,
"ce_loss": 0.4416947251181183,
"lb_loss": 0.9999999977225834
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 1039000000,
"cumulative_training_bytes": 1039004839,
"metrics": {
"loss": 0.45180788329182253,
"ce_loss": 0.4418078928285657,
"lb_loss": 0.9999999978418802
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 1040000000,
"cumulative_training_bytes": 1040005527,
"metrics": {
"loss": 0.4519148905607227,
"ce_loss": 0.44191490009746587,
"lb_loss": 0.999999997618138
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 1041000000,
"cumulative_training_bytes": 1041000015,
"metrics": {
"loss": 0.4519806671505192,
"ce_loss": 0.4419806766872624,
"lb_loss": 0.9999999976656736
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 1042000000,
"cumulative_training_bytes": 1042000408,
"metrics": {
"loss": 0.4521246687465067,
"ce_loss": 0.44212467828324986,
"lb_loss": 0.9999999977776788
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 1043000000,
"cumulative_training_bytes": 1043006070,
"metrics": {
"loss": 0.4522623903138815,
"ce_loss": 0.44226239985062465,
"lb_loss": 0.9999999981221785
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 1044000000,
"cumulative_training_bytes": 1044005567,
"metrics": {
"loss": 0.4523166392559333,
"ce_loss": 0.44231664879267646,
"lb_loss": 0.9999999979228524
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 1045000000,
"cumulative_training_bytes": 1045001996,
"metrics": {
"loss": 0.4523748661765544,
"ce_loss": 0.4423748757132976,
"lb_loss": 0.9999999980416941
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 1046000000,
"cumulative_training_bytes": 1046006987,
"metrics": {
"loss": 0.45245876149386216,
"ce_loss": 0.4424587710306053,
"lb_loss": 0.9999999979238152
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 1047000000,
"cumulative_training_bytes": 1047002167,
"metrics": {
"loss": 0.452550329069543,
"ce_loss": 0.4425503386062862,
"lb_loss": 0.9999999979293672
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 1048000000,
"cumulative_training_bytes": 1048003428,
"metrics": {
"loss": 0.4525869717385605,
"ce_loss": 0.44258698127530366,
"lb_loss": 0.9999999980694851
}
},
{
"epoch": 22,
"checkpoint_type": "epoch",
"metrics": {
"loss": 0.4525974458777813,
"ce_loss": 0.44259745541452444,
"lb_loss": 0.9999999981517197,
"training_bytes": 47653407
},
"cumulative_training_bytes": 1048374856,
"training_bytes_this_epoch": 47653407
}
]
}