| { |
| "run_name": "run_large_20260115_191350", |
| "timestamp": "20260115_191350", |
| "phase": "large", |
| "config": { |
| "arch_layout": [ |
| "m4", |
| [ |
| "T1m4", |
| [ |
| "T22" |
| ], |
| "m4T1" |
| ], |
| "m4" |
| ], |
| "d_model": [ |
| 1024, |
| 1024, |
| 1536 |
| ], |
| "d_intermediate": [ |
| 0, |
| 2816, |
| 4096 |
| ], |
| "vocab_size": 256, |
| "ssm_cfg": { |
| "chunk_size": 256, |
| "d_conv": 4, |
| "d_state": 128, |
| "expand": 2 |
| }, |
| "attn_cfg": { |
| "num_heads": [ |
| 16, |
| 16, |
| 16 |
| ], |
| "rotary_emb_dim": [ |
| 32, |
| 32, |
| 48 |
| ], |
| "window_size": [ |
| 1023, |
| 1023, |
| -1 |
| ] |
| }, |
| "tie_embeddings": false |
| }, |
| "training_args": { |
| "data": "datasets/PI1M/PI1M_v2.csv", |
| "max_samples": null, |
| "batch_size": 16, |
| "epochs": 5, |
| "lr": 0.0001, |
| "weight_decay": 0.1, |
| "gradient_accumulation": 8, |
| "concatenate": true, |
| "num_concatenate": 10, |
| "concatenate_separator": " ", |
| "checkpoint_bytes": 1000000, |
| "num_test_samples": 5, |
| "num_visualize": 5, |
| "skip_visualization": false |
| }, |
| "dataset_info": { |
| "train_size": 99574, |
| "test_size": 5, |
| "test_smiles_file": "checkpoints/run_large_20260115_191350/test_smiles.txt" |
| }, |
| "model_info": { |
| "num_parameters": 622923776, |
| "device": "cuda", |
| "dtype": "torch.bfloat16", |
| "use_amp": true |
| }, |
| "training_history": [ |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 1000000, |
| "cumulative_training_bytes": 1000166, |
| "metrics": { |
| "loss": 3.0352404484382043, |
| "ce_loss": 3.0252403846153846, |
| "lb_loss": 0.9999999889960656 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 2000000, |
| "cumulative_training_bytes": 2000240, |
| "metrics": { |
| "loss": 2.107340772335346, |
| "ce_loss": 2.097340745192308, |
| "lb_loss": 0.9999999871620765 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 3000000, |
| "cumulative_training_bytes": 3001794, |
| "metrics": { |
| "loss": 1.7094185730380476, |
| "ce_loss": 1.6994185581841432, |
| "lb_loss": 0.9999999873473516 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 4000000, |
| "cumulative_training_bytes": 4002359, |
| "metrics": { |
| "loss": 1.47650072853762, |
| "ce_loss": 1.4665007197696738, |
| "lb_loss": 0.9999999890171863 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 5000000, |
| "cumulative_training_bytes": 5005670, |
| "metrics": { |
| "loss": 1.3171558716545808, |
| "ce_loss": 1.3071558665644172, |
| "lb_loss": 0.9999999897611653 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 6000000, |
| "cumulative_training_bytes": 6001321, |
| "metrics": { |
| "loss": 1.2017559169808312, |
| "ce_loss": 1.1917559143222507, |
| "lb_loss": 0.9999999908535072 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 7000000, |
| "cumulative_training_bytes": 7001673, |
| "metrics": { |
| "loss": 1.1151093587948484, |
| "ce_loss": 1.1051093578860898, |
| "lb_loss": 0.9999999904684795 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 8000000, |
| "cumulative_training_bytes": 8004669, |
| "metrics": { |
| "loss": 1.0468063034773787, |
| "ce_loss": 1.0368063038793103, |
| "lb_loss": 0.9999999897804297 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 9000000, |
| "cumulative_training_bytes": 9006752, |
| "metrics": { |
| "loss": 0.9919913549626127, |
| "ce_loss": 0.9819913563829787, |
| "lb_loss": 0.9999999897023465 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 10000000, |
| "cumulative_training_bytes": 10007281, |
| "metrics": { |
| "loss": 0.9471440684010387, |
| "ce_loss": 0.9371440706355283, |
| "lb_loss": 0.9999999893660932 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 11000000, |
| "cumulative_training_bytes": 11001365, |
| "metrics": { |
| "loss": 0.9100927569407938, |
| "ce_loss": 0.900092759836351, |
| "lb_loss": 0.999999989540132 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 12000000, |
| "cumulative_training_bytes": 12005386, |
| "metrics": { |
| "loss": 0.8784949809940438, |
| "ce_loss": 0.868494984444799, |
| "lb_loss": 0.999999989882045 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 13000000, |
| "cumulative_training_bytes": 13001269, |
| "metrics": { |
| "loss": 0.8592479796569771, |
| "ce_loss": 0.849247983573954, |
| "lb_loss": 0.999999989954668 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 14000000, |
| "cumulative_training_bytes": 14005280, |
| "metrics": { |
| "loss": 0.8378439935604906, |
| "ce_loss": 0.8278439978801969, |
| "lb_loss": 0.9999999899245978 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 15000000, |
| "cumulative_training_bytes": 15001797, |
| "metrics": { |
| "loss": 0.8179623213681307, |
| "ce_loss": 0.8079623260342186, |
| "lb_loss": 0.9999999895889742 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 16000000, |
| "cumulative_training_bytes": 16003308, |
| "metrics": { |
| "loss": 0.7999628585397256, |
| "ce_loss": 0.7899628635112494, |
| "lb_loss": 0.999999989471463 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 17000000, |
| "cumulative_training_bytes": 17001780, |
| "metrics": { |
| "loss": 0.783798369592028, |
| "ce_loss": 0.773798374831005, |
| "lb_loss": 0.9999999887720858 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 18000000, |
| "cumulative_training_bytes": 18002585, |
| "metrics": { |
| "loss": 0.7691971354788922, |
| "ce_loss": 0.7591971409574468, |
| "lb_loss": 0.9999999888399814 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 19000000, |
| "cumulative_training_bytes": 19004388, |
| "metrics": { |
| "loss": 0.7562685100266358, |
| "ce_loss": 0.746268515719468, |
| "lb_loss": 0.9999999887325359 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 20000000, |
| "cumulative_training_bytes": 20001795, |
| "metrics": { |
| "loss": 0.7443181650561906, |
| "ce_loss": 0.7343181709418071, |
| "lb_loss": 0.9999999887043265 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 21000000, |
| "cumulative_training_bytes": 21006219, |
| "metrics": { |
| "loss": 0.7334088699425653, |
| "ce_loss": 0.723408876002552, |
| "lb_loss": 0.9999999888743791 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 22000000, |
| "cumulative_training_bytes": 22003647, |
| "metrics": { |
| "loss": 0.7233542565306926, |
| "ce_loss": 0.7133542627479986, |
| "lb_loss": 0.9999999891080966 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 23000000, |
| "cumulative_training_bytes": 23000855, |
| "metrics": { |
| "loss": 0.7141935865044633, |
| "ce_loss": 0.7041935928654679, |
| "lb_loss": 0.9999999891627919 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 24000000, |
| "cumulative_training_bytes": 24007583, |
| "metrics": { |
| "loss": 0.7056202586567953, |
| "ce_loss": 0.6956202651515152, |
| "lb_loss": 0.9999999891818045 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 25000000, |
| "cumulative_training_bytes": 25004319, |
| "metrics": { |
| "loss": 0.6978230217149393, |
| "ce_loss": 0.687823028330781, |
| "lb_loss": 0.9999999895577774 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 26000000, |
| "cumulative_training_bytes": 26000600, |
| "metrics": { |
| "loss": 0.6906206210337261, |
| "ce_loss": 0.6806206277614139, |
| "lb_loss": 0.9999999897293911 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 27000000, |
| "cumulative_training_bytes": 27007515, |
| "metrics": { |
| "loss": 0.6838098439610576, |
| "ce_loss": 0.6738098507938758, |
| "lb_loss": 0.9999999897926835 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 28000000, |
| "cumulative_training_bytes": 28003023, |
| "metrics": { |
| "loss": 0.6774992880874688, |
| "ce_loss": 0.6674992950164069, |
| "lb_loss": 0.9999999895687797 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 29000000, |
| "cumulative_training_bytes": 29003935, |
| "metrics": { |
| "loss": 0.6715684946638226, |
| "ce_loss": 0.6615685016829461, |
| "lb_loss": 0.9999999895046732 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 30000000, |
| "cumulative_training_bytes": 30001066, |
| "metrics": { |
| "loss": 0.6660281601701846, |
| "ce_loss": 0.6560281672728433, |
| "lb_loss": 0.9999999894573715 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 31000000, |
| "cumulative_training_bytes": 31004436, |
| "metrics": { |
| "loss": 0.6609612641201458, |
| "ce_loss": 0.6509612713015559, |
| "lb_loss": 0.9999999894746058 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 32000000, |
| "cumulative_training_bytes": 32006649, |
| "metrics": { |
| "loss": 0.6561554203763533, |
| "ce_loss": 0.646155427631579, |
| "lb_loss": 0.9999999895050194 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 33000000, |
| "cumulative_training_bytes": 33004203, |
| "metrics": { |
| "loss": 0.6516305574961438, |
| "ce_loss": 0.6416305648201857, |
| "lb_loss": 0.9999999895588151 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 34000000, |
| "cumulative_training_bytes": 34006104, |
| "metrics": { |
| "loss": 0.6472530922785559, |
| "ce_loss": 0.6372530996678676, |
| "lb_loss": 0.9999999896520646 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 35000000, |
| "cumulative_training_bytes": 35005618, |
| "metrics": { |
| "loss": 0.6431124474281974, |
| "ce_loss": 0.6331124548785824, |
| "lb_loss": 0.9999999896725271 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 36000000, |
| "cumulative_training_bytes": 36002823, |
| "metrics": { |
| "loss": 0.6391829455870056, |
| "ce_loss": 0.6291829530950862, |
| "lb_loss": 0.9999999896918579 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 37000000, |
| "cumulative_training_bytes": 37006427, |
| "metrics": { |
| "loss": 0.6354130913090232, |
| "ce_loss": 0.6254130988721026, |
| "lb_loss": 0.9999999896752716 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 38000000, |
| "cumulative_training_bytes": 38005922, |
| "metrics": { |
| "loss": 0.6318843585695924, |
| "ce_loss": 0.6218843661847673, |
| "lb_loss": 0.9999999897196099 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 39000000, |
| "cumulative_training_bytes": 39004443, |
| "metrics": { |
| "loss": 0.6285198655931632, |
| "ce_loss": 0.6185198732577543, |
| "lb_loss": 0.9999999895276488 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 40000000, |
| "cumulative_training_bytes": 40005613, |
| "metrics": { |
| "loss": 0.6254313996155814, |
| "ce_loss": 0.615431407326761, |
| "lb_loss": 0.9999999897083863 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 41000000, |
| "cumulative_training_bytes": 41003596, |
| "metrics": { |
| "loss": 0.6224746753085582, |
| "ce_loss": 0.6124746830640643, |
| "lb_loss": 0.9999999896242941 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 42000000, |
| "cumulative_training_bytes": 42004130, |
| "metrics": { |
| "loss": 0.619576180100767, |
| "ce_loss": 0.609576187898815, |
| "lb_loss": 0.9999999894482935 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 43000000, |
| "cumulative_training_bytes": 43002856, |
| "metrics": { |
| "loss": 0.6168661168497852, |
| "ce_loss": 0.6068661246883903, |
| "lb_loss": 0.9999999894715442 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 44000000, |
| "cumulative_training_bytes": 44000615, |
| "metrics": { |
| "loss": 0.6142508432585481, |
| "ce_loss": 0.6042508511355725, |
| "lb_loss": 0.9999999894192938 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 45000000, |
| "cumulative_training_bytes": 45002728, |
| "metrics": { |
| "loss": 0.6117183565789184, |
| "ce_loss": 0.6017183644929386, |
| "lb_loss": 0.9999999893305962 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 46000000, |
| "cumulative_training_bytes": 46000713, |
| "metrics": { |
| "loss": 0.6093004826243594, |
| "ce_loss": 0.5993004905734975, |
| "lb_loss": 0.9999999892538988 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 47000000, |
| "cumulative_training_bytes": 47001586, |
| "metrics": { |
| "loss": 0.6069603338424916, |
| "ce_loss": 0.5969603418255132, |
| "lb_loss": 0.999999989075395 |
| } |
| }, |
| { |
| "epoch": 1, |
| "checkpoint_type": "epoch", |
| "metrics": { |
| "loss": 0.6054869050538325, |
| "ce_loss": 0.5954869130583226, |
| "lb_loss": 0.9999999890922734, |
| "training_bytes": 47653409 |
| }, |
| "cumulative_training_bytes": 47653409, |
| "training_bytes_this_epoch": 47653409 |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 48000000, |
| "cumulative_training_bytes": 48006676, |
| "metrics": { |
| "loss": 0.49496941981108294, |
| "ce_loss": 0.4849694293478261, |
| "lb_loss": 0.9999999935212343 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 49000000, |
| "cumulative_training_bytes": 49000759, |
| "metrics": { |
| "loss": 0.49630592086098413, |
| "ce_loss": 0.4863059303977273, |
| "lb_loss": 0.9999999932267449 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 50000000, |
| "cumulative_training_bytes": 50005240, |
| "metrics": { |
| "loss": 0.4959718451049506, |
| "ce_loss": 0.4859718546416938, |
| "lb_loss": 0.9999999914573148 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 51000000, |
| "cumulative_training_bytes": 51007539, |
| "metrics": { |
| "loss": 0.49752317824864495, |
| "ce_loss": 0.4875231877853881, |
| "lb_loss": 0.9999999910184781 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 52000000, |
| "cumulative_training_bytes": 52002554, |
| "metrics": { |
| "loss": 0.4988107849174822, |
| "ce_loss": 0.4888107944542254, |
| "lb_loss": 0.9999999891914112 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 53000000, |
| "cumulative_training_bytes": 53005306, |
| "metrics": { |
| "loss": 0.49884286868214095, |
| "ce_loss": 0.4888428782188841, |
| "lb_loss": 0.9999999886589159 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 54000000, |
| "cumulative_training_bytes": 54000123, |
| "metrics": { |
| "loss": 0.49843673654287085, |
| "ce_loss": 0.488436746079614, |
| "lb_loss": 0.9999999882803895 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 55000000, |
| "cumulative_training_bytes": 55003152, |
| "metrics": { |
| "loss": 0.4980025132497152, |
| "ce_loss": 0.48800252278645834, |
| "lb_loss": 0.9999999890724818 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 56000000, |
| "cumulative_training_bytes": 56002937, |
| "metrics": { |
| "loss": 0.4978086235979956, |
| "ce_loss": 0.48780863313473877, |
| "lb_loss": 0.9999999890733924 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 57000000, |
| "cumulative_training_bytes": 57004703, |
| "metrics": { |
| "loss": 0.4975252436342879, |
| "ce_loss": 0.48752525317103107, |
| "lb_loss": 0.9999999889765551 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 58000000, |
| "cumulative_training_bytes": 58002959, |
| "metrics": { |
| "loss": 0.49715732681680713, |
| "ce_loss": 0.4871573363535503, |
| "lb_loss": 0.9999999886698271 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 59000000, |
| "cumulative_training_bytes": 59000108, |
| "metrics": { |
| "loss": 0.4970432515893526, |
| "ce_loss": 0.48704326112609575, |
| "lb_loss": 0.9999999883443378 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 60000000, |
| "cumulative_training_bytes": 60007478, |
| "metrics": { |
| "loss": 0.4969303793951454, |
| "ce_loss": 0.48693038893188856, |
| "lb_loss": 0.9999999884481401 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 61000000, |
| "cumulative_training_bytes": 61002660, |
| "metrics": { |
| "loss": 0.49673105242600757, |
| "ce_loss": 0.48673106196275073, |
| "lb_loss": 0.9999999883864875 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 62000000, |
| "cumulative_training_bytes": 62003465, |
| "metrics": { |
| "loss": 0.49654987219300095, |
| "ce_loss": 0.4865498817297441, |
| "lb_loss": 0.9999999883713753 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 63000000, |
| "cumulative_training_bytes": 63000868, |
| "metrics": { |
| "loss": 0.4964099013555799, |
| "ce_loss": 0.48640991089232305, |
| "lb_loss": 0.9999999887089905 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 64000000, |
| "cumulative_training_bytes": 64003546, |
| "metrics": { |
| "loss": 0.49635096437528303, |
| "ce_loss": 0.4863509739120262, |
| "lb_loss": 0.9999999889827633 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 65000000, |
| "cumulative_training_bytes": 65001846, |
| "metrics": { |
| "loss": 0.4962221452934289, |
| "ce_loss": 0.48622215483017206, |
| "lb_loss": 0.9999999886680185 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 66000000, |
| "cumulative_training_bytes": 66004938, |
| "metrics": { |
| "loss": 0.4961587034532485, |
| "ce_loss": 0.48615871298999164, |
| "lb_loss": 0.9999999882679765 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 67000000, |
| "cumulative_training_bytes": 67000216, |
| "metrics": { |
| "loss": 0.49601907669743406, |
| "ce_loss": 0.4860190862341772, |
| "lb_loss": 0.9999999884704623 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 68000000, |
| "cumulative_training_bytes": 68000224, |
| "metrics": { |
| "loss": 0.4964207015242049, |
| "ce_loss": 0.4864207110609481, |
| "lb_loss": 0.9999999881822244 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 69000000, |
| "cumulative_training_bytes": 69005372, |
| "metrics": { |
| "loss": 0.49684213258408866, |
| "ce_loss": 0.4868421421208318, |
| "lb_loss": 0.9999999881602821 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 70000000, |
| "cumulative_training_bytes": 70001864, |
| "metrics": { |
| "loss": 0.497037369488608, |
| "ce_loss": 0.48703737902535116, |
| "lb_loss": 0.9999999881770848 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 71000000, |
| "cumulative_training_bytes": 71000907, |
| "metrics": { |
| "loss": 0.49706029712117744, |
| "ce_loss": 0.4870603066579206, |
| "lb_loss": 0.9999999880360634 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 72000000, |
| "cumulative_training_bytes": 72005398, |
| "metrics": { |
| "loss": 0.49712042088778513, |
| "ce_loss": 0.4871204304245283, |
| "lb_loss": 0.9999999880790711 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 73000000, |
| "cumulative_training_bytes": 73003962, |
| "metrics": { |
| "loss": 0.49715716096929913, |
| "ce_loss": 0.4871571705060423, |
| "lb_loss": 0.9999999879890338 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 74000000, |
| "cumulative_training_bytes": 74006324, |
| "metrics": { |
| "loss": 0.4971806565123705, |
| "ce_loss": 0.48718066604911364, |
| "lb_loss": 0.9999999879612822 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 75000000, |
| "cumulative_training_bytes": 75002178, |
| "metrics": { |
| "loss": 0.4972360369138309, |
| "ce_loss": 0.48723604645057406, |
| "lb_loss": 0.999999987898805 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 76000000, |
| "cumulative_training_bytes": 76006119, |
| "metrics": { |
| "loss": 0.49723345379388895, |
| "ce_loss": 0.4872334633306321, |
| "lb_loss": 0.9999999879728066 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 77000000, |
| "cumulative_training_bytes": 77005284, |
| "metrics": { |
| "loss": 0.4972499007815454, |
| "ce_loss": 0.48724991031828857, |
| "lb_loss": 0.9999999881039516 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 78000000, |
| "cumulative_training_bytes": 78007177, |
| "metrics": { |
| "loss": 0.4972263361683527, |
| "ce_loss": 0.4872263457050959, |
| "lb_loss": 0.9999999881362097 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 79000000, |
| "cumulative_training_bytes": 79001491, |
| "metrics": { |
| "loss": 0.4971963830499691, |
| "ce_loss": 0.48719639258671227, |
| "lb_loss": 0.9999999881780725 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 80000000, |
| "cumulative_training_bytes": 80002957, |
| "metrics": { |
| "loss": 0.49715744238633375, |
| "ce_loss": 0.4871574519230769, |
| "lb_loss": 0.9999999881778243 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 81000000, |
| "cumulative_training_bytes": 81002131, |
| "metrics": { |
| "loss": 0.4970846991314543, |
| "ce_loss": 0.4870847086681975, |
| "lb_loss": 0.9999999881201305 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 82000000, |
| "cumulative_training_bytes": 82000379, |
| "metrics": { |
| "loss": 0.497049108552869, |
| "ce_loss": 0.48704911808961215, |
| "lb_loss": 0.9999999881481625 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 83000000, |
| "cumulative_training_bytes": 83002326, |
| "metrics": { |
| "loss": 0.49690102084670573, |
| "ce_loss": 0.4869010303834489, |
| "lb_loss": 0.9999999881849545 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 84000000, |
| "cumulative_training_bytes": 84004823, |
| "metrics": { |
| "loss": 0.4968436548828903, |
| "ce_loss": 0.48684366441963345, |
| "lb_loss": 0.9999999882473252 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 85000000, |
| "cumulative_training_bytes": 85001132, |
| "metrics": { |
| "loss": 0.496751819840948, |
| "ce_loss": 0.4867518293776912, |
| "lb_loss": 0.9999999883161697 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 86000000, |
| "cumulative_training_bytes": 86000628, |
| "metrics": { |
| "loss": 0.4967399565175699, |
| "ce_loss": 0.4867399660543131, |
| "lb_loss": 0.9999999883718574 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 87000000, |
| "cumulative_training_bytes": 87000672, |
| "metrics": { |
| "loss": 0.49681193101589355, |
| "ce_loss": 0.4868119405526367, |
| "lb_loss": 0.9999999883783122 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 88000000, |
| "cumulative_training_bytes": 88002075, |
| "metrics": { |
| "loss": 0.49670176321425324, |
| "ce_loss": 0.4867017727509964, |
| "lb_loss": 0.9999999882917427 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 89000000, |
| "cumulative_training_bytes": 89004728, |
| "metrics": { |
| "loss": 0.49663121152807166, |
| "ce_loss": 0.4866312210648148, |
| "lb_loss": 0.9999999883770943 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 90000000, |
| "cumulative_training_bytes": 90003725, |
| "metrics": { |
| "loss": 0.49656294246108723, |
| "ce_loss": 0.4865629519978304, |
| "lb_loss": 0.999999988555391 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 91000000, |
| "cumulative_training_bytes": 91002611, |
| "metrics": { |
| "loss": 0.4965044176845958, |
| "ce_loss": 0.48650442722133896, |
| "lb_loss": 0.9999999886813296 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 92000000, |
| "cumulative_training_bytes": 92003164, |
| "metrics": { |
| "loss": 0.4964984069213024, |
| "ce_loss": 0.4864984164580456, |
| "lb_loss": 0.9999999888961651 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 93000000, |
| "cumulative_training_bytes": 93001402, |
| "metrics": { |
| "loss": 0.49645113397473944, |
| "ce_loss": 0.4864511435114826, |
| "lb_loss": 0.999999989119787 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 94000000, |
| "cumulative_training_bytes": 94007638, |
| "metrics": { |
| "loss": 0.4963942520052126, |
| "ce_loss": 0.48639426154195575, |
| "lb_loss": 0.9999999891207247 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 95000000, |
| "cumulative_training_bytes": 95004271, |
| "metrics": { |
| "loss": 0.4963107445261611, |
| "ce_loss": 0.48631075406290425, |
| "lb_loss": 0.9999999891373812 |
| } |
| }, |
| { |
| "epoch": 2, |
| "checkpoint_type": "epoch", |
| "metrics": { |
| "loss": 0.4962876345627106, |
| "ce_loss": 0.48628764409945374, |
| "lb_loss": 0.999999989168886, |
| "training_bytes": 47653416 |
| }, |
| "cumulative_training_bytes": 95306825, |
| "training_bytes_this_epoch": 47653416 |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 96000000, |
| "cumulative_training_bytes": 96003218, |
| "metrics": { |
| "loss": 0.49025411134237773, |
| "ce_loss": 0.4802541208791209, |
| "lb_loss": 0.9999999908300546 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 97000000, |
| "cumulative_training_bytes": 97000816, |
| "metrics": { |
| "loss": 0.4910255136533021, |
| "ce_loss": 0.48102552319004527, |
| "lb_loss": 0.9999999905603504 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 98000000, |
| "cumulative_training_bytes": 98005358, |
| "metrics": { |
| "loss": 0.49233333855107553, |
| "ce_loss": 0.4823333480878187, |
| "lb_loss": 0.9999999910508607 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 99000000, |
| "cumulative_training_bytes": 99000141, |
| "metrics": { |
| "loss": 0.4918436110636709, |
| "ce_loss": 0.4818436206004141, |
| "lb_loss": 0.999999992102076 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 100000000, |
| "cumulative_training_bytes": 100005926, |
| "metrics": { |
| "loss": 0.4912067290626054, |
| "ce_loss": 0.48120673859934854, |
| "lb_loss": 0.9999999912631629 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 101000000, |
| "cumulative_training_bytes": 101001458, |
| "metrics": { |
| "loss": 0.4909990244014289, |
| "ce_loss": 0.48099903393817206, |
| "lb_loss": 0.999999990947144 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 102000000, |
| "cumulative_training_bytes": 102004630, |
| "metrics": { |
| "loss": 0.49028549532595705, |
| "ce_loss": 0.4802855048627002, |
| "lb_loss": 0.9999999912707156 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 103000000, |
| "cumulative_training_bytes": 103004382, |
| "metrics": { |
| "loss": 0.490558137229426, |
| "ce_loss": 0.48055814676616915, |
| "lb_loss": 0.99999999092586 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 104000000, |
| "cumulative_training_bytes": 104002283, |
| "metrics": { |
| "loss": 0.49042572008880747, |
| "ce_loss": 0.48042572962555063, |
| "lb_loss": 0.9999999908623717 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 105000000, |
| "cumulative_training_bytes": 105006513, |
| "metrics": { |
| "loss": 0.49059360480816605, |
| "ce_loss": 0.4805936143449092, |
| "lb_loss": 0.9999999903559967 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 106000000, |
| "cumulative_training_bytes": 106006613, |
| "metrics": { |
| "loss": 0.4903415147116462, |
| "ce_loss": 0.4803415242483894, |
| "lb_loss": 0.9999999906561079 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 107000000, |
| "cumulative_training_bytes": 107005607, |
| "metrics": { |
| "loss": 0.4906465298378475, |
| "ce_loss": 0.4806465393745907, |
| "lb_loss": 0.9999999903976801 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 108000000, |
| "cumulative_training_bytes": 108001197, |
| "metrics": { |
| "loss": 0.4906608704421343, |
| "ce_loss": 0.48066087997887746, |
| "lb_loss": 0.9999999902877164 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 109000000, |
| "cumulative_training_bytes": 109001691, |
| "metrics": { |
| "loss": 0.49069485728372664, |
| "ce_loss": 0.4806948668204698, |
| "lb_loss": 0.9999999900325566 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 110000000, |
| "cumulative_training_bytes": 110007304, |
| "metrics": { |
| "loss": 0.4906437990875403, |
| "ce_loss": 0.48064380862428346, |
| "lb_loss": 0.9999999899985953 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 111000000, |
| "cumulative_training_bytes": 111006246, |
| "metrics": { |
| "loss": 0.49070311546325684, |
| "ce_loss": 0.480703125, |
| "lb_loss": 0.9999999900562008 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 112000000, |
| "cumulative_training_bytes": 112006808, |
| "metrics": { |
| "loss": 0.4907320227878786, |
| "ce_loss": 0.48073203232462175, |
| "lb_loss": 0.9999999894783181 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 113000000, |
| "cumulative_training_bytes": 113006280, |
| "metrics": { |
| "loss": 0.4907356900739835, |
| "ce_loss": 0.48073569961072665, |
| "lb_loss": 0.999999989610436 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 114000000, |
| "cumulative_training_bytes": 114000244, |
| "metrics": { |
| "loss": 0.4906710912515451, |
| "ce_loss": 0.4806711007882883, |
| "lb_loss": 0.9999999897974031 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 115000000, |
| "cumulative_training_bytes": 115000090, |
| "metrics": { |
| "loss": 0.49064408903496304, |
| "ce_loss": 0.4806440985717062, |
| "lb_loss": 0.9999999897608811 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 116000000, |
| "cumulative_training_bytes": 116003964, |
| "metrics": { |
| "loss": 0.4908688999492036, |
| "ce_loss": 0.48086890948594674, |
| "lb_loss": 0.9999999897499409 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 117000000, |
| "cumulative_training_bytes": 117001141, |
| "metrics": { |
| "loss": 0.49077886969755463, |
| "ce_loss": 0.4807788792342978, |
| "lb_loss": 0.9999999896522636 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 118000000, |
| "cumulative_training_bytes": 118002964, |
| "metrics": { |
| "loss": 0.49081061967910844, |
| "ce_loss": 0.4808106292158516, |
| "lb_loss": 0.9999999897073936 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 119000000, |
| "cumulative_training_bytes": 119004829, |
| "metrics": { |
| "loss": 0.49074190038735244, |
| "ce_loss": 0.4807419099240956, |
| "lb_loss": 0.9999999899118753 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 120000000, |
| "cumulative_training_bytes": 120005174, |
| "metrics": { |
| "loss": 0.49069510202198013, |
| "ce_loss": 0.4806951115587233, |
| "lb_loss": 0.999999989785755 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 121000000, |
| "cumulative_training_bytes": 121000398, |
| "metrics": { |
| "loss": 0.4906328099449369, |
| "ce_loss": 0.4806328194816801, |
| "lb_loss": 0.9999999898084403 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 122000000, |
| "cumulative_training_bytes": 122005153, |
| "metrics": { |
| "loss": 0.4905734521533371, |
| "ce_loss": 0.48057346169008025, |
| "lb_loss": 0.9999999895931111 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 123000000, |
| "cumulative_training_bytes": 123002062, |
| "metrics": { |
| "loss": 0.49056105234136627, |
| "ce_loss": 0.48056106187810943, |
| "lb_loss": 0.9999999894398626 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 124000000, |
| "cumulative_training_bytes": 124006089, |
| "metrics": { |
| "loss": 0.4904723872690717, |
| "ce_loss": 0.4804723968058149, |
| "lb_loss": 0.9999999896498737 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 125000000, |
| "cumulative_training_bytes": 125006477, |
| "metrics": { |
| "loss": 0.4903383307249222, |
| "ce_loss": 0.4803383402616654, |
| "lb_loss": 0.9999999898584517 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 126000000, |
| "cumulative_training_bytes": 126002630, |
| "metrics": { |
| "loss": 0.49058030584739254, |
| "ce_loss": 0.4805803153841357, |
| "lb_loss": 0.9999999897561486 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 127000000, |
| "cumulative_training_bytes": 127007067, |
| "metrics": { |
| "loss": 0.49066594004055153, |
| "ce_loss": 0.4806659495772947, |
| "lb_loss": 0.9999999898067419 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 128000000, |
| "cumulative_training_bytes": 128000583, |
| "metrics": { |
| "loss": 0.49058034760611396, |
| "ce_loss": 0.48058035714285713, |
| "lb_loss": 0.999999989768102 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 129000000, |
| "cumulative_training_bytes": 129007289, |
| "metrics": { |
| "loss": 0.4905069065050655, |
| "ce_loss": 0.4805069160418087, |
| "lb_loss": 0.9999999897476218 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 130000000, |
| "cumulative_training_bytes": 130006166, |
| "metrics": { |
| "loss": 0.49045753542133275, |
| "ce_loss": 0.4804575449580759, |
| "lb_loss": 0.9999999899782128 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 131000000, |
| "cumulative_training_bytes": 131001304, |
| "metrics": { |
| "loss": 0.4904289406187695, |
| "ce_loss": 0.4804289501555127, |
| "lb_loss": 0.9999999901426038 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 132000000, |
| "cumulative_training_bytes": 132007108, |
| "metrics": { |
| "loss": 0.4903701265992885, |
| "ce_loss": 0.4803701361360317, |
| "lb_loss": 0.9999999899394623 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 133000000, |
| "cumulative_training_bytes": 133003089, |
| "metrics": { |
| "loss": 0.49030012820954977, |
| "ce_loss": 0.48030013774629293, |
| "lb_loss": 0.9999999899266576 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 134000000, |
| "cumulative_training_bytes": 134000170, |
| "metrics": { |
| "loss": 0.49024726003084046, |
| "ce_loss": 0.4802472695675836, |
| "lb_loss": 0.999999989902716 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 135000000, |
| "cumulative_training_bytes": 135007268, |
| "metrics": { |
| "loss": 0.4902310506127265, |
| "ce_loss": 0.48023106014946965, |
| "lb_loss": 0.999999989883879 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 136000000, |
| "cumulative_training_bytes": 136002367, |
| "metrics": { |
| "loss": 0.49015822482355786, |
| "ce_loss": 0.48015823436030103, |
| "lb_loss": 0.9999999898845927 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 137000000, |
| "cumulative_training_bytes": 137002293, |
| "metrics": { |
| "loss": 0.49018864670178053, |
| "ce_loss": 0.4801886562385237, |
| "lb_loss": 0.9999999900512997 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 138000000, |
| "cumulative_training_bytes": 138004174, |
| "metrics": { |
| "loss": 0.4901451457887006, |
| "ce_loss": 0.4801451553254438, |
| "lb_loss": 0.9999999901139867 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 139000000, |
| "cumulative_training_bytes": 139006240, |
| "metrics": { |
| "loss": 0.4903390567955974, |
| "ce_loss": 0.4803390663323406, |
| "lb_loss": 0.999999990163354 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 140000000, |
| "cumulative_training_bytes": 140006436, |
| "metrics": { |
| "loss": 0.49048212032282185, |
| "ce_loss": 0.480482129859565, |
| "lb_loss": 0.9999999901594661 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 141000000, |
| "cumulative_training_bytes": 141007445, |
| "metrics": { |
| "loss": 0.4905080058343408, |
| "ce_loss": 0.48050801537108395, |
| "lb_loss": 0.9999999901041711 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 142000000, |
| "cumulative_training_bytes": 142004918, |
| "metrics": { |
| "loss": 0.4905039665249063, |
| "ce_loss": 0.48050397606164946, |
| "lb_loss": 0.9999999901685075 |
| } |
| }, |
| { |
| "epoch": 3, |
| "checkpoint_type": "epoch", |
| "metrics": { |
| "loss": 0.49051486986155374, |
| "ce_loss": 0.4805148793982969, |
| "lb_loss": 0.9999999901265442, |
| "training_bytes": 47653391 |
| }, |
| "cumulative_training_bytes": 142960216, |
| "training_bytes_this_epoch": 47653391 |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 143000000, |
| "cumulative_training_bytes": 143005202, |
| "metrics": { |
| "loss": 0.4950260321299235, |
| "ce_loss": 0.4850260416666667, |
| "lb_loss": 0.9999999701976776 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 144000000, |
| "cumulative_training_bytes": 144006005, |
| "metrics": { |
| "loss": 0.4904259713026729, |
| "ce_loss": 0.48042598083941607, |
| "lb_loss": 0.9999999908635216 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 145000000, |
| "cumulative_training_bytes": 145001749, |
| "metrics": { |
| "loss": 0.4900371510437812, |
| "ce_loss": 0.48003716058052437, |
| "lb_loss": 0.9999999908472268 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 146000000, |
| "cumulative_training_bytes": 146005280, |
| "metrics": { |
| "loss": 0.4904491602627556, |
| "ce_loss": 0.48044916979949875, |
| "lb_loss": 0.9999999887961194 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 147000000, |
| "cumulative_training_bytes": 147006364, |
| "metrics": { |
| "loss": 0.49022183598212477, |
| "ce_loss": 0.48022184551886793, |
| "lb_loss": 0.9999999902158413 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 148000000, |
| "cumulative_training_bytes": 148004606, |
| "metrics": { |
| "loss": 0.4898206580768932, |
| "ce_loss": 0.47982066761363634, |
| "lb_loss": 0.9999999900658926 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 149000000, |
| "cumulative_training_bytes": 149001684, |
| "metrics": { |
| "loss": 0.48951690106452267, |
| "ce_loss": 0.47951691060126583, |
| "lb_loss": 0.9999999901161918 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 150000000, |
| "cumulative_training_bytes": 150003252, |
| "metrics": { |
| "loss": 0.4902524334599995, |
| "ce_loss": 0.4802524429967427, |
| "lb_loss": 0.9999999897099473 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 151000000, |
| "cumulative_training_bytes": 151004021, |
| "metrics": { |
| "loss": 0.4901546794499362, |
| "ce_loss": 0.48015468898667935, |
| "lb_loss": 0.9999999898484954 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 152000000, |
| "cumulative_training_bytes": 152003583, |
| "metrics": { |
| "loss": 0.4901396364200731, |
| "ce_loss": 0.48013964595681624, |
| "lb_loss": 0.9999999896032542 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 153000000, |
| "cumulative_training_bytes": 153004258, |
| "metrics": { |
| "loss": 0.49013379143505564, |
| "ce_loss": 0.4801338009717988, |
| "lb_loss": 0.9999999890058506 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 154000000, |
| "cumulative_training_bytes": 154004288, |
| "metrics": { |
| "loss": 0.4900680994376158, |
| "ce_loss": 0.480068108974359, |
| "lb_loss": 0.999999989632179 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 155000000, |
| "cumulative_training_bytes": 155004149, |
| "metrics": { |
| "loss": 0.4901411515178947, |
| "ce_loss": 0.4801411610546379, |
| "lb_loss": 0.9999999897755691 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 156000000, |
| "cumulative_training_bytes": 156001930, |
| "metrics": { |
| "loss": 0.4899712896123179, |
| "ce_loss": 0.47997129914906106, |
| "lb_loss": 0.9999999894012868 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 157000000, |
| "cumulative_training_bytes": 157005966, |
| "metrics": { |
| "loss": 0.4899014294959544, |
| "ce_loss": 0.47990143903269755, |
| "lb_loss": 0.9999999894758012 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 158000000, |
| "cumulative_training_bytes": 158006659, |
| "metrics": { |
| "loss": 0.48980809543528125, |
| "ce_loss": 0.4798081049720244, |
| "lb_loss": 0.9999999895403854 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 159000000, |
| "cumulative_training_bytes": 159001028, |
| "metrics": { |
| "loss": 0.4895588359286506, |
| "ce_loss": 0.4795588454653938, |
| "lb_loss": 0.9999999895585181 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 160000000, |
| "cumulative_training_bytes": 160001860, |
| "metrics": { |
| "loss": 0.4894983198657726, |
| "ce_loss": 0.47949832940251574, |
| "lb_loss": 0.9999999894232549 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 161000000, |
| "cumulative_training_bytes": 161000396, |
| "metrics": { |
| "loss": 0.4892045148159733, |
| "ce_loss": 0.4792045243527165, |
| "lb_loss": 0.9999999891972906 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 162000000, |
| "cumulative_training_bytes": 162002358, |
| "metrics": { |
| "loss": 0.4891760811347486, |
| "ce_loss": 0.47917609067149175, |
| "lb_loss": 0.9999999890233505 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 163000000, |
| "cumulative_training_bytes": 163000910, |
| "metrics": { |
| "loss": 0.4890335630177085, |
| "ce_loss": 0.47903357255445167, |
| "lb_loss": 0.9999999890675471 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 164000000, |
| "cumulative_training_bytes": 164005597, |
| "metrics": { |
| "loss": 0.48890226029586237, |
| "ce_loss": 0.47890226983260553, |
| "lb_loss": 0.9999999888729321 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 165000000, |
| "cumulative_training_bytes": 165002975, |
| "metrics": { |
| "loss": 0.4889194060730553, |
| "ce_loss": 0.47891941560979845, |
| "lb_loss": 0.9999999890234671 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 166000000, |
| "cumulative_training_bytes": 166007294, |
| "metrics": { |
| "loss": 0.48903683825322025, |
| "ce_loss": 0.4790368477899634, |
| "lb_loss": 0.9999999888872696 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 167000000, |
| "cumulative_training_bytes": 167001945, |
| "metrics": { |
| "loss": 0.4890494737780068, |
| "ce_loss": 0.47904948331474995, |
| "lb_loss": 0.9999999891006479 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 168000000, |
| "cumulative_training_bytes": 168005336, |
| "metrics": { |
| "loss": 0.48906435342565363, |
| "ce_loss": 0.4790643629623968, |
| "lb_loss": 0.9999999890849336 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 169000000, |
| "cumulative_training_bytes": 169002071, |
| "metrics": { |
| "loss": 0.48898078195840533, |
| "ce_loss": 0.4789807914951485, |
| "lb_loss": 0.9999999892392673 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 170000000, |
| "cumulative_training_bytes": 170002507, |
| "metrics": { |
| "loss": 0.48883532836328514, |
| "ce_loss": 0.4788353379000283, |
| "lb_loss": 0.9999999893484761 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 171000000, |
| "cumulative_training_bytes": 171005319, |
| "metrics": { |
| "loss": 0.48872788846981063, |
| "ce_loss": 0.4787278980065538, |
| "lb_loss": 0.9999999894365335 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 172000000, |
| "cumulative_training_bytes": 172007475, |
| "metrics": { |
| "loss": 0.4886464073825819, |
| "ce_loss": 0.4786464169193251, |
| "lb_loss": 0.999999989424222 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 173000000, |
| "cumulative_training_bytes": 173006995, |
| "metrics": { |
| "loss": 0.48865697313400097, |
| "ce_loss": 0.47865698267074414, |
| "lb_loss": 0.9999999893671633 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 174000000, |
| "cumulative_training_bytes": 174002372, |
| "metrics": { |
| "loss": 0.48858499138826916, |
| "ce_loss": 0.4785850009250123, |
| "lb_loss": 0.9999999893993713 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 175000000, |
| "cumulative_training_bytes": 175000872, |
| "metrics": { |
| "loss": 0.48849087463510193, |
| "ce_loss": 0.4784908841718451, |
| "lb_loss": 0.9999999894580696 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 176000000, |
| "cumulative_training_bytes": 176007018, |
| "metrics": { |
| "loss": 0.4885006819310511, |
| "ce_loss": 0.4785006914677943, |
| "lb_loss": 0.9999999893523677 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 177000000, |
| "cumulative_training_bytes": 177003062, |
| "metrics": { |
| "loss": 0.4884071085188124, |
| "ce_loss": 0.4784071180555556, |
| "lb_loss": 0.9999999894492003 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 178000000, |
| "cumulative_training_bytes": 178005739, |
| "metrics": { |
| "loss": 0.4883760760542553, |
| "ce_loss": 0.4783760855909985, |
| "lb_loss": 0.9999999893214313 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 179000000, |
| "cumulative_training_bytes": 179002039, |
| "metrics": { |
| "loss": 0.48841644468038026, |
| "ce_loss": 0.4784164542171234, |
| "lb_loss": 0.9999999892871193 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 180000000, |
| "cumulative_training_bytes": 180001975, |
| "metrics": { |
| "loss": 0.4885168265783871, |
| "ce_loss": 0.47851683611513024, |
| "lb_loss": 0.9999999893307933 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 181000000, |
| "cumulative_training_bytes": 181002156, |
| "metrics": { |
| "loss": 0.4885435228641423, |
| "ce_loss": 0.4785435324008855, |
| "lb_loss": 0.9999999895041126 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 182000000, |
| "cumulative_training_bytes": 182006789, |
| "metrics": { |
| "loss": 0.48842715038972745, |
| "ce_loss": 0.4784271599264706, |
| "lb_loss": 0.9999999895516564 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 183000000, |
| "cumulative_training_bytes": 183001003, |
| "metrics": { |
| "loss": 0.4883744527003505, |
| "ce_loss": 0.4783744622370937, |
| "lb_loss": 0.9999999895720363 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 184000000, |
| "cumulative_training_bytes": 184002846, |
| "metrics": { |
| "loss": 0.4883268971737586, |
| "ce_loss": 0.47832690671050176, |
| "lb_loss": 0.9999999894599509 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 185000000, |
| "cumulative_training_bytes": 185004724, |
| "metrics": { |
| "loss": 0.4882663181899502, |
| "ce_loss": 0.47826632772669336, |
| "lb_loss": 0.9999999894400365 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 186000000, |
| "cumulative_training_bytes": 186007260, |
| "metrics": { |
| "loss": 0.48828150444600626, |
| "ce_loss": 0.47828151398274943, |
| "lb_loss": 0.9999999894740508 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 187000000, |
| "cumulative_training_bytes": 187007019, |
| "metrics": { |
| "loss": 0.4882290801773191, |
| "ce_loss": 0.47822908971406225, |
| "lb_loss": 0.9999999894010861 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 188000000, |
| "cumulative_training_bytes": 188003736, |
| "metrics": { |
| "loss": 0.488216156216274, |
| "ce_loss": 0.4782161657530172, |
| "lb_loss": 0.9999999894326629 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 189000000, |
| "cumulative_training_bytes": 189007403, |
| "metrics": { |
| "loss": 0.4881525303701408, |
| "ce_loss": 0.47815253990688394, |
| "lb_loss": 0.9999999895538252 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 190000000, |
| "cumulative_training_bytes": 190003337, |
| "metrics": { |
| "loss": 0.4880743821461995, |
| "ce_loss": 0.4780743916829427, |
| "lb_loss": 0.9999999895905299 |
| } |
| }, |
| { |
| "epoch": 4, |
| "checkpoint_type": "epoch", |
| "metrics": { |
| "loss": 0.488021058104645, |
| "ce_loss": 0.4780210676413882, |
| "lb_loss": 0.9999999895423727, |
| "training_bytes": 47653398 |
| }, |
| "cumulative_training_bytes": 190613614, |
| "training_bytes_this_epoch": 47653398 |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 191000000, |
| "cumulative_training_bytes": 191004295, |
| "metrics": { |
| "loss": 0.48361365467894313, |
| "ce_loss": 0.4736136642156863, |
| "lb_loss": 0.9999999906502518 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 192000000, |
| "cumulative_training_bytes": 192003486, |
| "metrics": { |
| "loss": 0.4822246106290027, |
| "ce_loss": 0.47222462016574585, |
| "lb_loss": 0.9999999911086994 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 193000000, |
| "cumulative_training_bytes": 193000756, |
| "metrics": { |
| "loss": 0.48206590686197065, |
| "ce_loss": 0.4720659163987138, |
| "lb_loss": 0.9999999875424376 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 194000000, |
| "cumulative_training_bytes": 194006438, |
| "metrics": { |
| "loss": 0.4826827534723066, |
| "ce_loss": 0.47268276300904977, |
| "lb_loss": 0.9999999888072726 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 195000000, |
| "cumulative_training_bytes": 195005382, |
| "metrics": { |
| "loss": 0.48297037944927085, |
| "ce_loss": 0.472970388986014, |
| "lb_loss": 0.9999999882249565 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 196000000, |
| "cumulative_training_bytes": 196002015, |
| "metrics": { |
| "loss": 0.4832766776071315, |
| "ce_loss": 0.47327668714387466, |
| "lb_loss": 0.999999988792289 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 197000000, |
| "cumulative_training_bytes": 197006361, |
| "metrics": { |
| "loss": 0.48392085377260935, |
| "ce_loss": 0.4739208633093525, |
| "lb_loss": 0.9999999878503721 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 198000000, |
| "cumulative_training_bytes": 198003880, |
| "metrics": { |
| "loss": 0.483928608201846, |
| "ce_loss": 0.4739286177385892, |
| "lb_loss": 0.99999998751023 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 199000000, |
| "cumulative_training_bytes": 199006196, |
| "metrics": { |
| "loss": 0.48404037288334817, |
| "ce_loss": 0.47404038242009133, |
| "lb_loss": 0.9999999879702041 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 200000000, |
| "cumulative_training_bytes": 200002073, |
| "metrics": { |
| "loss": 0.4839540720959099, |
| "ce_loss": 0.47395408163265307, |
| "lb_loss": 0.999999988760267 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 201000000, |
| "cumulative_training_bytes": 201002611, |
| "metrics": { |
| "loss": 0.4842147074617819, |
| "ce_loss": 0.4742147169985251, |
| "lb_loss": 0.9999999887032495 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 202000000, |
| "cumulative_training_bytes": 202000755, |
| "metrics": { |
| "loss": 0.48400739288586786, |
| "ce_loss": 0.474007402422611, |
| "lb_loss": 0.9999999884480903 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 203000000, |
| "cumulative_training_bytes": 203001562, |
| "metrics": { |
| "loss": 0.4841745324391381, |
| "ce_loss": 0.47417454197588127, |
| "lb_loss": 0.9999999887573181 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 204000000, |
| "cumulative_training_bytes": 204005682, |
| "metrics": { |
| "loss": 0.48423728844666647, |
| "ce_loss": 0.47423729798340963, |
| "lb_loss": 0.9999999889179008 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 205000000, |
| "cumulative_training_bytes": 205003502, |
| "metrics": { |
| "loss": 0.484211044443555, |
| "ce_loss": 0.47421105398029817, |
| "lb_loss": 0.9999999888598348 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 206000000, |
| "cumulative_training_bytes": 206008019, |
| "metrics": { |
| "loss": 0.48419132477137033, |
| "ce_loss": 0.4741913343081135, |
| "lb_loss": 0.9999999889632016 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 207000000, |
| "cumulative_training_bytes": 207007717, |
| "metrics": { |
| "loss": 0.4842689376011073, |
| "ce_loss": 0.47426894713785045, |
| "lb_loss": 0.9999999892488818 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 208000000, |
| "cumulative_training_bytes": 208005208, |
| "metrics": { |
| "loss": 0.4841757200888075, |
| "ce_loss": 0.47417572962555066, |
| "lb_loss": 0.999999989313176 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 209000000, |
| "cumulative_training_bytes": 209005329, |
| "metrics": { |
| "loss": 0.4841251532236735, |
| "ce_loss": 0.47412516276041666, |
| "lb_loss": 0.9999999895443519 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 210000000, |
| "cumulative_training_bytes": 210006121, |
| "metrics": { |
| "loss": 0.4841085443945093, |
| "ce_loss": 0.47410855393125245, |
| "lb_loss": 0.9999999895909708 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 211000000, |
| "cumulative_training_bytes": 211003532, |
| "metrics": { |
| "loss": 0.48414475511940114, |
| "ce_loss": 0.4741447646561443, |
| "lb_loss": 0.9999999894947094 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 212000000, |
| "cumulative_training_bytes": 212007723, |
| "metrics": { |
| "loss": 0.48415256366347176, |
| "ce_loss": 0.4741525732002149, |
| "lb_loss": 0.9999999894966027 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 213000000, |
| "cumulative_training_bytes": 213005205, |
| "metrics": { |
| "loss": 0.48423584617656507, |
| "ce_loss": 0.47423585571330823, |
| "lb_loss": 0.9999999894371515 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 214000000, |
| "cumulative_training_bytes": 214007542, |
| "metrics": { |
| "loss": 0.4842972747625365, |
| "ce_loss": 0.47429728429927964, |
| "lb_loss": 0.9999999894023176 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 215000000, |
| "cumulative_training_bytes": 215006636, |
| "metrics": { |
| "loss": 0.48420266889447544, |
| "ce_loss": 0.4742026784312186, |
| "lb_loss": 0.9999999897414117 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 216000000, |
| "cumulative_training_bytes": 216002411, |
| "metrics": { |
| "loss": 0.48442725453235763, |
| "ce_loss": 0.4744272640691008, |
| "lb_loss": 0.9999999896582165 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 217000000, |
| "cumulative_training_bytes": 217003351, |
| "metrics": { |
| "loss": 0.48465302021652246, |
| "ce_loss": 0.4746530297532656, |
| "lb_loss": 0.9999999896708351 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 218000000, |
| "cumulative_training_bytes": 218001934, |
| "metrics": { |
| "loss": 0.48466454465906106, |
| "ce_loss": 0.4746645541958042, |
| "lb_loss": 0.9999999894795718 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 219000000, |
| "cumulative_training_bytes": 219001498, |
| "metrics": { |
| "loss": 0.48470003685327034, |
| "ce_loss": 0.4747000463900135, |
| "lb_loss": 0.9999999894626067 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 220000000, |
| "cumulative_training_bytes": 220000802, |
| "metrics": { |
| "loss": 0.48471733482440416, |
| "ce_loss": 0.4747173443611473, |
| "lb_loss": 0.9999999896022145 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 221000000, |
| "cumulative_training_bytes": 221002631, |
| "metrics": { |
| "loss": 0.4847172157847394, |
| "ce_loss": 0.4747172253214826, |
| "lb_loss": 0.9999999895699386 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 222000000, |
| "cumulative_training_bytes": 222001546, |
| "metrics": { |
| "loss": 0.48476429971392804, |
| "ce_loss": 0.4747643092506712, |
| "lb_loss": 0.9999999897434038 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 223000000, |
| "cumulative_training_bytes": 223002095, |
| "metrics": { |
| "loss": 0.4846939223298289, |
| "ce_loss": 0.47469393186657205, |
| "lb_loss": 0.9999999896217131 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 224000000, |
| "cumulative_training_bytes": 224004704, |
| "metrics": { |
| "loss": 0.4846698684175937, |
| "ce_loss": 0.47466987795433685, |
| "lb_loss": 0.9999999897695562 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 225000000, |
| "cumulative_training_bytes": 225002022, |
| "metrics": { |
| "loss": 0.48464635646704474, |
| "ce_loss": 0.4746463660037879, |
| "lb_loss": 0.9999999898666792 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 226000000, |
| "cumulative_training_bytes": 226003031, |
| "metrics": { |
| "loss": 0.4846968945550516, |
| "ce_loss": 0.47469690409179477, |
| "lb_loss": 0.999999989960508 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 227000000, |
| "cumulative_training_bytes": 227007213, |
| "metrics": { |
| "loss": 0.48466569770009893, |
| "ce_loss": 0.4746657072368421, |
| "lb_loss": 0.9999999899738713 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 228000000, |
| "cumulative_training_bytes": 228002618, |
| "metrics": { |
| "loss": 0.4847170054192085, |
| "ce_loss": 0.47471701495595164, |
| "lb_loss": 0.9999999898766133 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 229000000, |
| "cumulative_training_bytes": 229002817, |
| "metrics": { |
| "loss": 0.4849277419846056, |
| "ce_loss": 0.47492775152134875, |
| "lb_loss": 0.9999999898439013 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 230000000, |
| "cumulative_training_bytes": 230004657, |
| "metrics": { |
| "loss": 0.48519230282958, |
| "ce_loss": 0.47519231236632314, |
| "lb_loss": 0.9999999897664978 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 231000000, |
| "cumulative_training_bytes": 231006924, |
| "metrics": { |
| "loss": 0.4853118831206326, |
| "ce_loss": 0.4753118926573758, |
| "lb_loss": 0.9999999898737654 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 232000000, |
| "cumulative_training_bytes": 232007018, |
| "metrics": { |
| "loss": 0.4854375916427203, |
| "ce_loss": 0.4754376011794635, |
| "lb_loss": 0.9999999898104178 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 233000000, |
| "cumulative_training_bytes": 233006236, |
| "metrics": { |
| "loss": 0.48551042782778, |
| "ce_loss": 0.47551043736452314, |
| "lb_loss": 0.9999999898362022 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 234000000, |
| "cumulative_training_bytes": 234000486, |
| "metrics": { |
| "loss": 0.4855104365451488, |
| "ce_loss": 0.47551044608189197, |
| "lb_loss": 0.9999999898169262 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 235000000, |
| "cumulative_training_bytes": 235002824, |
| "metrics": { |
| "loss": 0.485555099787045, |
| "ce_loss": 0.47555510932378814, |
| "lb_loss": 0.9999999898311206 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 236000000, |
| "cumulative_training_bytes": 236004788, |
| "metrics": { |
| "loss": 0.48557505530384387, |
| "ce_loss": 0.47557506484058704, |
| "lb_loss": 0.9999999897843591 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 237000000, |
| "cumulative_training_bytes": 237001532, |
| "metrics": { |
| "loss": 0.4854937203452311, |
| "ce_loss": 0.47549372988197425, |
| "lb_loss": 0.999999989826477 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 238000000, |
| "cumulative_training_bytes": 238004993, |
| "metrics": { |
| "loss": 0.485468689970447, |
| "ce_loss": 0.4754686995071902, |
| "lb_loss": 0.9999999898203087 |
| } |
| }, |
| { |
| "epoch": 5, |
| "checkpoint_type": "epoch", |
| "metrics": { |
| "loss": 0.48547107517566046, |
| "ce_loss": 0.4754710847124036, |
| "lb_loss": 0.9999999897626342, |
| "training_bytes": 47653400 |
| }, |
| "cumulative_training_bytes": 238267014, |
| "training_bytes_this_epoch": 47653400 |
| } |
| ] |
| } |