| { | |
| "run_name": "run_large_20251112_071557", | |
| "timestamp": "20251112_071557", | |
| "phase": "large", | |
| "config": { | |
| "arch_layout": [ | |
| "m4", | |
| [ | |
| "T22" | |
| ], | |
| "m4" | |
| ], | |
| "d_model": [ | |
| 1024, | |
| 1536 | |
| ], | |
| "d_intermediate": [ | |
| 0, | |
| 4096 | |
| ], | |
| "vocab_size": 256, | |
| "ssm_cfg": { | |
| "chunk_size": 256, | |
| "d_conv": 4, | |
| "d_state": 128, | |
| "expand": 2 | |
| }, | |
| "attn_cfg": { | |
| "num_heads": [ | |
| 16, | |
| 16 | |
| ], | |
| "rotary_emb_dim": [ | |
| 32, | |
| 48 | |
| ], | |
| "window_size": [ | |
| 1023, | |
| -1 | |
| ] | |
| }, | |
| "tie_embeddings": false | |
| }, | |
| "training_args": { | |
| "data": "datasets/moses/smiles-molecules-moses_all.csv", | |
| "max_samples": null, | |
| "batch_size": 16, | |
| "epochs": 5, | |
| "lr": 0.0001, | |
| "weight_decay": 0.1, | |
| "gradient_accumulation": 8, | |
| "concatenate": true, | |
| "num_concatenate": 10, | |
| "concatenate_separator": " ", | |
| "checkpoint_bytes": 1000000, | |
| "num_test_samples": 5, | |
| "num_visualize": 5, | |
| "skip_visualization": false | |
| }, | |
| "dataset_info": { | |
| "train_size": 193691, | |
| "test_size": 5, | |
| "test_smiles_file": "checkpoints/run_large_20251112_071557/test_smiles.txt" | |
| }, | |
| "model_info": { | |
| "num_parameters": 622923776, | |
| "device": "cuda", | |
| "dtype": "torch.bfloat16", | |
| "use_amp": true | |
| }, | |
| "training_history": [ | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 1000000, | |
| "cumulative_training_bytes": 1005180, | |
| "metrics": { | |
| "loss": 2.7717187923543594, | |
| "ce_loss": 2.76171875, | |
| "lb_loss": 0.9999999957926133 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 2000000, | |
| "cumulative_training_bytes": 2005824, | |
| "metrics": { | |
| "loss": 2.068881838019267, | |
| "ce_loss": 2.0588818215339235, | |
| "lb_loss": 0.999999996131852 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 3000000, | |
| "cumulative_training_bytes": 3000192, | |
| "metrics": { | |
| "loss": 1.7394671552985377, | |
| "ce_loss": 1.7294671474358974, | |
| "lb_loss": 0.9999999945920836 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 4000000, | |
| "cumulative_training_bytes": 4000199, | |
| "metrics": { | |
| "loss": 1.532599624444747, | |
| "ce_loss": 1.5225996209319526, | |
| "lb_loss": 0.9999999955913724 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 5000000, | |
| "cumulative_training_bytes": 5000601, | |
| "metrics": { | |
| "loss": 1.3878337656957864, | |
| "ce_loss": 1.3778337647928993, | |
| "lb_loss": 0.9999999956971795 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 6000000, | |
| "cumulative_training_bytes": 6001219, | |
| "metrics": { | |
| "loss": 1.2813033152378992, | |
| "ce_loss": 1.2713033160749507, | |
| "lb_loss": 0.9999999946508652 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 7000000, | |
| "cumulative_training_bytes": 7000167, | |
| "metrics": { | |
| "loss": 1.1998906361280814, | |
| "ce_loss": 1.189890638207946, | |
| "lb_loss": 0.9999999956669489 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 8000000, | |
| "cumulative_training_bytes": 8005274, | |
| "metrics": { | |
| "loss": 1.1353377880031588, | |
| "ce_loss": 1.1253377910199556, | |
| "lb_loss": 0.9999999970484027 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 9000000, | |
| "cumulative_training_bytes": 9004340, | |
| "metrics": { | |
| "loss": 1.082989381114694, | |
| "ce_loss": 1.0729893848554533, | |
| "lb_loss": 0.9999999974153045 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 10000000, | |
| "cumulative_training_bytes": 10005356, | |
| "metrics": { | |
| "loss": 1.0403814267858396, | |
| "ce_loss": 1.0303814311058546, | |
| "lb_loss": 0.999999997638373 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 11000000, | |
| "cumulative_training_bytes": 11004976, | |
| "metrics": { | |
| "loss": 1.0042456269264222, | |
| "ce_loss": 0.9942456317204301, | |
| "lb_loss": 0.9999999972761319 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 12000000, | |
| "cumulative_training_bytes": 12005836, | |
| "metrics": { | |
| "loss": 0.9732034663486622, | |
| "ce_loss": 0.9632034715377032, | |
| "lb_loss": 0.9999999970036108 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 13000000, | |
| "cumulative_training_bytes": 13000402, | |
| "metrics": { | |
| "loss": 0.9467905753842798, | |
| "ce_loss": 0.9367905809057806, | |
| "lb_loss": 0.9999999964188379 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 14000000, | |
| "cumulative_training_bytes": 14000972, | |
| "metrics": { | |
| "loss": 0.9233459631266074, | |
| "ce_loss": 0.9133459689349113, | |
| "lb_loss": 0.9999999965234823 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 15000000, | |
| "cumulative_training_bytes": 15000856, | |
| "metrics": { | |
| "loss": 0.9026359037064472, | |
| "ce_loss": 0.8926359097633136, | |
| "lb_loss": 0.9999999959087936 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 16000000, | |
| "cumulative_training_bytes": 16000794, | |
| "metrics": { | |
| "loss": 0.8842589041361442, | |
| "ce_loss": 0.874258910410503, | |
| "lb_loss": 0.9999999957015882 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 17000000, | |
| "cumulative_training_bytes": 17000571, | |
| "metrics": { | |
| "loss": 0.8677488623120169, | |
| "ce_loss": 0.8577488687782805, | |
| "lb_loss": 0.9999999955602527 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 18000000, | |
| "cumulative_training_bytes": 18004713, | |
| "metrics": { | |
| "loss": 0.8526691315482018, | |
| "ce_loss": 0.8426691381860006, | |
| "lb_loss": 0.9999999956319962 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 19000000, | |
| "cumulative_training_bytes": 19004995, | |
| "metrics": { | |
| "loss": 0.8389714284525119, | |
| "ce_loss": 0.8289714352428393, | |
| "lb_loss": 0.9999999954535685 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 20000000, | |
| "cumulative_training_bytes": 20004594, | |
| "metrics": { | |
| "loss": 0.8264131751930668, | |
| "ce_loss": 0.8164131821206744, | |
| "lb_loss": 0.9999999955926764 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 21000000, | |
| "cumulative_training_bytes": 21004699, | |
| "metrics": { | |
| "loss": 0.8155721760467745, | |
| "ce_loss": 0.8055721830985916, | |
| "lb_loss": 0.9999999949629877 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 22000000, | |
| "cumulative_training_bytes": 22004049, | |
| "metrics": { | |
| "loss": 0.8053173244432212, | |
| "ce_loss": 0.7953173316079591, | |
| "lb_loss": 0.9999999948072318 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 23000000, | |
| "cumulative_training_bytes": 23003811, | |
| "metrics": { | |
| "loss": 0.795621416021767, | |
| "ce_loss": 0.785621423289609, | |
| "lb_loss": 0.9999999948336509 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 24000000, | |
| "cumulative_training_bytes": 24003387, | |
| "metrics": { | |
| "loss": 0.7866196854525316, | |
| "ce_loss": 0.7766196928148879, | |
| "lb_loss": 0.999999995078246 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 25000000, | |
| "cumulative_training_bytes": 25003311, | |
| "metrics": { | |
| "loss": 0.7783351499098937, | |
| "ce_loss": 0.7683351573592049, | |
| "lb_loss": 0.9999999949365671 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 26000000, | |
| "cumulative_training_bytes": 26003789, | |
| "metrics": { | |
| "loss": 0.7705019833691698, | |
| "ce_loss": 0.7605019908987486, | |
| "lb_loss": 0.9999999948464698 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 27000000, | |
| "cumulative_training_bytes": 27005374, | |
| "metrics": { | |
| "loss": 0.7630905858996038, | |
| "ce_loss": 0.7530905935035057, | |
| "lb_loss": 0.999999994763045 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 28000000, | |
| "cumulative_training_bytes": 28004376, | |
| "metrics": { | |
| "loss": 0.7561506895592821, | |
| "ce_loss": 0.7461506972321994, | |
| "lb_loss": 0.9999999949374462 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 29000000, | |
| "cumulative_training_bytes": 29003450, | |
| "metrics": { | |
| "loss": 0.7496948872036664, | |
| "ce_loss": 0.7396948949408405, | |
| "lb_loss": 0.9999999950268667 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 30000000, | |
| "cumulative_training_bytes": 30002714, | |
| "metrics": { | |
| "loss": 0.7436015993809234, | |
| "ce_loss": 0.7336016071780714, | |
| "lb_loss": 0.999999995180851 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 31000000, | |
| "cumulative_training_bytes": 31003719, | |
| "metrics": { | |
| "loss": 0.7378081684349147, | |
| "ce_loss": 0.727808176288168, | |
| "lb_loss": 0.9999999952907781 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 32000000, | |
| "cumulative_training_bytes": 32002795, | |
| "metrics": { | |
| "loss": 0.7323312628928164, | |
| "ce_loss": 0.7223312707986689, | |
| "lb_loss": 0.9999999951954843 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 33000000, | |
| "cumulative_training_bytes": 33002982, | |
| "metrics": { | |
| "loss": 0.7271596202716984, | |
| "ce_loss": 0.7171596282269631, | |
| "lb_loss": 0.9999999955120203 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 34000000, | |
| "cumulative_training_bytes": 34002814, | |
| "metrics": { | |
| "loss": 0.7221670737365277, | |
| "ce_loss": 0.7121670817382982, | |
| "lb_loss": 0.9999999956336253 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 35000000, | |
| "cumulative_training_bytes": 35001491, | |
| "metrics": { | |
| "loss": 0.7178196230395731, | |
| "ce_loss": 0.7078196310851927, | |
| "lb_loss": 0.9999999956173056 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 36000000, | |
| "cumulative_training_bytes": 36002685, | |
| "metrics": { | |
| "loss": 0.7134658659885629, | |
| "ce_loss": 0.7034658740755957, | |
| "lb_loss": 0.9999999959055478 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 37000000, | |
| "cumulative_training_bytes": 37002731, | |
| "metrics": { | |
| "loss": 0.7092018776668845, | |
| "ce_loss": 0.6992018857930924, | |
| "lb_loss": 0.9999999959590071 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 38000000, | |
| "cumulative_training_bytes": 38001457, | |
| "metrics": { | |
| "loss": 0.7051239603872, | |
| "ce_loss": 0.6951239685505216, | |
| "lb_loss": 0.9999999958890149 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 39000000, | |
| "cumulative_training_bytes": 39001008, | |
| "metrics": { | |
| "loss": 0.7012219511840528, | |
| "ce_loss": 0.6912219593825849, | |
| "lb_loss": 0.9999999958406953 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 40000000, | |
| "cumulative_training_bytes": 40002237, | |
| "metrics": { | |
| "loss": 0.6974433710758123, | |
| "ce_loss": 0.6874433793077948, | |
| "lb_loss": 0.9999999959270306 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 41000000, | |
| "cumulative_training_bytes": 41003402, | |
| "metrics": { | |
| "loss": 0.6939161623711194, | |
| "ce_loss": 0.6839161706349206, | |
| "lb_loss": 0.9999999961123666 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 42000000, | |
| "cumulative_training_bytes": 42003174, | |
| "metrics": { | |
| "loss": 0.6905112212804828, | |
| "ce_loss": 0.680511229574588, | |
| "lb_loss": 0.9999999960034074 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 43000000, | |
| "cumulative_training_bytes": 43003031, | |
| "metrics": { | |
| "loss": 0.6871942206602144, | |
| "ce_loss": 0.6771942289832141, | |
| "lb_loss": 0.9999999960389319 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 44000000, | |
| "cumulative_training_bytes": 44004167, | |
| "metrics": { | |
| "loss": 0.6840069627062967, | |
| "ce_loss": 0.6740069710568778, | |
| "lb_loss": 0.9999999959686519 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 45000000, | |
| "cumulative_training_bytes": 45004421, | |
| "metrics": { | |
| "loss": 0.6809336089975044, | |
| "ce_loss": 0.6709336173744412, | |
| "lb_loss": 0.9999999959798602 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 46000000, | |
| "cumulative_training_bytes": 46004726, | |
| "metrics": { | |
| "loss": 0.6780240068711652, | |
| "ce_loss": 0.6680240152733119, | |
| "lb_loss": 0.9999999960059138 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 47000000, | |
| "cumulative_training_bytes": 47003808, | |
| "metrics": { | |
| "loss": 0.6751821455578549, | |
| "ce_loss": 0.6651821539841389, | |
| "lb_loss": 0.9999999959183123 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 48000000, | |
| "cumulative_training_bytes": 48003562, | |
| "metrics": { | |
| "loss": 0.6724254583939221, | |
| "ce_loss": 0.6624254668433378, | |
| "lb_loss": 0.9999999958784414 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 49000000, | |
| "cumulative_training_bytes": 49002795, | |
| "metrics": { | |
| "loss": 0.6697642958932843, | |
| "ce_loss": 0.6597643043648878, | |
| "lb_loss": 0.9999999957538348 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 50000000, | |
| "cumulative_training_bytes": 50002223, | |
| "metrics": { | |
| "loss": 0.6672086428501326, | |
| "ce_loss": 0.6572086513430363, | |
| "lb_loss": 0.9999999955707352 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 51000000, | |
| "cumulative_training_bytes": 51002866, | |
| "metrics": { | |
| "loss": 0.6648764597290906, | |
| "ce_loss": 0.6548764682424594, | |
| "lb_loss": 0.9999999957958673 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 52000000, | |
| "cumulative_training_bytes": 52002541, | |
| "metrics": { | |
| "loss": 0.662538409517926, | |
| "ce_loss": 0.6525384180509728, | |
| "lb_loss": 0.9999999958292347 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 53000000, | |
| "cumulative_training_bytes": 53002612, | |
| "metrics": { | |
| "loss": 0.6602227322941887, | |
| "ce_loss": 0.650222740846171, | |
| "lb_loss": 0.9999999958347279 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 54000000, | |
| "cumulative_training_bytes": 54002019, | |
| "metrics": { | |
| "loss": 0.6579624243212044, | |
| "ce_loss": 0.6479624328914211, | |
| "lb_loss": 0.9999999960620576 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 55000000, | |
| "cumulative_training_bytes": 55003666, | |
| "metrics": { | |
| "loss": 0.6557700134647078, | |
| "ce_loss": 0.6457700220524957, | |
| "lb_loss": 0.9999999959477048 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 56000000, | |
| "cumulative_training_bytes": 56004562, | |
| "metrics": { | |
| "loss": 0.653625767280107, | |
| "ce_loss": 0.6436257758848389, | |
| "lb_loss": 0.9999999958311384 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 57000000, | |
| "cumulative_training_bytes": 57003399, | |
| "metrics": { | |
| "loss": 0.6515486406938451, | |
| "ce_loss": 0.6415486493149263, | |
| "lb_loss": 0.999999995693914 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 58000000, | |
| "cumulative_training_bytes": 58004263, | |
| "metrics": { | |
| "loss": 0.6495315760820968, | |
| "ce_loss": 0.6395315847189635, | |
| "lb_loss": 0.9999999955675012 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 59000000, | |
| "cumulative_training_bytes": 59005055, | |
| "metrics": { | |
| "loss": 0.6475570260450345, | |
| "ce_loss": 0.6375570346971521, | |
| "lb_loss": 0.999999995678484 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 60000000, | |
| "cumulative_training_bytes": 60005248, | |
| "metrics": { | |
| "loss": 0.6456633104091682, | |
| "ce_loss": 0.635663319076028, | |
| "lb_loss": 0.9999999955741744 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 61000000, | |
| "cumulative_training_bytes": 61004559, | |
| "metrics": { | |
| "loss": 0.6438197430768832, | |
| "ce_loss": 0.633819751758002, | |
| "lb_loss": 0.9999999955889094 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 62000000, | |
| "cumulative_training_bytes": 62004749, | |
| "metrics": { | |
| "loss": 0.642214755655211, | |
| "ce_loss": 0.6322147643501288, | |
| "lb_loss": 0.999999995517849 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 63000000, | |
| "cumulative_training_bytes": 63005338, | |
| "metrics": { | |
| "loss": 0.6405543280803199, | |
| "ce_loss": 0.6305543367885988, | |
| "lb_loss": 0.999999995768115 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 64000000, | |
| "cumulative_training_bytes": 64005014, | |
| "metrics": { | |
| "loss": 0.638879157175052, | |
| "ce_loss": 0.6288791658962744, | |
| "lb_loss": 0.9999999957625986 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 65000000, | |
| "cumulative_training_bytes": 65000026, | |
| "metrics": { | |
| "loss": 0.6372423956814168, | |
| "ce_loss": 0.6272424044151115, | |
| "lb_loss": 0.9999999956700495 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 66000000, | |
| "cumulative_training_bytes": 66005586, | |
| "metrics": { | |
| "loss": 0.6356362670272628, | |
| "ce_loss": 0.6256362757731959, | |
| "lb_loss": 0.9999999957306938 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 67000000, | |
| "cumulative_training_bytes": 67004497, | |
| "metrics": { | |
| "loss": 0.6340658579854652, | |
| "ce_loss": 0.6240658667432003, | |
| "lb_loss": 0.9999999956786106 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 68000000, | |
| "cumulative_training_bytes": 68004301, | |
| "metrics": { | |
| "loss": 0.6325890467972416, | |
| "ce_loss": 0.6225890555664317, | |
| "lb_loss": 0.9999999957162241 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 69000000, | |
| "cumulative_training_bytes": 69003767, | |
| "metrics": { | |
| "loss": 0.6311319261365108, | |
| "ce_loss": 0.6211319349168238, | |
| "lb_loss": 0.9999999957016372 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 70000000, | |
| "cumulative_training_bytes": 70004275, | |
| "metrics": { | |
| "loss": 0.6296715307554119, | |
| "ce_loss": 0.6196715395465303, | |
| "lb_loss": 0.9999999958637974 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 71000000, | |
| "cumulative_training_bytes": 71003661, | |
| "metrics": { | |
| "loss": 0.6282464104692141, | |
| "ce_loss": 0.6182464192708333, | |
| "lb_loss": 0.9999999958972137 | |
| } | |
| }, | |
| { | |
| "epoch": 1, | |
| "checkpoint_type": "epoch", | |
| "metrics": { | |
| "loss": 0.6273426957392759, | |
| "ce_loss": 0.6173427045473319, | |
| "lb_loss": 0.9999999960463795, | |
| "training_bytes": 71629728 | |
| }, | |
| "cumulative_training_bytes": 71629728, | |
| "training_bytes_this_epoch": 71629728 | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 72000000, | |
| "cumulative_training_bytes": 72002782, | |
| "metrics": { | |
| "loss": 0.5245709230029394, | |
| "ce_loss": 0.5145709325396826, | |
| "lb_loss": 0.9999999943233672 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 73000000, | |
| "cumulative_training_bytes": 73002022, | |
| "metrics": { | |
| "loss": 0.5237897264546362, | |
| "ce_loss": 0.5137897359913793, | |
| "lb_loss": 0.9999999958893349 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 74000000, | |
| "cumulative_training_bytes": 74002053, | |
| "metrics": { | |
| "loss": 0.523175060600414, | |
| "ce_loss": 0.5131750701371571, | |
| "lb_loss": 0.9999999982163199 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 75000000, | |
| "cumulative_training_bytes": 75001463, | |
| "metrics": { | |
| "loss": 0.5227672602000989, | |
| "ce_loss": 0.5127672697368421, | |
| "lb_loss": 0.9999999993725827 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 76000000, | |
| "cumulative_training_bytes": 76001540, | |
| "metrics": { | |
| "loss": 0.5225935501723231, | |
| "ce_loss": 0.5125935597090663, | |
| "lb_loss": 0.999999997419014 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 77000000, | |
| "cumulative_training_bytes": 77001527, | |
| "metrics": { | |
| "loss": 0.5224307276603934, | |
| "ce_loss": 0.5124307371971366, | |
| "lb_loss": 0.9999999961926549 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 78000000, | |
| "cumulative_training_bytes": 78001457, | |
| "metrics": { | |
| "loss": 0.522277294316553, | |
| "ce_loss": 0.5122773038532962, | |
| "lb_loss": 0.9999999968454366 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 79000000, | |
| "cumulative_training_bytes": 79001702, | |
| "metrics": { | |
| "loss": 0.522033811189581, | |
| "ce_loss": 0.5120338207263242, | |
| "lb_loss": 0.9999999971297924 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 80000000, | |
| "cumulative_training_bytes": 80002593, | |
| "metrics": { | |
| "loss": 0.5217684312759777, | |
| "ce_loss": 0.5117684408127209, | |
| "lb_loss": 0.999999997514718 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 81000000, | |
| "cumulative_training_bytes": 81002708, | |
| "metrics": { | |
| "loss": 0.5216262629537871, | |
| "ce_loss": 0.5116262724905303, | |
| "lb_loss": 0.9999999968015184 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 82000000, | |
| "cumulative_training_bytes": 82002370, | |
| "metrics": { | |
| "loss": 0.5214791957541867, | |
| "ce_loss": 0.5114792052909298, | |
| "lb_loss": 0.9999999966678521 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 83000000, | |
| "cumulative_training_bytes": 83002802, | |
| "metrics": { | |
| "loss": 0.5213061656713733, | |
| "ce_loss": 0.5113061752081165, | |
| "lb_loss": 0.9999999966197157 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 84000000, | |
| "cumulative_training_bytes": 84001825, | |
| "metrics": { | |
| "loss": 0.5217131360873601, | |
| "ce_loss": 0.5117131456241033, | |
| "lb_loss": 0.9999999977195736 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 85000000, | |
| "cumulative_training_bytes": 85001219, | |
| "metrics": { | |
| "loss": 0.5217455311159117, | |
| "ce_loss": 0.5117455406526549, | |
| "lb_loss": 0.9999999976791112 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 86000000, | |
| "cumulative_training_bytes": 86001317, | |
| "metrics": { | |
| "loss": 0.5215764172335327, | |
| "ce_loss": 0.5115764267702758, | |
| "lb_loss": 0.9999999972025815 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 87000000, | |
| "cumulative_training_bytes": 87002232, | |
| "metrics": { | |
| "loss": 0.5213811911464747, | |
| "ce_loss": 0.5113812006832179, | |
| "lb_loss": 0.9999999972010136 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 88000000, | |
| "cumulative_training_bytes": 88002341, | |
| "metrics": { | |
| "loss": 0.5211851074672322, | |
| "ce_loss": 0.5111851170039754, | |
| "lb_loss": 0.999999997091931 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 89000000, | |
| "cumulative_training_bytes": 89002058, | |
| "metrics": { | |
| "loss": 0.5209484002895511, | |
| "ce_loss": 0.5109484098262943, | |
| "lb_loss": 0.9999999970157075 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 90000000, | |
| "cumulative_training_bytes": 90002506, | |
| "metrics": { | |
| "loss": 0.5207305409866063, | |
| "ce_loss": 0.5107305505233495, | |
| "lb_loss": 0.999999997312512 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 91000000, | |
| "cumulative_training_bytes": 91001385, | |
| "metrics": { | |
| "loss": 0.5205339798493289, | |
| "ce_loss": 0.5105339893860721, | |
| "lb_loss": 0.999999997414826 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 92000000, | |
| "cumulative_training_bytes": 92001245, | |
| "metrics": { | |
| "loss": 0.5203743006578546, | |
| "ce_loss": 0.5103743101945978, | |
| "lb_loss": 0.9999999980091391 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 93000000, | |
| "cumulative_training_bytes": 93002978, | |
| "metrics": { | |
| "loss": 0.5201673795482513, | |
| "ce_loss": 0.5101673890849945, | |
| "lb_loss": 0.999999998036281 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 94000000, | |
| "cumulative_training_bytes": 94003884, | |
| "metrics": { | |
| "loss": 0.519991870091662, | |
| "ce_loss": 0.5099918796284052, | |
| "lb_loss": 0.9999999981082895 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 95000000, | |
| "cumulative_training_bytes": 95004709, | |
| "metrics": { | |
| "loss": 0.5197606708430037, | |
| "ce_loss": 0.5097606803797469, | |
| "lb_loss": 0.999999998113777 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 96000000, | |
| "cumulative_training_bytes": 96004703, | |
| "metrics": { | |
| "loss": 0.5195778423387121, | |
| "ce_loss": 0.5095778518754552, | |
| "lb_loss": 0.999999998133285 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 97000000, | |
| "cumulative_training_bytes": 97004722, | |
| "metrics": { | |
| "loss": 0.5193511229842457, | |
| "ce_loss": 0.5093511325209888, | |
| "lb_loss": 0.9999999983041589 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 98000000, | |
| "cumulative_training_bytes": 98005936, | |
| "metrics": { | |
| "loss": 0.5191288888253838, | |
| "ce_loss": 0.5091288983621269, | |
| "lb_loss": 0.999999998542314 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 99000000, | |
| "cumulative_training_bytes": 99005325, | |
| "metrics": { | |
| "loss": 0.5189959339079174, | |
| "ce_loss": 0.5089959434446606, | |
| "lb_loss": 0.9999999980415248 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 100000000, | |
| "cumulative_training_bytes": 100005645, | |
| "metrics": { | |
| "loss": 0.5188124741702432, | |
| "ce_loss": 0.5088124837069864, | |
| "lb_loss": 0.9999999982224267 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 101000000, | |
| "cumulative_training_bytes": 101005293, | |
| "metrics": { | |
| "loss": 0.5186718025099933, | |
| "ce_loss": 0.5086718120467365, | |
| "lb_loss": 0.9999999980307893 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 102000000, | |
| "cumulative_training_bytes": 102004390, | |
| "metrics": { | |
| "loss": 0.5184943529218581, | |
| "ce_loss": 0.5084943624586012, | |
| "lb_loss": 0.9999999979678915 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 103000000, | |
| "cumulative_training_bytes": 103003700, | |
| "metrics": { | |
| "loss": 0.5183400036658219, | |
| "ce_loss": 0.508340013202565, | |
| "lb_loss": 0.9999999980888741 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 104000000, | |
| "cumulative_training_bytes": 104004107, | |
| "metrics": { | |
| "loss": 0.5181776973495664, | |
| "ce_loss": 0.5081777068863096, | |
| "lb_loss": 0.9999999981152251 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 105000000, | |
| "cumulative_training_bytes": 105003741, | |
| "metrics": { | |
| "loss": 0.5180538606981859, | |
| "ce_loss": 0.5080538702349291, | |
| "lb_loss": 0.9999999981928379 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 106000000, | |
| "cumulative_training_bytes": 106003973, | |
| "metrics": { | |
| "loss": 0.5178760367922292, | |
| "ce_loss": 0.5078760463289723, | |
| "lb_loss": 0.9999999980915022 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 107000000, | |
| "cumulative_training_bytes": 107004552, | |
| "metrics": { | |
| "loss": 0.5179294557944712, | |
| "ce_loss": 0.5079294653312144, | |
| "lb_loss": 0.999999998235192 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 108000000, | |
| "cumulative_training_bytes": 108004388, | |
| "metrics": { | |
| "loss": 0.518020289999616, | |
| "ce_loss": 0.5080202995363592, | |
| "lb_loss": 0.9999999982255328 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 109000000, | |
| "cumulative_training_bytes": 109004031, | |
| "metrics": { | |
| "loss": 0.5179760755052137, | |
| "ce_loss": 0.5079760850419569, | |
| "lb_loss": 0.9999999984334434 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 110000000, | |
| "cumulative_training_bytes": 110003930, | |
| "metrics": { | |
| "loss": 0.5178546550738967, | |
| "ce_loss": 0.5078546646106399, | |
| "lb_loss": 0.9999999985202239 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 111000000, | |
| "cumulative_training_bytes": 111002483, | |
| "metrics": { | |
| "loss": 0.5177637650725144, | |
| "ce_loss": 0.5077637746092576, | |
| "lb_loss": 0.9999999984951037 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 112000000, | |
| "cumulative_training_bytes": 112000941, | |
| "metrics": { | |
| "loss": 0.5176624922952955, | |
| "ce_loss": 0.5076625018320386, | |
| "lb_loss": 0.999999998462492 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 113000000, | |
| "cumulative_training_bytes": 113001291, | |
| "metrics": { | |
| "loss": 0.5176278486404593, | |
| "ce_loss": 0.5076278581772025, | |
| "lb_loss": 0.999999998840642 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 114000000, | |
| "cumulative_training_bytes": 114005949, | |
| "metrics": { | |
| "loss": 0.5175975976260605, | |
| "ce_loss": 0.5075976071628037, | |
| "lb_loss": 0.999999998751648 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 115000000, | |
| "cumulative_training_bytes": 115005446, | |
| "metrics": { | |
| "loss": 0.5174949178265088, | |
| "ce_loss": 0.507494927363252, | |
| "lb_loss": 0.9999999990487323 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 116000000, | |
| "cumulative_training_bytes": 116005050, | |
| "metrics": { | |
| "loss": 0.5173872300465902, | |
| "ce_loss": 0.5073872395833333, | |
| "lb_loss": 0.9999999989589056 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 117000000, | |
| "cumulative_training_bytes": 117004462, | |
| "metrics": { | |
| "loss": 0.5172662063160408, | |
| "ce_loss": 0.507266215852784, | |
| "lb_loss": 0.9999999990906581 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 118000000, | |
| "cumulative_training_bytes": 118004814, | |
| "metrics": { | |
| "loss": 0.5171274763493247, | |
| "ce_loss": 0.5071274858860678, | |
| "lb_loss": 0.9999999993155884 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 119000000, | |
| "cumulative_training_bytes": 119005101, | |
| "metrics": { | |
| "loss": 0.5169919194004368, | |
| "ce_loss": 0.5069919289371799, | |
| "lb_loss": 0.9999999993077018 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 120000000, | |
| "cumulative_training_bytes": 120004824, | |
| "metrics": { | |
| "loss": 0.5170007587179046, | |
| "ce_loss": 0.5070007682546478, | |
| "lb_loss": 0.9999999993511726 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 121000000, | |
| "cumulative_training_bytes": 121005427, | |
| "metrics": { | |
| "loss": 0.51693738748243, | |
| "ce_loss": 0.5069373970191732, | |
| "lb_loss": 0.9999999995357337 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 122000000, | |
| "cumulative_training_bytes": 122001139, | |
| "metrics": { | |
| "loss": 0.5168287009648427, | |
| "ce_loss": 0.5068287105015858, | |
| "lb_loss": 0.999999999698931 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 123000000, | |
| "cumulative_training_bytes": 123001524, | |
| "metrics": { | |
| "loss": 0.5166915019957379, | |
| "ce_loss": 0.506691511532481, | |
| "lb_loss": 0.9999999996018119 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 124000000, | |
| "cumulative_training_bytes": 124002811, | |
| "metrics": { | |
| "loss": 0.5165513776652679, | |
| "ce_loss": 0.5065513872020111, | |
| "lb_loss": 0.9999999995420725 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 125000000, | |
| "cumulative_training_bytes": 125003361, | |
| "metrics": { | |
| "loss": 0.516395933506494, | |
| "ce_loss": 0.5063959430432372, | |
| "lb_loss": 0.9999999994845719 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 126000000, | |
| "cumulative_training_bytes": 126002775, | |
| "metrics": { | |
| "loss": 0.5162708629058512, | |
| "ce_loss": 0.5062708724425944, | |
| "lb_loss": 0.9999999993059422 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 127000000, | |
| "cumulative_training_bytes": 127003533, | |
| "metrics": { | |
| "loss": 0.5161498922852273, | |
| "ce_loss": 0.5061499018219705, | |
| "lb_loss": 0.9999999994267559 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 128000000, | |
| "cumulative_training_bytes": 128003350, | |
| "metrics": { | |
| "loss": 0.5160164026995326, | |
| "ce_loss": 0.5060164122362758, | |
| "lb_loss": 0.9999999997184623 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 129000000, | |
| "cumulative_training_bytes": 129003039, | |
| "metrics": { | |
| "loss": 0.5158909909402577, | |
| "ce_loss": 0.5058910004770009, | |
| "lb_loss": 0.999999999569686 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 130000000, | |
| "cumulative_training_bytes": 130003634, | |
| "metrics": { | |
| "loss": 0.515758194974154, | |
| "ce_loss": 0.5057582045108971, | |
| "lb_loss": 0.9999999994441331 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 131000000, | |
| "cumulative_training_bytes": 131003813, | |
| "metrics": { | |
| "loss": 0.5156362957751962, | |
| "ce_loss": 0.5056363053119394, | |
| "lb_loss": 0.9999999995544799 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 132000000, | |
| "cumulative_training_bytes": 132004994, | |
| "metrics": { | |
| "loss": 0.5154975693003636, | |
| "ce_loss": 0.5054975788371068, | |
| "lb_loss": 0.9999999996085944 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 133000000, | |
| "cumulative_training_bytes": 133003835, | |
| "metrics": { | |
| "loss": 0.5153652466819225, | |
| "ce_loss": 0.5053652562186657, | |
| "lb_loss": 0.9999999995804918 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 134000000, | |
| "cumulative_training_bytes": 134003355, | |
| "metrics": { | |
| "loss": 0.5152516195188492, | |
| "ce_loss": 0.5052516290555924, | |
| "lb_loss": 0.9999999994910902 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 135000000, | |
| "cumulative_training_bytes": 135002497, | |
| "metrics": { | |
| "loss": 0.515110941093509, | |
| "ce_loss": 0.5051109506302521, | |
| "lb_loss": 0.9999999995269473 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 136000000, | |
| "cumulative_training_bytes": 136002211, | |
| "metrics": { | |
| "loss": 0.5149785831073418, | |
| "ce_loss": 0.504978592644085, | |
| "lb_loss": 0.9999999994904649 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 137000000, | |
| "cumulative_training_bytes": 137002818, | |
| "metrics": { | |
| "loss": 0.5148538101591293, | |
| "ce_loss": 0.5048538196958725, | |
| "lb_loss": 0.9999999994011481 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 138000000, | |
| "cumulative_training_bytes": 138002176, | |
| "metrics": { | |
| "loss": 0.514731752242253, | |
| "ce_loss": 0.5047317617789961, | |
| "lb_loss": 0.9999999992188747 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 139000000, | |
| "cumulative_training_bytes": 139002858, | |
| "metrics": { | |
| "loss": 0.5145917039710735, | |
| "ce_loss": 0.5045917135078166, | |
| "lb_loss": 0.9999999994450999 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 140000000, | |
| "cumulative_training_bytes": 140002848, | |
| "metrics": { | |
| "loss": 0.5144674152793538, | |
| "ce_loss": 0.504467424816097, | |
| "lb_loss": 0.999999999458374 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 141000000, | |
| "cumulative_training_bytes": 141001755, | |
| "metrics": { | |
| "loss": 0.5143557057694663, | |
| "ce_loss": 0.5043557153062095, | |
| "lb_loss": 0.9999999994610975 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 142000000, | |
| "cumulative_training_bytes": 142001377, | |
| "metrics": { | |
| "loss": 0.5142396167139925, | |
| "ce_loss": 0.5042396262507357, | |
| "lb_loss": 0.9999999993935792 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 143000000, | |
| "cumulative_training_bytes": 143001209, | |
| "metrics": { | |
| "loss": 0.5141645089407066, | |
| "ce_loss": 0.5041645184774498, | |
| "lb_loss": 0.9999999995503215 | |
| } | |
| }, | |
| { | |
| "epoch": 2, | |
| "checkpoint_type": "epoch", | |
| "metrics": { | |
| "loss": 0.5141700236596057, | |
| "ce_loss": 0.5041700331963489, | |
| "lb_loss": 0.9999999995568793, | |
| "training_bytes": 71629742 | |
| }, | |
| "cumulative_training_bytes": 143259470, | |
| "training_bytes_this_epoch": 71629742 | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 144000000, | |
| "cumulative_training_bytes": 144004798, | |
| "metrics": { | |
| "loss": 0.5033345638759552, | |
| "ce_loss": 0.49333457341269843, | |
| "lb_loss": 1.000000001892211 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 145000000, | |
| "cumulative_training_bytes": 145004453, | |
| "metrics": { | |
| "loss": 0.5023530095310534, | |
| "ce_loss": 0.4923530190677966, | |
| "lb_loss": 1.0000000127291275 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 146000000, | |
| "cumulative_training_bytes": 146003901, | |
| "metrics": { | |
| "loss": 0.5018044422412741, | |
| "ce_loss": 0.49180445177801724, | |
| "lb_loss": 1.0000000069367474 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 147000000, | |
| "cumulative_training_bytes": 147004659, | |
| "metrics": { | |
| "loss": 0.5011353330777908, | |
| "ce_loss": 0.49113534261453395, | |
| "lb_loss": 1.0000000053672429 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 148000000, | |
| "cumulative_training_bytes": 148005351, | |
| "metrics": { | |
| "loss": 0.5010307144345786, | |
| "ce_loss": 0.4910307239713217, | |
| "lb_loss": 1.0000000031957603 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 149000000, | |
| "cumulative_training_bytes": 149005165, | |
| "metrics": { | |
| "loss": 0.5008639515858109, | |
| "ce_loss": 0.4908639611225541, | |
| "lb_loss": 1.0000000036217034 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 150000000, | |
| "cumulative_training_bytes": 150005636, | |
| "metrics": { | |
| "loss": 0.5008031702878183, | |
| "ce_loss": 0.4908031798245614, | |
| "lb_loss": 1.0000000014639738 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 151000000, | |
| "cumulative_training_bytes": 151000050, | |
| "metrics": { | |
| "loss": 0.5007569906543884, | |
| "ce_loss": 0.4907570001911315, | |
| "lb_loss": 1.000000002096188 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 152000000, | |
| "cumulative_training_bytes": 152000047, | |
| "metrics": { | |
| "loss": 0.500738184437529, | |
| "ce_loss": 0.49073819397427215, | |
| "lb_loss": 1.0000000011703012 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 153000000, | |
| "cumulative_training_bytes": 153000782, | |
| "metrics": { | |
| "loss": 0.5005915286619202, | |
| "ce_loss": 0.4905915381986634, | |
| "lb_loss": 1.0000000017019552 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 154000000, | |
| "cumulative_training_bytes": 154000682, | |
| "metrics": { | |
| "loss": 0.50057656587648, | |
| "ce_loss": 0.49057657541322314, | |
| "lb_loss": 1.0000000016420012 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 155000000, | |
| "cumulative_training_bytes": 155000553, | |
| "metrics": { | |
| "loss": 0.5005808876406762, | |
| "ce_loss": 0.4905808971774194, | |
| "lb_loss": 1.000000001982816 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 156000000, | |
| "cumulative_training_bytes": 156000853, | |
| "metrics": { | |
| "loss": 0.5005047004609345, | |
| "ce_loss": 0.49050470999767765, | |
| "lb_loss": 1.000000002159388 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 157000000, | |
| "cumulative_training_bytes": 157005344, | |
| "metrics": { | |
| "loss": 0.5004092470818535, | |
| "ce_loss": 0.49040925661859663, | |
| "lb_loss": 1.0000000026171647 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 158000000, | |
| "cumulative_training_bytes": 158005059, | |
| "metrics": { | |
| "loss": 0.5003464428609294, | |
| "ce_loss": 0.49034645239767255, | |
| "lb_loss": 1.0000000022244109 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 159000000, | |
| "cumulative_training_bytes": 159004522, | |
| "metrics": { | |
| "loss": 0.5003165714008743, | |
| "ce_loss": 0.49031658093761743, | |
| "lb_loss": 1.0000000018591453 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 160000000, | |
| "cumulative_training_bytes": 160005391, | |
| "metrics": { | |
| "loss": 0.5002716335727975, | |
| "ce_loss": 0.49027164310954063, | |
| "lb_loss": 1.0000000016217518 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 161000000, | |
| "cumulative_training_bytes": 161005358, | |
| "metrics": { | |
| "loss": 0.5002122226656577, | |
| "ce_loss": 0.4902122322024008, | |
| "lb_loss": 1.0000000014111137 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 162000000, | |
| "cumulative_training_bytes": 162005279, | |
| "metrics": { | |
| "loss": 0.5003447219578907, | |
| "ce_loss": 0.4903447314946338, | |
| "lb_loss": 1.0000000011100612 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 163000000, | |
| "cumulative_training_bytes": 163004202, | |
| "metrics": { | |
| "loss": 0.5003701535963104, | |
| "ce_loss": 0.49037016313305365, | |
| "lb_loss": 1.0000000013574926 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 164000000, | |
| "cumulative_training_bytes": 164003475, | |
| "metrics": { | |
| "loss": 0.5003257267011347, | |
| "ce_loss": 0.4903257362378779, | |
| "lb_loss": 1.0000000012580559 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 165000000, | |
| "cumulative_training_bytes": 165003971, | |
| "metrics": { | |
| "loss": 0.5002354283884269, | |
| "ce_loss": 0.49023543792517005, | |
| "lb_loss": 1.0000000012650783 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 166000000, | |
| "cumulative_training_bytes": 166003094, | |
| "metrics": { | |
| "loss": 0.5001840638568572, | |
| "ce_loss": 0.49018407339360043, | |
| "lb_loss": 1.0000000017211539 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 167000000, | |
| "cumulative_training_bytes": 167003137, | |
| "metrics": { | |
| "loss": 0.5001199910862322, | |
| "ce_loss": 0.49012000062297534, | |
| "lb_loss": 1.0000000016635235 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 168000000, | |
| "cumulative_training_bytes": 168003662, | |
| "metrics": { | |
| "loss": 0.50006763528033, | |
| "ce_loss": 0.49006764481707316, | |
| "lb_loss": 1.0000000019241098 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 169000000, | |
| "cumulative_training_bytes": 169003714, | |
| "metrics": { | |
| "loss": 0.500026528902696, | |
| "ce_loss": 0.4900265384394392, | |
| "lb_loss": 1.0000000021370545 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 170000000, | |
| "cumulative_training_bytes": 170005211, | |
| "metrics": { | |
| "loss": 0.4999621378636993, | |
| "ce_loss": 0.48996214740044247, | |
| "lb_loss": 1.0000000022153939 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 171000000, | |
| "cumulative_training_bytes": 171005841, | |
| "metrics": { | |
| "loss": 0.49993529449929863, | |
| "ce_loss": 0.4899353040360418, | |
| "lb_loss": 1.0000000024406253 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 172000000, | |
| "cumulative_training_bytes": 172005501, | |
| "metrics": { | |
| "loss": 0.49990348405629925, | |
| "ce_loss": 0.4899034935930424, | |
| "lb_loss": 1.0000000021226025 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 173000000, | |
| "cumulative_training_bytes": 173005085, | |
| "metrics": { | |
| "loss": 0.49985011230530974, | |
| "ce_loss": 0.4898501218420529, | |
| "lb_loss": 1.000000002193527 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 174000000, | |
| "cumulative_training_bytes": 174000244, | |
| "metrics": { | |
| "loss": 0.4997937374555571, | |
| "ce_loss": 0.48979374699230027, | |
| "lb_loss": 1.0000000023864806 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 175000000, | |
| "cumulative_training_bytes": 175005818, | |
| "metrics": { | |
| "loss": 0.49977711942411424, | |
| "ce_loss": 0.4897771289608574, | |
| "lb_loss": 1.000000002455289 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 176000000, | |
| "cumulative_training_bytes": 176000495, | |
| "metrics": { | |
| "loss": 0.4997098139089463, | |
| "ce_loss": 0.4897098234456895, | |
| "lb_loss": 1.0000000023053306 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 177000000, | |
| "cumulative_training_bytes": 177000876, | |
| "metrics": { | |
| "loss": 0.49965548414968264, | |
| "ce_loss": 0.4896554936864258, | |
| "lb_loss": 1.00000000232063 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 178000000, | |
| "cumulative_training_bytes": 178000666, | |
| "metrics": { | |
| "loss": 0.49961792182077686, | |
| "ce_loss": 0.48961793135752, | |
| "lb_loss": 1.0000000020812387 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 179000000, | |
| "cumulative_training_bytes": 179000024, | |
| "metrics": { | |
| "loss": 0.4995814913945482, | |
| "ce_loss": 0.4895815009312914, | |
| "lb_loss": 1.00000000217103 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 180000000, | |
| "cumulative_training_bytes": 180005268, | |
| "metrics": { | |
| "loss": 0.49955155799546297, | |
| "ce_loss": 0.48955156753220613, | |
| "lb_loss": 1.0000000022171776 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 181000000, | |
| "cumulative_training_bytes": 181005588, | |
| "metrics": { | |
| "loss": 0.49950381894342616, | |
| "ce_loss": 0.4895038284801693, | |
| "lb_loss": 1.0000000026256317 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 182000000, | |
| "cumulative_training_bytes": 182000786, | |
| "metrics": { | |
| "loss": 0.4994432111177551, | |
| "ce_loss": 0.48944322065449825, | |
| "lb_loss": 1.0000000024125906 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 183000000, | |
| "cumulative_training_bytes": 183000940, | |
| "metrics": { | |
| "loss": 0.49941193581763443, | |
| "ce_loss": 0.4894119453543776, | |
| "lb_loss": 1.0000000023341307 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 184000000, | |
| "cumulative_training_bytes": 184001325, | |
| "metrics": { | |
| "loss": 0.49934361564481095, | |
| "ce_loss": 0.4893436251815541, | |
| "lb_loss": 1.00000000238938 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 185000000, | |
| "cumulative_training_bytes": 185001001, | |
| "metrics": { | |
| "loss": 0.4993054266519441, | |
| "ce_loss": 0.48930543618868727, | |
| "lb_loss": 1.0000000024081832 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 186000000, | |
| "cumulative_training_bytes": 186000105, | |
| "metrics": { | |
| "loss": 0.49926686148291627, | |
| "ce_loss": 0.48926687101965943, | |
| "lb_loss": 1.0000000024673665 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 187000000, | |
| "cumulative_training_bytes": 187005589, | |
| "metrics": { | |
| "loss": 0.4993321712423722, | |
| "ce_loss": 0.4893321807791154, | |
| "lb_loss": 1.0000000023138824 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 188000000, | |
| "cumulative_training_bytes": 188000312, | |
| "metrics": { | |
| "loss": 0.49934911756284683, | |
| "ce_loss": 0.48934912709959, | |
| "lb_loss": 1.000000002364951 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 189000000, | |
| "cumulative_training_bytes": 189005692, | |
| "metrics": { | |
| "loss": 0.49930618266025595, | |
| "ce_loss": 0.4893061921969991, | |
| "lb_loss": 1.0000000023283666 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 190000000, | |
| "cumulative_training_bytes": 190000372, | |
| "metrics": { | |
| "loss": 0.4992524868235556, | |
| "ce_loss": 0.48925249636029877, | |
| "lb_loss": 1.0000000023543043 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 191000000, | |
| "cumulative_training_bytes": 191000957, | |
| "metrics": { | |
| "loss": 0.4992190696960283, | |
| "ce_loss": 0.48921907923277147, | |
| "lb_loss": 1.0000000023862543 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 192000000, | |
| "cumulative_training_bytes": 192000891, | |
| "metrics": { | |
| "loss": 0.49916283798055683, | |
| "ce_loss": 0.4891628475173, | |
| "lb_loss": 1.0000000025326727 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 193000000, | |
| "cumulative_training_bytes": 193001130, | |
| "metrics": { | |
| "loss": 0.4991221111954719, | |
| "ce_loss": 0.48912212073221506, | |
| "lb_loss": 1.0000000026519316 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 194000000, | |
| "cumulative_training_bytes": 194000556, | |
| "metrics": { | |
| "loss": 0.49906408485101195, | |
| "ce_loss": 0.4890640943877551, | |
| "lb_loss": 1.000000002599666 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 195000000, | |
| "cumulative_training_bytes": 195001954, | |
| "metrics": { | |
| "loss": 0.49900763141705373, | |
| "ce_loss": 0.4890076409537969, | |
| "lb_loss": 1.000000002699387 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 196000000, | |
| "cumulative_training_bytes": 196000689, | |
| "metrics": { | |
| "loss": 0.4989594545185693, | |
| "ce_loss": 0.4889594640553125, | |
| "lb_loss": 1.000000002741827 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 197000000, | |
| "cumulative_training_bytes": 197001752, | |
| "metrics": { | |
| "loss": 0.49890855444971355, | |
| "ce_loss": 0.4889085639864567, | |
| "lb_loss": 1.0000000027695617 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 198000000, | |
| "cumulative_training_bytes": 198002474, | |
| "metrics": { | |
| "loss": 0.49885149131451617, | |
| "ce_loss": 0.48885150085125934, | |
| "lb_loss": 1.000000002751182 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 199000000, | |
| "cumulative_training_bytes": 199002221, | |
| "metrics": { | |
| "loss": 0.49881285537833114, | |
| "ce_loss": 0.4888128649150743, | |
| "lb_loss": 1.0000000026512044 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 200000000, | |
| "cumulative_training_bytes": 200000993, | |
| "metrics": { | |
| "loss": 0.49877435920608715, | |
| "ce_loss": 0.4887743687428303, | |
| "lb_loss": 1.0000000026977178 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 201000000, | |
| "cumulative_training_bytes": 201000542, | |
| "metrics": { | |
| "loss": 0.4987275889721726, | |
| "ce_loss": 0.4887275985089158, | |
| "lb_loss": 1.0000000027365117 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 202000000, | |
| "cumulative_training_bytes": 202000266, | |
| "metrics": { | |
| "loss": 0.49867375425141036, | |
| "ce_loss": 0.4886737637881535, | |
| "lb_loss": 1.0000000025758429 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 203000000, | |
| "cumulative_training_bytes": 203004847, | |
| "metrics": { | |
| "loss": 0.4986276846422209, | |
| "ce_loss": 0.48862769417896407, | |
| "lb_loss": 1.0000000024321198 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 204000000, | |
| "cumulative_training_bytes": 204000174, | |
| "metrics": { | |
| "loss": 0.4986164967954536, | |
| "ce_loss": 0.4886165063321968, | |
| "lb_loss": 1.0000000022994096 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 205000000, | |
| "cumulative_training_bytes": 205000569, | |
| "metrics": { | |
| "loss": 0.4985904758954976, | |
| "ce_loss": 0.48859048543224076, | |
| "lb_loss": 1.000000002336429 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 206000000, | |
| "cumulative_training_bytes": 206000959, | |
| "metrics": { | |
| "loss": 0.4985317591843735, | |
| "ce_loss": 0.48853176872111664, | |
| "lb_loss": 1.0000000022767028 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 207000000, | |
| "cumulative_training_bytes": 207001002, | |
| "metrics": { | |
| "loss": 0.4984893900640738, | |
| "ce_loss": 0.48848939960081694, | |
| "lb_loss": 1.0000000022465174 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 208000000, | |
| "cumulative_training_bytes": 208000719, | |
| "metrics": { | |
| "loss": 0.49843654790773173, | |
| "ce_loss": 0.4884365574444749, | |
| "lb_loss": 1.000000002075621 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 209000000, | |
| "cumulative_training_bytes": 209005520, | |
| "metrics": { | |
| "loss": 0.49840692531385533, | |
| "ce_loss": 0.4884069348505985, | |
| "lb_loss": 1.0000000021457887 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 210000000, | |
| "cumulative_training_bytes": 210005103, | |
| "metrics": { | |
| "loss": 0.49835361690385965, | |
| "ce_loss": 0.4883536264406028, | |
| "lb_loss": 1.0000000021611968 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 211000000, | |
| "cumulative_training_bytes": 211004535, | |
| "metrics": { | |
| "loss": 0.49829659387840225, | |
| "ce_loss": 0.4882966034151454, | |
| "lb_loss": 1.0000000022542415 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 212000000, | |
| "cumulative_training_bytes": 212003237, | |
| "metrics": { | |
| "loss": 0.49826476548907883, | |
| "ce_loss": 0.488264775025822, | |
| "lb_loss": 1.0000000022368416 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 213000000, | |
| "cumulative_training_bytes": 213003872, | |
| "metrics": { | |
| "loss": 0.49821131437943567, | |
| "ce_loss": 0.48821132391617883, | |
| "lb_loss": 1.0000000022199405 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 214000000, | |
| "cumulative_training_bytes": 214003977, | |
| "metrics": { | |
| "loss": 0.4981701560704833, | |
| "ce_loss": 0.4881701656072265, | |
| "lb_loss": 1.000000002153664 | |
| } | |
| }, | |
| { | |
| "epoch": 3, | |
| "checkpoint_type": "epoch", | |
| "metrics": { | |
| "loss": 0.4981207118514115, | |
| "ce_loss": 0.48812072138815465, | |
| "lb_loss": 1.000000002058049, | |
| "training_bytes": 71629748 | |
| }, | |
| "cumulative_training_bytes": 214889218, | |
| "training_bytes_this_epoch": 71629748 | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 215000000, | |
| "cumulative_training_bytes": 215001580, | |
| "metrics": { | |
| "loss": 0.4880016352000989, | |
| "ce_loss": 0.4780016447368421, | |
| "lb_loss": 1.0000000313708657 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 216000000, | |
| "cumulative_training_bytes": 216001732, | |
| "metrics": { | |
| "loss": 0.4874559412611292, | |
| "ce_loss": 0.47745595079787234, | |
| "lb_loss": 1.0000000231443567 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 217000000, | |
| "cumulative_training_bytes": 217001957, | |
| "metrics": { | |
| "loss": 0.48738313121955934, | |
| "ce_loss": 0.4773831407563025, | |
| "lb_loss": 1.0000000128559037 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 218000000, | |
| "cumulative_training_bytes": 218001275, | |
| "metrics": { | |
| "loss": 0.4878064008720021, | |
| "ce_loss": 0.4778064104087453, | |
| "lb_loss": 1.0000000087253949 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 219000000, | |
| "cumulative_training_bytes": 219001184, | |
| "metrics": { | |
| "loss": 0.48801539064311295, | |
| "ce_loss": 0.4780154001798561, | |
| "lb_loss": 1.0000000052314868 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 220000000, | |
| "cumulative_training_bytes": 220001433, | |
| "metrics": { | |
| "loss": 0.4881019327375624, | |
| "ce_loss": 0.4781019422743056, | |
| "lb_loss": 1.000000007312607 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 221000000, | |
| "cumulative_training_bytes": 221002027, | |
| "metrics": { | |
| "loss": 0.4882320058311174, | |
| "ce_loss": 0.4782320153678606, | |
| "lb_loss": 1.00000000657786 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 222000000, | |
| "cumulative_training_bytes": 222000843, | |
| "metrics": { | |
| "loss": 0.4884359955589307, | |
| "ce_loss": 0.47843600509567386, | |
| "lb_loss": 1.0000000053059044 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 223000000, | |
| "cumulative_training_bytes": 223001364, | |
| "metrics": { | |
| "loss": 0.4885213138585887, | |
| "ce_loss": 0.4785213233953319, | |
| "lb_loss": 1.0000000031736973 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 224000000, | |
| "cumulative_training_bytes": 224001492, | |
| "metrics": { | |
| "loss": 0.48859297950546465, | |
| "ce_loss": 0.4785929890422078, | |
| "lb_loss": 1.0000000030189367 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 225000000, | |
| "cumulative_training_bytes": 225001545, | |
| "metrics": { | |
| "loss": 0.4886264715925722, | |
| "ce_loss": 0.4786264811293154, | |
| "lb_loss": 1.0000000019182302 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 226000000, | |
| "cumulative_training_bytes": 226001650, | |
| "metrics": { | |
| "loss": 0.4886456157960577, | |
| "ce_loss": 0.47864562533280086, | |
| "lb_loss": 1.0000000023486388 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 227000000, | |
| "cumulative_training_bytes": 227001584, | |
| "metrics": { | |
| "loss": 0.4887551046547566, | |
| "ce_loss": 0.47875511419149974, | |
| "lb_loss": 1.0000000021256175 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 228000000, | |
| "cumulative_training_bytes": 228001190, | |
| "metrics": { | |
| "loss": 0.4888778602173182, | |
| "ce_loss": 0.4788778697540614, | |
| "lb_loss": 1.000000001533152 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 229000000, | |
| "cumulative_training_bytes": 229001032, | |
| "metrics": { | |
| "loss": 0.4889627459664015, | |
| "ce_loss": 0.47896275550314465, | |
| "lb_loss": 1.0000000022742233 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 230000000, | |
| "cumulative_training_bytes": 230000968, | |
| "metrics": { | |
| "loss": 0.48901880898322553, | |
| "ce_loss": 0.4790188185199687, | |
| "lb_loss": 1.000000001867021 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 231000000, | |
| "cumulative_training_bytes": 231000807, | |
| "metrics": { | |
| "loss": 0.48906002305598545, | |
| "ce_loss": 0.4790600325927286, | |
| "lb_loss": 1.0000000024953837 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 232000000, | |
| "cumulative_training_bytes": 232001935, | |
| "metrics": { | |
| "loss": 0.48905724971291104, | |
| "ce_loss": 0.4790572592496542, | |
| "lb_loss": 1.000000002040408 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 233000000, | |
| "cumulative_training_bytes": 233002684, | |
| "metrics": { | |
| "loss": 0.4890720104322212, | |
| "ce_loss": 0.47907201996896437, | |
| "lb_loss": 1.0000000021419506 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 234000000, | |
| "cumulative_training_bytes": 234002126, | |
| "metrics": { | |
| "loss": 0.4890822031180556, | |
| "ce_loss": 0.4790822126547988, | |
| "lb_loss": 1.0000000019191588 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 235000000, | |
| "cumulative_training_bytes": 235001865, | |
| "metrics": { | |
| "loss": 0.48909425532056783, | |
| "ce_loss": 0.479094264857311, | |
| "lb_loss": 1.0000000020517044 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 236000000, | |
| "cumulative_training_bytes": 236002486, | |
| "metrics": { | |
| "loss": 0.4891166612171806, | |
| "ce_loss": 0.4791166707539238, | |
| "lb_loss": 1.0000000017707658 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 237000000, | |
| "cumulative_training_bytes": 237003585, | |
| "metrics": { | |
| "loss": 0.4891103856666285, | |
| "ce_loss": 0.4791103952033717, | |
| "lb_loss": 1.0000000018661341 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 238000000, | |
| "cumulative_training_bytes": 238002820, | |
| "metrics": { | |
| "loss": 0.48916915728993376, | |
| "ce_loss": 0.47916916682667693, | |
| "lb_loss": 1.0000000021668867 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 239000000, | |
| "cumulative_training_bytes": 239002121, | |
| "metrics": { | |
| "loss": 0.48922257481908504, | |
| "ce_loss": 0.4792225843558282, | |
| "lb_loss": 1.0000000026328433 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 240000000, | |
| "cumulative_training_bytes": 240002063, | |
| "metrics": { | |
| "loss": 0.48926897682635767, | |
| "ce_loss": 0.47926898636310084, | |
| "lb_loss": 1.000000002373512 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 241000000, | |
| "cumulative_training_bytes": 241003283, | |
| "metrics": { | |
| "loss": 0.48924765007689835, | |
| "ce_loss": 0.4792476596136415, | |
| "lb_loss": 1.0000000022420963 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 242000000, | |
| "cumulative_training_bytes": 242002834, | |
| "metrics": { | |
| "loss": 0.4892743589158976, | |
| "ce_loss": 0.47927436845264076, | |
| "lb_loss": 1.000000002783805 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 243000000, | |
| "cumulative_training_bytes": 243002432, | |
| "metrics": { | |
| "loss": 0.48929176785222755, | |
| "ce_loss": 0.4792917773889707, | |
| "lb_loss": 1.0000000026471438 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 244000000, | |
| "cumulative_training_bytes": 244001977, | |
| "metrics": { | |
| "loss": 0.48928654407098043, | |
| "ce_loss": 0.4792865536077236, | |
| "lb_loss": 1.0000000030771503 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 245000000, | |
| "cumulative_training_bytes": 245003684, | |
| "metrics": { | |
| "loss": 0.4892567206656542, | |
| "ce_loss": 0.47925673020239734, | |
| "lb_loss": 1.0000000031506484 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 246000000, | |
| "cumulative_training_bytes": 246003783, | |
| "metrics": { | |
| "loss": 0.4892511018649305, | |
| "ce_loss": 0.47925111140167365, | |
| "lb_loss": 1.0000000031400698 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 247000000, | |
| "cumulative_training_bytes": 247004190, | |
| "metrics": { | |
| "loss": 0.48922172035546246, | |
| "ce_loss": 0.47922172989220563, | |
| "lb_loss": 1.0000000033827585 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 248000000, | |
| "cumulative_training_bytes": 248001380, | |
| "metrics": { | |
| "loss": 0.489257635432878, | |
| "ce_loss": 0.47925764496962114, | |
| "lb_loss": 1.0000000028438956 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 249000000, | |
| "cumulative_training_bytes": 249000805, | |
| "metrics": { | |
| "loss": 0.48925180637826116, | |
| "ce_loss": 0.4792518159150043, | |
| "lb_loss": 1.0000000031430722 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 250000000, | |
| "cumulative_training_bytes": 250001009, | |
| "metrics": { | |
| "loss": 0.4892397265181945, | |
| "ce_loss": 0.47923973605493764, | |
| "lb_loss": 1.000000003174093 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 251000000, | |
| "cumulative_training_bytes": 251000542, | |
| "metrics": { | |
| "loss": 0.4892279956451346, | |
| "ce_loss": 0.47922800518187775, | |
| "lb_loss": 1.0000000031740963 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 252000000, | |
| "cumulative_training_bytes": 252000674, | |
| "metrics": { | |
| "loss": 0.4892209452025744, | |
| "ce_loss": 0.4792209547393176, | |
| "lb_loss": 1.0000000032216159 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 253000000, | |
| "cumulative_training_bytes": 253005686, | |
| "metrics": { | |
| "loss": 0.4892111244278641, | |
| "ce_loss": 0.47921113396460724, | |
| "lb_loss": 1.0000000030718321 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 254000000, | |
| "cumulative_training_bytes": 254000399, | |
| "metrics": { | |
| "loss": 0.489192856386101, | |
| "ce_loss": 0.47919286592284416, | |
| "lb_loss": 1.0000000032642786 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 255000000, | |
| "cumulative_training_bytes": 255000527, | |
| "metrics": { | |
| "loss": 0.4891710748783623, | |
| "ce_loss": 0.4791710844151055, | |
| "lb_loss": 1.000000002980672 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 256000000, | |
| "cumulative_training_bytes": 256001300, | |
| "metrics": { | |
| "loss": 0.48916581381170243, | |
| "ce_loss": 0.4791658233484456, | |
| "lb_loss": 1.0000000028566993 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 257000000, | |
| "cumulative_training_bytes": 257002023, | |
| "metrics": { | |
| "loss": 0.4891462577633833, | |
| "ce_loss": 0.47914626730012644, | |
| "lb_loss": 1.0000000031154879 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 258000000, | |
| "cumulative_training_bytes": 258002271, | |
| "metrics": { | |
| "loss": 0.4892640542499711, | |
| "ce_loss": 0.47926406378671427, | |
| "lb_loss": 1.000000003092308 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 259000000, | |
| "cumulative_training_bytes": 259002781, | |
| "metrics": { | |
| "loss": 0.4893791298831093, | |
| "ce_loss": 0.47937913941985244, | |
| "lb_loss": 1.000000003134141 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 260000000, | |
| "cumulative_training_bytes": 260002747, | |
| "metrics": { | |
| "loss": 0.4893935482249305, | |
| "ce_loss": 0.47939355776167364, | |
| "lb_loss": 1.0000000030177587 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 261000000, | |
| "cumulative_training_bytes": 261003435, | |
| "metrics": { | |
| "loss": 0.48941761685873997, | |
| "ce_loss": 0.47941762639548313, | |
| "lb_loss": 1.0000000031435274 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 262000000, | |
| "cumulative_training_bytes": 262003338, | |
| "metrics": { | |
| "loss": 0.4894158878508479, | |
| "ce_loss": 0.47941589738759105, | |
| "lb_loss": 1.000000002949539 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 263000000, | |
| "cumulative_training_bytes": 263003784, | |
| "metrics": { | |
| "loss": 0.4895384178553365, | |
| "ce_loss": 0.4795384273920797, | |
| "lb_loss": 1.0000000027929368 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 264000000, | |
| "cumulative_training_bytes": 264003117, | |
| "metrics": { | |
| "loss": 0.4896001881289195, | |
| "ce_loss": 0.47960019766566264, | |
| "lb_loss": 1.0000000026283493 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 265000000, | |
| "cumulative_training_bytes": 265003414, | |
| "metrics": { | |
| "loss": 0.4895905376057766, | |
| "ce_loss": 0.47959054714251975, | |
| "lb_loss": 1.0000000025266345 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 266000000, | |
| "cumulative_training_bytes": 266003442, | |
| "metrics": { | |
| "loss": 0.48961065880372917, | |
| "ce_loss": 0.47961066834047233, | |
| "lb_loss": 1.000000002311595 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 267000000, | |
| "cumulative_training_bytes": 267003434, | |
| "metrics": { | |
| "loss": 0.4895792225371753, | |
| "ce_loss": 0.47957923207391845, | |
| "lb_loss": 1.000000002152183 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 268000000, | |
| "cumulative_training_bytes": 268003818, | |
| "metrics": { | |
| "loss": 0.48956441921771315, | |
| "ce_loss": 0.4795644287544563, | |
| "lb_loss": 1.0000000020120552 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 269000000, | |
| "cumulative_training_bytes": 269002978, | |
| "metrics": { | |
| "loss": 0.48955592950098237, | |
| "ce_loss": 0.47955593903772553, | |
| "lb_loss": 1.000000002202993 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 270000000, | |
| "cumulative_training_bytes": 270002483, | |
| "metrics": { | |
| "loss": 0.48952992356128133, | |
| "ce_loss": 0.4795299330980245, | |
| "lb_loss": 1.0000000023806022 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 271000000, | |
| "cumulative_training_bytes": 271002540, | |
| "metrics": { | |
| "loss": 0.4895038147409116, | |
| "ce_loss": 0.47950382427765476, | |
| "lb_loss": 1.0000000024261724 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 272000000, | |
| "cumulative_training_bytes": 272002993, | |
| "metrics": { | |
| "loss": 0.48948651203909604, | |
| "ce_loss": 0.4794865215758392, | |
| "lb_loss": 1.0000000023651656 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 273000000, | |
| "cumulative_training_bytes": 273003373, | |
| "metrics": { | |
| "loss": 0.48946483302002297, | |
| "ce_loss": 0.47946484255676614, | |
| "lb_loss": 1.000000002154531 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 274000000, | |
| "cumulative_training_bytes": 274002590, | |
| "metrics": { | |
| "loss": 0.4894640248101037, | |
| "ce_loss": 0.47946403434684687, | |
| "lb_loss": 1.0000000023090088 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 275000000, | |
| "cumulative_training_bytes": 275002224, | |
| "metrics": { | |
| "loss": 0.48944997785128713, | |
| "ce_loss": 0.4794499873880303, | |
| "lb_loss": 1.0000000022060582 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 276000000, | |
| "cumulative_training_bytes": 276003651, | |
| "metrics": { | |
| "loss": 0.4894235311415101, | |
| "ce_loss": 0.47942354067825327, | |
| "lb_loss": 1.0000000022276716 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 277000000, | |
| "cumulative_training_bytes": 277002727, | |
| "metrics": { | |
| "loss": 0.48940091304828115, | |
| "ce_loss": 0.4794009225850243, | |
| "lb_loss": 1.000000002265624 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 278000000, | |
| "cumulative_training_bytes": 278002905, | |
| "metrics": { | |
| "loss": 0.48939109742697334, | |
| "ce_loss": 0.4793911069637165, | |
| "lb_loss": 1.0000000021459012 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 279000000, | |
| "cumulative_training_bytes": 279002296, | |
| "metrics": { | |
| "loss": 0.48937239470875754, | |
| "ce_loss": 0.4793724042455007, | |
| "lb_loss": 1.0000000020409159 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 280000000, | |
| "cumulative_training_bytes": 280002829, | |
| "metrics": { | |
| "loss": 0.48934805423552147, | |
| "ce_loss": 0.47934806377226463, | |
| "lb_loss": 1.000000002193737 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 281000000, | |
| "cumulative_training_bytes": 281001549, | |
| "metrics": { | |
| "loss": 0.4893414064325578, | |
| "ce_loss": 0.47934141596930097, | |
| "lb_loss": 1.0000000022459103 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 282000000, | |
| "cumulative_training_bytes": 282001567, | |
| "metrics": { | |
| "loss": 0.489328068624075, | |
| "ce_loss": 0.4793280781608182, | |
| "lb_loss": 1.0000000024594404 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 283000000, | |
| "cumulative_training_bytes": 283001333, | |
| "metrics": { | |
| "loss": 0.48931800692577093, | |
| "ce_loss": 0.4793180164625141, | |
| "lb_loss": 1.000000002594208 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 284000000, | |
| "cumulative_training_bytes": 284000025, | |
| "metrics": { | |
| "loss": 0.48929820322010614, | |
| "ce_loss": 0.4792982127568493, | |
| "lb_loss": 1.0000000026281157 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 285000000, | |
| "cumulative_training_bytes": 285005735, | |
| "metrics": { | |
| "loss": 0.4892746146721176, | |
| "ce_loss": 0.47927462420886074, | |
| "lb_loss": 1.0000000025853828 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 286000000, | |
| "cumulative_training_bytes": 286000791, | |
| "metrics": { | |
| "loss": 0.48924303995152446, | |
| "ce_loss": 0.4792430494882676, | |
| "lb_loss": 1.0000000023756552 | |
| } | |
| }, | |
| { | |
| "epoch": 4, | |
| "checkpoint_type": "epoch", | |
| "metrics": { | |
| "loss": 0.4892372682077637, | |
| "ce_loss": 0.47923727774450686, | |
| "lb_loss": 1.0000000023534628, | |
| "training_bytes": 71629711 | |
| }, | |
| "cumulative_training_bytes": 286518929, | |
| "training_bytes_this_epoch": 71629711 | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 287000000, | |
| "cumulative_training_bytes": 287004047, | |
| "metrics": { | |
| "loss": 0.4806078410730129, | |
| "ce_loss": 0.4706078506097561, | |
| "lb_loss": 1.0000000225334633 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 288000000, | |
| "cumulative_training_bytes": 288004715, | |
| "metrics": { | |
| "loss": 0.4802595771166433, | |
| "ce_loss": 0.47025958665338646, | |
| "lb_loss": 1.0000000116359664 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 289000000, | |
| "cumulative_training_bytes": 289004562, | |
| "metrics": { | |
| "loss": 0.48041944730849495, | |
| "ce_loss": 0.4704194568452381, | |
| "lb_loss": 1.0000000120628447 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 290000000, | |
| "cumulative_training_bytes": 290004822, | |
| "metrics": { | |
| "loss": 0.48065669143948775, | |
| "ce_loss": 0.4706567009762309, | |
| "lb_loss": 1.000000007690922 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 291000000, | |
| "cumulative_training_bytes": 291005873, | |
| "metrics": { | |
| "loss": 0.4806928087350246, | |
| "ce_loss": 0.4706928182717678, | |
| "lb_loss": 1.0000000058189231 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 292000000, | |
| "cumulative_training_bytes": 292005364, | |
| "metrics": { | |
| "loss": 0.48091802242118564, | |
| "ce_loss": 0.4709180319579288, | |
| "lb_loss": 1.0000000037936074 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 293000000, | |
| "cumulative_training_bytes": 293004963, | |
| "metrics": { | |
| "loss": 0.4810613071831473, | |
| "ce_loss": 0.4710613167198905, | |
| "lb_loss": 1.000000003317412 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 294000000, | |
| "cumulative_training_bytes": 294005549, | |
| "metrics": { | |
| "loss": 0.48113079579922524, | |
| "ce_loss": 0.4711308053359684, | |
| "lb_loss": 1.0000000031569258 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 295000000, | |
| "cumulative_training_bytes": 295000539, | |
| "metrics": { | |
| "loss": 0.48119787078426174, | |
| "ce_loss": 0.4711978803210049, | |
| "lb_loss": 1.0000000028700073 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 296000000, | |
| "cumulative_training_bytes": 296005546, | |
| "metrics": { | |
| "loss": 0.48143782187311335, | |
| "ce_loss": 0.4714378314098565, | |
| "lb_loss": 1.0000000027143725 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 297000000, | |
| "cumulative_training_bytes": 297000097, | |
| "metrics": { | |
| "loss": 0.4815578144327656, | |
| "ce_loss": 0.47155782396950874, | |
| "lb_loss": 1.000000002692474 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 298000000, | |
| "cumulative_training_bytes": 298005559, | |
| "metrics": { | |
| "loss": 0.48169829352868704, | |
| "ce_loss": 0.4716983030654302, | |
| "lb_loss": 1.0000000024873654 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 299000000, | |
| "cumulative_training_bytes": 299000334, | |
| "metrics": { | |
| "loss": 0.48176441524988556, | |
| "ce_loss": 0.4717644247866287, | |
| "lb_loss": 1.0000000026848939 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 300000000, | |
| "cumulative_training_bytes": 300001103, | |
| "metrics": { | |
| "loss": 0.48185715683726915, | |
| "ce_loss": 0.4718571663740123, | |
| "lb_loss": 1.0000000018315738 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 301000000, | |
| "cumulative_training_bytes": 301002320, | |
| "metrics": { | |
| "loss": 0.48188760156869204, | |
| "ce_loss": 0.4718876111054352, | |
| "lb_loss": 1.0000000009743302 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 302000000, | |
| "cumulative_training_bytes": 302002159, | |
| "metrics": { | |
| "loss": 0.4819753344999541, | |
| "ce_loss": 0.4719753440366973, | |
| "lb_loss": 1.0000000013442945 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 303000000, | |
| "cumulative_training_bytes": 303002056, | |
| "metrics": { | |
| "loss": 0.4820222638025746, | |
| "ce_loss": 0.4720222733393178, | |
| "lb_loss": 1.0000000014981418 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 304000000, | |
| "cumulative_training_bytes": 304001329, | |
| "metrics": { | |
| "loss": 0.4820512609862765, | |
| "ce_loss": 0.47205127052301965, | |
| "lb_loss": 1.0000000014931427 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 305000000, | |
| "cumulative_training_bytes": 305002202, | |
| "metrics": { | |
| "loss": 0.4821052628375764, | |
| "ce_loss": 0.47210527237431954, | |
| "lb_loss": 1.0000000020230844 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 306000000, | |
| "cumulative_training_bytes": 306001200, | |
| "metrics": { | |
| "loss": 0.48219109769290447, | |
| "ce_loss": 0.47219110722964763, | |
| "lb_loss": 1.0000000016476376 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 307000000, | |
| "cumulative_training_bytes": 307001550, | |
| "metrics": { | |
| "loss": 0.48222227538813406, | |
| "ce_loss": 0.4722222849248772, | |
| "lb_loss": 1.0000000016360708 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 308000000, | |
| "cumulative_training_bytes": 308001611, | |
| "metrics": { | |
| "loss": 0.4822451695266177, | |
| "ce_loss": 0.4722451790633609, | |
| "lb_loss": 1.0000000020853417 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 309000000, | |
| "cumulative_training_bytes": 309000993, | |
| "metrics": { | |
| "loss": 0.48228710549352793, | |
| "ce_loss": 0.4722871150302711, | |
| "lb_loss": 1.000000001819989 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 310000000, | |
| "cumulative_training_bytes": 310002159, | |
| "metrics": { | |
| "loss": 0.48228461511673465, | |
| "ce_loss": 0.4722846246534778, | |
| "lb_loss": 1.0000000019978372 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 311000000, | |
| "cumulative_training_bytes": 311002120, | |
| "metrics": { | |
| "loss": 0.48233567679997424, | |
| "ce_loss": 0.4723356863367174, | |
| "lb_loss": 1.0000000019162238 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 312000000, | |
| "cumulative_training_bytes": 312002300, | |
| "metrics": { | |
| "loss": 0.48235052605313145, | |
| "ce_loss": 0.4723505355898746, | |
| "lb_loss": 1.0000000015641721 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 313000000, | |
| "cumulative_training_bytes": 313000722, | |
| "metrics": { | |
| "loss": 0.48237080051912273, | |
| "ce_loss": 0.4723708100558659, | |
| "lb_loss": 1.0000000015850175 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 314000000, | |
| "cumulative_training_bytes": 314001059, | |
| "metrics": { | |
| "loss": 0.48239464620184425, | |
| "ce_loss": 0.4723946557385874, | |
| "lb_loss": 1.000000001732693 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 315000000, | |
| "cumulative_training_bytes": 315000576, | |
| "metrics": { | |
| "loss": 0.48241925209841163, | |
| "ce_loss": 0.4724192616351548, | |
| "lb_loss": 1.0000000016842368 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 316000000, | |
| "cumulative_training_bytes": 316000807, | |
| "metrics": { | |
| "loss": 0.4824112177565126, | |
| "ce_loss": 0.47241122729325574, | |
| "lb_loss": 1.000000001806564 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 317000000, | |
| "cumulative_training_bytes": 317000909, | |
| "metrics": { | |
| "loss": 0.4824116732675667, | |
| "ce_loss": 0.47241168280430984, | |
| "lb_loss": 1.0000000018282924 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 318000000, | |
| "cumulative_training_bytes": 318000322, | |
| "metrics": { | |
| "loss": 0.48244807845667786, | |
| "ce_loss": 0.47244808799342103, | |
| "lb_loss": 1.0000000013444656 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 319000000, | |
| "cumulative_training_bytes": 319001281, | |
| "metrics": { | |
| "loss": 0.4824737020910579, | |
| "ce_loss": 0.47247371162780105, | |
| "lb_loss": 1.0000000015745443 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 320000000, | |
| "cumulative_training_bytes": 320000836, | |
| "metrics": { | |
| "loss": 0.48250469895123843, | |
| "ce_loss": 0.4725047084879816, | |
| "lb_loss": 1.000000001948897 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 321000000, | |
| "cumulative_training_bytes": 321001661, | |
| "metrics": { | |
| "loss": 0.4825023903903205, | |
| "ce_loss": 0.47250239992706367, | |
| "lb_loss": 1.0000000019230604 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 322000000, | |
| "cumulative_training_bytes": 322001730, | |
| "metrics": { | |
| "loss": 0.4825008634410754, | |
| "ce_loss": 0.47250087297781856, | |
| "lb_loss": 1.0000000020378506 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 323000000, | |
| "cumulative_training_bytes": 323000981, | |
| "metrics": { | |
| "loss": 0.482539654899591, | |
| "ce_loss": 0.47253966443633416, | |
| "lb_loss": 1.0000000021560156 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 324000000, | |
| "cumulative_training_bytes": 324002180, | |
| "metrics": { | |
| "loss": 0.4825421488544788, | |
| "ce_loss": 0.472542158391222, | |
| "lb_loss": 1.0000000021737723 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 325000000, | |
| "cumulative_training_bytes": 325001770, | |
| "metrics": { | |
| "loss": 0.48258986484815614, | |
| "ce_loss": 0.4725898743848993, | |
| "lb_loss": 1.000000002218103 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 326000000, | |
| "cumulative_training_bytes": 326001016, | |
| "metrics": { | |
| "loss": 0.4826266742724595, | |
| "ce_loss": 0.47262668380920264, | |
| "lb_loss": 1.0000000019296467 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 327000000, | |
| "cumulative_training_bytes": 327000727, | |
| "metrics": { | |
| "loss": 0.4826471043720421, | |
| "ce_loss": 0.4726471139087853, | |
| "lb_loss": 1.000000001925541 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 328000000, | |
| "cumulative_training_bytes": 328001475, | |
| "metrics": { | |
| "loss": 0.4826450956701042, | |
| "ce_loss": 0.4726451052068474, | |
| "lb_loss": 1.0000000018791193 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 329000000, | |
| "cumulative_training_bytes": 329000597, | |
| "metrics": { | |
| "loss": 0.4826633140459285, | |
| "ce_loss": 0.47266332358267166, | |
| "lb_loss": 1.0000000018099753 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 330000000, | |
| "cumulative_training_bytes": 330000395, | |
| "metrics": { | |
| "loss": 0.48268707368318065, | |
| "ce_loss": 0.4726870832199238, | |
| "lb_loss": 1.0000000017034534 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 331000000, | |
| "cumulative_training_bytes": 331000366, | |
| "metrics": { | |
| "loss": 0.48268638047922063, | |
| "ce_loss": 0.4726863900159638, | |
| "lb_loss": 1.0000000016889437 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 332000000, | |
| "cumulative_training_bytes": 332000600, | |
| "metrics": { | |
| "loss": 0.4826902918228717, | |
| "ce_loss": 0.4726903013596149, | |
| "lb_loss": 1.0000000017681316 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 333000000, | |
| "cumulative_training_bytes": 333000847, | |
| "metrics": { | |
| "loss": 0.4826878186777699, | |
| "ce_loss": 0.47268782821451305, | |
| "lb_loss": 1.0000000019273814 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 334000000, | |
| "cumulative_training_bytes": 334000366, | |
| "metrics": { | |
| "loss": 0.4826988372821751, | |
| "ce_loss": 0.4726988468189183, | |
| "lb_loss": 1.0000000019016437 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 335000000, | |
| "cumulative_training_bytes": 335005476, | |
| "metrics": { | |
| "loss": 0.4827082030273281, | |
| "ce_loss": 0.47270821256407125, | |
| "lb_loss": 1.0000000017312551 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 336000000, | |
| "cumulative_training_bytes": 336005120, | |
| "metrics": { | |
| "loss": 0.4827010808315457, | |
| "ce_loss": 0.4727010903682889, | |
| "lb_loss": 1.0000000015109631 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 337000000, | |
| "cumulative_training_bytes": 337004532, | |
| "metrics": { | |
| "loss": 0.48272331332132057, | |
| "ce_loss": 0.47272332285806373, | |
| "lb_loss": 1.0000000015718524 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 338000000, | |
| "cumulative_training_bytes": 338005247, | |
| "metrics": { | |
| "loss": 0.4827238063321225, | |
| "ce_loss": 0.47272381586886564, | |
| "lb_loss": 1.0000000015961248 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 339000000, | |
| "cumulative_training_bytes": 339000096, | |
| "metrics": { | |
| "loss": 0.4827229669121237, | |
| "ce_loss": 0.47272297644886685, | |
| "lb_loss": 1.0000000014314792 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 340000000, | |
| "cumulative_training_bytes": 340005646, | |
| "metrics": { | |
| "loss": 0.48274137501907055, | |
| "ce_loss": 0.4727413845558137, | |
| "lb_loss": 1.0000000013122385 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 341000000, | |
| "cumulative_training_bytes": 341005395, | |
| "metrics": { | |
| "loss": 0.4827476606277877, | |
| "ce_loss": 0.47274767016453084, | |
| "lb_loss": 1.00000000132052 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 342000000, | |
| "cumulative_training_bytes": 342000175, | |
| "metrics": { | |
| "loss": 0.482743314508692, | |
| "ce_loss": 0.4727433240454352, | |
| "lb_loss": 1.00000000118243 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 343000000, | |
| "cumulative_training_bytes": 343000194, | |
| "metrics": { | |
| "loss": 0.48274443290694463, | |
| "ce_loss": 0.4727444424436878, | |
| "lb_loss": 1.0000000013987889 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 344000000, | |
| "cumulative_training_bytes": 344000175, | |
| "metrics": { | |
| "loss": 0.48273867625180944, | |
| "ce_loss": 0.4727386857885526, | |
| "lb_loss": 1.0000000012946861 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 345000000, | |
| "cumulative_training_bytes": 345000308, | |
| "metrics": { | |
| "loss": 0.4827433929599684, | |
| "ce_loss": 0.47274340249671154, | |
| "lb_loss": 1.0000000012122365 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 346000000, | |
| "cumulative_training_bytes": 346005463, | |
| "metrics": { | |
| "loss": 0.4828590717076615, | |
| "ce_loss": 0.4728590812444047, | |
| "lb_loss": 1.0000000011858081 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 347000000, | |
| "cumulative_training_bytes": 347004643, | |
| "metrics": { | |
| "loss": 0.48290444134126503, | |
| "ce_loss": 0.4729044508780082, | |
| "lb_loss": 1.000000001125386 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 348000000, | |
| "cumulative_training_bytes": 348004692, | |
| "metrics": { | |
| "loss": 0.48292070460289693, | |
| "ce_loss": 0.4729207141396401, | |
| "lb_loss": 1.0000000011644445 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 349000000, | |
| "cumulative_training_bytes": 349004119, | |
| "metrics": { | |
| "loss": 0.48292387037566215, | |
| "ce_loss": 0.4729238799124053, | |
| "lb_loss": 1.0000000013490067 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 350000000, | |
| "cumulative_training_bytes": 350003796, | |
| "metrics": { | |
| "loss": 0.48291546779106, | |
| "ce_loss": 0.47291547732780315, | |
| "lb_loss": 1.0000000011722043 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 351000000, | |
| "cumulative_training_bytes": 351004186, | |
| "metrics": { | |
| "loss": 0.48290248690755855, | |
| "ce_loss": 0.4729024964443017, | |
| "lb_loss": 1.0000000011923116 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 352000000, | |
| "cumulative_training_bytes": 352003413, | |
| "metrics": { | |
| "loss": 0.48291019789300293, | |
| "ce_loss": 0.4729102074297461, | |
| "lb_loss": 1.000000001454166 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 353000000, | |
| "cumulative_training_bytes": 353003199, | |
| "metrics": { | |
| "loss": 0.4829023801036983, | |
| "ce_loss": 0.47290238964044146, | |
| "lb_loss": 1.000000001453513 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 354000000, | |
| "cumulative_training_bytes": 354003719, | |
| "metrics": { | |
| "loss": 0.48289616401652297, | |
| "ce_loss": 0.47289617355326613, | |
| "lb_loss": 1.0000000015103676 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 355000000, | |
| "cumulative_training_bytes": 355004300, | |
| "metrics": { | |
| "loss": 0.48289350448174656, | |
| "ce_loss": 0.4728935140184897, | |
| "lb_loss": 1.0000000015655617 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 356000000, | |
| "cumulative_training_bytes": 356004592, | |
| "metrics": { | |
| "loss": 0.48288726240185853, | |
| "ce_loss": 0.4728872719386017, | |
| "lb_loss": 1.0000000014871975 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 357000000, | |
| "cumulative_training_bytes": 357003308, | |
| "metrics": { | |
| "loss": 0.4828915270964838, | |
| "ce_loss": 0.47289153663322697, | |
| "lb_loss": 1.0000000015261432 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 358000000, | |
| "cumulative_training_bytes": 358002795, | |
| "metrics": { | |
| "loss": 0.4828872656784708, | |
| "ce_loss": 0.47288727521521395, | |
| "lb_loss": 1.000000001662674 | |
| } | |
| }, | |
| { | |
| "epoch": 5, | |
| "checkpoint_type": "epoch", | |
| "metrics": { | |
| "loss": 0.48288743393137185, | |
| "ce_loss": 0.472887443468115, | |
| "lb_loss": 1.0000000016887818, | |
| "training_bytes": 71629698 | |
| }, | |
| "cumulative_training_bytes": 358148627, | |
| "training_bytes_this_epoch": 71629698 | |
| } | |
| ] | |
| } |