| { |
| "run_name": "run_large_20251113_074900", |
| "timestamp": "20251113_074900", |
| "phase": "large", |
| "config": { |
| "arch_layout": [ |
| "m4", |
| [ |
| "T22" |
| ], |
| "m4" |
| ], |
| "d_model": [ |
| 1024, |
| 1536 |
| ], |
| "d_intermediate": [ |
| 0, |
| 4096 |
| ], |
| "vocab_size": 256, |
| "ssm_cfg": { |
| "chunk_size": 256, |
| "d_conv": 4, |
| "d_state": 128, |
| "expand": 2 |
| }, |
| "attn_cfg": { |
| "num_heads": [ |
| 16, |
| 16 |
| ], |
| "rotary_emb_dim": [ |
| 32, |
| 48 |
| ], |
| "window_size": [ |
| 1023, |
| -1 |
| ] |
| }, |
| "tie_embeddings": false |
| }, |
| "training_args": { |
| "data": "datasets/moses/smiles-molecules-moses_all.csv", |
| "max_samples": null, |
| "batch_size": 128, |
| "epochs": 5, |
| "lr": 0.0001, |
| "weight_decay": 0.1, |
| "gradient_accumulation": 8, |
| "concatenate": false, |
| "num_concatenate": 0, |
| "concatenate_separator": " ", |
| "checkpoint_bytes": 1000000, |
| "num_test_samples": 50, |
| "num_visualize": 15, |
| "skip_visualization": false |
| }, |
| "dataset_info": { |
| "train_size": 1936912, |
| "test_size": 50, |
| "test_smiles_file": "checkpoints/run_large_20251113_074900/test_smiles.txt" |
| }, |
| "model_info": { |
| "num_parameters": 622923776, |
| "device": "cuda", |
| "dtype": "torch.bfloat16", |
| "use_amp": true |
| }, |
| "training_history": [ |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 1000000, |
| "cumulative_training_bytes": 1002806, |
| "metrics": { |
| "loss": 2.281135283806474, |
| "ce_loss": 2.271135265700483, |
| "lb_loss": 1.0000000002879452 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 2000000, |
| "cumulative_training_bytes": 2002946, |
| "metrics": { |
| "loss": 1.706012413519058, |
| "ce_loss": 1.6960124092009685, |
| "lb_loss": 0.9999999955260436 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 3000000, |
| "cumulative_training_bytes": 3001506, |
| "metrics": { |
| "loss": 1.4319949007573536, |
| "ce_loss": 1.4219949010500808, |
| "lb_loss": 0.9999999944150737 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 4000000, |
| "cumulative_training_bytes": 4000430, |
| "metrics": { |
| "loss": 1.2650426110354336, |
| "ce_loss": 1.2550426136363637, |
| "lb_loss": 0.9999999934254271 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 5000000, |
| "cumulative_training_bytes": 5003932, |
| "metrics": { |
| "loss": 1.1526008317636889, |
| "ce_loss": 1.142600835755814, |
| "lb_loss": 0.9999999931269838 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 6000000, |
| "cumulative_training_bytes": 6003600, |
| "metrics": { |
| "loss": 1.0721529130981888, |
| "ce_loss": 1.062152918012924, |
| "lb_loss": 0.999999994848387 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 7000000, |
| "cumulative_training_bytes": 7002600, |
| "metrics": { |
| "loss": 1.0112524831724299, |
| "ce_loss": 1.0012524887465375, |
| "lb_loss": 0.9999999941798788 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 8000000, |
| "cumulative_training_bytes": 8002304, |
| "metrics": { |
| "loss": 0.9639488575675271, |
| "ce_loss": 0.9539488636363637, |
| "lb_loss": 0.9999999943646518 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 9000000, |
| "cumulative_training_bytes": 9000790, |
| "metrics": { |
| "loss": 0.9256936054805229, |
| "ce_loss": 0.9156936119342672, |
| "lb_loss": 0.9999999935128565 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 10000000, |
| "cumulative_training_bytes": 10003453, |
| "metrics": { |
| "loss": 0.8938084639106858, |
| "ce_loss": 0.8838084706737761, |
| "lb_loss": 0.9999999943371254 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 11000000, |
| "cumulative_training_bytes": 11002426, |
| "metrics": { |
| "loss": 0.8670801356690946, |
| "ce_loss": 0.8570801426840018, |
| "lb_loss": 0.999999994536022 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 12000000, |
| "cumulative_training_bytes": 12000566, |
| "metrics": { |
| "loss": 0.8441871771186289, |
| "ce_loss": 0.8341871843434343, |
| "lb_loss": 0.9999999944128172 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 13000000, |
| "cumulative_training_bytes": 13004187, |
| "metrics": { |
| "loss": 0.8242754283722971, |
| "ce_loss": 0.8142754357755406, |
| "lb_loss": 0.999999994221772 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 14000000, |
| "cumulative_training_bytes": 14003783, |
| "metrics": { |
| "loss": 0.8069831988157659, |
| "ce_loss": 0.7969832063711911, |
| "lb_loss": 0.9999999938083818 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 15000000, |
| "cumulative_training_bytes": 15002839, |
| "metrics": { |
| "loss": 0.791671675408973, |
| "ce_loss": 0.7816716830963154, |
| "lb_loss": 0.9999999934693036 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 16000000, |
| "cumulative_training_bytes": 16002416, |
| "metrics": { |
| "loss": 0.7779427005305435, |
| "ce_loss": 0.7679427083333333, |
| "lb_loss": 0.9999999930461247 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 17000000, |
| "cumulative_training_bytes": 17002552, |
| "metrics": { |
| "loss": 0.7657100694626995, |
| "ce_loss": 0.7557100773673702, |
| "lb_loss": 0.9999999931316952 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 18000000, |
| "cumulative_training_bytes": 18001467, |
| "metrics": { |
| "loss": 0.7545878573738295, |
| "ce_loss": 0.7445878653690733, |
| "lb_loss": 0.9999999934004555 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 19000000, |
| "cumulative_training_bytes": 19000835, |
| "metrics": { |
| "loss": 0.7444627280135493, |
| "ce_loss": 0.7344627360898418, |
| "lb_loss": 0.9999999935496761 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 20000000, |
| "cumulative_training_bytes": 20000123, |
| "metrics": { |
| "loss": 0.7352657759802418, |
| "ce_loss": 0.725265784129486, |
| "lb_loss": 0.9999999934238328 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 21000000, |
| "cumulative_training_bytes": 21002597, |
| "metrics": { |
| "loss": 0.726655715058511, |
| "ce_loss": 0.7166557232740707, |
| "lb_loss": 0.999999993339033 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 22000000, |
| "cumulative_training_bytes": 22000619, |
| "metrics": { |
| "loss": 0.7188806190663086, |
| "ce_loss": 0.7088806273418559, |
| "lb_loss": 0.9999999929451853 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 23000000, |
| "cumulative_training_bytes": 23000348, |
| "metrics": { |
| "loss": 0.7116252486272977, |
| "ce_loss": 0.7016252569576218, |
| "lb_loss": 0.9999999927991858 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 24000000, |
| "cumulative_training_bytes": 24004833, |
| "metrics": { |
| "loss": 0.7047766645990237, |
| "ce_loss": 0.694776672979798, |
| "lb_loss": 0.9999999928354013 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 25000000, |
| "cumulative_training_bytes": 25004271, |
| "metrics": { |
| "loss": 0.6984826149729632, |
| "ce_loss": 0.6884826233999224, |
| "lb_loss": 0.9999999924742777 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 26000000, |
| "cumulative_training_bytes": 26003036, |
| "metrics": { |
| "loss": 0.6926022446542389, |
| "ce_loss": 0.6826022531238344, |
| "lb_loss": 0.9999999924410372 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 27000000, |
| "cumulative_training_bytes": 27002303, |
| "metrics": { |
| "loss": 0.68704403782713, |
| "ce_loss": 0.6770440463362069, |
| "lb_loss": 0.9999999920034717 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 28000000, |
| "cumulative_training_bytes": 28001005, |
| "metrics": { |
| "loss": 0.6819027289412697, |
| "ce_loss": 0.6719027374870108, |
| "lb_loss": 0.9999999921752128 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 29000000, |
| "cumulative_training_bytes": 29003421, |
| "metrics": { |
| "loss": 0.6770648373905342, |
| "ce_loss": 0.6670648459705735, |
| "lb_loss": 0.9999999924460257 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 30000000, |
| "cumulative_training_bytes": 30003416, |
| "metrics": { |
| "loss": 0.6724512500756774, |
| "ce_loss": 0.6624512586875707, |
| "lb_loss": 0.9999999922158472 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 31000000, |
| "cumulative_training_bytes": 31002007, |
| "metrics": { |
| "loss": 0.6681099358288201, |
| "ce_loss": 0.6581099444705146, |
| "lb_loss": 0.9999999920937371 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 32000000, |
| "cumulative_training_bytes": 32004458, |
| "metrics": { |
| "loss": 0.6640003464438698, |
| "ce_loss": 0.6540003551136364, |
| "lb_loss": 0.9999999920256211 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 33000000, |
| "cumulative_training_bytes": 33002297, |
| "metrics": { |
| "loss": 0.6601016364516562, |
| "ce_loss": 0.6501016451476638, |
| "lb_loss": 0.9999999920743162 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 34000000, |
| "cumulative_training_bytes": 34004030, |
| "metrics": { |
| "loss": 0.6564515032034601, |
| "ce_loss": 0.6464515119242835, |
| "lb_loss": 0.9999999919427915 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 35000000, |
| "cumulative_training_bytes": 35002326, |
| "metrics": { |
| "loss": 0.6529157669000273, |
| "ce_loss": 0.6429157756441335, |
| "lb_loss": 0.999999991933268 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 36000000, |
| "cumulative_training_bytes": 36001349, |
| "metrics": { |
| "loss": 0.6495351869409735, |
| "ce_loss": 0.6395351957070707, |
| "lb_loss": 0.9999999918600525 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 37000000, |
| "cumulative_training_bytes": 37004270, |
| "metrics": { |
| "loss": 0.6462952058520207, |
| "ce_loss": 0.63629521463902, |
| "lb_loss": 0.999999991823105 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 38000000, |
| "cumulative_training_bytes": 38003714, |
| "metrics": { |
| "loss": 0.6431952418950054, |
| "ce_loss": 0.6331952507017096, |
| "lb_loss": 0.9999999918174792 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 39000000, |
| "cumulative_training_bytes": 39003133, |
| "metrics": { |
| "loss": 0.6402474970796226, |
| "ce_loss": 0.6302475059050224, |
| "lb_loss": 0.9999999917750926 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 40000000, |
| "cumulative_training_bytes": 40001052, |
| "metrics": { |
| "loss": 0.6374116862325957, |
| "ce_loss": 0.6274116950757576, |
| "lb_loss": 0.999999991792621 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 41000000, |
| "cumulative_training_bytes": 41003634, |
| "metrics": { |
| "loss": 0.634700220979048, |
| "ce_loss": 0.6247002298391865, |
| "lb_loss": 0.9999999917045446 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 42000000, |
| "cumulative_training_bytes": 42001934, |
| "metrics": { |
| "loss": 0.6320584671857601, |
| "ce_loss": 0.6220584760619877, |
| "lb_loss": 0.9999999916747524 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 43000000, |
| "cumulative_training_bytes": 43004796, |
| "metrics": { |
| "loss": 0.6295534807504097, |
| "ce_loss": 0.6195534896420518, |
| "lb_loss": 0.9999999916808849 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 44000000, |
| "cumulative_training_bytes": 44003115, |
| "metrics": { |
| "loss": 0.6271674778031694, |
| "ce_loss": 0.6171674867094535, |
| "lb_loss": 0.9999999917974657 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 45000000, |
| "cumulative_training_bytes": 45002725, |
| "metrics": { |
| "loss": 0.6248585546974785, |
| "ce_loss": 0.6148585636177548, |
| "lb_loss": 0.9999999917547551 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 46000000, |
| "cumulative_training_bytes": 46002129, |
| "metrics": { |
| "loss": 0.6225959593288815, |
| "ce_loss": 0.6125959682625421, |
| "lb_loss": 0.9999999915694104 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 47000000, |
| "cumulative_training_bytes": 47000416, |
| "metrics": { |
| "loss": 0.6204234810989125, |
| "ce_loss": 0.6104234900453889, |
| "lb_loss": 0.9999999917178196 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 48000000, |
| "cumulative_training_bytes": 48003247, |
| "metrics": { |
| "loss": 0.6183409256689982, |
| "ce_loss": 0.6083409346278154, |
| "lb_loss": 0.9999999918066941 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 49000000, |
| "cumulative_training_bytes": 49003796, |
| "metrics": { |
| "loss": 0.6163010463005029, |
| "ce_loss": 0.6063010552710992, |
| "lb_loss": 0.9999999916847186 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 50000000, |
| "cumulative_training_bytes": 50003148, |
| "metrics": { |
| "loss": 0.614361976497587, |
| "ce_loss": 0.6043619854794919, |
| "lb_loss": 0.9999999918392555 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 51000000, |
| "cumulative_training_bytes": 51000399, |
| "metrics": { |
| "loss": 0.6125098095498663, |
| "ce_loss": 0.6025098185426372, |
| "lb_loss": 0.99999999184608 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 52000000, |
| "cumulative_training_bytes": 52003702, |
| "metrics": { |
| "loss": 0.6106979154326813, |
| "ce_loss": 0.60069792443595, |
| "lb_loss": 0.9999999918200599 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 53000000, |
| "cumulative_training_bytes": 53002141, |
| "metrics": { |
| "loss": 0.6089451033860565, |
| "ce_loss": 0.598945112399378, |
| "lb_loss": 0.9999999917342992 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 54000000, |
| "cumulative_training_bytes": 54001738, |
| "metrics": { |
| "loss": 0.6072190762593643, |
| "ce_loss": 0.5972190852823667, |
| "lb_loss": 0.9999999917854974 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 55000000, |
| "cumulative_training_bytes": 55000564, |
| "metrics": { |
| "loss": 0.6055407572868681, |
| "ce_loss": 0.5955407663191996, |
| "lb_loss": 0.9999999920344992 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 56000000, |
| "cumulative_training_bytes": 56004377, |
| "metrics": { |
| "loss": 0.603901492993085, |
| "ce_loss": 0.5939015020344559, |
| "lb_loss": 0.9999999920224413 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 57000000, |
| "cumulative_training_bytes": 57002312, |
| "metrics": { |
| "loss": 0.6023297931422605, |
| "ce_loss": 0.592329802192311, |
| "lb_loss": 0.9999999919442221 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 58000000, |
| "cumulative_training_bytes": 58000186, |
| "metrics": { |
| "loss": 0.6008043852197599, |
| "ce_loss": 0.5908043942781911, |
| "lb_loss": 0.9999999920480638 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 59000000, |
| "cumulative_training_bytes": 59003287, |
| "metrics": { |
| "loss": 0.5993142948056599, |
| "ce_loss": 0.5893143038722268, |
| "lb_loss": 0.999999991938435 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 60000000, |
| "cumulative_training_bytes": 60002539, |
| "metrics": { |
| "loss": 0.5978776750521423, |
| "ce_loss": 0.5878776841265352, |
| "lb_loss": 0.9999999918992395 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 61000000, |
| "cumulative_training_bytes": 61001369, |
| "metrics": { |
| "loss": 0.5964548781080705, |
| "ce_loss": 0.5864548871900334, |
| "lb_loss": 0.9999999918518527 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 62000000, |
| "cumulative_training_bytes": 62000171, |
| "metrics": { |
| "loss": 0.595078989532466, |
| "ce_loss": 0.5850789986217547, |
| "lb_loss": 0.9999999919225172 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 63000000, |
| "cumulative_training_bytes": 63004361, |
| "metrics": { |
| "loss": 0.5937321655380584, |
| "ce_loss": 0.5837321746344748, |
| "lb_loss": 0.9999999919915575 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 64000000, |
| "cumulative_training_bytes": 64002292, |
| "metrics": { |
| "loss": 0.5924299737105143, |
| "ce_loss": 0.582429982813802, |
| "lb_loss": 0.999999992053316 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 65000000, |
| "cumulative_training_bytes": 65000997, |
| "metrics": { |
| "loss": 0.59116503060811, |
| "ce_loss": 0.5811650397180578, |
| "lb_loss": 0.9999999920153695 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 66000000, |
| "cumulative_training_bytes": 66004775, |
| "metrics": { |
| "loss": 0.589915283734672, |
| "ce_loss": 0.5799152928511091, |
| "lb_loss": 0.9999999919879169 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 67000000, |
| "cumulative_training_bytes": 67002689, |
| "metrics": { |
| "loss": 0.5887356604232457, |
| "ce_loss": 0.5787356695459479, |
| "lb_loss": 0.9999999919995213 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 68000000, |
| "cumulative_training_bytes": 68000588, |
| "metrics": { |
| "loss": 0.5875624195750525, |
| "ce_loss": 0.5775624287038358, |
| "lb_loss": 0.9999999919300427 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 69000000, |
| "cumulative_training_bytes": 69004376, |
| "metrics": { |
| "loss": 0.5864199688653537, |
| "ce_loss": 0.5764199780000703, |
| "lb_loss": 0.9999999920055317 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 70000000, |
| "cumulative_training_bytes": 70002900, |
| "metrics": { |
| "loss": 0.5852980388978469, |
| "ce_loss": 0.5752980480382991, |
| "lb_loss": 0.9999999920370275 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 71000000, |
| "cumulative_training_bytes": 71001777, |
| "metrics": { |
| "loss": 0.5841887338123211, |
| "ce_loss": 0.5741887429583475, |
| "lb_loss": 0.9999999920513574 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 72000000, |
| "cumulative_training_bytes": 72001154, |
| "metrics": { |
| "loss": 0.5831138857462038, |
| "ce_loss": 0.57311389489765, |
| "lb_loss": 0.9999999919047493 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 73000000, |
| "cumulative_training_bytes": 73000028, |
| "metrics": { |
| "loss": 0.5820674106895334, |
| "ce_loss": 0.5720674198462509, |
| "lb_loss": 0.9999999919165382 |
| } |
| }, |
| { |
| "epoch": 1, |
| "checkpoint_type": "epoch", |
| "metrics": { |
| "loss": 0.5816875383369134, |
| "ce_loss": 0.5716875474955395, |
| "lb_loss": 0.999999991905931, |
| "training_bytes": 73364866 |
| }, |
| "cumulative_training_bytes": 73364866, |
| "training_bytes_this_epoch": 73364866 |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 74000000, |
| "cumulative_training_bytes": 74000512, |
| "metrics": { |
| "loss": 0.5005325524861576, |
| "ce_loss": 0.49053256202290074, |
| "lb_loss": 0.9999999849850895 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 75000000, |
| "cumulative_training_bytes": 75004045, |
| "metrics": { |
| "loss": 0.5011473647236119, |
| "ce_loss": 0.49114737426035504, |
| "lb_loss": 0.9999999903010194 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 76000000, |
| "cumulative_training_bytes": 76003847, |
| "metrics": { |
| "loss": 0.5011714346268598, |
| "ce_loss": 0.4911714441636029, |
| "lb_loss": 0.9999999864136472 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 77000000, |
| "cumulative_training_bytes": 77002822, |
| "metrics": { |
| "loss": 0.5010182196299235, |
| "ce_loss": 0.49101822916666665, |
| "lb_loss": 0.9999999865690867 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 78000000, |
| "cumulative_training_bytes": 78000023, |
| "metrics": { |
| "loss": 0.5009371635804116, |
| "ce_loss": 0.4909371731171548, |
| "lb_loss": 0.9999999881538886 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 79000000, |
| "cumulative_training_bytes": 79002990, |
| "metrics": { |
| "loss": 0.5009800124967908, |
| "ce_loss": 0.49098002203353397, |
| "lb_loss": 0.9999999891860877 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 80000000, |
| "cumulative_training_bytes": 80001116, |
| "metrics": { |
| "loss": 0.5009120384910143, |
| "ce_loss": 0.4909120480277575, |
| "lb_loss": 0.9999999892459114 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 81000000, |
| "cumulative_training_bytes": 81003932, |
| "metrics": { |
| "loss": 0.5009258906853381, |
| "ce_loss": 0.4909259002220812, |
| "lb_loss": 0.999999988805219 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 82000000, |
| "cumulative_training_bytes": 82002652, |
| "metrics": { |
| "loss": 0.5008152612264443, |
| "ce_loss": 0.49081527076318743, |
| "lb_loss": 0.9999999887279656 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 83000000, |
| "cumulative_training_bytes": 83002121, |
| "metrics": { |
| "loss": 0.5007128221408222, |
| "ce_loss": 0.4907128316775654, |
| "lb_loss": 0.9999999895062245 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 84000000, |
| "cumulative_training_bytes": 84000595, |
| "metrics": { |
| "loss": 0.5006207174459368, |
| "ce_loss": 0.49062072698268006, |
| "lb_loss": 0.9999999896764973 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 85000000, |
| "cumulative_training_bytes": 85000337, |
| "metrics": { |
| "loss": 0.5004435125986735, |
| "ce_loss": 0.49044352213541664, |
| "lb_loss": 0.9999999905377627 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 86000000, |
| "cumulative_training_bytes": 86003615, |
| "metrics": { |
| "loss": 0.500366971188228, |
| "ce_loss": 0.49036698072497126, |
| "lb_loss": 0.9999999906946335 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 87000000, |
| "cumulative_training_bytes": 87002716, |
| "metrics": { |
| "loss": 0.5002281165741704, |
| "ce_loss": 0.4902281261109136, |
| "lb_loss": 0.999999990316629 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 88000000, |
| "cumulative_training_bytes": 88001724, |
| "metrics": { |
| "loss": 0.5001107990919418, |
| "ce_loss": 0.490110808628685, |
| "lb_loss": 0.9999999901481558 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 89000000, |
| "cumulative_training_bytes": 89000615, |
| "metrics": { |
| "loss": 0.5000072579051174, |
| "ce_loss": 0.49000726744186046, |
| "lb_loss": 0.9999999906111133 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 90000000, |
| "cumulative_training_bytes": 90004256, |
| "metrics": { |
| "loss": 0.4999731520672778, |
| "ce_loss": 0.489973161604021, |
| "lb_loss": 0.9999999905695448 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 91000000, |
| "cumulative_training_bytes": 91004345, |
| "metrics": { |
| "loss": 0.4998607054852469, |
| "ce_loss": 0.4898607150219901, |
| "lb_loss": 0.99999999072671 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 92000000, |
| "cumulative_training_bytes": 92001910, |
| "metrics": { |
| "loss": 0.499761327124287, |
| "ce_loss": 0.48976133666103017, |
| "lb_loss": 0.999999990898042 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 93000000, |
| "cumulative_training_bytes": 93001551, |
| "metrics": { |
| "loss": 0.49959730913609635, |
| "ce_loss": 0.4895973186728395, |
| "lb_loss": 0.9999999908900555 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 94000000, |
| "cumulative_training_bytes": 94001433, |
| "metrics": { |
| "loss": 0.4994959777459166, |
| "ce_loss": 0.48949598728265975, |
| "lb_loss": 0.9999999912889829 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 95000000, |
| "cumulative_training_bytes": 95001556, |
| "metrics": { |
| "loss": 0.4993510381996979, |
| "ce_loss": 0.48935104773644106, |
| "lb_loss": 0.9999999910098777 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 96000000, |
| "cumulative_training_bytes": 96000636, |
| "metrics": { |
| "loss": 0.4992381378229398, |
| "ce_loss": 0.48923814735968296, |
| "lb_loss": 0.9999999908575566 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 97000000, |
| "cumulative_training_bytes": 97003789, |
| "metrics": { |
| "loss": 0.49909694559146195, |
| "ce_loss": 0.4890969551282051, |
| "lb_loss": 0.9999999909156408 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 98000000, |
| "cumulative_training_bytes": 98003981, |
| "metrics": { |
| "loss": 0.4989720032314127, |
| "ce_loss": 0.4889720127681559, |
| "lb_loss": 0.9999999909202922 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 99000000, |
| "cumulative_training_bytes": 99001943, |
| "metrics": { |
| "loss": 0.49889595580276885, |
| "ce_loss": 0.488895965339512, |
| "lb_loss": 0.9999999909471289 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 100000000, |
| "cumulative_training_bytes": 100000629, |
| "metrics": { |
| "loss": 0.49879610028484794, |
| "ce_loss": 0.4887961098215911, |
| "lb_loss": 0.9999999908091873 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 101000000, |
| "cumulative_training_bytes": 101003165, |
| "metrics": { |
| "loss": 0.4987198369544849, |
| "ce_loss": 0.48871984649122807, |
| "lb_loss": 0.9999999910802172 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 102000000, |
| "cumulative_training_bytes": 102000414, |
| "metrics": { |
| "loss": 0.4985755650907543, |
| "ce_loss": 0.48857557462749746, |
| "lb_loss": 0.9999999911894929 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 103000000, |
| "cumulative_training_bytes": 103004635, |
| "metrics": { |
| "loss": 0.4984361996690478, |
| "ce_loss": 0.48843620920579095, |
| "lb_loss": 0.9999999914585852 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 104000000, |
| "cumulative_training_bytes": 104002500, |
| "metrics": { |
| "loss": 0.49834367617302106, |
| "ce_loss": 0.4883436857097642, |
| "lb_loss": 0.9999999916710078 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 105000000, |
| "cumulative_training_bytes": 105001110, |
| "metrics": { |
| "loss": 0.4982617840456323, |
| "ce_loss": 0.4882617935823755, |
| "lb_loss": 0.9999999917969393 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 106000000, |
| "cumulative_training_bytes": 106004479, |
| "metrics": { |
| "loss": 0.49815445561105837, |
| "ce_loss": 0.48815446514780153, |
| "lb_loss": 0.9999999917215772 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 107000000, |
| "cumulative_training_bytes": 107002481, |
| "metrics": { |
| "loss": 0.49805969111546206, |
| "ce_loss": 0.4880597006522052, |
| "lb_loss": 0.9999999917611914 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 108000000, |
| "cumulative_training_bytes": 108000820, |
| "metrics": { |
| "loss": 0.49798761556824006, |
| "ce_loss": 0.4879876251049832, |
| "lb_loss": 0.999999992090537 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 109000000, |
| "cumulative_training_bytes": 109004357, |
| "metrics": { |
| "loss": 0.4978611764413551, |
| "ce_loss": 0.48786118597809824, |
| "lb_loss": 0.999999992159204 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 110000000, |
| "cumulative_training_bytes": 110001924, |
| "metrics": { |
| "loss": 0.4977405579834368, |
| "ce_loss": 0.48774056752017997, |
| "lb_loss": 0.9999999922230807 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 111000000, |
| "cumulative_training_bytes": 111000809, |
| "metrics": { |
| "loss": 0.49763489521979426, |
| "ce_loss": 0.4876349047565374, |
| "lb_loss": 0.9999999922374989 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 112000000, |
| "cumulative_training_bytes": 112000343, |
| "metrics": { |
| "loss": 0.49752072510687584, |
| "ce_loss": 0.487520734643619, |
| "lb_loss": 0.9999999922511718 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 113000000, |
| "cumulative_training_bytes": 113004390, |
| "metrics": { |
| "loss": 0.49740453195665213, |
| "ce_loss": 0.4874045414933953, |
| "lb_loss": 0.9999999924327763 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 114000000, |
| "cumulative_training_bytes": 114003370, |
| "metrics": { |
| "loss": 0.4973142315453375, |
| "ce_loss": 0.48731424108208066, |
| "lb_loss": 0.9999999925903078 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 115000000, |
| "cumulative_training_bytes": 115002294, |
| "metrics": { |
| "loss": 0.4972011996359397, |
| "ce_loss": 0.48720120917268284, |
| "lb_loss": 0.9999999926569965 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 116000000, |
| "cumulative_training_bytes": 116000382, |
| "metrics": { |
| "loss": 0.4970865782645987, |
| "ce_loss": 0.48708658780134184, |
| "lb_loss": 0.9999999925036688 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 117000000, |
| "cumulative_training_bytes": 117000510, |
| "metrics": { |
| "loss": 0.49695767275492353, |
| "ce_loss": 0.4869576822916667, |
| "lb_loss": 0.9999999926090241 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 118000000, |
| "cumulative_training_bytes": 118003216, |
| "metrics": { |
| "loss": 0.4968769081617471, |
| "ce_loss": 0.4868769176984903, |
| "lb_loss": 0.9999999924773979 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 119000000, |
| "cumulative_training_bytes": 119001261, |
| "metrics": { |
| "loss": 0.49676820662707283, |
| "ce_loss": 0.486768216163816, |
| "lb_loss": 0.9999999922367688 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 120000000, |
| "cumulative_training_bytes": 120000540, |
| "metrics": { |
| "loss": 0.4966631457418721, |
| "ce_loss": 0.48666315527861526, |
| "lb_loss": 0.9999999924773845 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 121000000, |
| "cumulative_training_bytes": 121003783, |
| "metrics": { |
| "loss": 0.4965640567796623, |
| "ce_loss": 0.4865640663164055, |
| "lb_loss": 0.9999999925388039 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 122000000, |
| "cumulative_training_bytes": 122001344, |
| "metrics": { |
| "loss": 0.4964622565243613, |
| "ce_loss": 0.4864622660611045, |
| "lb_loss": 0.9999999927217215 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 123000000, |
| "cumulative_training_bytes": 123004762, |
| "metrics": { |
| "loss": 0.4963449017705134, |
| "ce_loss": 0.48634491130725654, |
| "lb_loss": 0.9999999927349744 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 124000000, |
| "cumulative_training_bytes": 124003071, |
| "metrics": { |
| "loss": 0.49625051403913045, |
| "ce_loss": 0.4862505235758736, |
| "lb_loss": 0.9999999929239101 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 125000000, |
| "cumulative_training_bytes": 125001438, |
| "metrics": { |
| "loss": 0.49615721014920183, |
| "ce_loss": 0.486157219685945, |
| "lb_loss": 0.9999999929096719 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 126000000, |
| "cumulative_training_bytes": 126004765, |
| "metrics": { |
| "loss": 0.4960651325244099, |
| "ce_loss": 0.4860651420611531, |
| "lb_loss": 0.9999999929844597 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 127000000, |
| "cumulative_training_bytes": 127003693, |
| "metrics": { |
| "loss": 0.49597558558030314, |
| "ce_loss": 0.4859755951170463, |
| "lb_loss": 0.99999999303966 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 128000000, |
| "cumulative_training_bytes": 128002776, |
| "metrics": { |
| "loss": 0.49587631449830566, |
| "ce_loss": 0.4858763240350488, |
| "lb_loss": 0.9999999929817779 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 129000000, |
| "cumulative_training_bytes": 129001342, |
| "metrics": { |
| "loss": 0.49577192754934957, |
| "ce_loss": 0.48577193708609273, |
| "lb_loss": 0.9999999930973708 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 130000000, |
| "cumulative_training_bytes": 130004919, |
| "metrics": { |
| "loss": 0.49565188584543607, |
| "ce_loss": 0.48565189538217923, |
| "lb_loss": 0.9999999932349775 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 131000000, |
| "cumulative_training_bytes": 131003386, |
| "metrics": { |
| "loss": 0.4955454855112003, |
| "ce_loss": 0.4855454950479435, |
| "lb_loss": 0.999999993322114 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 132000000, |
| "cumulative_training_bytes": 132001655, |
| "metrics": { |
| "loss": 0.4954483614326657, |
| "ce_loss": 0.48544837096940885, |
| "lb_loss": 0.9999999934161384 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 133000000, |
| "cumulative_training_bytes": 133001280, |
| "metrics": { |
| "loss": 0.49533400802280486, |
| "ce_loss": 0.485334017559548, |
| "lb_loss": 0.9999999933519573 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 134000000, |
| "cumulative_training_bytes": 134000603, |
| "metrics": { |
| "loss": 0.4952326351822142, |
| "ce_loss": 0.4852326447189574, |
| "lb_loss": 0.9999999933518446 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 135000000, |
| "cumulative_training_bytes": 135004408, |
| "metrics": { |
| "loss": 0.4951358704479194, |
| "ce_loss": 0.48513587998466257, |
| "lb_loss": 0.9999999933053774 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 136000000, |
| "cumulative_training_bytes": 136002901, |
| "metrics": { |
| "loss": 0.4950331875414302, |
| "ce_loss": 0.4850331970781734, |
| "lb_loss": 0.9999999932783307 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 137000000, |
| "cumulative_training_bytes": 137002147, |
| "metrics": { |
| "loss": 0.4949309004891596, |
| "ce_loss": 0.4849309100259028, |
| "lb_loss": 0.9999999933247884 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 138000000, |
| "cumulative_training_bytes": 138000664, |
| "metrics": { |
| "loss": 0.4948386598770732, |
| "ce_loss": 0.48483866941381637, |
| "lb_loss": 0.9999999932938068 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 139000000, |
| "cumulative_training_bytes": 139003345, |
| "metrics": { |
| "loss": 0.4947454480302854, |
| "ce_loss": 0.4847454575670286, |
| "lb_loss": 0.9999999931806194 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 140000000, |
| "cumulative_training_bytes": 140001624, |
| "metrics": { |
| "loss": 0.4946506588881386, |
| "ce_loss": 0.48465066842488175, |
| "lb_loss": 0.9999999931006919 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 141000000, |
| "cumulative_training_bytes": 141001168, |
| "metrics": { |
| "loss": 0.494550829112816, |
| "ce_loss": 0.48455083864955917, |
| "lb_loss": 0.9999999930231249 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 142000000, |
| "cumulative_training_bytes": 142003856, |
| "metrics": { |
| "loss": 0.4944707291798835, |
| "ce_loss": 0.48447073871662666, |
| "lb_loss": 0.999999993099872 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 143000000, |
| "cumulative_training_bytes": 143002536, |
| "metrics": { |
| "loss": 0.4943706393075899, |
| "ce_loss": 0.48437064884433306, |
| "lb_loss": 0.999999993173932 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 144000000, |
| "cumulative_training_bytes": 144001676, |
| "metrics": { |
| "loss": 0.49428343353978393, |
| "ce_loss": 0.4842834430765271, |
| "lb_loss": 0.9999999932131705 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 145000000, |
| "cumulative_training_bytes": 145000756, |
| "metrics": { |
| "loss": 0.4941936364093857, |
| "ce_loss": 0.48419364594612885, |
| "lb_loss": 0.9999999931867728 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 146000000, |
| "cumulative_training_bytes": 146001319, |
| "metrics": { |
| "loss": 0.4941026584565154, |
| "ce_loss": 0.4841026679932586, |
| "lb_loss": 0.9999999931093816 |
| } |
| }, |
| { |
| "epoch": 2, |
| "checkpoint_type": "epoch", |
| "metrics": { |
| "loss": 0.49404458670987017, |
| "ce_loss": 0.48404459624661333, |
| "lb_loss": 0.9999999931190567, |
| "training_bytes": 73364908 |
| }, |
| "cumulative_training_bytes": 146729774, |
| "training_bytes_this_epoch": 73364908 |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 147000000, |
| "cumulative_training_bytes": 147001010, |
| "metrics": { |
| "loss": 0.4805636065346854, |
| "ce_loss": 0.47056361607142855, |
| "lb_loss": 0.9999999914850507 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 148000000, |
| "cumulative_training_bytes": 148004362, |
| "metrics": { |
| "loss": 0.4808664947408234, |
| "ce_loss": 0.47086650427756654, |
| "lb_loss": 0.9999999929743575 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 149000000, |
| "cumulative_training_bytes": 149002455, |
| "metrics": { |
| "loss": 0.48084887106027174, |
| "ce_loss": 0.4708488805970149, |
| "lb_loss": 0.9999999895787188 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 150000000, |
| "cumulative_training_bytes": 150001378, |
| "metrics": { |
| "loss": 0.48101561546325683, |
| "ce_loss": 0.471015625, |
| "lb_loss": 0.9999999924059267 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 151000000, |
| "cumulative_training_bytes": 151001036, |
| "metrics": { |
| "loss": 0.4810201380228482, |
| "ce_loss": 0.4710201475595914, |
| "lb_loss": 0.9999999941816125 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 152000000, |
| "cumulative_training_bytes": 152004640, |
| "metrics": { |
| "loss": 0.4810783021590289, |
| "ce_loss": 0.4710783116957721, |
| "lb_loss": 0.9999999939190114 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 153000000, |
| "cumulative_training_bytes": 153004013, |
| "metrics": { |
| "loss": 0.4810306487901502, |
| "ce_loss": 0.47103065832689334, |
| "lb_loss": 0.9999999938276488 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 154000000, |
| "cumulative_training_bytes": 154004012, |
| "metrics": { |
| "loss": 0.48104295921325685, |
| "ce_loss": 0.47104296875, |
| "lb_loss": 0.9999999943574269 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 155000000, |
| "cumulative_training_bytes": 155002759, |
| "metrics": { |
| "loss": 0.48113014689643385, |
| "ce_loss": 0.471130156433177, |
| "lb_loss": 0.9999999966459285 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 156000000, |
| "cumulative_training_bytes": 156001894, |
| "metrics": { |
| "loss": 0.48115360088428194, |
| "ce_loss": 0.4711536104210251, |
| "lb_loss": 0.9999999970072981 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 157000000, |
| "cumulative_training_bytes": 157004920, |
| "metrics": { |
| "loss": 0.4811667406992172, |
| "ce_loss": 0.4711667502359604, |
| "lb_loss": 0.9999999969902327 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 158000000, |
| "cumulative_training_bytes": 158004293, |
| "metrics": { |
| "loss": 0.48113322299013855, |
| "ce_loss": 0.4711332325268817, |
| "lb_loss": 0.9999999964878123 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 159000000, |
| "cumulative_training_bytes": 159003028, |
| "metrics": { |
| "loss": 0.4811722976047029, |
| "ce_loss": 0.4711723071414461, |
| "lb_loss": 0.9999999965381735 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 160000000, |
| "cumulative_training_bytes": 160002161, |
| "metrics": { |
| "loss": 0.4812026359236149, |
| "ce_loss": 0.47120264546035806, |
| "lb_loss": 0.9999999964502897 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 161000000, |
| "cumulative_training_bytes": 161000072, |
| "metrics": { |
| "loss": 0.4811928899280886, |
| "ce_loss": 0.4711928994648318, |
| "lb_loss": 0.9999999957873713 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 162000000, |
| "cumulative_training_bytes": 162004719, |
| "metrics": { |
| "loss": 0.4811979071299235, |
| "ce_loss": 0.47119791666666666, |
| "lb_loss": 0.9999999959506686 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 163000000, |
| "cumulative_training_bytes": 163003159, |
| "metrics": { |
| "loss": 0.481205364234115, |
| "ce_loss": 0.47120537377085814, |
| "lb_loss": 0.9999999952046322 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 164000000, |
| "cumulative_training_bytes": 164002582, |
| "metrics": { |
| "loss": 0.4811905742293994, |
| "ce_loss": 0.4711905837661426, |
| "lb_loss": 0.99999999494649 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 165000000, |
| "cumulative_training_bytes": 165001639, |
| "metrics": { |
| "loss": 0.4812484146936178, |
| "ce_loss": 0.47124842423036095, |
| "lb_loss": 0.9999999941945582 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 166000000, |
| "cumulative_training_bytes": 166000287, |
| "metrics": { |
| "loss": 0.48126094054503843, |
| "ce_loss": 0.4712609500817816, |
| "lb_loss": 0.9999999941955215 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 167000000, |
| "cumulative_training_bytes": 167003168, |
| "metrics": { |
| "loss": 0.48131460568688755, |
| "ce_loss": 0.4713146152236307, |
| "lb_loss": 0.9999999943688508 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 168000000, |
| "cumulative_training_bytes": 168002141, |
| "metrics": { |
| "loss": 0.4813228442072732, |
| "ce_loss": 0.4713228537440164, |
| "lb_loss": 0.9999999944702324 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 169000000, |
| "cumulative_training_bytes": 169001802, |
| "metrics": { |
| "loss": 0.4812920721636705, |
| "ce_loss": 0.47129208170041365, |
| "lb_loss": 0.9999999943678607 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 170000000, |
| "cumulative_training_bytes": 170004652, |
| "metrics": { |
| "loss": 0.4812715562184652, |
| "ce_loss": 0.47127156575520834, |
| "lb_loss": 0.9999999946479996 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 171000000, |
| "cumulative_training_bytes": 171003991, |
| "metrics": { |
| "loss": 0.48125830860648494, |
| "ce_loss": 0.4712583181432281, |
| "lb_loss": 0.9999999945110385 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 172000000, |
| "cumulative_training_bytes": 172002412, |
| "metrics": { |
| "loss": 0.48124760714111564, |
| "ce_loss": 0.4712476166778588, |
| "lb_loss": 0.9999999945678806 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 173000000, |
| "cumulative_training_bytes": 173002030, |
| "metrics": { |
| "loss": 0.4812279862642904, |
| "ce_loss": 0.4712279958010336, |
| "lb_loss": 0.9999999947194114 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 174000000, |
| "cumulative_training_bytes": 174004724, |
| "metrics": { |
| "loss": 0.48122082379659015, |
| "ce_loss": 0.4712208333333333, |
| "lb_loss": 0.9999999946170383 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 175000000, |
| "cumulative_training_bytes": 175000065, |
| "metrics": { |
| "loss": 0.48117782869224485, |
| "ce_loss": 0.471177838228988, |
| "lb_loss": 0.9999999944996056 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 176000000, |
| "cumulative_training_bytes": 176003706, |
| "metrics": { |
| "loss": 0.48119810987273176, |
| "ce_loss": 0.4711981194094749, |
| "lb_loss": 0.9999999942439113 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 177000000, |
| "cumulative_training_bytes": 177001652, |
| "metrics": { |
| "loss": 0.4812033632407676, |
| "ce_loss": 0.4712033727775108, |
| "lb_loss": 0.9999999942047062 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 178000000, |
| "cumulative_training_bytes": 178000949, |
| "metrics": { |
| "loss": 0.4811637610090779, |
| "ce_loss": 0.47116377054582104, |
| "lb_loss": 0.9999999945654316 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 179000000, |
| "cumulative_training_bytes": 179003699, |
| "metrics": { |
| "loss": 0.4811420990870549, |
| "ce_loss": 0.4711421086237981, |
| "lb_loss": 0.9999999944120646 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 180000000, |
| "cumulative_training_bytes": 180003641, |
| "metrics": { |
| "loss": 0.4811195278612458, |
| "ce_loss": 0.47111953739798895, |
| "lb_loss": 0.999999994232369 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 181000000, |
| "cumulative_training_bytes": 181001865, |
| "metrics": { |
| "loss": 0.48110407533521493, |
| "ce_loss": 0.4711040848719581, |
| "lb_loss": 0.9999999940125498 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 182000000, |
| "cumulative_training_bytes": 182004543, |
| "metrics": { |
| "loss": 0.4810891764598204, |
| "ce_loss": 0.4710891859965636, |
| "lb_loss": 0.999999994043632 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 183000000, |
| "cumulative_training_bytes": 183004012, |
| "metrics": { |
| "loss": 0.4810652307386211, |
| "ce_loss": 0.4710652402753643, |
| "lb_loss": 0.9999999939526901 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 184000000, |
| "cumulative_training_bytes": 184003290, |
| "metrics": { |
| "loss": 0.48101411130038446, |
| "ce_loss": 0.4710141208371276, |
| "lb_loss": 0.9999999940837331 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 185000000, |
| "cumulative_training_bytes": 185001996, |
| "metrics": { |
| "loss": 0.48102430591682327, |
| "ce_loss": 0.47102431545356643, |
| "lb_loss": 0.9999999941173169 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 186000000, |
| "cumulative_training_bytes": 186000889, |
| "metrics": { |
| "loss": 0.4810144482666894, |
| "ce_loss": 0.47101445780343254, |
| "lb_loss": 0.9999999941197542 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 187000000, |
| "cumulative_training_bytes": 187000547, |
| "metrics": { |
| "loss": 0.4809928505625946, |
| "ce_loss": 0.4709928600993378, |
| "lb_loss": 0.9999999940359471 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 188000000, |
| "cumulative_training_bytes": 188003407, |
| "metrics": { |
| "loss": 0.48096928381382076, |
| "ce_loss": 0.4709692933505639, |
| "lb_loss": 0.9999999940269312 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 189000000, |
| "cumulative_training_bytes": 189001532, |
| "metrics": { |
| "loss": 0.4809352142817932, |
| "ce_loss": 0.47093522381853636, |
| "lb_loss": 0.999999993874081 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 190000000, |
| "cumulative_training_bytes": 190004203, |
| "metrics": { |
| "loss": 0.48090532134560976, |
| "ce_loss": 0.4709053308823529, |
| "lb_loss": 0.9999999939426989 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 191000000, |
| "cumulative_training_bytes": 191003779, |
| "metrics": { |
| "loss": 0.48086076009418444, |
| "ce_loss": 0.4708607696309276, |
| "lb_loss": 0.999999993929217 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 192000000, |
| "cumulative_training_bytes": 192002266, |
| "metrics": { |
| "loss": 0.48084745050127764, |
| "ce_loss": 0.4708474600380208, |
| "lb_loss": 0.9999999939993182 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 193000000, |
| "cumulative_training_bytes": 193001653, |
| "metrics": { |
| "loss": 0.48082509623450276, |
| "ce_loss": 0.4708251057712459, |
| "lb_loss": 0.9999999939414749 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 194000000, |
| "cumulative_training_bytes": 194000835, |
| "metrics": { |
| "loss": 0.4807908648478091, |
| "ce_loss": 0.47079087438455225, |
| "lb_loss": 0.9999999939716709 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 195000000, |
| "cumulative_training_bytes": 195004664, |
| "metrics": { |
| "loss": 0.48074686263832717, |
| "ce_loss": 0.47074687217507033, |
| "lb_loss": 0.99999999400122 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 196000000, |
| "cumulative_training_bytes": 196004836, |
| "metrics": { |
| "loss": 0.48070388425876953, |
| "ce_loss": 0.4707038937955127, |
| "lb_loss": 0.9999999940700358 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 197000000, |
| "cumulative_training_bytes": 197003791, |
| "metrics": { |
| "loss": 0.4806869147736349, |
| "ce_loss": 0.47068692431037806, |
| "lb_loss": 0.9999999940268879 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 198000000, |
| "cumulative_training_bytes": 198002877, |
| "metrics": { |
| "loss": 0.4806610015517758, |
| "ce_loss": 0.470661011088519, |
| "lb_loss": 0.9999999939910581 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 199000000, |
| "cumulative_training_bytes": 199001231, |
| "metrics": { |
| "loss": 0.48063136801427725, |
| "ce_loss": 0.4706313775510204, |
| "lb_loss": 0.9999999939455393 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 200000000, |
| "cumulative_training_bytes": 200001409, |
| "metrics": { |
| "loss": 0.4805955566952794, |
| "ce_loss": 0.4705955662320226, |
| "lb_loss": 0.9999999939180041 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 201000000, |
| "cumulative_training_bytes": 201003734, |
| "metrics": { |
| "loss": 0.4805725930385271, |
| "ce_loss": 0.4705726025752703, |
| "lb_loss": 0.9999999940358079 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 202000000, |
| "cumulative_training_bytes": 202002044, |
| "metrics": { |
| "loss": 0.4805435962620111, |
| "ce_loss": 0.47054360579875426, |
| "lb_loss": 0.9999999941383624 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 203000000, |
| "cumulative_training_bytes": 203001126, |
| "metrics": { |
| "loss": 0.48050098680104225, |
| "ce_loss": 0.4705009963377854, |
| "lb_loss": 0.999999994103737 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 204000000, |
| "cumulative_training_bytes": 204004123, |
| "metrics": { |
| "loss": 0.48048055313257615, |
| "ce_loss": 0.4704805626693193, |
| "lb_loss": 0.9999999941515592 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 205000000, |
| "cumulative_training_bytes": 205003381, |
| "metrics": { |
| "loss": 0.4804562526845083, |
| "ce_loss": 0.4704562622212515, |
| "lb_loss": 0.9999999940881398 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 206000000, |
| "cumulative_training_bytes": 206002109, |
| "metrics": { |
| "loss": 0.48041871080847937, |
| "ce_loss": 0.47041872034522253, |
| "lb_loss": 0.9999999941975191 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 207000000, |
| "cumulative_training_bytes": 207004847, |
| "metrics": { |
| "loss": 0.48040474973443376, |
| "ce_loss": 0.4704047592711769, |
| "lb_loss": 0.9999999941119376 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 208000000, |
| "cumulative_training_bytes": 208002759, |
| "metrics": { |
| "loss": 0.4803970944535235, |
| "ce_loss": 0.4703971039902667, |
| "lb_loss": 0.9999999939768037 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 209000000, |
| "cumulative_training_bytes": 209002658, |
| "metrics": { |
| "loss": 0.4803597255816092, |
| "ce_loss": 0.4703597351183524, |
| "lb_loss": 0.9999999939481073 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 210000000, |
| "cumulative_training_bytes": 210001152, |
| "metrics": { |
| "loss": 0.48035077764043516, |
| "ce_loss": 0.4703507871771783, |
| "lb_loss": 0.9999999939705624 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 211000000, |
| "cumulative_training_bytes": 211004753, |
| "metrics": { |
| "loss": 0.48031134121951813, |
| "ce_loss": 0.4703113507562613, |
| "lb_loss": 0.9999999939972691 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 212000000, |
| "cumulative_training_bytes": 212002762, |
| "metrics": { |
| "loss": 0.48029571871685955, |
| "ce_loss": 0.4702957282536027, |
| "lb_loss": 0.9999999940359934 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 213000000, |
| "cumulative_training_bytes": 213001000, |
| "metrics": { |
| "loss": 0.4802748512237924, |
| "ce_loss": 0.47027486076053554, |
| "lb_loss": 0.9999999941389638 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 214000000, |
| "cumulative_training_bytes": 214004278, |
| "metrics": { |
| "loss": 0.48026237785064424, |
| "ce_loss": 0.4702623873873874, |
| "lb_loss": 0.9999999940760501 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 215000000, |
| "cumulative_training_bytes": 215002179, |
| "metrics": { |
| "loss": 0.4802387263928783, |
| "ce_loss": 0.47023873592962145, |
| "lb_loss": 0.9999999940484248 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 216000000, |
| "cumulative_training_bytes": 216004742, |
| "metrics": { |
| "loss": 0.48021305715383633, |
| "ce_loss": 0.4702130666905795, |
| "lb_loss": 0.9999999940762461 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 217000000, |
| "cumulative_training_bytes": 217002756, |
| "metrics": { |
| "loss": 0.480171780298706, |
| "ce_loss": 0.47017178983544916, |
| "lb_loss": 0.9999999941522144 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 218000000, |
| "cumulative_training_bytes": 218000797, |
| "metrics": { |
| "loss": 0.4801445482863861, |
| "ce_loss": 0.47014455782312925, |
| "lb_loss": 0.9999999941895609 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 219000000, |
| "cumulative_training_bytes": 219003956, |
| "metrics": { |
| "loss": 0.4801110330942356, |
| "ce_loss": 0.47011104263097875, |
| "lb_loss": 0.9999999942022717 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 220000000, |
| "cumulative_training_bytes": 220002155, |
| "metrics": { |
| "loss": 0.48008201318376237, |
| "ce_loss": 0.47008202272050553, |
| "lb_loss": 0.9999999942024199 |
| } |
| }, |
| { |
| "epoch": 3, |
| "checkpoint_type": "epoch", |
| "metrics": { |
| "loss": 0.48007934931477336, |
| "ce_loss": 0.4700793588515165, |
| "lb_loss": 0.9999999941943272, |
| "training_bytes": 73364838 |
| }, |
| "cumulative_training_bytes": 220094612, |
| "training_bytes_this_epoch": 73364838 |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 221000000, |
| "cumulative_training_bytes": 221000512, |
| "metrics": { |
| "loss": 0.4711359363188718, |
| "ce_loss": 0.46113594585561496, |
| "lb_loss": 0.9999999980875515 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 222000000, |
| "cumulative_training_bytes": 222003838, |
| "metrics": { |
| "loss": 0.47099697650386596, |
| "ce_loss": 0.4609969860406091, |
| "lb_loss": 0.9999999947051712 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 223000000, |
| "cumulative_training_bytes": 223001840, |
| "metrics": { |
| "loss": 0.4709537665049235, |
| "ce_loss": 0.46095377604166665, |
| "lb_loss": 0.9999999970197677 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 224000000, |
| "cumulative_training_bytes": 224000184, |
| "metrics": { |
| "loss": 0.471029573279634, |
| "ce_loss": 0.4610295828163772, |
| "lb_loss": 0.9999999985949277 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 225000000, |
| "cumulative_training_bytes": 225003512, |
| "metrics": { |
| "loss": 0.4712440520377879, |
| "ce_loss": 0.4612440615745311, |
| "lb_loss": 0.9999999992939232 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 226000000, |
| "cumulative_training_bytes": 226001385, |
| "metrics": { |
| "loss": 0.4713188224977113, |
| "ce_loss": 0.46131883203445445, |
| "lb_loss": 0.9999999968217376 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 227000000, |
| "cumulative_training_bytes": 227000179, |
| "metrics": { |
| "loss": 0.47135689835799366, |
| "ce_loss": 0.4613569078947368, |
| "lb_loss": 0.9999999961936683 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 228000000, |
| "cumulative_training_bytes": 228003643, |
| "metrics": { |
| "loss": 0.47145688767526667, |
| "ce_loss": 0.46145689721200983, |
| "lb_loss": 0.9999999966399342 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 229000000, |
| "cumulative_training_bytes": 229002271, |
| "metrics": { |
| "loss": 0.4714496810236486, |
| "ce_loss": 0.46144969056039176, |
| "lb_loss": 0.9999999964976596 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 230000000, |
| "cumulative_training_bytes": 230001193, |
| "metrics": { |
| "loss": 0.471488837393296, |
| "ce_loss": 0.46148884693003916, |
| "lb_loss": 0.9999999964423842 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 231000000, |
| "cumulative_training_bytes": 231004533, |
| "metrics": { |
| "loss": 0.4715379180509956, |
| "ce_loss": 0.46153792758773876, |
| "lb_loss": 0.9999999968754562 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 232000000, |
| "cumulative_training_bytes": 232002563, |
| "metrics": { |
| "loss": 0.47164020210346846, |
| "ce_loss": 0.46164021164021163, |
| "lb_loss": 0.999999997040388 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 233000000, |
| "cumulative_training_bytes": 233001313, |
| "metrics": { |
| "loss": 0.4716305821361821, |
| "ce_loss": 0.46163059167292525, |
| "lb_loss": 0.9999999968664475 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 234000000, |
| "cumulative_training_bytes": 234004559, |
| "metrics": { |
| "loss": 0.4716438820137795, |
| "ce_loss": 0.46164389155052266, |
| "lb_loss": 0.9999999965317158 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 235000000, |
| "cumulative_training_bytes": 235002620, |
| "metrics": { |
| "loss": 0.47168165902632575, |
| "ce_loss": 0.4616816685630689, |
| "lb_loss": 0.9999999968996284 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 236000000, |
| "cumulative_training_bytes": 236000507, |
| "metrics": { |
| "loss": 0.47173076152510934, |
| "ce_loss": 0.4617307710618525, |
| "lb_loss": 0.9999999962769799 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 237000000, |
| "cumulative_training_bytes": 237000271, |
| "metrics": { |
| "loss": 0.47172198820551603, |
| "ce_loss": 0.4617219977422592, |
| "lb_loss": 0.9999999962747097 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 238000000, |
| "cumulative_training_bytes": 238002987, |
| "metrics": { |
| "loss": 0.4717779420870728, |
| "ce_loss": 0.46177795162381596, |
| "lb_loss": 0.9999999964834067 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 239000000, |
| "cumulative_training_bytes": 239002206, |
| "metrics": { |
| "loss": 0.4717871323819956, |
| "ce_loss": 0.46178714191873876, |
| "lb_loss": 0.9999999965774313 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 240000000, |
| "cumulative_training_bytes": 240000715, |
| "metrics": { |
| "loss": 0.4718072897388838, |
| "ce_loss": 0.46180729927562697, |
| "lb_loss": 0.9999999965749462 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 241000000, |
| "cumulative_training_bytes": 241004242, |
| "metrics": { |
| "loss": 0.4718284848999745, |
| "ce_loss": 0.46182849443671764, |
| "lb_loss": 0.999999996628759 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 242000000, |
| "cumulative_training_bytes": 242003525, |
| "metrics": { |
| "loss": 0.4718596074433453, |
| "ce_loss": 0.4618596169800885, |
| "lb_loss": 0.9999999967032829 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 243000000, |
| "cumulative_training_bytes": 243001106, |
| "metrics": { |
| "loss": 0.47192603786063303, |
| "ce_loss": 0.4619260473973762, |
| "lb_loss": 0.9999999964307841 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 244000000, |
| "cumulative_training_bytes": 244003951, |
| "metrics": { |
| "loss": 0.4719613666542157, |
| "ce_loss": 0.46196137619095884, |
| "lb_loss": 0.9999999957347578 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 245000000, |
| "cumulative_training_bytes": 245002710, |
| "metrics": { |
| "loss": 0.4719913950409957, |
| "ce_loss": 0.46199140457773885, |
| "lb_loss": 0.9999999953838006 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 246000000, |
| "cumulative_training_bytes": 246001859, |
| "metrics": { |
| "loss": 0.47201764715175076, |
| "ce_loss": 0.4620176566884939, |
| "lb_loss": 0.9999999954167429 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 247000000, |
| "cumulative_training_bytes": 247000039, |
| "metrics": { |
| "loss": 0.47202893232057985, |
| "ce_loss": 0.462028941857323, |
| "lb_loss": 0.9999999953506015 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 248000000, |
| "cumulative_training_bytes": 248004378, |
| "metrics": { |
| "loss": 0.4720249720106691, |
| "ce_loss": 0.4620249815474123, |
| "lb_loss": 0.9999999952175502 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 249000000, |
| "cumulative_training_bytes": 249002307, |
| "metrics": { |
| "loss": 0.47205487183481953, |
| "ce_loss": 0.4620548813715627, |
| "lb_loss": 0.9999999950029641 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 250000000, |
| "cumulative_training_bytes": 250002323, |
| "metrics": { |
| "loss": 0.47208340821042055, |
| "ce_loss": 0.4620834177471637, |
| "lb_loss": 0.9999999950152355 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 251000000, |
| "cumulative_training_bytes": 251001550, |
| "metrics": { |
| "loss": 0.4721088753440598, |
| "ce_loss": 0.462108884880803, |
| "lb_loss": 0.9999999951949832 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 252000000, |
| "cumulative_training_bytes": 252003554, |
| "metrics": { |
| "loss": 0.47214146756336317, |
| "ce_loss": 0.46214147710010633, |
| "lb_loss": 0.9999999949748477 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 253000000, |
| "cumulative_training_bytes": 253002278, |
| "metrics": { |
| "loss": 0.4721357178347401, |
| "ce_loss": 0.4621357273714833, |
| "lb_loss": 0.9999999954872901 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 254000000, |
| "cumulative_training_bytes": 254000659, |
| "metrics": { |
| "loss": 0.4721559947520345, |
| "ce_loss": 0.4621560042887777, |
| "lb_loss": 0.9999999952026569 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 255000000, |
| "cumulative_training_bytes": 255004377, |
| "metrics": { |
| "loss": 0.4721760244642288, |
| "ce_loss": 0.46217603400097196, |
| "lb_loss": 0.999999995059154 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 256000000, |
| "cumulative_training_bytes": 256003159, |
| "metrics": { |
| "loss": 0.4721671577148767, |
| "ce_loss": 0.4621671672516199, |
| "lb_loss": 0.9999999949229845 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 257000000, |
| "cumulative_training_bytes": 257002287, |
| "metrics": { |
| "loss": 0.4721662097632306, |
| "ce_loss": 0.46216621929997376, |
| "lb_loss": 0.9999999948098398 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 258000000, |
| "cumulative_training_bytes": 258004823, |
| "metrics": { |
| "loss": 0.4721808876471208, |
| "ce_loss": 0.46218089718386396, |
| "lb_loss": 0.9999999949395878 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 259000000, |
| "cumulative_training_bytes": 259003094, |
| "metrics": { |
| "loss": 0.4721888822503504, |
| "ce_loss": 0.46218889178709355, |
| "lb_loss": 0.9999999949951999 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 260000000, |
| "cumulative_training_bytes": 260003317, |
| "metrics": { |
| "loss": 0.4721786844387214, |
| "ce_loss": 0.46217869397546457, |
| "lb_loss": 0.9999999949394331 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 261000000, |
| "cumulative_training_bytes": 261002561, |
| "metrics": { |
| "loss": 0.47218055651077434, |
| "ce_loss": 0.4621805660475175, |
| "lb_loss": 0.99999999493583 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 262000000, |
| "cumulative_training_bytes": 262000974, |
| "metrics": { |
| "loss": 0.472164943109295, |
| "ce_loss": 0.4621649526460382, |
| "lb_loss": 0.9999999951530288 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 263000000, |
| "cumulative_training_bytes": 263000368, |
| "metrics": { |
| "loss": 0.4721608691775264, |
| "ce_loss": 0.4621608787142696, |
| "lb_loss": 0.9999999952254329 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 264000000, |
| "cumulative_training_bytes": 264000070, |
| "metrics": { |
| "loss": 0.4721552553826562, |
| "ce_loss": 0.46215526491939934, |
| "lb_loss": 0.9999999952748002 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 265000000, |
| "cumulative_training_bytes": 265003237, |
| "metrics": { |
| "loss": 0.4721514470424883, |
| "ce_loss": 0.46215145657923146, |
| "lb_loss": 0.9999999955090628 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 266000000, |
| "cumulative_training_bytes": 266001732, |
| "metrics": { |
| "loss": 0.472134939644883, |
| "ce_loss": 0.4621349491816262, |
| "lb_loss": 0.9999999954745787 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 267000000, |
| "cumulative_training_bytes": 267000618, |
| "metrics": { |
| "loss": 0.4721354851795652, |
| "ce_loss": 0.4621354947163084, |
| "lb_loss": 0.9999999952875617 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 268000000, |
| "cumulative_training_bytes": 268000768, |
| "metrics": { |
| "loss": 0.4721304739180231, |
| "ce_loss": 0.46213048345476626, |
| "lb_loss": 0.9999999954340502 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 269000000, |
| "cumulative_training_bytes": 269004654, |
| "metrics": { |
| "loss": 0.4721118046916244, |
| "ce_loss": 0.46211181422836756, |
| "lb_loss": 0.9999999954804685 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 270000000, |
| "cumulative_training_bytes": 270004377, |
| "metrics": { |
| "loss": 0.47210367398681197, |
| "ce_loss": 0.46210368352355513, |
| "lb_loss": 0.9999999955477443 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 271000000, |
| "cumulative_training_bytes": 271003354, |
| "metrics": { |
| "loss": 0.4720871205580097, |
| "ce_loss": 0.4620871300947529, |
| "lb_loss": 0.9999999955669719 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 272000000, |
| "cumulative_training_bytes": 272002318, |
| "metrics": { |
| "loss": 0.47207904797236305, |
| "ce_loss": 0.4620790575091062, |
| "lb_loss": 0.9999999955075233 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 273000000, |
| "cumulative_training_bytes": 273000768, |
| "metrics": { |
| "loss": 0.4720791549345296, |
| "ce_loss": 0.4620791644712728, |
| "lb_loss": 0.9999999955923258 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 274000000, |
| "cumulative_training_bytes": 274000695, |
| "metrics": { |
| "loss": 0.47207574480267583, |
| "ce_loss": 0.462075754339419, |
| "lb_loss": 0.9999999954434798 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 275000000, |
| "cumulative_training_bytes": 275004246, |
| "metrics": { |
| "loss": 0.4720716687366985, |
| "ce_loss": 0.46207167827344164, |
| "lb_loss": 0.9999999954899187 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 276000000, |
| "cumulative_training_bytes": 276003111, |
| "metrics": { |
| "loss": 0.472073087335005, |
| "ce_loss": 0.4620730968717482, |
| "lb_loss": 0.9999999956066642 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 277000000, |
| "cumulative_training_bytes": 277000979, |
| "metrics": { |
| "loss": 0.4720668020474279, |
| "ce_loss": 0.46206681158417107, |
| "lb_loss": 0.9999999956583769 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 278000000, |
| "cumulative_training_bytes": 278004701, |
| "metrics": { |
| "loss": 0.4720647272778236, |
| "ce_loss": 0.4620647368145668, |
| "lb_loss": 0.9999999954741388 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 279000000, |
| "cumulative_training_bytes": 279002393, |
| "metrics": { |
| "loss": 0.4720607262051711, |
| "ce_loss": 0.46206073574191425, |
| "lb_loss": 0.9999999954478553 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 280000000, |
| "cumulative_training_bytes": 280000517, |
| "metrics": { |
| "loss": 0.4720587552322137, |
| "ce_loss": 0.46205876476895685, |
| "lb_loss": 0.9999999954948015 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 281000000, |
| "cumulative_training_bytes": 281003892, |
| "metrics": { |
| "loss": 0.4720639106916873, |
| "ce_loss": 0.46206392022843046, |
| "lb_loss": 0.9999999954409373 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 282000000, |
| "cumulative_training_bytes": 282002709, |
| "metrics": { |
| "loss": 0.4720607285212052, |
| "ce_loss": 0.46206073805794834, |
| "lb_loss": 0.9999999953744555 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 283000000, |
| "cumulative_training_bytes": 283002118, |
| "metrics": { |
| "loss": 0.4720594536067525, |
| "ce_loss": 0.46205946314349566, |
| "lb_loss": 0.9999999952274025 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 284000000, |
| "cumulative_training_bytes": 284001597, |
| "metrics": { |
| "loss": 0.4720484395794759, |
| "ce_loss": 0.46204844911621906, |
| "lb_loss": 0.9999999951392055 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 285000000, |
| "cumulative_training_bytes": 285004657, |
| "metrics": { |
| "loss": 0.47204512196112824, |
| "ce_loss": 0.4620451314978714, |
| "lb_loss": 0.9999999952588732 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 286000000, |
| "cumulative_training_bytes": 286003945, |
| "metrics": { |
| "loss": 0.47203695832828074, |
| "ce_loss": 0.4620369678650239, |
| "lb_loss": 0.9999999952649491 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 287000000, |
| "cumulative_training_bytes": 287003359, |
| "metrics": { |
| "loss": 0.47202351910248586, |
| "ce_loss": 0.462023528639229, |
| "lb_loss": 0.9999999952794815 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 288000000, |
| "cumulative_training_bytes": 288002717, |
| "metrics": { |
| "loss": 0.4720050370471078, |
| "ce_loss": 0.46200504658385094, |
| "lb_loss": 0.999999995314863 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 289000000, |
| "cumulative_training_bytes": 289000675, |
| "metrics": { |
| "loss": 0.4720141621282818, |
| "ce_loss": 0.46201417166502495, |
| "lb_loss": 0.9999999954834164 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 290000000, |
| "cumulative_training_bytes": 290004466, |
| "metrics": { |
| "loss": 0.47201130287657167, |
| "ce_loss": 0.46201131241331483, |
| "lb_loss": 0.9999999954862502 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 291000000, |
| "cumulative_training_bytes": 291003703, |
| "metrics": { |
| "loss": 0.47201100146250474, |
| "ce_loss": 0.4620110109992479, |
| "lb_loss": 0.9999999954479428 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 292000000, |
| "cumulative_training_bytes": 292003073, |
| "metrics": { |
| "loss": 0.47199688168662524, |
| "ce_loss": 0.4619968912233684, |
| "lb_loss": 0.9999999953785503 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 293000000, |
| "cumulative_training_bytes": 293002336, |
| "metrics": { |
| "loss": 0.471982888995309, |
| "ce_loss": 0.46198289853205216, |
| "lb_loss": 0.999999995291241 |
| } |
| }, |
| { |
| "epoch": 4, |
| "checkpoint_type": "epoch", |
| "metrics": { |
| "loss": 0.4719807145017819, |
| "ce_loss": 0.4619807240385251, |
| "lb_loss": 0.9999999953011074, |
| "training_bytes": 73364897 |
| }, |
| "cumulative_training_bytes": 293459509, |
| "training_bytes_this_epoch": 73364897 |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 294000000, |
| "cumulative_training_bytes": 294002959, |
| "metrics": { |
| "loss": 0.46361327171325684, |
| "ce_loss": 0.45361328125, |
| "lb_loss": 1.0000000010643686 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 295000000, |
| "cumulative_training_bytes": 295000958, |
| "metrics": { |
| "loss": 0.4638804444726908, |
| "ce_loss": 0.45388045400943394, |
| "lb_loss": 0.9999999983130761 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 296000000, |
| "cumulative_training_bytes": 296004583, |
| "metrics": { |
| "loss": 0.4637797523680187, |
| "ce_loss": 0.4537797619047619, |
| "lb_loss": 0.9999999964804877 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 297000000, |
| "cumulative_training_bytes": 297004491, |
| "metrics": { |
| "loss": 0.4639212028435578, |
| "ce_loss": 0.453921212380301, |
| "lb_loss": 0.99999999673846 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 298000000, |
| "cumulative_training_bytes": 298003144, |
| "metrics": { |
| "loss": 0.4639024884621896, |
| "ce_loss": 0.4539024979989328, |
| "lb_loss": 0.9999999972010626 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 299000000, |
| "cumulative_training_bytes": 299002426, |
| "metrics": { |
| "loss": 0.46409215472397425, |
| "ce_loss": 0.4540921642607174, |
| "lb_loss": 0.9999999960889341 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 300000000, |
| "cumulative_training_bytes": 300000259, |
| "metrics": { |
| "loss": 0.4642571948739314, |
| "ce_loss": 0.4542572044106746, |
| "lb_loss": 0.9999999966419918 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 301000000, |
| "cumulative_training_bytes": 301004717, |
| "metrics": { |
| "loss": 0.4644429732717401, |
| "ce_loss": 0.4544429828084833, |
| "lb_loss": 0.9999999972036381 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 302000000, |
| "cumulative_training_bytes": 302003675, |
| "metrics": { |
| "loss": 0.4645393988060491, |
| "ce_loss": 0.4545394083427923, |
| "lb_loss": 0.9999999968201836 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 303000000, |
| "cumulative_training_bytes": 303002992, |
| "metrics": { |
| "loss": 0.46463151675898856, |
| "ce_loss": 0.4546315262957317, |
| "lb_loss": 0.9999999958506929 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 304000000, |
| "cumulative_training_bytes": 304002454, |
| "metrics": { |
| "loss": 0.46469988840483917, |
| "ce_loss": 0.45469989794158233, |
| "lb_loss": 0.9999999959148611 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 305000000, |
| "cumulative_training_bytes": 305000421, |
| "metrics": { |
| "loss": 0.46478842927628206, |
| "ce_loss": 0.4547884388130252, |
| "lb_loss": 0.9999999954169538 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 306000000, |
| "cumulative_training_bytes": 306000366, |
| "metrics": { |
| "loss": 0.4648084845177812, |
| "ce_loss": 0.45480849405452434, |
| "lb_loss": 0.9999999952288625 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 307000000, |
| "cumulative_training_bytes": 307003767, |
| "metrics": { |
| "loss": 0.46488371204399437, |
| "ce_loss": 0.45488372158073753, |
| "lb_loss": 0.9999999947501816 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 308000000, |
| "cumulative_training_bytes": 308003273, |
| "metrics": { |
| "loss": 0.46492637217382704, |
| "ce_loss": 0.4549263817105702, |
| "lb_loss": 0.9999999948722913 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 309000000, |
| "cumulative_training_bytes": 309001955, |
| "metrics": { |
| "loss": 0.4650177849250353, |
| "ce_loss": 0.4550177944617785, |
| "lb_loss": 0.9999999948857169 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 310000000, |
| "cumulative_training_bytes": 310000618, |
| "metrics": { |
| "loss": 0.46512678612435326, |
| "ce_loss": 0.45512679566109643, |
| "lb_loss": 0.9999999947577269 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 311000000, |
| "cumulative_training_bytes": 311003605, |
| "metrics": { |
| "loss": 0.4651898614209683, |
| "ce_loss": 0.45518987095771146, |
| "lb_loss": 0.9999999944975259 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 312000000, |
| "cumulative_training_bytes": 312002374, |
| "metrics": { |
| "loss": 0.46525381499254553, |
| "ce_loss": 0.4552538245292887, |
| "lb_loss": 0.9999999948718807 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 313000000, |
| "cumulative_training_bytes": 313000646, |
| "metrics": { |
| "loss": 0.46529959886896405, |
| "ce_loss": 0.4552996084057072, |
| "lb_loss": 0.9999999948529984 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 314000000, |
| "cumulative_training_bytes": 314003740, |
| "metrics": { |
| "loss": 0.4653735964934669, |
| "ce_loss": 0.45537360603021004, |
| "lb_loss": 0.9999999949075097 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 315000000, |
| "cumulative_training_bytes": 315002206, |
| "metrics": { |
| "loss": 0.4654086916505177, |
| "ce_loss": 0.45540870118726084, |
| "lb_loss": 0.9999999946338379 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 316000000, |
| "cumulative_training_bytes": 316001950, |
| "metrics": { |
| "loss": 0.4654431973357025, |
| "ce_loss": 0.45544320687244566, |
| "lb_loss": 0.9999999943715984 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 317000000, |
| "cumulative_training_bytes": 317000791, |
| "metrics": { |
| "loss": 0.46548764816150606, |
| "ce_loss": 0.4554876576982492, |
| "lb_loss": 0.9999999944999216 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 318000000, |
| "cumulative_training_bytes": 318004641, |
| "metrics": { |
| "loss": 0.4654948232862517, |
| "ce_loss": 0.45549483282299486, |
| "lb_loss": 0.9999999947837105 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 319000000, |
| "cumulative_training_bytes": 319003086, |
| "metrics": { |
| "loss": 0.4655434100484884, |
| "ce_loss": 0.4555434195852316, |
| "lb_loss": 0.9999999944898516 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 320000000, |
| "cumulative_training_bytes": 320001778, |
| "metrics": { |
| "loss": 0.465581560727232, |
| "ce_loss": 0.45558157026397517, |
| "lb_loss": 0.9999999946863233 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 321000000, |
| "cumulative_training_bytes": 321000981, |
| "metrics": { |
| "loss": 0.46562760379952445, |
| "ce_loss": 0.4556276133362676, |
| "lb_loss": 0.9999999947950874 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 322000000, |
| "cumulative_training_bytes": 322004196, |
| "metrics": { |
| "loss": 0.4656789493981982, |
| "ce_loss": 0.45567895893494137, |
| "lb_loss": 0.9999999946541104 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 323000000, |
| "cumulative_training_bytes": 323002832, |
| "metrics": { |
| "loss": 0.46569165251397077, |
| "ce_loss": 0.45569166205071393, |
| "lb_loss": 0.9999999947468088 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 324000000, |
| "cumulative_training_bytes": 324001797, |
| "metrics": { |
| "loss": 0.4657140671024059, |
| "ce_loss": 0.4557140766391491, |
| "lb_loss": 0.999999994833444 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 325000000, |
| "cumulative_training_bytes": 325000041, |
| "metrics": { |
| "loss": 0.4657587817776304, |
| "ce_loss": 0.4557587913143736, |
| "lb_loss": 0.9999999947404972 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 326000000, |
| "cumulative_training_bytes": 326004479, |
| "metrics": { |
| "loss": 0.4657744545760399, |
| "ce_loss": 0.45577446411278305, |
| "lb_loss": 0.9999999946984546 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 327000000, |
| "cumulative_training_bytes": 327002957, |
| "metrics": { |
| "loss": 0.46581329000792293, |
| "ce_loss": 0.4558132995446661, |
| "lb_loss": 0.9999999949338637 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 328000000, |
| "cumulative_training_bytes": 328000993, |
| "metrics": { |
| "loss": 0.46584905699891094, |
| "ce_loss": 0.4558490665356541, |
| "lb_loss": 0.9999999950636244 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 329000000, |
| "cumulative_training_bytes": 329004548, |
| "metrics": { |
| "loss": 0.46587311273511606, |
| "ce_loss": 0.4558731222718592, |
| "lb_loss": 0.9999999950647906 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 330000000, |
| "cumulative_training_bytes": 330003193, |
| "metrics": { |
| "loss": 0.46590269337887313, |
| "ce_loss": 0.4559027029156163, |
| "lb_loss": 0.9999999950652384 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 331000000, |
| "cumulative_training_bytes": 331001539, |
| "metrics": { |
| "loss": 0.4659443212297556, |
| "ce_loss": 0.45594433076649876, |
| "lb_loss": 0.9999999952273176 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 332000000, |
| "cumulative_training_bytes": 332000431, |
| "metrics": { |
| "loss": 0.4659673291222077, |
| "ce_loss": 0.45596733865895084, |
| "lb_loss": 0.999999995358501 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 333000000, |
| "cumulative_training_bytes": 333002833, |
| "metrics": { |
| "loss": 0.4660065443039876, |
| "ce_loss": 0.45600655384073074, |
| "lb_loss": 0.9999999952570605 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 334000000, |
| "cumulative_training_bytes": 334001068, |
| "metrics": { |
| "loss": 0.4660331893839696, |
| "ce_loss": 0.45603319892071276, |
| "lb_loss": 0.9999999951315508 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 335000000, |
| "cumulative_training_bytes": 335003509, |
| "metrics": { |
| "loss": 0.46606596045683835, |
| "ce_loss": 0.4560659699935815, |
| "lb_loss": 0.9999999950961286 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 336000000, |
| "cumulative_training_bytes": 336002128, |
| "metrics": { |
| "loss": 0.46609774687351324, |
| "ce_loss": 0.4560977564102564, |
| "lb_loss": 0.9999999952791763 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 337000000, |
| "cumulative_training_bytes": 337000277, |
| "metrics": { |
| "loss": 0.4661180713423349, |
| "ce_loss": 0.45611808087907807, |
| "lb_loss": 0.9999999952879081 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 338000000, |
| "cumulative_training_bytes": 338003542, |
| "metrics": { |
| "loss": 0.46613396786584715, |
| "ce_loss": 0.4561339774025903, |
| "lb_loss": 0.9999999951345795 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 339000000, |
| "cumulative_training_bytes": 339002579, |
| "metrics": { |
| "loss": 0.46615904976972905, |
| "ce_loss": 0.4561590593064722, |
| "lb_loss": 0.9999999952349278 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 340000000, |
| "cumulative_training_bytes": 340001843, |
| "metrics": { |
| "loss": 0.466160675684611, |
| "ce_loss": 0.45616068522135417, |
| "lb_loss": 0.9999999953371783 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 341000000, |
| "cumulative_training_bytes": 341000740, |
| "metrics": { |
| "loss": 0.46616024152128255, |
| "ce_loss": 0.4561602510580257, |
| "lb_loss": 0.999999995264938 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 342000000, |
| "cumulative_training_bytes": 342004048, |
| "metrics": { |
| "loss": 0.46617922308584747, |
| "ce_loss": 0.45617923262259064, |
| "lb_loss": 0.9999999950532847 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 343000000, |
| "cumulative_training_bytes": 343002454, |
| "metrics": { |
| "loss": 0.46619792751678457, |
| "ce_loss": 0.45619793705352774, |
| "lb_loss": 0.9999999951296723 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 344000000, |
| "cumulative_training_bytes": 344004748, |
| "metrics": { |
| "loss": 0.46621765693409895, |
| "ce_loss": 0.4562176664708421, |
| "lb_loss": 0.9999999949462396 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 345000000, |
| "cumulative_training_bytes": 345002981, |
| "metrics": { |
| "loss": 0.46624444264770004, |
| "ce_loss": 0.4562444521844432, |
| "lb_loss": 0.9999999950441586 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 346000000, |
| "cumulative_training_bytes": 346003277, |
| "metrics": { |
| "loss": 0.46624570144544913, |
| "ce_loss": 0.4562457109821923, |
| "lb_loss": 0.9999999948028797 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 347000000, |
| "cumulative_training_bytes": 347001483, |
| "metrics": { |
| "loss": 0.46624762068102216, |
| "ce_loss": 0.4562476302177653, |
| "lb_loss": 0.9999999947594974 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 348000000, |
| "cumulative_training_bytes": 348004310, |
| "metrics": { |
| "loss": 0.46627123857453584, |
| "ce_loss": 0.456271248111279, |
| "lb_loss": 0.999999994612219 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 349000000, |
| "cumulative_training_bytes": 349004164, |
| "metrics": { |
| "loss": 0.46626359431897824, |
| "ce_loss": 0.4562636038557214, |
| "lb_loss": 0.9999999945946386 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 350000000, |
| "cumulative_training_bytes": 350004442, |
| "metrics": { |
| "loss": 0.4662550478562946, |
| "ce_loss": 0.4562550573930378, |
| "lb_loss": 0.9999999947105541 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 351000000, |
| "cumulative_training_bytes": 351002177, |
| "metrics": { |
| "loss": 0.466267367664369, |
| "ce_loss": 0.45626737720111216, |
| "lb_loss": 0.999999994822446 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 352000000, |
| "cumulative_training_bytes": 352000011, |
| "metrics": { |
| "loss": 0.46628234035973715, |
| "ce_loss": 0.4562823498964803, |
| "lb_loss": 0.9999999949601373 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 353000000, |
| "cumulative_training_bytes": 353004120, |
| "metrics": { |
| "loss": 0.46630100515956036, |
| "ce_loss": 0.4563010146963035, |
| "lb_loss": 0.9999999949043415 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 354000000, |
| "cumulative_training_bytes": 354003159, |
| "metrics": { |
| "loss": 0.46630914099777, |
| "ce_loss": 0.45630915053451315, |
| "lb_loss": 0.9999999947640699 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 355000000, |
| "cumulative_training_bytes": 355002650, |
| "metrics": { |
| "loss": 0.4663111660186373, |
| "ce_loss": 0.4563111755553805, |
| "lb_loss": 0.9999999947222609 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 356000000, |
| "cumulative_training_bytes": 356000221, |
| "metrics": { |
| "loss": 0.46632584438767544, |
| "ce_loss": 0.4563258539244186, |
| "lb_loss": 0.9999999946402025 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 357000000, |
| "cumulative_training_bytes": 357003562, |
| "metrics": { |
| "loss": 0.4663275968949346, |
| "ce_loss": 0.45632760643167775, |
| "lb_loss": 0.9999999947339453 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 358000000, |
| "cumulative_training_bytes": 358002372, |
| "metrics": { |
| "loss": 0.46632777510045353, |
| "ce_loss": 0.4563277846371967, |
| "lb_loss": 0.999999994712455 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 359000000, |
| "cumulative_training_bytes": 359001228, |
| "metrics": { |
| "loss": 0.4663366162306953, |
| "ce_loss": 0.45633662576743844, |
| "lb_loss": 0.9999999946960287 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 360000000, |
| "cumulative_training_bytes": 360004692, |
| "metrics": { |
| "loss": 0.46634837241174876, |
| "ce_loss": 0.4563483819484919, |
| "lb_loss": 0.9999999946848255 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 361000000, |
| "cumulative_training_bytes": 361003194, |
| "metrics": { |
| "loss": 0.46635145987719595, |
| "ce_loss": 0.4563514694139391, |
| "lb_loss": 0.9999999946350686 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 362000000, |
| "cumulative_training_bytes": 362001994, |
| "metrics": { |
| "loss": 0.46635404292824484, |
| "ce_loss": 0.456354052464988, |
| "lb_loss": 0.999999994725887 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 363000000, |
| "cumulative_training_bytes": 363000593, |
| "metrics": { |
| "loss": 0.4663513775850499, |
| "ce_loss": 0.4563513871217931, |
| "lb_loss": 0.9999999945730852 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 364000000, |
| "cumulative_training_bytes": 364004905, |
| "metrics": { |
| "loss": 0.4663431971157206, |
| "ce_loss": 0.45634320665246375, |
| "lb_loss": 0.9999999945724587 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 365000000, |
| "cumulative_training_bytes": 365004297, |
| "metrics": { |
| "loss": 0.4663444636370049, |
| "ce_loss": 0.45634447317374804, |
| "lb_loss": 0.9999999944583877 |
| } |
| }, |
| { |
| "checkpoint_type": "bytes", |
| "bytes_threshold": 366000000, |
| "cumulative_training_bytes": 366004177, |
| "metrics": { |
| "loss": 0.46634177937590804, |
| "ce_loss": 0.4563417889126512, |
| "lb_loss": 0.9999999944191601 |
| } |
| }, |
| { |
| "epoch": 5, |
| "checkpoint_type": "epoch", |
| "metrics": { |
| "loss": 0.46634681425067503, |
| "ce_loss": 0.4563468237874182, |
| "lb_loss": 0.9999999944030793, |
| "training_bytes": 73364767 |
| }, |
| "cumulative_training_bytes": 366824276, |
| "training_bytes_this_epoch": 73364767 |
| } |
| ] |
| } |