| { | |
| "run_name": "run_large_20260116_074355", | |
| "timestamp": "20260116_074355", | |
| "phase": "large", | |
| "config": { | |
| "arch_layout": [ | |
| "m4", | |
| [ | |
| "T1m4", | |
| [ | |
| "T22" | |
| ], | |
| "m4T1" | |
| ], | |
| "m4" | |
| ], | |
| "d_model": [ | |
| 1024, | |
| 1024, | |
| 1536 | |
| ], | |
| "d_intermediate": [ | |
| 0, | |
| 2816, | |
| 4096 | |
| ], | |
| "vocab_size": 256, | |
| "ssm_cfg": { | |
| "chunk_size": 256, | |
| "d_conv": 4, | |
| "d_state": 128, | |
| "expand": 2 | |
| }, | |
| "attn_cfg": { | |
| "num_heads": [ | |
| 16, | |
| 16, | |
| 16 | |
| ], | |
| "rotary_emb_dim": [ | |
| 32, | |
| 32, | |
| 48 | |
| ], | |
| "window_size": [ | |
| 1023, | |
| 1023, | |
| -1 | |
| ] | |
| }, | |
| "tie_embeddings": false | |
| }, | |
| "training_args": { | |
| "data": "datasets/moses/smiles-molecules-moses_all.csv", | |
| "max_samples": null, | |
| "batch_size": 16, | |
| "epochs": 5, | |
| "lr": 0.0001, | |
| "weight_decay": 0.1, | |
| "gradient_accumulation": 8, | |
| "concatenate": true, | |
| "num_concatenate": 10, | |
| "concatenate_separator": " ", | |
| "checkpoint_bytes": 1000000, | |
| "num_test_samples": 5, | |
| "num_visualize": 5, | |
| "skip_visualization": false | |
| }, | |
| "dataset_info": { | |
| "train_size": 193691, | |
| "test_size": 5, | |
| "test_smiles_file": "checkpoints/run_large_20260116_074355/test_smiles.txt" | |
| }, | |
| "model_info": { | |
| "num_parameters": 622923776, | |
| "device": "cuda", | |
| "dtype": "torch.bfloat16", | |
| "use_amp": true | |
| }, | |
| "training_history": [ | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 1000000, | |
| "cumulative_training_bytes": 1000870, | |
| "metrics": { | |
| "loss": 2.529138337930984, | |
| "ce_loss": 2.5191383136094676, | |
| "lb_loss": 0.9999999887139134 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 2000000, | |
| "cumulative_training_bytes": 2000188, | |
| "metrics": { | |
| "loss": 1.9090615828361737, | |
| "ce_loss": 1.899061575443787, | |
| "lb_loss": 0.9999999883612232 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 3000000, | |
| "cumulative_training_bytes": 3005690, | |
| "metrics": { | |
| "loss": 1.6126774746601975, | |
| "ce_loss": 1.6026774729330708, | |
| "lb_loss": 0.9999999873281464 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 4000000, | |
| "cumulative_training_bytes": 4005451, | |
| "metrics": { | |
| "loss": 1.4273629051191592, | |
| "ce_loss": 1.4173629062038404, | |
| "lb_loss": 0.999999986705611 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 5000000, | |
| "cumulative_training_bytes": 5005919, | |
| "metrics": { | |
| "loss": 1.2988270140708762, | |
| "ce_loss": 1.2888270168439717, | |
| "lb_loss": 0.9999999867545234 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 6000000, | |
| "cumulative_training_bytes": 6004942, | |
| "metrics": { | |
| "loss": 1.204308031815026, | |
| "ce_loss": 1.1943080357142857, | |
| "lb_loss": 0.9999999881377948 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 7000000, | |
| "cumulative_training_bytes": 7000756, | |
| "metrics": { | |
| "loss": 1.1318895240406415, | |
| "ce_loss": 1.1218895287404902, | |
| "lb_loss": 0.9999999886131448 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 8000000, | |
| "cumulative_training_bytes": 8000777, | |
| "metrics": { | |
| "loss": 1.0758717356348884, | |
| "ce_loss": 1.0658717409393492, | |
| "lb_loss": 0.9999999891106899 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 9000000, | |
| "cumulative_training_bytes": 9000410, | |
| "metrics": { | |
| "loss": 1.0291768310102956, | |
| "ce_loss": 1.0191768367850098, | |
| "lb_loss": 0.9999999889098524 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 10000000, | |
| "cumulative_training_bytes": 10001418, | |
| "metrics": { | |
| "loss": 0.9909772497662426, | |
| "ce_loss": 0.9809772559171598, | |
| "lb_loss": 0.9999999883612232 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 11000000, | |
| "cumulative_training_bytes": 11000495, | |
| "metrics": { | |
| "loss": 0.9587417565859281, | |
| "ce_loss": 0.9487417630446476, | |
| "lb_loss": 0.9999999891627919 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 12000000, | |
| "cumulative_training_bytes": 12002164, | |
| "metrics": { | |
| "loss": 0.9310948989800447, | |
| "ce_loss": 0.9210949056952663, | |
| "lb_loss": 0.9999999888314768 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 13000000, | |
| "cumulative_training_bytes": 13003659, | |
| "metrics": { | |
| "loss": 0.90723912597625, | |
| "ce_loss": 0.8972391329085116, | |
| "lb_loss": 0.9999999889309535 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 14000000, | |
| "cumulative_training_bytes": 14003026, | |
| "metrics": { | |
| "loss": 0.8862365868166152, | |
| "ce_loss": 0.8762365939349113, | |
| "lb_loss": 0.9999999888650664 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 15000000, | |
| "cumulative_training_bytes": 15002454, | |
| "metrics": { | |
| "loss": 0.8678279019906912, | |
| "ce_loss": 0.857827909270217, | |
| "lb_loss": 0.9999999884787866 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 16000000, | |
| "cumulative_training_bytes": 16002028, | |
| "metrics": { | |
| "loss": 0.8511597905638655, | |
| "ce_loss": 0.8411597979844675, | |
| "lb_loss": 0.9999999888241291 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 17000000, | |
| "cumulative_training_bytes": 17002370, | |
| "metrics": { | |
| "loss": 0.8362065383129074, | |
| "ce_loss": 0.8262065458579881, | |
| "lb_loss": 0.9999999890043642 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 18000000, | |
| "cumulative_training_bytes": 18002503, | |
| "metrics": { | |
| "loss": 0.8229468611805631, | |
| "ce_loss": 0.8129468688362919, | |
| "lb_loss": 0.9999999889490402 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 19000000, | |
| "cumulative_training_bytes": 19002715, | |
| "metrics": { | |
| "loss": 0.8110695578011721, | |
| "ce_loss": 0.8010695655559016, | |
| "lb_loss": 0.9999999891222916 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 20000000, | |
| "cumulative_training_bytes": 20001653, | |
| "metrics": { | |
| "loss": 0.7999523775111994, | |
| "ce_loss": 0.7899523853550295, | |
| "lb_loss": 0.9999999889255275 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 21000000, | |
| "cumulative_training_bytes": 21000685, | |
| "metrics": { | |
| "loss": 0.7896056028109734, | |
| "ce_loss": 0.7796056107354185, | |
| "lb_loss": 0.9999999889322454 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 22000000, | |
| "cumulative_training_bytes": 22000998, | |
| "metrics": { | |
| "loss": 0.7800996339468625, | |
| "ce_loss": 0.7700996419445939, | |
| "lb_loss": 0.999999989082635 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 23000000, | |
| "cumulative_training_bytes": 23001073, | |
| "metrics": { | |
| "loss": 0.7714011769752316, | |
| "ce_loss": 0.7614011850398765, | |
| "lb_loss": 0.9999999893119533 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 24000000, | |
| "cumulative_training_bytes": 24001251, | |
| "metrics": { | |
| "loss": 0.7632320863020255, | |
| "ce_loss": 0.7532320944280079, | |
| "lb_loss": 0.9999999893164259 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 25000000, | |
| "cumulative_training_bytes": 25001805, | |
| "metrics": { | |
| "loss": 0.7556249918175872, | |
| "ce_loss": 0.745625, | |
| "lb_loss": 0.9999999893910786 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 26000000, | |
| "cumulative_training_bytes": 26001634, | |
| "metrics": { | |
| "loss": 0.7484830439389158, | |
| "ce_loss": 0.7384830521734183, | |
| "lb_loss": 0.9999999890937337 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 27000000, | |
| "cumulative_training_bytes": 27003072, | |
| "metrics": { | |
| "loss": 0.7417793857562759, | |
| "ce_loss": 0.7317793940390094, | |
| "lb_loss": 0.9999999890927288 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 28000000, | |
| "cumulative_training_bytes": 28002976, | |
| "metrics": { | |
| "loss": 0.7355289699057855, | |
| "ce_loss": 0.7255289782333052, | |
| "lb_loss": 0.9999999888902585 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 29000000, | |
| "cumulative_training_bytes": 29002890, | |
| "metrics": { | |
| "loss": 0.7296044090966063, | |
| "ce_loss": 0.7196044174658233, | |
| "lb_loss": 0.9999999887139134 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 30000000, | |
| "cumulative_training_bytes": 30002588, | |
| "metrics": { | |
| "loss": 0.7242658939587294, | |
| "ce_loss": 0.7142659023668639, | |
| "lb_loss": 0.9999999886551317 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 31000000, | |
| "cumulative_training_bytes": 31003334, | |
| "metrics": { | |
| "loss": 0.7192434659780585, | |
| "ce_loss": 0.7092434744225997, | |
| "lb_loss": 0.9999999885205026 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 32000000, | |
| "cumulative_training_bytes": 32004404, | |
| "metrics": { | |
| "loss": 0.7142980222165938, | |
| "ce_loss": 0.7042980306952663, | |
| "lb_loss": 0.9999999885596115 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 33000000, | |
| "cumulative_training_bytes": 33005122, | |
| "metrics": { | |
| "loss": 0.7095906652385923, | |
| "ce_loss": 0.6995906737493276, | |
| "lb_loss": 0.9999999883719108 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 34000000, | |
| "cumulative_training_bytes": 34004602, | |
| "metrics": { | |
| "loss": 0.7051187425468012, | |
| "ce_loss": 0.6951187510877131, | |
| "lb_loss": 0.9999999883301035 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 35000000, | |
| "cumulative_training_bytes": 35004371, | |
| "metrics": { | |
| "loss": 0.7008211294272545, | |
| "ce_loss": 0.6908211379966188, | |
| "lb_loss": 0.9999999883813769 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 36000000, | |
| "cumulative_training_bytes": 36004728, | |
| "metrics": { | |
| "loss": 0.696762272592127, | |
| "ce_loss": 0.6867622811883629, | |
| "lb_loss": 0.9999999884298019 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 37000000, | |
| "cumulative_training_bytes": 37005490, | |
| "metrics": { | |
| "loss": 0.6928459753060596, | |
| "ce_loss": 0.6828459839277147, | |
| "lb_loss": 0.999999988513738 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 38000000, | |
| "cumulative_training_bytes": 38005107, | |
| "metrics": { | |
| "loss": 0.6892191306410902, | |
| "ce_loss": 0.6792191392868265, | |
| "lb_loss": 0.9999999885654123 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 39000000, | |
| "cumulative_training_bytes": 39003704, | |
| "metrics": { | |
| "loss": 0.685711899615441, | |
| "ce_loss": 0.6757119082840237, | |
| "lb_loss": 0.9999999885782633 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 40000000, | |
| "cumulative_training_bytes": 40003473, | |
| "metrics": { | |
| "loss": 0.6823355358733229, | |
| "ce_loss": 0.6723355445636094, | |
| "lb_loss": 0.9999999885728373 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 41000000, | |
| "cumulative_training_bytes": 41003986, | |
| "metrics": { | |
| "loss": 0.6790900475742463, | |
| "ce_loss": 0.6690900562851783, | |
| "lb_loss": 0.9999999886106871 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 42000000, | |
| "cumulative_training_bytes": 42002642, | |
| "metrics": { | |
| "loss": 0.6761168771879745, | |
| "ce_loss": 0.6661168859185687, | |
| "lb_loss": 0.9999999885711579 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 43000000, | |
| "cumulative_training_bytes": 43002853, | |
| "metrics": { | |
| "loss": 0.6731352473742306, | |
| "ce_loss": 0.6631352561235723, | |
| "lb_loss": 0.999999988599084 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 44000000, | |
| "cumulative_training_bytes": 44002051, | |
| "metrics": { | |
| "loss": 0.670238190533462, | |
| "ce_loss": 0.6602381993006993, | |
| "lb_loss": 0.9999999885936781 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 45000000, | |
| "cumulative_training_bytes": 45001206, | |
| "metrics": { | |
| "loss": 0.667434964095347, | |
| "ce_loss": 0.6574349728796844, | |
| "lb_loss": 0.99999998859635 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 46000000, | |
| "cumulative_training_bytes": 46001844, | |
| "metrics": { | |
| "loss": 0.6648008498306411, | |
| "ce_loss": 0.6548008586313352, | |
| "lb_loss": 0.999999988568237 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 47000000, | |
| "cumulative_training_bytes": 47001995, | |
| "metrics": { | |
| "loss": 0.6622222702343818, | |
| "ce_loss": 0.6522222790507365, | |
| "lb_loss": 0.9999999885713365 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 48000000, | |
| "cumulative_training_bytes": 48001607, | |
| "metrics": { | |
| "loss": 0.6597010513264282, | |
| "ce_loss": 0.6497010601577909, | |
| "lb_loss": 0.9999999884347003 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 49000000, | |
| "cumulative_training_bytes": 49001375, | |
| "metrics": { | |
| "loss": 0.6572643425308871, | |
| "ce_loss": 0.6472643513766453, | |
| "lb_loss": 0.9999999883828165 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 50000000, | |
| "cumulative_training_bytes": 50001945, | |
| "metrics": { | |
| "loss": 0.6550795029747416, | |
| "ce_loss": 0.6450795118343196, | |
| "lb_loss": 0.9999999882695237 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 51000000, | |
| "cumulative_training_bytes": 51000374, | |
| "metrics": { | |
| "loss": 0.652884262069249, | |
| "ce_loss": 0.6428842709421047, | |
| "lb_loss": 0.9999999882851528 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 52000000, | |
| "cumulative_training_bytes": 52000635, | |
| "metrics": { | |
| "loss": 0.6506681074377727, | |
| "ce_loss": 0.6406681163233955, | |
| "lb_loss": 0.9999999883205282 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 53000000, | |
| "cumulative_training_bytes": 53001343, | |
| "metrics": { | |
| "loss": 0.6485172646032642, | |
| "ce_loss": 0.6385172735011723, | |
| "lb_loss": 0.9999999883479141 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 54000000, | |
| "cumulative_training_bytes": 54001159, | |
| "metrics": { | |
| "loss": 0.6464495068967486, | |
| "ce_loss": 0.636449515806487, | |
| "lb_loss": 0.999999988335098 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 55000000, | |
| "cumulative_training_bytes": 55001589, | |
| "metrics": { | |
| "loss": 0.6445485554952144, | |
| "ce_loss": 0.6345485644163529, | |
| "lb_loss": 0.9999999882842726 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 56000000, | |
| "cumulative_training_bytes": 56001216, | |
| "metrics": { | |
| "loss": 0.6427827731895931, | |
| "ce_loss": 0.6327827821217245, | |
| "lb_loss": 0.9999999883549252 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 57000000, | |
| "cumulative_training_bytes": 57001016, | |
| "metrics": { | |
| "loss": 0.6409422922614553, | |
| "ce_loss": 0.6309423012041939, | |
| "lb_loss": 0.9999999883426606 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 58000000, | |
| "cumulative_training_bytes": 58002267, | |
| "metrics": { | |
| "loss": 0.6390827834618332, | |
| "ce_loss": 0.6290827924148134, | |
| "lb_loss": 0.999999988324738 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 59000000, | |
| "cumulative_training_bytes": 59002956, | |
| "metrics": { | |
| "loss": 0.6373783727190031, | |
| "ce_loss": 0.6273783816818774, | |
| "lb_loss": 0.9999999883731788 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 60000000, | |
| "cumulative_training_bytes": 60002317, | |
| "metrics": { | |
| "loss": 0.6357118993115848, | |
| "ce_loss": 0.6257119082840237, | |
| "lb_loss": 0.9999999883494669 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 61000000, | |
| "cumulative_training_bytes": 61002300, | |
| "metrics": { | |
| "loss": 0.6340526998164479, | |
| "ce_loss": 0.6240527087981376, | |
| "lb_loss": 0.9999999883149687 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 62000000, | |
| "cumulative_training_bytes": 62001732, | |
| "metrics": { | |
| "loss": 0.6324138432473804, | |
| "ce_loss": 0.6224138522380225, | |
| "lb_loss": 0.9999999883839774 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 63000000, | |
| "cumulative_training_bytes": 63001056, | |
| "metrics": { | |
| "loss": 0.6308046337404284, | |
| "ce_loss": 0.6208046427397389, | |
| "lb_loss": 0.9999999883724198 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 64000000, | |
| "cumulative_training_bytes": 64001869, | |
| "metrics": { | |
| "loss": 0.629222959280014, | |
| "ce_loss": 0.6192229682877219, | |
| "lb_loss": 0.99999998833918 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 65000000, | |
| "cumulative_training_bytes": 65000902, | |
| "metrics": { | |
| "loss": 0.6279146900055459, | |
| "ce_loss": 0.6179146990213928, | |
| "lb_loss": 0.9999999883557972 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 66000000, | |
| "cumulative_training_bytes": 66000374, | |
| "metrics": { | |
| "loss": 0.6265192889859434, | |
| "ce_loss": 0.6165192980096826, | |
| "lb_loss": 0.9999999883986297 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 67000000, | |
| "cumulative_training_bytes": 67001396, | |
| "metrics": { | |
| "loss": 0.6250934401428511, | |
| "ce_loss": 0.6150934491742471, | |
| "lb_loss": 0.9999999883717513 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 68000000, | |
| "cumulative_training_bytes": 68001292, | |
| "metrics": { | |
| "loss": 0.6237620441829789, | |
| "ce_loss": 0.6137620532218064, | |
| "lb_loss": 0.9999999883871563 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 69000000, | |
| "cumulative_training_bytes": 69000668, | |
| "metrics": { | |
| "loss": 0.6224395933143885, | |
| "ce_loss": 0.6124396023604322, | |
| "lb_loss": 0.9999999883612232 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 70000000, | |
| "cumulative_training_bytes": 70004965, | |
| "metrics": { | |
| "loss": 0.6211014537353425, | |
| "ce_loss": 0.6111014627884371, | |
| "lb_loss": 0.999999988337017 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 71000000, | |
| "cumulative_training_bytes": 71004801, | |
| "metrics": { | |
| "loss": 0.6197786367734274, | |
| "ce_loss": 0.6097786458333333, | |
| "lb_loss": 0.9999999884068966 | |
| } | |
| }, | |
| { | |
| "epoch": 1, | |
| "checkpoint_type": "epoch", | |
| "metrics": { | |
| "loss": 0.6189756860974089, | |
| "ce_loss": 0.6089756951614902, | |
| "lb_loss": 0.9999999884247052, | |
| "training_bytes": 71629753 | |
| }, | |
| "cumulative_training_bytes": 71629753, | |
| "training_bytes_this_epoch": 71629753 | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 72000000, | |
| "cumulative_training_bytes": 72002906, | |
| "metrics": { | |
| "loss": 0.5295622424473838, | |
| "ce_loss": 0.519562251984127, | |
| "lb_loss": 0.9999999914850507 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 73000000, | |
| "cumulative_training_bytes": 73002868, | |
| "metrics": { | |
| "loss": 0.5258186192348085, | |
| "ce_loss": 0.5158186287715517, | |
| "lb_loss": 0.999999987411088 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 74000000, | |
| "cumulative_training_bytes": 74003132, | |
| "metrics": { | |
| "loss": 0.5239981980692419, | |
| "ce_loss": 0.513998207605985, | |
| "lb_loss": 0.999999988554719 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 75000000, | |
| "cumulative_training_bytes": 75003207, | |
| "metrics": { | |
| "loss": 0.5230722218229059, | |
| "ce_loss": 0.5130722313596491, | |
| "lb_loss": 0.9999999898567534 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 76000000, | |
| "cumulative_training_bytes": 76002408, | |
| "metrics": { | |
| "loss": 0.5233044982440417, | |
| "ce_loss": 0.5133045077807848, | |
| "lb_loss": 0.9999999891114655 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 77000000, | |
| "cumulative_training_bytes": 77002547, | |
| "metrics": { | |
| "loss": 0.5234610656284551, | |
| "ce_loss": 0.5134610751651982, | |
| "lb_loss": 0.9999999891031156 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 78000000, | |
| "cumulative_training_bytes": 78001228, | |
| "metrics": { | |
| "loss": 0.5231586487733776, | |
| "ce_loss": 0.5131586583101208, | |
| "lb_loss": 0.9999999895954752 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 79000000, | |
| "cumulative_training_bytes": 79000992, | |
| "metrics": { | |
| "loss": 0.5228191367313146, | |
| "ce_loss": 0.5128191462680578, | |
| "lb_loss": 0.9999999892367214 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 80000000, | |
| "cumulative_training_bytes": 80000829, | |
| "metrics": { | |
| "loss": 0.5224226936434688, | |
| "ce_loss": 0.512422703180212, | |
| "lb_loss": 0.9999999890479098 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 81000000, | |
| "cumulative_training_bytes": 81001739, | |
| "metrics": { | |
| "loss": 0.5220134354601003, | |
| "ce_loss": 0.5120134449968434, | |
| "lb_loss": 0.9999999892756795 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 82000000, | |
| "cumulative_training_bytes": 82001573, | |
| "metrics": { | |
| "loss": 0.5230133960536733, | |
| "ce_loss": 0.5130134055904164, | |
| "lb_loss": 0.9999999893915292 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 83000000, | |
| "cumulative_training_bytes": 83002964, | |
| "metrics": { | |
| "loss": 0.523225756267107, | |
| "ce_loss": 0.5132257658038502, | |
| "lb_loss": 0.999999989052841 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 84000000, | |
| "cumulative_training_bytes": 84002697, | |
| "metrics": { | |
| "loss": 0.5232898338396318, | |
| "ce_loss": 0.513289843376375, | |
| "lb_loss": 0.999999988939932 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 85000000, | |
| "cumulative_training_bytes": 85002372, | |
| "metrics": { | |
| "loss": 0.5232414865915754, | |
| "ce_loss": 0.5132414961283186, | |
| "lb_loss": 0.9999999892395155 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 86000000, | |
| "cumulative_training_bytes": 86002248, | |
| "metrics": { | |
| "loss": 0.5230535232956982, | |
| "ce_loss": 0.5130535328324414, | |
| "lb_loss": 0.9999999892520237 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 87000000, | |
| "cumulative_training_bytes": 87002691, | |
| "metrics": { | |
| "loss": 0.522905801423611, | |
| "ce_loss": 0.5129058109603541, | |
| "lb_loss": 0.9999999892170196 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 88000000, | |
| "cumulative_training_bytes": 88003238, | |
| "metrics": { | |
| "loss": 0.5226462454325377, | |
| "ce_loss": 0.5126462549692808, | |
| "lb_loss": 0.9999999895309515 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 89000000, | |
| "cumulative_training_bytes": 89001864, | |
| "metrics": { | |
| "loss": 0.5224724498041969, | |
| "ce_loss": 0.5124724593409401, | |
| "lb_loss": 0.9999999894636203 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 90000000, | |
| "cumulative_training_bytes": 90001197, | |
| "metrics": { | |
| "loss": 0.5223119117595532, | |
| "ce_loss": 0.5123119212962963, | |
| "lb_loss": 0.9999999893460296 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 91000000, | |
| "cumulative_training_bytes": 91001396, | |
| "metrics": { | |
| "loss": 0.5221548230457248, | |
| "ce_loss": 0.512154832582468, | |
| "lb_loss": 0.9999999894772497 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 92000000, | |
| "cumulative_training_bytes": 92000962, | |
| "metrics": { | |
| "loss": 0.5219621010862601, | |
| "ce_loss": 0.5119621106230032, | |
| "lb_loss": 0.9999999897513943 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 93000000, | |
| "cumulative_training_bytes": 93001493, | |
| "metrics": { | |
| "loss": 0.5217127924081627, | |
| "ce_loss": 0.5117128019449059, | |
| "lb_loss": 0.9999999896863502 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 94000000, | |
| "cumulative_training_bytes": 94001023, | |
| "metrics": { | |
| "loss": 0.521506949601051, | |
| "ce_loss": 0.5115069591377942, | |
| "lb_loss": 0.9999999895010068 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 95000000, | |
| "cumulative_training_bytes": 95001477, | |
| "metrics": { | |
| "loss": 0.521244056918953, | |
| "ce_loss": 0.5112440664556962, | |
| "lb_loss": 0.9999999895729597 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 96000000, | |
| "cumulative_training_bytes": 96001379, | |
| "metrics": { | |
| "loss": 0.5210496801634268, | |
| "ce_loss": 0.51104968970017, | |
| "lb_loss": 0.9999999896100668 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 97000000, | |
| "cumulative_training_bytes": 97000217, | |
| "metrics": { | |
| "loss": 0.5208391965325199, | |
| "ce_loss": 0.510839206069263, | |
| "lb_loss": 0.9999999898249534 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 98000000, | |
| "cumulative_training_bytes": 98005873, | |
| "metrics": { | |
| "loss": 0.5206186290063255, | |
| "ce_loss": 0.5106186385430687, | |
| "lb_loss": 0.9999999899054494 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 99000000, | |
| "cumulative_training_bytes": 99005280, | |
| "metrics": { | |
| "loss": 0.5203992097197946, | |
| "ce_loss": 0.5103992192565378, | |
| "lb_loss": 0.9999999899392203 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 100000000, | |
| "cumulative_training_bytes": 100005594, | |
| "metrics": { | |
| "loss": 0.5202429012222226, | |
| "ce_loss": 0.5102429107589658, | |
| "lb_loss": 0.9999999899457553 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 101000000, | |
| "cumulative_training_bytes": 101004874, | |
| "metrics": { | |
| "loss": 0.5200043963041431, | |
| "ce_loss": 0.5100044058408862, | |
| "lb_loss": 0.9999999900238752 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 102000000, | |
| "cumulative_training_bytes": 102004781, | |
| "metrics": { | |
| "loss": 0.5197838760787613, | |
| "ce_loss": 0.5097838856155045, | |
| "lb_loss": 0.9999999899575345 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 103000000, | |
| "cumulative_training_bytes": 103004802, | |
| "metrics": { | |
| "loss": 0.5195229020227514, | |
| "ce_loss": 0.5095229115594946, | |
| "lb_loss": 0.9999999899403814 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 104000000, | |
| "cumulative_training_bytes": 104005501, | |
| "metrics": { | |
| "loss": 0.5193112426333957, | |
| "ce_loss": 0.5093112521701388, | |
| "lb_loss": 0.9999999898807173 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 105000000, | |
| "cumulative_training_bytes": 105000121, | |
| "metrics": { | |
| "loss": 0.5191852187433987, | |
| "ce_loss": 0.5091852282801419, | |
| "lb_loss": 0.9999999897382783 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 106000000, | |
| "cumulative_training_bytes": 106000572, | |
| "metrics": { | |
| "loss": 0.5189939814685934, | |
| "ce_loss": 0.5089939910053366, | |
| "lb_loss": 0.9999999898316057 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 107000000, | |
| "cumulative_training_bytes": 107000443, | |
| "metrics": { | |
| "loss": 0.5187420054557292, | |
| "ce_loss": 0.5087420149924724, | |
| "lb_loss": 0.9999999897800668 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 108000000, | |
| "cumulative_training_bytes": 108005744, | |
| "metrics": { | |
| "loss": 0.5185298223191449, | |
| "ce_loss": 0.5085298318558881, | |
| "lb_loss": 0.9999999897912019 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 109000000, | |
| "cumulative_training_bytes": 109005847, | |
| "metrics": { | |
| "loss": 0.5183260476304248, | |
| "ce_loss": 0.5083260571671679, | |
| "lb_loss": 0.9999999897057674 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 110000000, | |
| "cumulative_training_bytes": 110000442, | |
| "metrics": { | |
| "loss": 0.5181242074062021, | |
| "ce_loss": 0.5081242169429453, | |
| "lb_loss": 0.9999999897702437 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 111000000, | |
| "cumulative_training_bytes": 111004375, | |
| "metrics": { | |
| "loss": 0.517925187683392, | |
| "ce_loss": 0.5079251972201352, | |
| "lb_loss": 0.9999999897897378 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 112000000, | |
| "cumulative_training_bytes": 112003987, | |
| "metrics": { | |
| "loss": 0.5178511293663929, | |
| "ce_loss": 0.507851138903136, | |
| "lb_loss": 0.9999999898941129 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 113000000, | |
| "cumulative_training_bytes": 113002857, | |
| "metrics": { | |
| "loss": 0.5178904143693057, | |
| "ce_loss": 0.5078904239060489, | |
| "lb_loss": 0.9999999897803562 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 114000000, | |
| "cumulative_training_bytes": 114002857, | |
| "metrics": { | |
| "loss": 0.5177966734777779, | |
| "ce_loss": 0.5077966830145211, | |
| "lb_loss": 0.9999999896802905 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 115000000, | |
| "cumulative_training_bytes": 115002430, | |
| "metrics": { | |
| "loss": 0.5176577004448419, | |
| "ce_loss": 0.5076577099815851, | |
| "lb_loss": 0.9999999896986652 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 116000000, | |
| "cumulative_training_bytes": 116002108, | |
| "metrics": { | |
| "loss": 0.5175234279632568, | |
| "ce_loss": 0.5075234375, | |
| "lb_loss": 0.9999999896844228 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 117000000, | |
| "cumulative_training_bytes": 117001640, | |
| "metrics": { | |
| "loss": 0.5173624745713544, | |
| "ce_loss": 0.5073624841080976, | |
| "lb_loss": 0.9999999896474916 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 118000000, | |
| "cumulative_training_bytes": 118002958, | |
| "metrics": { | |
| "loss": 0.5171740742537646, | |
| "ce_loss": 0.5071740837905078, | |
| "lb_loss": 0.9999999896805941 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 119000000, | |
| "cumulative_training_bytes": 119002472, | |
| "metrics": { | |
| "loss": 0.5171307139708127, | |
| "ce_loss": 0.5071307235075558, | |
| "lb_loss": 0.9999999897718519 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 120000000, | |
| "cumulative_training_bytes": 120002108, | |
| "metrics": { | |
| "loss": 0.5170375470312607, | |
| "ce_loss": 0.5070375565680039, | |
| "lb_loss": 0.9999999898447566 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 121000000, | |
| "cumulative_training_bytes": 121001481, | |
| "metrics": { | |
| "loss": 0.516899705861699, | |
| "ce_loss": 0.5068997153984421, | |
| "lb_loss": 0.99999998980757 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 122000000, | |
| "cumulative_training_bytes": 122002988, | |
| "metrics": { | |
| "loss": 0.5167299444361251, | |
| "ce_loss": 0.5067299539728682, | |
| "lb_loss": 0.9999999897998629 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 123000000, | |
| "cumulative_training_bytes": 123002164, | |
| "metrics": { | |
| "loss": 0.5165764635284417, | |
| "ce_loss": 0.5065764730651848, | |
| "lb_loss": 0.9999999898679655 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 124000000, | |
| "cumulative_training_bytes": 124003331, | |
| "metrics": { | |
| "loss": 0.5166918485885392, | |
| "ce_loss": 0.5066918581252824, | |
| "lb_loss": 0.9999999899469346 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 125000000, | |
| "cumulative_training_bytes": 125004265, | |
| "metrics": { | |
| "loss": 0.516667159430112, | |
| "ce_loss": 0.5066671689668552, | |
| "lb_loss": 0.9999999899568718 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 126000000, | |
| "cumulative_training_bytes": 126004045, | |
| "metrics": { | |
| "loss": 0.5165791951014506, | |
| "ce_loss": 0.5065792046381937, | |
| "lb_loss": 0.9999999899923866 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 127000000, | |
| "cumulative_training_bytes": 127003554, | |
| "metrics": { | |
| "loss": 0.5164666518987735, | |
| "ce_loss": 0.5064666614355167, | |
| "lb_loss": 0.9999999899438258 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 128000000, | |
| "cumulative_training_bytes": 128002625, | |
| "metrics": { | |
| "loss": 0.5163453918460235, | |
| "ce_loss": 0.5063454013827666, | |
| "lb_loss": 0.9999999898281746 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 129000000, | |
| "cumulative_training_bytes": 129002471, | |
| "metrics": { | |
| "loss": 0.516234602327751, | |
| "ce_loss": 0.5062346118644941, | |
| "lb_loss": 0.9999999898394887 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 130000000, | |
| "cumulative_training_bytes": 130003807, | |
| "metrics": { | |
| "loss": 0.5160949440918804, | |
| "ce_loss": 0.5060949536286236, | |
| "lb_loss": 0.999999989862498 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 131000000, | |
| "cumulative_training_bytes": 131005142, | |
| "metrics": { | |
| "loss": 0.5159886006401377, | |
| "ce_loss": 0.5059886101768809, | |
| "lb_loss": 0.9999999899381895 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 132000000, | |
| "cumulative_training_bytes": 132000275, | |
| "metrics": { | |
| "loss": 0.5158503684158198, | |
| "ce_loss": 0.5058503779525629, | |
| "lb_loss": 0.9999999899286085 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 133000000, | |
| "cumulative_training_bytes": 133000355, | |
| "metrics": { | |
| "loss": 0.515711167490349, | |
| "ce_loss": 0.5057111770270921, | |
| "lb_loss": 0.9999999899203098 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 134000000, | |
| "cumulative_training_bytes": 134000325, | |
| "metrics": { | |
| "loss": 0.5156527688998378, | |
| "ce_loss": 0.505652778436581, | |
| "lb_loss": 0.9999999898783498 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 135000000, | |
| "cumulative_training_bytes": 135000221, | |
| "metrics": { | |
| "loss": 0.5155272795038731, | |
| "ce_loss": 0.5055272890406163, | |
| "lb_loss": 0.9999999898154529 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 136000000, | |
| "cumulative_training_bytes": 136000822, | |
| "metrics": { | |
| "loss": 0.5154010211186479, | |
| "ce_loss": 0.5054010306553911, | |
| "lb_loss": 0.9999999898092987 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 137000000, | |
| "cumulative_training_bytes": 137000781, | |
| "metrics": { | |
| "loss": 0.5152766808144517, | |
| "ce_loss": 0.5052766903511948, | |
| "lb_loss": 0.999999989819518 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 138000000, | |
| "cumulative_training_bytes": 138000341, | |
| "metrics": { | |
| "loss": 0.5152090202840646, | |
| "ce_loss": 0.5052090298208077, | |
| "lb_loss": 0.9999999898347433 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 139000000, | |
| "cumulative_training_bytes": 139000421, | |
| "metrics": { | |
| "loss": 0.5150922501462007, | |
| "ce_loss": 0.5050922596829439, | |
| "lb_loss": 0.9999999898495164 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 140000000, | |
| "cumulative_training_bytes": 140000775, | |
| "metrics": { | |
| "loss": 0.514972472613841, | |
| "ce_loss": 0.5049724821505842, | |
| "lb_loss": 0.9999999898071157 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 141000000, | |
| "cumulative_training_bytes": 141000713, | |
| "metrics": { | |
| "loss": 0.514886301395959, | |
| "ce_loss": 0.5048863109327022, | |
| "lb_loss": 0.9999999897913573 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 142000000, | |
| "cumulative_training_bytes": 142001597, | |
| "metrics": { | |
| "loss": 0.5147740040636941, | |
| "ce_loss": 0.5047740136004373, | |
| "lb_loss": 0.9999999898061173 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 143000000, | |
| "cumulative_training_bytes": 143001061, | |
| "metrics": { | |
| "loss": 0.514646232276389, | |
| "ce_loss": 0.5046462418131321, | |
| "lb_loss": 0.9999999898204636 | |
| } | |
| }, | |
| { | |
| "epoch": 2, | |
| "checkpoint_type": "epoch", | |
| "metrics": { | |
| "loss": 0.5146146637141241, | |
| "ce_loss": 0.5046146732508673, | |
| "lb_loss": 0.9999999898131496, | |
| "training_bytes": 71629738 | |
| }, | |
| "cumulative_training_bytes": 143259491, | |
| "training_bytes_this_epoch": 71629738 | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 144000000, | |
| "cumulative_training_bytes": 144000161, | |
| "metrics": { | |
| "loss": 0.49798436546325686, | |
| "ce_loss": 0.487984375, | |
| "lb_loss": 0.9999999852180481 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 145000000, | |
| "cumulative_training_bytes": 145005821, | |
| "metrics": { | |
| "loss": 0.5014327235141043, | |
| "ce_loss": 0.49143273305084745, | |
| "lb_loss": 0.9999999876749718 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 146000000, | |
| "cumulative_training_bytes": 146004728, | |
| "metrics": { | |
| "loss": 0.5015055804417051, | |
| "ce_loss": 0.4915055899784483, | |
| "lb_loss": 0.9999999870257131 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 147000000, | |
| "cumulative_training_bytes": 147005290, | |
| "metrics": { | |
| "loss": 0.5011692736386123, | |
| "ce_loss": 0.49116928317535546, | |
| "lb_loss": 0.9999999863464873 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 148000000, | |
| "cumulative_training_bytes": 148004934, | |
| "metrics": { | |
| "loss": 0.5008042298647531, | |
| "ce_loss": 0.49080423940149626, | |
| "lb_loss": 0.9999999863251189 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 149000000, | |
| "cumulative_training_bytes": 149004071, | |
| "metrics": { | |
| "loss": 0.5005541871676853, | |
| "ce_loss": 0.4905541967044284, | |
| "lb_loss": 0.9999999872319607 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 150000000, | |
| "cumulative_training_bytes": 150002971, | |
| "metrics": { | |
| "loss": 0.5009830633799235, | |
| "ce_loss": 0.49098307291666665, | |
| "lb_loss": 0.9999999876607928 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 151000000, | |
| "cumulative_training_bytes": 151001320, | |
| "metrics": { | |
| "loss": 0.5009520531064959, | |
| "ce_loss": 0.4909520626432391, | |
| "lb_loss": 0.9999999877512228 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 152000000, | |
| "cumulative_training_bytes": 152002938, | |
| "metrics": { | |
| "loss": 0.5011527835112948, | |
| "ce_loss": 0.4911527930480379, | |
| "lb_loss": 0.9999999878613004 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 153000000, | |
| "cumulative_training_bytes": 153003165, | |
| "metrics": { | |
| "loss": 0.5009091244948294, | |
| "ce_loss": 0.49090913403157255, | |
| "lb_loss": 0.9999999879849775 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 154000000, | |
| "cumulative_training_bytes": 154003230, | |
| "metrics": { | |
| "loss": 0.5006602671703053, | |
| "ce_loss": 0.4906602767070485, | |
| "lb_loss": 0.9999999882497451 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 155000000, | |
| "cumulative_training_bytes": 155002454, | |
| "metrics": { | |
| "loss": 0.5005551305766069, | |
| "ce_loss": 0.4905551401133501, | |
| "lb_loss": 0.9999999881691536 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 156000000, | |
| "cumulative_training_bytes": 156001254, | |
| "metrics": { | |
| "loss": 0.5004347557255595, | |
| "ce_loss": 0.4904347652623027, | |
| "lb_loss": 0.9999999879905219 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 157000000, | |
| "cumulative_training_bytes": 157000532, | |
| "metrics": { | |
| "loss": 0.5002469771184441, | |
| "ce_loss": 0.49024698665518723, | |
| "lb_loss": 0.9999999877865644 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 158000000, | |
| "cumulative_training_bytes": 158000997, | |
| "metrics": { | |
| "loss": 0.5000509660852472, | |
| "ce_loss": 0.49005097562199035, | |
| "lb_loss": 0.9999999877537809 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 159000000, | |
| "cumulative_training_bytes": 159001808, | |
| "metrics": { | |
| "loss": 0.4999657282028284, | |
| "ce_loss": 0.4899657377395716, | |
| "lb_loss": 0.9999999877251615 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 160000000, | |
| "cumulative_training_bytes": 160002107, | |
| "metrics": { | |
| "loss": 0.49982096550742644, | |
| "ce_loss": 0.4898209750441696, | |
| "lb_loss": 0.9999999881422562 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 161000000, | |
| "cumulative_training_bytes": 161000944, | |
| "metrics": { | |
| "loss": 0.49971531223718146, | |
| "ce_loss": 0.48971532177392463, | |
| "lb_loss": 0.999999988075096 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 162000000, | |
| "cumulative_training_bytes": 162001675, | |
| "metrics": { | |
| "loss": 0.49959996974829474, | |
| "ce_loss": 0.4895999792850379, | |
| "lb_loss": 0.9999999881656182 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 163000000, | |
| "cumulative_training_bytes": 163002334, | |
| "metrics": { | |
| "loss": 0.4994611925159988, | |
| "ce_loss": 0.48946120205274196, | |
| "lb_loss": 0.9999999879433218 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 164000000, | |
| "cumulative_training_bytes": 164002386, | |
| "metrics": { | |
| "loss": 0.4994817494122015, | |
| "ce_loss": 0.4894817589489447, | |
| "lb_loss": 0.9999999877594569 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 165000000, | |
| "cumulative_training_bytes": 165002682, | |
| "metrics": { | |
| "loss": 0.4994616188646174, | |
| "ce_loss": 0.48946162840136054, | |
| "lb_loss": 0.9999999877222541 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 166000000, | |
| "cumulative_training_bytes": 166002837, | |
| "metrics": { | |
| "loss": 0.49945240412740877, | |
| "ce_loss": 0.48945241366415193, | |
| "lb_loss": 0.9999999876418049 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 167000000, | |
| "cumulative_training_bytes": 167002163, | |
| "metrics": { | |
| "loss": 0.49971019012685014, | |
| "ce_loss": 0.4897101996635933, | |
| "lb_loss": 0.9999999878503365 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 168000000, | |
| "cumulative_training_bytes": 168003307, | |
| "metrics": { | |
| "loss": 0.4998584052468532, | |
| "ce_loss": 0.4898584147835964, | |
| "lb_loss": 0.9999999878709821 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 169000000, | |
| "cumulative_training_bytes": 169003303, | |
| "metrics": { | |
| "loss": 0.4998855771100047, | |
| "ce_loss": 0.48988558664674786, | |
| "lb_loss": 0.9999999880818109 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 170000000, | |
| "cumulative_training_bytes": 170003861, | |
| "metrics": { | |
| "loss": 0.4998251598493188, | |
| "ce_loss": 0.48982516938606196, | |
| "lb_loss": 0.9999999881054448 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 171000000, | |
| "cumulative_training_bytes": 171002515, | |
| "metrics": { | |
| "loss": 0.49974243954621694, | |
| "ce_loss": 0.4897424490829601, | |
| "lb_loss": 0.9999999880765287 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 172000000, | |
| "cumulative_training_bytes": 172003023, | |
| "metrics": { | |
| "loss": 0.4996642684583165, | |
| "ce_loss": 0.4896642779950597, | |
| "lb_loss": 0.999999988098702 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 173000000, | |
| "cumulative_training_bytes": 173004076, | |
| "metrics": { | |
| "loss": 0.4995929074614665, | |
| "ce_loss": 0.48959291699820967, | |
| "lb_loss": 0.9999999882260966 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 174000000, | |
| "cumulative_training_bytes": 174003533, | |
| "metrics": { | |
| "loss": 0.4995156635771906, | |
| "ce_loss": 0.4895156731139338, | |
| "lb_loss": 0.9999999882649054 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 175000000, | |
| "cumulative_training_bytes": 175003279, | |
| "metrics": { | |
| "loss": 0.4994341848947573, | |
| "ce_loss": 0.4894341944315005, | |
| "lb_loss": 0.9999999882901592 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 176000000, | |
| "cumulative_training_bytes": 176004414, | |
| "metrics": { | |
| "loss": 0.4994536801542579, | |
| "ce_loss": 0.48945368969100106, | |
| "lb_loss": 0.9999999883246413 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 177000000, | |
| "cumulative_training_bytes": 177003681, | |
| "metrics": { | |
| "loss": 0.4994922259095132, | |
| "ce_loss": 0.48949223544625636, | |
| "lb_loss": 0.9999999883884341 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 178000000, | |
| "cumulative_training_bytes": 178003552, | |
| "metrics": { | |
| "loss": 0.49946003388968735, | |
| "ce_loss": 0.4894600434264305, | |
| "lb_loss": 0.9999999885196095 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 179000000, | |
| "cumulative_training_bytes": 179003814, | |
| "metrics": { | |
| "loss": 0.4993792064457101, | |
| "ce_loss": 0.48937921598245326, | |
| "lb_loss": 0.9999999884757118 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 180000000, | |
| "cumulative_training_bytes": 180004777, | |
| "metrics": { | |
| "loss": 0.49927101250431966, | |
| "ce_loss": 0.4892710220410628, | |
| "lb_loss": 0.9999999886165687 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 181000000, | |
| "cumulative_training_bytes": 181004979, | |
| "metrics": { | |
| "loss": 0.4991893716358544, | |
| "ce_loss": 0.4891893811725976, | |
| "lb_loss": 0.9999999887219304 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 182000000, | |
| "cumulative_training_bytes": 182000028, | |
| "metrics": { | |
| "loss": 0.49910491208766494, | |
| "ce_loss": 0.4891049216244081, | |
| "lb_loss": 0.9999999886926885 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 183000000, | |
| "cumulative_training_bytes": 183000406, | |
| "metrics": { | |
| "loss": 0.49903300164923065, | |
| "ce_loss": 0.4890330111859738, | |
| "lb_loss": 0.9999999886843475 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 184000000, | |
| "cumulative_training_bytes": 184005785, | |
| "metrics": { | |
| "loss": 0.49893956214856033, | |
| "ce_loss": 0.4889395716853035, | |
| "lb_loss": 0.9999999886434369 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 185000000, | |
| "cumulative_training_bytes": 185005572, | |
| "metrics": { | |
| "loss": 0.4988476605553901, | |
| "ce_loss": 0.48884767009213326, | |
| "lb_loss": 0.9999999887465079 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 186000000, | |
| "cumulative_training_bytes": 186004863, | |
| "metrics": { | |
| "loss": 0.49877141427624133, | |
| "ce_loss": 0.4887714238129845, | |
| "lb_loss": 0.9999999888200036 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 187000000, | |
| "cumulative_training_bytes": 187005080, | |
| "metrics": { | |
| "loss": 0.49868518042335425, | |
| "ce_loss": 0.4886851899600974, | |
| "lb_loss": 0.9999999888417654 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 188000000, | |
| "cumulative_training_bytes": 188005375, | |
| "metrics": { | |
| "loss": 0.498808651366457, | |
| "ce_loss": 0.4888086609032002, | |
| "lb_loss": 0.99999998895714 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 189000000, | |
| "cumulative_training_bytes": 189005528, | |
| "metrics": { | |
| "loss": 0.49888453402165805, | |
| "ce_loss": 0.4888845435584012, | |
| "lb_loss": 0.9999999890674705 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 190000000, | |
| "cumulative_training_bytes": 190005472, | |
| "metrics": { | |
| "loss": 0.4988654473461682, | |
| "ce_loss": 0.4888654568829114, | |
| "lb_loss": 0.9999999890900866 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 191000000, | |
| "cumulative_training_bytes": 191005752, | |
| "metrics": { | |
| "loss": 0.4988207764187656, | |
| "ce_loss": 0.48882078595550876, | |
| "lb_loss": 0.9999999891930109 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 192000000, | |
| "cumulative_training_bytes": 192004576, | |
| "metrics": { | |
| "loss": 0.4987819687953763, | |
| "ce_loss": 0.48878197833211945, | |
| "lb_loss": 0.9999999892121236 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 193000000, | |
| "cumulative_training_bytes": 193005339, | |
| "metrics": { | |
| "loss": 0.49873961034847153, | |
| "ce_loss": 0.4887396198852147, | |
| "lb_loss": 0.9999999891808389 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 194000000, | |
| "cumulative_training_bytes": 194004395, | |
| "metrics": { | |
| "loss": 0.4987082588138865, | |
| "ce_loss": 0.48870826835062964, | |
| "lb_loss": 0.9999999892411392 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 195000000, | |
| "cumulative_training_bytes": 195004180, | |
| "metrics": { | |
| "loss": 0.498664495180238, | |
| "ce_loss": 0.48866450471698114, | |
| "lb_loss": 0.9999999892241346 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 196000000, | |
| "cumulative_training_bytes": 196003177, | |
| "metrics": { | |
| "loss": 0.49861976167988237, | |
| "ce_loss": 0.48861977121662553, | |
| "lb_loss": 0.9999999891944014 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 197000000, | |
| "cumulative_training_bytes": 197004413, | |
| "metrics": { | |
| "loss": 0.49855583482910515, | |
| "ce_loss": 0.4885558443658483, | |
| "lb_loss": 0.9999999892248347 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 198000000, | |
| "cumulative_training_bytes": 198004837, | |
| "metrics": { | |
| "loss": 0.49850712067834546, | |
| "ce_loss": 0.4885071302150886, | |
| "lb_loss": 0.9999999891768478 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 199000000, | |
| "cumulative_training_bytes": 199004820, | |
| "metrics": { | |
| "loss": 0.4984497884544467, | |
| "ce_loss": 0.48844979799118987, | |
| "lb_loss": 0.999999989181197 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 200000000, | |
| "cumulative_training_bytes": 200005030, | |
| "metrics": { | |
| "loss": 0.49839834649818904, | |
| "ce_loss": 0.4883983560349322, | |
| "lb_loss": 0.9999999892413305 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 201000000, | |
| "cumulative_training_bytes": 201005297, | |
| "metrics": { | |
| "loss": 0.4983296733201069, | |
| "ce_loss": 0.4883296828568501, | |
| "lb_loss": 0.999999989268843 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 202000000, | |
| "cumulative_training_bytes": 202003321, | |
| "metrics": { | |
| "loss": 0.49828891289224553, | |
| "ce_loss": 0.4882889224289887, | |
| "lb_loss": 0.9999999892593967 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 203000000, | |
| "cumulative_training_bytes": 203002998, | |
| "metrics": { | |
| "loss": 0.4982311404954446, | |
| "ce_loss": 0.48823115003218776, | |
| "lb_loss": 0.9999999893270083 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 204000000, | |
| "cumulative_training_bytes": 204002425, | |
| "metrics": { | |
| "loss": 0.4981811680153706, | |
| "ce_loss": 0.48818117755211377, | |
| "lb_loss": 0.9999999893517516 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 205000000, | |
| "cumulative_training_bytes": 205003606, | |
| "metrics": { | |
| "loss": 0.49810324124188643, | |
| "ce_loss": 0.4881032507786296, | |
| "lb_loss": 0.9999999894042534 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 206000000, | |
| "cumulative_training_bytes": 206003279, | |
| "metrics": { | |
| "loss": 0.49805855769024665, | |
| "ce_loss": 0.4880585672269898, | |
| "lb_loss": 0.9999999893820093 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 207000000, | |
| "cumulative_training_bytes": 207003222, | |
| "metrics": { | |
| "loss": 0.4979902569280299, | |
| "ce_loss": 0.48799026646477306, | |
| "lb_loss": 0.9999999893936597 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 208000000, | |
| "cumulative_training_bytes": 208003619, | |
| "metrics": { | |
| "loss": 0.49792424448445954, | |
| "ce_loss": 0.4879242540212027, | |
| "lb_loss": 0.9999999893286877 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 209000000, | |
| "cumulative_training_bytes": 209003539, | |
| "metrics": { | |
| "loss": 0.4978634042536447, | |
| "ce_loss": 0.4878634137903879, | |
| "lb_loss": 0.9999999893193369 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 210000000, | |
| "cumulative_training_bytes": 210003625, | |
| "metrics": { | |
| "loss": 0.49781200564499445, | |
| "ce_loss": 0.4878120151817376, | |
| "lb_loss": 0.9999999893102662 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 211000000, | |
| "cumulative_training_bytes": 211003650, | |
| "metrics": { | |
| "loss": 0.49774608864868786, | |
| "ce_loss": 0.487746098185431, | |
| "lb_loss": 0.9999999893274939 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 212000000, | |
| "cumulative_training_bytes": 212004804, | |
| "metrics": { | |
| "loss": 0.4976889819527559, | |
| "ce_loss": 0.48768899148949907, | |
| "lb_loss": 0.9999999893955241 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 213000000, | |
| "cumulative_training_bytes": 213005524, | |
| "metrics": { | |
| "loss": 0.4976399751921955, | |
| "ce_loss": 0.48763998472893866, | |
| "lb_loss": 0.9999999895425125 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 214000000, | |
| "cumulative_training_bytes": 214003509, | |
| "metrics": { | |
| "loss": 0.49759431418147365, | |
| "ce_loss": 0.4875943237182168, | |
| "lb_loss": 0.9999999895308 | |
| } | |
| }, | |
| { | |
| "epoch": 3, | |
| "checkpoint_type": "epoch", | |
| "metrics": { | |
| "loss": 0.49754748757212847, | |
| "ce_loss": 0.48754749710887163, | |
| "lb_loss": 0.9999999895226594, | |
| "training_bytes": 71629674 | |
| }, | |
| "cumulative_training_bytes": 214889165, | |
| "training_bytes_this_epoch": 71629674 | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 215000000, | |
| "cumulative_training_bytes": 215001733, | |
| "metrics": { | |
| "loss": 0.48779604309483576, | |
| "ce_loss": 0.4777960526315789, | |
| "lb_loss": 0.9999999874516537 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 216000000, | |
| "cumulative_training_bytes": 216002545, | |
| "metrics": { | |
| "loss": 0.48633393328240576, | |
| "ce_loss": 0.4763339428191489, | |
| "lb_loss": 0.9999999930249884 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 217000000, | |
| "cumulative_training_bytes": 217001927, | |
| "metrics": { | |
| "loss": 0.4868852758274025, | |
| "ce_loss": 0.47688528536414565, | |
| "lb_loss": 0.9999999886467343 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 218000000, | |
| "cumulative_training_bytes": 218001529, | |
| "metrics": { | |
| "loss": 0.48702292297276256, | |
| "ce_loss": 0.4770229325095057, | |
| "lb_loss": 0.9999999900281202 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 219000000, | |
| "cumulative_training_bytes": 219000917, | |
| "metrics": { | |
| "loss": 0.4870795767941921, | |
| "ce_loss": 0.4770795863309353, | |
| "lb_loss": 0.9999999893655023 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 220000000, | |
| "cumulative_training_bytes": 220000410, | |
| "metrics": { | |
| "loss": 0.4870146027317754, | |
| "ce_loss": 0.47701461226851855, | |
| "lb_loss": 0.9999999893070372 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 221000000, | |
| "cumulative_training_bytes": 221000777, | |
| "metrics": { | |
| "loss": 0.4872185740789393, | |
| "ce_loss": 0.4772185836156825, | |
| "lb_loss": 0.9999999897870067 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 222000000, | |
| "cumulative_training_bytes": 222000038, | |
| "metrics": { | |
| "loss": 0.4873976870106778, | |
| "ce_loss": 0.477397696547421, | |
| "lb_loss": 0.9999999891898398 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 223000000, | |
| "cumulative_training_bytes": 223000213, | |
| "metrics": { | |
| "loss": 0.4873916069840446, | |
| "ce_loss": 0.47739161652078776, | |
| "lb_loss": 0.9999999892615994 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 224000000, | |
| "cumulative_training_bytes": 224005748, | |
| "metrics": { | |
| "loss": 0.4875181402036851, | |
| "ce_loss": 0.4775181497404283, | |
| "lb_loss": 0.9999999899820876 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 225000000, | |
| "cumulative_training_bytes": 225000345, | |
| "metrics": { | |
| "loss": 0.48744476811100407, | |
| "ce_loss": 0.47744477764774723, | |
| "lb_loss": 0.9999999900949567 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 226000000, | |
| "cumulative_training_bytes": 226001108, | |
| "metrics": { | |
| "loss": 0.4874496127342898, | |
| "ce_loss": 0.477449622271033, | |
| "lb_loss": 0.9999999902880611 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 227000000, | |
| "cumulative_training_bytes": 227000927, | |
| "metrics": { | |
| "loss": 0.4873878217407361, | |
| "ce_loss": 0.47738783127747925, | |
| "lb_loss": 0.9999999899833913 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 228000000, | |
| "cumulative_training_bytes": 228001827, | |
| "metrics": { | |
| "loss": 0.48735308302869007, | |
| "ce_loss": 0.47735309256543323, | |
| "lb_loss": 0.9999999899134739 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 229000000, | |
| "cumulative_training_bytes": 229005282, | |
| "metrics": { | |
| "loss": 0.48745310236602296, | |
| "ce_loss": 0.4774531119027661, | |
| "lb_loss": 0.9999999899826225 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 230000000, | |
| "cumulative_training_bytes": 230004719, | |
| "metrics": { | |
| "loss": 0.48747751614818835, | |
| "ce_loss": 0.4774775256849315, | |
| "lb_loss": 0.9999999900620045 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 231000000, | |
| "cumulative_training_bytes": 231005169, | |
| "metrics": { | |
| "loss": 0.4874888625263993, | |
| "ce_loss": 0.47748887206314244, | |
| "lb_loss": 0.9999999900221299 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 232000000, | |
| "cumulative_training_bytes": 232004714, | |
| "metrics": { | |
| "loss": 0.48755626854828965, | |
| "ce_loss": 0.4775562780850328, | |
| "lb_loss": 0.9999999898220897 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 233000000, | |
| "cumulative_training_bytes": 233003723, | |
| "metrics": { | |
| "loss": 0.48759518457658146, | |
| "ce_loss": 0.47759519411332463, | |
| "lb_loss": 0.9999999893716081 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 234000000, | |
| "cumulative_training_bytes": 234003904, | |
| "metrics": { | |
| "loss": 0.4876028272397966, | |
| "ce_loss": 0.47760283677653975, | |
| "lb_loss": 0.9999999895032365 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 235000000, | |
| "cumulative_training_bytes": 235004334, | |
| "metrics": { | |
| "loss": 0.48761258237502153, | |
| "ce_loss": 0.4776125919117647, | |
| "lb_loss": 0.9999999895165949 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 236000000, | |
| "cumulative_training_bytes": 236003979, | |
| "metrics": { | |
| "loss": 0.4876520594951986, | |
| "ce_loss": 0.47765206903194174, | |
| "lb_loss": 0.9999999894117835 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 237000000, | |
| "cumulative_training_bytes": 237005117, | |
| "metrics": { | |
| "loss": 0.4876482565815019, | |
| "ce_loss": 0.47764826611824507, | |
| "lb_loss": 0.9999999893483407 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 238000000, | |
| "cumulative_training_bytes": 238005855, | |
| "metrics": { | |
| "loss": 0.4875992913974775, | |
| "ce_loss": 0.47759930093422065, | |
| "lb_loss": 0.9999999891378277 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 239000000, | |
| "cumulative_training_bytes": 239000123, | |
| "metrics": { | |
| "loss": 0.48757476040190717, | |
| "ce_loss": 0.47757476993865033, | |
| "lb_loss": 0.9999999890444469 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 240000000, | |
| "cumulative_training_bytes": 240005576, | |
| "metrics": { | |
| "loss": 0.4875802319237986, | |
| "ce_loss": 0.4775802414605418, | |
| "lb_loss": 0.999999988767087 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 241000000, | |
| "cumulative_training_bytes": 241005599, | |
| "metrics": { | |
| "loss": 0.4875744516945663, | |
| "ce_loss": 0.4775744612313095, | |
| "lb_loss": 0.9999999888460723 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 242000000, | |
| "cumulative_training_bytes": 242000904, | |
| "metrics": { | |
| "loss": 0.487592761769455, | |
| "ce_loss": 0.47759277130619815, | |
| "lb_loss": 0.9999999890078732 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 243000000, | |
| "cumulative_training_bytes": 243001310, | |
| "metrics": { | |
| "loss": 0.487603389747618, | |
| "ce_loss": 0.4776033992843612, | |
| "lb_loss": 0.9999999890475995 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 244000000, | |
| "cumulative_training_bytes": 244002100, | |
| "metrics": { | |
| "loss": 0.48759343876102107, | |
| "ce_loss": 0.47759344829776423, | |
| "lb_loss": 0.9999999889271046 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 245000000, | |
| "cumulative_training_bytes": 245002388, | |
| "metrics": { | |
| "loss": 0.487588754218415, | |
| "ce_loss": 0.4775887637551582, | |
| "lb_loss": 0.9999999890020119 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 246000000, | |
| "cumulative_training_bytes": 246002524, | |
| "metrics": { | |
| "loss": 0.48757954186112673, | |
| "ce_loss": 0.4775795513978699, | |
| "lb_loss": 0.9999999888680561 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 247000000, | |
| "cumulative_training_bytes": 247002019, | |
| "metrics": { | |
| "loss": 0.4875975351472443, | |
| "ce_loss": 0.47759754468398746, | |
| "lb_loss": 0.9999999887314602 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 248000000, | |
| "cumulative_training_bytes": 248002122, | |
| "metrics": { | |
| "loss": 0.4875781433849866, | |
| "ce_loss": 0.4775781529217298, | |
| "lb_loss": 0.9999999888054893 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 249000000, | |
| "cumulative_training_bytes": 249002522, | |
| "metrics": { | |
| "loss": 0.4875829262828579, | |
| "ce_loss": 0.47758293581960104, | |
| "lb_loss": 0.9999999887614486 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 250000000, | |
| "cumulative_training_bytes": 250002149, | |
| "metrics": { | |
| "loss": 0.48756439685017966, | |
| "ce_loss": 0.4775644063869228, | |
| "lb_loss": 0.9999999885391136 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 251000000, | |
| "cumulative_training_bytes": 251002689, | |
| "metrics": { | |
| "loss": 0.4875350526253083, | |
| "ce_loss": 0.47753506216205144, | |
| "lb_loss": 0.9999999884853553 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 252000000, | |
| "cumulative_training_bytes": 252003095, | |
| "metrics": { | |
| "loss": 0.4875272196166369, | |
| "ce_loss": 0.4775272291533801, | |
| "lb_loss": 0.9999999884059843 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 253000000, | |
| "cumulative_training_bytes": 253004268, | |
| "metrics": { | |
| "loss": 0.4875543662395338, | |
| "ce_loss": 0.47755437577627696, | |
| "lb_loss": 0.9999999883492862 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 254000000, | |
| "cumulative_training_bytes": 254003193, | |
| "metrics": { | |
| "loss": 0.4876513354897319, | |
| "ce_loss": 0.47765134502647505, | |
| "lb_loss": 0.9999999883766434 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 255000000, | |
| "cumulative_training_bytes": 255003102, | |
| "metrics": { | |
| "loss": 0.48769247368349583, | |
| "ce_loss": 0.477692483220239, | |
| "lb_loss": 0.9999999884202216 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 256000000, | |
| "cumulative_training_bytes": 256003674, | |
| "metrics": { | |
| "loss": 0.48770237559207086, | |
| "ce_loss": 0.477702385128814, | |
| "lb_loss": 0.9999999885560454 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 257000000, | |
| "cumulative_training_bytes": 257003033, | |
| "metrics": { | |
| "loss": 0.4877074163449486, | |
| "ce_loss": 0.47770742588169174, | |
| "lb_loss": 0.9999999885095444 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 258000000, | |
| "cumulative_training_bytes": 258002592, | |
| "metrics": { | |
| "loss": 0.48769506706564497, | |
| "ce_loss": 0.47769507660238814, | |
| "lb_loss": 0.9999999883997548 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 259000000, | |
| "cumulative_training_bytes": 259002520, | |
| "metrics": { | |
| "loss": 0.48769663910510797, | |
| "ce_loss": 0.47769664864185113, | |
| "lb_loss": 0.9999999883189288 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 260000000, | |
| "cumulative_training_bytes": 260002567, | |
| "metrics": { | |
| "loss": 0.4876953234577479, | |
| "ce_loss": 0.4776953329944911, | |
| "lb_loss": 0.9999999883980466 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 261000000, | |
| "cumulative_training_bytes": 261002603, | |
| "metrics": { | |
| "loss": 0.48768529297833446, | |
| "ce_loss": 0.4776853025150776, | |
| "lb_loss": 0.9999999883590056 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 262000000, | |
| "cumulative_training_bytes": 262003247, | |
| "metrics": { | |
| "loss": 0.4876651401586851, | |
| "ce_loss": 0.4776651496954283, | |
| "lb_loss": 0.9999999884039695 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 263000000, | |
| "cumulative_training_bytes": 263001525, | |
| "metrics": { | |
| "loss": 0.4876595168898956, | |
| "ce_loss": 0.4776595264266388, | |
| "lb_loss": 0.999999988483717 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 264000000, | |
| "cumulative_training_bytes": 264000697, | |
| "metrics": { | |
| "loss": 0.4876635352674737, | |
| "ce_loss": 0.47766354480421686, | |
| "lb_loss": 0.9999999885458544 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 265000000, | |
| "cumulative_training_bytes": 265000767, | |
| "metrics": { | |
| "loss": 0.4876318777138177, | |
| "ce_loss": 0.4776318872505609, | |
| "lb_loss": 0.9999999885773599 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 266000000, | |
| "cumulative_training_bytes": 266000971, | |
| "metrics": { | |
| "loss": 0.487606885505512, | |
| "ce_loss": 0.4776068950422552, | |
| "lb_loss": 0.9999999885455302 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 267000000, | |
| "cumulative_training_bytes": 267001053, | |
| "metrics": { | |
| "loss": 0.48759194501645314, | |
| "ce_loss": 0.4775919545531963, | |
| "lb_loss": 0.9999999885216898 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 268000000, | |
| "cumulative_training_bytes": 268001478, | |
| "metrics": { | |
| "loss": 0.4875803958498433, | |
| "ce_loss": 0.47758040538658647, | |
| "lb_loss": 0.9999999885053877 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 269000000, | |
| "cumulative_training_bytes": 269001884, | |
| "metrics": { | |
| "loss": 0.48757717574209775, | |
| "ce_loss": 0.4775771852788409, | |
| "lb_loss": 0.9999999885353121 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 270000000, | |
| "cumulative_training_bytes": 270002027, | |
| "metrics": { | |
| "loss": 0.48756191001983834, | |
| "ce_loss": 0.4775619195565815, | |
| "lb_loss": 0.9999999885449523 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 271000000, | |
| "cumulative_training_bytes": 271000479, | |
| "metrics": { | |
| "loss": 0.48755995797090207, | |
| "ce_loss": 0.47755996750764523, | |
| "lb_loss": 0.9999999886422447 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 272000000, | |
| "cumulative_training_bytes": 272000318, | |
| "metrics": { | |
| "loss": 0.48754512359628627, | |
| "ce_loss": 0.47754513313302943, | |
| "lb_loss": 0.9999999886373242 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 273000000, | |
| "cumulative_training_bytes": 273000319, | |
| "metrics": { | |
| "loss": 0.48753497608081103, | |
| "ce_loss": 0.4775349856175542, | |
| "lb_loss": 0.9999999886447113 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 274000000, | |
| "cumulative_training_bytes": 274000061, | |
| "metrics": { | |
| "loss": 0.48750523007787144, | |
| "ce_loss": 0.4775052396146146, | |
| "lb_loss": 0.999999988687647 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 275000000, | |
| "cumulative_training_bytes": 275001198, | |
| "metrics": { | |
| "loss": 0.48749050841531905, | |
| "ce_loss": 0.4774905179520622, | |
| "lb_loss": 0.9999999888112947 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 276000000, | |
| "cumulative_training_bytes": 276001015, | |
| "metrics": { | |
| "loss": 0.4874637873019478, | |
| "ce_loss": 0.47746379683869095, | |
| "lb_loss": 0.9999999888731841 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 277000000, | |
| "cumulative_training_bytes": 277005682, | |
| "metrics": { | |
| "loss": 0.48745421537514483, | |
| "ce_loss": 0.477454224911888, | |
| "lb_loss": 0.9999999888943908 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 278000000, | |
| "cumulative_training_bytes": 278005146, | |
| "metrics": { | |
| "loss": 0.487448876337917, | |
| "ce_loss": 0.47744888587466017, | |
| "lb_loss": 0.9999999889306458 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 279000000, | |
| "cumulative_training_bytes": 279005553, | |
| "metrics": { | |
| "loss": 0.4874283830896873, | |
| "ce_loss": 0.47742839262643044, | |
| "lb_loss": 0.9999999888832607 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 280000000, | |
| "cumulative_training_bytes": 280005819, | |
| "metrics": { | |
| "loss": 0.4874081643046925, | |
| "ce_loss": 0.4774081738414357, | |
| "lb_loss": 0.999999988875244 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 281000000, | |
| "cumulative_training_bytes": 281005718, | |
| "metrics": { | |
| "loss": 0.48739380087134704, | |
| "ce_loss": 0.4773938104080902, | |
| "lb_loss": 0.9999999888781381 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 282000000, | |
| "cumulative_training_bytes": 282000131, | |
| "metrics": { | |
| "loss": 0.4873868256223998, | |
| "ce_loss": 0.477386835159143, | |
| "lb_loss": 0.9999999888747105 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 283000000, | |
| "cumulative_training_bytes": 283000083, | |
| "metrics": { | |
| "loss": 0.4873677663949743, | |
| "ce_loss": 0.4773677759317175, | |
| "lb_loss": 0.9999999888153911 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 284000000, | |
| "cumulative_training_bytes": 284005053, | |
| "metrics": { | |
| "loss": 0.4873527026668353, | |
| "ce_loss": 0.47735271220357844, | |
| "lb_loss": 0.9999999888608048 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 285000000, | |
| "cumulative_training_bytes": 285005105, | |
| "metrics": { | |
| "loss": 0.4873396199463792, | |
| "ce_loss": 0.47733962948312236, | |
| "lb_loss": 0.9999999888687697 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 286000000, | |
| "cumulative_training_bytes": 286004749, | |
| "metrics": { | |
| "loss": 0.4873238175807375, | |
| "ce_loss": 0.47732382711748067, | |
| "lb_loss": 0.9999999888616331 | |
| } | |
| }, | |
| { | |
| "epoch": 4, | |
| "checkpoint_type": "epoch", | |
| "metrics": { | |
| "loss": 0.48730673014812387, | |
| "ce_loss": 0.47730673968486703, | |
| "lb_loss": 0.9999999889466027, | |
| "training_bytes": 71629719 | |
| }, | |
| "cumulative_training_bytes": 286518884, | |
| "training_bytes_this_epoch": 71629719 | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 287000000, | |
| "cumulative_training_bytes": 287004957, | |
| "metrics": { | |
| "loss": 0.4769635956461837, | |
| "ce_loss": 0.46696360518292684, | |
| "lb_loss": 0.9999999905504832 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 288000000, | |
| "cumulative_training_bytes": 288004914, | |
| "metrics": { | |
| "loss": 0.4783764844871612, | |
| "ce_loss": 0.46837649402390436, | |
| "lb_loss": 0.9999999943007511 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 289000000, | |
| "cumulative_training_bytes": 289004460, | |
| "metrics": { | |
| "loss": 0.4789918059394473, | |
| "ce_loss": 0.46899181547619045, | |
| "lb_loss": 0.9999999903497242 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 290000000, | |
| "cumulative_training_bytes": 290004004, | |
| "metrics": { | |
| "loss": 0.4792706027298103, | |
| "ce_loss": 0.4692706122665535, | |
| "lb_loss": 0.999999990791133 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 291000000, | |
| "cumulative_training_bytes": 291005649, | |
| "metrics": { | |
| "loss": 0.47934778070072387, | |
| "ce_loss": 0.46934779023746703, | |
| "lb_loss": 0.9999999906425425 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 292000000, | |
| "cumulative_training_bytes": 292000020, | |
| "metrics": { | |
| "loss": 0.4794418101176845, | |
| "ce_loss": 0.46944181965442766, | |
| "lb_loss": 0.9999999907310272 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 293000000, | |
| "cumulative_training_bytes": 293000330, | |
| "metrics": { | |
| "loss": 0.47953124046325685, | |
| "ce_loss": 0.46953125, | |
| "lb_loss": 0.9999999902564096 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 294000000, | |
| "cumulative_training_bytes": 294005832, | |
| "metrics": { | |
| "loss": 0.47959454332886947, | |
| "ce_loss": 0.46959455286561264, | |
| "lb_loss": 0.9999999900109212 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 295000000, | |
| "cumulative_training_bytes": 295000237, | |
| "metrics": { | |
| "loss": 0.4795432345316448, | |
| "ce_loss": 0.469543244068388, | |
| "lb_loss": 0.9999999901421488 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 296000000, | |
| "cumulative_training_bytes": 296000199, | |
| "metrics": { | |
| "loss": 0.47965827893079743, | |
| "ce_loss": 0.4696582884675406, | |
| "lb_loss": 0.9999999900286861 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 297000000, | |
| "cumulative_training_bytes": 297005707, | |
| "metrics": { | |
| "loss": 0.47973316625332457, | |
| "ce_loss": 0.46973317579006774, | |
| "lb_loss": 0.9999999901443787 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 298000000, | |
| "cumulative_training_bytes": 298005767, | |
| "metrics": { | |
| "loss": 0.47985183068736814, | |
| "ce_loss": 0.4698518402241113, | |
| "lb_loss": 0.999999989989122 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 299000000, | |
| "cumulative_training_bytes": 299000563, | |
| "metrics": { | |
| "loss": 0.4799011210346177, | |
| "ce_loss": 0.46990113057136085, | |
| "lb_loss": 0.9999999897408791 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 300000000, | |
| "cumulative_training_bytes": 300005848, | |
| "metrics": { | |
| "loss": 0.47993609182350255, | |
| "ce_loss": 0.4699361013602457, | |
| "lb_loss": 0.9999999892507639 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 301000000, | |
| "cumulative_training_bytes": 301000438, | |
| "metrics": { | |
| "loss": 0.4799799723288065, | |
| "ce_loss": 0.4699799818655497, | |
| "lb_loss": 0.9999999894772348 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 302000000, | |
| "cumulative_training_bytes": 302002460, | |
| "metrics": { | |
| "loss": 0.47996920037342505, | |
| "ce_loss": 0.4699692099101682, | |
| "lb_loss": 0.9999999895418455 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 303000000, | |
| "cumulative_training_bytes": 303002560, | |
| "metrics": { | |
| "loss": 0.4800305672137057, | |
| "ce_loss": 0.47003057675044885, | |
| "lb_loss": 0.9999999895344088 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 304000000, | |
| "cumulative_training_bytes": 304002632, | |
| "metrics": { | |
| "loss": 0.4800855735709075, | |
| "ce_loss": 0.47008558310765064, | |
| "lb_loss": 0.9999999897497768 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 305000000, | |
| "cumulative_training_bytes": 305003714, | |
| "metrics": { | |
| "loss": 0.4801058582186203, | |
| "ce_loss": 0.47010586775536345, | |
| "lb_loss": 0.9999999895028644 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 306000000, | |
| "cumulative_training_bytes": 306003953, | |
| "metrics": { | |
| "loss": 0.4801525383065132, | |
| "ce_loss": 0.4701525478432564, | |
| "lb_loss": 0.9999999896615273 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 307000000, | |
| "cumulative_training_bytes": 307005112, | |
| "metrics": { | |
| "loss": 0.48019917422517533, | |
| "ce_loss": 0.4701991837619185, | |
| "lb_loss": 0.9999999893052631 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 308000000, | |
| "cumulative_training_bytes": 308003803, | |
| "metrics": { | |
| "loss": 0.4802888162208326, | |
| "ce_loss": 0.47028882575757575, | |
| "lb_loss": 0.9999999895732922 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 309000000, | |
| "cumulative_training_bytes": 309001801, | |
| "metrics": { | |
| "loss": 0.48038025089363323, | |
| "ce_loss": 0.4703802604303764, | |
| "lb_loss": 0.9999999895350624 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 310000000, | |
| "cumulative_training_bytes": 310001190, | |
| "metrics": { | |
| "loss": 0.4804092607190532, | |
| "ce_loss": 0.4704092702557964, | |
| "lb_loss": 0.9999999891696197 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 311000000, | |
| "cumulative_training_bytes": 311001090, | |
| "metrics": { | |
| "loss": 0.4804406215062832, | |
| "ce_loss": 0.47044063104302636, | |
| "lb_loss": 0.9999999890357422 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 312000000, | |
| "cumulative_training_bytes": 312001765, | |
| "metrics": { | |
| "loss": 0.4804813137621421, | |
| "ce_loss": 0.47048132329888526, | |
| "lb_loss": 0.9999999891061645 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 313000000, | |
| "cumulative_training_bytes": 313002485, | |
| "metrics": { | |
| "loss": 0.4805145679772233, | |
| "ce_loss": 0.47051457751396647, | |
| "lb_loss": 0.9999999891313095 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 314000000, | |
| "cumulative_training_bytes": 314002181, | |
| "metrics": { | |
| "loss": 0.4805563366895704, | |
| "ce_loss": 0.47055634622631354, | |
| "lb_loss": 0.9999999891931286 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 315000000, | |
| "cumulative_training_bytes": 315002170, | |
| "metrics": { | |
| "loss": 0.48057691104293687, | |
| "ce_loss": 0.47057692057968004, | |
| "lb_loss": 0.9999999892629904 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 316000000, | |
| "cumulative_training_bytes": 316003419, | |
| "metrics": { | |
| "loss": 0.4805855045640196, | |
| "ce_loss": 0.47058551410076277, | |
| "lb_loss": 0.9999999891486526 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 317000000, | |
| "cumulative_training_bytes": 317003347, | |
| "metrics": { | |
| "loss": 0.48059125914894896, | |
| "ce_loss": 0.4705912686856921, | |
| "lb_loss": 0.9999999891343891 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 318000000, | |
| "cumulative_training_bytes": 318002949, | |
| "metrics": { | |
| "loss": 0.48062565129502377, | |
| "ce_loss": 0.47062566083176693, | |
| "lb_loss": 0.999999989210663 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 319000000, | |
| "cumulative_training_bytes": 319002955, | |
| "metrics": { | |
| "loss": 0.4806742936150149, | |
| "ce_loss": 0.47067430315175807, | |
| "lb_loss": 0.9999999891845097 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 320000000, | |
| "cumulative_training_bytes": 320003241, | |
| "metrics": { | |
| "loss": 0.4806913787630094, | |
| "ce_loss": 0.47069138829975254, | |
| "lb_loss": 0.9999999892757991 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 321000000, | |
| "cumulative_training_bytes": 321002538, | |
| "metrics": { | |
| "loss": 0.4807014395365364, | |
| "ce_loss": 0.47070144907327954, | |
| "lb_loss": 0.9999999892185867 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 322000000, | |
| "cumulative_training_bytes": 322002968, | |
| "metrics": { | |
| "loss": 0.4807102816990807, | |
| "ce_loss": 0.47071029123582386, | |
| "lb_loss": 0.9999999893733547 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 323000000, | |
| "cumulative_training_bytes": 323002242, | |
| "metrics": { | |
| "loss": 0.48073701398312707, | |
| "ce_loss": 0.47073702351987023, | |
| "lb_loss": 0.9999999893552776 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 324000000, | |
| "cumulative_training_bytes": 324001740, | |
| "metrics": { | |
| "loss": 0.48075183566376206, | |
| "ce_loss": 0.4707518452005052, | |
| "lb_loss": 0.9999999893193445 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 325000000, | |
| "cumulative_training_bytes": 325002302, | |
| "metrics": { | |
| "loss": 0.48077099278910645, | |
| "ce_loss": 0.4707710023258496, | |
| "lb_loss": 0.9999999893127763 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 326000000, | |
| "cumulative_training_bytes": 326001646, | |
| "metrics": { | |
| "loss": 0.4807988396651453, | |
| "ce_loss": 0.47079884920188847, | |
| "lb_loss": 0.9999999894673447 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 327000000, | |
| "cumulative_training_bytes": 327000938, | |
| "metrics": { | |
| "loss": 0.4808235976661511, | |
| "ce_loss": 0.4708236072028943, | |
| "lb_loss": 0.9999999893267519 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 328000000, | |
| "cumulative_training_bytes": 328001021, | |
| "metrics": { | |
| "loss": 0.48082542956810703, | |
| "ce_loss": 0.4708254391048502, | |
| "lb_loss": 0.9999999892609606 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 329000000, | |
| "cumulative_training_bytes": 329000730, | |
| "metrics": { | |
| "loss": 0.4808347929252989, | |
| "ce_loss": 0.47083480246204207, | |
| "lb_loss": 0.9999999892314774 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 330000000, | |
| "cumulative_training_bytes": 330005619, | |
| "metrics": { | |
| "loss": 0.48085167946856366, | |
| "ce_loss": 0.47085168900530683, | |
| "lb_loss": 0.9999999891967088 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 331000000, | |
| "cumulative_training_bytes": 331004616, | |
| "metrics": { | |
| "loss": 0.4808701624671142, | |
| "ce_loss": 0.4708701720038574, | |
| "lb_loss": 0.9999999892492819 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 332000000, | |
| "cumulative_training_bytes": 332005466, | |
| "metrics": { | |
| "loss": 0.4808840215384487, | |
| "ce_loss": 0.4708840310751919, | |
| "lb_loss": 0.999999989369329 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 333000000, | |
| "cumulative_training_bytes": 333005711, | |
| "metrics": { | |
| "loss": 0.4808831132356601, | |
| "ce_loss": 0.47088312277240324, | |
| "lb_loss": 0.999999989431101 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 334000000, | |
| "cumulative_training_bytes": 334000923, | |
| "metrics": { | |
| "loss": 0.480881291871532, | |
| "ce_loss": 0.47088130140827517, | |
| "lb_loss": 0.999999989399822 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 335000000, | |
| "cumulative_training_bytes": 335005467, | |
| "metrics": { | |
| "loss": 0.48089165852830446, | |
| "ce_loss": 0.4708916680650476, | |
| "lb_loss": 0.9999999894233398 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 336000000, | |
| "cumulative_training_bytes": 336004881, | |
| "metrics": { | |
| "loss": 0.4808885490696182, | |
| "ce_loss": 0.47088855860636136, | |
| "lb_loss": 0.9999999894375124 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 337000000, | |
| "cumulative_training_bytes": 337004670, | |
| "metrics": { | |
| "loss": 0.4809070810486999, | |
| "ce_loss": 0.47090709058544306, | |
| "lb_loss": 0.9999999893812634 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 338000000, | |
| "cumulative_training_bytes": 338005568, | |
| "metrics": { | |
| "loss": 0.48091389403755863, | |
| "ce_loss": 0.4709139035743018, | |
| "lb_loss": 0.9999999893751518 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 339000000, | |
| "cumulative_training_bytes": 339000221, | |
| "metrics": { | |
| "loss": 0.480916728187352, | |
| "ce_loss": 0.47091673772409515, | |
| "lb_loss": 0.9999999893008689 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 340000000, | |
| "cumulative_training_bytes": 340000243, | |
| "metrics": { | |
| "loss": 0.4809181364164544, | |
| "ce_loss": 0.4709181459531976, | |
| "lb_loss": 0.999999989316273 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 341000000, | |
| "cumulative_training_bytes": 341005321, | |
| "metrics": { | |
| "loss": 0.480930711371706, | |
| "ce_loss": 0.4709307209084492, | |
| "lb_loss": 0.9999999892610658 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 342000000, | |
| "cumulative_training_bytes": 342004954, | |
| "metrics": { | |
| "loss": 0.4809334829848522, | |
| "ce_loss": 0.4709334925215954, | |
| "lb_loss": 0.9999999893020564 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 343000000, | |
| "cumulative_training_bytes": 343000349, | |
| "metrics": { | |
| "loss": 0.48092697284932284, | |
| "ce_loss": 0.470926982386066, | |
| "lb_loss": 0.9999999892842776 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 344000000, | |
| "cumulative_training_bytes": 344005952, | |
| "metrics": { | |
| "loss": 0.4809306952368029, | |
| "ce_loss": 0.47093070477354604, | |
| "lb_loss": 0.9999999892754586 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 345000000, | |
| "cumulative_training_bytes": 345000555, | |
| "metrics": { | |
| "loss": 0.48093492924702697, | |
| "ce_loss": 0.47093493878377013, | |
| "lb_loss": 0.9999999892527089 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 346000000, | |
| "cumulative_training_bytes": 346000337, | |
| "metrics": { | |
| "loss": 0.4809374438307459, | |
| "ce_loss": 0.47093745336748905, | |
| "lb_loss": 0.9999999893088763 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 347000000, | |
| "cumulative_training_bytes": 347005484, | |
| "metrics": { | |
| "loss": 0.4809448199364519, | |
| "ce_loss": 0.47094482947319505, | |
| "lb_loss": 0.999999989370058 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 348000000, | |
| "cumulative_training_bytes": 348000477, | |
| "metrics": { | |
| "loss": 0.4809486189521885, | |
| "ce_loss": 0.47094862848893165, | |
| "lb_loss": 0.999999989364099 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 349000000, | |
| "cumulative_training_bytes": 349000951, | |
| "metrics": { | |
| "loss": 0.48095005379785294, | |
| "ce_loss": 0.4709500633345961, | |
| "lb_loss": 0.9999999893762722 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 350000000, | |
| "cumulative_training_bytes": 350001130, | |
| "metrics": { | |
| "loss": 0.4809361505117281, | |
| "ce_loss": 0.4709361600484713, | |
| "lb_loss": 0.999999989365838 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 351000000, | |
| "cumulative_training_bytes": 351000377, | |
| "metrics": { | |
| "loss": 0.4809302063483628, | |
| "ce_loss": 0.470930215885106, | |
| "lb_loss": 0.999999989306499 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 352000000, | |
| "cumulative_training_bytes": 352004390, | |
| "metrics": { | |
| "loss": 0.48093818933151383, | |
| "ce_loss": 0.470938198868257, | |
| "lb_loss": 0.9999999893145735 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 353000000, | |
| "cumulative_training_bytes": 353003368, | |
| "metrics": { | |
| "loss": 0.48094108521450285, | |
| "ce_loss": 0.470941094751246, | |
| "lb_loss": 0.9999999893214534 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 354000000, | |
| "cumulative_training_bytes": 354002832, | |
| "metrics": { | |
| "loss": 0.4809529716118759, | |
| "ce_loss": 0.47095298114861905, | |
| "lb_loss": 0.9999999893124508 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 355000000, | |
| "cumulative_training_bytes": 355002761, | |
| "metrics": { | |
| "loss": 0.48095117955086697, | |
| "ce_loss": 0.47095118908761013, | |
| "lb_loss": 0.9999999892213132 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 356000000, | |
| "cumulative_training_bytes": 356002307, | |
| "metrics": { | |
| "loss": 0.48095010436089797, | |
| "ce_loss": 0.47095011389764113, | |
| "lb_loss": 0.9999999893053746 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 357000000, | |
| "cumulative_training_bytes": 357002171, | |
| "metrics": { | |
| "loss": 0.48094610834858254, | |
| "ce_loss": 0.4709461178853257, | |
| "lb_loss": 0.9999999892319346 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 358000000, | |
| "cumulative_training_bytes": 358002343, | |
| "metrics": { | |
| "loss": 0.4809467508721634, | |
| "ce_loss": 0.47094676040890654, | |
| "lb_loss": 0.9999999893085618 | |
| } | |
| }, | |
| { | |
| "epoch": 5, | |
| "checkpoint_type": "epoch", | |
| "metrics": { | |
| "loss": 0.4809443117192456, | |
| "ce_loss": 0.4709443212559888, | |
| "lb_loss": 0.9999999892863285, | |
| "training_bytes": 71629726 | |
| }, | |
| "cumulative_training_bytes": 358148610, | |
| "training_bytes_this_epoch": 71629726 | |
| } | |
| ] | |
| } |