| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 1.3020833333333333, | |
| "eval_steps": 250, | |
| "global_step": 500, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0026041666666666665, | |
| "grad_norm": 29.526924338468568, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7869, | |
| "num_input_tokens_seen": 172856, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.0026041666666666665, | |
| "loss": 0.8704751133918762, | |
| "loss_ce": 0.5689004063606262, | |
| "loss_iou": 0.435546875, | |
| "loss_num": 0.060302734375, | |
| "loss_xval": 0.30078125, | |
| "num_input_tokens_seen": 172856, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.005208333333333333, | |
| "grad_norm": 70.41913440760779, | |
| "learning_rate": 5e-06, | |
| "loss": 0.4046, | |
| "num_input_tokens_seen": 345648, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.005208333333333333, | |
| "loss": 0.36316120624542236, | |
| "loss_ce": 0.12207232415676117, | |
| "loss_iou": 0.52734375, | |
| "loss_num": 0.048095703125, | |
| "loss_xval": 0.2412109375, | |
| "num_input_tokens_seen": 345648, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.0078125, | |
| "grad_norm": 27.92164379971255, | |
| "learning_rate": 5e-06, | |
| "loss": 1.9793, | |
| "num_input_tokens_seen": 518228, | |
| "step": 3 | |
| }, | |
| { | |
| "epoch": 0.0078125, | |
| "loss": 1.6886000633239746, | |
| "loss_ce": 0.06360010802745819, | |
| "loss_iou": 0.98828125, | |
| "loss_num": 0.32421875, | |
| "loss_xval": 1.625, | |
| "num_input_tokens_seen": 518228, | |
| "step": 3 | |
| }, | |
| { | |
| "epoch": 0.010416666666666666, | |
| "grad_norm": 8.939505658432642, | |
| "learning_rate": 5e-06, | |
| "loss": 0.3657, | |
| "num_input_tokens_seen": 690760, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.010416666666666666, | |
| "loss": 0.330140084028244, | |
| "loss_ce": 0.023987744003534317, | |
| "loss_iou": 0.154296875, | |
| "loss_num": 0.061279296875, | |
| "loss_xval": 0.306640625, | |
| "num_input_tokens_seen": 690760, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.013020833333333334, | |
| "grad_norm": 19.28831412717169, | |
| "learning_rate": 5e-06, | |
| "loss": 0.468, | |
| "num_input_tokens_seen": 863320, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.013020833333333334, | |
| "loss": 0.5520405769348145, | |
| "loss_ce": 0.0034565767273306847, | |
| "loss_iou": 0.162109375, | |
| "loss_num": 0.10986328125, | |
| "loss_xval": 0.546875, | |
| "num_input_tokens_seen": 863320, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.015625, | |
| "grad_norm": 10.714818321426886, | |
| "learning_rate": 5e-06, | |
| "loss": 0.457, | |
| "num_input_tokens_seen": 1035776, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.015625, | |
| "loss": 0.5038242340087891, | |
| "loss_ce": 0.0006504527991637588, | |
| "loss_iou": 0.0, | |
| "loss_num": 0.1005859375, | |
| "loss_xval": 0.50390625, | |
| "num_input_tokens_seen": 1035776, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.018229166666666668, | |
| "grad_norm": 21.33070900107311, | |
| "learning_rate": 5e-06, | |
| "loss": 0.3384, | |
| "num_input_tokens_seen": 1208264, | |
| "step": 7 | |
| }, | |
| { | |
| "epoch": 0.018229166666666668, | |
| "loss": 0.3143744468688965, | |
| "loss_ce": 0.0038276039995253086, | |
| "loss_iou": 0.25390625, | |
| "loss_num": 0.06201171875, | |
| "loss_xval": 0.310546875, | |
| "num_input_tokens_seen": 1208264, | |
| "step": 7 | |
| }, | |
| { | |
| "epoch": 0.020833333333333332, | |
| "grad_norm": 15.620099673180961, | |
| "learning_rate": 5e-06, | |
| "loss": 0.3601, | |
| "num_input_tokens_seen": 1380784, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.020833333333333332, | |
| "loss": 0.37209784984588623, | |
| "loss_ce": 0.0007599706877954304, | |
| "loss_iou": 0.0, | |
| "loss_num": 0.07421875, | |
| "loss_xval": 0.37109375, | |
| "num_input_tokens_seen": 1380784, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.0234375, | |
| "grad_norm": 8.787794677847923, | |
| "learning_rate": 5e-06, | |
| "loss": 0.2684, | |
| "num_input_tokens_seen": 1553796, | |
| "step": 9 | |
| }, | |
| { | |
| "epoch": 0.0234375, | |
| "loss": 0.20013384521007538, | |
| "loss_ce": 0.0006099226884543896, | |
| "loss_iou": 0.427734375, | |
| "loss_num": 0.039794921875, | |
| "loss_xval": 0.19921875, | |
| "num_input_tokens_seen": 1553796, | |
| "step": 9 | |
| }, | |
| { | |
| "epoch": 0.026041666666666668, | |
| "grad_norm": 8.085511029078585, | |
| "learning_rate": 5e-06, | |
| "loss": 0.2969, | |
| "num_input_tokens_seen": 1726712, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.026041666666666668, | |
| "loss": 0.2954822778701782, | |
| "loss_ce": 0.0004383414634503424, | |
| "loss_iou": 0.0, | |
| "loss_num": 0.05908203125, | |
| "loss_xval": 0.294921875, | |
| "num_input_tokens_seen": 1726712, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.028645833333333332, | |
| "grad_norm": 19.923996243710985, | |
| "learning_rate": 5e-06, | |
| "loss": 0.3582, | |
| "num_input_tokens_seen": 1898600, | |
| "step": 11 | |
| }, | |
| { | |
| "epoch": 0.028645833333333332, | |
| "loss": 0.3439289331436157, | |
| "loss_ce": 0.00030101489392109215, | |
| "loss_iou": 0.166015625, | |
| "loss_num": 0.06884765625, | |
| "loss_xval": 0.34375, | |
| "num_input_tokens_seen": 1898600, | |
| "step": 11 | |
| }, | |
| { | |
| "epoch": 0.03125, | |
| "grad_norm": 8.414953842541747, | |
| "learning_rate": 5e-06, | |
| "loss": 0.2661, | |
| "num_input_tokens_seen": 2071032, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.03125, | |
| "loss": 0.2936800718307495, | |
| "loss_ce": 0.0003450897347647697, | |
| "loss_iou": 0.23828125, | |
| "loss_num": 0.05859375, | |
| "loss_xval": 0.29296875, | |
| "num_input_tokens_seen": 2071032, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.033854166666666664, | |
| "grad_norm": 19.99273085290305, | |
| "learning_rate": 5e-06, | |
| "loss": 0.3361, | |
| "num_input_tokens_seen": 2243868, | |
| "step": 13 | |
| }, | |
| { | |
| "epoch": 0.033854166666666664, | |
| "loss": 0.31856128573417664, | |
| "loss_ce": 0.00044603750575333834, | |
| "loss_iou": 0.22265625, | |
| "loss_num": 0.0634765625, | |
| "loss_xval": 0.318359375, | |
| "num_input_tokens_seen": 2243868, | |
| "step": 13 | |
| }, | |
| { | |
| "epoch": 0.036458333333333336, | |
| "grad_norm": 8.014341595032883, | |
| "learning_rate": 5e-06, | |
| "loss": 0.2489, | |
| "num_input_tokens_seen": 2415868, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.036458333333333336, | |
| "loss": 0.17592763900756836, | |
| "loss_ce": 0.00026846557739190757, | |
| "loss_iou": 0.578125, | |
| "loss_num": 0.03515625, | |
| "loss_xval": 0.17578125, | |
| "num_input_tokens_seen": 2415868, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.0390625, | |
| "grad_norm": 14.081056874922753, | |
| "learning_rate": 5e-06, | |
| "loss": 0.2469, | |
| "num_input_tokens_seen": 2588144, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.0390625, | |
| "loss": 0.3065241575241089, | |
| "loss_ce": 0.00037182882078923285, | |
| "loss_iou": 0.298828125, | |
| "loss_num": 0.061279296875, | |
| "loss_xval": 0.306640625, | |
| "num_input_tokens_seen": 2588144, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.041666666666666664, | |
| "grad_norm": 6.867314736910267, | |
| "learning_rate": 5e-06, | |
| "loss": 0.3062, | |
| "num_input_tokens_seen": 2760456, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.041666666666666664, | |
| "loss": 0.4008222818374634, | |
| "loss_ce": 0.0002485612640157342, | |
| "loss_iou": 0.0, | |
| "loss_num": 0.080078125, | |
| "loss_xval": 0.400390625, | |
| "num_input_tokens_seen": 2760456, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.044270833333333336, | |
| "grad_norm": 6.841838623253362, | |
| "learning_rate": 5e-06, | |
| "loss": 0.195, | |
| "num_input_tokens_seen": 2933256, | |
| "step": 17 | |
| }, | |
| { | |
| "epoch": 0.044270833333333336, | |
| "loss": 0.20536868274211884, | |
| "loss_ce": 0.00022953077859710902, | |
| "loss_iou": 0.53515625, | |
| "loss_num": 0.041015625, | |
| "loss_xval": 0.205078125, | |
| "num_input_tokens_seen": 2933256, | |
| "step": 17 | |
| }, | |
| { | |
| "epoch": 0.046875, | |
| "grad_norm": 8.073482751973284, | |
| "learning_rate": 5e-06, | |
| "loss": 0.2924, | |
| "num_input_tokens_seen": 3105724, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.046875, | |
| "loss": 0.2312113493680954, | |
| "loss_ce": 0.000376395124476403, | |
| "loss_iou": 0.26953125, | |
| "loss_num": 0.046142578125, | |
| "loss_xval": 0.23046875, | |
| "num_input_tokens_seen": 3105724, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.049479166666666664, | |
| "grad_norm": 7.523822902492111, | |
| "learning_rate": 5e-06, | |
| "loss": 0.152, | |
| "num_input_tokens_seen": 3278360, | |
| "step": 19 | |
| }, | |
| { | |
| "epoch": 0.049479166666666664, | |
| "loss": 0.14852207899093628, | |
| "loss_ce": 0.00020665550255216658, | |
| "loss_iou": 0.45703125, | |
| "loss_num": 0.0296630859375, | |
| "loss_xval": 0.1484375, | |
| "num_input_tokens_seen": 3278360, | |
| "step": 19 | |
| }, | |
| { | |
| "epoch": 0.052083333333333336, | |
| "grad_norm": 6.544223523818296, | |
| "learning_rate": 5e-06, | |
| "loss": 0.2287, | |
| "num_input_tokens_seen": 3450936, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.052083333333333336, | |
| "loss": 0.28778478503227234, | |
| "loss_ce": 0.00030919513665139675, | |
| "loss_iou": 0.453125, | |
| "loss_num": 0.0576171875, | |
| "loss_xval": 0.287109375, | |
| "num_input_tokens_seen": 3450936, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.0546875, | |
| "grad_norm": 11.63193790977644, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1695, | |
| "num_input_tokens_seen": 3623740, | |
| "step": 21 | |
| }, | |
| { | |
| "epoch": 0.0546875, | |
| "loss": 0.19103749096393585, | |
| "loss_ce": 0.00018056559201795608, | |
| "loss_iou": 0.306640625, | |
| "loss_num": 0.0380859375, | |
| "loss_xval": 0.1904296875, | |
| "num_input_tokens_seen": 3623740, | |
| "step": 21 | |
| }, | |
| { | |
| "epoch": 0.057291666666666664, | |
| "grad_norm": 7.497321698776006, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1598, | |
| "num_input_tokens_seen": 3796836, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.057291666666666664, | |
| "loss": 0.1259785294532776, | |
| "loss_ce": 0.00018507592903915793, | |
| "loss_iou": 0.5078125, | |
| "loss_num": 0.025146484375, | |
| "loss_xval": 0.1259765625, | |
| "num_input_tokens_seen": 3796836, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.059895833333333336, | |
| "grad_norm": 30.78448133351319, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1943, | |
| "num_input_tokens_seen": 3969500, | |
| "step": 23 | |
| }, | |
| { | |
| "epoch": 0.059895833333333336, | |
| "loss": 0.15796104073524475, | |
| "loss_ce": 0.0006429227069020271, | |
| "loss_iou": 0.55859375, | |
| "loss_num": 0.031494140625, | |
| "loss_xval": 0.1572265625, | |
| "num_input_tokens_seen": 3969500, | |
| "step": 23 | |
| }, | |
| { | |
| "epoch": 0.0625, | |
| "grad_norm": 5.237378782764295, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1909, | |
| "num_input_tokens_seen": 4141940, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.0625, | |
| "loss": 0.17854130268096924, | |
| "loss_ce": 0.00025760685093700886, | |
| "loss_iou": 0.4296875, | |
| "loss_num": 0.03564453125, | |
| "loss_xval": 0.1787109375, | |
| "num_input_tokens_seen": 4141940, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.06510416666666667, | |
| "grad_norm": 13.60608392035419, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1772, | |
| "num_input_tokens_seen": 4314172, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.06510416666666667, | |
| "loss": 0.19210708141326904, | |
| "loss_ce": 0.0002125638711731881, | |
| "loss_iou": 0.484375, | |
| "loss_num": 0.038330078125, | |
| "loss_xval": 0.19140625, | |
| "num_input_tokens_seen": 4314172, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.06770833333333333, | |
| "grad_norm": 7.390014761942961, | |
| "learning_rate": 5e-06, | |
| "loss": 0.2151, | |
| "num_input_tokens_seen": 4486776, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.06770833333333333, | |
| "loss": 0.24654103815555573, | |
| "loss_ce": 0.00044729292858392, | |
| "loss_iou": 0.53125, | |
| "loss_num": 0.04931640625, | |
| "loss_xval": 0.24609375, | |
| "num_input_tokens_seen": 4486776, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.0703125, | |
| "grad_norm": 6.597800961680885, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1804, | |
| "num_input_tokens_seen": 4659796, | |
| "step": 27 | |
| }, | |
| { | |
| "epoch": 0.0703125, | |
| "loss": 0.18685418367385864, | |
| "loss_ce": 0.00020867137936875224, | |
| "loss_iou": 0.546875, | |
| "loss_num": 0.037353515625, | |
| "loss_xval": 0.1865234375, | |
| "num_input_tokens_seen": 4659796, | |
| "step": 27 | |
| }, | |
| { | |
| "epoch": 0.07291666666666667, | |
| "grad_norm": 15.848602164160235, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1935, | |
| "num_input_tokens_seen": 4832580, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.07291666666666667, | |
| "loss": 0.1529167890548706, | |
| "loss_ce": 0.00038992700865492225, | |
| "loss_iou": 0.453125, | |
| "loss_num": 0.030517578125, | |
| "loss_xval": 0.15234375, | |
| "num_input_tokens_seen": 4832580, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.07552083333333333, | |
| "grad_norm": 7.656983950050504, | |
| "learning_rate": 5e-06, | |
| "loss": 0.2008, | |
| "num_input_tokens_seen": 5005204, | |
| "step": 29 | |
| }, | |
| { | |
| "epoch": 0.07552083333333333, | |
| "loss": 0.26389509439468384, | |
| "loss_ce": 0.00028428525547496974, | |
| "loss_iou": 0.484375, | |
| "loss_num": 0.052734375, | |
| "loss_xval": 0.263671875, | |
| "num_input_tokens_seen": 5005204, | |
| "step": 29 | |
| }, | |
| { | |
| "epoch": 0.078125, | |
| "grad_norm": 4.507917280431056, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1874, | |
| "num_input_tokens_seen": 5177580, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.078125, | |
| "loss": 0.13707002997398376, | |
| "loss_ce": 0.0004123126564081758, | |
| "loss_iou": 0.515625, | |
| "loss_num": 0.02734375, | |
| "loss_xval": 0.13671875, | |
| "num_input_tokens_seen": 5177580, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.08072916666666667, | |
| "grad_norm": 10.885923079707904, | |
| "learning_rate": 5e-06, | |
| "loss": 0.2153, | |
| "num_input_tokens_seen": 5350148, | |
| "step": 31 | |
| }, | |
| { | |
| "epoch": 0.08072916666666667, | |
| "loss": 0.2522280216217041, | |
| "loss_ce": 0.0002138598938472569, | |
| "loss_iou": 0.59375, | |
| "loss_num": 0.050537109375, | |
| "loss_xval": 0.251953125, | |
| "num_input_tokens_seen": 5350148, | |
| "step": 31 | |
| }, | |
| { | |
| "epoch": 0.08333333333333333, | |
| "grad_norm": 5.223864875647863, | |
| "learning_rate": 5e-06, | |
| "loss": 0.2242, | |
| "num_input_tokens_seen": 5522620, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.08333333333333333, | |
| "loss": 0.22422021627426147, | |
| "loss_ce": 0.0002517293323762715, | |
| "loss_iou": 0.69921875, | |
| "loss_num": 0.044677734375, | |
| "loss_xval": 0.2236328125, | |
| "num_input_tokens_seen": 5522620, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.0859375, | |
| "grad_norm": 8.823576859140516, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1796, | |
| "num_input_tokens_seen": 5695340, | |
| "step": 33 | |
| }, | |
| { | |
| "epoch": 0.0859375, | |
| "loss": 0.19266511499881744, | |
| "loss_ce": 0.0003433418460190296, | |
| "loss_iou": 0.6953125, | |
| "loss_num": 0.03857421875, | |
| "loss_xval": 0.1923828125, | |
| "num_input_tokens_seen": 5695340, | |
| "step": 33 | |
| }, | |
| { | |
| "epoch": 0.08854166666666667, | |
| "grad_norm": 54.77946225550538, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1289, | |
| "num_input_tokens_seen": 5868252, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.08854166666666667, | |
| "loss": 0.13593435287475586, | |
| "loss_ce": 0.0002532090584281832, | |
| "loss_iou": 0.58984375, | |
| "loss_num": 0.027099609375, | |
| "loss_xval": 0.1357421875, | |
| "num_input_tokens_seen": 5868252, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.09114583333333333, | |
| "grad_norm": 10.116131484083123, | |
| "learning_rate": 5e-06, | |
| "loss": 0.132, | |
| "num_input_tokens_seen": 6041036, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.09114583333333333, | |
| "loss": 0.15703758597373962, | |
| "loss_ce": 0.0003603329823818058, | |
| "loss_iou": 0.6796875, | |
| "loss_num": 0.03125, | |
| "loss_xval": 0.15625, | |
| "num_input_tokens_seen": 6041036, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.09375, | |
| "grad_norm": 3.6841467905553884, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1766, | |
| "num_input_tokens_seen": 6213444, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.09375, | |
| "loss": 0.11154159903526306, | |
| "loss_ce": 0.00021347634901758283, | |
| "loss_iou": 0.486328125, | |
| "loss_num": 0.022216796875, | |
| "loss_xval": 0.111328125, | |
| "num_input_tokens_seen": 6213444, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.09635416666666667, | |
| "grad_norm": 7.922965723176142, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1496, | |
| "num_input_tokens_seen": 6386028, | |
| "step": 37 | |
| }, | |
| { | |
| "epoch": 0.09635416666666667, | |
| "loss": 0.14449915289878845, | |
| "loss_ce": 0.00021204788936302066, | |
| "loss_iou": 0.6796875, | |
| "loss_num": 0.02880859375, | |
| "loss_xval": 0.14453125, | |
| "num_input_tokens_seen": 6386028, | |
| "step": 37 | |
| }, | |
| { | |
| "epoch": 0.09895833333333333, | |
| "grad_norm": 5.266919761801281, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1401, | |
| "num_input_tokens_seen": 6558240, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.09895833333333333, | |
| "loss": 0.1739426553249359, | |
| "loss_ce": 0.00029765223735012114, | |
| "loss_iou": 0.7578125, | |
| "loss_num": 0.03466796875, | |
| "loss_xval": 0.173828125, | |
| "num_input_tokens_seen": 6558240, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.1015625, | |
| "grad_norm": 7.135945650156497, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1703, | |
| "num_input_tokens_seen": 6731156, | |
| "step": 39 | |
| }, | |
| { | |
| "epoch": 0.1015625, | |
| "loss": 0.1407906413078308, | |
| "loss_ce": 0.00022666863515041769, | |
| "loss_iou": 0.62890625, | |
| "loss_num": 0.0281982421875, | |
| "loss_xval": 0.140625, | |
| "num_input_tokens_seen": 6731156, | |
| "step": 39 | |
| }, | |
| { | |
| "epoch": 0.10416666666666667, | |
| "grad_norm": 14.956590253309306, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1741, | |
| "num_input_tokens_seen": 6903828, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.10416666666666667, | |
| "loss": 0.14085114002227783, | |
| "loss_ce": 0.000409250904340297, | |
| "loss_iou": 0.55078125, | |
| "loss_num": 0.028076171875, | |
| "loss_xval": 0.140625, | |
| "num_input_tokens_seen": 6903828, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.10677083333333333, | |
| "grad_norm": 6.97548951750633, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1407, | |
| "num_input_tokens_seen": 7076944, | |
| "step": 41 | |
| }, | |
| { | |
| "epoch": 0.10677083333333333, | |
| "loss": 0.13742247223854065, | |
| "loss_ce": 0.000459590955870226, | |
| "loss_iou": 0.6484375, | |
| "loss_num": 0.02734375, | |
| "loss_xval": 0.13671875, | |
| "num_input_tokens_seen": 7076944, | |
| "step": 41 | |
| }, | |
| { | |
| "epoch": 0.109375, | |
| "grad_norm": 5.706351230194716, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1664, | |
| "num_input_tokens_seen": 7249880, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.109375, | |
| "loss": 0.1694188117980957, | |
| "loss_ce": 0.0002903800050262362, | |
| "loss_iou": 0.71875, | |
| "loss_num": 0.03369140625, | |
| "loss_xval": 0.1689453125, | |
| "num_input_tokens_seen": 7249880, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.11197916666666667, | |
| "grad_norm": 7.30786008091978, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1396, | |
| "num_input_tokens_seen": 7422732, | |
| "step": 43 | |
| }, | |
| { | |
| "epoch": 0.11197916666666667, | |
| "loss": 0.12171518802642822, | |
| "loss_ce": 0.0002552264486439526, | |
| "loss_iou": 0.609375, | |
| "loss_num": 0.0242919921875, | |
| "loss_xval": 0.12158203125, | |
| "num_input_tokens_seen": 7422732, | |
| "step": 43 | |
| }, | |
| { | |
| "epoch": 0.11458333333333333, | |
| "grad_norm": 10.925715703737882, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1589, | |
| "num_input_tokens_seen": 7595068, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.11458333333333333, | |
| "loss": 0.13566899299621582, | |
| "loss_ce": 0.0002930228365585208, | |
| "loss_iou": 0.58984375, | |
| "loss_num": 0.027099609375, | |
| "loss_xval": 0.1357421875, | |
| "num_input_tokens_seen": 7595068, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.1171875, | |
| "grad_norm": 5.054139739954058, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1463, | |
| "num_input_tokens_seen": 7767900, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.1171875, | |
| "loss": 0.12349405884742737, | |
| "loss_ce": 0.00020303628116380423, | |
| "loss_iou": 0.7109375, | |
| "loss_num": 0.024658203125, | |
| "loss_xval": 0.123046875, | |
| "num_input_tokens_seen": 7767900, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.11979166666666667, | |
| "grad_norm": 12.342418471503326, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1424, | |
| "num_input_tokens_seen": 7940544, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.11979166666666667, | |
| "loss": 0.11029690504074097, | |
| "loss_ce": 0.00021999998716637492, | |
| "loss_iou": 0.81640625, | |
| "loss_num": 0.02197265625, | |
| "loss_xval": 0.10986328125, | |
| "num_input_tokens_seen": 7940544, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.12239583333333333, | |
| "grad_norm": 5.062819394898654, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1906, | |
| "num_input_tokens_seen": 8113664, | |
| "step": 47 | |
| }, | |
| { | |
| "epoch": 0.12239583333333333, | |
| "loss": 0.1845826804637909, | |
| "loss_ce": 0.0001954784820554778, | |
| "loss_iou": 0.310546875, | |
| "loss_num": 0.036865234375, | |
| "loss_xval": 0.1845703125, | |
| "num_input_tokens_seen": 8113664, | |
| "step": 47 | |
| }, | |
| { | |
| "epoch": 0.125, | |
| "grad_norm": 9.659514849549943, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1428, | |
| "num_input_tokens_seen": 8286408, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.125, | |
| "loss": 0.13132745027542114, | |
| "loss_ce": 0.00022393176914192736, | |
| "loss_iou": 0.5859375, | |
| "loss_num": 0.0262451171875, | |
| "loss_xval": 0.130859375, | |
| "num_input_tokens_seen": 8286408, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.12760416666666666, | |
| "grad_norm": 3.4602191470453296, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1523, | |
| "num_input_tokens_seen": 8459480, | |
| "step": 49 | |
| }, | |
| { | |
| "epoch": 0.12760416666666666, | |
| "loss": 0.09740308672189713, | |
| "loss_ce": 0.00011304817599011585, | |
| "loss_iou": 0.734375, | |
| "loss_num": 0.0194091796875, | |
| "loss_xval": 0.09716796875, | |
| "num_input_tokens_seen": 8459480, | |
| "step": 49 | |
| }, | |
| { | |
| "epoch": 0.13020833333333334, | |
| "grad_norm": 2.792621267506476, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1739, | |
| "num_input_tokens_seen": 8632048, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.13020833333333334, | |
| "loss": 0.20529168844223022, | |
| "loss_ce": 0.00015252322191372514, | |
| "loss_iou": 0.55078125, | |
| "loss_num": 0.041015625, | |
| "loss_xval": 0.205078125, | |
| "num_input_tokens_seen": 8632048, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.1328125, | |
| "grad_norm": 112.48651552153446, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1474, | |
| "num_input_tokens_seen": 8804436, | |
| "step": 51 | |
| }, | |
| { | |
| "epoch": 0.1328125, | |
| "loss": 0.14565327763557434, | |
| "loss_ce": 0.00020651462546084076, | |
| "loss_iou": 0.796875, | |
| "loss_num": 0.029052734375, | |
| "loss_xval": 0.1455078125, | |
| "num_input_tokens_seen": 8804436, | |
| "step": 51 | |
| }, | |
| { | |
| "epoch": 0.13541666666666666, | |
| "grad_norm": 23.381698600452545, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1281, | |
| "num_input_tokens_seen": 8976692, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.13541666666666666, | |
| "loss": 0.07739880681037903, | |
| "loss_ce": 0.0002808899153023958, | |
| "loss_iou": 0.71484375, | |
| "loss_num": 0.01544189453125, | |
| "loss_xval": 0.0771484375, | |
| "num_input_tokens_seen": 8976692, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.13802083333333334, | |
| "grad_norm": 20.24541765865236, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1416, | |
| "num_input_tokens_seen": 9149400, | |
| "step": 53 | |
| }, | |
| { | |
| "epoch": 0.13802083333333334, | |
| "loss": 0.09311097115278244, | |
| "loss_ce": 0.00012390354822855443, | |
| "loss_iou": 0.7421875, | |
| "loss_num": 0.0185546875, | |
| "loss_xval": 0.0927734375, | |
| "num_input_tokens_seen": 9149400, | |
| "step": 53 | |
| }, | |
| { | |
| "epoch": 0.140625, | |
| "grad_norm": 5.275500097506868, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1424, | |
| "num_input_tokens_seen": 9321876, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.140625, | |
| "loss": 0.11511102318763733, | |
| "loss_ce": 0.00018182306666858494, | |
| "loss_iou": 0.55078125, | |
| "loss_num": 0.02294921875, | |
| "loss_xval": 0.11474609375, | |
| "num_input_tokens_seen": 9321876, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.14322916666666666, | |
| "grad_norm": 6.68044187324112, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1389, | |
| "num_input_tokens_seen": 9494628, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.14322916666666666, | |
| "loss": 0.14306305348873138, | |
| "loss_ce": 0.0001797609293134883, | |
| "loss_iou": 0.6953125, | |
| "loss_num": 0.028564453125, | |
| "loss_xval": 0.142578125, | |
| "num_input_tokens_seen": 9494628, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.14583333333333334, | |
| "grad_norm": 6.008068200145323, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1457, | |
| "num_input_tokens_seen": 9666508, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.14583333333333334, | |
| "loss": 0.10107017308473587, | |
| "loss_ce": 0.00024009394110180438, | |
| "loss_iou": 0.0, | |
| "loss_num": 0.0201416015625, | |
| "loss_xval": 0.1005859375, | |
| "num_input_tokens_seen": 9666508, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.1484375, | |
| "grad_norm": 5.2880560255216436, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1537, | |
| "num_input_tokens_seen": 9839556, | |
| "step": 57 | |
| }, | |
| { | |
| "epoch": 0.1484375, | |
| "loss": 0.12539099156856537, | |
| "loss_ce": 0.003534301184117794, | |
| "loss_iou": 0.55078125, | |
| "loss_num": 0.0244140625, | |
| "loss_xval": 0.1220703125, | |
| "num_input_tokens_seen": 9839556, | |
| "step": 57 | |
| }, | |
| { | |
| "epoch": 0.15104166666666666, | |
| "grad_norm": 12.763217046347364, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1706, | |
| "num_input_tokens_seen": 10011988, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.15104166666666666, | |
| "loss": 0.17848367989063263, | |
| "loss_ce": 0.00016946055984590203, | |
| "loss_iou": 0.52734375, | |
| "loss_num": 0.03564453125, | |
| "loss_xval": 0.1787109375, | |
| "num_input_tokens_seen": 10011988, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.15364583333333334, | |
| "grad_norm": 8.269658303130955, | |
| "learning_rate": 5e-06, | |
| "loss": 0.157, | |
| "num_input_tokens_seen": 10184712, | |
| "step": 59 | |
| }, | |
| { | |
| "epoch": 0.15364583333333334, | |
| "loss": 0.16671502590179443, | |
| "loss_ce": 0.0010350943775847554, | |
| "loss_iou": 0.51953125, | |
| "loss_num": 0.033203125, | |
| "loss_xval": 0.166015625, | |
| "num_input_tokens_seen": 10184712, | |
| "step": 59 | |
| }, | |
| { | |
| "epoch": 0.15625, | |
| "grad_norm": 10.823127549550875, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1397, | |
| "num_input_tokens_seen": 10357876, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.15625, | |
| "loss": 0.15665964782238007, | |
| "loss_ce": 0.00022653902124147862, | |
| "loss_iou": 0.5390625, | |
| "loss_num": 0.03125, | |
| "loss_xval": 0.15625, | |
| "num_input_tokens_seen": 10357876, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.15885416666666666, | |
| "grad_norm": 6.373677246488681, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1239, | |
| "num_input_tokens_seen": 10530560, | |
| "step": 61 | |
| }, | |
| { | |
| "epoch": 0.15885416666666666, | |
| "loss": 0.1182846650481224, | |
| "loss_ce": 0.0001816382718970999, | |
| "loss_iou": 0.78515625, | |
| "loss_num": 0.023681640625, | |
| "loss_xval": 0.1181640625, | |
| "num_input_tokens_seen": 10530560, | |
| "step": 61 | |
| }, | |
| { | |
| "epoch": 0.16145833333333334, | |
| "grad_norm": 2.5506045315044688, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1465, | |
| "num_input_tokens_seen": 10702880, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.16145833333333334, | |
| "loss": 0.11390332132577896, | |
| "loss_ce": 0.00019482464995235205, | |
| "loss_iou": 0.890625, | |
| "loss_num": 0.022705078125, | |
| "loss_xval": 0.11376953125, | |
| "num_input_tokens_seen": 10702880, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.1640625, | |
| "grad_norm": 7.222980659687508, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1252, | |
| "num_input_tokens_seen": 10875396, | |
| "step": 63 | |
| }, | |
| { | |
| "epoch": 0.1640625, | |
| "loss": 0.14197739958763123, | |
| "loss_ce": 0.00019273148791398853, | |
| "loss_iou": 0.65234375, | |
| "loss_num": 0.0283203125, | |
| "loss_xval": 0.1416015625, | |
| "num_input_tokens_seen": 10875396, | |
| "step": 63 | |
| }, | |
| { | |
| "epoch": 0.16666666666666666, | |
| "grad_norm": 5.146091397448424, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1305, | |
| "num_input_tokens_seen": 11047776, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.16666666666666666, | |
| "loss": 0.14921408891677856, | |
| "loss_ce": 0.00013572629541158676, | |
| "loss_iou": 0.72265625, | |
| "loss_num": 0.02978515625, | |
| "loss_xval": 0.1494140625, | |
| "num_input_tokens_seen": 11047776, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.16927083333333334, | |
| "grad_norm": 4.80224094246898, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1154, | |
| "num_input_tokens_seen": 11220188, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.16927083333333334, | |
| "loss": 0.07668769359588623, | |
| "loss_ce": 0.001126162358559668, | |
| "loss_iou": 0.91015625, | |
| "loss_num": 0.01507568359375, | |
| "loss_xval": 0.07568359375, | |
| "num_input_tokens_seen": 11220188, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.171875, | |
| "grad_norm": 10.700514293201024, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1739, | |
| "num_input_tokens_seen": 11392944, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.171875, | |
| "loss": 0.22246834635734558, | |
| "loss_ce": 0.00011727018863894045, | |
| "loss_iou": 0.75390625, | |
| "loss_num": 0.04443359375, | |
| "loss_xval": 0.22265625, | |
| "num_input_tokens_seen": 11392944, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.17447916666666666, | |
| "grad_norm": 9.514503857806982, | |
| "learning_rate": 5e-06, | |
| "loss": 0.2101, | |
| "num_input_tokens_seen": 11565084, | |
| "step": 67 | |
| }, | |
| { | |
| "epoch": 0.17447916666666666, | |
| "loss": 0.13935251533985138, | |
| "loss_ce": 0.0004364975611679256, | |
| "loss_iou": 0.58203125, | |
| "loss_num": 0.02783203125, | |
| "loss_xval": 0.138671875, | |
| "num_input_tokens_seen": 11565084, | |
| "step": 67 | |
| }, | |
| { | |
| "epoch": 0.17708333333333334, | |
| "grad_norm": 28.845888384168894, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1395, | |
| "num_input_tokens_seen": 11737388, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.17708333333333334, | |
| "loss": 0.17835211753845215, | |
| "loss_ce": 0.00019049833645112813, | |
| "loss_iou": 0.5859375, | |
| "loss_num": 0.03564453125, | |
| "loss_xval": 0.177734375, | |
| "num_input_tokens_seen": 11737388, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.1796875, | |
| "grad_norm": 12.901299207431718, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1475, | |
| "num_input_tokens_seen": 11910160, | |
| "step": 69 | |
| }, | |
| { | |
| "epoch": 0.1796875, | |
| "loss": 0.14130395650863647, | |
| "loss_ce": 0.00040430587250739336, | |
| "loss_iou": 0.71875, | |
| "loss_num": 0.028076171875, | |
| "loss_xval": 0.140625, | |
| "num_input_tokens_seen": 11910160, | |
| "step": 69 | |
| }, | |
| { | |
| "epoch": 0.18229166666666666, | |
| "grad_norm": 4.066104418883702, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1535, | |
| "num_input_tokens_seen": 12083060, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.18229166666666666, | |
| "loss": 0.22210073471069336, | |
| "loss_ce": 0.00048208353109657764, | |
| "loss_iou": 0.52734375, | |
| "loss_num": 0.044189453125, | |
| "loss_xval": 0.2216796875, | |
| "num_input_tokens_seen": 12083060, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.18489583333333334, | |
| "grad_norm": 7.20629091266797, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1526, | |
| "num_input_tokens_seen": 12255100, | |
| "step": 71 | |
| }, | |
| { | |
| "epoch": 0.18489583333333334, | |
| "loss": 0.10638897120952606, | |
| "loss_ce": 0.00015728682046756148, | |
| "loss_iou": 0.0, | |
| "loss_num": 0.021240234375, | |
| "loss_xval": 0.1064453125, | |
| "num_input_tokens_seen": 12255100, | |
| "step": 71 | |
| }, | |
| { | |
| "epoch": 0.1875, | |
| "grad_norm": 5.6974371825888515, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1194, | |
| "num_input_tokens_seen": 12428188, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.1875, | |
| "loss": 0.17051713168621063, | |
| "loss_ce": 0.0001069810678018257, | |
| "loss_iou": 0.77734375, | |
| "loss_num": 0.0341796875, | |
| "loss_xval": 0.169921875, | |
| "num_input_tokens_seen": 12428188, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.19010416666666666, | |
| "grad_norm": 9.689078279769502, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1445, | |
| "num_input_tokens_seen": 12601004, | |
| "step": 73 | |
| }, | |
| { | |
| "epoch": 0.19010416666666666, | |
| "loss": 0.1433650702238083, | |
| "loss_ce": 0.00020711585239041597, | |
| "loss_iou": 0.7578125, | |
| "loss_num": 0.0286865234375, | |
| "loss_xval": 0.1435546875, | |
| "num_input_tokens_seen": 12601004, | |
| "step": 73 | |
| }, | |
| { | |
| "epoch": 0.19270833333333334, | |
| "grad_norm": 5.827672891178693, | |
| "learning_rate": 5e-06, | |
| "loss": 0.134, | |
| "num_input_tokens_seen": 12773320, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.19270833333333334, | |
| "loss": 0.11652399599552155, | |
| "loss_ce": 9.94274887489155e-05, | |
| "loss_iou": 0.0, | |
| "loss_num": 0.0233154296875, | |
| "loss_xval": 0.1162109375, | |
| "num_input_tokens_seen": 12773320, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.1953125, | |
| "grad_norm": 3.990167602163436, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1368, | |
| "num_input_tokens_seen": 12945852, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.1953125, | |
| "loss": 0.14618420600891113, | |
| "loss_ce": 0.0001270811044378206, | |
| "loss_iou": 0.59765625, | |
| "loss_num": 0.0291748046875, | |
| "loss_xval": 0.146484375, | |
| "num_input_tokens_seen": 12945852, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.19791666666666666, | |
| "grad_norm": 13.270667333466802, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1563, | |
| "num_input_tokens_seen": 13118484, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.19791666666666666, | |
| "loss": 0.15230345726013184, | |
| "loss_ce": 0.00026487442664802074, | |
| "loss_iou": 0.76953125, | |
| "loss_num": 0.0303955078125, | |
| "loss_xval": 0.15234375, | |
| "num_input_tokens_seen": 13118484, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.20052083333333334, | |
| "grad_norm": 7.7363268532740745, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1283, | |
| "num_input_tokens_seen": 13291272, | |
| "step": 77 | |
| }, | |
| { | |
| "epoch": 0.20052083333333334, | |
| "loss": 0.14224107563495636, | |
| "loss_ce": 5.9679325204342604e-05, | |
| "loss_iou": 0.6796875, | |
| "loss_num": 0.0284423828125, | |
| "loss_xval": 0.142578125, | |
| "num_input_tokens_seen": 13291272, | |
| "step": 77 | |
| }, | |
| { | |
| "epoch": 0.203125, | |
| "grad_norm": 6.61313432355891, | |
| "learning_rate": 5e-06, | |
| "loss": 0.124, | |
| "num_input_tokens_seen": 13464624, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.203125, | |
| "loss": 0.1373731642961502, | |
| "loss_ce": 0.001081653987057507, | |
| "loss_iou": 0.578125, | |
| "loss_num": 0.0272216796875, | |
| "loss_xval": 0.13671875, | |
| "num_input_tokens_seen": 13464624, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.20572916666666666, | |
| "grad_norm": 8.581672711095537, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1277, | |
| "num_input_tokens_seen": 13637932, | |
| "step": 79 | |
| }, | |
| { | |
| "epoch": 0.20572916666666666, | |
| "loss": 0.07391411066055298, | |
| "loss_ce": 0.0002141633303835988, | |
| "loss_iou": 0.921875, | |
| "loss_num": 0.0147705078125, | |
| "loss_xval": 0.07373046875, | |
| "num_input_tokens_seen": 13637932, | |
| "step": 79 | |
| }, | |
| { | |
| "epoch": 0.20833333333333334, | |
| "grad_norm": 9.343129724950805, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1228, | |
| "num_input_tokens_seen": 13810536, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.20833333333333334, | |
| "loss": 0.1317322701215744, | |
| "loss_ce": 0.00010995224147336558, | |
| "loss_iou": 0.5625, | |
| "loss_num": 0.0263671875, | |
| "loss_xval": 0.1318359375, | |
| "num_input_tokens_seen": 13810536, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.2109375, | |
| "grad_norm": 3.335680846026802, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1278, | |
| "num_input_tokens_seen": 13982952, | |
| "step": 81 | |
| }, | |
| { | |
| "epoch": 0.2109375, | |
| "loss": 0.11719675362110138, | |
| "loss_ce": 7.029056723695248e-05, | |
| "loss_iou": 0.72265625, | |
| "loss_num": 0.0234375, | |
| "loss_xval": 0.1171875, | |
| "num_input_tokens_seen": 13982952, | |
| "step": 81 | |
| }, | |
| { | |
| "epoch": 0.21354166666666666, | |
| "grad_norm": 5.015919335288412, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1141, | |
| "num_input_tokens_seen": 14156116, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.21354166666666666, | |
| "loss": 0.12401551753282547, | |
| "loss_ce": 0.0005414030747488141, | |
| "loss_iou": 0.8125, | |
| "loss_num": 0.024658203125, | |
| "loss_xval": 0.12353515625, | |
| "num_input_tokens_seen": 14156116, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.21614583333333334, | |
| "grad_norm": 2.9623089480232765, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1462, | |
| "num_input_tokens_seen": 14328100, | |
| "step": 83 | |
| }, | |
| { | |
| "epoch": 0.21614583333333334, | |
| "loss": 0.13083317875862122, | |
| "loss_ce": 6.534742715302855e-05, | |
| "loss_iou": 0.84765625, | |
| "loss_num": 0.026123046875, | |
| "loss_xval": 0.130859375, | |
| "num_input_tokens_seen": 14328100, | |
| "step": 83 | |
| }, | |
| { | |
| "epoch": 0.21875, | |
| "grad_norm": 10.594036737745725, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1571, | |
| "num_input_tokens_seen": 14501028, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.21875, | |
| "loss": 0.12298892438411713, | |
| "loss_ce": 9.464097092859447e-05, | |
| "loss_iou": 0.765625, | |
| "loss_num": 0.0245361328125, | |
| "loss_xval": 0.123046875, | |
| "num_input_tokens_seen": 14501028, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.22135416666666666, | |
| "grad_norm": 15.787971676382128, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1346, | |
| "num_input_tokens_seen": 14673688, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.22135416666666666, | |
| "loss": 0.1362449675798416, | |
| "loss_ce": 0.00013656073133461177, | |
| "loss_iou": 0.77734375, | |
| "loss_num": 0.0272216796875, | |
| "loss_xval": 0.1357421875, | |
| "num_input_tokens_seen": 14673688, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.22395833333333334, | |
| "grad_norm": 17.628757977108236, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1309, | |
| "num_input_tokens_seen": 14846388, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.22395833333333334, | |
| "loss": 0.2271496057510376, | |
| "loss_ce": 0.00012933027755934745, | |
| "loss_iou": 0.6875, | |
| "loss_num": 0.04541015625, | |
| "loss_xval": 0.2265625, | |
| "num_input_tokens_seen": 14846388, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.2265625, | |
| "grad_norm": 4.200455159585171, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1237, | |
| "num_input_tokens_seen": 15019332, | |
| "step": 87 | |
| }, | |
| { | |
| "epoch": 0.2265625, | |
| "loss": 0.1411372572183609, | |
| "loss_ce": 0.000146055273944512, | |
| "loss_iou": 0.7109375, | |
| "loss_num": 0.0281982421875, | |
| "loss_xval": 0.140625, | |
| "num_input_tokens_seen": 15019332, | |
| "step": 87 | |
| }, | |
| { | |
| "epoch": 0.22916666666666666, | |
| "grad_norm": 16.128679810445924, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1205, | |
| "num_input_tokens_seen": 15191728, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.22916666666666666, | |
| "loss": 0.12598924338817596, | |
| "loss_ce": 0.0001957894128281623, | |
| "loss_iou": 0.7265625, | |
| "loss_num": 0.025146484375, | |
| "loss_xval": 0.1259765625, | |
| "num_input_tokens_seen": 15191728, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.23177083333333334, | |
| "grad_norm": 3.347768447216801, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1091, | |
| "num_input_tokens_seen": 15364328, | |
| "step": 89 | |
| }, | |
| { | |
| "epoch": 0.23177083333333334, | |
| "loss": 0.13717257976531982, | |
| "loss_ce": 0.00014865108823869377, | |
| "loss_iou": 0.671875, | |
| "loss_num": 0.0274658203125, | |
| "loss_xval": 0.13671875, | |
| "num_input_tokens_seen": 15364328, | |
| "step": 89 | |
| }, | |
| { | |
| "epoch": 0.234375, | |
| "grad_norm": 14.428792014632464, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1169, | |
| "num_input_tokens_seen": 15537008, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.234375, | |
| "loss": 0.09786561131477356, | |
| "loss_ce": 8.728736429475248e-05, | |
| "loss_iou": 0.8984375, | |
| "loss_num": 0.01953125, | |
| "loss_xval": 0.09765625, | |
| "num_input_tokens_seen": 15537008, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.23697916666666666, | |
| "grad_norm": 9.593480834109474, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1477, | |
| "num_input_tokens_seen": 15709404, | |
| "step": 91 | |
| }, | |
| { | |
| "epoch": 0.23697916666666666, | |
| "loss": 0.13464778661727905, | |
| "loss_ce": 0.0004925199900753796, | |
| "loss_iou": 0.0, | |
| "loss_num": 0.02685546875, | |
| "loss_xval": 0.1337890625, | |
| "num_input_tokens_seen": 15709404, | |
| "step": 91 | |
| }, | |
| { | |
| "epoch": 0.23958333333333334, | |
| "grad_norm": 5.187519307559665, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1256, | |
| "num_input_tokens_seen": 15882256, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.23958333333333334, | |
| "loss": 0.14495471119880676, | |
| "loss_ce": 0.00020984606817364693, | |
| "loss_iou": 0.59375, | |
| "loss_num": 0.0289306640625, | |
| "loss_xval": 0.14453125, | |
| "num_input_tokens_seen": 15882256, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.2421875, | |
| "grad_norm": 3.797220087224051, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0953, | |
| "num_input_tokens_seen": 16055680, | |
| "step": 93 | |
| }, | |
| { | |
| "epoch": 0.2421875, | |
| "loss": 0.11551543325185776, | |
| "loss_ce": 0.00012846880417782813, | |
| "loss_iou": 0.84765625, | |
| "loss_num": 0.0230712890625, | |
| "loss_xval": 0.115234375, | |
| "num_input_tokens_seen": 16055680, | |
| "step": 93 | |
| }, | |
| { | |
| "epoch": 0.24479166666666666, | |
| "grad_norm": 12.640483904974769, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1633, | |
| "num_input_tokens_seen": 16228580, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.24479166666666666, | |
| "loss": 0.22419461607933044, | |
| "loss_ce": 7.351529347943142e-05, | |
| "loss_iou": 0.57421875, | |
| "loss_num": 0.044921875, | |
| "loss_xval": 0.224609375, | |
| "num_input_tokens_seen": 16228580, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.24739583333333334, | |
| "grad_norm": 5.298357496019875, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1399, | |
| "num_input_tokens_seen": 16401760, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.24739583333333334, | |
| "loss": 0.13041989505290985, | |
| "loss_ce": 0.0002624165790621191, | |
| "loss_iou": 0.86328125, | |
| "loss_num": 0.0260009765625, | |
| "loss_xval": 0.1298828125, | |
| "num_input_tokens_seen": 16401760, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "grad_norm": 9.952010853168657, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1295, | |
| "num_input_tokens_seen": 16574496, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "loss": 0.1939561665058136, | |
| "loss_ce": 7.798791921231896e-05, | |
| "loss_iou": 0.765625, | |
| "loss_num": 0.038818359375, | |
| "loss_xval": 0.1943359375, | |
| "num_input_tokens_seen": 16574496, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.2526041666666667, | |
| "grad_norm": 3.049823911893114, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1111, | |
| "num_input_tokens_seen": 16747728, | |
| "step": 97 | |
| }, | |
| { | |
| "epoch": 0.2526041666666667, | |
| "loss": 0.09351100027561188, | |
| "loss_ce": 6.617652252316475e-05, | |
| "loss_iou": 0.65625, | |
| "loss_num": 0.0186767578125, | |
| "loss_xval": 0.09326171875, | |
| "num_input_tokens_seen": 16747728, | |
| "step": 97 | |
| }, | |
| { | |
| "epoch": 0.2552083333333333, | |
| "grad_norm": 16.778434870585635, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1019, | |
| "num_input_tokens_seen": 16920748, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.2552083333333333, | |
| "loss": 0.06255725026130676, | |
| "loss_ce": 0.00036242493661120534, | |
| "loss_iou": 0.796875, | |
| "loss_num": 0.012451171875, | |
| "loss_xval": 0.062255859375, | |
| "num_input_tokens_seen": 16920748, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.2578125, | |
| "grad_norm": 3.1538744569690427, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0905, | |
| "num_input_tokens_seen": 17093788, | |
| "step": 99 | |
| }, | |
| { | |
| "epoch": 0.2578125, | |
| "loss": 0.07921752333641052, | |
| "loss_ce": 5.4928314057178795e-05, | |
| "loss_iou": 0.75390625, | |
| "loss_num": 0.015869140625, | |
| "loss_xval": 0.0791015625, | |
| "num_input_tokens_seen": 17093788, | |
| "step": 99 | |
| }, | |
| { | |
| "epoch": 0.2604166666666667, | |
| "grad_norm": 5.0492536766824925, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1333, | |
| "num_input_tokens_seen": 17266068, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.2604166666666667, | |
| "loss": 0.2560691237449646, | |
| "loss_ce": 7.242064020829275e-05, | |
| "loss_iou": 0.0, | |
| "loss_num": 0.05126953125, | |
| "loss_xval": 0.255859375, | |
| "num_input_tokens_seen": 17266068, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.2630208333333333, | |
| "grad_norm": 3.206773313570764, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1103, | |
| "num_input_tokens_seen": 17438252, | |
| "step": 101 | |
| }, | |
| { | |
| "epoch": 0.2630208333333333, | |
| "loss": 0.10068385303020477, | |
| "loss_ce": 6.739624950569123e-05, | |
| "loss_iou": 0.66015625, | |
| "loss_num": 0.0201416015625, | |
| "loss_xval": 0.1005859375, | |
| "num_input_tokens_seen": 17438252, | |
| "step": 101 | |
| }, | |
| { | |
| "epoch": 0.265625, | |
| "grad_norm": 4.023617188811506, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1235, | |
| "num_input_tokens_seen": 17611072, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.265625, | |
| "loss": 0.17407888174057007, | |
| "loss_ce": 0.0001286847109440714, | |
| "loss_iou": 0.8125, | |
| "loss_num": 0.03466796875, | |
| "loss_xval": 0.173828125, | |
| "num_input_tokens_seen": 17611072, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.2682291666666667, | |
| "grad_norm": 4.68999361675466, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1292, | |
| "num_input_tokens_seen": 17784276, | |
| "step": 103 | |
| }, | |
| { | |
| "epoch": 0.2682291666666667, | |
| "loss": 0.14088758826255798, | |
| "loss_ce": 0.00014050997560843825, | |
| "loss_iou": 0.7578125, | |
| "loss_num": 0.028076171875, | |
| "loss_xval": 0.140625, | |
| "num_input_tokens_seen": 17784276, | |
| "step": 103 | |
| }, | |
| { | |
| "epoch": 0.2708333333333333, | |
| "grad_norm": 3.670856513287625, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1135, | |
| "num_input_tokens_seen": 17956592, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.2708333333333333, | |
| "loss": 0.1348290890455246, | |
| "loss_ce": 0.00012450128269847482, | |
| "loss_iou": 0.0, | |
| "loss_num": 0.0269775390625, | |
| "loss_xval": 0.134765625, | |
| "num_input_tokens_seen": 17956592, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.2734375, | |
| "grad_norm": 7.988958032566027, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1302, | |
| "num_input_tokens_seen": 18129536, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.2734375, | |
| "loss": 0.06641676276922226, | |
| "loss_ce": 0.0001936157641466707, | |
| "loss_iou": 0.58203125, | |
| "loss_num": 0.01324462890625, | |
| "loss_xval": 0.06640625, | |
| "num_input_tokens_seen": 18129536, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.2760416666666667, | |
| "grad_norm": 6.167808656422766, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0922, | |
| "num_input_tokens_seen": 18302572, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.2760416666666667, | |
| "loss": 0.0900074690580368, | |
| "loss_ce": 0.00028579036006703973, | |
| "loss_iou": 0.8828125, | |
| "loss_num": 0.0179443359375, | |
| "loss_xval": 0.08984375, | |
| "num_input_tokens_seen": 18302572, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.2786458333333333, | |
| "grad_norm": 15.444621610105179, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1253, | |
| "num_input_tokens_seen": 18474752, | |
| "step": 107 | |
| }, | |
| { | |
| "epoch": 0.2786458333333333, | |
| "loss": 0.08840958774089813, | |
| "loss_ce": 0.0001527518907096237, | |
| "loss_iou": 0.734375, | |
| "loss_num": 0.0177001953125, | |
| "loss_xval": 0.08837890625, | |
| "num_input_tokens_seen": 18474752, | |
| "step": 107 | |
| }, | |
| { | |
| "epoch": 0.28125, | |
| "grad_norm": 9.197225254514525, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1234, | |
| "num_input_tokens_seen": 18647420, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.28125, | |
| "loss": 0.1322825402021408, | |
| "loss_ce": 8.039205567911267e-05, | |
| "loss_iou": 0.81640625, | |
| "loss_num": 0.0264892578125, | |
| "loss_xval": 0.1318359375, | |
| "num_input_tokens_seen": 18647420, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.2838541666666667, | |
| "grad_norm": 4.014816953952452, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1298, | |
| "num_input_tokens_seen": 18820060, | |
| "step": 109 | |
| }, | |
| { | |
| "epoch": 0.2838541666666667, | |
| "loss": 0.18935684859752655, | |
| "loss_ce": 8.681887993589044e-05, | |
| "loss_iou": 0.703125, | |
| "loss_num": 0.037841796875, | |
| "loss_xval": 0.189453125, | |
| "num_input_tokens_seen": 18820060, | |
| "step": 109 | |
| }, | |
| { | |
| "epoch": 0.2864583333333333, | |
| "grad_norm": 5.301291477011863, | |
| "learning_rate": 5e-06, | |
| "loss": 0.123, | |
| "num_input_tokens_seen": 18992164, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.2864583333333333, | |
| "loss": 0.1676991879940033, | |
| "loss_ce": 9.665168181527406e-05, | |
| "loss_iou": 0.671875, | |
| "loss_num": 0.03369140625, | |
| "loss_xval": 0.16796875, | |
| "num_input_tokens_seen": 18992164, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.2890625, | |
| "grad_norm": 3.7618362724585865, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0973, | |
| "num_input_tokens_seen": 19164016, | |
| "step": 111 | |
| }, | |
| { | |
| "epoch": 0.2890625, | |
| "loss": 0.05550282821059227, | |
| "loss_ce": 5.23876296938397e-05, | |
| "loss_iou": 0.953125, | |
| "loss_num": 0.0111083984375, | |
| "loss_xval": 0.055419921875, | |
| "num_input_tokens_seen": 19164016, | |
| "step": 111 | |
| }, | |
| { | |
| "epoch": 0.2916666666666667, | |
| "grad_norm": 6.877157018975216, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1429, | |
| "num_input_tokens_seen": 19336416, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.2916666666666667, | |
| "loss": 0.21898075938224792, | |
| "loss_ce": 0.00023076393699739128, | |
| "loss_iou": 0.63671875, | |
| "loss_num": 0.043701171875, | |
| "loss_xval": 0.21875, | |
| "num_input_tokens_seen": 19336416, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.2942708333333333, | |
| "grad_norm": 8.699267895879803, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1221, | |
| "num_input_tokens_seen": 19508784, | |
| "step": 113 | |
| }, | |
| { | |
| "epoch": 0.2942708333333333, | |
| "loss": 0.11330369114875793, | |
| "loss_ce": 0.00014450862363446504, | |
| "loss_iou": 0.703125, | |
| "loss_num": 0.0225830078125, | |
| "loss_xval": 0.11328125, | |
| "num_input_tokens_seen": 19508784, | |
| "step": 113 | |
| }, | |
| { | |
| "epoch": 0.296875, | |
| "grad_norm": 6.679175716055245, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1118, | |
| "num_input_tokens_seen": 19681104, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.296875, | |
| "loss": 0.09517869353294373, | |
| "loss_ce": 8.592366793891415e-05, | |
| "loss_iou": 0.77734375, | |
| "loss_num": 0.01904296875, | |
| "loss_xval": 0.09521484375, | |
| "num_input_tokens_seen": 19681104, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.2994791666666667, | |
| "grad_norm": 18.55418733227958, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1435, | |
| "num_input_tokens_seen": 19853176, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.2994791666666667, | |
| "loss": 0.1039402186870575, | |
| "loss_ce": 5.8387617173139006e-05, | |
| "loss_iou": 0.72265625, | |
| "loss_num": 0.020751953125, | |
| "loss_xval": 0.10400390625, | |
| "num_input_tokens_seen": 19853176, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.3020833333333333, | |
| "grad_norm": 4.232168331373671, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1276, | |
| "num_input_tokens_seen": 20025704, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.3020833333333333, | |
| "loss": 0.08785620331764221, | |
| "loss_ce": 8.76499543664977e-05, | |
| "loss_iou": 0.69140625, | |
| "loss_num": 0.017578125, | |
| "loss_xval": 0.087890625, | |
| "num_input_tokens_seen": 20025704, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.3046875, | |
| "grad_norm": 6.847887859581621, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1147, | |
| "num_input_tokens_seen": 20198488, | |
| "step": 117 | |
| }, | |
| { | |
| "epoch": 0.3046875, | |
| "loss": 0.1606612354516983, | |
| "loss_ce": 7.774594996590167e-05, | |
| "loss_iou": 0.859375, | |
| "loss_num": 0.031982421875, | |
| "loss_xval": 0.16015625, | |
| "num_input_tokens_seen": 20198488, | |
| "step": 117 | |
| }, | |
| { | |
| "epoch": 0.3072916666666667, | |
| "grad_norm": 4.391317523713796, | |
| "learning_rate": 5e-06, | |
| "loss": 0.12, | |
| "num_input_tokens_seen": 20371684, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 0.3072916666666667, | |
| "loss": 0.10194739699363708, | |
| "loss_ce": 0.00014075401122681797, | |
| "loss_iou": 0.68359375, | |
| "loss_num": 0.0203857421875, | |
| "loss_xval": 0.1015625, | |
| "num_input_tokens_seen": 20371684, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 0.3098958333333333, | |
| "grad_norm": 8.958657986306372, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1174, | |
| "num_input_tokens_seen": 20544172, | |
| "step": 119 | |
| }, | |
| { | |
| "epoch": 0.3098958333333333, | |
| "loss": 0.1237276941537857, | |
| "loss_ce": 0.0004366845532786101, | |
| "loss_iou": 0.71484375, | |
| "loss_num": 0.024658203125, | |
| "loss_xval": 0.123046875, | |
| "num_input_tokens_seen": 20544172, | |
| "step": 119 | |
| }, | |
| { | |
| "epoch": 0.3125, | |
| "grad_norm": 3.5170839929817417, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1268, | |
| "num_input_tokens_seen": 20717160, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.3125, | |
| "loss": 0.15593719482421875, | |
| "loss_ce": 0.00017548247706145048, | |
| "loss_iou": 0.890625, | |
| "loss_num": 0.0311279296875, | |
| "loss_xval": 0.15625, | |
| "num_input_tokens_seen": 20717160, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.3151041666666667, | |
| "grad_norm": 6.739906995090889, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1242, | |
| "num_input_tokens_seen": 20890032, | |
| "step": 121 | |
| }, | |
| { | |
| "epoch": 0.3151041666666667, | |
| "loss": 0.1494368314743042, | |
| "loss_ce": 0.0012434859527274966, | |
| "loss_iou": 0.671875, | |
| "loss_num": 0.0296630859375, | |
| "loss_xval": 0.1484375, | |
| "num_input_tokens_seen": 20890032, | |
| "step": 121 | |
| }, | |
| { | |
| "epoch": 0.3177083333333333, | |
| "grad_norm": 6.127165000561302, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1151, | |
| "num_input_tokens_seen": 21062984, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 0.3177083333333333, | |
| "loss": 0.09486885368824005, | |
| "loss_ce": 0.0001422952045686543, | |
| "loss_iou": 0.80859375, | |
| "loss_num": 0.0189208984375, | |
| "loss_xval": 0.0947265625, | |
| "num_input_tokens_seen": 21062984, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 0.3203125, | |
| "grad_norm": 8.718508748737245, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1031, | |
| "num_input_tokens_seen": 21235792, | |
| "step": 123 | |
| }, | |
| { | |
| "epoch": 0.3203125, | |
| "loss": 0.11195512861013412, | |
| "loss_ce": 0.0001387260272167623, | |
| "loss_iou": 0.49609375, | |
| "loss_num": 0.0223388671875, | |
| "loss_xval": 0.11181640625, | |
| "num_input_tokens_seen": 21235792, | |
| "step": 123 | |
| }, | |
| { | |
| "epoch": 0.3229166666666667, | |
| "grad_norm": 13.341861393347486, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1039, | |
| "num_input_tokens_seen": 21407888, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.3229166666666667, | |
| "loss": 0.11872819066047668, | |
| "loss_ce": 0.0001673972437856719, | |
| "loss_iou": 0.703125, | |
| "loss_num": 0.023681640625, | |
| "loss_xval": 0.11865234375, | |
| "num_input_tokens_seen": 21407888, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.3255208333333333, | |
| "grad_norm": 10.748431502763593, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1201, | |
| "num_input_tokens_seen": 21580252, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.3255208333333333, | |
| "loss": 0.14679506421089172, | |
| "loss_ce": 6.655443576164544e-05, | |
| "loss_iou": 0.373046875, | |
| "loss_num": 0.029296875, | |
| "loss_xval": 0.146484375, | |
| "num_input_tokens_seen": 21580252, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.328125, | |
| "grad_norm": 9.981967396091962, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1147, | |
| "num_input_tokens_seen": 21753052, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 0.328125, | |
| "loss": 0.09238539636135101, | |
| "loss_ce": 0.00010023377399193123, | |
| "loss_iou": 0.6328125, | |
| "loss_num": 0.0184326171875, | |
| "loss_xval": 0.09228515625, | |
| "num_input_tokens_seen": 21753052, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 0.3307291666666667, | |
| "grad_norm": 8.119992313803278, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1072, | |
| "num_input_tokens_seen": 21925632, | |
| "step": 127 | |
| }, | |
| { | |
| "epoch": 0.3307291666666667, | |
| "loss": 0.07983443140983582, | |
| "loss_ce": 9.200449858326465e-05, | |
| "loss_iou": 0.8046875, | |
| "loss_num": 0.0159912109375, | |
| "loss_xval": 0.07958984375, | |
| "num_input_tokens_seen": 21925632, | |
| "step": 127 | |
| }, | |
| { | |
| "epoch": 0.3333333333333333, | |
| "grad_norm": 6.9850353772680105, | |
| "learning_rate": 5e-06, | |
| "loss": 0.125, | |
| "num_input_tokens_seen": 22098616, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.3333333333333333, | |
| "loss": 0.12042608857154846, | |
| "loss_ce": 0.0001257982075912878, | |
| "loss_iou": 0.65234375, | |
| "loss_num": 0.0240478515625, | |
| "loss_xval": 0.1201171875, | |
| "num_input_tokens_seen": 22098616, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.3359375, | |
| "grad_norm": 3.3562574299779073, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0891, | |
| "num_input_tokens_seen": 22270980, | |
| "step": 129 | |
| }, | |
| { | |
| "epoch": 0.3359375, | |
| "loss": 0.07171538472175598, | |
| "loss_ce": 0.0001516598858870566, | |
| "loss_iou": 0.9609375, | |
| "loss_num": 0.01434326171875, | |
| "loss_xval": 0.07177734375, | |
| "num_input_tokens_seen": 22270980, | |
| "step": 129 | |
| }, | |
| { | |
| "epoch": 0.3385416666666667, | |
| "grad_norm": 2.474071432452823, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0986, | |
| "num_input_tokens_seen": 22443752, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.3385416666666667, | |
| "loss": 0.071955606341362, | |
| "loss_ce": 0.00020878079521935433, | |
| "loss_iou": 0.7890625, | |
| "loss_num": 0.01434326171875, | |
| "loss_xval": 0.07177734375, | |
| "num_input_tokens_seen": 22443752, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.3411458333333333, | |
| "grad_norm": 4.769496774720465, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0827, | |
| "num_input_tokens_seen": 22616684, | |
| "step": 131 | |
| }, | |
| { | |
| "epoch": 0.3411458333333333, | |
| "loss": 0.08630406856536865, | |
| "loss_ce": 0.00021397518867161125, | |
| "loss_iou": 0.68359375, | |
| "loss_num": 0.0172119140625, | |
| "loss_xval": 0.0859375, | |
| "num_input_tokens_seen": 22616684, | |
| "step": 131 | |
| }, | |
| { | |
| "epoch": 0.34375, | |
| "grad_norm": 14.025079611665472, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0834, | |
| "num_input_tokens_seen": 22789044, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 0.34375, | |
| "loss": 0.10616521537303925, | |
| "loss_ce": 0.00011663565237540752, | |
| "loss_iou": 0.0, | |
| "loss_num": 0.021240234375, | |
| "loss_xval": 0.10595703125, | |
| "num_input_tokens_seen": 22789044, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 0.3463541666666667, | |
| "grad_norm": 3.9669475156886946, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1167, | |
| "num_input_tokens_seen": 22962080, | |
| "step": 133 | |
| }, | |
| { | |
| "epoch": 0.3463541666666667, | |
| "loss": 0.11822222173213959, | |
| "loss_ce": 0.00014970809570513666, | |
| "loss_iou": 0.7109375, | |
| "loss_num": 0.023681640625, | |
| "loss_xval": 0.1181640625, | |
| "num_input_tokens_seen": 22962080, | |
| "step": 133 | |
| }, | |
| { | |
| "epoch": 0.3489583333333333, | |
| "grad_norm": 6.482768707064352, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1163, | |
| "num_input_tokens_seen": 23135120, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 0.3489583333333333, | |
| "loss": 0.09390418976545334, | |
| "loss_ce": 0.00012366939336061478, | |
| "loss_iou": 0.8125, | |
| "loss_num": 0.018798828125, | |
| "loss_xval": 0.09375, | |
| "num_input_tokens_seen": 23135120, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 0.3515625, | |
| "grad_norm": 4.75477602454939, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1443, | |
| "num_input_tokens_seen": 23308372, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.3515625, | |
| "loss": 0.1679096817970276, | |
| "loss_ce": 6.299919914454222e-05, | |
| "loss_iou": 0.578125, | |
| "loss_num": 0.033447265625, | |
| "loss_xval": 0.16796875, | |
| "num_input_tokens_seen": 23308372, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.3541666666666667, | |
| "grad_norm": 2.514900037834426, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1001, | |
| "num_input_tokens_seen": 23480360, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.3541666666666667, | |
| "loss": 0.10733547061681747, | |
| "loss_ce": 6.619012128794566e-05, | |
| "loss_iou": 0.6875, | |
| "loss_num": 0.021484375, | |
| "loss_xval": 0.107421875, | |
| "num_input_tokens_seen": 23480360, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.3567708333333333, | |
| "grad_norm": 4.934909863394261, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1083, | |
| "num_input_tokens_seen": 23653652, | |
| "step": 137 | |
| }, | |
| { | |
| "epoch": 0.3567708333333333, | |
| "loss": 0.11394178867340088, | |
| "loss_ce": 8.070516923908144e-05, | |
| "loss_iou": 0.8125, | |
| "loss_num": 0.0228271484375, | |
| "loss_xval": 0.11376953125, | |
| "num_input_tokens_seen": 23653652, | |
| "step": 137 | |
| }, | |
| { | |
| "epoch": 0.359375, | |
| "grad_norm": 3.707663252931766, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0869, | |
| "num_input_tokens_seen": 23826220, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 0.359375, | |
| "loss": 0.08403357863426208, | |
| "loss_ce": 0.0001407563831890002, | |
| "loss_iou": 0.8046875, | |
| "loss_num": 0.0167236328125, | |
| "loss_xval": 0.083984375, | |
| "num_input_tokens_seen": 23826220, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 0.3619791666666667, | |
| "grad_norm": 5.810148215517029, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0944, | |
| "num_input_tokens_seen": 23998808, | |
| "step": 139 | |
| }, | |
| { | |
| "epoch": 0.3619791666666667, | |
| "loss": 0.15188120305538177, | |
| "loss_ce": 8.676404104335234e-05, | |
| "loss_iou": 0.66796875, | |
| "loss_num": 0.0303955078125, | |
| "loss_xval": 0.1513671875, | |
| "num_input_tokens_seen": 23998808, | |
| "step": 139 | |
| }, | |
| { | |
| "epoch": 0.3645833333333333, | |
| "grad_norm": 5.097709919840357, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1118, | |
| "num_input_tokens_seen": 24171244, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.3645833333333333, | |
| "loss": 0.0743027776479721, | |
| "loss_ce": 8.402515959460288e-05, | |
| "loss_iou": 0.5859375, | |
| "loss_num": 0.01483154296875, | |
| "loss_xval": 0.07421875, | |
| "num_input_tokens_seen": 24171244, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.3671875, | |
| "grad_norm": 29.485026694205214, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1345, | |
| "num_input_tokens_seen": 24343728, | |
| "step": 141 | |
| }, | |
| { | |
| "epoch": 0.3671875, | |
| "loss": 0.20851582288742065, | |
| "loss_ce": 0.00012653246812988073, | |
| "loss_iou": 0.0, | |
| "loss_num": 0.041748046875, | |
| "loss_xval": 0.2080078125, | |
| "num_input_tokens_seen": 24343728, | |
| "step": 141 | |
| }, | |
| { | |
| "epoch": 0.3697916666666667, | |
| "grad_norm": 15.306197535117493, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1169, | |
| "num_input_tokens_seen": 24516776, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 0.3697916666666667, | |
| "loss": 0.06858328729867935, | |
| "loss_ce": 7.132141035981476e-05, | |
| "loss_iou": 0.6328125, | |
| "loss_num": 0.013671875, | |
| "loss_xval": 0.068359375, | |
| "num_input_tokens_seen": 24516776, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 0.3723958333333333, | |
| "grad_norm": 7.8570075555495205, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0911, | |
| "num_input_tokens_seen": 24689788, | |
| "step": 143 | |
| }, | |
| { | |
| "epoch": 0.3723958333333333, | |
| "loss": 0.10039569437503815, | |
| "loss_ce": 8.441291720373556e-05, | |
| "loss_iou": 0.443359375, | |
| "loss_num": 0.02001953125, | |
| "loss_xval": 0.10009765625, | |
| "num_input_tokens_seen": 24689788, | |
| "step": 143 | |
| }, | |
| { | |
| "epoch": 0.375, | |
| "grad_norm": 7.50330036811974, | |
| "learning_rate": 5e-06, | |
| "loss": 0.125, | |
| "num_input_tokens_seen": 24862452, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.375, | |
| "loss": 0.06121515482664108, | |
| "loss_ce": 5.792636875412427e-05, | |
| "loss_iou": 0.6875, | |
| "loss_num": 0.01226806640625, | |
| "loss_xval": 0.06103515625, | |
| "num_input_tokens_seen": 24862452, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.3776041666666667, | |
| "grad_norm": 9.259685096230124, | |
| "learning_rate": 5e-06, | |
| "loss": 0.115, | |
| "num_input_tokens_seen": 25035336, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.3776041666666667, | |
| "loss": 0.09985796362161636, | |
| "loss_ce": 0.0002180726150982082, | |
| "loss_iou": 0.79296875, | |
| "loss_num": 0.0198974609375, | |
| "loss_xval": 0.099609375, | |
| "num_input_tokens_seen": 25035336, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.3802083333333333, | |
| "grad_norm": 4.49972816018969, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0953, | |
| "num_input_tokens_seen": 25207968, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 0.3802083333333333, | |
| "loss": 0.10796058923006058, | |
| "loss_ce": 0.0020951118785887957, | |
| "loss_iou": 0.54296875, | |
| "loss_num": 0.021240234375, | |
| "loss_xval": 0.10595703125, | |
| "num_input_tokens_seen": 25207968, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 0.3828125, | |
| "grad_norm": 5.73441077024277, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1225, | |
| "num_input_tokens_seen": 25380784, | |
| "step": 147 | |
| }, | |
| { | |
| "epoch": 0.3828125, | |
| "loss": 0.09899605065584183, | |
| "loss_ce": 5.806491753901355e-05, | |
| "loss_iou": 0.7578125, | |
| "loss_num": 0.019775390625, | |
| "loss_xval": 0.09912109375, | |
| "num_input_tokens_seen": 25380784, | |
| "step": 147 | |
| }, | |
| { | |
| "epoch": 0.3854166666666667, | |
| "grad_norm": 3.6755366051445137, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1046, | |
| "num_input_tokens_seen": 25553764, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 0.3854166666666667, | |
| "loss": 0.07523618638515472, | |
| "loss_ce": 7.139628723962232e-05, | |
| "loss_iou": 0.71875, | |
| "loss_num": 0.0150146484375, | |
| "loss_xval": 0.0751953125, | |
| "num_input_tokens_seen": 25553764, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 0.3880208333333333, | |
| "grad_norm": 38.91246982097314, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1093, | |
| "num_input_tokens_seen": 25726456, | |
| "step": 149 | |
| }, | |
| { | |
| "epoch": 0.3880208333333333, | |
| "loss": 0.08839882165193558, | |
| "loss_ce": 8.095278462860733e-05, | |
| "loss_iou": 0.80078125, | |
| "loss_num": 0.0177001953125, | |
| "loss_xval": 0.08837890625, | |
| "num_input_tokens_seen": 25726456, | |
| "step": 149 | |
| }, | |
| { | |
| "epoch": 0.390625, | |
| "grad_norm": 10.207658282865648, | |
| "learning_rate": 5e-06, | |
| "loss": 0.123, | |
| "num_input_tokens_seen": 25899148, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.390625, | |
| "loss": 0.0730600580573082, | |
| "loss_ce": 0.000214598243474029, | |
| "loss_iou": 0.625, | |
| "loss_num": 0.01458740234375, | |
| "loss_xval": 0.07275390625, | |
| "num_input_tokens_seen": 25899148, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.3932291666666667, | |
| "grad_norm": 4.730292038840616, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0989, | |
| "num_input_tokens_seen": 26072084, | |
| "step": 151 | |
| }, | |
| { | |
| "epoch": 0.3932291666666667, | |
| "loss": 0.15038591623306274, | |
| "loss_ce": 0.00011736503802239895, | |
| "loss_iou": 0.578125, | |
| "loss_num": 0.030029296875, | |
| "loss_xval": 0.150390625, | |
| "num_input_tokens_seen": 26072084, | |
| "step": 151 | |
| }, | |
| { | |
| "epoch": 0.3958333333333333, | |
| "grad_norm": 3.321333890252999, | |
| "learning_rate": 5e-06, | |
| "loss": 0.103, | |
| "num_input_tokens_seen": 26244756, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 0.3958333333333333, | |
| "loss": 0.08549217134714127, | |
| "loss_ce": 0.0001497594639658928, | |
| "loss_iou": 0.51953125, | |
| "loss_num": 0.01708984375, | |
| "loss_xval": 0.08544921875, | |
| "num_input_tokens_seen": 26244756, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 0.3984375, | |
| "grad_norm": 6.087065910058266, | |
| "learning_rate": 5e-06, | |
| "loss": 0.08, | |
| "num_input_tokens_seen": 26417208, | |
| "step": 153 | |
| }, | |
| { | |
| "epoch": 0.3984375, | |
| "loss": 0.073136106133461, | |
| "loss_ce": 4.6509514504577965e-05, | |
| "loss_iou": 0.58203125, | |
| "loss_num": 0.0146484375, | |
| "loss_xval": 0.0732421875, | |
| "num_input_tokens_seen": 26417208, | |
| "step": 153 | |
| }, | |
| { | |
| "epoch": 0.4010416666666667, | |
| "grad_norm": 4.65746127286459, | |
| "learning_rate": 5e-06, | |
| "loss": 0.114, | |
| "num_input_tokens_seen": 26590204, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 0.4010416666666667, | |
| "loss": 0.10405319184064865, | |
| "loss_ce": 7.980540976859629e-05, | |
| "loss_iou": 0.6953125, | |
| "loss_num": 0.020751953125, | |
| "loss_xval": 0.10400390625, | |
| "num_input_tokens_seen": 26590204, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 0.4036458333333333, | |
| "grad_norm": 6.888837612325361, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1096, | |
| "num_input_tokens_seen": 26762676, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 0.4036458333333333, | |
| "loss": 0.11900262534618378, | |
| "loss_ce": 7.562051177956164e-05, | |
| "loss_iou": 0.0, | |
| "loss_num": 0.0238037109375, | |
| "loss_xval": 0.119140625, | |
| "num_input_tokens_seen": 26762676, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 0.40625, | |
| "grad_norm": 5.172471219817385, | |
| "learning_rate": 5e-06, | |
| "loss": 0.111, | |
| "num_input_tokens_seen": 26934984, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 0.40625, | |
| "loss": 0.11719199270009995, | |
| "loss_ce": 6.55300755170174e-05, | |
| "loss_iou": 0.64453125, | |
| "loss_num": 0.0234375, | |
| "loss_xval": 0.1171875, | |
| "num_input_tokens_seen": 26934984, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 0.4088541666666667, | |
| "grad_norm": 4.328240204635411, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1044, | |
| "num_input_tokens_seen": 27106980, | |
| "step": 157 | |
| }, | |
| { | |
| "epoch": 0.4088541666666667, | |
| "loss": 0.18060433864593506, | |
| "loss_ce": 0.0002149457432096824, | |
| "loss_iou": 0.5703125, | |
| "loss_num": 0.0361328125, | |
| "loss_xval": 0.1806640625, | |
| "num_input_tokens_seen": 27106980, | |
| "step": 157 | |
| }, | |
| { | |
| "epoch": 0.4114583333333333, | |
| "grad_norm": 24.038857971844152, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0807, | |
| "num_input_tokens_seen": 27279788, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 0.4114583333333333, | |
| "loss": 0.07357801496982574, | |
| "loss_ce": 0.00018323655240237713, | |
| "loss_iou": 0.0, | |
| "loss_num": 0.01470947265625, | |
| "loss_xval": 0.0732421875, | |
| "num_input_tokens_seen": 27279788, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 0.4140625, | |
| "grad_norm": 7.8628512106902315, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0787, | |
| "num_input_tokens_seen": 27452544, | |
| "step": 159 | |
| }, | |
| { | |
| "epoch": 0.4140625, | |
| "loss": 0.07588262856006622, | |
| "loss_ce": 9.22220351640135e-05, | |
| "loss_iou": 0.55859375, | |
| "loss_num": 0.01519775390625, | |
| "loss_xval": 0.07568359375, | |
| "num_input_tokens_seen": 27452544, | |
| "step": 159 | |
| }, | |
| { | |
| "epoch": 0.4166666666666667, | |
| "grad_norm": 13.319740473348578, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0969, | |
| "num_input_tokens_seen": 27625396, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.4166666666666667, | |
| "loss": 0.09103557467460632, | |
| "loss_ce": 6.267878779908642e-05, | |
| "loss_iou": 0.5546875, | |
| "loss_num": 0.0181884765625, | |
| "loss_xval": 0.0908203125, | |
| "num_input_tokens_seen": 27625396, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.4192708333333333, | |
| "grad_norm": 4.7866046147187715, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1014, | |
| "num_input_tokens_seen": 27797456, | |
| "step": 161 | |
| }, | |
| { | |
| "epoch": 0.4192708333333333, | |
| "loss": 0.08366774767637253, | |
| "loss_ce": 8.010101737454534e-05, | |
| "loss_iou": 0.5546875, | |
| "loss_num": 0.0167236328125, | |
| "loss_xval": 0.08349609375, | |
| "num_input_tokens_seen": 27797456, | |
| "step": 161 | |
| }, | |
| { | |
| "epoch": 0.421875, | |
| "grad_norm": 2.272455331760193, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0512, | |
| "num_input_tokens_seen": 27969760, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 0.421875, | |
| "loss": 0.03556237369775772, | |
| "loss_ce": 3.9912010834086686e-05, | |
| "loss_iou": 0.7109375, | |
| "loss_num": 0.007110595703125, | |
| "loss_xval": 0.03564453125, | |
| "num_input_tokens_seen": 27969760, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 0.4244791666666667, | |
| "grad_norm": 14.623479662016367, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0969, | |
| "num_input_tokens_seen": 28141788, | |
| "step": 163 | |
| }, | |
| { | |
| "epoch": 0.4244791666666667, | |
| "loss": 0.06145535781979561, | |
| "loss_ce": 0.0001302829186897725, | |
| "loss_iou": 0.478515625, | |
| "loss_num": 0.01226806640625, | |
| "loss_xval": 0.061279296875, | |
| "num_input_tokens_seen": 28141788, | |
| "step": 163 | |
| }, | |
| { | |
| "epoch": 0.4270833333333333, | |
| "grad_norm": 5.18949662678828, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0929, | |
| "num_input_tokens_seen": 28314784, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 0.4270833333333333, | |
| "loss": 0.09928463399410248, | |
| "loss_ce": 7.19891395419836e-05, | |
| "loss_iou": 0.66015625, | |
| "loss_num": 0.0198974609375, | |
| "loss_xval": 0.09912109375, | |
| "num_input_tokens_seen": 28314784, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 0.4296875, | |
| "grad_norm": 11.297198645176522, | |
| "learning_rate": 5e-06, | |
| "loss": 0.168, | |
| "num_input_tokens_seen": 28488116, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 0.4296875, | |
| "loss": 0.2097131311893463, | |
| "loss_ce": 0.00036255159648135304, | |
| "loss_iou": 0.58203125, | |
| "loss_num": 0.0419921875, | |
| "loss_xval": 0.208984375, | |
| "num_input_tokens_seen": 28488116, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 0.4322916666666667, | |
| "grad_norm": 3.7511749963618604, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1104, | |
| "num_input_tokens_seen": 28660644, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 0.4322916666666667, | |
| "loss": 0.14858925342559814, | |
| "loss_ce": 0.00012123005581088364, | |
| "loss_iou": 0.72265625, | |
| "loss_num": 0.0296630859375, | |
| "loss_xval": 0.1484375, | |
| "num_input_tokens_seen": 28660644, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 0.4348958333333333, | |
| "grad_norm": 3.143289835870396, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1021, | |
| "num_input_tokens_seen": 28833256, | |
| "step": 167 | |
| }, | |
| { | |
| "epoch": 0.4348958333333333, | |
| "loss": 0.07967463880777359, | |
| "loss_ce": 0.00023738775053061545, | |
| "loss_iou": 0.6796875, | |
| "loss_num": 0.015869140625, | |
| "loss_xval": 0.07958984375, | |
| "num_input_tokens_seen": 28833256, | |
| "step": 167 | |
| }, | |
| { | |
| "epoch": 0.4375, | |
| "grad_norm": 2.7797894675264336, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0608, | |
| "num_input_tokens_seen": 29005644, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 0.4375, | |
| "loss": 0.03842185065150261, | |
| "loss_ce": 9.177176252705976e-05, | |
| "loss_iou": 0.6875, | |
| "loss_num": 0.007659912109375, | |
| "loss_xval": 0.038330078125, | |
| "num_input_tokens_seen": 29005644, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 0.4401041666666667, | |
| "grad_norm": 5.829730930450416, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0882, | |
| "num_input_tokens_seen": 29178140, | |
| "step": 169 | |
| }, | |
| { | |
| "epoch": 0.4401041666666667, | |
| "loss": 0.08472438156604767, | |
| "loss_ce": 0.0001754334516590461, | |
| "loss_iou": 0.0, | |
| "loss_num": 0.0169677734375, | |
| "loss_xval": 0.08447265625, | |
| "num_input_tokens_seen": 29178140, | |
| "step": 169 | |
| }, | |
| { | |
| "epoch": 0.4427083333333333, | |
| "grad_norm": 19.34918748164043, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0919, | |
| "num_input_tokens_seen": 29350724, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.4427083333333333, | |
| "loss": 0.14068102836608887, | |
| "loss_ce": 0.00017810959252528846, | |
| "loss_iou": 0.68359375, | |
| "loss_num": 0.028076171875, | |
| "loss_xval": 0.140625, | |
| "num_input_tokens_seen": 29350724, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.4453125, | |
| "grad_norm": 11.305442675935751, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0914, | |
| "num_input_tokens_seen": 29523556, | |
| "step": 171 | |
| }, | |
| { | |
| "epoch": 0.4453125, | |
| "loss": 0.08168038725852966, | |
| "loss_ce": 0.00010690485214581713, | |
| "loss_iou": 0.4921875, | |
| "loss_num": 0.016357421875, | |
| "loss_xval": 0.08154296875, | |
| "num_input_tokens_seen": 29523556, | |
| "step": 171 | |
| }, | |
| { | |
| "epoch": 0.4479166666666667, | |
| "grad_norm": 2.746755888252267, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0956, | |
| "num_input_tokens_seen": 29696232, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 0.4479166666666667, | |
| "loss": 0.08047676831483841, | |
| "loss_ce": 6.295397179201245e-05, | |
| "loss_iou": 0.40234375, | |
| "loss_num": 0.01611328125, | |
| "loss_xval": 0.08056640625, | |
| "num_input_tokens_seen": 29696232, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 0.4505208333333333, | |
| "grad_norm": 6.619988685929648, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0578, | |
| "num_input_tokens_seen": 29868892, | |
| "step": 173 | |
| }, | |
| { | |
| "epoch": 0.4505208333333333, | |
| "loss": 0.03938157111406326, | |
| "loss_ce": 0.0005632122629322112, | |
| "loss_iou": 0.46875, | |
| "loss_num": 0.00775146484375, | |
| "loss_xval": 0.038818359375, | |
| "num_input_tokens_seen": 29868892, | |
| "step": 173 | |
| }, | |
| { | |
| "epoch": 0.453125, | |
| "grad_norm": 11.839215400516537, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1263, | |
| "num_input_tokens_seen": 30041044, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 0.453125, | |
| "loss": 0.06526083499193192, | |
| "loss_ce": 0.0001820992911234498, | |
| "loss_iou": 0.671875, | |
| "loss_num": 0.01300048828125, | |
| "loss_xval": 0.06494140625, | |
| "num_input_tokens_seen": 30041044, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 0.4557291666666667, | |
| "grad_norm": 4.532895192393366, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0624, | |
| "num_input_tokens_seen": 30213960, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.4557291666666667, | |
| "loss": 0.05709821730852127, | |
| "loss_ce": 0.00015241916116792709, | |
| "loss_iou": 0.4140625, | |
| "loss_num": 0.0113525390625, | |
| "loss_xval": 0.056884765625, | |
| "num_input_tokens_seen": 30213960, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.4583333333333333, | |
| "grad_norm": 4.373257654750305, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0684, | |
| "num_input_tokens_seen": 30386860, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 0.4583333333333333, | |
| "loss": 0.048836298286914825, | |
| "loss_ce": 6.920905434526503e-05, | |
| "loss_iou": 0.4296875, | |
| "loss_num": 0.009765625, | |
| "loss_xval": 0.048828125, | |
| "num_input_tokens_seen": 30386860, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 0.4609375, | |
| "grad_norm": 7.579139401570638, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0843, | |
| "num_input_tokens_seen": 30559656, | |
| "step": 177 | |
| }, | |
| { | |
| "epoch": 0.4609375, | |
| "loss": 0.12145084142684937, | |
| "loss_ce": 6.717803626088426e-05, | |
| "loss_iou": 0.0, | |
| "loss_num": 0.0242919921875, | |
| "loss_xval": 0.12158203125, | |
| "num_input_tokens_seen": 30559656, | |
| "step": 177 | |
| }, | |
| { | |
| "epoch": 0.4635416666666667, | |
| "grad_norm": 5.807914334628034, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1275, | |
| "num_input_tokens_seen": 30732276, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 0.4635416666666667, | |
| "loss": 0.10631553828716278, | |
| "loss_ce": 0.00014487920270767063, | |
| "loss_iou": 0.671875, | |
| "loss_num": 0.021240234375, | |
| "loss_xval": 0.10595703125, | |
| "num_input_tokens_seen": 30732276, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 0.4661458333333333, | |
| "grad_norm": 2.6998654471345827, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0584, | |
| "num_input_tokens_seen": 30905448, | |
| "step": 179 | |
| }, | |
| { | |
| "epoch": 0.4661458333333333, | |
| "loss": 0.043617475777864456, | |
| "loss_ce": 0.00012992750271223485, | |
| "loss_iou": 0.67578125, | |
| "loss_num": 0.0086669921875, | |
| "loss_xval": 0.04345703125, | |
| "num_input_tokens_seen": 30905448, | |
| "step": 179 | |
| }, | |
| { | |
| "epoch": 0.46875, | |
| "grad_norm": 10.092481931653555, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0841, | |
| "num_input_tokens_seen": 31078192, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.46875, | |
| "loss": 0.07613378763198853, | |
| "loss_ce": 0.00038915983168408275, | |
| "loss_iou": 0.439453125, | |
| "loss_num": 0.01513671875, | |
| "loss_xval": 0.07568359375, | |
| "num_input_tokens_seen": 31078192, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.4713541666666667, | |
| "grad_norm": 4.850400427659922, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1259, | |
| "num_input_tokens_seen": 31250936, | |
| "step": 181 | |
| }, | |
| { | |
| "epoch": 0.4713541666666667, | |
| "loss": 0.06517961621284485, | |
| "loss_ce": 5.5098360462579876e-05, | |
| "loss_iou": 0.703125, | |
| "loss_num": 0.01300048828125, | |
| "loss_xval": 0.06494140625, | |
| "num_input_tokens_seen": 31250936, | |
| "step": 181 | |
| }, | |
| { | |
| "epoch": 0.4739583333333333, | |
| "grad_norm": 8.116429898780023, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0679, | |
| "num_input_tokens_seen": 31423824, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 0.4739583333333333, | |
| "loss": 0.04832879453897476, | |
| "loss_ce": 0.00014153837400954217, | |
| "loss_iou": 0.67578125, | |
| "loss_num": 0.0096435546875, | |
| "loss_xval": 0.048095703125, | |
| "num_input_tokens_seen": 31423824, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 0.4765625, | |
| "grad_norm": 15.778873010591404, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0796, | |
| "num_input_tokens_seen": 31596028, | |
| "step": 183 | |
| }, | |
| { | |
| "epoch": 0.4765625, | |
| "loss": 0.058887895196676254, | |
| "loss_ce": 9.577826858730987e-05, | |
| "loss_iou": 0.5703125, | |
| "loss_num": 0.01177978515625, | |
| "loss_xval": 0.058837890625, | |
| "num_input_tokens_seen": 31596028, | |
| "step": 183 | |
| }, | |
| { | |
| "epoch": 0.4791666666666667, | |
| "grad_norm": 4.58612996328364, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1133, | |
| "num_input_tokens_seen": 31768480, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 0.4791666666666667, | |
| "loss": 0.07175838947296143, | |
| "loss_ce": 7.259925041580573e-05, | |
| "loss_iou": 0.625, | |
| "loss_num": 0.01434326171875, | |
| "loss_xval": 0.07177734375, | |
| "num_input_tokens_seen": 31768480, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 0.4817708333333333, | |
| "grad_norm": 15.298267591347829, | |
| "learning_rate": 5e-06, | |
| "loss": 0.137, | |
| "num_input_tokens_seen": 31941340, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 0.4817708333333333, | |
| "loss": 0.1580718755722046, | |
| "loss_ce": 0.00023497387883253396, | |
| "loss_iou": 0.6484375, | |
| "loss_num": 0.031494140625, | |
| "loss_xval": 0.158203125, | |
| "num_input_tokens_seen": 31941340, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 0.484375, | |
| "grad_norm": 9.445985569352896, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1414, | |
| "num_input_tokens_seen": 32114196, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 0.484375, | |
| "loss": 0.1261276751756668, | |
| "loss_ce": 0.00012059589062118903, | |
| "loss_iou": 0.68359375, | |
| "loss_num": 0.025146484375, | |
| "loss_xval": 0.1259765625, | |
| "num_input_tokens_seen": 32114196, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 0.4869791666666667, | |
| "grad_norm": 4.074608010814493, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1168, | |
| "num_input_tokens_seen": 32286624, | |
| "step": 187 | |
| }, | |
| { | |
| "epoch": 0.4869791666666667, | |
| "loss": 0.08998198807239532, | |
| "loss_ce": 0.0001382330956403166, | |
| "loss_iou": 0.6328125, | |
| "loss_num": 0.0179443359375, | |
| "loss_xval": 0.08984375, | |
| "num_input_tokens_seen": 32286624, | |
| "step": 187 | |
| }, | |
| { | |
| "epoch": 0.4895833333333333, | |
| "grad_norm": 3.9575106116123293, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1015, | |
| "num_input_tokens_seen": 32459076, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 0.4895833333333333, | |
| "loss": 0.10690590739250183, | |
| "loss_ce": 0.00015541848551947623, | |
| "loss_iou": 0.6328125, | |
| "loss_num": 0.0213623046875, | |
| "loss_xval": 0.10693359375, | |
| "num_input_tokens_seen": 32459076, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 0.4921875, | |
| "grad_norm": 3.7334350922271793, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0944, | |
| "num_input_tokens_seen": 32631908, | |
| "step": 189 | |
| }, | |
| { | |
| "epoch": 0.4921875, | |
| "loss": 0.12938711047172546, | |
| "loss_ce": 8.413316390942782e-05, | |
| "loss_iou": 0.5, | |
| "loss_num": 0.02587890625, | |
| "loss_xval": 0.12890625, | |
| "num_input_tokens_seen": 32631908, | |
| "step": 189 | |
| }, | |
| { | |
| "epoch": 0.4947916666666667, | |
| "grad_norm": 12.613411687656823, | |
| "learning_rate": 5e-06, | |
| "loss": 0.089, | |
| "num_input_tokens_seen": 32804848, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.4947916666666667, | |
| "loss": 0.17628361284732819, | |
| "loss_ce": 0.00019717792747542262, | |
| "loss_iou": 0.56640625, | |
| "loss_num": 0.03515625, | |
| "loss_xval": 0.17578125, | |
| "num_input_tokens_seen": 32804848, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.4973958333333333, | |
| "grad_norm": 10.089904229108118, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0984, | |
| "num_input_tokens_seen": 32977460, | |
| "step": 191 | |
| }, | |
| { | |
| "epoch": 0.4973958333333333, | |
| "loss": 0.09072966128587723, | |
| "loss_ce": 0.00012297437933739275, | |
| "loss_iou": 0.6796875, | |
| "loss_num": 0.0181884765625, | |
| "loss_xval": 0.0908203125, | |
| "num_input_tokens_seen": 32977460, | |
| "step": 191 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "grad_norm": 5.567747432819187, | |
| "learning_rate": 5e-06, | |
| "loss": 0.107, | |
| "num_input_tokens_seen": 33150480, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "loss": 0.08254844695329666, | |
| "loss_ce": 8.995212556328624e-05, | |
| "loss_iou": 0.68359375, | |
| "loss_num": 0.0164794921875, | |
| "loss_xval": 0.08251953125, | |
| "num_input_tokens_seen": 33150480, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 0.5026041666666666, | |
| "grad_norm": 5.606336333733017, | |
| "learning_rate": 5e-06, | |
| "loss": 0.092, | |
| "num_input_tokens_seen": 33322812, | |
| "step": 193 | |
| }, | |
| { | |
| "epoch": 0.5026041666666666, | |
| "loss": 0.12639451026916504, | |
| "loss_ce": 8.224871271522716e-05, | |
| "loss_iou": 0.416015625, | |
| "loss_num": 0.0252685546875, | |
| "loss_xval": 0.1259765625, | |
| "num_input_tokens_seen": 33322812, | |
| "step": 193 | |
| }, | |
| { | |
| "epoch": 0.5052083333333334, | |
| "grad_norm": 10.892578547201238, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0985, | |
| "num_input_tokens_seen": 33494972, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 0.5052083333333334, | |
| "loss": 0.04478445649147034, | |
| "loss_ce": 9.146681259153411e-05, | |
| "loss_iou": 0.50390625, | |
| "loss_num": 0.0089111328125, | |
| "loss_xval": 0.044677734375, | |
| "num_input_tokens_seen": 33494972, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 0.5078125, | |
| "grad_norm": 6.379235584994378, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0909, | |
| "num_input_tokens_seen": 33667632, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 0.5078125, | |
| "loss": 0.08593515306711197, | |
| "loss_ce": 8.921044354792684e-05, | |
| "loss_iou": 0.56640625, | |
| "loss_num": 0.0172119140625, | |
| "loss_xval": 0.0859375, | |
| "num_input_tokens_seen": 33667632, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 0.5104166666666666, | |
| "grad_norm": 9.027964931503206, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1088, | |
| "num_input_tokens_seen": 33840020, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 0.5104166666666666, | |
| "loss": 0.07991337776184082, | |
| "loss_ce": 0.00010991313320118934, | |
| "loss_iou": 0.7265625, | |
| "loss_num": 0.0159912109375, | |
| "loss_xval": 0.07958984375, | |
| "num_input_tokens_seen": 33840020, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 0.5130208333333334, | |
| "grad_norm": 7.170409659790098, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1238, | |
| "num_input_tokens_seen": 34013036, | |
| "step": 197 | |
| }, | |
| { | |
| "epoch": 0.5130208333333334, | |
| "loss": 0.18947342038154602, | |
| "loss_ce": 0.00014236349670682102, | |
| "loss_iou": 0.60546875, | |
| "loss_num": 0.037841796875, | |
| "loss_xval": 0.189453125, | |
| "num_input_tokens_seen": 34013036, | |
| "step": 197 | |
| }, | |
| { | |
| "epoch": 0.515625, | |
| "grad_norm": 4.032339612187944, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0872, | |
| "num_input_tokens_seen": 34186220, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 0.515625, | |
| "loss": 0.07599274069070816, | |
| "loss_ce": 6.500923336716369e-05, | |
| "loss_iou": 0.5625, | |
| "loss_num": 0.01519775390625, | |
| "loss_xval": 0.076171875, | |
| "num_input_tokens_seen": 34186220, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 0.5182291666666666, | |
| "grad_norm": 4.904239326276205, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0764, | |
| "num_input_tokens_seen": 34359052, | |
| "step": 199 | |
| }, | |
| { | |
| "epoch": 0.5182291666666666, | |
| "loss": 0.08559094369411469, | |
| "loss_ce": 0.0001112048194045201, | |
| "loss_iou": 0.59765625, | |
| "loss_num": 0.01708984375, | |
| "loss_xval": 0.08544921875, | |
| "num_input_tokens_seen": 34359052, | |
| "step": 199 | |
| }, | |
| { | |
| "epoch": 0.5208333333333334, | |
| "grad_norm": 6.516342930606259, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0773, | |
| "num_input_tokens_seen": 34531672, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.5208333333333334, | |
| "loss": 0.05880989879369736, | |
| "loss_ce": 0.00010933385055977851, | |
| "loss_iou": 0.66796875, | |
| "loss_num": 0.01171875, | |
| "loss_xval": 0.05859375, | |
| "num_input_tokens_seen": 34531672, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.5234375, | |
| "grad_norm": 3.361383386602773, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0848, | |
| "num_input_tokens_seen": 34704136, | |
| "step": 201 | |
| }, | |
| { | |
| "epoch": 0.5234375, | |
| "loss": 0.05374922603368759, | |
| "loss_ce": 6.880733417347074e-05, | |
| "loss_iou": 0.484375, | |
| "loss_num": 0.0107421875, | |
| "loss_xval": 0.0537109375, | |
| "num_input_tokens_seen": 34704136, | |
| "step": 201 | |
| }, | |
| { | |
| "epoch": 0.5260416666666666, | |
| "grad_norm": 11.210671135103166, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1067, | |
| "num_input_tokens_seen": 34877364, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 0.5260416666666666, | |
| "loss": 0.09961295872926712, | |
| "loss_ce": 6.461787415901199e-05, | |
| "loss_iou": 0.7109375, | |
| "loss_num": 0.0198974609375, | |
| "loss_xval": 0.099609375, | |
| "num_input_tokens_seen": 34877364, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 0.5286458333333334, | |
| "grad_norm": 6.444880899253943, | |
| "learning_rate": 5e-06, | |
| "loss": 0.111, | |
| "num_input_tokens_seen": 35050192, | |
| "step": 203 | |
| }, | |
| { | |
| "epoch": 0.5286458333333334, | |
| "loss": 0.09913990646600723, | |
| "loss_ce": 7.98513792688027e-05, | |
| "loss_iou": 0.5234375, | |
| "loss_num": 0.019775390625, | |
| "loss_xval": 0.09912109375, | |
| "num_input_tokens_seen": 35050192, | |
| "step": 203 | |
| }, | |
| { | |
| "epoch": 0.53125, | |
| "grad_norm": 3.8614428868304533, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1063, | |
| "num_input_tokens_seen": 35223020, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 0.53125, | |
| "loss": 0.06953012943267822, | |
| "loss_ce": 7.212607306428254e-05, | |
| "loss_iou": 0.7421875, | |
| "loss_num": 0.013916015625, | |
| "loss_xval": 0.0693359375, | |
| "num_input_tokens_seen": 35223020, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 0.5338541666666666, | |
| "grad_norm": 6.191654916504458, | |
| "learning_rate": 5e-06, | |
| "loss": 0.091, | |
| "num_input_tokens_seen": 35396176, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 0.5338541666666666, | |
| "loss": 0.15430204570293427, | |
| "loss_ce": 0.00027982849860563874, | |
| "loss_iou": 0.49609375, | |
| "loss_num": 0.03076171875, | |
| "loss_xval": 0.154296875, | |
| "num_input_tokens_seen": 35396176, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 0.5364583333333334, | |
| "grad_norm": 5.468880474808822, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0667, | |
| "num_input_tokens_seen": 35568912, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 0.5364583333333334, | |
| "loss": 0.06578241288661957, | |
| "loss_ce": 6.280931120272726e-05, | |
| "loss_iou": 0.55078125, | |
| "loss_num": 0.01318359375, | |
| "loss_xval": 0.06591796875, | |
| "num_input_tokens_seen": 35568912, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 0.5390625, | |
| "grad_norm": 5.886325106674437, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1381, | |
| "num_input_tokens_seen": 35741540, | |
| "step": 207 | |
| }, | |
| { | |
| "epoch": 0.5390625, | |
| "loss": 0.11603943258523941, | |
| "loss_ce": 0.00010315363761037588, | |
| "loss_iou": 0.765625, | |
| "loss_num": 0.023193359375, | |
| "loss_xval": 0.11572265625, | |
| "num_input_tokens_seen": 35741540, | |
| "step": 207 | |
| }, | |
| { | |
| "epoch": 0.5416666666666666, | |
| "grad_norm": 4.502393531758672, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0893, | |
| "num_input_tokens_seen": 35914024, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 0.5416666666666666, | |
| "loss": 0.08899325132369995, | |
| "loss_ce": 6.502882024506107e-05, | |
| "loss_iou": 0.75, | |
| "loss_num": 0.017822265625, | |
| "loss_xval": 0.0888671875, | |
| "num_input_tokens_seen": 35914024, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 0.5442708333333334, | |
| "grad_norm": 10.086026290203142, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1033, | |
| "num_input_tokens_seen": 36087084, | |
| "step": 209 | |
| }, | |
| { | |
| "epoch": 0.5442708333333334, | |
| "loss": 0.060362037271261215, | |
| "loss_ce": 0.00013559818034991622, | |
| "loss_iou": 0.51953125, | |
| "loss_num": 0.01202392578125, | |
| "loss_xval": 0.060302734375, | |
| "num_input_tokens_seen": 36087084, | |
| "step": 209 | |
| }, | |
| { | |
| "epoch": 0.546875, | |
| "grad_norm": 6.731766943850301, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0989, | |
| "num_input_tokens_seen": 36259864, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.546875, | |
| "loss": 0.04847151041030884, | |
| "loss_ce": 0.00013166893040761352, | |
| "loss_iou": 0.53515625, | |
| "loss_num": 0.0096435546875, | |
| "loss_xval": 0.04833984375, | |
| "num_input_tokens_seen": 36259864, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.5494791666666666, | |
| "grad_norm": 6.316474875770928, | |
| "learning_rate": 5e-06, | |
| "loss": 0.087, | |
| "num_input_tokens_seen": 36433104, | |
| "step": 211 | |
| }, | |
| { | |
| "epoch": 0.5494791666666666, | |
| "loss": 0.09729330986738205, | |
| "loss_ce": 0.0003694796178024262, | |
| "loss_iou": 0.6015625, | |
| "loss_num": 0.0194091796875, | |
| "loss_xval": 0.0966796875, | |
| "num_input_tokens_seen": 36433104, | |
| "step": 211 | |
| }, | |
| { | |
| "epoch": 0.5520833333333334, | |
| "grad_norm": 8.68013938900971, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1649, | |
| "num_input_tokens_seen": 36605948, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 0.5520833333333334, | |
| "loss": 0.10549305379390717, | |
| "loss_ce": 5.4825890401843935e-05, | |
| "loss_iou": 0.48046875, | |
| "loss_num": 0.0211181640625, | |
| "loss_xval": 0.10546875, | |
| "num_input_tokens_seen": 36605948, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 0.5546875, | |
| "grad_norm": 2.9587466587848543, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0597, | |
| "num_input_tokens_seen": 36778360, | |
| "step": 213 | |
| }, | |
| { | |
| "epoch": 0.5546875, | |
| "loss": 0.054243359714746475, | |
| "loss_ce": 0.00010517801274545491, | |
| "loss_iou": 0.69921875, | |
| "loss_num": 0.01080322265625, | |
| "loss_xval": 0.05419921875, | |
| "num_input_tokens_seen": 36778360, | |
| "step": 213 | |
| }, | |
| { | |
| "epoch": 0.5572916666666666, | |
| "grad_norm": 3.540440347425946, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0541, | |
| "num_input_tokens_seen": 36950340, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 0.5572916666666666, | |
| "loss": 0.044868774712085724, | |
| "loss_ce": 0.0001605255965841934, | |
| "loss_iou": 0.5234375, | |
| "loss_num": 0.0089111328125, | |
| "loss_xval": 0.044677734375, | |
| "num_input_tokens_seen": 36950340, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 0.5598958333333334, | |
| "grad_norm": 1.7960907214462793, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0558, | |
| "num_input_tokens_seen": 37123392, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 0.5598958333333334, | |
| "loss": 0.03447698801755905, | |
| "loss_ce": 0.00014471304893959314, | |
| "loss_iou": 0.5, | |
| "loss_num": 0.006866455078125, | |
| "loss_xval": 0.034423828125, | |
| "num_input_tokens_seen": 37123392, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 0.5625, | |
| "grad_norm": 4.431604970837842, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0965, | |
| "num_input_tokens_seen": 37295368, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 0.5625, | |
| "loss": 0.1555291712284088, | |
| "loss_ce": 8.788481500232592e-05, | |
| "loss_iou": 0.0, | |
| "loss_num": 0.0311279296875, | |
| "loss_xval": 0.1552734375, | |
| "num_input_tokens_seen": 37295368, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 0.5651041666666666, | |
| "grad_norm": 8.013606775608135, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1017, | |
| "num_input_tokens_seen": 37467908, | |
| "step": 217 | |
| }, | |
| { | |
| "epoch": 0.5651041666666666, | |
| "loss": 0.12359996885061264, | |
| "loss_ce": 0.00015636239550076425, | |
| "loss_iou": 0.0, | |
| "loss_num": 0.024658203125, | |
| "loss_xval": 0.12353515625, | |
| "num_input_tokens_seen": 37467908, | |
| "step": 217 | |
| }, | |
| { | |
| "epoch": 0.5677083333333334, | |
| "grad_norm": 9.000183276004282, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0964, | |
| "num_input_tokens_seen": 37640328, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 0.5677083333333334, | |
| "loss": 0.09344692528247833, | |
| "loss_ce": 6.314014899544418e-05, | |
| "loss_iou": 0.609375, | |
| "loss_num": 0.0186767578125, | |
| "loss_xval": 0.09326171875, | |
| "num_input_tokens_seen": 37640328, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 0.5703125, | |
| "grad_norm": 28.397075300946053, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1042, | |
| "num_input_tokens_seen": 37812984, | |
| "step": 219 | |
| }, | |
| { | |
| "epoch": 0.5703125, | |
| "loss": 0.11136841773986816, | |
| "loss_ce": 7.081658986862749e-05, | |
| "loss_iou": 0.734375, | |
| "loss_num": 0.022216796875, | |
| "loss_xval": 0.111328125, | |
| "num_input_tokens_seen": 37812984, | |
| "step": 219 | |
| }, | |
| { | |
| "epoch": 0.5729166666666666, | |
| "grad_norm": 3.6482189206456126, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0685, | |
| "num_input_tokens_seen": 37985152, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.5729166666666666, | |
| "loss": 0.05360790342092514, | |
| "loss_ce": 6.481433956651017e-05, | |
| "loss_iou": 0.5859375, | |
| "loss_num": 0.0107421875, | |
| "loss_xval": 0.053466796875, | |
| "num_input_tokens_seen": 37985152, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.5755208333333334, | |
| "grad_norm": 24.217399076672056, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0891, | |
| "num_input_tokens_seen": 38157616, | |
| "step": 221 | |
| }, | |
| { | |
| "epoch": 0.5755208333333334, | |
| "loss": 0.11579165607690811, | |
| "loss_ce": 0.0005420194938778877, | |
| "loss_iou": 0.3984375, | |
| "loss_num": 0.0230712890625, | |
| "loss_xval": 0.115234375, | |
| "num_input_tokens_seen": 38157616, | |
| "step": 221 | |
| }, | |
| { | |
| "epoch": 0.578125, | |
| "grad_norm": 3.7151820904220063, | |
| "learning_rate": 5e-06, | |
| "loss": 0.057, | |
| "num_input_tokens_seen": 38330496, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 0.578125, | |
| "loss": 0.04516543075442314, | |
| "loss_ce": 9.09663358470425e-05, | |
| "loss_iou": 0.515625, | |
| "loss_num": 0.009033203125, | |
| "loss_xval": 0.045166015625, | |
| "num_input_tokens_seen": 38330496, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 0.5807291666666666, | |
| "grad_norm": 15.97315866564612, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1208, | |
| "num_input_tokens_seen": 38503204, | |
| "step": 223 | |
| }, | |
| { | |
| "epoch": 0.5807291666666666, | |
| "loss": 0.11001908779144287, | |
| "loss_ce": 6.425123137887567e-05, | |
| "loss_iou": 0.6484375, | |
| "loss_num": 0.02197265625, | |
| "loss_xval": 0.10986328125, | |
| "num_input_tokens_seen": 38503204, | |
| "step": 223 | |
| }, | |
| { | |
| "epoch": 0.5833333333333334, | |
| "grad_norm": 15.54736656112959, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0667, | |
| "num_input_tokens_seen": 38675744, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 0.5833333333333334, | |
| "loss": 0.11892453581094742, | |
| "loss_ce": 0.00011960987467318773, | |
| "loss_iou": 0.0, | |
| "loss_num": 0.0238037109375, | |
| "loss_xval": 0.11865234375, | |
| "num_input_tokens_seen": 38675744, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 0.5859375, | |
| "grad_norm": 8.708565681630517, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0858, | |
| "num_input_tokens_seen": 38848284, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.5859375, | |
| "loss": 0.06076966971158981, | |
| "loss_ce": 0.000436413218267262, | |
| "loss_iou": 0.0, | |
| "loss_num": 0.0120849609375, | |
| "loss_xval": 0.060302734375, | |
| "num_input_tokens_seen": 38848284, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.5885416666666666, | |
| "grad_norm": 11.665207638748996, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1059, | |
| "num_input_tokens_seen": 39021192, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 0.5885416666666666, | |
| "loss": 0.08464138209819794, | |
| "loss_ce": 0.00010768979700515047, | |
| "loss_iou": 0.57421875, | |
| "loss_num": 0.016845703125, | |
| "loss_xval": 0.08447265625, | |
| "num_input_tokens_seen": 39021192, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 0.5911458333333334, | |
| "grad_norm": 6.122755854158079, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0916, | |
| "num_input_tokens_seen": 39194408, | |
| "step": 227 | |
| }, | |
| { | |
| "epoch": 0.5911458333333334, | |
| "loss": 0.11753110587596893, | |
| "loss_ce": 6.894973921589553e-05, | |
| "loss_iou": 0.6796875, | |
| "loss_num": 0.0234375, | |
| "loss_xval": 0.11767578125, | |
| "num_input_tokens_seen": 39194408, | |
| "step": 227 | |
| }, | |
| { | |
| "epoch": 0.59375, | |
| "grad_norm": 25.91736548090707, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0936, | |
| "num_input_tokens_seen": 39366972, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 0.59375, | |
| "loss": 0.09147345274686813, | |
| "loss_ce": 7.33033666620031e-05, | |
| "loss_iou": 0.578125, | |
| "loss_num": 0.018310546875, | |
| "loss_xval": 0.09130859375, | |
| "num_input_tokens_seen": 39366972, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 0.5963541666666666, | |
| "grad_norm": 22.31114946018542, | |
| "learning_rate": 5e-06, | |
| "loss": 0.094, | |
| "num_input_tokens_seen": 39539944, | |
| "step": 229 | |
| }, | |
| { | |
| "epoch": 0.5963541666666666, | |
| "loss": 0.11777202785015106, | |
| "loss_ce": 3.521383769111708e-05, | |
| "loss_iou": 0.71484375, | |
| "loss_num": 0.0235595703125, | |
| "loss_xval": 0.11767578125, | |
| "num_input_tokens_seen": 39539944, | |
| "step": 229 | |
| }, | |
| { | |
| "epoch": 0.5989583333333334, | |
| "grad_norm": 4.025666229457589, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0886, | |
| "num_input_tokens_seen": 39712932, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.5989583333333334, | |
| "loss": 0.1201152354478836, | |
| "loss_ce": 0.00015063578030094504, | |
| "loss_iou": 0.6796875, | |
| "loss_num": 0.02392578125, | |
| "loss_xval": 0.1201171875, | |
| "num_input_tokens_seen": 39712932, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.6015625, | |
| "grad_norm": 3.7609078788021097, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0683, | |
| "num_input_tokens_seen": 39885616, | |
| "step": 231 | |
| }, | |
| { | |
| "epoch": 0.6015625, | |
| "loss": 0.07680265605449677, | |
| "loss_ce": 0.0001730183430481702, | |
| "loss_iou": 0.5859375, | |
| "loss_num": 0.01531982421875, | |
| "loss_xval": 0.07666015625, | |
| "num_input_tokens_seen": 39885616, | |
| "step": 231 | |
| }, | |
| { | |
| "epoch": 0.6041666666666666, | |
| "grad_norm": 5.989352644027437, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0674, | |
| "num_input_tokens_seen": 40057968, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 0.6041666666666666, | |
| "loss": 0.1005856841802597, | |
| "loss_ce": 0.00013706949539482594, | |
| "loss_iou": 0.462890625, | |
| "loss_num": 0.02001953125, | |
| "loss_xval": 0.1005859375, | |
| "num_input_tokens_seen": 40057968, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 0.6067708333333334, | |
| "grad_norm": 4.762149494132162, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0698, | |
| "num_input_tokens_seen": 40230848, | |
| "step": 233 | |
| }, | |
| { | |
| "epoch": 0.6067708333333334, | |
| "loss": 0.10376375913619995, | |
| "loss_ce": 3.4517663152655587e-05, | |
| "loss_iou": 0.63671875, | |
| "loss_num": 0.020751953125, | |
| "loss_xval": 0.103515625, | |
| "num_input_tokens_seen": 40230848, | |
| "step": 233 | |
| }, | |
| { | |
| "epoch": 0.609375, | |
| "grad_norm": 5.409386698496161, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1021, | |
| "num_input_tokens_seen": 40403276, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 0.609375, | |
| "loss": 0.06936685740947723, | |
| "loss_ce": 0.0003818793629761785, | |
| "loss_iou": 0.482421875, | |
| "loss_num": 0.0137939453125, | |
| "loss_xval": 0.06884765625, | |
| "num_input_tokens_seen": 40403276, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 0.6119791666666666, | |
| "grad_norm": 10.974609444669646, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1001, | |
| "num_input_tokens_seen": 40576292, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 0.6119791666666666, | |
| "loss": 0.11177849024534225, | |
| "loss_ce": 0.00026726460782811046, | |
| "loss_iou": 0.61328125, | |
| "loss_num": 0.0223388671875, | |
| "loss_xval": 0.111328125, | |
| "num_input_tokens_seen": 40576292, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 0.6145833333333334, | |
| "grad_norm": 3.802157730607013, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0768, | |
| "num_input_tokens_seen": 40749076, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 0.6145833333333334, | |
| "loss": 0.07335153222084045, | |
| "loss_ce": 0.000506069976836443, | |
| "loss_iou": 0.6171875, | |
| "loss_num": 0.0145263671875, | |
| "loss_xval": 0.07275390625, | |
| "num_input_tokens_seen": 40749076, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 0.6171875, | |
| "grad_norm": 3.5754950924222406, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0497, | |
| "num_input_tokens_seen": 40922288, | |
| "step": 237 | |
| }, | |
| { | |
| "epoch": 0.6171875, | |
| "loss": 0.02886682003736496, | |
| "loss_ce": 0.00014977881801314652, | |
| "loss_iou": 0.55078125, | |
| "loss_num": 0.0057373046875, | |
| "loss_xval": 0.0286865234375, | |
| "num_input_tokens_seen": 40922288, | |
| "step": 237 | |
| }, | |
| { | |
| "epoch": 0.6197916666666666, | |
| "grad_norm": 4.288040219675324, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0508, | |
| "num_input_tokens_seen": 41094828, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 0.6197916666666666, | |
| "loss": 0.05992227792739868, | |
| "loss_ce": 0.0003519634483382106, | |
| "loss_iou": 0.640625, | |
| "loss_num": 0.01190185546875, | |
| "loss_xval": 0.0595703125, | |
| "num_input_tokens_seen": 41094828, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 0.6223958333333334, | |
| "grad_norm": 6.504525689859585, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0925, | |
| "num_input_tokens_seen": 41267332, | |
| "step": 239 | |
| }, | |
| { | |
| "epoch": 0.6223958333333334, | |
| "loss": 0.06373357772827148, | |
| "loss_ce": 4.338783037383109e-05, | |
| "loss_iou": 0.6875, | |
| "loss_num": 0.01275634765625, | |
| "loss_xval": 0.0634765625, | |
| "num_input_tokens_seen": 41267332, | |
| "step": 239 | |
| }, | |
| { | |
| "epoch": 0.625, | |
| "grad_norm": 5.068763378329545, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0829, | |
| "num_input_tokens_seen": 41439728, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.625, | |
| "loss": 0.08366407454013824, | |
| "loss_ce": 0.00022901550983078778, | |
| "loss_iou": 0.578125, | |
| "loss_num": 0.0167236328125, | |
| "loss_xval": 0.08349609375, | |
| "num_input_tokens_seen": 41439728, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.6276041666666666, | |
| "grad_norm": 9.15531863667315, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0909, | |
| "num_input_tokens_seen": 41612180, | |
| "step": 241 | |
| }, | |
| { | |
| "epoch": 0.6276041666666666, | |
| "loss": 0.11120368540287018, | |
| "loss_ce": 5.867354047950357e-05, | |
| "loss_iou": 0.478515625, | |
| "loss_num": 0.022216796875, | |
| "loss_xval": 0.111328125, | |
| "num_input_tokens_seen": 41612180, | |
| "step": 241 | |
| }, | |
| { | |
| "epoch": 0.6302083333333334, | |
| "grad_norm": 2.0214181878511566, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0741, | |
| "num_input_tokens_seen": 41784848, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 0.6302083333333334, | |
| "loss": 0.05301050841808319, | |
| "loss_ce": 9.30292735574767e-05, | |
| "loss_iou": 0.51171875, | |
| "loss_num": 0.0106201171875, | |
| "loss_xval": 0.052978515625, | |
| "num_input_tokens_seen": 41784848, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 0.6328125, | |
| "grad_norm": 4.1167075841800385, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0462, | |
| "num_input_tokens_seen": 41957024, | |
| "step": 243 | |
| }, | |
| { | |
| "epoch": 0.6328125, | |
| "loss": 0.05533324182033539, | |
| "loss_ce": 8.116720709949732e-05, | |
| "loss_iou": 0.498046875, | |
| "loss_num": 0.01104736328125, | |
| "loss_xval": 0.05517578125, | |
| "num_input_tokens_seen": 41957024, | |
| "step": 243 | |
| }, | |
| { | |
| "epoch": 0.6354166666666666, | |
| "grad_norm": 12.037461080324686, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1285, | |
| "num_input_tokens_seen": 42129920, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 0.6354166666666666, | |
| "loss": 0.07943513244390488, | |
| "loss_ce": 8.94309050636366e-05, | |
| "loss_iou": 0.7265625, | |
| "loss_num": 0.015869140625, | |
| "loss_xval": 0.0791015625, | |
| "num_input_tokens_seen": 42129920, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 0.6380208333333334, | |
| "grad_norm": 6.295206206189768, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0889, | |
| "num_input_tokens_seen": 42302912, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 0.6380208333333334, | |
| "loss": 0.09156939387321472, | |
| "loss_ce": 4.717556657851674e-05, | |
| "loss_iou": 0.5234375, | |
| "loss_num": 0.018310546875, | |
| "loss_xval": 0.09130859375, | |
| "num_input_tokens_seen": 42302912, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 0.640625, | |
| "grad_norm": 25.409557942414935, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0661, | |
| "num_input_tokens_seen": 42475584, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 0.640625, | |
| "loss": 0.097844198346138, | |
| "loss_ce": 6.587710231542587e-05, | |
| "loss_iou": 0.431640625, | |
| "loss_num": 0.01953125, | |
| "loss_xval": 0.09765625, | |
| "num_input_tokens_seen": 42475584, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 0.6432291666666666, | |
| "grad_norm": 4.450370043022936, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0697, | |
| "num_input_tokens_seen": 42647808, | |
| "step": 247 | |
| }, | |
| { | |
| "epoch": 0.6432291666666666, | |
| "loss": 0.1061711385846138, | |
| "loss_ce": 0.00015306829300243407, | |
| "loss_iou": 0.5859375, | |
| "loss_num": 0.021240234375, | |
| "loss_xval": 0.10595703125, | |
| "num_input_tokens_seen": 42647808, | |
| "step": 247 | |
| }, | |
| { | |
| "epoch": 0.6458333333333334, | |
| "grad_norm": 4.116581907360989, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0858, | |
| "num_input_tokens_seen": 42820508, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 0.6458333333333334, | |
| "loss": 0.07190299779176712, | |
| "loss_ce": 7.987646677065641e-05, | |
| "loss_iou": 0.5546875, | |
| "loss_num": 0.01434326171875, | |
| "loss_xval": 0.07177734375, | |
| "num_input_tokens_seen": 42820508, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 0.6484375, | |
| "grad_norm": 4.147716000593212, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0854, | |
| "num_input_tokens_seen": 42992784, | |
| "step": 249 | |
| }, | |
| { | |
| "epoch": 0.6484375, | |
| "loss": 0.05028773471713066, | |
| "loss_ce": 5.58009123778902e-05, | |
| "loss_iou": 0.6640625, | |
| "loss_num": 0.01007080078125, | |
| "loss_xval": 0.05029296875, | |
| "num_input_tokens_seen": 42992784, | |
| "step": 249 | |
| }, | |
| { | |
| "epoch": 0.6510416666666666, | |
| "grad_norm": 16.495817541741257, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0747, | |
| "num_input_tokens_seen": 43165896, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.6510416666666666, | |
| "eval_seeclick_CIoU": 0.4124833643436432, | |
| "eval_seeclick_GIoU": 0.41358618438243866, | |
| "eval_seeclick_IoU": 0.445960208773613, | |
| "eval_seeclick_MAE_all": 0.0724409706890583, | |
| "eval_seeclick_MAE_h": 0.06929375603795052, | |
| "eval_seeclick_MAE_w": 0.09850849956274033, | |
| "eval_seeclick_MAE_x": 0.07914602756500244, | |
| "eval_seeclick_MAE_y": 0.04281560517847538, | |
| "eval_seeclick_NUM_probability": 0.9999896287918091, | |
| "eval_seeclick_inside_bbox": 0.921875, | |
| "eval_seeclick_loss": 0.9194074273109436, | |
| "eval_seeclick_loss_ce": 0.6105623841285706, | |
| "eval_seeclick_loss_iou": 0.67578125, | |
| "eval_seeclick_loss_num": 0.0633697509765625, | |
| "eval_seeclick_loss_xval": 0.31683349609375, | |
| "eval_seeclick_runtime": 73.8784, | |
| "eval_seeclick_samples_per_second": 0.582, | |
| "eval_seeclick_steps_per_second": 0.027, | |
| "num_input_tokens_seen": 43165896, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.6510416666666666, | |
| "eval_icons_CIoU": 0.6936101317405701, | |
| "eval_icons_GIoU": 0.697041928768158, | |
| "eval_icons_IoU": 0.704749345779419, | |
| "eval_icons_MAE_all": 0.039153311401605606, | |
| "eval_icons_MAE_h": 0.04060409218072891, | |
| "eval_icons_MAE_w": 0.05324110668152571, | |
| "eval_icons_MAE_x": 0.03839818201959133, | |
| "eval_icons_MAE_y": 0.02436987590044737, | |
| "eval_icons_NUM_probability": 0.9999879896640778, | |
| "eval_icons_inside_bbox": 0.9565972089767456, | |
| "eval_icons_loss": 0.15028713643550873, | |
| "eval_icons_loss_ce": 0.00046230135194491595, | |
| "eval_icons_loss_iou": 0.600341796875, | |
| "eval_icons_loss_num": 0.02852630615234375, | |
| "eval_icons_loss_xval": 0.142608642578125, | |
| "eval_icons_runtime": 80.0672, | |
| "eval_icons_samples_per_second": 0.624, | |
| "eval_icons_steps_per_second": 0.025, | |
| "num_input_tokens_seen": 43165896, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.6510416666666666, | |
| "eval_screenspot_CIoU": 0.42071565985679626, | |
| "eval_screenspot_GIoU": 0.4120611349741618, | |
| "eval_screenspot_IoU": 0.48129573464393616, | |
| "eval_screenspot_MAE_all": 0.11981111764907837, | |
| "eval_screenspot_MAE_h": 0.08953167746464412, | |
| "eval_screenspot_MAE_w": 0.19297573963801065, | |
| "eval_screenspot_MAE_x": 0.1243693083524704, | |
| "eval_screenspot_MAE_y": 0.07236775507529576, | |
| "eval_screenspot_NUM_probability": 0.9999845623970032, | |
| "eval_screenspot_inside_bbox": 0.7979166706403097, | |
| "eval_screenspot_loss": 0.8879116177558899, | |
| "eval_screenspot_loss_ce": 0.3984930415948232, | |
| "eval_screenspot_loss_iou": 0.5793863932291666, | |
| "eval_screenspot_loss_num": 0.09791056315104167, | |
| "eval_screenspot_loss_xval": 0.4894205729166667, | |
| "eval_screenspot_runtime": 139.1948, | |
| "eval_screenspot_samples_per_second": 0.639, | |
| "eval_screenspot_steps_per_second": 0.022, | |
| "num_input_tokens_seen": 43165896, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.6510416666666666, | |
| "eval_compot_CIoU": 0.8471810519695282, | |
| "eval_compot_GIoU": 0.8459496200084686, | |
| "eval_compot_IoU": 0.8493484258651733, | |
| "eval_compot_MAE_all": 0.01606033928692341, | |
| "eval_compot_MAE_h": 0.015686397906392813, | |
| "eval_compot_MAE_w": 0.024428557604551315, | |
| "eval_compot_MAE_x": 0.013795553240925074, | |
| "eval_compot_MAE_y": 0.01033084886148572, | |
| "eval_compot_NUM_probability": 0.9999726712703705, | |
| "eval_compot_inside_bbox": 1.0, | |
| "eval_compot_loss": 0.07782306522130966, | |
| "eval_compot_loss_ce": 0.0001593182678334415, | |
| "eval_compot_loss_iou": 0.693115234375, | |
| "eval_compot_loss_num": 0.01538848876953125, | |
| "eval_compot_loss_xval": 0.076904296875, | |
| "eval_compot_runtime": 81.1661, | |
| "eval_compot_samples_per_second": 0.616, | |
| "eval_compot_steps_per_second": 0.025, | |
| "num_input_tokens_seen": 43165896, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.6510416666666666, | |
| "loss": 0.0681569054722786, | |
| "loss_ce": 0.00022477866150438786, | |
| "loss_iou": 0.7265625, | |
| "loss_num": 0.01361083984375, | |
| "loss_xval": 0.06787109375, | |
| "num_input_tokens_seen": 43165896, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.6536458333333334, | |
| "grad_norm": 3.6701809821868308, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0838, | |
| "num_input_tokens_seen": 43338688, | |
| "step": 251 | |
| }, | |
| { | |
| "epoch": 0.6536458333333334, | |
| "loss": 0.06336190551519394, | |
| "loss_ce": 9.896879782900214e-05, | |
| "loss_iou": 0.74609375, | |
| "loss_num": 0.01263427734375, | |
| "loss_xval": 0.0634765625, | |
| "num_input_tokens_seen": 43338688, | |
| "step": 251 | |
| }, | |
| { | |
| "epoch": 0.65625, | |
| "grad_norm": 1.8275074603928982, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0518, | |
| "num_input_tokens_seen": 43511012, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 0.65625, | |
| "loss": 0.04357748478651047, | |
| "loss_ce": 0.00015097142022568733, | |
| "loss_iou": 0.5078125, | |
| "loss_num": 0.0086669921875, | |
| "loss_xval": 0.04345703125, | |
| "num_input_tokens_seen": 43511012, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 0.6588541666666666, | |
| "grad_norm": 2.1416791842803238, | |
| "learning_rate": 5e-06, | |
| "loss": 0.057, | |
| "num_input_tokens_seen": 43683084, | |
| "step": 253 | |
| }, | |
| { | |
| "epoch": 0.6588541666666666, | |
| "loss": 0.05978800728917122, | |
| "loss_ce": 9.562318882672116e-05, | |
| "loss_iou": 0.447265625, | |
| "loss_num": 0.011962890625, | |
| "loss_xval": 0.0595703125, | |
| "num_input_tokens_seen": 43683084, | |
| "step": 253 | |
| }, | |
| { | |
| "epoch": 0.6614583333333334, | |
| "grad_norm": 3.5604873362214167, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0713, | |
| "num_input_tokens_seen": 43855336, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 0.6614583333333334, | |
| "loss": 0.06191530451178551, | |
| "loss_ce": 0.0003155705926474184, | |
| "loss_iou": 0.453125, | |
| "loss_num": 0.0123291015625, | |
| "loss_xval": 0.0615234375, | |
| "num_input_tokens_seen": 43855336, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 0.6640625, | |
| "grad_norm": 7.717445783436579, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0799, | |
| "num_input_tokens_seen": 44028296, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 0.6640625, | |
| "loss": 0.09458325803279877, | |
| "loss_ce": 3.980396650149487e-05, | |
| "loss_iou": 0.70703125, | |
| "loss_num": 0.0189208984375, | |
| "loss_xval": 0.0947265625, | |
| "num_input_tokens_seen": 44028296, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 0.6666666666666666, | |
| "grad_norm": 15.823495393044448, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0915, | |
| "num_input_tokens_seen": 44200980, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 0.6666666666666666, | |
| "loss": 0.10397086292505264, | |
| "loss_ce": 5.850698289577849e-05, | |
| "loss_iou": 0.73046875, | |
| "loss_num": 0.020751953125, | |
| "loss_xval": 0.10400390625, | |
| "num_input_tokens_seen": 44200980, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 0.6692708333333334, | |
| "grad_norm": 4.767840698347708, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0808, | |
| "num_input_tokens_seen": 44373548, | |
| "step": 257 | |
| }, | |
| { | |
| "epoch": 0.6692708333333334, | |
| "loss": 0.04920345917344093, | |
| "loss_ce": 7.015664596110582e-05, | |
| "loss_iou": 0.69140625, | |
| "loss_num": 0.00982666015625, | |
| "loss_xval": 0.049072265625, | |
| "num_input_tokens_seen": 44373548, | |
| "step": 257 | |
| }, | |
| { | |
| "epoch": 0.671875, | |
| "grad_norm": 7.0061287275719195, | |
| "learning_rate": 5e-06, | |
| "loss": 0.087, | |
| "num_input_tokens_seen": 44545956, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 0.671875, | |
| "loss": 0.12110073864459991, | |
| "loss_ce": 3.750172254513018e-05, | |
| "loss_iou": 0.6015625, | |
| "loss_num": 0.024169921875, | |
| "loss_xval": 0.12109375, | |
| "num_input_tokens_seen": 44545956, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 0.6744791666666666, | |
| "grad_norm": 16.13195894853677, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1143, | |
| "num_input_tokens_seen": 44719164, | |
| "step": 259 | |
| }, | |
| { | |
| "epoch": 0.6744791666666666, | |
| "loss": 0.15427453815937042, | |
| "loss_ce": 9.973209671443328e-05, | |
| "loss_iou": 0.65234375, | |
| "loss_num": 0.0308837890625, | |
| "loss_xval": 0.154296875, | |
| "num_input_tokens_seen": 44719164, | |
| "step": 259 | |
| }, | |
| { | |
| "epoch": 0.6770833333333334, | |
| "grad_norm": 3.769620732282852, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0715, | |
| "num_input_tokens_seen": 44891856, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.6770833333333334, | |
| "loss": 0.08834376931190491, | |
| "loss_ce": 8.69391078595072e-05, | |
| "loss_iou": 0.625, | |
| "loss_num": 0.0177001953125, | |
| "loss_xval": 0.08837890625, | |
| "num_input_tokens_seen": 44891856, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.6796875, | |
| "grad_norm": 15.395477460165395, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0877, | |
| "num_input_tokens_seen": 45064988, | |
| "step": 261 | |
| }, | |
| { | |
| "epoch": 0.6796875, | |
| "loss": 0.08463309705257416, | |
| "loss_ce": 0.00025199196534231305, | |
| "loss_iou": 0.390625, | |
| "loss_num": 0.016845703125, | |
| "loss_xval": 0.08447265625, | |
| "num_input_tokens_seen": 45064988, | |
| "step": 261 | |
| }, | |
| { | |
| "epoch": 0.6822916666666666, | |
| "grad_norm": 1.9846406004820456, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0567, | |
| "num_input_tokens_seen": 45237912, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 0.6822916666666666, | |
| "loss": 0.10574272274971008, | |
| "loss_ce": 4.509550126385875e-05, | |
| "loss_iou": 0.4140625, | |
| "loss_num": 0.0211181640625, | |
| "loss_xval": 0.10546875, | |
| "num_input_tokens_seen": 45237912, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 0.6848958333333334, | |
| "grad_norm": 10.620697806562827, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0748, | |
| "num_input_tokens_seen": 45410408, | |
| "step": 263 | |
| }, | |
| { | |
| "epoch": 0.6848958333333334, | |
| "loss": 0.0699179470539093, | |
| "loss_ce": 0.0001700148859526962, | |
| "loss_iou": 0.400390625, | |
| "loss_num": 0.013916015625, | |
| "loss_xval": 0.06982421875, | |
| "num_input_tokens_seen": 45410408, | |
| "step": 263 | |
| }, | |
| { | |
| "epoch": 0.6875, | |
| "grad_norm": 4.4669986375425985, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0521, | |
| "num_input_tokens_seen": 45583132, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 0.6875, | |
| "loss": 0.046352967619895935, | |
| "loss_ce": 0.00013409550592768937, | |
| "loss_iou": 0.78515625, | |
| "loss_num": 0.00921630859375, | |
| "loss_xval": 0.046142578125, | |
| "num_input_tokens_seen": 45583132, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 0.6901041666666666, | |
| "grad_norm": 6.203574811391586, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0928, | |
| "num_input_tokens_seen": 45755472, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 0.6901041666666666, | |
| "loss": 0.061830393970012665, | |
| "loss_ce": 3.229987487429753e-05, | |
| "loss_iou": 0.6328125, | |
| "loss_num": 0.01239013671875, | |
| "loss_xval": 0.061767578125, | |
| "num_input_tokens_seen": 45755472, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 0.6927083333333334, | |
| "grad_norm": 10.894591035750713, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1005, | |
| "num_input_tokens_seen": 45928200, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 0.6927083333333334, | |
| "loss": 0.1057087630033493, | |
| "loss_ce": 0.00011794811143772677, | |
| "loss_iou": 0.6875, | |
| "loss_num": 0.0211181640625, | |
| "loss_xval": 0.10546875, | |
| "num_input_tokens_seen": 45928200, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 0.6953125, | |
| "grad_norm": 3.559473609758924, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0525, | |
| "num_input_tokens_seen": 46101500, | |
| "step": 267 | |
| }, | |
| { | |
| "epoch": 0.6953125, | |
| "loss": 0.059102512896060944, | |
| "loss_ce": 5.099709960632026e-05, | |
| "loss_iou": 0.470703125, | |
| "loss_num": 0.0118408203125, | |
| "loss_xval": 0.05908203125, | |
| "num_input_tokens_seen": 46101500, | |
| "step": 267 | |
| }, | |
| { | |
| "epoch": 0.6979166666666666, | |
| "grad_norm": 32.963299647312084, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1156, | |
| "num_input_tokens_seen": 46273792, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 0.6979166666666666, | |
| "loss": 0.17293627560138702, | |
| "loss_ce": 6.945877976249903e-05, | |
| "loss_iou": 0.5859375, | |
| "loss_num": 0.03466796875, | |
| "loss_xval": 0.1728515625, | |
| "num_input_tokens_seen": 46273792, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 0.7005208333333334, | |
| "grad_norm": 5.379185511033478, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1206, | |
| "num_input_tokens_seen": 46446840, | |
| "step": 269 | |
| }, | |
| { | |
| "epoch": 0.7005208333333334, | |
| "loss": 0.0702916830778122, | |
| "loss_ce": 4.0215229091700166e-05, | |
| "loss_iou": 0.65625, | |
| "loss_num": 0.0140380859375, | |
| "loss_xval": 0.0703125, | |
| "num_input_tokens_seen": 46446840, | |
| "step": 269 | |
| }, | |
| { | |
| "epoch": 0.703125, | |
| "grad_norm": 4.833206887807392, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1057, | |
| "num_input_tokens_seen": 46619224, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.703125, | |
| "loss": 0.06372040510177612, | |
| "loss_ce": 4.5477234380086884e-05, | |
| "loss_iou": 0.482421875, | |
| "loss_num": 0.01275634765625, | |
| "loss_xval": 0.0634765625, | |
| "num_input_tokens_seen": 46619224, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.7057291666666666, | |
| "grad_norm": 4.9691828426728515, | |
| "learning_rate": 5e-06, | |
| "loss": 0.073, | |
| "num_input_tokens_seen": 46791948, | |
| "step": 271 | |
| }, | |
| { | |
| "epoch": 0.7057291666666666, | |
| "loss": 0.041718438267707825, | |
| "loss_ce": 0.00013823516201227903, | |
| "loss_iou": 0.66796875, | |
| "loss_num": 0.00830078125, | |
| "loss_xval": 0.04150390625, | |
| "num_input_tokens_seen": 46791948, | |
| "step": 271 | |
| }, | |
| { | |
| "epoch": 0.7083333333333334, | |
| "grad_norm": 5.792546307908184, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0812, | |
| "num_input_tokens_seen": 46964400, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 0.7083333333333334, | |
| "loss": 0.07085588574409485, | |
| "loss_ce": 0.00013139640213921666, | |
| "loss_iou": 0.671875, | |
| "loss_num": 0.01416015625, | |
| "loss_xval": 0.07080078125, | |
| "num_input_tokens_seen": 46964400, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 0.7109375, | |
| "grad_norm": 8.864261104979098, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0973, | |
| "num_input_tokens_seen": 47137156, | |
| "step": 273 | |
| }, | |
| { | |
| "epoch": 0.7109375, | |
| "loss": 0.08093667030334473, | |
| "loss_ce": 0.00015664326201658696, | |
| "loss_iou": 0.640625, | |
| "loss_num": 0.0162353515625, | |
| "loss_xval": 0.08056640625, | |
| "num_input_tokens_seen": 47137156, | |
| "step": 273 | |
| }, | |
| { | |
| "epoch": 0.7135416666666666, | |
| "grad_norm": 3.8762493026111633, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0958, | |
| "num_input_tokens_seen": 47309640, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 0.7135416666666666, | |
| "loss": 0.1435449719429016, | |
| "loss_ce": 5.13083505211398e-05, | |
| "loss_iou": 0.5234375, | |
| "loss_num": 0.0286865234375, | |
| "loss_xval": 0.1435546875, | |
| "num_input_tokens_seen": 47309640, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 0.7161458333333334, | |
| "grad_norm": 4.845607455502515, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0587, | |
| "num_input_tokens_seen": 47482920, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 0.7161458333333334, | |
| "loss": 0.06996987760066986, | |
| "loss_ce": 0.000145662619615905, | |
| "loss_iou": 0.45703125, | |
| "loss_num": 0.01397705078125, | |
| "loss_xval": 0.06982421875, | |
| "num_input_tokens_seen": 47482920, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 0.71875, | |
| "grad_norm": 6.023028440412175, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1017, | |
| "num_input_tokens_seen": 47655164, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 0.71875, | |
| "loss": 0.11321437358856201, | |
| "loss_ce": 5.519590195035562e-05, | |
| "loss_iou": 0.58984375, | |
| "loss_num": 0.0225830078125, | |
| "loss_xval": 0.11328125, | |
| "num_input_tokens_seen": 47655164, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 0.7213541666666666, | |
| "grad_norm": 4.375656119857942, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0618, | |
| "num_input_tokens_seen": 47827856, | |
| "step": 277 | |
| }, | |
| { | |
| "epoch": 0.7213541666666666, | |
| "loss": 0.0524156428873539, | |
| "loss_ce": 0.0002611021918710321, | |
| "loss_iou": 0.73046875, | |
| "loss_num": 0.01043701171875, | |
| "loss_xval": 0.05224609375, | |
| "num_input_tokens_seen": 47827856, | |
| "step": 277 | |
| }, | |
| { | |
| "epoch": 0.7239583333333334, | |
| "grad_norm": 3.478066675039873, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0642, | |
| "num_input_tokens_seen": 48000956, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 0.7239583333333334, | |
| "loss": 0.03859657049179077, | |
| "loss_ce": 5.2869407227262855e-05, | |
| "loss_iou": 0.474609375, | |
| "loss_num": 0.007720947265625, | |
| "loss_xval": 0.03857421875, | |
| "num_input_tokens_seen": 48000956, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 0.7265625, | |
| "grad_norm": 10.669002227751372, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0673, | |
| "num_input_tokens_seen": 48173420, | |
| "step": 279 | |
| }, | |
| { | |
| "epoch": 0.7265625, | |
| "loss": 0.04094023257493973, | |
| "loss_ce": 0.00010771260713227093, | |
| "loss_iou": 0.482421875, | |
| "loss_num": 0.0081787109375, | |
| "loss_xval": 0.040771484375, | |
| "num_input_tokens_seen": 48173420, | |
| "step": 279 | |
| }, | |
| { | |
| "epoch": 0.7291666666666666, | |
| "grad_norm": 6.013727130209973, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0843, | |
| "num_input_tokens_seen": 48346040, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.7291666666666666, | |
| "loss": 0.06400243937969208, | |
| "loss_ce": 6.811654020566493e-05, | |
| "loss_iou": 0.5546875, | |
| "loss_num": 0.0128173828125, | |
| "loss_xval": 0.06396484375, | |
| "num_input_tokens_seen": 48346040, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.7317708333333334, | |
| "grad_norm": 6.320025783309937, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0776, | |
| "num_input_tokens_seen": 48518684, | |
| "step": 281 | |
| }, | |
| { | |
| "epoch": 0.7317708333333334, | |
| "loss": 0.08228301256895065, | |
| "loss_ce": 0.00012969484669156373, | |
| "loss_iou": 0.7421875, | |
| "loss_num": 0.0164794921875, | |
| "loss_xval": 0.08203125, | |
| "num_input_tokens_seen": 48518684, | |
| "step": 281 | |
| }, | |
| { | |
| "epoch": 0.734375, | |
| "grad_norm": 2.3539480804430353, | |
| "learning_rate": 5e-06, | |
| "loss": 0.064, | |
| "num_input_tokens_seen": 48691296, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 0.734375, | |
| "loss": 0.06360374391078949, | |
| "loss_ce": 5.089196565677412e-05, | |
| "loss_iou": 0.55859375, | |
| "loss_num": 0.0126953125, | |
| "loss_xval": 0.0634765625, | |
| "num_input_tokens_seen": 48691296, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 0.7369791666666666, | |
| "grad_norm": 4.165777643617544, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0581, | |
| "num_input_tokens_seen": 48864252, | |
| "step": 283 | |
| }, | |
| { | |
| "epoch": 0.7369791666666666, | |
| "loss": 0.07076792418956757, | |
| "loss_ce": 5.869198503205553e-05, | |
| "loss_iou": 0.455078125, | |
| "loss_num": 0.01409912109375, | |
| "loss_xval": 0.07080078125, | |
| "num_input_tokens_seen": 48864252, | |
| "step": 283 | |
| }, | |
| { | |
| "epoch": 0.7395833333333334, | |
| "grad_norm": 4.530184060910693, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0857, | |
| "num_input_tokens_seen": 49037116, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 0.7395833333333334, | |
| "loss": 0.10114337503910065, | |
| "loss_ce": 6.914998812135309e-05, | |
| "loss_iou": 0.66015625, | |
| "loss_num": 0.020263671875, | |
| "loss_xval": 0.10107421875, | |
| "num_input_tokens_seen": 49037116, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 0.7421875, | |
| "grad_norm": 7.025143291686679, | |
| "learning_rate": 5e-06, | |
| "loss": 0.09, | |
| "num_input_tokens_seen": 49209880, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 0.7421875, | |
| "loss": 0.07852576673030853, | |
| "loss_ce": 3.455359546933323e-05, | |
| "loss_iou": 0.5859375, | |
| "loss_num": 0.0157470703125, | |
| "loss_xval": 0.07861328125, | |
| "num_input_tokens_seen": 49209880, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 0.7447916666666666, | |
| "grad_norm": 8.858211776100614, | |
| "learning_rate": 5e-06, | |
| "loss": 0.084, | |
| "num_input_tokens_seen": 49381700, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 0.7447916666666666, | |
| "loss": 0.05212024226784706, | |
| "loss_ce": 5.7252564147347584e-05, | |
| "loss_iou": 0.7265625, | |
| "loss_num": 0.01043701171875, | |
| "loss_xval": 0.052001953125, | |
| "num_input_tokens_seen": 49381700, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 0.7473958333333334, | |
| "grad_norm": 3.6537179877047663, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0989, | |
| "num_input_tokens_seen": 49554536, | |
| "step": 287 | |
| }, | |
| { | |
| "epoch": 0.7473958333333334, | |
| "loss": 0.055037256330251694, | |
| "loss_ce": 2.9322651244001463e-05, | |
| "loss_iou": 0.546875, | |
| "loss_num": 0.010986328125, | |
| "loss_xval": 0.054931640625, | |
| "num_input_tokens_seen": 49554536, | |
| "step": 287 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "grad_norm": 5.570461350284086, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0772, | |
| "num_input_tokens_seen": 49726396, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "loss": 0.07801353931427002, | |
| "loss_ce": 7.164124690461904e-05, | |
| "loss_iou": 0.451171875, | |
| "loss_num": 0.015625, | |
| "loss_xval": 0.078125, | |
| "num_input_tokens_seen": 49726396, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 0.7526041666666666, | |
| "grad_norm": 5.806990578827175, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0737, | |
| "num_input_tokens_seen": 49899536, | |
| "step": 289 | |
| }, | |
| { | |
| "epoch": 0.7526041666666666, | |
| "loss": 0.09019728004932404, | |
| "loss_ce": 4.8357818741351366e-05, | |
| "loss_iou": 0.46484375, | |
| "loss_num": 0.01806640625, | |
| "loss_xval": 0.09033203125, | |
| "num_input_tokens_seen": 49899536, | |
| "step": 289 | |
| }, | |
| { | |
| "epoch": 0.7552083333333334, | |
| "grad_norm": 6.584433746493665, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0753, | |
| "num_input_tokens_seen": 50072028, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.7552083333333334, | |
| "loss": 0.05955757200717926, | |
| "loss_ce": 0.00013984768884256482, | |
| "loss_iou": 0.5, | |
| "loss_num": 0.01190185546875, | |
| "loss_xval": 0.059326171875, | |
| "num_input_tokens_seen": 50072028, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.7578125, | |
| "grad_norm": 4.769362882722307, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0836, | |
| "num_input_tokens_seen": 50244788, | |
| "step": 291 | |
| }, | |
| { | |
| "epoch": 0.7578125, | |
| "loss": 0.08353784680366516, | |
| "loss_ce": 4.175720823695883e-05, | |
| "loss_iou": 0.58203125, | |
| "loss_num": 0.0167236328125, | |
| "loss_xval": 0.08349609375, | |
| "num_input_tokens_seen": 50244788, | |
| "step": 291 | |
| }, | |
| { | |
| "epoch": 0.7604166666666666, | |
| "grad_norm": 4.630970710069874, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0924, | |
| "num_input_tokens_seen": 50417020, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 0.7604166666666666, | |
| "loss": 0.0867491364479065, | |
| "loss_ce": 6.395512173185125e-05, | |
| "loss_iou": 0.0, | |
| "loss_num": 0.017333984375, | |
| "loss_xval": 0.0869140625, | |
| "num_input_tokens_seen": 50417020, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 0.7630208333333334, | |
| "grad_norm": 4.771052495662392, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0789, | |
| "num_input_tokens_seen": 50589288, | |
| "step": 293 | |
| }, | |
| { | |
| "epoch": 0.7630208333333334, | |
| "loss": 0.04268595576286316, | |
| "loss_ce": 3.76409079763107e-05, | |
| "loss_iou": 0.62109375, | |
| "loss_num": 0.008544921875, | |
| "loss_xval": 0.042724609375, | |
| "num_input_tokens_seen": 50589288, | |
| "step": 293 | |
| }, | |
| { | |
| "epoch": 0.765625, | |
| "grad_norm": 5.549980291826297, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1124, | |
| "num_input_tokens_seen": 50762276, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 0.765625, | |
| "loss": 0.08056493103504181, | |
| "loss_ce": 5.956060340395197e-05, | |
| "loss_iou": 0.58203125, | |
| "loss_num": 0.01611328125, | |
| "loss_xval": 0.08056640625, | |
| "num_input_tokens_seen": 50762276, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 0.7682291666666666, | |
| "grad_norm": 58.66835057028341, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0912, | |
| "num_input_tokens_seen": 50935292, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 0.7682291666666666, | |
| "loss": 0.12624840438365936, | |
| "loss_ce": 5.8218334743287414e-05, | |
| "loss_iou": 0.72265625, | |
| "loss_num": 0.0252685546875, | |
| "loss_xval": 0.1259765625, | |
| "num_input_tokens_seen": 50935292, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 0.7708333333333334, | |
| "grad_norm": 5.644622915739686, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0868, | |
| "num_input_tokens_seen": 51108336, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 0.7708333333333334, | |
| "loss": 0.1262531876564026, | |
| "loss_ce": 9.352029883302748e-05, | |
| "loss_iou": 0.6015625, | |
| "loss_num": 0.0252685546875, | |
| "loss_xval": 0.1259765625, | |
| "num_input_tokens_seen": 51108336, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 0.7734375, | |
| "grad_norm": 9.321237615443232, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1107, | |
| "num_input_tokens_seen": 51280676, | |
| "step": 297 | |
| }, | |
| { | |
| "epoch": 0.7734375, | |
| "loss": 0.08918002992868423, | |
| "loss_ce": 6.8700457632076e-05, | |
| "loss_iou": 0.6484375, | |
| "loss_num": 0.017822265625, | |
| "loss_xval": 0.0888671875, | |
| "num_input_tokens_seen": 51280676, | |
| "step": 297 | |
| }, | |
| { | |
| "epoch": 0.7760416666666666, | |
| "grad_norm": 8.413905673909936, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0788, | |
| "num_input_tokens_seen": 51452600, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 0.7760416666666666, | |
| "loss": 0.06944364309310913, | |
| "loss_ce": 7.71840859670192e-05, | |
| "loss_iou": 0.53125, | |
| "loss_num": 0.01385498046875, | |
| "loss_xval": 0.0693359375, | |
| "num_input_tokens_seen": 51452600, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 0.7786458333333334, | |
| "grad_norm": 3.9297688671160738, | |
| "learning_rate": 5e-06, | |
| "loss": 0.08, | |
| "num_input_tokens_seen": 51625560, | |
| "step": 299 | |
| }, | |
| { | |
| "epoch": 0.7786458333333334, | |
| "loss": 0.12099509686231613, | |
| "loss_ce": 0.00014548808394465595, | |
| "loss_iou": 0.447265625, | |
| "loss_num": 0.024169921875, | |
| "loss_xval": 0.12109375, | |
| "num_input_tokens_seen": 51625560, | |
| "step": 299 | |
| }, | |
| { | |
| "epoch": 0.78125, | |
| "grad_norm": 8.840803926190146, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0903, | |
| "num_input_tokens_seen": 51797848, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.78125, | |
| "loss": 0.08386749029159546, | |
| "loss_ce": 0.00018828835163731128, | |
| "loss_iou": 0.62890625, | |
| "loss_num": 0.0167236328125, | |
| "loss_xval": 0.08349609375, | |
| "num_input_tokens_seen": 51797848, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.7838541666666666, | |
| "grad_norm": 6.267252913184398, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0768, | |
| "num_input_tokens_seen": 51970968, | |
| "step": 301 | |
| }, | |
| { | |
| "epoch": 0.7838541666666666, | |
| "loss": 0.08612730354070663, | |
| "loss_ce": 0.000205064527108334, | |
| "loss_iou": 0.470703125, | |
| "loss_num": 0.0172119140625, | |
| "loss_xval": 0.0859375, | |
| "num_input_tokens_seen": 51970968, | |
| "step": 301 | |
| }, | |
| { | |
| "epoch": 0.7864583333333334, | |
| "grad_norm": 5.712597753331284, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0856, | |
| "num_input_tokens_seen": 52143656, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 0.7864583333333334, | |
| "loss": 0.14908897876739502, | |
| "loss_ce": 0.00016319258429575711, | |
| "loss_iou": 0.6796875, | |
| "loss_num": 0.02978515625, | |
| "loss_xval": 0.1484375, | |
| "num_input_tokens_seen": 52143656, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 0.7890625, | |
| "grad_norm": 8.35751018269278, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0759, | |
| "num_input_tokens_seen": 52316820, | |
| "step": 303 | |
| }, | |
| { | |
| "epoch": 0.7890625, | |
| "loss": 0.059616073966026306, | |
| "loss_ce": 7.628079038113356e-05, | |
| "loss_iou": 0.5546875, | |
| "loss_num": 0.01190185546875, | |
| "loss_xval": 0.0595703125, | |
| "num_input_tokens_seen": 52316820, | |
| "step": 303 | |
| }, | |
| { | |
| "epoch": 0.7916666666666666, | |
| "grad_norm": 21.438956075626194, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0912, | |
| "num_input_tokens_seen": 52489896, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 0.7916666666666666, | |
| "loss": 0.10695922374725342, | |
| "loss_ce": 0.00010192444460699335, | |
| "loss_iou": 0.6171875, | |
| "loss_num": 0.0213623046875, | |
| "loss_xval": 0.10693359375, | |
| "num_input_tokens_seen": 52489896, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 0.7942708333333334, | |
| "grad_norm": 11.563280258074105, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0721, | |
| "num_input_tokens_seen": 52662040, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 0.7942708333333334, | |
| "loss": 0.08778760582208633, | |
| "loss_ce": 4.956443444825709e-05, | |
| "loss_iou": 0.609375, | |
| "loss_num": 0.017578125, | |
| "loss_xval": 0.087890625, | |
| "num_input_tokens_seen": 52662040, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 0.796875, | |
| "grad_norm": 3.1582836546422683, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0762, | |
| "num_input_tokens_seen": 52833528, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 0.796875, | |
| "loss": 0.048152316361665726, | |
| "loss_ce": 5.6614066124893725e-05, | |
| "loss_iou": 0.0, | |
| "loss_num": 0.0096435546875, | |
| "loss_xval": 0.048095703125, | |
| "num_input_tokens_seen": 52833528, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 0.7994791666666666, | |
| "grad_norm": 3.9541505621592403, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0728, | |
| "num_input_tokens_seen": 53006328, | |
| "step": 307 | |
| }, | |
| { | |
| "epoch": 0.7994791666666666, | |
| "loss": 0.07045421004295349, | |
| "loss_ce": 6.541772745549679e-05, | |
| "loss_iou": 0.51953125, | |
| "loss_num": 0.01409912109375, | |
| "loss_xval": 0.0703125, | |
| "num_input_tokens_seen": 53006328, | |
| "step": 307 | |
| }, | |
| { | |
| "epoch": 0.8020833333333334, | |
| "grad_norm": 21.04478597433239, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0841, | |
| "num_input_tokens_seen": 53179340, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 0.8020833333333334, | |
| "loss": 0.04340720921754837, | |
| "loss_ce": 7.224958972074091e-05, | |
| "loss_iou": 0.486328125, | |
| "loss_num": 0.0086669921875, | |
| "loss_xval": 0.04345703125, | |
| "num_input_tokens_seen": 53179340, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 0.8046875, | |
| "grad_norm": 36.45731809620038, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0903, | |
| "num_input_tokens_seen": 53352024, | |
| "step": 309 | |
| }, | |
| { | |
| "epoch": 0.8046875, | |
| "loss": 0.0791003406047821, | |
| "loss_ce": 9.0329660451971e-05, | |
| "loss_iou": 0.59375, | |
| "loss_num": 0.0157470703125, | |
| "loss_xval": 0.0791015625, | |
| "num_input_tokens_seen": 53352024, | |
| "step": 309 | |
| }, | |
| { | |
| "epoch": 0.8072916666666666, | |
| "grad_norm": 3.6922772893156828, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0982, | |
| "num_input_tokens_seen": 53524908, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.8072916666666666, | |
| "loss": 0.06811343133449554, | |
| "loss_ce": 0.00012026849435642362, | |
| "loss_iou": 0.546875, | |
| "loss_num": 0.01361083984375, | |
| "loss_xval": 0.06787109375, | |
| "num_input_tokens_seen": 53524908, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.8098958333333334, | |
| "grad_norm": 27.046913168708976, | |
| "learning_rate": 5e-06, | |
| "loss": 0.081, | |
| "num_input_tokens_seen": 53696732, | |
| "step": 311 | |
| }, | |
| { | |
| "epoch": 0.8098958333333334, | |
| "loss": 0.0632261261343956, | |
| "loss_ce": 3.94820308429189e-05, | |
| "loss_iou": 0.62109375, | |
| "loss_num": 0.01263427734375, | |
| "loss_xval": 0.06298828125, | |
| "num_input_tokens_seen": 53696732, | |
| "step": 311 | |
| }, | |
| { | |
| "epoch": 0.8125, | |
| "grad_norm": 14.857627339858754, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1017, | |
| "num_input_tokens_seen": 53869308, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 0.8125, | |
| "loss": 0.07280921936035156, | |
| "loss_ce": 0.0001468673290219158, | |
| "loss_iou": 0.482421875, | |
| "loss_num": 0.0145263671875, | |
| "loss_xval": 0.07275390625, | |
| "num_input_tokens_seen": 53869308, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 0.8151041666666666, | |
| "grad_norm": 4.652815682219442, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0804, | |
| "num_input_tokens_seen": 54042004, | |
| "step": 313 | |
| }, | |
| { | |
| "epoch": 0.8151041666666666, | |
| "loss": 0.08997043967247009, | |
| "loss_ce": 3.514082345645875e-05, | |
| "loss_iou": 0.73046875, | |
| "loss_num": 0.0179443359375, | |
| "loss_xval": 0.08984375, | |
| "num_input_tokens_seen": 54042004, | |
| "step": 313 | |
| }, | |
| { | |
| "epoch": 0.8177083333333334, | |
| "grad_norm": 4.472330671881049, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0766, | |
| "num_input_tokens_seen": 54214544, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 0.8177083333333334, | |
| "loss": 0.05066576227545738, | |
| "loss_ce": 3.710209784912877e-05, | |
| "loss_iou": 0.462890625, | |
| "loss_num": 0.0101318359375, | |
| "loss_xval": 0.050537109375, | |
| "num_input_tokens_seen": 54214544, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 0.8203125, | |
| "grad_norm": 14.395534068995472, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0834, | |
| "num_input_tokens_seen": 54387032, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 0.8203125, | |
| "loss": 0.11046263575553894, | |
| "loss_ce": 5.003847763873637e-05, | |
| "loss_iou": 0.609375, | |
| "loss_num": 0.0220947265625, | |
| "loss_xval": 0.1103515625, | |
| "num_input_tokens_seen": 54387032, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 0.8229166666666666, | |
| "grad_norm": 5.6405315516941545, | |
| "learning_rate": 5e-06, | |
| "loss": 0.071, | |
| "num_input_tokens_seen": 54559764, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 0.8229166666666666, | |
| "loss": 0.07865004241466522, | |
| "loss_ce": 0.00015882565639913082, | |
| "loss_iou": 0.5859375, | |
| "loss_num": 0.0157470703125, | |
| "loss_xval": 0.07861328125, | |
| "num_input_tokens_seen": 54559764, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 0.8255208333333334, | |
| "grad_norm": 8.404403222163058, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0788, | |
| "num_input_tokens_seen": 54732960, | |
| "step": 317 | |
| }, | |
| { | |
| "epoch": 0.8255208333333334, | |
| "loss": 0.09951162338256836, | |
| "loss_ce": 5.4833071772009134e-05, | |
| "loss_iou": 0.462890625, | |
| "loss_num": 0.0198974609375, | |
| "loss_xval": 0.099609375, | |
| "num_input_tokens_seen": 54732960, | |
| "step": 317 | |
| }, | |
| { | |
| "epoch": 0.828125, | |
| "grad_norm": 12.856336033562837, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0745, | |
| "num_input_tokens_seen": 54905888, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 0.828125, | |
| "loss": 0.05548687279224396, | |
| "loss_ce": 0.00015850822092033923, | |
| "loss_iou": 0.5078125, | |
| "loss_num": 0.0111083984375, | |
| "loss_xval": 0.055419921875, | |
| "num_input_tokens_seen": 54905888, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 0.8307291666666666, | |
| "grad_norm": 7.5015307945338545, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0814, | |
| "num_input_tokens_seen": 55078584, | |
| "step": 319 | |
| }, | |
| { | |
| "epoch": 0.8307291666666666, | |
| "loss": 0.09399284422397614, | |
| "loss_ce": 0.00018180246115662158, | |
| "loss_iou": 0.8203125, | |
| "loss_num": 0.018798828125, | |
| "loss_xval": 0.09375, | |
| "num_input_tokens_seen": 55078584, | |
| "step": 319 | |
| }, | |
| { | |
| "epoch": 0.8333333333333334, | |
| "grad_norm": 3.549717733561083, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0888, | |
| "num_input_tokens_seen": 55251416, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.8333333333333334, | |
| "loss": 0.10571445524692535, | |
| "loss_ce": 0.00012363299902062863, | |
| "loss_iou": 0.390625, | |
| "loss_num": 0.0211181640625, | |
| "loss_xval": 0.10546875, | |
| "num_input_tokens_seen": 55251416, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.8359375, | |
| "grad_norm": 15.137913189345245, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0813, | |
| "num_input_tokens_seen": 55424308, | |
| "step": 321 | |
| }, | |
| { | |
| "epoch": 0.8359375, | |
| "loss": 0.12823191285133362, | |
| "loss_ce": 0.0001038559275912121, | |
| "loss_iou": 0.423828125, | |
| "loss_num": 0.025634765625, | |
| "loss_xval": 0.1279296875, | |
| "num_input_tokens_seen": 55424308, | |
| "step": 321 | |
| }, | |
| { | |
| "epoch": 0.8385416666666666, | |
| "grad_norm": 4.928878873643115, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1069, | |
| "num_input_tokens_seen": 55597376, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 0.8385416666666666, | |
| "loss": 0.1322542130947113, | |
| "loss_ce": 2.1536015992751345e-05, | |
| "loss_iou": 0.578125, | |
| "loss_num": 0.0264892578125, | |
| "loss_xval": 0.1318359375, | |
| "num_input_tokens_seen": 55597376, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 0.8411458333333334, | |
| "grad_norm": 19.737058147658324, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0898, | |
| "num_input_tokens_seen": 55769600, | |
| "step": 323 | |
| }, | |
| { | |
| "epoch": 0.8411458333333334, | |
| "loss": 0.17265748977661133, | |
| "loss_ce": 8.058187086135149e-05, | |
| "loss_iou": 0.0, | |
| "loss_num": 0.034423828125, | |
| "loss_xval": 0.1728515625, | |
| "num_input_tokens_seen": 55769600, | |
| "step": 323 | |
| }, | |
| { | |
| "epoch": 0.84375, | |
| "grad_norm": 7.152491955998749, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0743, | |
| "num_input_tokens_seen": 55942580, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 0.84375, | |
| "loss": 0.059846702963113785, | |
| "loss_ce": 6.276796921156347e-05, | |
| "loss_iou": 0.5234375, | |
| "loss_num": 0.011962890625, | |
| "loss_xval": 0.059814453125, | |
| "num_input_tokens_seen": 55942580, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 0.8463541666666666, | |
| "grad_norm": 6.664096474807532, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0831, | |
| "num_input_tokens_seen": 56115528, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 0.8463541666666666, | |
| "loss": 0.06182098388671875, | |
| "loss_ce": 3.814647061517462e-05, | |
| "loss_iou": 0.7734375, | |
| "loss_num": 0.01239013671875, | |
| "loss_xval": 0.061767578125, | |
| "num_input_tokens_seen": 56115528, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 0.8489583333333334, | |
| "grad_norm": 4.533780334308584, | |
| "learning_rate": 5e-06, | |
| "loss": 0.085, | |
| "num_input_tokens_seen": 56288484, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 0.8489583333333334, | |
| "loss": 0.10065165907144547, | |
| "loss_ce": 0.00015727368008811027, | |
| "loss_iou": 0.4453125, | |
| "loss_num": 0.0201416015625, | |
| "loss_xval": 0.1005859375, | |
| "num_input_tokens_seen": 56288484, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 0.8515625, | |
| "grad_norm": 4.639727507170639, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0767, | |
| "num_input_tokens_seen": 56460840, | |
| "step": 327 | |
| }, | |
| { | |
| "epoch": 0.8515625, | |
| "loss": 0.0594358891248703, | |
| "loss_ce": 7.919950439827517e-05, | |
| "loss_iou": 0.69140625, | |
| "loss_num": 0.01190185546875, | |
| "loss_xval": 0.059326171875, | |
| "num_input_tokens_seen": 56460840, | |
| "step": 327 | |
| }, | |
| { | |
| "epoch": 0.8541666666666666, | |
| "grad_norm": 4.945822599515496, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0809, | |
| "num_input_tokens_seen": 56633612, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 0.8541666666666666, | |
| "loss": 0.10031691938638687, | |
| "loss_ce": 3.615960304159671e-05, | |
| "loss_iou": 0.462890625, | |
| "loss_num": 0.02001953125, | |
| "loss_xval": 0.10009765625, | |
| "num_input_tokens_seen": 56633612, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 0.8567708333333334, | |
| "grad_norm": 17.721130156863943, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0852, | |
| "num_input_tokens_seen": 56806644, | |
| "step": 329 | |
| }, | |
| { | |
| "epoch": 0.8567708333333334, | |
| "loss": 0.06605279445648193, | |
| "loss_ce": 0.0002263778733322397, | |
| "loss_iou": 0.60546875, | |
| "loss_num": 0.01318359375, | |
| "loss_xval": 0.06591796875, | |
| "num_input_tokens_seen": 56806644, | |
| "step": 329 | |
| }, | |
| { | |
| "epoch": 0.859375, | |
| "grad_norm": 4.228842310344442, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0829, | |
| "num_input_tokens_seen": 56978832, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.859375, | |
| "loss": 0.09333358705043793, | |
| "loss_ce": 7.186534639913589e-05, | |
| "loss_iou": 0.48046875, | |
| "loss_num": 0.0186767578125, | |
| "loss_xval": 0.09326171875, | |
| "num_input_tokens_seen": 56978832, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.8619791666666666, | |
| "grad_norm": 4.379026646689163, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0708, | |
| "num_input_tokens_seen": 57151476, | |
| "step": 331 | |
| }, | |
| { | |
| "epoch": 0.8619791666666666, | |
| "loss": 0.05179120972752571, | |
| "loss_ce": 6.39162608422339e-05, | |
| "loss_iou": 0.49609375, | |
| "loss_num": 0.0103759765625, | |
| "loss_xval": 0.0517578125, | |
| "num_input_tokens_seen": 57151476, | |
| "step": 331 | |
| }, | |
| { | |
| "epoch": 0.8645833333333334, | |
| "grad_norm": 9.125447816364591, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0697, | |
| "num_input_tokens_seen": 57323400, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 0.8645833333333334, | |
| "loss": 0.10480596870183945, | |
| "loss_ce": 6.964314525248483e-05, | |
| "loss_iou": 0.51171875, | |
| "loss_num": 0.02099609375, | |
| "loss_xval": 0.1044921875, | |
| "num_input_tokens_seen": 57323400, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 0.8671875, | |
| "grad_norm": 5.241494036335466, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0617, | |
| "num_input_tokens_seen": 57496068, | |
| "step": 333 | |
| }, | |
| { | |
| "epoch": 0.8671875, | |
| "loss": 0.06167437136173248, | |
| "loss_ce": 0.00012041500303894281, | |
| "loss_iou": 0.71875, | |
| "loss_num": 0.0123291015625, | |
| "loss_xval": 0.0615234375, | |
| "num_input_tokens_seen": 57496068, | |
| "step": 333 | |
| }, | |
| { | |
| "epoch": 0.8697916666666666, | |
| "grad_norm": 3.7921802750366664, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0859, | |
| "num_input_tokens_seen": 57669236, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 0.8697916666666666, | |
| "loss": 0.03953123837709427, | |
| "loss_ce": 0.00020934098574798554, | |
| "loss_iou": 0.47265625, | |
| "loss_num": 0.00787353515625, | |
| "loss_xval": 0.039306640625, | |
| "num_input_tokens_seen": 57669236, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 0.8723958333333334, | |
| "grad_norm": 7.046054210110739, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0857, | |
| "num_input_tokens_seen": 57841676, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 0.8723958333333334, | |
| "loss": 0.13086940348148346, | |
| "loss_ce": 4.054443706991151e-05, | |
| "loss_iou": 0.46875, | |
| "loss_num": 0.026123046875, | |
| "loss_xval": 0.130859375, | |
| "num_input_tokens_seen": 57841676, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 0.875, | |
| "grad_norm": 7.658169223957337, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0927, | |
| "num_input_tokens_seen": 58015076, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 0.875, | |
| "loss": 0.03890954330563545, | |
| "loss_ce": 0.0006252414314076304, | |
| "loss_iou": 0.52734375, | |
| "loss_num": 0.007659912109375, | |
| "loss_xval": 0.038330078125, | |
| "num_input_tokens_seen": 58015076, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 0.8776041666666666, | |
| "grad_norm": 4.319289196507174, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0931, | |
| "num_input_tokens_seen": 58187592, | |
| "step": 337 | |
| }, | |
| { | |
| "epoch": 0.8776041666666666, | |
| "loss": 0.10160954296588898, | |
| "loss_ce": 4.7046225517988205e-05, | |
| "loss_iou": 0.625, | |
| "loss_num": 0.0203857421875, | |
| "loss_xval": 0.1015625, | |
| "num_input_tokens_seen": 58187592, | |
| "step": 337 | |
| }, | |
| { | |
| "epoch": 0.8802083333333334, | |
| "grad_norm": 8.26220496195536, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0691, | |
| "num_input_tokens_seen": 58360220, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 0.8802083333333334, | |
| "loss": 0.05522051081061363, | |
| "loss_ce": 4.472649015951902e-05, | |
| "loss_iou": 0.44140625, | |
| "loss_num": 0.01104736328125, | |
| "loss_xval": 0.05517578125, | |
| "num_input_tokens_seen": 58360220, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 0.8828125, | |
| "grad_norm": 3.8822556756341036, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0498, | |
| "num_input_tokens_seen": 58532536, | |
| "step": 339 | |
| }, | |
| { | |
| "epoch": 0.8828125, | |
| "loss": 0.07371848821640015, | |
| "loss_ce": 7.957669731695205e-05, | |
| "loss_iou": 0.515625, | |
| "loss_num": 0.01470947265625, | |
| "loss_xval": 0.07373046875, | |
| "num_input_tokens_seen": 58532536, | |
| "step": 339 | |
| }, | |
| { | |
| "epoch": 0.8854166666666666, | |
| "grad_norm": 9.884171334560891, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0594, | |
| "num_input_tokens_seen": 58705004, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.8854166666666666, | |
| "loss": 0.06320229917764664, | |
| "loss_ce": 6.143321661511436e-05, | |
| "loss_iou": 0.44921875, | |
| "loss_num": 0.01263427734375, | |
| "loss_xval": 0.06298828125, | |
| "num_input_tokens_seen": 58705004, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.8880208333333334, | |
| "grad_norm": 5.196359666592977, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0404, | |
| "num_input_tokens_seen": 58878152, | |
| "step": 341 | |
| }, | |
| { | |
| "epoch": 0.8880208333333334, | |
| "loss": 0.03186158090829849, | |
| "loss_ce": 0.0002453709894325584, | |
| "loss_iou": 0.546875, | |
| "loss_num": 0.006317138671875, | |
| "loss_xval": 0.03173828125, | |
| "num_input_tokens_seen": 58878152, | |
| "step": 341 | |
| }, | |
| { | |
| "epoch": 0.890625, | |
| "grad_norm": 6.15237627529603, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0741, | |
| "num_input_tokens_seen": 59050440, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 0.890625, | |
| "loss": 0.05918329954147339, | |
| "loss_ce": 5.5491131206508726e-05, | |
| "loss_iou": 0.36328125, | |
| "loss_num": 0.0118408203125, | |
| "loss_xval": 0.05908203125, | |
| "num_input_tokens_seen": 59050440, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 0.8932291666666666, | |
| "grad_norm": 2.027289516848372, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0797, | |
| "num_input_tokens_seen": 59223528, | |
| "step": 343 | |
| }, | |
| { | |
| "epoch": 0.8932291666666666, | |
| "loss": 0.0674634724855423, | |
| "loss_ce": 3.488633592496626e-05, | |
| "loss_iou": 0.443359375, | |
| "loss_num": 0.01348876953125, | |
| "loss_xval": 0.0673828125, | |
| "num_input_tokens_seen": 59223528, | |
| "step": 343 | |
| }, | |
| { | |
| "epoch": 0.8958333333333334, | |
| "grad_norm": 3.3607059104825554, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0925, | |
| "num_input_tokens_seen": 59396320, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 0.8958333333333334, | |
| "loss": 0.04388820007443428, | |
| "loss_ce": 8.021650137379766e-05, | |
| "loss_iou": 0.0, | |
| "loss_num": 0.0087890625, | |
| "loss_xval": 0.043701171875, | |
| "num_input_tokens_seen": 59396320, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 0.8984375, | |
| "grad_norm": 11.313403126591554, | |
| "learning_rate": 5e-06, | |
| "loss": 0.11, | |
| "num_input_tokens_seen": 59568904, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 0.8984375, | |
| "loss": 0.059322062879800797, | |
| "loss_ce": 8.744518709136173e-05, | |
| "loss_iou": 0.5, | |
| "loss_num": 0.0118408203125, | |
| "loss_xval": 0.059326171875, | |
| "num_input_tokens_seen": 59568904, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 0.9010416666666666, | |
| "grad_norm": 9.716589837853562, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0927, | |
| "num_input_tokens_seen": 59741504, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 0.9010416666666666, | |
| "loss": 0.08992569893598557, | |
| "loss_ce": 5.143693124409765e-05, | |
| "loss_iou": 0.578125, | |
| "loss_num": 0.0179443359375, | |
| "loss_xval": 0.08984375, | |
| "num_input_tokens_seen": 59741504, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 0.9036458333333334, | |
| "grad_norm": 4.9130642144499985, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0664, | |
| "num_input_tokens_seen": 59913580, | |
| "step": 347 | |
| }, | |
| { | |
| "epoch": 0.9036458333333334, | |
| "loss": 0.059036046266555786, | |
| "loss_ce": 6.082511754357256e-05, | |
| "loss_iou": 0.51953125, | |
| "loss_num": 0.01177978515625, | |
| "loss_xval": 0.05908203125, | |
| "num_input_tokens_seen": 59913580, | |
| "step": 347 | |
| }, | |
| { | |
| "epoch": 0.90625, | |
| "grad_norm": 5.535144728019767, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0459, | |
| "num_input_tokens_seen": 60086300, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 0.90625, | |
| "loss": 0.040458932518959045, | |
| "loss_ce": 6.891523662488908e-05, | |
| "loss_iou": 0.62109375, | |
| "loss_num": 0.008056640625, | |
| "loss_xval": 0.040283203125, | |
| "num_input_tokens_seen": 60086300, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 0.9088541666666666, | |
| "grad_norm": 8.128924901708682, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1183, | |
| "num_input_tokens_seen": 60258804, | |
| "step": 349 | |
| }, | |
| { | |
| "epoch": 0.9088541666666666, | |
| "loss": 0.10604314506053925, | |
| "loss_ce": 2.508011857571546e-05, | |
| "loss_iou": 0.58984375, | |
| "loss_num": 0.021240234375, | |
| "loss_xval": 0.10595703125, | |
| "num_input_tokens_seen": 60258804, | |
| "step": 349 | |
| }, | |
| { | |
| "epoch": 0.9114583333333334, | |
| "grad_norm": 6.130745719562545, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1182, | |
| "num_input_tokens_seen": 60431928, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.9114583333333334, | |
| "loss": 0.137631356716156, | |
| "loss_ce": 5.8111756516154855e-05, | |
| "loss_iou": 0.671875, | |
| "loss_num": 0.0274658203125, | |
| "loss_xval": 0.1376953125, | |
| "num_input_tokens_seen": 60431928, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.9140625, | |
| "grad_norm": 6.6090310971417345, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0682, | |
| "num_input_tokens_seen": 60604596, | |
| "step": 351 | |
| }, | |
| { | |
| "epoch": 0.9140625, | |
| "loss": 0.03837839514017105, | |
| "loss_ce": 0.0008875515777617693, | |
| "loss_iou": 0.5078125, | |
| "loss_num": 0.00750732421875, | |
| "loss_xval": 0.03759765625, | |
| "num_input_tokens_seen": 60604596, | |
| "step": 351 | |
| }, | |
| { | |
| "epoch": 0.9166666666666666, | |
| "grad_norm": 37.977702783393255, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0839, | |
| "num_input_tokens_seen": 60777696, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 0.9166666666666666, | |
| "loss": 0.05776657909154892, | |
| "loss_ce": 7.309722423087806e-05, | |
| "loss_iou": 0.578125, | |
| "loss_num": 0.01153564453125, | |
| "loss_xval": 0.0576171875, | |
| "num_input_tokens_seen": 60777696, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 0.9192708333333334, | |
| "grad_norm": 10.793340791159972, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1036, | |
| "num_input_tokens_seen": 60950176, | |
| "step": 353 | |
| }, | |
| { | |
| "epoch": 0.9192708333333334, | |
| "loss": 0.10405679047107697, | |
| "loss_ce": 5.2886520279571414e-05, | |
| "loss_iou": 0.51953125, | |
| "loss_num": 0.020751953125, | |
| "loss_xval": 0.10400390625, | |
| "num_input_tokens_seen": 60950176, | |
| "step": 353 | |
| }, | |
| { | |
| "epoch": 0.921875, | |
| "grad_norm": 4.2624655031129395, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0638, | |
| "num_input_tokens_seen": 61123352, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 0.921875, | |
| "loss": 0.045672204345464706, | |
| "loss_ce": 4.842308408115059e-05, | |
| "loss_iou": 0.640625, | |
| "loss_num": 0.0091552734375, | |
| "loss_xval": 0.045654296875, | |
| "num_input_tokens_seen": 61123352, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 0.9244791666666666, | |
| "grad_norm": 5.263551596545367, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0836, | |
| "num_input_tokens_seen": 61296296, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 0.9244791666666666, | |
| "loss": 0.07579399645328522, | |
| "loss_ce": 1.8844926671590656e-05, | |
| "loss_iou": 0.484375, | |
| "loss_num": 0.01513671875, | |
| "loss_xval": 0.07568359375, | |
| "num_input_tokens_seen": 61296296, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 0.9270833333333334, | |
| "grad_norm": 4.969020675022387, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1161, | |
| "num_input_tokens_seen": 61468464, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 0.9270833333333334, | |
| "loss": 0.0982382521033287, | |
| "loss_ce": 9.372214117320254e-05, | |
| "loss_iou": 0.474609375, | |
| "loss_num": 0.0196533203125, | |
| "loss_xval": 0.09814453125, | |
| "num_input_tokens_seen": 61468464, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 0.9296875, | |
| "grad_norm": 9.751227404400339, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0667, | |
| "num_input_tokens_seen": 61641104, | |
| "step": 357 | |
| }, | |
| { | |
| "epoch": 0.9296875, | |
| "loss": 0.08714728057384491, | |
| "loss_ce": 6.537619628943503e-05, | |
| "loss_iou": 0.4921875, | |
| "loss_num": 0.0174560546875, | |
| "loss_xval": 0.0869140625, | |
| "num_input_tokens_seen": 61641104, | |
| "step": 357 | |
| }, | |
| { | |
| "epoch": 0.9322916666666666, | |
| "grad_norm": 3.705998309698105, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0614, | |
| "num_input_tokens_seen": 61813956, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 0.9322916666666666, | |
| "loss": 0.05996260046958923, | |
| "loss_ce": 7.185334106907248e-05, | |
| "loss_iou": 0.439453125, | |
| "loss_num": 0.011962890625, | |
| "loss_xval": 0.059814453125, | |
| "num_input_tokens_seen": 61813956, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 0.9348958333333334, | |
| "grad_norm": 5.61843483400317, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0953, | |
| "num_input_tokens_seen": 61987068, | |
| "step": 359 | |
| }, | |
| { | |
| "epoch": 0.9348958333333334, | |
| "loss": 0.1060662716627121, | |
| "loss_ce": 4.819741297978908e-05, | |
| "loss_iou": 0.5078125, | |
| "loss_num": 0.021240234375, | |
| "loss_xval": 0.10595703125, | |
| "num_input_tokens_seen": 61987068, | |
| "step": 359 | |
| }, | |
| { | |
| "epoch": 0.9375, | |
| "grad_norm": 4.53602826237247, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0859, | |
| "num_input_tokens_seen": 62160000, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.9375, | |
| "loss": 0.12344817072153091, | |
| "loss_ce": 3.5084449336864054e-05, | |
| "loss_iou": 0.5859375, | |
| "loss_num": 0.024658203125, | |
| "loss_xval": 0.12353515625, | |
| "num_input_tokens_seen": 62160000, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.9401041666666666, | |
| "grad_norm": 2.382495598116124, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0654, | |
| "num_input_tokens_seen": 62332704, | |
| "step": 361 | |
| }, | |
| { | |
| "epoch": 0.9401041666666666, | |
| "loss": 0.04234257712960243, | |
| "loss_ce": 9.099017916014418e-05, | |
| "loss_iou": 0.53125, | |
| "loss_num": 0.00848388671875, | |
| "loss_xval": 0.042236328125, | |
| "num_input_tokens_seen": 62332704, | |
| "step": 361 | |
| }, | |
| { | |
| "epoch": 0.9427083333333334, | |
| "grad_norm": 3.67565808505264, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0921, | |
| "num_input_tokens_seen": 62505472, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 0.9427083333333334, | |
| "loss": 0.1404985636472702, | |
| "loss_ce": 0.0001482181833125651, | |
| "loss_iou": 0.41015625, | |
| "loss_num": 0.028076171875, | |
| "loss_xval": 0.140625, | |
| "num_input_tokens_seen": 62505472, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 0.9453125, | |
| "grad_norm": 4.393117034860246, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0746, | |
| "num_input_tokens_seen": 62677852, | |
| "step": 363 | |
| }, | |
| { | |
| "epoch": 0.9453125, | |
| "loss": 0.08209509402513504, | |
| "loss_ce": 3.332511550979689e-05, | |
| "loss_iou": 0.57421875, | |
| "loss_num": 0.016357421875, | |
| "loss_xval": 0.08203125, | |
| "num_input_tokens_seen": 62677852, | |
| "step": 363 | |
| }, | |
| { | |
| "epoch": 0.9479166666666666, | |
| "grad_norm": 56.3211081199482, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0939, | |
| "num_input_tokens_seen": 62850624, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 0.9479166666666666, | |
| "loss": 0.06467482447624207, | |
| "loss_ce": 3.859533171635121e-05, | |
| "loss_iou": 0.53515625, | |
| "loss_num": 0.012939453125, | |
| "loss_xval": 0.064453125, | |
| "num_input_tokens_seen": 62850624, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 0.9505208333333334, | |
| "grad_norm": 4.908757065453886, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0602, | |
| "num_input_tokens_seen": 63022912, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 0.9505208333333334, | |
| "loss": 0.05544174462556839, | |
| "loss_ce": 0.00011337252362864092, | |
| "loss_iou": 0.578125, | |
| "loss_num": 0.01104736328125, | |
| "loss_xval": 0.055419921875, | |
| "num_input_tokens_seen": 63022912, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 0.953125, | |
| "grad_norm": 7.057560906891319, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0884, | |
| "num_input_tokens_seen": 63195992, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 0.953125, | |
| "loss": 0.08902530372142792, | |
| "loss_ce": 3.6048379115527496e-05, | |
| "loss_iou": 0.51953125, | |
| "loss_num": 0.017822265625, | |
| "loss_xval": 0.0888671875, | |
| "num_input_tokens_seen": 63195992, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 0.9557291666666666, | |
| "grad_norm": 4.46902192772412, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0773, | |
| "num_input_tokens_seen": 63368708, | |
| "step": 367 | |
| }, | |
| { | |
| "epoch": 0.9557291666666666, | |
| "loss": 0.02907339483499527, | |
| "loss_ce": 3.591889617382549e-05, | |
| "loss_iou": 0.0, | |
| "loss_num": 0.00579833984375, | |
| "loss_xval": 0.029052734375, | |
| "num_input_tokens_seen": 63368708, | |
| "step": 367 | |
| }, | |
| { | |
| "epoch": 0.9583333333333334, | |
| "grad_norm": 4.189625335712974, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0685, | |
| "num_input_tokens_seen": 63541312, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 0.9583333333333334, | |
| "loss": 0.10639164596796036, | |
| "loss_ce": 8.366195834241807e-05, | |
| "loss_iou": 0.578125, | |
| "loss_num": 0.021240234375, | |
| "loss_xval": 0.1064453125, | |
| "num_input_tokens_seen": 63541312, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 0.9609375, | |
| "grad_norm": 5.657094938839105, | |
| "learning_rate": 5e-06, | |
| "loss": 0.068, | |
| "num_input_tokens_seen": 63713968, | |
| "step": 369 | |
| }, | |
| { | |
| "epoch": 0.9609375, | |
| "loss": 0.08933991193771362, | |
| "loss_ce": 3.0215423976187594e-05, | |
| "loss_iou": 0.484375, | |
| "loss_num": 0.017822265625, | |
| "loss_xval": 0.08935546875, | |
| "num_input_tokens_seen": 63713968, | |
| "step": 369 | |
| }, | |
| { | |
| "epoch": 0.9635416666666666, | |
| "grad_norm": 5.423233634302121, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0647, | |
| "num_input_tokens_seen": 63886996, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.9635416666666666, | |
| "loss": 0.06539873778820038, | |
| "loss_ce": 3.008513340319041e-05, | |
| "loss_iou": 0.59765625, | |
| "loss_num": 0.0130615234375, | |
| "loss_xval": 0.0654296875, | |
| "num_input_tokens_seen": 63886996, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.9661458333333334, | |
| "grad_norm": 5.395935683494909, | |
| "learning_rate": 5e-06, | |
| "loss": 0.09, | |
| "num_input_tokens_seen": 64059660, | |
| "step": 371 | |
| }, | |
| { | |
| "epoch": 0.9661458333333334, | |
| "loss": 0.12266229093074799, | |
| "loss_ce": 7.318713323911652e-05, | |
| "loss_iou": 0.5703125, | |
| "loss_num": 0.0245361328125, | |
| "loss_xval": 0.12255859375, | |
| "num_input_tokens_seen": 64059660, | |
| "step": 371 | |
| }, | |
| { | |
| "epoch": 0.96875, | |
| "grad_norm": 5.856107929033903, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0854, | |
| "num_input_tokens_seen": 64232096, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 0.96875, | |
| "loss": 0.09164264798164368, | |
| "loss_ce": 4.413935312186368e-05, | |
| "loss_iou": 0.0, | |
| "loss_num": 0.018310546875, | |
| "loss_xval": 0.091796875, | |
| "num_input_tokens_seen": 64232096, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 0.9713541666666666, | |
| "grad_norm": 5.870548313752241, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0559, | |
| "num_input_tokens_seen": 64404756, | |
| "step": 373 | |
| }, | |
| { | |
| "epoch": 0.9713541666666666, | |
| "loss": 0.046882934868335724, | |
| "loss_ce": 2.3196011170512065e-05, | |
| "loss_iou": 0.50390625, | |
| "loss_num": 0.0093994140625, | |
| "loss_xval": 0.046875, | |
| "num_input_tokens_seen": 64404756, | |
| "step": 373 | |
| }, | |
| { | |
| "epoch": 0.9739583333333334, | |
| "grad_norm": 10.532029531276638, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0822, | |
| "num_input_tokens_seen": 64577476, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 0.9739583333333334, | |
| "loss": 0.07333735376596451, | |
| "loss_ce": 6.464817124651745e-05, | |
| "loss_iou": 0.5859375, | |
| "loss_num": 0.0146484375, | |
| "loss_xval": 0.0732421875, | |
| "num_input_tokens_seen": 64577476, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 0.9765625, | |
| "grad_norm": 8.746459219065029, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0993, | |
| "num_input_tokens_seen": 64750252, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.9765625, | |
| "loss": 0.08189202845096588, | |
| "loss_ce": 2.8627566280192696e-05, | |
| "loss_iou": 0.55078125, | |
| "loss_num": 0.016357421875, | |
| "loss_xval": 0.08203125, | |
| "num_input_tokens_seen": 64750252, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.9791666666666666, | |
| "grad_norm": 4.369734068569422, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0772, | |
| "num_input_tokens_seen": 64922984, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 0.9791666666666666, | |
| "loss": 0.07155308127403259, | |
| "loss_ce": 5.0396010919939727e-05, | |
| "loss_iou": 0.51953125, | |
| "loss_num": 0.01434326171875, | |
| "loss_xval": 0.0712890625, | |
| "num_input_tokens_seen": 64922984, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 0.9817708333333334, | |
| "grad_norm": 5.055558558635633, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0739, | |
| "num_input_tokens_seen": 65095228, | |
| "step": 377 | |
| }, | |
| { | |
| "epoch": 0.9817708333333334, | |
| "loss": 0.10587326437234879, | |
| "loss_ce": 2.304443478351459e-05, | |
| "loss_iou": 0.37890625, | |
| "loss_num": 0.0211181640625, | |
| "loss_xval": 0.10595703125, | |
| "num_input_tokens_seen": 65095228, | |
| "step": 377 | |
| }, | |
| { | |
| "epoch": 0.984375, | |
| "grad_norm": 5.286209551414624, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0876, | |
| "num_input_tokens_seen": 65267596, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 0.984375, | |
| "loss": 0.06380397081375122, | |
| "loss_ce": 5.275038711261004e-05, | |
| "loss_iou": 0.59375, | |
| "loss_num": 0.01275634765625, | |
| "loss_xval": 0.06396484375, | |
| "num_input_tokens_seen": 65267596, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 0.9869791666666666, | |
| "grad_norm": 4.779020534428804, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0801, | |
| "num_input_tokens_seen": 65439632, | |
| "step": 379 | |
| }, | |
| { | |
| "epoch": 0.9869791666666666, | |
| "loss": 0.05044550448656082, | |
| "loss_ce": 3.0469000193988904e-05, | |
| "loss_iou": 0.68359375, | |
| "loss_num": 0.01007080078125, | |
| "loss_xval": 0.05029296875, | |
| "num_input_tokens_seen": 65439632, | |
| "step": 379 | |
| }, | |
| { | |
| "epoch": 0.9895833333333334, | |
| "grad_norm": 4.685839131970804, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0717, | |
| "num_input_tokens_seen": 65612188, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.9895833333333334, | |
| "loss": 0.05971755087375641, | |
| "loss_ce": 2.5168032152578235e-05, | |
| "loss_iou": 0.5234375, | |
| "loss_num": 0.01190185546875, | |
| "loss_xval": 0.0595703125, | |
| "num_input_tokens_seen": 65612188, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.9921875, | |
| "grad_norm": 5.019679075383125, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0852, | |
| "num_input_tokens_seen": 65785132, | |
| "step": 381 | |
| }, | |
| { | |
| "epoch": 0.9921875, | |
| "loss": 0.0729844868183136, | |
| "loss_ce": 7.799551531206816e-05, | |
| "loss_iou": 0.5390625, | |
| "loss_num": 0.01458740234375, | |
| "loss_xval": 0.07275390625, | |
| "num_input_tokens_seen": 65785132, | |
| "step": 381 | |
| }, | |
| { | |
| "epoch": 0.9947916666666666, | |
| "grad_norm": 5.2408542210225075, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0618, | |
| "num_input_tokens_seen": 65958084, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 0.9947916666666666, | |
| "loss": 0.07006223499774933, | |
| "loss_ce": 2.439254785713274e-05, | |
| "loss_iou": 0.400390625, | |
| "loss_num": 0.0140380859375, | |
| "loss_xval": 0.06982421875, | |
| "num_input_tokens_seen": 65958084, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 0.9973958333333334, | |
| "grad_norm": 8.43973663796555, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0739, | |
| "num_input_tokens_seen": 66130316, | |
| "step": 383 | |
| }, | |
| { | |
| "epoch": 0.9973958333333334, | |
| "loss": 0.05812692642211914, | |
| "loss_ce": 0.00012826945749111474, | |
| "loss_iou": 0.55078125, | |
| "loss_num": 0.0115966796875, | |
| "loss_xval": 0.05810546875, | |
| "num_input_tokens_seen": 66130316, | |
| "step": 383 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 14.446394116068738, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0949, | |
| "num_input_tokens_seen": 66302752, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "loss": 0.12496863305568695, | |
| "loss_ce": 4.493331289268099e-05, | |
| "loss_iou": 0.5703125, | |
| "loss_num": 0.0250244140625, | |
| "loss_xval": 0.125, | |
| "num_input_tokens_seen": 66302752, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 1.0026041666666667, | |
| "grad_norm": 4.289918076646653, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0624, | |
| "num_input_tokens_seen": 66475484, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 1.0026041666666667, | |
| "loss": 0.04102395847439766, | |
| "loss_ce": 2.3592958314111456e-05, | |
| "loss_iou": 0.435546875, | |
| "loss_num": 0.0081787109375, | |
| "loss_xval": 0.041015625, | |
| "num_input_tokens_seen": 66475484, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 1.0052083333333333, | |
| "grad_norm": 4.281041154001583, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0857, | |
| "num_input_tokens_seen": 66647384, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 1.0052083333333333, | |
| "loss": 0.12955166399478912, | |
| "loss_ce": 3.50592345057521e-05, | |
| "loss_iou": 0.5859375, | |
| "loss_num": 0.02587890625, | |
| "loss_xval": 0.1298828125, | |
| "num_input_tokens_seen": 66647384, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 1.0078125, | |
| "grad_norm": 9.266009885978788, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0655, | |
| "num_input_tokens_seen": 66820256, | |
| "step": 387 | |
| }, | |
| { | |
| "epoch": 1.0078125, | |
| "loss": 0.076566182076931, | |
| "loss_ce": 4.3353000364732e-05, | |
| "loss_iou": 0.48046875, | |
| "loss_num": 0.01531982421875, | |
| "loss_xval": 0.07666015625, | |
| "num_input_tokens_seen": 66820256, | |
| "step": 387 | |
| }, | |
| { | |
| "epoch": 1.0104166666666667, | |
| "grad_norm": 6.544465032820982, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0798, | |
| "num_input_tokens_seen": 66992440, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 1.0104166666666667, | |
| "loss": 0.0616319440305233, | |
| "loss_ce": 4.7468380216741934e-05, | |
| "loss_iou": 0.609375, | |
| "loss_num": 0.0123291015625, | |
| "loss_xval": 0.0615234375, | |
| "num_input_tokens_seen": 66992440, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 1.0130208333333333, | |
| "grad_norm": 5.606310567972833, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0857, | |
| "num_input_tokens_seen": 67165064, | |
| "step": 389 | |
| }, | |
| { | |
| "epoch": 1.0130208333333333, | |
| "loss": 0.08174864202737808, | |
| "loss_ce": 2.2569187422050163e-05, | |
| "loss_iou": 0.515625, | |
| "loss_num": 0.016357421875, | |
| "loss_xval": 0.08154296875, | |
| "num_input_tokens_seen": 67165064, | |
| "step": 389 | |
| }, | |
| { | |
| "epoch": 1.015625, | |
| "grad_norm": 11.386296445247536, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1006, | |
| "num_input_tokens_seen": 67338208, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 1.015625, | |
| "loss": 0.13062095642089844, | |
| "loss_ce": 6.675285840174183e-05, | |
| "loss_iou": 0.625, | |
| "loss_num": 0.026123046875, | |
| "loss_xval": 0.130859375, | |
| "num_input_tokens_seen": 67338208, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 1.0182291666666667, | |
| "grad_norm": 9.218245759030461, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0807, | |
| "num_input_tokens_seen": 67511096, | |
| "step": 391 | |
| }, | |
| { | |
| "epoch": 1.0182291666666667, | |
| "loss": 0.1230873167514801, | |
| "loss_ce": 4.043774606543593e-05, | |
| "loss_iou": 0.466796875, | |
| "loss_num": 0.024658203125, | |
| "loss_xval": 0.123046875, | |
| "num_input_tokens_seen": 67511096, | |
| "step": 391 | |
| }, | |
| { | |
| "epoch": 1.0208333333333333, | |
| "grad_norm": 7.360763519108863, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0808, | |
| "num_input_tokens_seen": 67683500, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 1.0208333333333333, | |
| "loss": 0.04704148322343826, | |
| "loss_ce": 7.492824079236016e-05, | |
| "loss_iou": 0.69140625, | |
| "loss_num": 0.0093994140625, | |
| "loss_xval": 0.046875, | |
| "num_input_tokens_seen": 67683500, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 1.0234375, | |
| "grad_norm": 24.398414774258395, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0653, | |
| "num_input_tokens_seen": 67856564, | |
| "step": 393 | |
| }, | |
| { | |
| "epoch": 1.0234375, | |
| "loss": 0.052695855498313904, | |
| "loss_ce": 6.829424819443375e-05, | |
| "loss_iou": 0.48828125, | |
| "loss_num": 0.01055908203125, | |
| "loss_xval": 0.052734375, | |
| "num_input_tokens_seen": 67856564, | |
| "step": 393 | |
| }, | |
| { | |
| "epoch": 1.0260416666666667, | |
| "grad_norm": 3.952882418188013, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0773, | |
| "num_input_tokens_seen": 68029008, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 1.0260416666666667, | |
| "loss": 0.05983951687812805, | |
| "loss_ce": 5.558759949053638e-05, | |
| "loss_iou": 0.0, | |
| "loss_num": 0.011962890625, | |
| "loss_xval": 0.059814453125, | |
| "num_input_tokens_seen": 68029008, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 1.0286458333333333, | |
| "grad_norm": 6.537700275238822, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0599, | |
| "num_input_tokens_seen": 68201948, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 1.0286458333333333, | |
| "loss": 0.09255748987197876, | |
| "loss_ce": 4.3449574150145054e-05, | |
| "loss_iou": 0.53515625, | |
| "loss_num": 0.0185546875, | |
| "loss_xval": 0.09228515625, | |
| "num_input_tokens_seen": 68201948, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 1.03125, | |
| "grad_norm": 9.576915699057926, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0537, | |
| "num_input_tokens_seen": 68374640, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 1.03125, | |
| "loss": 0.06341977417469025, | |
| "loss_ce": 6.528654193971306e-05, | |
| "loss_iou": 0.419921875, | |
| "loss_num": 0.0126953125, | |
| "loss_xval": 0.0634765625, | |
| "num_input_tokens_seen": 68374640, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 1.0338541666666667, | |
| "grad_norm": 4.782272117515052, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0954, | |
| "num_input_tokens_seen": 68547628, | |
| "step": 397 | |
| }, | |
| { | |
| "epoch": 1.0338541666666667, | |
| "loss": 0.05385718494653702, | |
| "loss_ce": 5.469346069730818e-05, | |
| "loss_iou": 0.56640625, | |
| "loss_num": 0.0107421875, | |
| "loss_xval": 0.0537109375, | |
| "num_input_tokens_seen": 68547628, | |
| "step": 397 | |
| }, | |
| { | |
| "epoch": 1.0364583333333333, | |
| "grad_norm": 5.7075554975649485, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1176, | |
| "num_input_tokens_seen": 68720856, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 1.0364583333333333, | |
| "loss": 0.04491497576236725, | |
| "loss_ce": 2.361964106967207e-05, | |
| "loss_iou": 0.6953125, | |
| "loss_num": 0.00897216796875, | |
| "loss_xval": 0.044921875, | |
| "num_input_tokens_seen": 68720856, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 1.0390625, | |
| "grad_norm": 5.118278327904573, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0574, | |
| "num_input_tokens_seen": 68893560, | |
| "step": 399 | |
| }, | |
| { | |
| "epoch": 1.0390625, | |
| "loss": 0.057190101593732834, | |
| "loss_ce": 6.119744648458436e-05, | |
| "loss_iou": 0.640625, | |
| "loss_num": 0.01141357421875, | |
| "loss_xval": 0.05712890625, | |
| "num_input_tokens_seen": 68893560, | |
| "step": 399 | |
| }, | |
| { | |
| "epoch": 1.0416666666666667, | |
| "grad_norm": 4.488918016416647, | |
| "learning_rate": 5e-06, | |
| "loss": 0.064, | |
| "num_input_tokens_seen": 69066468, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 1.0416666666666667, | |
| "loss": 0.06421714276075363, | |
| "loss_ce": 3.867531631840393e-05, | |
| "loss_iou": 0.57421875, | |
| "loss_num": 0.0128173828125, | |
| "loss_xval": 0.06396484375, | |
| "num_input_tokens_seen": 69066468, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 1.0442708333333333, | |
| "grad_norm": 5.792990925737602, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0838, | |
| "num_input_tokens_seen": 69239188, | |
| "step": 401 | |
| }, | |
| { | |
| "epoch": 1.0442708333333333, | |
| "loss": 0.0709276869893074, | |
| "loss_ce": 3.5352179111214355e-05, | |
| "loss_iou": 0.5703125, | |
| "loss_num": 0.01416015625, | |
| "loss_xval": 0.07080078125, | |
| "num_input_tokens_seen": 69239188, | |
| "step": 401 | |
| }, | |
| { | |
| "epoch": 1.046875, | |
| "grad_norm": 5.477877590256074, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0616, | |
| "num_input_tokens_seen": 69412048, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 1.046875, | |
| "loss": 0.05337923392653465, | |
| "loss_ce": 0.00012606415839400142, | |
| "loss_iou": 0.53125, | |
| "loss_num": 0.01068115234375, | |
| "loss_xval": 0.05322265625, | |
| "num_input_tokens_seen": 69412048, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 1.0494791666666667, | |
| "grad_norm": 7.785066348132969, | |
| "learning_rate": 5e-06, | |
| "loss": 0.077, | |
| "num_input_tokens_seen": 69584372, | |
| "step": 403 | |
| }, | |
| { | |
| "epoch": 1.0494791666666667, | |
| "loss": 0.11938966065645218, | |
| "loss_ce": 0.0002032608463196084, | |
| "loss_iou": 0.404296875, | |
| "loss_num": 0.0238037109375, | |
| "loss_xval": 0.119140625, | |
| "num_input_tokens_seen": 69584372, | |
| "step": 403 | |
| }, | |
| { | |
| "epoch": 1.0520833333333333, | |
| "grad_norm": 3.487837088264721, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0598, | |
| "num_input_tokens_seen": 69756908, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 1.0520833333333333, | |
| "loss": 0.04861289635300636, | |
| "loss_ce": 5.943168798694387e-05, | |
| "loss_iou": 0.421875, | |
| "loss_num": 0.00970458984375, | |
| "loss_xval": 0.048583984375, | |
| "num_input_tokens_seen": 69756908, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 1.0546875, | |
| "grad_norm": 4.5585273415308505, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0738, | |
| "num_input_tokens_seen": 69929892, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 1.0546875, | |
| "loss": 0.036217886954545975, | |
| "loss_ce": 5.455628706840798e-05, | |
| "loss_iou": 0.494140625, | |
| "loss_num": 0.007232666015625, | |
| "loss_xval": 0.0361328125, | |
| "num_input_tokens_seen": 69929892, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 1.0572916666666667, | |
| "grad_norm": 5.607953623525571, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0589, | |
| "num_input_tokens_seen": 70102304, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 1.0572916666666667, | |
| "loss": 0.08112768828868866, | |
| "loss_ce": 4.247991455486044e-05, | |
| "loss_iou": 0.66796875, | |
| "loss_num": 0.0162353515625, | |
| "loss_xval": 0.0810546875, | |
| "num_input_tokens_seen": 70102304, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 1.0598958333333333, | |
| "grad_norm": 3.224104704302036, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0706, | |
| "num_input_tokens_seen": 70274860, | |
| "step": 407 | |
| }, | |
| { | |
| "epoch": 1.0598958333333333, | |
| "loss": 0.042282506823539734, | |
| "loss_ce": 3.09214046865236e-05, | |
| "loss_iou": 0.52734375, | |
| "loss_num": 0.0084228515625, | |
| "loss_xval": 0.042236328125, | |
| "num_input_tokens_seen": 70274860, | |
| "step": 407 | |
| }, | |
| { | |
| "epoch": 1.0625, | |
| "grad_norm": 13.790835085548427, | |
| "learning_rate": 5e-06, | |
| "loss": 0.054, | |
| "num_input_tokens_seen": 70447752, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 1.0625, | |
| "loss": 0.06554967164993286, | |
| "loss_ce": 4.369396629044786e-05, | |
| "loss_iou": 0.453125, | |
| "loss_num": 0.01312255859375, | |
| "loss_xval": 0.0654296875, | |
| "num_input_tokens_seen": 70447752, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 1.0651041666666667, | |
| "grad_norm": 21.170926774013214, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1041, | |
| "num_input_tokens_seen": 70620408, | |
| "step": 409 | |
| }, | |
| { | |
| "epoch": 1.0651041666666667, | |
| "loss": 0.05410638824105263, | |
| "loss_ce": 2.9238653951324522e-05, | |
| "loss_iou": 0.5625, | |
| "loss_num": 0.01080322265625, | |
| "loss_xval": 0.05419921875, | |
| "num_input_tokens_seen": 70620408, | |
| "step": 409 | |
| }, | |
| { | |
| "epoch": 1.0677083333333333, | |
| "grad_norm": 4.451906270983918, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0553, | |
| "num_input_tokens_seen": 70792740, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 1.0677083333333333, | |
| "loss": 0.060652364045381546, | |
| "loss_ce": 4.4454794988268986e-05, | |
| "loss_iou": 0.43359375, | |
| "loss_num": 0.0120849609375, | |
| "loss_xval": 0.060546875, | |
| "num_input_tokens_seen": 70792740, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 1.0703125, | |
| "grad_norm": 4.685547616833428, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0872, | |
| "num_input_tokens_seen": 70965912, | |
| "step": 411 | |
| }, | |
| { | |
| "epoch": 1.0703125, | |
| "loss": 0.038969703018665314, | |
| "loss_ce": 2.9273152904352173e-05, | |
| "loss_iou": 0.578125, | |
| "loss_num": 0.007781982421875, | |
| "loss_xval": 0.0390625, | |
| "num_input_tokens_seen": 70965912, | |
| "step": 411 | |
| }, | |
| { | |
| "epoch": 1.0729166666666667, | |
| "grad_norm": 4.205176098429634, | |
| "learning_rate": 5e-06, | |
| "loss": 0.097, | |
| "num_input_tokens_seen": 71138240, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 1.0729166666666667, | |
| "loss": 0.06594446301460266, | |
| "loss_ce": 2.6499863452045247e-05, | |
| "loss_iou": 0.490234375, | |
| "loss_num": 0.01318359375, | |
| "loss_xval": 0.06591796875, | |
| "num_input_tokens_seen": 71138240, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 1.0755208333333333, | |
| "grad_norm": 2.6975606542073414, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0503, | |
| "num_input_tokens_seen": 71311316, | |
| "step": 413 | |
| }, | |
| { | |
| "epoch": 1.0755208333333333, | |
| "loss": 0.04244375228881836, | |
| "loss_ce": 2.4318891519214958e-05, | |
| "loss_iou": 0.5078125, | |
| "loss_num": 0.00848388671875, | |
| "loss_xval": 0.04248046875, | |
| "num_input_tokens_seen": 71311316, | |
| "step": 413 | |
| }, | |
| { | |
| "epoch": 1.078125, | |
| "grad_norm": 7.285990628964785, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0779, | |
| "num_input_tokens_seen": 71483816, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 1.078125, | |
| "loss": 0.04324822127819061, | |
| "loss_ce": 2.007262264669407e-05, | |
| "loss_iou": 0.5703125, | |
| "loss_num": 0.0086669921875, | |
| "loss_xval": 0.043212890625, | |
| "num_input_tokens_seen": 71483816, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 1.0807291666666667, | |
| "grad_norm": 4.52661538724694, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0641, | |
| "num_input_tokens_seen": 71656340, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 1.0807291666666667, | |
| "loss": 0.08217348903417587, | |
| "loss_ce": 4.30593136115931e-05, | |
| "loss_iou": 0.45703125, | |
| "loss_num": 0.0164794921875, | |
| "loss_xval": 0.08203125, | |
| "num_input_tokens_seen": 71656340, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 1.0833333333333333, | |
| "grad_norm": 4.6125148439773485, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0882, | |
| "num_input_tokens_seen": 71828256, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 1.0833333333333333, | |
| "loss": 0.13281017541885376, | |
| "loss_ce": 2.819242035911884e-05, | |
| "loss_iou": 0.5703125, | |
| "loss_num": 0.0264892578125, | |
| "loss_xval": 0.1328125, | |
| "num_input_tokens_seen": 71828256, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 1.0859375, | |
| "grad_norm": 4.643689845065649, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0627, | |
| "num_input_tokens_seen": 72000720, | |
| "step": 417 | |
| }, | |
| { | |
| "epoch": 1.0859375, | |
| "loss": 0.11508992314338684, | |
| "loss_ce": 2.3400416466756724e-05, | |
| "loss_iou": 0.0, | |
| "loss_num": 0.0230712890625, | |
| "loss_xval": 0.115234375, | |
| "num_input_tokens_seen": 72000720, | |
| "step": 417 | |
| }, | |
| { | |
| "epoch": 1.0885416666666667, | |
| "grad_norm": 4.231062348032645, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0809, | |
| "num_input_tokens_seen": 72173088, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 1.0885416666666667, | |
| "loss": 0.10267479717731476, | |
| "loss_ce": 4.418793832883239e-05, | |
| "loss_iou": 0.0, | |
| "loss_num": 0.0205078125, | |
| "loss_xval": 0.1025390625, | |
| "num_input_tokens_seen": 72173088, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 1.0911458333333333, | |
| "grad_norm": 3.198450887714274, | |
| "learning_rate": 5e-06, | |
| "loss": 0.094, | |
| "num_input_tokens_seen": 72345528, | |
| "step": 419 | |
| }, | |
| { | |
| "epoch": 1.0911458333333333, | |
| "loss": 0.10875082015991211, | |
| "loss_ce": 0.00010823761840583757, | |
| "loss_iou": 0.431640625, | |
| "loss_num": 0.021728515625, | |
| "loss_xval": 0.1083984375, | |
| "num_input_tokens_seen": 72345528, | |
| "step": 419 | |
| }, | |
| { | |
| "epoch": 1.09375, | |
| "grad_norm": 4.428326611539968, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0439, | |
| "num_input_tokens_seen": 72517624, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 1.09375, | |
| "loss": 0.05049506574869156, | |
| "loss_ce": 4.951009759679437e-05, | |
| "loss_iou": 0.51171875, | |
| "loss_num": 0.01007080078125, | |
| "loss_xval": 0.050537109375, | |
| "num_input_tokens_seen": 72517624, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 1.0963541666666667, | |
| "grad_norm": 5.316131474081422, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0816, | |
| "num_input_tokens_seen": 72690776, | |
| "step": 421 | |
| }, | |
| { | |
| "epoch": 1.0963541666666667, | |
| "loss": 0.0798894613981247, | |
| "loss_ce": 4.021547283628024e-05, | |
| "loss_iou": 0.515625, | |
| "loss_num": 0.0159912109375, | |
| "loss_xval": 0.080078125, | |
| "num_input_tokens_seen": 72690776, | |
| "step": 421 | |
| }, | |
| { | |
| "epoch": 1.0989583333333333, | |
| "grad_norm": 4.80233181201779, | |
| "learning_rate": 5e-06, | |
| "loss": 0.099, | |
| "num_input_tokens_seen": 72863552, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 1.0989583333333333, | |
| "loss": 0.09374965727329254, | |
| "loss_ce": 3.0174571293173358e-05, | |
| "loss_iou": 0.6484375, | |
| "loss_num": 0.018798828125, | |
| "loss_xval": 0.09375, | |
| "num_input_tokens_seen": 72863552, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 1.1015625, | |
| "grad_norm": 3.042295910685699, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0756, | |
| "num_input_tokens_seen": 73035716, | |
| "step": 423 | |
| }, | |
| { | |
| "epoch": 1.1015625, | |
| "loss": 0.05097030848264694, | |
| "loss_ce": 2.1213931177044287e-05, | |
| "loss_iou": 0.57421875, | |
| "loss_num": 0.01019287109375, | |
| "loss_xval": 0.051025390625, | |
| "num_input_tokens_seen": 73035716, | |
| "step": 423 | |
| }, | |
| { | |
| "epoch": 1.1041666666666667, | |
| "grad_norm": 3.8912805314115473, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0536, | |
| "num_input_tokens_seen": 73208532, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 1.1041666666666667, | |
| "loss": 0.04248078912496567, | |
| "loss_ce": 4.60965748061426e-05, | |
| "loss_iou": 0.61328125, | |
| "loss_num": 0.00848388671875, | |
| "loss_xval": 0.04248046875, | |
| "num_input_tokens_seen": 73208532, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 1.1067708333333333, | |
| "grad_norm": 14.632789864172295, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0658, | |
| "num_input_tokens_seen": 73381164, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 1.1067708333333333, | |
| "loss": 0.052568383514881134, | |
| "loss_ce": 0.000154447479872033, | |
| "loss_iou": 0.47265625, | |
| "loss_num": 0.010498046875, | |
| "loss_xval": 0.052490234375, | |
| "num_input_tokens_seen": 73381164, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 1.109375, | |
| "grad_norm": 4.3783199311299486, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0861, | |
| "num_input_tokens_seen": 73553404, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 1.109375, | |
| "loss": 0.125158429145813, | |
| "loss_ce": 6.687753193546087e-05, | |
| "loss_iou": 0.0, | |
| "loss_num": 0.0250244140625, | |
| "loss_xval": 0.125, | |
| "num_input_tokens_seen": 73553404, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 1.1119791666666667, | |
| "grad_norm": 3.5716619036124766, | |
| "learning_rate": 5e-06, | |
| "loss": 0.075, | |
| "num_input_tokens_seen": 73726020, | |
| "step": 427 | |
| }, | |
| { | |
| "epoch": 1.1119791666666667, | |
| "loss": 0.14429143071174622, | |
| "loss_ce": 3.4840733860619366e-05, | |
| "loss_iou": 0.0, | |
| "loss_num": 0.0289306640625, | |
| "loss_xval": 0.14453125, | |
| "num_input_tokens_seen": 73726020, | |
| "step": 427 | |
| }, | |
| { | |
| "epoch": 1.1145833333333333, | |
| "grad_norm": 4.334696158970524, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0636, | |
| "num_input_tokens_seen": 73898692, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 1.1145833333333333, | |
| "loss": 0.08402653783559799, | |
| "loss_ce": 4.216242450638674e-05, | |
| "loss_iou": 0.5546875, | |
| "loss_num": 0.016845703125, | |
| "loss_xval": 0.083984375, | |
| "num_input_tokens_seen": 73898692, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 1.1171875, | |
| "grad_norm": 5.554218703009278, | |
| "learning_rate": 5e-06, | |
| "loss": 0.059, | |
| "num_input_tokens_seen": 74071620, | |
| "step": 429 | |
| }, | |
| { | |
| "epoch": 1.1171875, | |
| "loss": 0.0587516650557518, | |
| "loss_ce": 3.584453952498734e-05, | |
| "loss_iou": 0.49609375, | |
| "loss_num": 0.01171875, | |
| "loss_xval": 0.05859375, | |
| "num_input_tokens_seen": 74071620, | |
| "step": 429 | |
| }, | |
| { | |
| "epoch": 1.1197916666666667, | |
| "grad_norm": 4.43058457913269, | |
| "learning_rate": 5e-06, | |
| "loss": 0.111, | |
| "num_input_tokens_seen": 74244416, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 1.1197916666666667, | |
| "loss": 0.1568872630596161, | |
| "loss_ce": 2.6918030926026404e-05, | |
| "loss_iou": 0.71875, | |
| "loss_num": 0.031494140625, | |
| "loss_xval": 0.1572265625, | |
| "num_input_tokens_seen": 74244416, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 1.1223958333333333, | |
| "grad_norm": 5.12234513289191, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0652, | |
| "num_input_tokens_seen": 74416812, | |
| "step": 431 | |
| }, | |
| { | |
| "epoch": 1.1223958333333333, | |
| "loss": 0.04296587407588959, | |
| "loss_ce": 7.341805758187547e-05, | |
| "loss_iou": 0.0, | |
| "loss_num": 0.00860595703125, | |
| "loss_xval": 0.04296875, | |
| "num_input_tokens_seen": 74416812, | |
| "step": 431 | |
| }, | |
| { | |
| "epoch": 1.125, | |
| "grad_norm": 12.372100052601173, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0794, | |
| "num_input_tokens_seen": 74589952, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 1.125, | |
| "loss": 0.057425886392593384, | |
| "loss_ce": 3.757977538043633e-05, | |
| "loss_iou": 0.4609375, | |
| "loss_num": 0.011474609375, | |
| "loss_xval": 0.057373046875, | |
| "num_input_tokens_seen": 74589952, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 1.1276041666666667, | |
| "grad_norm": 5.254766938250951, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0813, | |
| "num_input_tokens_seen": 74762884, | |
| "step": 433 | |
| }, | |
| { | |
| "epoch": 1.1276041666666667, | |
| "loss": 0.07790642231702805, | |
| "loss_ce": 0.00014763849321752787, | |
| "loss_iou": 0.43359375, | |
| "loss_num": 0.01556396484375, | |
| "loss_xval": 0.07763671875, | |
| "num_input_tokens_seen": 74762884, | |
| "step": 433 | |
| }, | |
| { | |
| "epoch": 1.1302083333333333, | |
| "grad_norm": 4.363985148609402, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0678, | |
| "num_input_tokens_seen": 74935932, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 1.1302083333333333, | |
| "loss": 0.09511469304561615, | |
| "loss_ce": 2.1914216631557792e-05, | |
| "loss_iou": 0.671875, | |
| "loss_num": 0.01904296875, | |
| "loss_xval": 0.09521484375, | |
| "num_input_tokens_seen": 74935932, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 1.1328125, | |
| "grad_norm": 36.76822239657336, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0789, | |
| "num_input_tokens_seen": 75109188, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 1.1328125, | |
| "loss": 0.05521143600344658, | |
| "loss_ce": 3.5653371014632285e-05, | |
| "loss_iou": 0.5234375, | |
| "loss_num": 0.01104736328125, | |
| "loss_xval": 0.05517578125, | |
| "num_input_tokens_seen": 75109188, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 1.1354166666666667, | |
| "grad_norm": 5.8422904737549635, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0833, | |
| "num_input_tokens_seen": 75282080, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 1.1354166666666667, | |
| "loss": 0.1195131167769432, | |
| "loss_ce": 3.679828660096973e-05, | |
| "loss_iou": 0.5078125, | |
| "loss_num": 0.02392578125, | |
| "loss_xval": 0.11962890625, | |
| "num_input_tokens_seen": 75282080, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 1.1380208333333333, | |
| "grad_norm": 5.633890734428066, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0714, | |
| "num_input_tokens_seen": 75454600, | |
| "step": 437 | |
| }, | |
| { | |
| "epoch": 1.1380208333333333, | |
| "loss": 0.04856128245592117, | |
| "loss_ce": 8.410715963691473e-05, | |
| "loss_iou": 0.482421875, | |
| "loss_num": 0.00970458984375, | |
| "loss_xval": 0.048583984375, | |
| "num_input_tokens_seen": 75454600, | |
| "step": 437 | |
| }, | |
| { | |
| "epoch": 1.140625, | |
| "grad_norm": 4.6822951947306946, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0673, | |
| "num_input_tokens_seen": 75627104, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 1.140625, | |
| "loss": 0.05475003272294998, | |
| "loss_ce": 4.72724532301072e-05, | |
| "loss_iou": 0.51953125, | |
| "loss_num": 0.01092529296875, | |
| "loss_xval": 0.0546875, | |
| "num_input_tokens_seen": 75627104, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 1.1432291666666667, | |
| "grad_norm": 6.006286624841916, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0639, | |
| "num_input_tokens_seen": 75799808, | |
| "step": 439 | |
| }, | |
| { | |
| "epoch": 1.1432291666666667, | |
| "loss": 0.0665750578045845, | |
| "loss_ce": 1.621775300009176e-05, | |
| "loss_iou": 0.62890625, | |
| "loss_num": 0.0133056640625, | |
| "loss_xval": 0.06640625, | |
| "num_input_tokens_seen": 75799808, | |
| "step": 439 | |
| }, | |
| { | |
| "epoch": 1.1458333333333333, | |
| "grad_norm": 28.980298300208794, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0762, | |
| "num_input_tokens_seen": 75972072, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 1.1458333333333333, | |
| "loss": 0.09322504699230194, | |
| "loss_ce": 2.435836722725071e-05, | |
| "loss_iou": 0.71875, | |
| "loss_num": 0.0186767578125, | |
| "loss_xval": 0.09326171875, | |
| "num_input_tokens_seen": 75972072, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 1.1484375, | |
| "grad_norm": 13.46707783271563, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1018, | |
| "num_input_tokens_seen": 76144720, | |
| "step": 441 | |
| }, | |
| { | |
| "epoch": 1.1484375, | |
| "loss": 0.08068449795246124, | |
| "loss_ce": 7.231286144815385e-05, | |
| "loss_iou": 0.3515625, | |
| "loss_num": 0.01611328125, | |
| "loss_xval": 0.08056640625, | |
| "num_input_tokens_seen": 76144720, | |
| "step": 441 | |
| }, | |
| { | |
| "epoch": 1.1510416666666667, | |
| "grad_norm": 2.9432117535086357, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0636, | |
| "num_input_tokens_seen": 76316644, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 1.1510416666666667, | |
| "loss": 0.056132424622774124, | |
| "loss_ce": 0.00016318520647473633, | |
| "loss_iou": 0.66796875, | |
| "loss_num": 0.01116943359375, | |
| "loss_xval": 0.055908203125, | |
| "num_input_tokens_seen": 76316644, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 1.1536458333333333, | |
| "grad_norm": 2.439393218897116, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0374, | |
| "num_input_tokens_seen": 76489288, | |
| "step": 443 | |
| }, | |
| { | |
| "epoch": 1.1536458333333333, | |
| "loss": 0.03558982163667679, | |
| "loss_ce": 2.1583975467365235e-05, | |
| "loss_iou": 0.451171875, | |
| "loss_num": 0.007110595703125, | |
| "loss_xval": 0.03564453125, | |
| "num_input_tokens_seen": 76489288, | |
| "step": 443 | |
| }, | |
| { | |
| "epoch": 1.15625, | |
| "grad_norm": 14.267748702685116, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0665, | |
| "num_input_tokens_seen": 76661744, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 1.15625, | |
| "loss": 0.11009622365236282, | |
| "loss_ce": 2.694531031011138e-05, | |
| "loss_iou": 0.0, | |
| "loss_num": 0.02197265625, | |
| "loss_xval": 0.10986328125, | |
| "num_input_tokens_seen": 76661744, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 1.1588541666666667, | |
| "grad_norm": 20.903388479491294, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0489, | |
| "num_input_tokens_seen": 76833952, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 1.1588541666666667, | |
| "loss": 0.03838071599602699, | |
| "loss_ce": 3.5379373002797365e-05, | |
| "loss_iou": 0.65234375, | |
| "loss_num": 0.007659912109375, | |
| "loss_xval": 0.038330078125, | |
| "num_input_tokens_seen": 76833952, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 1.1614583333333333, | |
| "grad_norm": 8.469986250177818, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0857, | |
| "num_input_tokens_seen": 77007080, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 1.1614583333333333, | |
| "loss": 0.051541343331336975, | |
| "loss_ce": 2.767006662907079e-05, | |
| "loss_iou": 0.59375, | |
| "loss_num": 0.01031494140625, | |
| "loss_xval": 0.051513671875, | |
| "num_input_tokens_seen": 77007080, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 1.1640625, | |
| "grad_norm": 5.002762010889236, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1031, | |
| "num_input_tokens_seen": 77180040, | |
| "step": 447 | |
| }, | |
| { | |
| "epoch": 1.1640625, | |
| "loss": 0.20415398478507996, | |
| "loss_ce": 5.2410614443942904e-05, | |
| "loss_iou": 0.45703125, | |
| "loss_num": 0.040771484375, | |
| "loss_xval": 0.2041015625, | |
| "num_input_tokens_seen": 77180040, | |
| "step": 447 | |
| }, | |
| { | |
| "epoch": 1.1666666666666667, | |
| "grad_norm": 100.8603598057148, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0848, | |
| "num_input_tokens_seen": 77353160, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 1.1666666666666667, | |
| "loss": 0.07399199903011322, | |
| "loss_ce": 7.841931073926389e-05, | |
| "loss_iou": 0.69921875, | |
| "loss_num": 0.0147705078125, | |
| "loss_xval": 0.07373046875, | |
| "num_input_tokens_seen": 77353160, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 1.1692708333333333, | |
| "grad_norm": 3.351767451423234, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0852, | |
| "num_input_tokens_seen": 77526040, | |
| "step": 449 | |
| }, | |
| { | |
| "epoch": 1.1692708333333333, | |
| "loss": 0.06542657315731049, | |
| "loss_ce": 5.792453157482669e-05, | |
| "loss_iou": 0.4609375, | |
| "loss_num": 0.0130615234375, | |
| "loss_xval": 0.0654296875, | |
| "num_input_tokens_seen": 77526040, | |
| "step": 449 | |
| }, | |
| { | |
| "epoch": 1.171875, | |
| "grad_norm": 4.456484406427641, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0804, | |
| "num_input_tokens_seen": 77698436, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 1.171875, | |
| "loss": 0.037512898445129395, | |
| "loss_ce": 6.783234130125493e-05, | |
| "loss_iou": 0.57421875, | |
| "loss_num": 0.00750732421875, | |
| "loss_xval": 0.037353515625, | |
| "num_input_tokens_seen": 77698436, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 1.1744791666666667, | |
| "grad_norm": 5.16567471249552, | |
| "learning_rate": 5e-06, | |
| "loss": 0.076, | |
| "num_input_tokens_seen": 77870996, | |
| "step": 451 | |
| }, | |
| { | |
| "epoch": 1.1744791666666667, | |
| "loss": 0.1230858787894249, | |
| "loss_ce": 3.9006972656352445e-05, | |
| "loss_iou": 0.52734375, | |
| "loss_num": 0.0245361328125, | |
| "loss_xval": 0.123046875, | |
| "num_input_tokens_seen": 77870996, | |
| "step": 451 | |
| }, | |
| { | |
| "epoch": 1.1770833333333333, | |
| "grad_norm": 4.701516259626003, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0584, | |
| "num_input_tokens_seen": 78043824, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 1.1770833333333333, | |
| "loss": 0.04219118878245354, | |
| "loss_ce": 6.16741890553385e-05, | |
| "loss_iou": 0.58984375, | |
| "loss_num": 0.0084228515625, | |
| "loss_xval": 0.042236328125, | |
| "num_input_tokens_seen": 78043824, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 1.1796875, | |
| "grad_norm": 5.151886406386116, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0829, | |
| "num_input_tokens_seen": 78216596, | |
| "step": 453 | |
| }, | |
| { | |
| "epoch": 1.1796875, | |
| "loss": 0.11115504801273346, | |
| "loss_ce": 0.00010158185614272952, | |
| "loss_iou": 0.58984375, | |
| "loss_num": 0.022216796875, | |
| "loss_xval": 0.11083984375, | |
| "num_input_tokens_seen": 78216596, | |
| "step": 453 | |
| }, | |
| { | |
| "epoch": 1.1822916666666667, | |
| "grad_norm": 5.099784873283209, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0496, | |
| "num_input_tokens_seen": 78389868, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 1.1822916666666667, | |
| "loss": 0.05530470609664917, | |
| "loss_ce": 6.788992322981358e-05, | |
| "loss_iou": 0.57421875, | |
| "loss_num": 0.01104736328125, | |
| "loss_xval": 0.05517578125, | |
| "num_input_tokens_seen": 78389868, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 1.1848958333333333, | |
| "grad_norm": 22.327108090070816, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0734, | |
| "num_input_tokens_seen": 78562732, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 1.1848958333333333, | |
| "loss": 0.1318536400794983, | |
| "loss_ce": 7.873401773395017e-05, | |
| "loss_iou": 0.453125, | |
| "loss_num": 0.0263671875, | |
| "loss_xval": 0.1318359375, | |
| "num_input_tokens_seen": 78562732, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 1.1875, | |
| "grad_norm": 6.402774286125369, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0785, | |
| "num_input_tokens_seen": 78735540, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 1.1875, | |
| "loss": 0.08449774980545044, | |
| "loss_ce": 4.035345773445442e-05, | |
| "loss_iou": 0.359375, | |
| "loss_num": 0.016845703125, | |
| "loss_xval": 0.08447265625, | |
| "num_input_tokens_seen": 78735540, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 1.1901041666666667, | |
| "grad_norm": 3.7359791226023495, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0667, | |
| "num_input_tokens_seen": 78907916, | |
| "step": 457 | |
| }, | |
| { | |
| "epoch": 1.1901041666666667, | |
| "loss": 0.04169946163892746, | |
| "loss_ce": 4.2965810280293226e-05, | |
| "loss_iou": 0.474609375, | |
| "loss_num": 0.00830078125, | |
| "loss_xval": 0.041748046875, | |
| "num_input_tokens_seen": 78907916, | |
| "step": 457 | |
| }, | |
| { | |
| "epoch": 1.1927083333333333, | |
| "grad_norm": 7.721272031766003, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1165, | |
| "num_input_tokens_seen": 79080080, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 1.1927083333333333, | |
| "loss": 0.09451837837696075, | |
| "loss_ce": 0.00012751182657666504, | |
| "loss_iou": 0.421875, | |
| "loss_num": 0.0189208984375, | |
| "loss_xval": 0.09423828125, | |
| "num_input_tokens_seen": 79080080, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 1.1953125, | |
| "grad_norm": 3.5323313091644755, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0699, | |
| "num_input_tokens_seen": 79252520, | |
| "step": 459 | |
| }, | |
| { | |
| "epoch": 1.1953125, | |
| "loss": 0.09058161079883575, | |
| "loss_ce": 2.0699575543403625e-05, | |
| "loss_iou": 0.51953125, | |
| "loss_num": 0.0181884765625, | |
| "loss_xval": 0.09033203125, | |
| "num_input_tokens_seen": 79252520, | |
| "step": 459 | |
| }, | |
| { | |
| "epoch": 1.1979166666666667, | |
| "grad_norm": 6.54371810379822, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0901, | |
| "num_input_tokens_seen": 79425288, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 1.1979166666666667, | |
| "loss": 0.06754864007234573, | |
| "loss_ce": 4.375486241769977e-05, | |
| "loss_iou": 0.5703125, | |
| "loss_num": 0.01348876953125, | |
| "loss_xval": 0.0673828125, | |
| "num_input_tokens_seen": 79425288, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 1.2005208333333333, | |
| "grad_norm": 4.16776605785161, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0483, | |
| "num_input_tokens_seen": 79597852, | |
| "step": 461 | |
| }, | |
| { | |
| "epoch": 1.2005208333333333, | |
| "loss": 0.040252070873975754, | |
| "loss_ce": 2.990448228956666e-05, | |
| "loss_iou": 0.458984375, | |
| "loss_num": 0.008056640625, | |
| "loss_xval": 0.040283203125, | |
| "num_input_tokens_seen": 79597852, | |
| "step": 461 | |
| }, | |
| { | |
| "epoch": 1.203125, | |
| "grad_norm": 10.246051862590502, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0741, | |
| "num_input_tokens_seen": 79770236, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 1.203125, | |
| "loss": 0.04001723229885101, | |
| "loss_ce": 5.4462791013065726e-05, | |
| "loss_iou": 0.5625, | |
| "loss_num": 0.00799560546875, | |
| "loss_xval": 0.0400390625, | |
| "num_input_tokens_seen": 79770236, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 1.2057291666666667, | |
| "grad_norm": 6.092643709859456, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0521, | |
| "num_input_tokens_seen": 79943152, | |
| "step": 463 | |
| }, | |
| { | |
| "epoch": 1.2057291666666667, | |
| "loss": 0.03291913866996765, | |
| "loss_ce": 2.1190953702898696e-05, | |
| "loss_iou": 0.5390625, | |
| "loss_num": 0.006591796875, | |
| "loss_xval": 0.032958984375, | |
| "num_input_tokens_seen": 79943152, | |
| "step": 463 | |
| }, | |
| { | |
| "epoch": 1.2083333333333333, | |
| "grad_norm": 4.950298356741838, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0743, | |
| "num_input_tokens_seen": 80115996, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 1.2083333333333333, | |
| "loss": 0.06474019587039948, | |
| "loss_ce": 4.293021993362345e-05, | |
| "loss_iou": 0.50390625, | |
| "loss_num": 0.012939453125, | |
| "loss_xval": 0.064453125, | |
| "num_input_tokens_seen": 80115996, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 1.2109375, | |
| "grad_norm": 6.018163435684629, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0688, | |
| "num_input_tokens_seen": 80288564, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 1.2109375, | |
| "loss": 0.06020050495862961, | |
| "loss_ce": 1.984027767321095e-05, | |
| "loss_iou": 0.59765625, | |
| "loss_num": 0.01202392578125, | |
| "loss_xval": 0.06005859375, | |
| "num_input_tokens_seen": 80288564, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 1.2135416666666667, | |
| "grad_norm": 4.280669670593218, | |
| "learning_rate": 5e-06, | |
| "loss": 0.074, | |
| "num_input_tokens_seen": 80461048, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 1.2135416666666667, | |
| "loss": 0.05621056258678436, | |
| "loss_ce": 2.77029030257836e-05, | |
| "loss_iou": 0.57421875, | |
| "loss_num": 0.01123046875, | |
| "loss_xval": 0.05615234375, | |
| "num_input_tokens_seen": 80461048, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 1.2161458333333333, | |
| "grad_norm": 29.24577961253563, | |
| "learning_rate": 5e-06, | |
| "loss": 0.072, | |
| "num_input_tokens_seen": 80633544, | |
| "step": 467 | |
| }, | |
| { | |
| "epoch": 1.2161458333333333, | |
| "loss": 0.036091044545173645, | |
| "loss_ce": 2.689398024813272e-05, | |
| "loss_iou": 0.50390625, | |
| "loss_num": 0.007232666015625, | |
| "loss_xval": 0.0361328125, | |
| "num_input_tokens_seen": 80633544, | |
| "step": 467 | |
| }, | |
| { | |
| "epoch": 1.21875, | |
| "grad_norm": 3.5977371268772007, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0541, | |
| "num_input_tokens_seen": 80805856, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 1.21875, | |
| "loss": 0.05104288086295128, | |
| "loss_ce": 3.274788468843326e-05, | |
| "loss_iou": 0.53515625, | |
| "loss_num": 0.01019287109375, | |
| "loss_xval": 0.051025390625, | |
| "num_input_tokens_seen": 80805856, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 1.2213541666666667, | |
| "grad_norm": 8.075177097555214, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0605, | |
| "num_input_tokens_seen": 80978184, | |
| "step": 469 | |
| }, | |
| { | |
| "epoch": 1.2213541666666667, | |
| "loss": 0.04464123770594597, | |
| "loss_ce": 2.4538327124901116e-05, | |
| "loss_iou": 0.61328125, | |
| "loss_num": 0.0089111328125, | |
| "loss_xval": 0.044677734375, | |
| "num_input_tokens_seen": 80978184, | |
| "step": 469 | |
| }, | |
| { | |
| "epoch": 1.2239583333333333, | |
| "grad_norm": 6.996084067501281, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0577, | |
| "num_input_tokens_seen": 81150552, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 1.2239583333333333, | |
| "loss": 0.04917052388191223, | |
| "loss_ce": 5.248059460427612e-05, | |
| "loss_iou": 0.59765625, | |
| "loss_num": 0.00982666015625, | |
| "loss_xval": 0.049072265625, | |
| "num_input_tokens_seen": 81150552, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 1.2265625, | |
| "grad_norm": 9.557588670219046, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0598, | |
| "num_input_tokens_seen": 81323276, | |
| "step": 471 | |
| }, | |
| { | |
| "epoch": 1.2265625, | |
| "loss": 0.08455046266317368, | |
| "loss_ce": 3.203285814379342e-05, | |
| "loss_iou": 0.404296875, | |
| "loss_num": 0.016845703125, | |
| "loss_xval": 0.08447265625, | |
| "num_input_tokens_seen": 81323276, | |
| "step": 471 | |
| }, | |
| { | |
| "epoch": 1.2291666666666667, | |
| "grad_norm": 4.882489644855878, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0455, | |
| "num_input_tokens_seen": 81496112, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 1.2291666666666667, | |
| "loss": 0.032756030559539795, | |
| "loss_ce": 0.0001327365607721731, | |
| "loss_iou": 0.53125, | |
| "loss_num": 0.00653076171875, | |
| "loss_xval": 0.03271484375, | |
| "num_input_tokens_seen": 81496112, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 1.2317708333333333, | |
| "grad_norm": 10.129497104665319, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0865, | |
| "num_input_tokens_seen": 81668408, | |
| "step": 473 | |
| }, | |
| { | |
| "epoch": 1.2317708333333333, | |
| "loss": 0.053121719509363174, | |
| "loss_ce": 2.1133846530574374e-05, | |
| "loss_iou": 0.5703125, | |
| "loss_num": 0.0106201171875, | |
| "loss_xval": 0.05322265625, | |
| "num_input_tokens_seen": 81668408, | |
| "step": 473 | |
| }, | |
| { | |
| "epoch": 1.234375, | |
| "grad_norm": 34.29728711508608, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0801, | |
| "num_input_tokens_seen": 81841444, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 1.234375, | |
| "loss": 0.1159551739692688, | |
| "loss_ce": 1.8899745555245318e-05, | |
| "loss_iou": 0.46484375, | |
| "loss_num": 0.023193359375, | |
| "loss_xval": 0.11572265625, | |
| "num_input_tokens_seen": 81841444, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 1.2369791666666667, | |
| "grad_norm": 5.248583896165671, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0835, | |
| "num_input_tokens_seen": 82014424, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 1.2369791666666667, | |
| "loss": 0.046804144978523254, | |
| "loss_ce": 2.0697760191978887e-05, | |
| "loss_iou": 0.65234375, | |
| "loss_num": 0.00933837890625, | |
| "loss_xval": 0.046875, | |
| "num_input_tokens_seen": 82014424, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 1.2395833333333333, | |
| "grad_norm": 4.890262555680429, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0638, | |
| "num_input_tokens_seen": 82187060, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 1.2395833333333333, | |
| "loss": 0.05300772190093994, | |
| "loss_ce": 2.9203043595771305e-05, | |
| "loss_iou": 0.54296875, | |
| "loss_num": 0.0106201171875, | |
| "loss_xval": 0.052978515625, | |
| "num_input_tokens_seen": 82187060, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 1.2421875, | |
| "grad_norm": 6.375507009761332, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0746, | |
| "num_input_tokens_seen": 82359884, | |
| "step": 477 | |
| }, | |
| { | |
| "epoch": 1.2421875, | |
| "loss": 0.08658900111913681, | |
| "loss_ce": 0.00011744195217033848, | |
| "loss_iou": 0.515625, | |
| "loss_num": 0.017333984375, | |
| "loss_xval": 0.08642578125, | |
| "num_input_tokens_seen": 82359884, | |
| "step": 477 | |
| }, | |
| { | |
| "epoch": 1.2447916666666667, | |
| "grad_norm": 6.190781448434917, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0682, | |
| "num_input_tokens_seen": 82532312, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 1.2447916666666667, | |
| "loss": 0.06419570744037628, | |
| "loss_ce": 1.724117828416638e-05, | |
| "loss_iou": 0.5390625, | |
| "loss_num": 0.0128173828125, | |
| "loss_xval": 0.06396484375, | |
| "num_input_tokens_seen": 82532312, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 1.2473958333333333, | |
| "grad_norm": 4.492503172851453, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0545, | |
| "num_input_tokens_seen": 82705224, | |
| "step": 479 | |
| }, | |
| { | |
| "epoch": 1.2473958333333333, | |
| "loss": 0.043702684342861176, | |
| "loss_ce": 4.728833300760016e-05, | |
| "loss_iou": 0.4453125, | |
| "loss_num": 0.00872802734375, | |
| "loss_xval": 0.043701171875, | |
| "num_input_tokens_seen": 82705224, | |
| "step": 479 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "grad_norm": 4.614176563274451, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0825, | |
| "num_input_tokens_seen": 82877740, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "loss": 0.0817180722951889, | |
| "loss_ce": 8.355021418537945e-05, | |
| "loss_iou": 0.703125, | |
| "loss_num": 0.016357421875, | |
| "loss_xval": 0.08154296875, | |
| "num_input_tokens_seen": 82877740, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 1.2526041666666667, | |
| "grad_norm": 4.135440424213399, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0639, | |
| "num_input_tokens_seen": 83050904, | |
| "step": 481 | |
| }, | |
| { | |
| "epoch": 1.2526041666666667, | |
| "loss": 0.045239534229040146, | |
| "loss_ce": 2.774174208752811e-05, | |
| "loss_iou": 0.470703125, | |
| "loss_num": 0.009033203125, | |
| "loss_xval": 0.045166015625, | |
| "num_input_tokens_seen": 83050904, | |
| "step": 481 | |
| }, | |
| { | |
| "epoch": 1.2552083333333333, | |
| "grad_norm": 5.953250787402434, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0758, | |
| "num_input_tokens_seen": 83223916, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 1.2552083333333333, | |
| "loss": 0.039626024663448334, | |
| "loss_ce": 2.946431777672842e-05, | |
| "loss_iou": 0.60546875, | |
| "loss_num": 0.0079345703125, | |
| "loss_xval": 0.03955078125, | |
| "num_input_tokens_seen": 83223916, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 1.2578125, | |
| "grad_norm": 5.778983695199196, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0476, | |
| "num_input_tokens_seen": 83397368, | |
| "step": 483 | |
| }, | |
| { | |
| "epoch": 1.2578125, | |
| "loss": 0.041962604969739914, | |
| "loss_ce": 4.6711622417205945e-05, | |
| "loss_iou": 0.5703125, | |
| "loss_num": 0.00836181640625, | |
| "loss_xval": 0.0419921875, | |
| "num_input_tokens_seen": 83397368, | |
| "step": 483 | |
| }, | |
| { | |
| "epoch": 1.2604166666666667, | |
| "grad_norm": 5.433318803087276, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0785, | |
| "num_input_tokens_seen": 83569504, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 1.2604166666666667, | |
| "loss": 0.05590134114027023, | |
| "loss_ce": 2.3658354621147737e-05, | |
| "loss_iou": 0.58984375, | |
| "loss_num": 0.01116943359375, | |
| "loss_xval": 0.055908203125, | |
| "num_input_tokens_seen": 83569504, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 1.2630208333333333, | |
| "grad_norm": 4.6826104330453955, | |
| "learning_rate": 5e-06, | |
| "loss": 0.054, | |
| "num_input_tokens_seen": 83742676, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 1.2630208333333333, | |
| "loss": 0.03859255462884903, | |
| "loss_ce": 6.411132198991254e-05, | |
| "loss_iou": 0.50390625, | |
| "loss_num": 0.0076904296875, | |
| "loss_xval": 0.03857421875, | |
| "num_input_tokens_seen": 83742676, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 1.265625, | |
| "grad_norm": 4.369179337344076, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0529, | |
| "num_input_tokens_seen": 83915776, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 1.265625, | |
| "loss": 0.04284074157476425, | |
| "loss_ce": 7.035740418359637e-05, | |
| "loss_iou": 0.5234375, | |
| "loss_num": 0.008544921875, | |
| "loss_xval": 0.042724609375, | |
| "num_input_tokens_seen": 83915776, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 1.2682291666666667, | |
| "grad_norm": 4.855681248964782, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0893, | |
| "num_input_tokens_seen": 84088164, | |
| "step": 487 | |
| }, | |
| { | |
| "epoch": 1.2682291666666667, | |
| "loss": 0.0853077843785286, | |
| "loss_ce": 4.1672632505651563e-05, | |
| "loss_iou": 0.59375, | |
| "loss_num": 0.01708984375, | |
| "loss_xval": 0.08544921875, | |
| "num_input_tokens_seen": 84088164, | |
| "step": 487 | |
| }, | |
| { | |
| "epoch": 1.2708333333333333, | |
| "grad_norm": 4.574747694340549, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0667, | |
| "num_input_tokens_seen": 84261012, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 1.2708333333333333, | |
| "loss": 0.1186133474111557, | |
| "loss_ce": 3.7304311263142154e-05, | |
| "loss_iou": 0.455078125, | |
| "loss_num": 0.0238037109375, | |
| "loss_xval": 0.11865234375, | |
| "num_input_tokens_seen": 84261012, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 1.2734375, | |
| "grad_norm": 6.201362257140882, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0656, | |
| "num_input_tokens_seen": 84433984, | |
| "step": 489 | |
| }, | |
| { | |
| "epoch": 1.2734375, | |
| "loss": 0.055373311042785645, | |
| "loss_ce": 4.4942811655346304e-05, | |
| "loss_iou": 0.61328125, | |
| "loss_num": 0.01104736328125, | |
| "loss_xval": 0.055419921875, | |
| "num_input_tokens_seen": 84433984, | |
| "step": 489 | |
| }, | |
| { | |
| "epoch": 1.2760416666666667, | |
| "grad_norm": 4.576166685047339, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0666, | |
| "num_input_tokens_seen": 84606516, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 1.2760416666666667, | |
| "loss": 0.046068161725997925, | |
| "loss_ce": 4.765454650623724e-05, | |
| "loss_iou": 0.0, | |
| "loss_num": 0.00921630859375, | |
| "loss_xval": 0.0458984375, | |
| "num_input_tokens_seen": 84606516, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 1.2786458333333333, | |
| "grad_norm": 4.832131210851992, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0737, | |
| "num_input_tokens_seen": 84779356, | |
| "step": 491 | |
| }, | |
| { | |
| "epoch": 1.2786458333333333, | |
| "loss": 0.11677989363670349, | |
| "loss_ce": 1.964074544957839e-05, | |
| "loss_iou": 0.484375, | |
| "loss_num": 0.0233154296875, | |
| "loss_xval": 0.11669921875, | |
| "num_input_tokens_seen": 84779356, | |
| "step": 491 | |
| }, | |
| { | |
| "epoch": 1.28125, | |
| "grad_norm": 5.262752038477657, | |
| "learning_rate": 5e-06, | |
| "loss": 0.071, | |
| "num_input_tokens_seen": 84952020, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 1.28125, | |
| "loss": 0.0537094846367836, | |
| "loss_ce": 2.9066111892461777e-05, | |
| "loss_iou": 0.58203125, | |
| "loss_num": 0.0107421875, | |
| "loss_xval": 0.0537109375, | |
| "num_input_tokens_seen": 84952020, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 1.2838541666666667, | |
| "grad_norm": 2.9344225414677836, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0495, | |
| "num_input_tokens_seen": 85124876, | |
| "step": 493 | |
| }, | |
| { | |
| "epoch": 1.2838541666666667, | |
| "loss": 0.03429765999317169, | |
| "loss_ce": 0.00013323240273166448, | |
| "loss_iou": 0.49609375, | |
| "loss_num": 0.0068359375, | |
| "loss_xval": 0.0341796875, | |
| "num_input_tokens_seen": 85124876, | |
| "step": 493 | |
| }, | |
| { | |
| "epoch": 1.2864583333333333, | |
| "grad_norm": 12.093642895702288, | |
| "learning_rate": 5e-06, | |
| "loss": 0.083, | |
| "num_input_tokens_seen": 85297824, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 1.2864583333333333, | |
| "loss": 0.13448233902454376, | |
| "loss_ce": 3.7149860872887075e-05, | |
| "loss_iou": 0.498046875, | |
| "loss_num": 0.02685546875, | |
| "loss_xval": 0.134765625, | |
| "num_input_tokens_seen": 85297824, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 1.2890625, | |
| "grad_norm": 5.311410396179597, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0622, | |
| "num_input_tokens_seen": 85469896, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 1.2890625, | |
| "loss": 0.10600131750106812, | |
| "loss_ce": 4.428675310919061e-05, | |
| "loss_iou": 0.466796875, | |
| "loss_num": 0.0211181640625, | |
| "loss_xval": 0.10595703125, | |
| "num_input_tokens_seen": 85469896, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 1.2916666666666667, | |
| "grad_norm": 13.126940553733593, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0509, | |
| "num_input_tokens_seen": 85642324, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 1.2916666666666667, | |
| "loss": 0.04261418431997299, | |
| "loss_ce": 0.00010319902503397316, | |
| "loss_iou": 0.53515625, | |
| "loss_num": 0.00848388671875, | |
| "loss_xval": 0.04248046875, | |
| "num_input_tokens_seen": 85642324, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 1.2942708333333333, | |
| "grad_norm": 9.141153643623982, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0514, | |
| "num_input_tokens_seen": 85815252, | |
| "step": 497 | |
| }, | |
| { | |
| "epoch": 1.2942708333333333, | |
| "loss": 0.04319656640291214, | |
| "loss_ce": 4.471266584005207e-05, | |
| "loss_iou": 0.46875, | |
| "loss_num": 0.0086669921875, | |
| "loss_xval": 0.043212890625, | |
| "num_input_tokens_seen": 85815252, | |
| "step": 497 | |
| }, | |
| { | |
| "epoch": 1.296875, | |
| "grad_norm": 5.456561002723919, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0744, | |
| "num_input_tokens_seen": 85988240, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 1.296875, | |
| "loss": 0.0488949790596962, | |
| "loss_ce": 3.6337674828246236e-05, | |
| "loss_iou": 0.5859375, | |
| "loss_num": 0.009765625, | |
| "loss_xval": 0.048828125, | |
| "num_input_tokens_seen": 85988240, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 1.2994791666666667, | |
| "grad_norm": 4.197467000151624, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0618, | |
| "num_input_tokens_seen": 86160724, | |
| "step": 499 | |
| }, | |
| { | |
| "epoch": 1.2994791666666667, | |
| "loss": 0.04255600646138191, | |
| "loss_ce": 2.9761704354314134e-05, | |
| "loss_iou": 0.443359375, | |
| "loss_num": 0.00848388671875, | |
| "loss_xval": 0.04248046875, | |
| "num_input_tokens_seen": 86160724, | |
| "step": 499 | |
| }, | |
| { | |
| "epoch": 1.3020833333333333, | |
| "grad_norm": 16.018482571236348, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0857, | |
| "num_input_tokens_seen": 86333348, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 1.3020833333333333, | |
| "eval_seeclick_CIoU": 0.49157558381557465, | |
| "eval_seeclick_GIoU": 0.4883834272623062, | |
| "eval_seeclick_IoU": 0.5341370701789856, | |
| "eval_seeclick_MAE_all": 0.07028103247284889, | |
| "eval_seeclick_MAE_h": 0.05726983770728111, | |
| "eval_seeclick_MAE_w": 0.08522269874811172, | |
| "eval_seeclick_MAE_x": 0.08005227893590927, | |
| "eval_seeclick_MAE_y": 0.058579325675964355, | |
| "eval_seeclick_NUM_probability": 0.9999949038028717, | |
| "eval_seeclick_inside_bbox": 0.8764204680919647, | |
| "eval_seeclick_loss": 0.9519317150115967, | |
| "eval_seeclick_loss_ce": 0.6910622417926788, | |
| "eval_seeclick_loss_iou": 0.6273193359375, | |
| "eval_seeclick_loss_num": 0.053680419921875, | |
| "eval_seeclick_loss_xval": 0.26849365234375, | |
| "eval_seeclick_runtime": 71.7405, | |
| "eval_seeclick_samples_per_second": 0.599, | |
| "eval_seeclick_steps_per_second": 0.028, | |
| "num_input_tokens_seen": 86333348, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 1.3020833333333333, | |
| "eval_icons_CIoU": 0.7799727618694305, | |
| "eval_icons_GIoU": 0.7758736610412598, | |
| "eval_icons_IoU": 0.7871803939342499, | |
| "eval_icons_MAE_all": 0.026267122477293015, | |
| "eval_icons_MAE_h": 0.024472126737236977, | |
| "eval_icons_MAE_w": 0.029545767232775688, | |
| "eval_icons_MAE_x": 0.02697262354195118, | |
| "eval_icons_MAE_y": 0.024077963083982468, | |
| "eval_icons_NUM_probability": 0.9999885261058807, | |
| "eval_icons_inside_bbox": 1.0, | |
| "eval_icons_loss": 0.07963114976882935, | |
| "eval_icons_loss_ce": 0.0020425044931471348, | |
| "eval_icons_loss_iou": 0.5069580078125, | |
| "eval_icons_loss_num": 0.014467239379882812, | |
| "eval_icons_loss_xval": 0.07232666015625, | |
| "eval_icons_runtime": 80.3553, | |
| "eval_icons_samples_per_second": 0.622, | |
| "eval_icons_steps_per_second": 0.025, | |
| "num_input_tokens_seen": 86333348, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 1.3020833333333333, | |
| "eval_screenspot_CIoU": 0.3665693998336792, | |
| "eval_screenspot_GIoU": 0.3608221113681793, | |
| "eval_screenspot_IoU": 0.4541289210319519, | |
| "eval_screenspot_MAE_all": 0.13468862076600394, | |
| "eval_screenspot_MAE_h": 0.07963093866904576, | |
| "eval_screenspot_MAE_w": 0.2195572853088379, | |
| "eval_screenspot_MAE_x": 0.16379199425379434, | |
| "eval_screenspot_MAE_y": 0.07577425986528397, | |
| "eval_screenspot_NUM_probability": 0.9999738732973734, | |
| "eval_screenspot_inside_bbox": 0.7116666634877523, | |
| "eval_screenspot_loss": 0.9175184369087219, | |
| "eval_screenspot_loss_ce": 0.42678311467170715, | |
| "eval_screenspot_loss_iou": 0.4466145833333333, | |
| "eval_screenspot_loss_num": 0.09850565592447917, | |
| "eval_screenspot_loss_xval": 0.4925130208333333, | |
| "eval_screenspot_runtime": 149.8949, | |
| "eval_screenspot_samples_per_second": 0.594, | |
| "eval_screenspot_steps_per_second": 0.02, | |
| "num_input_tokens_seen": 86333348, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 1.3020833333333333, | |
| "eval_compot_CIoU": 0.9082967340946198, | |
| "eval_compot_GIoU": 0.9085466265678406, | |
| "eval_compot_IoU": 0.9093170166015625, | |
| "eval_compot_MAE_all": 0.009244627552106977, | |
| "eval_compot_MAE_h": 0.004357840050943196, | |
| "eval_compot_MAE_w": 0.014095565304160118, | |
| "eval_compot_MAE_x": 0.012027833610773087, | |
| "eval_compot_MAE_y": 0.006497269030660391, | |
| "eval_compot_NUM_probability": 0.9999580085277557, | |
| "eval_compot_inside_bbox": 1.0, | |
| "eval_compot_loss": 0.04286140948534012, | |
| "eval_compot_loss_ce": 4.613543933373876e-05, | |
| "eval_compot_loss_iou": 0.507080078125, | |
| "eval_compot_loss_num": 0.009250640869140625, | |
| "eval_compot_loss_xval": 0.0462188720703125, | |
| "eval_compot_runtime": 84.1131, | |
| "eval_compot_samples_per_second": 0.594, | |
| "eval_compot_steps_per_second": 0.024, | |
| "num_input_tokens_seen": 86333348, | |
| "step": 500 | |
| } | |
| ], | |
| "logging_steps": 1.0, | |
| "max_steps": 1152, | |
| "num_input_tokens_seen": 86333348, | |
| "num_train_epochs": 3, | |
| "save_steps": 250, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 622740728971264.0, | |
| "train_batch_size": 4, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |