| { | |
| "best_metric": 0.886983632112237, | |
| "best_model_checkpoint": "./results/finetunes/20250129-172154__microsoft_phi-4__ft/checkpoint-432", | |
| "epoch": 0.1884961884961885, | |
| "eval_steps": 16, | |
| "global_step": 1088, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0003465003465003465, | |
| "grad_norm": 86.02561950683594, | |
| "learning_rate": 2.345481288954896e-05, | |
| "loss": 0.6643, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.000693000693000693, | |
| "grad_norm": 69.82108306884766, | |
| "learning_rate": 2.345481288954896e-05, | |
| "loss": 0.471, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.0010395010395010396, | |
| "grad_norm": 56.067874908447266, | |
| "learning_rate": 2.345481288954896e-05, | |
| "loss": 0.6652, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.001386001386001386, | |
| "grad_norm": 20.953514099121094, | |
| "learning_rate": 2.345481288954896e-05, | |
| "loss": 0.7931, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.0017325017325017325, | |
| "grad_norm": 66.40070343017578, | |
| "learning_rate": 2.345481288954896e-05, | |
| "loss": 0.3449, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.002079002079002079, | |
| "grad_norm": 19.0939998626709, | |
| "learning_rate": 2.345481288954896e-05, | |
| "loss": 0.4636, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.0024255024255024253, | |
| "grad_norm": 7.584406852722168, | |
| "learning_rate": 2.345481288954896e-05, | |
| "loss": 0.4794, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.002772002772002772, | |
| "grad_norm": 114.57583618164062, | |
| "learning_rate": 2.345481288954896e-05, | |
| "loss": 0.6834, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.002772002772002772, | |
| "eval_1_ratio_diff": -0.1504286827747467, | |
| "eval_accuracy": 0.7778643803585347, | |
| "eval_f1": 0.7382920110192838, | |
| "eval_loss": 0.5040489435195923, | |
| "eval_precision": 0.8973214285714286, | |
| "eval_recall": 0.6271450858034321, | |
| "eval_runtime": 3804.9177, | |
| "eval_samples_per_second": 0.337, | |
| "eval_steps_per_second": 0.169, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.0031185031185031187, | |
| "grad_norm": 83.26435089111328, | |
| "learning_rate": 2.345481288954896e-05, | |
| "loss": 0.5774, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.003465003465003465, | |
| "grad_norm": 59.174560546875, | |
| "learning_rate": 2.345481288954896e-05, | |
| "loss": 0.7834, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.0038115038115038116, | |
| "grad_norm": 26.088254928588867, | |
| "learning_rate": 2.345481288954896e-05, | |
| "loss": 0.3587, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.004158004158004158, | |
| "grad_norm": 40.37193298339844, | |
| "learning_rate": 2.345481288954896e-05, | |
| "loss": 0.3989, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.0045045045045045045, | |
| "grad_norm": 75.50234985351562, | |
| "learning_rate": 2.345481288954896e-05, | |
| "loss": 0.5288, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.004851004851004851, | |
| "grad_norm": 37.00468444824219, | |
| "learning_rate": 2.345481288954896e-05, | |
| "loss": 0.2342, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.005197505197505198, | |
| "grad_norm": 74.53498077392578, | |
| "learning_rate": 2.345481288954896e-05, | |
| "loss": 0.3931, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.005544005544005544, | |
| "grad_norm": 40.15735626220703, | |
| "learning_rate": 2.345481288954896e-05, | |
| "loss": 0.1786, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.005544005544005544, | |
| "eval_1_ratio_diff": 0.09586905689789549, | |
| "eval_accuracy": 0.8199532346063912, | |
| "eval_f1": 0.8355871886120997, | |
| "eval_loss": 0.44859689474105835, | |
| "eval_precision": 0.768324607329843, | |
| "eval_recall": 0.9157566302652106, | |
| "eval_runtime": 3804.4441, | |
| "eval_samples_per_second": 0.337, | |
| "eval_steps_per_second": 0.169, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.00589050589050589, | |
| "grad_norm": 17.47291374206543, | |
| "learning_rate": 2.345481288954896e-05, | |
| "loss": 0.628, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.006237006237006237, | |
| "grad_norm": 52.683101654052734, | |
| "learning_rate": 2.345481288954896e-05, | |
| "loss": 0.8133, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.006583506583506584, | |
| "grad_norm": 12.70147705078125, | |
| "learning_rate": 2.345481288954896e-05, | |
| "loss": 0.2731, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.00693000693000693, | |
| "grad_norm": 68.10077667236328, | |
| "learning_rate": 2.345481288954896e-05, | |
| "loss": 0.3009, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.007276507276507277, | |
| "grad_norm": 6.308539867401123, | |
| "learning_rate": 2.345481288954896e-05, | |
| "loss": 0.625, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.007623007623007623, | |
| "grad_norm": 9.908273696899414, | |
| "learning_rate": 2.345481288954896e-05, | |
| "loss": 0.0649, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.00796950796950797, | |
| "grad_norm": 42.916542053222656, | |
| "learning_rate": 2.345481288954896e-05, | |
| "loss": 0.2181, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.008316008316008316, | |
| "grad_norm": 13.416431427001953, | |
| "learning_rate": 2.345481288954896e-05, | |
| "loss": 0.1225, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.008316008316008316, | |
| "eval_1_ratio_diff": 0.13951675759937643, | |
| "eval_accuracy": 0.8043647700701481, | |
| "eval_f1": 0.8281998631074606, | |
| "eval_loss": 0.7583639025688171, | |
| "eval_precision": 0.7378048780487805, | |
| "eval_recall": 0.9438377535101404, | |
| "eval_runtime": 3804.8896, | |
| "eval_samples_per_second": 0.337, | |
| "eval_steps_per_second": 0.169, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.008662508662508662, | |
| "grad_norm": 118.25779724121094, | |
| "learning_rate": 2.345481288954896e-05, | |
| "loss": 1.0563, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.009009009009009009, | |
| "grad_norm": 0.019266022369265556, | |
| "learning_rate": 2.345481288954896e-05, | |
| "loss": 0.1503, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.009355509355509356, | |
| "grad_norm": 0.027064168825745583, | |
| "learning_rate": 2.345481288954896e-05, | |
| "loss": 1.3849, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.009702009702009701, | |
| "grad_norm": 0.009735088795423508, | |
| "learning_rate": 2.345481288954896e-05, | |
| "loss": 2.0022, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.010048510048510048, | |
| "grad_norm": 18.206382751464844, | |
| "learning_rate": 2.345481288954896e-05, | |
| "loss": 0.6092, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.010395010395010396, | |
| "grad_norm": 107.0077896118164, | |
| "learning_rate": 2.345481288954896e-05, | |
| "loss": 2.1483, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.010741510741510741, | |
| "grad_norm": 11.034400939941406, | |
| "learning_rate": 2.345481288954896e-05, | |
| "loss": 0.0388, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.011088011088011088, | |
| "grad_norm": 209.00735473632812, | |
| "learning_rate": 2.345481288954896e-05, | |
| "loss": 3.7767, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.011088011088011088, | |
| "eval_1_ratio_diff": 0.031956352299298496, | |
| "eval_accuracy": 0.8667186282151208, | |
| "eval_f1": 0.8707482993197279, | |
| "eval_loss": 0.5764707922935486, | |
| "eval_precision": 0.844574780058651, | |
| "eval_recall": 0.8985959438377535, | |
| "eval_runtime": 3804.7578, | |
| "eval_samples_per_second": 0.337, | |
| "eval_steps_per_second": 0.169, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.011434511434511435, | |
| "grad_norm": 5.225222110748291, | |
| "learning_rate": 2.345481288954896e-05, | |
| "loss": 0.8703, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.01178101178101178, | |
| "grad_norm": 106.7333984375, | |
| "learning_rate": 2.345481288954896e-05, | |
| "loss": 1.8151, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.012127512127512128, | |
| "grad_norm": 8.81540298461914, | |
| "learning_rate": 2.345481288954896e-05, | |
| "loss": 0.054, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.012474012474012475, | |
| "grad_norm": 1.4214346408843994, | |
| "learning_rate": 2.345481288954896e-05, | |
| "loss": 0.0035, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.01282051282051282, | |
| "grad_norm": 1.0482317209243774, | |
| "learning_rate": 2.345481288954896e-05, | |
| "loss": 0.0068, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.013167013167013167, | |
| "grad_norm": 107.0990219116211, | |
| "learning_rate": 2.345481288954896e-05, | |
| "loss": 2.6722, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.013513513513513514, | |
| "grad_norm": 10.672940254211426, | |
| "learning_rate": 2.345481288954896e-05, | |
| "loss": 0.3053, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.01386001386001386, | |
| "grad_norm": 99.93098449707031, | |
| "learning_rate": 2.345481288954896e-05, | |
| "loss": 2.6363, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.01386001386001386, | |
| "eval_1_ratio_diff": 0.15666406858924398, | |
| "eval_accuracy": 0.8121590023382697, | |
| "eval_f1": 0.837491571139582, | |
| "eval_loss": 0.9510833621025085, | |
| "eval_precision": 0.7375296912114014, | |
| "eval_recall": 0.968798751950078, | |
| "eval_runtime": 3804.6615, | |
| "eval_samples_per_second": 0.337, | |
| "eval_steps_per_second": 0.169, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.014206514206514207, | |
| "grad_norm": 104.2468490600586, | |
| "learning_rate": 2.345481288954896e-05, | |
| "loss": 1.3811, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.014553014553014554, | |
| "grad_norm": 8.673962593078613, | |
| "learning_rate": 2.345481288954896e-05, | |
| "loss": 0.052, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.0148995148995149, | |
| "grad_norm": 101.39873504638672, | |
| "learning_rate": 2.345481288954896e-05, | |
| "loss": 2.7375, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.015246015246015246, | |
| "grad_norm": 32.66648864746094, | |
| "learning_rate": 2.345481288954896e-05, | |
| "loss": 0.0983, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.015592515592515593, | |
| "grad_norm": 1.59808349609375, | |
| "learning_rate": 2.345481288954896e-05, | |
| "loss": 0.0137, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.01593901593901594, | |
| "grad_norm": 0.20981302857398987, | |
| "learning_rate": 2.345481288954896e-05, | |
| "loss": 1.6231, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.016285516285516284, | |
| "grad_norm": 102.21177673339844, | |
| "learning_rate": 2.345481288954896e-05, | |
| "loss": 1.4008, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.016632016632016633, | |
| "grad_norm": 0.24047495424747467, | |
| "learning_rate": 2.345481288954896e-05, | |
| "loss": 0.4377, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.016632016632016633, | |
| "eval_1_ratio_diff": -0.030397505845674244, | |
| "eval_accuracy": 0.8667186282151208, | |
| "eval_f1": 0.8624296057924377, | |
| "eval_loss": 0.5004476308822632, | |
| "eval_precision": 0.8903654485049833, | |
| "eval_recall": 0.8361934477379095, | |
| "eval_runtime": 3804.5287, | |
| "eval_samples_per_second": 0.337, | |
| "eval_steps_per_second": 0.169, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.01697851697851698, | |
| "grad_norm": 25.925752639770508, | |
| "learning_rate": 2.345481288954896e-05, | |
| "loss": 1.2565, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.017325017325017324, | |
| "grad_norm": 17.991100311279297, | |
| "learning_rate": 2.345481288954896e-05, | |
| "loss": 0.0732, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.017671517671517672, | |
| "grad_norm": 9.48592758178711, | |
| "learning_rate": 2.345481288954896e-05, | |
| "loss": 1.1476, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.018018018018018018, | |
| "grad_norm": 25.491506576538086, | |
| "learning_rate": 2.345481288954896e-05, | |
| "loss": 0.0673, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.018364518364518363, | |
| "grad_norm": 7.842948913574219, | |
| "learning_rate": 2.345481288954896e-05, | |
| "loss": 0.3734, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.018711018711018712, | |
| "grad_norm": 7.493876934051514, | |
| "learning_rate": 2.345481288954896e-05, | |
| "loss": 0.044, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.019057519057519057, | |
| "grad_norm": 1.7811343669891357, | |
| "learning_rate": 2.345481288954896e-05, | |
| "loss": 0.0072, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.019404019404019403, | |
| "grad_norm": 3.076145648956299, | |
| "learning_rate": 2.345481288954896e-05, | |
| "loss": 0.0114, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.019404019404019403, | |
| "eval_1_ratio_diff": -0.0093530787217459, | |
| "eval_accuracy": 0.8706157443491817, | |
| "eval_f1": 0.8692913385826772, | |
| "eval_loss": 0.4880940914154053, | |
| "eval_precision": 0.8775834658187599, | |
| "eval_recall": 0.8611544461778471, | |
| "eval_runtime": 3804.9107, | |
| "eval_samples_per_second": 0.337, | |
| "eval_steps_per_second": 0.169, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.01975051975051975, | |
| "grad_norm": 0.05920056626200676, | |
| "learning_rate": 2.345481288954896e-05, | |
| "loss": 0.0008, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.020097020097020097, | |
| "grad_norm": 0.1755896806716919, | |
| "learning_rate": 2.345481288954896e-05, | |
| "loss": 0.9493, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.020443520443520442, | |
| "grad_norm": 0.08734409511089325, | |
| "learning_rate": 2.345481288954896e-05, | |
| "loss": 0.0014, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 0.02079002079002079, | |
| "grad_norm": 0.04737528786063194, | |
| "learning_rate": 2.345481288954896e-05, | |
| "loss": 1.8342, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.021136521136521137, | |
| "grad_norm": 106.53982543945312, | |
| "learning_rate": 2.345481288954896e-05, | |
| "loss": 1.3666, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 0.021483021483021482, | |
| "grad_norm": 100.54167175292969, | |
| "learning_rate": 2.345481288954896e-05, | |
| "loss": 1.5401, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.02182952182952183, | |
| "grad_norm": 96.30493927001953, | |
| "learning_rate": 2.345481288954896e-05, | |
| "loss": 0.6086, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 0.022176022176022176, | |
| "grad_norm": 0.14051453769207, | |
| "learning_rate": 2.345481288954896e-05, | |
| "loss": 0.4729, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.022176022176022176, | |
| "eval_1_ratio_diff": -0.02883865939204988, | |
| "eval_accuracy": 0.8713951675759938, | |
| "eval_f1": 0.8674698795180723, | |
| "eval_loss": 0.6076126098632812, | |
| "eval_precision": 0.8940397350993378, | |
| "eval_recall": 0.8424336973478939, | |
| "eval_runtime": 3804.7525, | |
| "eval_samples_per_second": 0.337, | |
| "eval_steps_per_second": 0.169, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.02252252252252252, | |
| "grad_norm": 10.388425827026367, | |
| "learning_rate": 2.345481288954896e-05, | |
| "loss": 0.1845, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.02286902286902287, | |
| "grad_norm": 199.0683135986328, | |
| "learning_rate": 2.345481288954896e-05, | |
| "loss": 1.7664, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 0.023215523215523216, | |
| "grad_norm": 107.45567321777344, | |
| "learning_rate": 2.345481288954896e-05, | |
| "loss": 2.095, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 0.02356202356202356, | |
| "grad_norm": 0.16609467566013336, | |
| "learning_rate": 2.345481288954896e-05, | |
| "loss": 1.0607, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.02390852390852391, | |
| "grad_norm": 17.739356994628906, | |
| "learning_rate": 2.345481288954896e-05, | |
| "loss": 0.0496, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 0.024255024255024255, | |
| "grad_norm": 0.5711311101913452, | |
| "learning_rate": 2.345481288954896e-05, | |
| "loss": 0.652, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.0246015246015246, | |
| "grad_norm": 12.555821418762207, | |
| "learning_rate": 2.345481288954896e-05, | |
| "loss": 0.0351, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 0.02494802494802495, | |
| "grad_norm": 21.08517837524414, | |
| "learning_rate": 2.345481288954896e-05, | |
| "loss": 0.0624, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.02494802494802495, | |
| "eval_1_ratio_diff": 0.08106001558846454, | |
| "eval_accuracy": 0.8597038191738113, | |
| "eval_f1": 0.8701298701298701, | |
| "eval_loss": 0.7122698426246643, | |
| "eval_precision": 0.8093959731543624, | |
| "eval_recall": 0.9407176287051482, | |
| "eval_runtime": 3805.0376, | |
| "eval_samples_per_second": 0.337, | |
| "eval_steps_per_second": 0.169, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.025294525294525295, | |
| "grad_norm": 46.71989822387695, | |
| "learning_rate": 2.345481288954896e-05, | |
| "loss": 0.152, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 0.02564102564102564, | |
| "grad_norm": 2.8813726902008057, | |
| "learning_rate": 2.345481288954896e-05, | |
| "loss": 0.0211, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 0.02598752598752599, | |
| "grad_norm": 99.26583099365234, | |
| "learning_rate": 2.345481288954896e-05, | |
| "loss": 1.1805, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.026334026334026334, | |
| "grad_norm": 99.44942474365234, | |
| "learning_rate": 2.345481288954896e-05, | |
| "loss": 2.6296, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 0.02668052668052668, | |
| "grad_norm": 100.0189208984375, | |
| "learning_rate": 2.345481288954896e-05, | |
| "loss": 2.0272, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 0.02702702702702703, | |
| "grad_norm": 0.5884932279586792, | |
| "learning_rate": 2.345481288954896e-05, | |
| "loss": 0.0016, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 0.027373527373527374, | |
| "grad_norm": 0.00022523404913954437, | |
| "learning_rate": 2.345481288954896e-05, | |
| "loss": 0.0, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 0.02772002772002772, | |
| "grad_norm": 38.54767608642578, | |
| "learning_rate": 2.345481288954896e-05, | |
| "loss": 0.1184, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.02772002772002772, | |
| "eval_1_ratio_diff": -0.07560405300077944, | |
| "eval_accuracy": 0.848012470771629, | |
| "eval_f1": 0.8354430379746836, | |
| "eval_loss": 0.981860339641571, | |
| "eval_precision": 0.9099264705882353, | |
| "eval_recall": 0.7722308892355694, | |
| "eval_runtime": 3804.9612, | |
| "eval_samples_per_second": 0.337, | |
| "eval_steps_per_second": 0.169, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.028066528066528068, | |
| "grad_norm": 4.101857848581858e-05, | |
| "learning_rate": 2.345481288954896e-05, | |
| "loss": 2.4463, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 0.028413028413028413, | |
| "grad_norm": 0.3266747295856476, | |
| "learning_rate": 2.345481288954896e-05, | |
| "loss": 0.0015, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 0.02875952875952876, | |
| "grad_norm": 92.61214447021484, | |
| "learning_rate": 2.345481288954896e-05, | |
| "loss": 1.2523, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 0.029106029106029108, | |
| "grad_norm": 88.13154602050781, | |
| "learning_rate": 2.345481288954896e-05, | |
| "loss": 0.5995, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 0.029452529452529453, | |
| "grad_norm": 93.40657043457031, | |
| "learning_rate": 2.345481288954896e-05, | |
| "loss": 0.7273, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.0297990297990298, | |
| "grad_norm": 0.0034895113203674555, | |
| "learning_rate": 2.345481288954896e-05, | |
| "loss": 0.0041, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 0.030145530145530147, | |
| "grad_norm": 1.7105669975280762, | |
| "learning_rate": 2.345481288954896e-05, | |
| "loss": 0.0051, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 0.030492030492030493, | |
| "grad_norm": 15.390508651733398, | |
| "learning_rate": 2.345481288954896e-05, | |
| "loss": 1.8573, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 0.030492030492030493, | |
| "eval_1_ratio_diff": -0.016367887763055367, | |
| "eval_accuracy": 0.8745128604832424, | |
| "eval_f1": 0.8723235527359239, | |
| "eval_loss": 0.67668217420578, | |
| "eval_precision": 0.8870967741935484, | |
| "eval_recall": 0.858034321372855, | |
| "eval_runtime": 3804.3744, | |
| "eval_samples_per_second": 0.337, | |
| "eval_steps_per_second": 0.169, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 0.030838530838530838, | |
| "grad_norm": 0.000692114292178303, | |
| "learning_rate": 2.345481288954896e-05, | |
| "loss": 0.8897, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 0.031185031185031187, | |
| "grad_norm": 91.26844024658203, | |
| "learning_rate": 2.345481288954896e-05, | |
| "loss": 0.7938, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.03153153153153153, | |
| "grad_norm": 0.2341953068971634, | |
| "learning_rate": 2.345481288954896e-05, | |
| "loss": 0.0028, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 0.03187803187803188, | |
| "grad_norm": 16.670625686645508, | |
| "learning_rate": 2.345481288954896e-05, | |
| "loss": 0.0469, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 0.032224532224532226, | |
| "grad_norm": 94.17417907714844, | |
| "learning_rate": 2.345481288954896e-05, | |
| "loss": 0.7944, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 0.03257103257103257, | |
| "grad_norm": 97.2259292602539, | |
| "learning_rate": 2.345481288954896e-05, | |
| "loss": 3.8379, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 0.03291753291753292, | |
| "grad_norm": 0.2022901326417923, | |
| "learning_rate": 2.345481288954896e-05, | |
| "loss": 0.0015, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.033264033264033266, | |
| "grad_norm": 0.9778112173080444, | |
| "learning_rate": 2.345481288954896e-05, | |
| "loss": 0.4574, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 0.033264033264033266, | |
| "eval_1_ratio_diff": -0.01948558067030398, | |
| "eval_accuracy": 0.8791893998441154, | |
| "eval_f1": 0.8766905330151153, | |
| "eval_loss": 0.6556122303009033, | |
| "eval_precision": 0.8944805194805194, | |
| "eval_recall": 0.859594383775351, | |
| "eval_runtime": 3804.2343, | |
| "eval_samples_per_second": 0.337, | |
| "eval_steps_per_second": 0.169, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 0.03361053361053361, | |
| "grad_norm": 2.296571969985962, | |
| "learning_rate": 2.3454812889548964e-06, | |
| "loss": 0.051, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 0.03395703395703396, | |
| "grad_norm": 95.68350982666016, | |
| "learning_rate": 2.3454812889548964e-06, | |
| "loss": 1.1658, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 0.034303534303534305, | |
| "grad_norm": 0.33530309796333313, | |
| "learning_rate": 2.3454812889548964e-06, | |
| "loss": 0.0035, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 0.03465003465003465, | |
| "grad_norm": 0.3190847337245941, | |
| "learning_rate": 2.3454812889548964e-06, | |
| "loss": 0.0046, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.034996534996534996, | |
| "grad_norm": 0.22778594493865967, | |
| "learning_rate": 2.3454812889548964e-06, | |
| "loss": 0.0008, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 0.035343035343035345, | |
| "grad_norm": 61.27931594848633, | |
| "learning_rate": 2.3454812889548964e-06, | |
| "loss": 0.2352, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 0.03568953568953569, | |
| "grad_norm": 0.07557390630245209, | |
| "learning_rate": 2.3454812889548964e-06, | |
| "loss": 0.2882, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 0.036036036036036036, | |
| "grad_norm": 69.33809661865234, | |
| "learning_rate": 2.3454812889548964e-06, | |
| "loss": 0.3218, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 0.036036036036036036, | |
| "eval_1_ratio_diff": -0.015588464536243185, | |
| "eval_accuracy": 0.8799688230709275, | |
| "eval_f1": 0.8779714738510301, | |
| "eval_loss": 0.6531423926353455, | |
| "eval_precision": 0.8921095008051529, | |
| "eval_recall": 0.8642745709828393, | |
| "eval_runtime": 3804.7065, | |
| "eval_samples_per_second": 0.337, | |
| "eval_steps_per_second": 0.169, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 0.036382536382536385, | |
| "grad_norm": 0.0005657664150930941, | |
| "learning_rate": 2.3454812889548964e-06, | |
| "loss": 0.0, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.036729036729036726, | |
| "grad_norm": 196.73025512695312, | |
| "learning_rate": 2.3454812889548964e-06, | |
| "loss": 4.7327, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 0.037075537075537075, | |
| "grad_norm": 0.0739484652876854, | |
| "learning_rate": 2.3454812889548964e-06, | |
| "loss": 0.0007, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 0.037422037422037424, | |
| "grad_norm": 45.3680305480957, | |
| "learning_rate": 2.3454812889548964e-06, | |
| "loss": 0.1615, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 0.037768537768537766, | |
| "grad_norm": 0.040038663893938065, | |
| "learning_rate": 2.3454812889548964e-06, | |
| "loss": 0.5819, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 0.038115038115038115, | |
| "grad_norm": 96.53976440429688, | |
| "learning_rate": 2.3454812889548964e-06, | |
| "loss": 1.343, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.038461538461538464, | |
| "grad_norm": 0.06292600184679031, | |
| "learning_rate": 2.3454812889548964e-06, | |
| "loss": 0.6704, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 0.038808038808038806, | |
| "grad_norm": 0.059508323669433594, | |
| "learning_rate": 2.3454812889548964e-06, | |
| "loss": 0.0004, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 0.038808038808038806, | |
| "eval_1_ratio_diff": -0.017926734216679674, | |
| "eval_accuracy": 0.8807482462977396, | |
| "eval_f1": 0.8784749801429707, | |
| "eval_loss": 0.6496536731719971, | |
| "eval_precision": 0.8948220064724919, | |
| "eval_recall": 0.8627145085803433, | |
| "eval_runtime": 3804.1312, | |
| "eval_samples_per_second": 0.337, | |
| "eval_steps_per_second": 0.169, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 0.039154539154539154, | |
| "grad_norm": 51.095741271972656, | |
| "learning_rate": 2.3454812889548964e-06, | |
| "loss": 0.1843, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 0.0395010395010395, | |
| "grad_norm": 2.8822214603424072, | |
| "learning_rate": 2.3454812889548964e-06, | |
| "loss": 0.024, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 0.039847539847539845, | |
| "grad_norm": 82.8652572631836, | |
| "learning_rate": 2.3454812889548964e-06, | |
| "loss": 0.5421, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.040194040194040194, | |
| "grad_norm": 46.17012405395508, | |
| "learning_rate": 2.3454812889548964e-06, | |
| "loss": 1.2965, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 0.04054054054054054, | |
| "grad_norm": 95.0387191772461, | |
| "learning_rate": 2.3454812889548964e-06, | |
| "loss": 0.854, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 0.040887040887040885, | |
| "grad_norm": 25.17544174194336, | |
| "learning_rate": 2.3454812889548964e-06, | |
| "loss": 0.0751, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 0.041233541233541234, | |
| "grad_norm": 0.18485400080680847, | |
| "learning_rate": 2.3454812889548964e-06, | |
| "loss": 0.0016, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 0.04158004158004158, | |
| "grad_norm": 28.657712936401367, | |
| "learning_rate": 2.3454812889548964e-06, | |
| "loss": 0.0891, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.04158004158004158, | |
| "eval_1_ratio_diff": -0.017926734216679674, | |
| "eval_accuracy": 0.882307092751364, | |
| "eval_f1": 0.8800635424940428, | |
| "eval_loss": 0.6529919505119324, | |
| "eval_precision": 0.8964401294498382, | |
| "eval_recall": 0.8642745709828393, | |
| "eval_runtime": 3804.0239, | |
| "eval_samples_per_second": 0.337, | |
| "eval_steps_per_second": 0.169, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.041926541926541924, | |
| "grad_norm": 1.6055676937103271, | |
| "learning_rate": 2.3454812889548964e-06, | |
| "loss": 0.0042, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 0.04227304227304227, | |
| "grad_norm": 0.9823045134544373, | |
| "learning_rate": 2.3454812889548964e-06, | |
| "loss": 2.1695, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 0.04261954261954262, | |
| "grad_norm": 0.0013798171421512961, | |
| "learning_rate": 2.3454812889548964e-06, | |
| "loss": 0.0006, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 0.042966042966042964, | |
| "grad_norm": 0.12650461494922638, | |
| "learning_rate": 2.3454812889548964e-06, | |
| "loss": 1.4929, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 0.04331254331254331, | |
| "grad_norm": 1.4423363208770752, | |
| "learning_rate": 2.3454812889548964e-06, | |
| "loss": 0.0039, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.04365904365904366, | |
| "grad_norm": 0.10318942368030548, | |
| "learning_rate": 2.3454812889548964e-06, | |
| "loss": 0.0007, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 0.044005544005544, | |
| "grad_norm": 0.8829823136329651, | |
| "learning_rate": 2.3454812889548964e-06, | |
| "loss": 0.0025, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 0.04435204435204435, | |
| "grad_norm": 0.7373402714729309, | |
| "learning_rate": 2.3454812889548964e-06, | |
| "loss": 0.002, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 0.04435204435204435, | |
| "eval_1_ratio_diff": -0.017147310989867492, | |
| "eval_accuracy": 0.8815276695245519, | |
| "eval_f1": 0.8793650793650793, | |
| "eval_loss": 0.645000696182251, | |
| "eval_precision": 0.8949919224555735, | |
| "eval_recall": 0.8642745709828393, | |
| "eval_runtime": 3801.6968, | |
| "eval_samples_per_second": 0.337, | |
| "eval_steps_per_second": 0.169, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 0.0446985446985447, | |
| "grad_norm": 10.895577430725098, | |
| "learning_rate": 2.3454812889548964e-06, | |
| "loss": 0.0309, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 0.04504504504504504, | |
| "grad_norm": 0.052276190370321274, | |
| "learning_rate": 2.3454812889548964e-06, | |
| "loss": 0.0007, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.04539154539154539, | |
| "grad_norm": 10.15628719329834, | |
| "learning_rate": 2.3454812889548964e-06, | |
| "loss": 0.0267, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 0.04573804573804574, | |
| "grad_norm": 99.17190551757812, | |
| "learning_rate": 2.3454812889548964e-06, | |
| "loss": 1.4613, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 0.04608454608454608, | |
| "grad_norm": 0.14940449595451355, | |
| "learning_rate": 2.3454812889548964e-06, | |
| "loss": 0.0004, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 0.04643104643104643, | |
| "grad_norm": 0.054311566054821014, | |
| "learning_rate": 2.3454812889548964e-06, | |
| "loss": 0.001, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 0.04677754677754678, | |
| "grad_norm": 175.11187744140625, | |
| "learning_rate": 2.3454812889548964e-06, | |
| "loss": 2.7088, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.04712404712404712, | |
| "grad_norm": 99.04315185546875, | |
| "learning_rate": 2.3454812889548964e-06, | |
| "loss": 0.3569, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 0.04712404712404712, | |
| "eval_1_ratio_diff": 0.0, | |
| "eval_accuracy": 0.8830865159781761, | |
| "eval_f1": 0.8829953198127926, | |
| "eval_loss": 0.6330925822257996, | |
| "eval_precision": 0.8829953198127926, | |
| "eval_recall": 0.8829953198127926, | |
| "eval_runtime": 3801.9691, | |
| "eval_samples_per_second": 0.337, | |
| "eval_steps_per_second": 0.169, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 0.04747054747054747, | |
| "grad_norm": 0.02164226956665516, | |
| "learning_rate": 2.3454812889548964e-06, | |
| "loss": 0.0002, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 0.04781704781704782, | |
| "grad_norm": 0.0005396510241553187, | |
| "learning_rate": 2.3454812889548964e-06, | |
| "loss": 0.0, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 0.04816354816354816, | |
| "grad_norm": 0.11029522866010666, | |
| "learning_rate": 2.3454812889548964e-06, | |
| "loss": 0.0008, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 0.04851004851004851, | |
| "grad_norm": 0.056431207805871964, | |
| "learning_rate": 2.3454812889548964e-06, | |
| "loss": 0.0002, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.04885654885654886, | |
| "grad_norm": 0.05988427251577377, | |
| "learning_rate": 2.3454812889548964e-06, | |
| "loss": 0.0007, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 0.0492030492030492, | |
| "grad_norm": 0.038764405995607376, | |
| "learning_rate": 2.3454812889548964e-06, | |
| "loss": 0.0003, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 0.04954954954954955, | |
| "grad_norm": 0.042382217943668365, | |
| "learning_rate": 2.3454812889548964e-06, | |
| "loss": 0.0004, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 0.0498960498960499, | |
| "grad_norm": 0.08615046739578247, | |
| "learning_rate": 2.3454812889548964e-06, | |
| "loss": 1.0097, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 0.0498960498960499, | |
| "eval_1_ratio_diff": 0.002338269680436489, | |
| "eval_accuracy": 0.882307092751364, | |
| "eval_f1": 0.8824902723735408, | |
| "eval_loss": 0.636997401714325, | |
| "eval_precision": 0.8804347826086957, | |
| "eval_recall": 0.8845553822152886, | |
| "eval_runtime": 3802.1055, | |
| "eval_samples_per_second": 0.337, | |
| "eval_steps_per_second": 0.169, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 0.05024255024255024, | |
| "grad_norm": 0.03646495193243027, | |
| "learning_rate": 2.3454812889548964e-06, | |
| "loss": 1.8997, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.05058905058905059, | |
| "grad_norm": 96.74433135986328, | |
| "learning_rate": 2.3454812889548964e-06, | |
| "loss": 1.111, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 0.05093555093555094, | |
| "grad_norm": 0.34065139293670654, | |
| "learning_rate": 2.3454812889548964e-06, | |
| "loss": 1.1679, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 0.05128205128205128, | |
| "grad_norm": 0.07861107587814331, | |
| "learning_rate": 2.3454812889548964e-06, | |
| "loss": 0.0003, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 0.05162855162855163, | |
| "grad_norm": 0.1301509290933609, | |
| "learning_rate": 2.3454812889548964e-06, | |
| "loss": 0.0006, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 0.05197505197505198, | |
| "grad_norm": 0.19169773161411285, | |
| "learning_rate": 2.3454812889548964e-06, | |
| "loss": 0.0006, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.05232155232155232, | |
| "grad_norm": 0.07935987412929535, | |
| "learning_rate": 2.3454812889548964e-06, | |
| "loss": 0.0007, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 0.05266805266805267, | |
| "grad_norm": 0.38771602511405945, | |
| "learning_rate": 2.3454812889548964e-06, | |
| "loss": 0.0037, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 0.05266805266805267, | |
| "eval_1_ratio_diff": -0.010911925175370263, | |
| "eval_accuracy": 0.8846453624318005, | |
| "eval_f1": 0.8832807570977917, | |
| "eval_loss": 0.6364841461181641, | |
| "eval_precision": 0.8931419457735247, | |
| "eval_recall": 0.8736349453978159, | |
| "eval_runtime": 3801.9684, | |
| "eval_samples_per_second": 0.337, | |
| "eval_steps_per_second": 0.169, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 0.05301455301455302, | |
| "grad_norm": 0.006659930571913719, | |
| "learning_rate": 2.3454812889548964e-06, | |
| "loss": 0.0, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 0.05336105336105336, | |
| "grad_norm": 0.036822691559791565, | |
| "learning_rate": 2.3454812889548964e-06, | |
| "loss": 0.0001, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 0.05370755370755371, | |
| "grad_norm": 0.08990409970283508, | |
| "learning_rate": 2.3454812889548964e-06, | |
| "loss": 0.0004, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.05405405405405406, | |
| "grad_norm": 18.95056915283203, | |
| "learning_rate": 2.3454812889548964e-06, | |
| "loss": 0.0538, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 0.0544005544005544, | |
| "grad_norm": 0.5704998970031738, | |
| "learning_rate": 2.3454812889548964e-06, | |
| "loss": 0.0048, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 0.05474705474705475, | |
| "grad_norm": 0.26951637864112854, | |
| "learning_rate": 2.3454812889548964e-06, | |
| "loss": 0.0007, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 0.0550935550935551, | |
| "grad_norm": 0.029281673952937126, | |
| "learning_rate": 2.3454812889548964e-06, | |
| "loss": 0.0046, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 0.05544005544005544, | |
| "grad_norm": 0.11716895550489426, | |
| "learning_rate": 2.3454812889548964e-06, | |
| "loss": 0.0003, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.05544005544005544, | |
| "eval_1_ratio_diff": 0.000779423226812126, | |
| "eval_accuracy": 0.8838659392049883, | |
| "eval_f1": 0.8838659392049883, | |
| "eval_loss": 0.6468711495399475, | |
| "eval_precision": 0.883177570093458, | |
| "eval_recall": 0.8845553822152886, | |
| "eval_runtime": 3801.9776, | |
| "eval_samples_per_second": 0.337, | |
| "eval_steps_per_second": 0.169, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.05578655578655579, | |
| "grad_norm": 0.0007287299376912415, | |
| "learning_rate": 2.3454812889548964e-06, | |
| "loss": 0.0, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 0.056133056133056136, | |
| "grad_norm": 0.49943840503692627, | |
| "learning_rate": 2.3454812889548964e-06, | |
| "loss": 0.0016, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 0.05647955647955648, | |
| "grad_norm": 66.82319641113281, | |
| "learning_rate": 2.3454812889548964e-06, | |
| "loss": 0.6704, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 0.05682605682605683, | |
| "grad_norm": 0.013491444289684296, | |
| "learning_rate": 2.3454812889548964e-06, | |
| "loss": 0.0001, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 0.057172557172557176, | |
| "grad_norm": 22.91457176208496, | |
| "learning_rate": 2.3454812889548964e-06, | |
| "loss": 0.0746, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.05751905751905752, | |
| "grad_norm": 0.12269003689289093, | |
| "learning_rate": 2.3454812889548964e-06, | |
| "loss": 0.0005, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 0.057865557865557866, | |
| "grad_norm": 0.27446281909942627, | |
| "learning_rate": 2.3454812889548964e-06, | |
| "loss": 0.002, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 0.058212058212058215, | |
| "grad_norm": 0.0032145013101398945, | |
| "learning_rate": 2.3454812889548964e-06, | |
| "loss": 0.0001, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 0.058212058212058215, | |
| "eval_1_ratio_diff": -0.006235385814497285, | |
| "eval_accuracy": 0.8862042088854248, | |
| "eval_f1": 0.8854003139717426, | |
| "eval_loss": 0.6679523587226868, | |
| "eval_precision": 0.8909952606635071, | |
| "eval_recall": 0.8798751950078003, | |
| "eval_runtime": 3801.9487, | |
| "eval_samples_per_second": 0.337, | |
| "eval_steps_per_second": 0.169, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 0.05855855855855856, | |
| "grad_norm": 0.025797845795750618, | |
| "learning_rate": 2.3454812889548964e-06, | |
| "loss": 0.7783, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 0.058905058905058906, | |
| "grad_norm": 46.18172073364258, | |
| "learning_rate": 2.3454812889548964e-06, | |
| "loss": 0.1467, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.059251559251559255, | |
| "grad_norm": 14.45876407623291, | |
| "learning_rate": 2.3454812889548964e-06, | |
| "loss": 0.0406, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 0.0595980595980596, | |
| "grad_norm": 0.057107917964458466, | |
| "learning_rate": 2.3454812889548964e-06, | |
| "loss": 0.0016, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 0.059944559944559946, | |
| "grad_norm": 0.2026216834783554, | |
| "learning_rate": 2.3454812889548964e-06, | |
| "loss": 0.0008, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 0.060291060291060294, | |
| "grad_norm": 0.0018065160838887095, | |
| "learning_rate": 2.3454812889548964e-06, | |
| "loss": 0.1469, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 0.060637560637560636, | |
| "grad_norm": 0.2094137966632843, | |
| "learning_rate": 2.3454812889548964e-06, | |
| "loss": 0.004, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.060984060984060985, | |
| "grad_norm": 0.04397908225655556, | |
| "learning_rate": 2.3454812889548964e-06, | |
| "loss": 0.0003, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 0.060984060984060985, | |
| "eval_1_ratio_diff": 0.00623538581449723, | |
| "eval_accuracy": 0.8815276695245519, | |
| "eval_f1": 0.8821705426356589, | |
| "eval_loss": 0.6661366820335388, | |
| "eval_precision": 0.8767334360554699, | |
| "eval_recall": 0.8876755070202809, | |
| "eval_runtime": 3801.813, | |
| "eval_samples_per_second": 0.337, | |
| "eval_steps_per_second": 0.169, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 0.061330561330561334, | |
| "grad_norm": 0.3771282434463501, | |
| "learning_rate": 2.3454812889548964e-06, | |
| "loss": 0.001, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 0.061677061677061676, | |
| "grad_norm": 0.006506125908344984, | |
| "learning_rate": 2.3454812889548964e-06, | |
| "loss": 0.0003, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 0.062023562023562025, | |
| "grad_norm": 0.04021889716386795, | |
| "learning_rate": 2.3454812889548964e-06, | |
| "loss": 0.0001, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 0.062370062370062374, | |
| "grad_norm": 0.03679339215159416, | |
| "learning_rate": 2.3454812889548964e-06, | |
| "loss": 0.0002, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.06271656271656272, | |
| "grad_norm": 1.2173601388931274, | |
| "learning_rate": 2.3454812889548964e-06, | |
| "loss": 0.0037, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 0.06306306306306306, | |
| "grad_norm": 0.01900528371334076, | |
| "learning_rate": 2.3454812889548964e-06, | |
| "loss": 0.0001, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 0.06340956340956341, | |
| "grad_norm": 2.7245795726776123, | |
| "learning_rate": 2.3454812889548964e-06, | |
| "loss": 2.5894, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 0.06375606375606375, | |
| "grad_norm": 0.049179498106241226, | |
| "learning_rate": 2.3454812889548964e-06, | |
| "loss": 0.0003, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 0.06375606375606375, | |
| "eval_1_ratio_diff": 0.0038971161340607408, | |
| "eval_accuracy": 0.882307092751364, | |
| "eval_f1": 0.8826728826728827, | |
| "eval_loss": 0.6829211711883545, | |
| "eval_precision": 0.8792569659442725, | |
| "eval_recall": 0.8861154446177847, | |
| "eval_runtime": 3801.791, | |
| "eval_samples_per_second": 0.337, | |
| "eval_steps_per_second": 0.169, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 0.0641025641025641, | |
| "grad_norm": 0.01943264901638031, | |
| "learning_rate": 2.3454812889548964e-07, | |
| "loss": 0.0001, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.06444906444906445, | |
| "grad_norm": 0.01157083548605442, | |
| "learning_rate": 2.3454812889548964e-07, | |
| "loss": 1.9679, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 0.0647955647955648, | |
| "grad_norm": 1.1828685998916626, | |
| "learning_rate": 2.3454812889548964e-07, | |
| "loss": 0.0031, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 0.06514206514206514, | |
| "grad_norm": 1.0368878841400146, | |
| "learning_rate": 2.3454812889548964e-07, | |
| "loss": 0.0026, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 0.06548856548856549, | |
| "grad_norm": 0.016720596700906754, | |
| "learning_rate": 2.3454812889548964e-07, | |
| "loss": 0.0001, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 0.06583506583506583, | |
| "grad_norm": 92.59363555908203, | |
| "learning_rate": 2.3454812889548964e-07, | |
| "loss": 0.7584, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.06618156618156618, | |
| "grad_norm": 85.17770385742188, | |
| "learning_rate": 2.3454812889548964e-07, | |
| "loss": 1.4196, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 0.06652806652806653, | |
| "grad_norm": 0.10658948123455048, | |
| "learning_rate": 2.3454812889548964e-07, | |
| "loss": 0.0003, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 0.06652806652806653, | |
| "eval_1_ratio_diff": 0.002338269680436489, | |
| "eval_accuracy": 0.882307092751364, | |
| "eval_f1": 0.8824902723735408, | |
| "eval_loss": 0.6842545866966248, | |
| "eval_precision": 0.8804347826086957, | |
| "eval_recall": 0.8845553822152886, | |
| "eval_runtime": 3801.8143, | |
| "eval_samples_per_second": 0.337, | |
| "eval_steps_per_second": 0.169, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 0.06687456687456687, | |
| "grad_norm": 0.14093957841396332, | |
| "learning_rate": 2.3454812889548964e-07, | |
| "loss": 0.0004, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 0.06722106722106722, | |
| "grad_norm": 0.016837403178215027, | |
| "learning_rate": 2.3454812889548964e-07, | |
| "loss": 0.0001, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 0.06756756756756757, | |
| "grad_norm": 0.32615694403648376, | |
| "learning_rate": 2.3454812889548964e-07, | |
| "loss": 0.001, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.06791406791406791, | |
| "grad_norm": 0.0006421082653105259, | |
| "learning_rate": 2.3454812889548964e-07, | |
| "loss": 0.0019, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 0.06826056826056826, | |
| "grad_norm": 0.19320330023765564, | |
| "learning_rate": 2.3454812889548964e-07, | |
| "loss": 0.0005, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 0.06860706860706861, | |
| "grad_norm": 0.11701953411102295, | |
| "learning_rate": 2.3454812889548964e-07, | |
| "loss": 1.7289, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 0.06895356895356895, | |
| "grad_norm": 95.60856628417969, | |
| "learning_rate": 2.3454812889548964e-07, | |
| "loss": 0.9569, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 0.0693000693000693, | |
| "grad_norm": 45.88581085205078, | |
| "learning_rate": 2.3454812889548964e-07, | |
| "loss": 0.1621, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.0693000693000693, | |
| "eval_1_ratio_diff": 0.000779423226812126, | |
| "eval_accuracy": 0.8838659392049883, | |
| "eval_f1": 0.8838659392049883, | |
| "eval_loss": 0.6871820092201233, | |
| "eval_precision": 0.883177570093458, | |
| "eval_recall": 0.8845553822152886, | |
| "eval_runtime": 3801.8519, | |
| "eval_samples_per_second": 0.337, | |
| "eval_steps_per_second": 0.169, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.06964656964656965, | |
| "grad_norm": 0.018819378688931465, | |
| "learning_rate": 2.3454812889548964e-07, | |
| "loss": 0.0001, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 0.06999306999306999, | |
| "grad_norm": 95.983642578125, | |
| "learning_rate": 2.3454812889548964e-07, | |
| "loss": 1.1498, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 0.07033957033957033, | |
| "grad_norm": 0.26662153005599976, | |
| "learning_rate": 2.3454812889548964e-07, | |
| "loss": 0.0017, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 0.07068607068607069, | |
| "grad_norm": 14.524856567382812, | |
| "learning_rate": 2.3454812889548964e-07, | |
| "loss": 0.0997, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 0.07103257103257103, | |
| "grad_norm": 98.88104248046875, | |
| "learning_rate": 2.3454812889548964e-07, | |
| "loss": 3.3431, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.07137907137907137, | |
| "grad_norm": 0.023577246814966202, | |
| "learning_rate": 2.3454812889548964e-07, | |
| "loss": 0.0008, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 0.07172557172557173, | |
| "grad_norm": 0.0023175834212452173, | |
| "learning_rate": 2.3454812889548964e-07, | |
| "loss": 1.5407, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 0.07207207207207207, | |
| "grad_norm": 0.04252276197075844, | |
| "learning_rate": 2.3454812889548964e-07, | |
| "loss": 0.0002, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 0.07207207207207207, | |
| "eval_1_ratio_diff": -0.0015588464536243074, | |
| "eval_accuracy": 0.8846453624318005, | |
| "eval_f1": 0.884375, | |
| "eval_loss": 0.6888303756713867, | |
| "eval_precision": 0.8857589984350548, | |
| "eval_recall": 0.8829953198127926, | |
| "eval_runtime": 3802.0883, | |
| "eval_samples_per_second": 0.337, | |
| "eval_steps_per_second": 0.169, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 0.07241857241857241, | |
| "grad_norm": 0.016715947538614273, | |
| "learning_rate": 2.3454812889548964e-07, | |
| "loss": 0.0007, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 0.07276507276507277, | |
| "grad_norm": 1.2063138484954834, | |
| "learning_rate": 2.3454812889548964e-07, | |
| "loss": 0.0032, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.07311157311157311, | |
| "grad_norm": 79.85220336914062, | |
| "learning_rate": 2.3454812889548964e-07, | |
| "loss": 0.389, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 0.07345807345807345, | |
| "grad_norm": 0.020562905818223953, | |
| "learning_rate": 2.3454812889548964e-07, | |
| "loss": 0.0001, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 0.07380457380457381, | |
| "grad_norm": 0.09144292026758194, | |
| "learning_rate": 2.3454812889548964e-07, | |
| "loss": 0.0003, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 0.07415107415107415, | |
| "grad_norm": 0.320324182510376, | |
| "learning_rate": 2.3454812889548964e-07, | |
| "loss": 2.3083, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 0.07449757449757449, | |
| "grad_norm": 0.01271853782236576, | |
| "learning_rate": 2.3454812889548964e-07, | |
| "loss": 0.0001, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.07484407484407485, | |
| "grad_norm": 0.5471435785293579, | |
| "learning_rate": 2.3454812889548964e-07, | |
| "loss": 0.0014, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 0.07484407484407485, | |
| "eval_1_ratio_diff": -0.0038971161340607963, | |
| "eval_accuracy": 0.886983632112237, | |
| "eval_f1": 0.8864526233359437, | |
| "eval_loss": 0.6897381544113159, | |
| "eval_precision": 0.889937106918239, | |
| "eval_recall": 0.8829953198127926, | |
| "eval_runtime": 3802.2915, | |
| "eval_samples_per_second": 0.337, | |
| "eval_steps_per_second": 0.169, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 0.07519057519057519, | |
| "grad_norm": 48.383155822753906, | |
| "learning_rate": 2.3454812889548964e-07, | |
| "loss": 3.1628, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 0.07553707553707553, | |
| "grad_norm": 0.08458105474710464, | |
| "learning_rate": 2.3454812889548964e-07, | |
| "loss": 2.3924, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 0.07588357588357589, | |
| "grad_norm": 102.9222640991211, | |
| "learning_rate": 2.3454812889548964e-07, | |
| "loss": 1.1745, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 0.07623007623007623, | |
| "grad_norm": 0.4161507785320282, | |
| "learning_rate": 2.3454812889548964e-07, | |
| "loss": 0.0012, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.07657657657657657, | |
| "grad_norm": 8.339736938476562, | |
| "learning_rate": 2.3454812889548964e-07, | |
| "loss": 0.0215, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 0.07692307692307693, | |
| "grad_norm": 13.342203140258789, | |
| "learning_rate": 2.3454812889548964e-07, | |
| "loss": 0.0368, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 0.07726957726957727, | |
| "grad_norm": 0.16003106534481049, | |
| "learning_rate": 2.3454812889548964e-07, | |
| "loss": 1.1956, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 0.07761607761607761, | |
| "grad_norm": 0.4954013526439667, | |
| "learning_rate": 2.3454812889548964e-07, | |
| "loss": 0.0014, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 0.07761607761607761, | |
| "eval_1_ratio_diff": -0.0038971161340607963, | |
| "eval_accuracy": 0.886983632112237, | |
| "eval_f1": 0.8864526233359437, | |
| "eval_loss": 0.6851783990859985, | |
| "eval_precision": 0.889937106918239, | |
| "eval_recall": 0.8829953198127926, | |
| "eval_runtime": 3802.2925, | |
| "eval_samples_per_second": 0.337, | |
| "eval_steps_per_second": 0.169, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 0.07796257796257797, | |
| "grad_norm": 102.0475845336914, | |
| "learning_rate": 2.3454812889548964e-07, | |
| "loss": 1.8356, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.07830907830907831, | |
| "grad_norm": 100.94009399414062, | |
| "learning_rate": 2.3454812889548964e-07, | |
| "loss": 1.3047, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 0.07865557865557865, | |
| "grad_norm": 1.5232117176055908, | |
| "learning_rate": 2.3454812889548964e-07, | |
| "loss": 1.9998, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 0.079002079002079, | |
| "grad_norm": 0.28479334712028503, | |
| "learning_rate": 2.3454812889548964e-07, | |
| "loss": 0.3865, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 0.07934857934857935, | |
| "grad_norm": 0.4387876093387604, | |
| "learning_rate": 2.3454812889548964e-07, | |
| "loss": 0.0023, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 0.07969507969507969, | |
| "grad_norm": 0.09818091988563538, | |
| "learning_rate": 2.3454812889548964e-07, | |
| "loss": 0.0003, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.08004158004158005, | |
| "grad_norm": 15.28391170501709, | |
| "learning_rate": 2.3454812889548964e-07, | |
| "loss": 0.1902, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 0.08038808038808039, | |
| "grad_norm": 0.02123742178082466, | |
| "learning_rate": 2.3454812889548964e-07, | |
| "loss": 0.0003, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 0.08038808038808039, | |
| "eval_1_ratio_diff": -0.0015588464536243074, | |
| "eval_accuracy": 0.8846453624318005, | |
| "eval_f1": 0.884375, | |
| "eval_loss": 0.6811160445213318, | |
| "eval_precision": 0.8857589984350548, | |
| "eval_recall": 0.8829953198127926, | |
| "eval_runtime": 3802.0709, | |
| "eval_samples_per_second": 0.337, | |
| "eval_steps_per_second": 0.169, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 0.08073458073458073, | |
| "grad_norm": 1.7175835371017456, | |
| "learning_rate": 2.3454812889548964e-07, | |
| "loss": 0.1125, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 0.08108108108108109, | |
| "grad_norm": 0.018997719511389732, | |
| "learning_rate": 2.3454812889548964e-07, | |
| "loss": 0.0001, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 0.08142758142758143, | |
| "grad_norm": 0.009305741637945175, | |
| "learning_rate": 2.3454812889548964e-07, | |
| "loss": 0.0, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.08177408177408177, | |
| "grad_norm": 0.016941837966442108, | |
| "learning_rate": 2.3454812889548964e-07, | |
| "loss": 0.0001, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 0.08212058212058213, | |
| "grad_norm": 0.039568159729242325, | |
| "learning_rate": 2.3454812889548964e-07, | |
| "loss": 0.0001, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 0.08246708246708247, | |
| "grad_norm": 0.0007386144134216011, | |
| "learning_rate": 2.3454812889548964e-07, | |
| "loss": 0.0096, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 0.08281358281358281, | |
| "grad_norm": 103.17326354980469, | |
| "learning_rate": 2.3454812889548964e-07, | |
| "loss": 1.6978, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 0.08316008316008316, | |
| "grad_norm": 0.013824285939335823, | |
| "learning_rate": 2.3454812889548964e-07, | |
| "loss": 1.9363, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.08316008316008316, | |
| "eval_1_ratio_diff": -0.0007794232268121815, | |
| "eval_accuracy": 0.8854247856586126, | |
| "eval_f1": 0.8852459016393442, | |
| "eval_loss": 0.6808629035949707, | |
| "eval_precision": 0.8859375, | |
| "eval_recall": 0.8845553822152886, | |
| "eval_runtime": 3801.9206, | |
| "eval_samples_per_second": 0.337, | |
| "eval_steps_per_second": 0.169, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.0835065835065835, | |
| "grad_norm": 0.8375845551490784, | |
| "learning_rate": 2.3454812889548964e-07, | |
| "loss": 0.0107, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 0.08385308385308385, | |
| "grad_norm": 0.024480916559696198, | |
| "learning_rate": 2.3454812889548964e-07, | |
| "loss": 0.0002, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 0.0841995841995842, | |
| "grad_norm": 81.85368347167969, | |
| "learning_rate": 2.3454812889548964e-07, | |
| "loss": 0.488, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 0.08454608454608455, | |
| "grad_norm": 0.019407041370868683, | |
| "learning_rate": 2.3454812889548964e-07, | |
| "loss": 0.3867, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 0.08489258489258489, | |
| "grad_norm": 0.12843383848667145, | |
| "learning_rate": 2.3454812889548964e-07, | |
| "loss": 0.0005, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.08523908523908524, | |
| "grad_norm": 0.011416368186473846, | |
| "learning_rate": 2.3454812889548964e-07, | |
| "loss": 0.0004, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 0.08558558558558559, | |
| "grad_norm": 0.032860685139894485, | |
| "learning_rate": 2.3454812889548964e-07, | |
| "loss": 0.2092, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 0.08593208593208593, | |
| "grad_norm": 92.4006576538086, | |
| "learning_rate": 2.3454812889548964e-07, | |
| "loss": 0.765, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 0.08593208593208593, | |
| "eval_1_ratio_diff": -0.0015588464536243074, | |
| "eval_accuracy": 0.8846453624318005, | |
| "eval_f1": 0.884375, | |
| "eval_loss": 0.6817346811294556, | |
| "eval_precision": 0.8857589984350548, | |
| "eval_recall": 0.8829953198127926, | |
| "eval_runtime": 3802.0239, | |
| "eval_samples_per_second": 0.337, | |
| "eval_steps_per_second": 0.169, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 0.08627858627858628, | |
| "grad_norm": 0.006762477569282055, | |
| "learning_rate": 2.3454812889548964e-07, | |
| "loss": 0.0001, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 0.08662508662508663, | |
| "grad_norm": 0.021069064736366272, | |
| "learning_rate": 2.3454812889548964e-07, | |
| "loss": 0.0083, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.08697158697158697, | |
| "grad_norm": 0.017759568989276886, | |
| "learning_rate": 2.3454812889548964e-07, | |
| "loss": 0.0003, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 0.08731808731808732, | |
| "grad_norm": 100.52206420898438, | |
| "learning_rate": 2.3454812889548964e-07, | |
| "loss": 2.3802, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 0.08766458766458766, | |
| "grad_norm": 105.62979125976562, | |
| "learning_rate": 2.3454812889548964e-07, | |
| "loss": 3.7172, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 0.088011088011088, | |
| "grad_norm": 0.34661927819252014, | |
| "learning_rate": 2.3454812889548964e-07, | |
| "loss": 0.003, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 0.08835758835758836, | |
| "grad_norm": 99.1572265625, | |
| "learning_rate": 2.3454812889548964e-07, | |
| "loss": 2.9664, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.0887040887040887, | |
| "grad_norm": 0.0003038525173906237, | |
| "learning_rate": 2.3454812889548964e-07, | |
| "loss": 0.0001, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 0.0887040887040887, | |
| "eval_1_ratio_diff": 0.0, | |
| "eval_accuracy": 0.8846453624318005, | |
| "eval_f1": 0.8845553822152886, | |
| "eval_loss": 0.6775835752487183, | |
| "eval_precision": 0.8845553822152886, | |
| "eval_recall": 0.8845553822152886, | |
| "eval_runtime": 3802.2121, | |
| "eval_samples_per_second": 0.337, | |
| "eval_steps_per_second": 0.169, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 0.08905058905058905, | |
| "grad_norm": 0.016024667769670486, | |
| "learning_rate": 2.3454812889548964e-07, | |
| "loss": 2.7406, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 0.0893970893970894, | |
| "grad_norm": 100.74127960205078, | |
| "learning_rate": 2.3454812889548964e-07, | |
| "loss": 2.0749, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 0.08974358974358974, | |
| "grad_norm": 0.0011968504404649138, | |
| "learning_rate": 2.3454812889548964e-07, | |
| "loss": 0.0, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 0.09009009009009009, | |
| "grad_norm": 2.873019218444824, | |
| "learning_rate": 2.3454812889548964e-07, | |
| "loss": 0.0077, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.09043659043659044, | |
| "grad_norm": 0.018814735114574432, | |
| "learning_rate": 2.3454812889548964e-07, | |
| "loss": 0.0001, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 0.09078309078309078, | |
| "grad_norm": 0.0029318886809051037, | |
| "learning_rate": 2.3454812889548964e-07, | |
| "loss": 0.0, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 0.09112959112959113, | |
| "grad_norm": 0.009292243048548698, | |
| "learning_rate": 2.3454812889548964e-07, | |
| "loss": 0.0013, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 0.09147609147609148, | |
| "grad_norm": 0.013486395590007305, | |
| "learning_rate": 2.3454812889548964e-07, | |
| "loss": 0.0001, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 0.09147609147609148, | |
| "eval_1_ratio_diff": 0.000779423226812126, | |
| "eval_accuracy": 0.8838659392049883, | |
| "eval_f1": 0.8838659392049883, | |
| "eval_loss": 0.6737587451934814, | |
| "eval_precision": 0.883177570093458, | |
| "eval_recall": 0.8845553822152886, | |
| "eval_runtime": 3802.0754, | |
| "eval_samples_per_second": 0.337, | |
| "eval_steps_per_second": 0.169, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 0.09182259182259182, | |
| "grad_norm": 0.030322887003421783, | |
| "learning_rate": 2.3454812889548964e-07, | |
| "loss": 2.1096, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.09216909216909216, | |
| "grad_norm": 0.053928058594465256, | |
| "learning_rate": 2.3454812889548964e-07, | |
| "loss": 0.0004, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 0.09251559251559252, | |
| "grad_norm": 102.34916687011719, | |
| "learning_rate": 2.3454812889548964e-07, | |
| "loss": 2.1453, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 0.09286209286209286, | |
| "grad_norm": 0.9909892678260803, | |
| "learning_rate": 2.3454812889548964e-07, | |
| "loss": 0.0034, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 0.0932085932085932, | |
| "grad_norm": 95.83029174804688, | |
| "learning_rate": 2.3454812889548964e-07, | |
| "loss": 0.613, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 0.09355509355509356, | |
| "grad_norm": 108.8365707397461, | |
| "learning_rate": 2.3454812889548964e-07, | |
| "loss": 2.8814, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.0939015939015939, | |
| "grad_norm": 1.0528830289840698, | |
| "learning_rate": 2.3454812889548964e-07, | |
| "loss": 0.004, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 0.09424809424809424, | |
| "grad_norm": 0.0003182841173838824, | |
| "learning_rate": 2.3454812889548964e-07, | |
| "loss": 0.0, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 0.09424809424809424, | |
| "eval_1_ratio_diff": 0.0, | |
| "eval_accuracy": 0.8846453624318005, | |
| "eval_f1": 0.8845553822152886, | |
| "eval_loss": 0.6737085580825806, | |
| "eval_precision": 0.8845553822152886, | |
| "eval_recall": 0.8845553822152886, | |
| "eval_runtime": 3801.9716, | |
| "eval_samples_per_second": 0.337, | |
| "eval_steps_per_second": 0.169, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 0.0945945945945946, | |
| "grad_norm": 0.010217499919235706, | |
| "learning_rate": 2.3454812889548966e-08, | |
| "loss": 0.0015, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 0.09494109494109494, | |
| "grad_norm": 0.3472006022930145, | |
| "learning_rate": 2.3454812889548966e-08, | |
| "loss": 0.0009, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 0.09528759528759528, | |
| "grad_norm": 92.27816772460938, | |
| "learning_rate": 2.3454812889548966e-08, | |
| "loss": 1.5726, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.09563409563409564, | |
| "grad_norm": 0.005271604750305414, | |
| "learning_rate": 2.3454812889548966e-08, | |
| "loss": 0.4792, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 0.09598059598059598, | |
| "grad_norm": 98.78565216064453, | |
| "learning_rate": 2.3454812889548966e-08, | |
| "loss": 2.797, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 0.09632709632709632, | |
| "grad_norm": 43.5427131652832, | |
| "learning_rate": 2.3454812889548966e-08, | |
| "loss": 0.1413, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 0.09667359667359668, | |
| "grad_norm": 0.0004461357893887907, | |
| "learning_rate": 2.3454812889548966e-08, | |
| "loss": 0.0002, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 0.09702009702009702, | |
| "grad_norm": 0.028169138357043266, | |
| "learning_rate": 2.3454812889548966e-08, | |
| "loss": 2.1394, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.09702009702009702, | |
| "eval_1_ratio_diff": 0.0, | |
| "eval_accuracy": 0.8846453624318005, | |
| "eval_f1": 0.8845553822152886, | |
| "eval_loss": 0.6733962893486023, | |
| "eval_precision": 0.8845553822152886, | |
| "eval_recall": 0.8845553822152886, | |
| "eval_runtime": 3802.0044, | |
| "eval_samples_per_second": 0.337, | |
| "eval_steps_per_second": 0.169, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.09736659736659736, | |
| "grad_norm": 0.0005732557619921863, | |
| "learning_rate": 2.3454812889548966e-08, | |
| "loss": 0.0, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 0.09771309771309772, | |
| "grad_norm": 106.34477996826172, | |
| "learning_rate": 2.3454812889548966e-08, | |
| "loss": 2.1237, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 0.09805959805959806, | |
| "grad_norm": 0.2676280438899994, | |
| "learning_rate": 2.3454812889548966e-08, | |
| "loss": 0.0008, | |
| "step": 566 | |
| }, | |
| { | |
| "epoch": 0.0984060984060984, | |
| "grad_norm": 0.001712639699690044, | |
| "learning_rate": 2.3454812889548966e-08, | |
| "loss": 0.0032, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 0.09875259875259876, | |
| "grad_norm": 0.03806428983807564, | |
| "learning_rate": 2.3454812889548966e-08, | |
| "loss": 0.0001, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.0990990990990991, | |
| "grad_norm": 0.9465712308883667, | |
| "learning_rate": 2.3454812889548966e-08, | |
| "loss": 0.0024, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 0.09944559944559944, | |
| "grad_norm": 1.9612773656845093, | |
| "learning_rate": 2.3454812889548966e-08, | |
| "loss": 1.4378, | |
| "step": 574 | |
| }, | |
| { | |
| "epoch": 0.0997920997920998, | |
| "grad_norm": 100.2083969116211, | |
| "learning_rate": 2.3454812889548966e-08, | |
| "loss": 1.7637, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 0.0997920997920998, | |
| "eval_1_ratio_diff": 0.0, | |
| "eval_accuracy": 0.8846453624318005, | |
| "eval_f1": 0.8845553822152886, | |
| "eval_loss": 0.6731467843055725, | |
| "eval_precision": 0.8845553822152886, | |
| "eval_recall": 0.8845553822152886, | |
| "eval_runtime": 3803.3785, | |
| "eval_samples_per_second": 0.337, | |
| "eval_steps_per_second": 0.169, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 0.10013860013860014, | |
| "grad_norm": 91.13343811035156, | |
| "learning_rate": 2.3454812889548966e-08, | |
| "loss": 0.6642, | |
| "step": 578 | |
| }, | |
| { | |
| "epoch": 0.10048510048510048, | |
| "grad_norm": 0.021487416699528694, | |
| "learning_rate": 2.3454812889548966e-08, | |
| "loss": 0.0007, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.10083160083160084, | |
| "grad_norm": 0.017454765737056732, | |
| "learning_rate": 2.3454812889548966e-08, | |
| "loss": 0.0, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 0.10117810117810118, | |
| "grad_norm": 0.001247262000106275, | |
| "learning_rate": 2.3454812889548966e-08, | |
| "loss": 0.0, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 0.10152460152460152, | |
| "grad_norm": 0.01062464714050293, | |
| "learning_rate": 2.3454812889548966e-08, | |
| "loss": 0.0001, | |
| "step": 586 | |
| }, | |
| { | |
| "epoch": 0.10187110187110188, | |
| "grad_norm": 111.11028289794922, | |
| "learning_rate": 2.3454812889548966e-08, | |
| "loss": 5.5258, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 0.10221760221760222, | |
| "grad_norm": 0.04351111128926277, | |
| "learning_rate": 2.3454812889548966e-08, | |
| "loss": 0.0003, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.10256410256410256, | |
| "grad_norm": 0.013359926640987396, | |
| "learning_rate": 2.3454812889548966e-08, | |
| "loss": 0.0001, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 0.10256410256410256, | |
| "eval_1_ratio_diff": 0.0, | |
| "eval_accuracy": 0.8846453624318005, | |
| "eval_f1": 0.8845553822152886, | |
| "eval_loss": 0.6729430556297302, | |
| "eval_precision": 0.8845553822152886, | |
| "eval_recall": 0.8845553822152886, | |
| "eval_runtime": 3802.5461, | |
| "eval_samples_per_second": 0.337, | |
| "eval_steps_per_second": 0.169, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 0.10291060291060292, | |
| "grad_norm": 0.05155809223651886, | |
| "learning_rate": 2.3454812889548966e-08, | |
| "loss": 0.0001, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 0.10325710325710326, | |
| "grad_norm": 99.85391998291016, | |
| "learning_rate": 2.3454812889548966e-08, | |
| "loss": 1.0198, | |
| "step": 596 | |
| }, | |
| { | |
| "epoch": 0.1036036036036036, | |
| "grad_norm": 10.50502872467041, | |
| "learning_rate": 2.3454812889548966e-08, | |
| "loss": 1.4264, | |
| "step": 598 | |
| }, | |
| { | |
| "epoch": 0.10395010395010396, | |
| "grad_norm": 0.01816392131149769, | |
| "learning_rate": 2.3454812889548966e-08, | |
| "loss": 2.5265, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.1042966042966043, | |
| "grad_norm": 38.740116119384766, | |
| "learning_rate": 2.3454812889548966e-08, | |
| "loss": 0.2613, | |
| "step": 602 | |
| }, | |
| { | |
| "epoch": 0.10464310464310464, | |
| "grad_norm": 0.011164786294102669, | |
| "learning_rate": 2.3454812889548966e-08, | |
| "loss": 0.0001, | |
| "step": 604 | |
| }, | |
| { | |
| "epoch": 0.104989604989605, | |
| "grad_norm": 0.003219365607947111, | |
| "learning_rate": 2.3454812889548966e-08, | |
| "loss": 0.0, | |
| "step": 606 | |
| }, | |
| { | |
| "epoch": 0.10533610533610534, | |
| "grad_norm": 0.0027089028153568506, | |
| "learning_rate": 2.3454812889548966e-08, | |
| "loss": 0.0005, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 0.10533610533610534, | |
| "eval_1_ratio_diff": 0.0, | |
| "eval_accuracy": 0.8846453624318005, | |
| "eval_f1": 0.8845553822152886, | |
| "eval_loss": 0.6724082827568054, | |
| "eval_precision": 0.8845553822152886, | |
| "eval_recall": 0.8845553822152886, | |
| "eval_runtime": 3802.7555, | |
| "eval_samples_per_second": 0.337, | |
| "eval_steps_per_second": 0.169, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 0.10568260568260568, | |
| "grad_norm": 3.065094232559204, | |
| "learning_rate": 2.3454812889548966e-08, | |
| "loss": 0.0082, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.10602910602910603, | |
| "grad_norm": 0.002183800796046853, | |
| "learning_rate": 2.3454812889548966e-08, | |
| "loss": 0.0, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 0.10637560637560638, | |
| "grad_norm": 104.22856140136719, | |
| "learning_rate": 2.3454812889548966e-08, | |
| "loss": 1.6673, | |
| "step": 614 | |
| }, | |
| { | |
| "epoch": 0.10672210672210672, | |
| "grad_norm": 0.039993271231651306, | |
| "learning_rate": 2.3454812889548966e-08, | |
| "loss": 2.5715, | |
| "step": 616 | |
| }, | |
| { | |
| "epoch": 0.10706860706860707, | |
| "grad_norm": 81.30303955078125, | |
| "learning_rate": 2.3454812889548966e-08, | |
| "loss": 1.5197, | |
| "step": 618 | |
| }, | |
| { | |
| "epoch": 0.10741510741510742, | |
| "grad_norm": 0.011346640065312386, | |
| "learning_rate": 2.3454812889548966e-08, | |
| "loss": 0.0006, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.10776160776160776, | |
| "grad_norm": 6.598723411560059, | |
| "learning_rate": 2.3454812889548966e-08, | |
| "loss": 1.8566, | |
| "step": 622 | |
| }, | |
| { | |
| "epoch": 0.10810810810810811, | |
| "grad_norm": 31.924484252929688, | |
| "learning_rate": 2.3454812889548966e-08, | |
| "loss": 2.4598, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 0.10810810810810811, | |
| "eval_1_ratio_diff": 0.0, | |
| "eval_accuracy": 0.8846453624318005, | |
| "eval_f1": 0.8845553822152886, | |
| "eval_loss": 0.6718815565109253, | |
| "eval_precision": 0.8845553822152886, | |
| "eval_recall": 0.8845553822152886, | |
| "eval_runtime": 3803.0388, | |
| "eval_samples_per_second": 0.337, | |
| "eval_steps_per_second": 0.169, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 0.10845460845460846, | |
| "grad_norm": 0.2010572999715805, | |
| "learning_rate": 2.3454812889548966e-08, | |
| "loss": 0.0006, | |
| "step": 626 | |
| }, | |
| { | |
| "epoch": 0.1088011088011088, | |
| "grad_norm": 0.026479966938495636, | |
| "learning_rate": 2.3454812889548966e-08, | |
| "loss": 0.0002, | |
| "step": 628 | |
| }, | |
| { | |
| "epoch": 0.10914760914760915, | |
| "grad_norm": 95.62553405761719, | |
| "learning_rate": 2.3454812889548966e-08, | |
| "loss": 1.2849, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.1094941094941095, | |
| "grad_norm": 0.10254587233066559, | |
| "learning_rate": 2.3454812889548966e-08, | |
| "loss": 0.08, | |
| "step": 632 | |
| }, | |
| { | |
| "epoch": 0.10984060984060984, | |
| "grad_norm": 102.76111602783203, | |
| "learning_rate": 2.3454812889548966e-08, | |
| "loss": 1.4606, | |
| "step": 634 | |
| }, | |
| { | |
| "epoch": 0.1101871101871102, | |
| "grad_norm": 0.008490975946187973, | |
| "learning_rate": 2.3454812889548966e-08, | |
| "loss": 0.0003, | |
| "step": 636 | |
| }, | |
| { | |
| "epoch": 0.11053361053361054, | |
| "grad_norm": 0.08022568374872208, | |
| "learning_rate": 2.3454812889548966e-08, | |
| "loss": 0.0004, | |
| "step": 638 | |
| }, | |
| { | |
| "epoch": 0.11088011088011088, | |
| "grad_norm": 79.22454833984375, | |
| "learning_rate": 2.3454812889548966e-08, | |
| "loss": 0.8434, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.11088011088011088, | |
| "eval_1_ratio_diff": 0.0, | |
| "eval_accuracy": 0.8846453624318005, | |
| "eval_f1": 0.8845553822152886, | |
| "eval_loss": 0.6716776490211487, | |
| "eval_precision": 0.8845553822152886, | |
| "eval_recall": 0.8845553822152886, | |
| "eval_runtime": 3802.9999, | |
| "eval_samples_per_second": 0.337, | |
| "eval_steps_per_second": 0.169, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.11122661122661123, | |
| "grad_norm": 0.44935956597328186, | |
| "learning_rate": 2.3454812889548966e-08, | |
| "loss": 0.0074, | |
| "step": 642 | |
| }, | |
| { | |
| "epoch": 0.11157311157311157, | |
| "grad_norm": 0.178094282746315, | |
| "learning_rate": 2.3454812889548966e-08, | |
| "loss": 0.0005, | |
| "step": 644 | |
| }, | |
| { | |
| "epoch": 0.11191961191961192, | |
| "grad_norm": 0.0018482008017599583, | |
| "learning_rate": 2.3454812889548966e-08, | |
| "loss": 0.0001, | |
| "step": 646 | |
| }, | |
| { | |
| "epoch": 0.11226611226611227, | |
| "grad_norm": 0.659843385219574, | |
| "learning_rate": 2.3454812889548966e-08, | |
| "loss": 0.6941, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 0.11261261261261261, | |
| "grad_norm": 104.63482666015625, | |
| "learning_rate": 2.3454812889548966e-08, | |
| "loss": 2.2871, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.11295911295911296, | |
| "grad_norm": 0.0014776097377762198, | |
| "learning_rate": 2.3454812889548966e-08, | |
| "loss": 0.0001, | |
| "step": 652 | |
| }, | |
| { | |
| "epoch": 0.11330561330561331, | |
| "grad_norm": 0.003828430315479636, | |
| "learning_rate": 2.3454812889548966e-08, | |
| "loss": 0.0, | |
| "step": 654 | |
| }, | |
| { | |
| "epoch": 0.11365211365211365, | |
| "grad_norm": 49.71247100830078, | |
| "learning_rate": 2.3454812889548966e-08, | |
| "loss": 2.524, | |
| "step": 656 | |
| }, | |
| { | |
| "epoch": 0.11365211365211365, | |
| "eval_1_ratio_diff": 0.0, | |
| "eval_accuracy": 0.8846453624318005, | |
| "eval_f1": 0.8845553822152886, | |
| "eval_loss": 0.671708881855011, | |
| "eval_precision": 0.8845553822152886, | |
| "eval_recall": 0.8845553822152886, | |
| "eval_runtime": 3802.9149, | |
| "eval_samples_per_second": 0.337, | |
| "eval_steps_per_second": 0.169, | |
| "step": 656 | |
| }, | |
| { | |
| "epoch": 0.113998613998614, | |
| "grad_norm": 91.23478698730469, | |
| "learning_rate": 2.3454812889548966e-08, | |
| "loss": 0.5816, | |
| "step": 658 | |
| }, | |
| { | |
| "epoch": 0.11434511434511435, | |
| "grad_norm": 0.015174830332398415, | |
| "learning_rate": 2.3454812889548966e-08, | |
| "loss": 0.0001, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.1146916146916147, | |
| "grad_norm": 0.04287717118859291, | |
| "learning_rate": 2.3454812889548966e-08, | |
| "loss": 0.0002, | |
| "step": 662 | |
| }, | |
| { | |
| "epoch": 0.11503811503811504, | |
| "grad_norm": 101.90594482421875, | |
| "learning_rate": 2.3454812889548966e-08, | |
| "loss": 3.0768, | |
| "step": 664 | |
| }, | |
| { | |
| "epoch": 0.11538461538461539, | |
| "grad_norm": 0.49903520941734314, | |
| "learning_rate": 2.3454812889548966e-08, | |
| "loss": 0.0015, | |
| "step": 666 | |
| }, | |
| { | |
| "epoch": 0.11573111573111573, | |
| "grad_norm": 7.688581943511963, | |
| "learning_rate": 2.3454812889548966e-08, | |
| "loss": 0.0208, | |
| "step": 668 | |
| }, | |
| { | |
| "epoch": 0.11607761607761607, | |
| "grad_norm": 84.524169921875, | |
| "learning_rate": 2.3454812889548966e-08, | |
| "loss": 0.45, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.11642411642411643, | |
| "grad_norm": 102.66691589355469, | |
| "learning_rate": 2.3454812889548966e-08, | |
| "loss": 2.2867, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 0.11642411642411643, | |
| "eval_1_ratio_diff": 0.0, | |
| "eval_accuracy": 0.8846453624318005, | |
| "eval_f1": 0.8845553822152886, | |
| "eval_loss": 0.6711748242378235, | |
| "eval_precision": 0.8845553822152886, | |
| "eval_recall": 0.8845553822152886, | |
| "eval_runtime": 3803.1931, | |
| "eval_samples_per_second": 0.337, | |
| "eval_steps_per_second": 0.169, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 0.11677061677061677, | |
| "grad_norm": 99.22567749023438, | |
| "learning_rate": 2.3454812889548966e-08, | |
| "loss": 2.1032, | |
| "step": 674 | |
| }, | |
| { | |
| "epoch": 0.11711711711711711, | |
| "grad_norm": 0.6447390913963318, | |
| "learning_rate": 2.3454812889548966e-08, | |
| "loss": 0.0024, | |
| "step": 676 | |
| }, | |
| { | |
| "epoch": 0.11746361746361747, | |
| "grad_norm": 0.9273783564567566, | |
| "learning_rate": 2.3454812889548966e-08, | |
| "loss": 1.8471, | |
| "step": 678 | |
| }, | |
| { | |
| "epoch": 0.11781011781011781, | |
| "grad_norm": 0.02292543835937977, | |
| "learning_rate": 2.3454812889548966e-08, | |
| "loss": 2.831, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.11815661815661815, | |
| "grad_norm": 0.0013903329381719232, | |
| "learning_rate": 2.3454812889548966e-08, | |
| "loss": 0.0, | |
| "step": 682 | |
| }, | |
| { | |
| "epoch": 0.11850311850311851, | |
| "grad_norm": 0.007690189406275749, | |
| "learning_rate": 2.3454812889548966e-08, | |
| "loss": 0.0, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 0.11884961884961885, | |
| "grad_norm": 78.83476257324219, | |
| "learning_rate": 2.3454812889548966e-08, | |
| "loss": 0.3904, | |
| "step": 686 | |
| }, | |
| { | |
| "epoch": 0.1191961191961192, | |
| "grad_norm": 40.72224426269531, | |
| "learning_rate": 2.3454812889548966e-08, | |
| "loss": 0.1344, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 0.1191961191961192, | |
| "eval_1_ratio_diff": 0.000779423226812126, | |
| "eval_accuracy": 0.8838659392049883, | |
| "eval_f1": 0.8838659392049883, | |
| "eval_loss": 0.6707317233085632, | |
| "eval_precision": 0.883177570093458, | |
| "eval_recall": 0.8845553822152886, | |
| "eval_runtime": 3801.8243, | |
| "eval_samples_per_second": 0.337, | |
| "eval_steps_per_second": 0.169, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 0.11954261954261955, | |
| "grad_norm": 0.020250441506505013, | |
| "learning_rate": 2.3454812889548966e-08, | |
| "loss": 0.0034, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.11988911988911989, | |
| "grad_norm": 0.024622227996587753, | |
| "learning_rate": 2.3454812889548966e-08, | |
| "loss": 0.0001, | |
| "step": 692 | |
| }, | |
| { | |
| "epoch": 0.12023562023562023, | |
| "grad_norm": 0.14477354288101196, | |
| "learning_rate": 2.3454812889548966e-08, | |
| "loss": 1.8727, | |
| "step": 694 | |
| }, | |
| { | |
| "epoch": 0.12058212058212059, | |
| "grad_norm": 8.177030563354492, | |
| "learning_rate": 2.3454812889548966e-08, | |
| "loss": 0.0608, | |
| "step": 696 | |
| }, | |
| { | |
| "epoch": 0.12092862092862093, | |
| "grad_norm": 0.03661353141069412, | |
| "learning_rate": 2.3454812889548966e-08, | |
| "loss": 0.0004, | |
| "step": 698 | |
| }, | |
| { | |
| "epoch": 0.12127512127512127, | |
| "grad_norm": 0.255051851272583, | |
| "learning_rate": 2.3454812889548966e-08, | |
| "loss": 0.0007, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.12162162162162163, | |
| "grad_norm": 0.0030992343090474606, | |
| "learning_rate": 2.3454812889548966e-08, | |
| "loss": 0.0002, | |
| "step": 702 | |
| }, | |
| { | |
| "epoch": 0.12196812196812197, | |
| "grad_norm": 0.005417773965746164, | |
| "learning_rate": 2.3454812889548966e-08, | |
| "loss": 0.0024, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 0.12196812196812197, | |
| "eval_1_ratio_diff": 0.000779423226812126, | |
| "eval_accuracy": 0.8838659392049883, | |
| "eval_f1": 0.8838659392049883, | |
| "eval_loss": 0.6705958247184753, | |
| "eval_precision": 0.883177570093458, | |
| "eval_recall": 0.8845553822152886, | |
| "eval_runtime": 3802.2575, | |
| "eval_samples_per_second": 0.337, | |
| "eval_steps_per_second": 0.169, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 0.12231462231462231, | |
| "grad_norm": 0.03200246021151543, | |
| "learning_rate": 2.3454812889548966e-08, | |
| "loss": 1.3592, | |
| "step": 706 | |
| }, | |
| { | |
| "epoch": 0.12266112266112267, | |
| "grad_norm": 99.44486236572266, | |
| "learning_rate": 2.3454812889548966e-08, | |
| "loss": 1.044, | |
| "step": 708 | |
| }, | |
| { | |
| "epoch": 0.12300762300762301, | |
| "grad_norm": 70.03866577148438, | |
| "learning_rate": 2.3454812889548966e-08, | |
| "loss": 0.4255, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.12335412335412335, | |
| "grad_norm": 0.028057171031832695, | |
| "learning_rate": 2.3454812889548966e-08, | |
| "loss": 0.0001, | |
| "step": 712 | |
| }, | |
| { | |
| "epoch": 0.12370062370062371, | |
| "grad_norm": 207.3563232421875, | |
| "learning_rate": 2.3454812889548966e-08, | |
| "loss": 3.7546, | |
| "step": 714 | |
| }, | |
| { | |
| "epoch": 0.12404712404712405, | |
| "grad_norm": 0.0021035911049693823, | |
| "learning_rate": 2.3454812889548966e-08, | |
| "loss": 0.0001, | |
| "step": 716 | |
| }, | |
| { | |
| "epoch": 0.12439362439362439, | |
| "grad_norm": 98.69009399414062, | |
| "learning_rate": 2.3454812889548966e-08, | |
| "loss": 2.157, | |
| "step": 718 | |
| }, | |
| { | |
| "epoch": 0.12474012474012475, | |
| "grad_norm": 47.113651275634766, | |
| "learning_rate": 2.3454812889548966e-08, | |
| "loss": 0.1673, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.12474012474012475, | |
| "eval_1_ratio_diff": 0.000779423226812126, | |
| "eval_accuracy": 0.8838659392049883, | |
| "eval_f1": 0.8838659392049883, | |
| "eval_loss": 0.6703583002090454, | |
| "eval_precision": 0.883177570093458, | |
| "eval_recall": 0.8845553822152886, | |
| "eval_runtime": 3802.4945, | |
| "eval_samples_per_second": 0.337, | |
| "eval_steps_per_second": 0.169, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.12508662508662508, | |
| "grad_norm": 98.36994934082031, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 3.9073, | |
| "step": 722 | |
| }, | |
| { | |
| "epoch": 0.12543312543312543, | |
| "grad_norm": 0.0256715826690197, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 1.0689, | |
| "step": 724 | |
| }, | |
| { | |
| "epoch": 0.1257796257796258, | |
| "grad_norm": 0.0007734175305813551, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 0.0, | |
| "step": 726 | |
| }, | |
| { | |
| "epoch": 0.12612612612612611, | |
| "grad_norm": 0.04009055346250534, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 0.1743, | |
| "step": 728 | |
| }, | |
| { | |
| "epoch": 0.12647262647262647, | |
| "grad_norm": 0.00048400016385130584, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 1.6207, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.12681912681912683, | |
| "grad_norm": 96.37062072753906, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 1.0242, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 0.12716562716562715, | |
| "grad_norm": 101.28919982910156, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 2.2, | |
| "step": 734 | |
| }, | |
| { | |
| "epoch": 0.1275121275121275, | |
| "grad_norm": 99.32202911376953, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 2.3346, | |
| "step": 736 | |
| }, | |
| { | |
| "epoch": 0.1275121275121275, | |
| "eval_1_ratio_diff": 0.000779423226812126, | |
| "eval_accuracy": 0.8838659392049883, | |
| "eval_f1": 0.8838659392049883, | |
| "eval_loss": 0.6703282594680786, | |
| "eval_precision": 0.883177570093458, | |
| "eval_recall": 0.8845553822152886, | |
| "eval_runtime": 3802.7507, | |
| "eval_samples_per_second": 0.337, | |
| "eval_steps_per_second": 0.169, | |
| "step": 736 | |
| }, | |
| { | |
| "epoch": 0.12785862785862787, | |
| "grad_norm": 0.006389938294887543, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 0.0001, | |
| "step": 738 | |
| }, | |
| { | |
| "epoch": 0.1282051282051282, | |
| "grad_norm": 109.00679779052734, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 1.6368, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.12855162855162855, | |
| "grad_norm": 92.6392822265625, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 1.152, | |
| "step": 742 | |
| }, | |
| { | |
| "epoch": 0.1288981288981289, | |
| "grad_norm": 0.07208535820245743, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 0.0002, | |
| "step": 744 | |
| }, | |
| { | |
| "epoch": 0.12924462924462923, | |
| "grad_norm": 0.04709033668041229, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 0.0001, | |
| "step": 746 | |
| }, | |
| { | |
| "epoch": 0.1295911295911296, | |
| "grad_norm": 0.021052101626992226, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 0.0002, | |
| "step": 748 | |
| }, | |
| { | |
| "epoch": 0.12993762993762994, | |
| "grad_norm": 0.29711541533470154, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 2.0028, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.13028413028413027, | |
| "grad_norm": 0.04428931698203087, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 0.0004, | |
| "step": 752 | |
| }, | |
| { | |
| "epoch": 0.13028413028413027, | |
| "eval_1_ratio_diff": 0.000779423226812126, | |
| "eval_accuracy": 0.8838659392049883, | |
| "eval_f1": 0.8838659392049883, | |
| "eval_loss": 0.6703124642372131, | |
| "eval_precision": 0.883177570093458, | |
| "eval_recall": 0.8845553822152886, | |
| "eval_runtime": 3803.2777, | |
| "eval_samples_per_second": 0.337, | |
| "eval_steps_per_second": 0.169, | |
| "step": 752 | |
| }, | |
| { | |
| "epoch": 0.13063063063063063, | |
| "grad_norm": 0.3277018666267395, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 0.7119, | |
| "step": 754 | |
| }, | |
| { | |
| "epoch": 0.13097713097713098, | |
| "grad_norm": 0.014452395960688591, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 0.0001, | |
| "step": 756 | |
| }, | |
| { | |
| "epoch": 0.1313236313236313, | |
| "grad_norm": 0.006934754550457001, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 0.7701, | |
| "step": 758 | |
| }, | |
| { | |
| "epoch": 0.13167013167013167, | |
| "grad_norm": 0.014140780083835125, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 0.0019, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.13201663201663202, | |
| "grad_norm": 0.06956882029771805, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 0.0002, | |
| "step": 762 | |
| }, | |
| { | |
| "epoch": 0.13236313236313235, | |
| "grad_norm": 97.27932739257812, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 1.4485, | |
| "step": 764 | |
| }, | |
| { | |
| "epoch": 0.1327096327096327, | |
| "grad_norm": 97.00716400146484, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 1.0216, | |
| "step": 766 | |
| }, | |
| { | |
| "epoch": 0.13305613305613306, | |
| "grad_norm": 0.0003238619538024068, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 0.0002, | |
| "step": 768 | |
| }, | |
| { | |
| "epoch": 0.13305613305613306, | |
| "eval_1_ratio_diff": 0.000779423226812126, | |
| "eval_accuracy": 0.8838659392049883, | |
| "eval_f1": 0.8838659392049883, | |
| "eval_loss": 0.6702964305877686, | |
| "eval_precision": 0.883177570093458, | |
| "eval_recall": 0.8845553822152886, | |
| "eval_runtime": 3803.644, | |
| "eval_samples_per_second": 0.337, | |
| "eval_steps_per_second": 0.169, | |
| "step": 768 | |
| }, | |
| { | |
| "epoch": 0.1334026334026334, | |
| "grad_norm": 0.010110430419445038, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 0.0, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.13374913374913375, | |
| "grad_norm": 0.04101982340216637, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 0.1396, | |
| "step": 772 | |
| }, | |
| { | |
| "epoch": 0.1340956340956341, | |
| "grad_norm": 0.004015587270259857, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 0.0, | |
| "step": 774 | |
| }, | |
| { | |
| "epoch": 0.13444213444213443, | |
| "grad_norm": 0.25856757164001465, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 0.613, | |
| "step": 776 | |
| }, | |
| { | |
| "epoch": 0.1347886347886348, | |
| "grad_norm": 17.537782669067383, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 0.0506, | |
| "step": 778 | |
| }, | |
| { | |
| "epoch": 0.13513513513513514, | |
| "grad_norm": 0.02263958565890789, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 1.7827, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.13548163548163547, | |
| "grad_norm": 0.00452468590810895, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 0.0, | |
| "step": 782 | |
| }, | |
| { | |
| "epoch": 0.13582813582813583, | |
| "grad_norm": 97.92658233642578, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 1.7204, | |
| "step": 784 | |
| }, | |
| { | |
| "epoch": 0.13582813582813583, | |
| "eval_1_ratio_diff": 0.000779423226812126, | |
| "eval_accuracy": 0.8838659392049883, | |
| "eval_f1": 0.8838659392049883, | |
| "eval_loss": 0.6702899932861328, | |
| "eval_precision": 0.883177570093458, | |
| "eval_recall": 0.8845553822152886, | |
| "eval_runtime": 3803.7672, | |
| "eval_samples_per_second": 0.337, | |
| "eval_steps_per_second": 0.169, | |
| "step": 784 | |
| }, | |
| { | |
| "epoch": 0.13617463617463618, | |
| "grad_norm": 5.811797142028809, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 0.0154, | |
| "step": 786 | |
| }, | |
| { | |
| "epoch": 0.1365211365211365, | |
| "grad_norm": 0.0009533663396723568, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 1.6813, | |
| "step": 788 | |
| }, | |
| { | |
| "epoch": 0.13686763686763687, | |
| "grad_norm": 0.21574796736240387, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 0.0006, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.13721413721413722, | |
| "grad_norm": 0.029973836615681648, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 0.0027, | |
| "step": 792 | |
| }, | |
| { | |
| "epoch": 0.13756063756063755, | |
| "grad_norm": 0.0021266574040055275, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 0.0, | |
| "step": 794 | |
| }, | |
| { | |
| "epoch": 0.1379071379071379, | |
| "grad_norm": 1.722880482673645, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 0.0046, | |
| "step": 796 | |
| }, | |
| { | |
| "epoch": 0.13825363825363826, | |
| "grad_norm": 0.1858731210231781, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 0.0415, | |
| "step": 798 | |
| }, | |
| { | |
| "epoch": 0.1386001386001386, | |
| "grad_norm": 0.15411172807216644, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 0.0006, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.1386001386001386, | |
| "eval_1_ratio_diff": 0.000779423226812126, | |
| "eval_accuracy": 0.8838659392049883, | |
| "eval_f1": 0.8838659392049883, | |
| "eval_loss": 0.6702878475189209, | |
| "eval_precision": 0.883177570093458, | |
| "eval_recall": 0.8845553822152886, | |
| "eval_runtime": 3802.7764, | |
| "eval_samples_per_second": 0.337, | |
| "eval_steps_per_second": 0.169, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.13894663894663895, | |
| "grad_norm": 0.022141670808196068, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 0.033, | |
| "step": 802 | |
| }, | |
| { | |
| "epoch": 0.1392931392931393, | |
| "grad_norm": 6.732369899749756, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 0.0182, | |
| "step": 804 | |
| }, | |
| { | |
| "epoch": 0.13963963963963963, | |
| "grad_norm": 0.018432870507240295, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 0.0001, | |
| "step": 806 | |
| }, | |
| { | |
| "epoch": 0.13998613998613998, | |
| "grad_norm": 45.47744369506836, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 0.1551, | |
| "step": 808 | |
| }, | |
| { | |
| "epoch": 0.14033264033264034, | |
| "grad_norm": 99.37589263916016, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 2.0688, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.14067914067914067, | |
| "grad_norm": 96.54620361328125, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 1.242, | |
| "step": 812 | |
| }, | |
| { | |
| "epoch": 0.14102564102564102, | |
| "grad_norm": 37.72175979614258, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 0.1738, | |
| "step": 814 | |
| }, | |
| { | |
| "epoch": 0.14137214137214138, | |
| "grad_norm": 0.07114993035793304, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 0.0005, | |
| "step": 816 | |
| }, | |
| { | |
| "epoch": 0.14137214137214138, | |
| "eval_1_ratio_diff": 0.000779423226812126, | |
| "eval_accuracy": 0.8838659392049883, | |
| "eval_f1": 0.8838659392049883, | |
| "eval_loss": 0.6702972054481506, | |
| "eval_precision": 0.883177570093458, | |
| "eval_recall": 0.8845553822152886, | |
| "eval_runtime": 3802.7247, | |
| "eval_samples_per_second": 0.337, | |
| "eval_steps_per_second": 0.169, | |
| "step": 816 | |
| }, | |
| { | |
| "epoch": 0.1417186417186417, | |
| "grad_norm": 0.011865837499499321, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 0.6889, | |
| "step": 818 | |
| }, | |
| { | |
| "epoch": 0.14206514206514206, | |
| "grad_norm": 0.008003904484212399, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 0.0004, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.14241164241164242, | |
| "grad_norm": 0.012314215302467346, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 0.0002, | |
| "step": 822 | |
| }, | |
| { | |
| "epoch": 0.14275814275814275, | |
| "grad_norm": 14.464447021484375, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 0.0418, | |
| "step": 824 | |
| }, | |
| { | |
| "epoch": 0.1431046431046431, | |
| "grad_norm": 0.0005685996729880571, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 0.0002, | |
| "step": 826 | |
| }, | |
| { | |
| "epoch": 0.14345114345114346, | |
| "grad_norm": 0.08294668048620224, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 0.0974, | |
| "step": 828 | |
| }, | |
| { | |
| "epoch": 0.1437976437976438, | |
| "grad_norm": 0.041373468935489655, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 0.0002, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.14414414414414414, | |
| "grad_norm": 0.04561450332403183, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 0.0003, | |
| "step": 832 | |
| }, | |
| { | |
| "epoch": 0.14414414414414414, | |
| "eval_1_ratio_diff": 0.000779423226812126, | |
| "eval_accuracy": 0.8838659392049883, | |
| "eval_f1": 0.8838659392049883, | |
| "eval_loss": 0.6702831387519836, | |
| "eval_precision": 0.883177570093458, | |
| "eval_recall": 0.8845553822152886, | |
| "eval_runtime": 3802.3171, | |
| "eval_samples_per_second": 0.337, | |
| "eval_steps_per_second": 0.169, | |
| "step": 832 | |
| }, | |
| { | |
| "epoch": 0.1444906444906445, | |
| "grad_norm": 0.10258769989013672, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 0.0004, | |
| "step": 834 | |
| }, | |
| { | |
| "epoch": 0.14483714483714483, | |
| "grad_norm": 0.01617802493274212, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 0.0001, | |
| "step": 836 | |
| }, | |
| { | |
| "epoch": 0.14518364518364518, | |
| "grad_norm": 1.6996452808380127, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 0.1919, | |
| "step": 838 | |
| }, | |
| { | |
| "epoch": 0.14553014553014554, | |
| "grad_norm": 0.003590661333873868, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 0.0, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.14587664587664587, | |
| "grad_norm": 13.620148658752441, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 0.0395, | |
| "step": 842 | |
| }, | |
| { | |
| "epoch": 0.14622314622314622, | |
| "grad_norm": 1.1218122243881226, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 1.6076, | |
| "step": 844 | |
| }, | |
| { | |
| "epoch": 0.14656964656964658, | |
| "grad_norm": 1.5158343315124512, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 1.3674, | |
| "step": 846 | |
| }, | |
| { | |
| "epoch": 0.1469161469161469, | |
| "grad_norm": 0.009215708822011948, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 0.0003, | |
| "step": 848 | |
| }, | |
| { | |
| "epoch": 0.1469161469161469, | |
| "eval_1_ratio_diff": 0.000779423226812126, | |
| "eval_accuracy": 0.8838659392049883, | |
| "eval_f1": 0.8838659392049883, | |
| "eval_loss": 0.6702794432640076, | |
| "eval_precision": 0.883177570093458, | |
| "eval_recall": 0.8845553822152886, | |
| "eval_runtime": 3802.8989, | |
| "eval_samples_per_second": 0.337, | |
| "eval_steps_per_second": 0.169, | |
| "step": 848 | |
| }, | |
| { | |
| "epoch": 0.14726264726264726, | |
| "grad_norm": 90.3687973022461, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 0.6296, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.14760914760914762, | |
| "grad_norm": 0.01208855863660574, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 0.0328, | |
| "step": 852 | |
| }, | |
| { | |
| "epoch": 0.14795564795564795, | |
| "grad_norm": 0.09359738975763321, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 2.2666, | |
| "step": 854 | |
| }, | |
| { | |
| "epoch": 0.1483021483021483, | |
| "grad_norm": 0.0012692202581092715, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 0.0001, | |
| "step": 856 | |
| }, | |
| { | |
| "epoch": 0.14864864864864866, | |
| "grad_norm": 0.02175830490887165, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 0.0001, | |
| "step": 858 | |
| }, | |
| { | |
| "epoch": 0.14899514899514898, | |
| "grad_norm": 60.99715042114258, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 1.9586, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.14934164934164934, | |
| "grad_norm": 0.07070188969373703, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 0.9788, | |
| "step": 862 | |
| }, | |
| { | |
| "epoch": 0.1496881496881497, | |
| "grad_norm": 0.05645833909511566, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 0.0004, | |
| "step": 864 | |
| }, | |
| { | |
| "epoch": 0.1496881496881497, | |
| "eval_1_ratio_diff": 0.000779423226812126, | |
| "eval_accuracy": 0.8838659392049883, | |
| "eval_f1": 0.8838659392049883, | |
| "eval_loss": 0.6702664494514465, | |
| "eval_precision": 0.883177570093458, | |
| "eval_recall": 0.8845553822152886, | |
| "eval_runtime": 3802.2679, | |
| "eval_samples_per_second": 0.337, | |
| "eval_steps_per_second": 0.169, | |
| "step": 864 | |
| }, | |
| { | |
| "epoch": 0.15003465003465002, | |
| "grad_norm": 0.04621017724275589, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 0.0001, | |
| "step": 866 | |
| }, | |
| { | |
| "epoch": 0.15038115038115038, | |
| "grad_norm": 98.49105072021484, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 1.8225, | |
| "step": 868 | |
| }, | |
| { | |
| "epoch": 0.15072765072765074, | |
| "grad_norm": 0.5321390628814697, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 0.0015, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.15107415107415106, | |
| "grad_norm": 0.09718296676874161, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 0.0006, | |
| "step": 872 | |
| }, | |
| { | |
| "epoch": 0.15142065142065142, | |
| "grad_norm": 0.001202503452077508, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 0.7257, | |
| "step": 874 | |
| }, | |
| { | |
| "epoch": 0.15176715176715178, | |
| "grad_norm": 0.02575511857867241, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 0.0279, | |
| "step": 876 | |
| }, | |
| { | |
| "epoch": 0.1521136521136521, | |
| "grad_norm": 0.4885219633579254, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 0.0013, | |
| "step": 878 | |
| }, | |
| { | |
| "epoch": 0.15246015246015246, | |
| "grad_norm": 0.019609682261943817, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 0.0002, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.15246015246015246, | |
| "eval_1_ratio_diff": 0.000779423226812126, | |
| "eval_accuracy": 0.8838659392049883, | |
| "eval_f1": 0.8838659392049883, | |
| "eval_loss": 0.6702576279640198, | |
| "eval_precision": 0.883177570093458, | |
| "eval_recall": 0.8845553822152886, | |
| "eval_runtime": 3802.8587, | |
| "eval_samples_per_second": 0.337, | |
| "eval_steps_per_second": 0.169, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.15280665280665282, | |
| "grad_norm": 0.0061721052043139935, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 0.0001, | |
| "step": 882 | |
| }, | |
| { | |
| "epoch": 0.15315315315315314, | |
| "grad_norm": 0.013560596853494644, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 0.0007, | |
| "step": 884 | |
| }, | |
| { | |
| "epoch": 0.1534996534996535, | |
| "grad_norm": 0.3585527837276459, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 0.0009, | |
| "step": 886 | |
| }, | |
| { | |
| "epoch": 0.15384615384615385, | |
| "grad_norm": 2.086585521697998, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 0.3566, | |
| "step": 888 | |
| }, | |
| { | |
| "epoch": 0.15419265419265418, | |
| "grad_norm": 0.023040562868118286, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 0.0001, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.15453915453915454, | |
| "grad_norm": 0.017716137692332268, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 0.0099, | |
| "step": 892 | |
| }, | |
| { | |
| "epoch": 0.1548856548856549, | |
| "grad_norm": 137.6589813232422, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 2.9727, | |
| "step": 894 | |
| }, | |
| { | |
| "epoch": 0.15523215523215522, | |
| "grad_norm": 0.09003114700317383, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 0.674, | |
| "step": 896 | |
| }, | |
| { | |
| "epoch": 0.15523215523215522, | |
| "eval_1_ratio_diff": 0.000779423226812126, | |
| "eval_accuracy": 0.8838659392049883, | |
| "eval_f1": 0.8838659392049883, | |
| "eval_loss": 0.6702753305435181, | |
| "eval_precision": 0.883177570093458, | |
| "eval_recall": 0.8845553822152886, | |
| "eval_runtime": 3802.4603, | |
| "eval_samples_per_second": 0.337, | |
| "eval_steps_per_second": 0.169, | |
| "step": 896 | |
| }, | |
| { | |
| "epoch": 0.15557865557865558, | |
| "grad_norm": 0.014279451221227646, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 1.1168, | |
| "step": 898 | |
| }, | |
| { | |
| "epoch": 0.15592515592515593, | |
| "grad_norm": 3.479095220565796, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 0.0089, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.15627165627165626, | |
| "grad_norm": 0.09242203831672668, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 1.8641, | |
| "step": 902 | |
| }, | |
| { | |
| "epoch": 0.15661815661815662, | |
| "grad_norm": 0.01231786422431469, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 0.0002, | |
| "step": 904 | |
| }, | |
| { | |
| "epoch": 0.15696465696465697, | |
| "grad_norm": 0.10587727278470993, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 0.0006, | |
| "step": 906 | |
| }, | |
| { | |
| "epoch": 0.1573111573111573, | |
| "grad_norm": 70.37655639648438, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 0.2845, | |
| "step": 908 | |
| }, | |
| { | |
| "epoch": 0.15765765765765766, | |
| "grad_norm": 19.5317325592041, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 0.3898, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.158004158004158, | |
| "grad_norm": 0.010684994980692863, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 0.0001, | |
| "step": 912 | |
| }, | |
| { | |
| "epoch": 0.158004158004158, | |
| "eval_1_ratio_diff": 0.000779423226812126, | |
| "eval_accuracy": 0.8838659392049883, | |
| "eval_f1": 0.8838659392049883, | |
| "eval_loss": 0.6702710390090942, | |
| "eval_precision": 0.883177570093458, | |
| "eval_recall": 0.8845553822152886, | |
| "eval_runtime": 3803.9983, | |
| "eval_samples_per_second": 0.337, | |
| "eval_steps_per_second": 0.169, | |
| "step": 912 | |
| }, | |
| { | |
| "epoch": 0.15835065835065834, | |
| "grad_norm": 47.22987365722656, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 0.3641, | |
| "step": 914 | |
| }, | |
| { | |
| "epoch": 0.1586971586971587, | |
| "grad_norm": 0.014747441746294498, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 0.0001, | |
| "step": 916 | |
| }, | |
| { | |
| "epoch": 0.15904365904365905, | |
| "grad_norm": 57.34768295288086, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 6.9391, | |
| "step": 918 | |
| }, | |
| { | |
| "epoch": 0.15939015939015938, | |
| "grad_norm": 0.011359051801264286, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 0.9423, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.15973665973665974, | |
| "grad_norm": 0.0191196296364069, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 0.0001, | |
| "step": 922 | |
| }, | |
| { | |
| "epoch": 0.1600831600831601, | |
| "grad_norm": 0.665374219417572, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 0.0018, | |
| "step": 924 | |
| }, | |
| { | |
| "epoch": 0.16042966042966042, | |
| "grad_norm": 0.0511230044066906, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 0.0002, | |
| "step": 926 | |
| }, | |
| { | |
| "epoch": 0.16077616077616078, | |
| "grad_norm": 0.04708551988005638, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 0.0001, | |
| "step": 928 | |
| }, | |
| { | |
| "epoch": 0.16077616077616078, | |
| "eval_1_ratio_diff": 0.000779423226812126, | |
| "eval_accuracy": 0.8838659392049883, | |
| "eval_f1": 0.8838659392049883, | |
| "eval_loss": 0.6702331900596619, | |
| "eval_precision": 0.883177570093458, | |
| "eval_recall": 0.8845553822152886, | |
| "eval_runtime": 3805.3893, | |
| "eval_samples_per_second": 0.337, | |
| "eval_steps_per_second": 0.169, | |
| "step": 928 | |
| }, | |
| { | |
| "epoch": 0.16112266112266113, | |
| "grad_norm": 102.17916870117188, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 1.7039, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.16146916146916146, | |
| "grad_norm": 0.40711769461631775, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 0.0011, | |
| "step": 932 | |
| }, | |
| { | |
| "epoch": 0.16181566181566182, | |
| "grad_norm": 90.60123443603516, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 0.5507, | |
| "step": 934 | |
| }, | |
| { | |
| "epoch": 0.16216216216216217, | |
| "grad_norm": 0.001160971587523818, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 0.0001, | |
| "step": 936 | |
| }, | |
| { | |
| "epoch": 0.1625086625086625, | |
| "grad_norm": 4.936448574066162, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 0.0126, | |
| "step": 938 | |
| }, | |
| { | |
| "epoch": 0.16285516285516285, | |
| "grad_norm": 0.0048201605677604675, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 0.0, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.1632016632016632, | |
| "grad_norm": 0.0016580702504143119, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 0.8544, | |
| "step": 942 | |
| }, | |
| { | |
| "epoch": 0.16354816354816354, | |
| "grad_norm": 0.019202932715415955, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 0.0005, | |
| "step": 944 | |
| }, | |
| { | |
| "epoch": 0.16354816354816354, | |
| "eval_1_ratio_diff": 0.000779423226812126, | |
| "eval_accuracy": 0.8838659392049883, | |
| "eval_f1": 0.8838659392049883, | |
| "eval_loss": 0.6702118515968323, | |
| "eval_precision": 0.883177570093458, | |
| "eval_recall": 0.8845553822152886, | |
| "eval_runtime": 3805.2809, | |
| "eval_samples_per_second": 0.337, | |
| "eval_steps_per_second": 0.169, | |
| "step": 944 | |
| }, | |
| { | |
| "epoch": 0.1638946638946639, | |
| "grad_norm": 0.028465600684285164, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 0.0001, | |
| "step": 946 | |
| }, | |
| { | |
| "epoch": 0.16424116424116425, | |
| "grad_norm": 0.009240957908332348, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 0.0001, | |
| "step": 948 | |
| }, | |
| { | |
| "epoch": 0.16458766458766458, | |
| "grad_norm": 101.55481719970703, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 0.9137, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.16493416493416493, | |
| "grad_norm": 0.03400309756398201, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 0.0002, | |
| "step": 952 | |
| }, | |
| { | |
| "epoch": 0.1652806652806653, | |
| "grad_norm": 0.03532765433192253, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 0.0018, | |
| "step": 954 | |
| }, | |
| { | |
| "epoch": 0.16562716562716562, | |
| "grad_norm": 0.0010142261162400246, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 1.0985, | |
| "step": 956 | |
| }, | |
| { | |
| "epoch": 0.16597366597366597, | |
| "grad_norm": 0.0018879002891480923, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 0.0004, | |
| "step": 958 | |
| }, | |
| { | |
| "epoch": 0.16632016632016633, | |
| "grad_norm": 99.82229614257812, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 1.0414, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.16632016632016633, | |
| "eval_1_ratio_diff": 0.000779423226812126, | |
| "eval_accuracy": 0.8838659392049883, | |
| "eval_f1": 0.8838659392049883, | |
| "eval_loss": 0.6702096462249756, | |
| "eval_precision": 0.883177570093458, | |
| "eval_recall": 0.8845553822152886, | |
| "eval_runtime": 3806.2291, | |
| "eval_samples_per_second": 0.337, | |
| "eval_steps_per_second": 0.169, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.16666666666666666, | |
| "grad_norm": 0.024355348199605942, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 0.0001, | |
| "step": 962 | |
| }, | |
| { | |
| "epoch": 0.167013167013167, | |
| "grad_norm": 0.0004158661758992821, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 0.0001, | |
| "step": 964 | |
| }, | |
| { | |
| "epoch": 0.16735966735966737, | |
| "grad_norm": 0.020543742924928665, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 0.0001, | |
| "step": 966 | |
| }, | |
| { | |
| "epoch": 0.1677061677061677, | |
| "grad_norm": 0.08691411465406418, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 0.0002, | |
| "step": 968 | |
| }, | |
| { | |
| "epoch": 0.16805266805266805, | |
| "grad_norm": 7.804852485656738, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 0.1703, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 0.1683991683991684, | |
| "grad_norm": 0.007481154054403305, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 0.0002, | |
| "step": 972 | |
| }, | |
| { | |
| "epoch": 0.16874566874566874, | |
| "grad_norm": 102.13117218017578, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 2.2312, | |
| "step": 974 | |
| }, | |
| { | |
| "epoch": 0.1690921690921691, | |
| "grad_norm": 49.337467193603516, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 0.1639, | |
| "step": 976 | |
| }, | |
| { | |
| "epoch": 0.1690921690921691, | |
| "eval_1_ratio_diff": 0.000779423226812126, | |
| "eval_accuracy": 0.8838659392049883, | |
| "eval_f1": 0.8838659392049883, | |
| "eval_loss": 0.6701992154121399, | |
| "eval_precision": 0.883177570093458, | |
| "eval_recall": 0.8845553822152886, | |
| "eval_runtime": 3805.4338, | |
| "eval_samples_per_second": 0.337, | |
| "eval_steps_per_second": 0.169, | |
| "step": 976 | |
| }, | |
| { | |
| "epoch": 0.16943866943866945, | |
| "grad_norm": 91.43649291992188, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 0.643, | |
| "step": 978 | |
| }, | |
| { | |
| "epoch": 0.16978516978516978, | |
| "grad_norm": 0.013101785443723202, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 0.0001, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 0.17013167013167013, | |
| "grad_norm": 0.19214844703674316, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 1.8884, | |
| "step": 982 | |
| }, | |
| { | |
| "epoch": 0.1704781704781705, | |
| "grad_norm": 99.21739959716797, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 2.0275, | |
| "step": 984 | |
| }, | |
| { | |
| "epoch": 0.17082467082467082, | |
| "grad_norm": 0.042131222784519196, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 0.0002, | |
| "step": 986 | |
| }, | |
| { | |
| "epoch": 0.17117117117117117, | |
| "grad_norm": 0.005484807770699263, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 0.1135, | |
| "step": 988 | |
| }, | |
| { | |
| "epoch": 0.17151767151767153, | |
| "grad_norm": 72.73301696777344, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 0.31, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 0.17186417186417186, | |
| "grad_norm": 102.85774230957031, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 1.533, | |
| "step": 992 | |
| }, | |
| { | |
| "epoch": 0.17186417186417186, | |
| "eval_1_ratio_diff": 0.000779423226812126, | |
| "eval_accuracy": 0.8838659392049883, | |
| "eval_f1": 0.8838659392049883, | |
| "eval_loss": 0.6701642870903015, | |
| "eval_precision": 0.883177570093458, | |
| "eval_recall": 0.8845553822152886, | |
| "eval_runtime": 3802.1444, | |
| "eval_samples_per_second": 0.337, | |
| "eval_steps_per_second": 0.169, | |
| "step": 992 | |
| }, | |
| { | |
| "epoch": 0.1722106722106722, | |
| "grad_norm": 5.119304656982422, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 0.1034, | |
| "step": 994 | |
| }, | |
| { | |
| "epoch": 0.17255717255717257, | |
| "grad_norm": 0.05094608664512634, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 0.0011, | |
| "step": 996 | |
| }, | |
| { | |
| "epoch": 0.1729036729036729, | |
| "grad_norm": 0.018024293705821037, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 0.0002, | |
| "step": 998 | |
| }, | |
| { | |
| "epoch": 0.17325017325017325, | |
| "grad_norm": 0.012675195932388306, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 0.0, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.1735966735966736, | |
| "grad_norm": 101.12162780761719, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 2.4301, | |
| "step": 1002 | |
| }, | |
| { | |
| "epoch": 0.17394317394317393, | |
| "grad_norm": 0.08050279319286346, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 0.0004, | |
| "step": 1004 | |
| }, | |
| { | |
| "epoch": 0.1742896742896743, | |
| "grad_norm": 0.004985997918993235, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 0.0001, | |
| "step": 1006 | |
| }, | |
| { | |
| "epoch": 0.17463617463617465, | |
| "grad_norm": 0.0335320308804512, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 0.0001, | |
| "step": 1008 | |
| }, | |
| { | |
| "epoch": 0.17463617463617465, | |
| "eval_1_ratio_diff": 0.000779423226812126, | |
| "eval_accuracy": 0.8838659392049883, | |
| "eval_f1": 0.8838659392049883, | |
| "eval_loss": 0.6701443195343018, | |
| "eval_precision": 0.883177570093458, | |
| "eval_recall": 0.8845553822152886, | |
| "eval_runtime": 3803.1697, | |
| "eval_samples_per_second": 0.337, | |
| "eval_steps_per_second": 0.169, | |
| "step": 1008 | |
| }, | |
| { | |
| "epoch": 0.17498267498267497, | |
| "grad_norm": 58.16429138183594, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 0.2296, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 0.17532917532917533, | |
| "grad_norm": 0.015778416767716408, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 0.0024, | |
| "step": 1012 | |
| }, | |
| { | |
| "epoch": 0.17567567567567569, | |
| "grad_norm": 0.007109211757779121, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 0.0001, | |
| "step": 1014 | |
| }, | |
| { | |
| "epoch": 0.176022176022176, | |
| "grad_norm": 0.001206032931804657, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 0.0, | |
| "step": 1016 | |
| }, | |
| { | |
| "epoch": 0.17636867636867637, | |
| "grad_norm": 0.0016432058764621615, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 2.3836, | |
| "step": 1018 | |
| }, | |
| { | |
| "epoch": 0.17671517671517672, | |
| "grad_norm": 0.0008347645052708685, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 0.0, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 0.17706167706167705, | |
| "grad_norm": 0.03109140321612358, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 0.0004, | |
| "step": 1022 | |
| }, | |
| { | |
| "epoch": 0.1774081774081774, | |
| "grad_norm": 0.0419655367732048, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 0.5482, | |
| "step": 1024 | |
| }, | |
| { | |
| "epoch": 0.1774081774081774, | |
| "eval_1_ratio_diff": 0.000779423226812126, | |
| "eval_accuracy": 0.8838659392049883, | |
| "eval_f1": 0.8838659392049883, | |
| "eval_loss": 0.6701238751411438, | |
| "eval_precision": 0.883177570093458, | |
| "eval_recall": 0.8845553822152886, | |
| "eval_runtime": 3804.5554, | |
| "eval_samples_per_second": 0.337, | |
| "eval_steps_per_second": 0.169, | |
| "step": 1024 | |
| }, | |
| { | |
| "epoch": 0.17775467775467776, | |
| "grad_norm": 0.032015491276979446, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 0.0001, | |
| "step": 1026 | |
| }, | |
| { | |
| "epoch": 0.1781011781011781, | |
| "grad_norm": 0.0072747585363686085, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 0.0011, | |
| "step": 1028 | |
| }, | |
| { | |
| "epoch": 0.17844767844767845, | |
| "grad_norm": 0.22382956743240356, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 0.0142, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 0.1787941787941788, | |
| "grad_norm": 9.476286888122559, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 0.041, | |
| "step": 1032 | |
| }, | |
| { | |
| "epoch": 0.17914067914067913, | |
| "grad_norm": 61.077667236328125, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 4.2437, | |
| "step": 1034 | |
| }, | |
| { | |
| "epoch": 0.1794871794871795, | |
| "grad_norm": 0.008154085837304592, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 1.9777, | |
| "step": 1036 | |
| }, | |
| { | |
| "epoch": 0.17983367983367984, | |
| "grad_norm": 0.6985426545143127, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 0.8364, | |
| "step": 1038 | |
| }, | |
| { | |
| "epoch": 0.18018018018018017, | |
| "grad_norm": 0.012879273854196072, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 0.0002, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 0.18018018018018017, | |
| "eval_1_ratio_diff": 0.000779423226812126, | |
| "eval_accuracy": 0.8838659392049883, | |
| "eval_f1": 0.8838659392049883, | |
| "eval_loss": 0.6700866222381592, | |
| "eval_precision": 0.883177570093458, | |
| "eval_recall": 0.8845553822152886, | |
| "eval_runtime": 3806.3018, | |
| "eval_samples_per_second": 0.337, | |
| "eval_steps_per_second": 0.169, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 0.18052668052668053, | |
| "grad_norm": 98.58509826660156, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 1.8793, | |
| "step": 1042 | |
| }, | |
| { | |
| "epoch": 0.18087318087318088, | |
| "grad_norm": 0.18119540810585022, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 0.0005, | |
| "step": 1044 | |
| }, | |
| { | |
| "epoch": 0.1812196812196812, | |
| "grad_norm": 0.866092324256897, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 1.6971, | |
| "step": 1046 | |
| }, | |
| { | |
| "epoch": 0.18156618156618157, | |
| "grad_norm": 105.97624206542969, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 2.1413, | |
| "step": 1048 | |
| }, | |
| { | |
| "epoch": 0.18191268191268192, | |
| "grad_norm": 20.15652084350586, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 0.9196, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.18225918225918225, | |
| "grad_norm": 0.022677650675177574, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 0.2512, | |
| "step": 1052 | |
| }, | |
| { | |
| "epoch": 0.1826056826056826, | |
| "grad_norm": 0.011547055095434189, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 2.2487, | |
| "step": 1054 | |
| }, | |
| { | |
| "epoch": 0.18295218295218296, | |
| "grad_norm": 0.012869571335613728, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 0.0001, | |
| "step": 1056 | |
| }, | |
| { | |
| "epoch": 0.18295218295218296, | |
| "eval_1_ratio_diff": 0.000779423226812126, | |
| "eval_accuracy": 0.8838659392049883, | |
| "eval_f1": 0.8838659392049883, | |
| "eval_loss": 0.6700414419174194, | |
| "eval_precision": 0.883177570093458, | |
| "eval_recall": 0.8845553822152886, | |
| "eval_runtime": 3803.3482, | |
| "eval_samples_per_second": 0.337, | |
| "eval_steps_per_second": 0.169, | |
| "step": 1056 | |
| }, | |
| { | |
| "epoch": 0.1832986832986833, | |
| "grad_norm": 0.03256943076848984, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 0.0001, | |
| "step": 1058 | |
| }, | |
| { | |
| "epoch": 0.18364518364518365, | |
| "grad_norm": 0.031881775707006454, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 0.0076, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 0.183991683991684, | |
| "grad_norm": 2.0490903854370117, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 0.3692, | |
| "step": 1062 | |
| }, | |
| { | |
| "epoch": 0.18433818433818433, | |
| "grad_norm": 103.4950180053711, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 1.1407, | |
| "step": 1064 | |
| }, | |
| { | |
| "epoch": 0.18468468468468469, | |
| "grad_norm": 17.134321212768555, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 0.0473, | |
| "step": 1066 | |
| }, | |
| { | |
| "epoch": 0.18503118503118504, | |
| "grad_norm": 0.029201500117778778, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 0.0009, | |
| "step": 1068 | |
| }, | |
| { | |
| "epoch": 0.18537768537768537, | |
| "grad_norm": 0.01404550950974226, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 0.0005, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 0.18572418572418573, | |
| "grad_norm": 0.020491065457463264, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 0.0457, | |
| "step": 1072 | |
| }, | |
| { | |
| "epoch": 0.18572418572418573, | |
| "eval_1_ratio_diff": 0.000779423226812126, | |
| "eval_accuracy": 0.8838659392049883, | |
| "eval_f1": 0.8838659392049883, | |
| "eval_loss": 0.6700317859649658, | |
| "eval_precision": 0.883177570093458, | |
| "eval_recall": 0.8845553822152886, | |
| "eval_runtime": 3804.41, | |
| "eval_samples_per_second": 0.337, | |
| "eval_steps_per_second": 0.169, | |
| "step": 1072 | |
| }, | |
| { | |
| "epoch": 0.18607068607068608, | |
| "grad_norm": 0.025645218789577484, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 0.0027, | |
| "step": 1074 | |
| }, | |
| { | |
| "epoch": 0.1864171864171864, | |
| "grad_norm": 0.014836100861430168, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 0.0001, | |
| "step": 1076 | |
| }, | |
| { | |
| "epoch": 0.18676368676368676, | |
| "grad_norm": 0.026932615786790848, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 1.992, | |
| "step": 1078 | |
| }, | |
| { | |
| "epoch": 0.18711018711018712, | |
| "grad_norm": 6.111782550811768, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 0.0163, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 0.18745668745668745, | |
| "grad_norm": 106.83539581298828, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 2.4242, | |
| "step": 1082 | |
| }, | |
| { | |
| "epoch": 0.1878031878031878, | |
| "grad_norm": 0.020867686718702316, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 0.0007, | |
| "step": 1084 | |
| }, | |
| { | |
| "epoch": 0.18814968814968816, | |
| "grad_norm": 0.005603364668786526, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 0.0, | |
| "step": 1086 | |
| }, | |
| { | |
| "epoch": 0.1884961884961885, | |
| "grad_norm": 0.012470322661101818, | |
| "learning_rate": 2.345481288954897e-09, | |
| "loss": 1.0732, | |
| "step": 1088 | |
| }, | |
| { | |
| "epoch": 0.1884961884961885, | |
| "eval_1_ratio_diff": 0.000779423226812126, | |
| "eval_accuracy": 0.8838659392049883, | |
| "eval_f1": 0.8838659392049883, | |
| "eval_loss": 0.6700374484062195, | |
| "eval_precision": 0.883177570093458, | |
| "eval_recall": 0.8845553822152886, | |
| "eval_runtime": 3802.6858, | |
| "eval_samples_per_second": 0.337, | |
| "eval_steps_per_second": 0.169, | |
| "step": 1088 | |
| } | |
| ], | |
| "logging_steps": 2, | |
| "max_steps": 46176, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 8, | |
| "save_steps": 64, | |
| "stateful_callbacks": { | |
| "EarlyStoppingCallback": { | |
| "args": { | |
| "early_stopping_patience": 100, | |
| "early_stopping_threshold": 0.0 | |
| }, | |
| "attributes": { | |
| "early_stopping_patience_counter": 41 | |
| } | |
| }, | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 3.20498247008256e+17, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |