| { | |
| "best_global_step": 67000, | |
| "best_metric": 1.3910651206970215, | |
| "best_model_checkpoint": "/home/auguste/Desktop/eDNA/TeleoClassification/scripts/DNABert2/experiments/masking_training/outputs/masking_teleo/checkpoints/checkpoint-67000", | |
| "epoch": 108.06451612903226, | |
| "eval_steps": 1000, | |
| "global_step": 67000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0016129032258064516, | |
| "grad_norm": 107.56168365478516, | |
| "learning_rate": 2e-05, | |
| "loss": 7.9233, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 1.6129032258064515, | |
| "grad_norm": 16.32627296447754, | |
| "learning_rate": 1.9785161290322584e-05, | |
| "loss": 3.0779, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 1.6129032258064515, | |
| "eval_loss": 2.738837718963623, | |
| "eval_model_preparation_time": 0.0012, | |
| "eval_runtime": 0.2955, | |
| "eval_samples_per_second": 1766.611, | |
| "eval_steps_per_second": 111.682, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 3.225806451612903, | |
| "grad_norm": 12.881124496459961, | |
| "learning_rate": 1.9570107526881724e-05, | |
| "loss": 2.506, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 3.225806451612903, | |
| "eval_loss": 2.4902684688568115, | |
| "eval_model_preparation_time": 0.0012, | |
| "eval_runtime": 0.2968, | |
| "eval_samples_per_second": 1758.892, | |
| "eval_steps_per_second": 111.194, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 4.838709677419355, | |
| "grad_norm": 12.914713859558105, | |
| "learning_rate": 1.935505376344086e-05, | |
| "loss": 2.734, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 4.838709677419355, | |
| "eval_loss": 2.305058479309082, | |
| "eval_model_preparation_time": 0.0012, | |
| "eval_runtime": 0.3006, | |
| "eval_samples_per_second": 1736.761, | |
| "eval_steps_per_second": 109.795, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 6.451612903225806, | |
| "grad_norm": 13.617836952209473, | |
| "learning_rate": 1.914e-05, | |
| "loss": 2.2267, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 6.451612903225806, | |
| "eval_loss": 2.3899621963500977, | |
| "eval_model_preparation_time": 0.0012, | |
| "eval_runtime": 0.3004, | |
| "eval_samples_per_second": 1737.628, | |
| "eval_steps_per_second": 109.85, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 8.064516129032258, | |
| "grad_norm": 11.493875503540039, | |
| "learning_rate": 1.892494623655914e-05, | |
| "loss": 2.1095, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 8.064516129032258, | |
| "eval_loss": 2.1791865825653076, | |
| "eval_model_preparation_time": 0.0012, | |
| "eval_runtime": 0.2998, | |
| "eval_samples_per_second": 1740.976, | |
| "eval_steps_per_second": 110.062, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 9.67741935483871, | |
| "grad_norm": 16.104379653930664, | |
| "learning_rate": 1.870989247311828e-05, | |
| "loss": 1.9622, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 9.67741935483871, | |
| "eval_loss": 2.0534751415252686, | |
| "eval_model_preparation_time": 0.0012, | |
| "eval_runtime": 0.3144, | |
| "eval_samples_per_second": 1660.314, | |
| "eval_steps_per_second": 104.962, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 11.290322580645162, | |
| "grad_norm": 15.933501243591309, | |
| "learning_rate": 1.8494838709677422e-05, | |
| "loss": 1.8713, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 11.290322580645162, | |
| "eval_loss": 2.1255111694335938, | |
| "eval_model_preparation_time": 0.0012, | |
| "eval_runtime": 0.31, | |
| "eval_samples_per_second": 1684.034, | |
| "eval_steps_per_second": 106.462, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 12.903225806451612, | |
| "grad_norm": 9.397466659545898, | |
| "learning_rate": 1.8279784946236562e-05, | |
| "loss": 1.7906, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 12.903225806451612, | |
| "eval_loss": 1.9397249221801758, | |
| "eval_model_preparation_time": 0.0012, | |
| "eval_runtime": 0.3026, | |
| "eval_samples_per_second": 1724.803, | |
| "eval_steps_per_second": 109.039, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 14.516129032258064, | |
| "grad_norm": 14.291478157043457, | |
| "learning_rate": 1.8064731182795702e-05, | |
| "loss": 1.7149, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 14.516129032258064, | |
| "eval_loss": 1.8910889625549316, | |
| "eval_model_preparation_time": 0.0012, | |
| "eval_runtime": 0.3013, | |
| "eval_samples_per_second": 1732.385, | |
| "eval_steps_per_second": 109.519, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 16.129032258064516, | |
| "grad_norm": 15.776030540466309, | |
| "learning_rate": 1.784967741935484e-05, | |
| "loss": 1.634, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 16.129032258064516, | |
| "eval_loss": 1.893878698348999, | |
| "eval_model_preparation_time": 0.0012, | |
| "eval_runtime": 0.3023, | |
| "eval_samples_per_second": 1726.506, | |
| "eval_steps_per_second": 109.147, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 17.741935483870968, | |
| "grad_norm": 12.53177547454834, | |
| "learning_rate": 1.763462365591398e-05, | |
| "loss": 1.5991, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 17.741935483870968, | |
| "eval_loss": 1.8701565265655518, | |
| "eval_model_preparation_time": 0.0012, | |
| "eval_runtime": 0.3035, | |
| "eval_samples_per_second": 1720.089, | |
| "eval_steps_per_second": 108.741, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 19.35483870967742, | |
| "grad_norm": 13.62909984588623, | |
| "learning_rate": 1.741956989247312e-05, | |
| "loss": 1.5008, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 19.35483870967742, | |
| "eval_loss": 1.7572582960128784, | |
| "eval_model_preparation_time": 0.0012, | |
| "eval_runtime": 0.3051, | |
| "eval_samples_per_second": 1710.701, | |
| "eval_steps_per_second": 108.148, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 20.967741935483872, | |
| "grad_norm": 13.886764526367188, | |
| "learning_rate": 1.720451612903226e-05, | |
| "loss": 1.4469, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 20.967741935483872, | |
| "eval_loss": 1.7456613779067993, | |
| "eval_model_preparation_time": 0.0012, | |
| "eval_runtime": 0.3048, | |
| "eval_samples_per_second": 1712.389, | |
| "eval_steps_per_second": 108.254, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 22.580645161290324, | |
| "grad_norm": 16.04749870300293, | |
| "learning_rate": 1.6989462365591397e-05, | |
| "loss": 1.404, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 22.580645161290324, | |
| "eval_loss": 1.7826714515686035, | |
| "eval_model_preparation_time": 0.0012, | |
| "eval_runtime": 0.3034, | |
| "eval_samples_per_second": 1720.509, | |
| "eval_steps_per_second": 108.768, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 24.193548387096776, | |
| "grad_norm": 14.932185173034668, | |
| "learning_rate": 1.6774408602150537e-05, | |
| "loss": 1.3552, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 24.193548387096776, | |
| "eval_loss": 1.7234201431274414, | |
| "eval_model_preparation_time": 0.0012, | |
| "eval_runtime": 0.3061, | |
| "eval_samples_per_second": 1705.173, | |
| "eval_steps_per_second": 107.798, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 25.806451612903224, | |
| "grad_norm": 8.178566932678223, | |
| "learning_rate": 1.6559354838709676e-05, | |
| "loss": 1.313, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 25.806451612903224, | |
| "eval_loss": 1.8201613426208496, | |
| "eval_model_preparation_time": 0.0012, | |
| "eval_runtime": 0.3194, | |
| "eval_samples_per_second": 1634.314, | |
| "eval_steps_per_second": 103.319, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 27.419354838709676, | |
| "grad_norm": 16.086894989013672, | |
| "learning_rate": 1.634430107526882e-05, | |
| "loss": 1.2751, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 27.419354838709676, | |
| "eval_loss": 1.6344752311706543, | |
| "eval_model_preparation_time": 0.0012, | |
| "eval_runtime": 0.3053, | |
| "eval_samples_per_second": 1709.792, | |
| "eval_steps_per_second": 108.09, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 29.032258064516128, | |
| "grad_norm": 9.854013442993164, | |
| "learning_rate": 1.612924731182796e-05, | |
| "loss": 1.2377, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 29.032258064516128, | |
| "eval_loss": 1.6381661891937256, | |
| "eval_model_preparation_time": 0.0012, | |
| "eval_runtime": 0.343, | |
| "eval_samples_per_second": 1521.702, | |
| "eval_steps_per_second": 96.2, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 30.64516129032258, | |
| "grad_norm": 13.270648956298828, | |
| "learning_rate": 1.59141935483871e-05, | |
| "loss": 1.1772, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 30.64516129032258, | |
| "eval_loss": 1.6601710319519043, | |
| "eval_model_preparation_time": 0.0012, | |
| "eval_runtime": 0.3037, | |
| "eval_samples_per_second": 1718.661, | |
| "eval_steps_per_second": 108.651, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 32.25806451612903, | |
| "grad_norm": 20.389537811279297, | |
| "learning_rate": 1.569913978494624e-05, | |
| "loss": 1.176, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 32.25806451612903, | |
| "eval_loss": 1.6632287502288818, | |
| "eval_model_preparation_time": 0.0012, | |
| "eval_runtime": 0.3045, | |
| "eval_samples_per_second": 1714.128, | |
| "eval_steps_per_second": 108.364, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 33.87096774193548, | |
| "grad_norm": 11.229137420654297, | |
| "learning_rate": 1.548408602150538e-05, | |
| "loss": 1.1184, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 33.87096774193548, | |
| "eval_loss": 1.7555991411209106, | |
| "eval_model_preparation_time": 0.0012, | |
| "eval_runtime": 0.3043, | |
| "eval_samples_per_second": 1715.561, | |
| "eval_steps_per_second": 108.455, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 35.483870967741936, | |
| "grad_norm": 10.823155403137207, | |
| "learning_rate": 1.5269032258064518e-05, | |
| "loss": 1.0793, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 35.483870967741936, | |
| "eval_loss": 1.6087384223937988, | |
| "eval_model_preparation_time": 0.0012, | |
| "eval_runtime": 0.3059, | |
| "eval_samples_per_second": 1706.414, | |
| "eval_steps_per_second": 107.877, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 37.096774193548384, | |
| "grad_norm": 6.54916524887085, | |
| "learning_rate": 1.5053978494623658e-05, | |
| "loss": 1.0632, | |
| "step": 23000 | |
| }, | |
| { | |
| "epoch": 37.096774193548384, | |
| "eval_loss": 1.6815119981765747, | |
| "eval_model_preparation_time": 0.0012, | |
| "eval_runtime": 0.3044, | |
| "eval_samples_per_second": 1714.683, | |
| "eval_steps_per_second": 108.399, | |
| "step": 23000 | |
| }, | |
| { | |
| "epoch": 38.70967741935484, | |
| "grad_norm": 14.550293922424316, | |
| "learning_rate": 1.4838924731182798e-05, | |
| "loss": 1.0185, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 38.70967741935484, | |
| "eval_loss": 1.6611889600753784, | |
| "eval_model_preparation_time": 0.0012, | |
| "eval_runtime": 0.3028, | |
| "eval_samples_per_second": 1724.12, | |
| "eval_steps_per_second": 108.996, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 40.32258064516129, | |
| "grad_norm": 14.825828552246094, | |
| "learning_rate": 1.4623870967741937e-05, | |
| "loss": 1.0148, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 40.32258064516129, | |
| "eval_loss": 1.5314302444458008, | |
| "eval_model_preparation_time": 0.0012, | |
| "eval_runtime": 0.3061, | |
| "eval_samples_per_second": 1705.416, | |
| "eval_steps_per_second": 107.814, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 41.935483870967744, | |
| "grad_norm": 15.808582305908203, | |
| "learning_rate": 1.4408817204301075e-05, | |
| "loss": 0.9492, | |
| "step": 26000 | |
| }, | |
| { | |
| "epoch": 41.935483870967744, | |
| "eval_loss": 1.717032790184021, | |
| "eval_model_preparation_time": 0.0012, | |
| "eval_runtime": 0.3131, | |
| "eval_samples_per_second": 1667.386, | |
| "eval_steps_per_second": 105.409, | |
| "step": 26000 | |
| }, | |
| { | |
| "epoch": 43.54838709677419, | |
| "grad_norm": 13.56778621673584, | |
| "learning_rate": 1.4193763440860215e-05, | |
| "loss": 0.9352, | |
| "step": 27000 | |
| }, | |
| { | |
| "epoch": 43.54838709677419, | |
| "eval_loss": 1.631635069847107, | |
| "eval_model_preparation_time": 0.0012, | |
| "eval_runtime": 0.3065, | |
| "eval_samples_per_second": 1703.186, | |
| "eval_steps_per_second": 107.673, | |
| "step": 27000 | |
| }, | |
| { | |
| "epoch": 45.16129032258065, | |
| "grad_norm": 14.375411987304688, | |
| "learning_rate": 1.3978709677419355e-05, | |
| "loss": 0.9287, | |
| "step": 28000 | |
| }, | |
| { | |
| "epoch": 45.16129032258065, | |
| "eval_loss": 1.643862247467041, | |
| "eval_model_preparation_time": 0.0012, | |
| "eval_runtime": 0.3075, | |
| "eval_samples_per_second": 1697.381, | |
| "eval_steps_per_second": 107.306, | |
| "step": 28000 | |
| }, | |
| { | |
| "epoch": 46.774193548387096, | |
| "grad_norm": 12.451338768005371, | |
| "learning_rate": 1.3763655913978495e-05, | |
| "loss": 0.9052, | |
| "step": 29000 | |
| }, | |
| { | |
| "epoch": 46.774193548387096, | |
| "eval_loss": 1.4976590871810913, | |
| "eval_model_preparation_time": 0.0012, | |
| "eval_runtime": 0.3151, | |
| "eval_samples_per_second": 1656.651, | |
| "eval_steps_per_second": 104.731, | |
| "step": 29000 | |
| }, | |
| { | |
| "epoch": 48.38709677419355, | |
| "grad_norm": 15.790621757507324, | |
| "learning_rate": 1.3548602150537636e-05, | |
| "loss": 0.8897, | |
| "step": 30000 | |
| }, | |
| { | |
| "epoch": 48.38709677419355, | |
| "eval_loss": 1.544758915901184, | |
| "eval_model_preparation_time": 0.0012, | |
| "eval_runtime": 0.3045, | |
| "eval_samples_per_second": 1714.222, | |
| "eval_steps_per_second": 108.37, | |
| "step": 30000 | |
| }, | |
| { | |
| "epoch": 50.0, | |
| "grad_norm": 15.337139129638672, | |
| "learning_rate": 1.3333548387096776e-05, | |
| "loss": 0.9353, | |
| "step": 31000 | |
| }, | |
| { | |
| "epoch": 50.0, | |
| "eval_loss": 1.7019206285476685, | |
| "eval_model_preparation_time": 0.0012, | |
| "eval_runtime": 0.3181, | |
| "eval_samples_per_second": 1640.743, | |
| "eval_steps_per_second": 103.725, | |
| "step": 31000 | |
| }, | |
| { | |
| "epoch": 51.61290322580645, | |
| "grad_norm": 17.48087501525879, | |
| "learning_rate": 1.3118494623655916e-05, | |
| "loss": 0.8976, | |
| "step": 32000 | |
| }, | |
| { | |
| "epoch": 51.61290322580645, | |
| "eval_loss": 1.6256884336471558, | |
| "eval_model_preparation_time": 0.0012, | |
| "eval_runtime": 0.3131, | |
| "eval_samples_per_second": 1667.367, | |
| "eval_steps_per_second": 105.408, | |
| "step": 32000 | |
| }, | |
| { | |
| "epoch": 53.225806451612904, | |
| "grad_norm": 15.387638092041016, | |
| "learning_rate": 1.2903440860215055e-05, | |
| "loss": 0.8414, | |
| "step": 33000 | |
| }, | |
| { | |
| "epoch": 53.225806451612904, | |
| "eval_loss": 1.5139249563217163, | |
| "eval_model_preparation_time": 0.0012, | |
| "eval_runtime": 0.3191, | |
| "eval_samples_per_second": 1635.972, | |
| "eval_steps_per_second": 103.424, | |
| "step": 33000 | |
| }, | |
| { | |
| "epoch": 54.83870967741935, | |
| "grad_norm": 15.2994384765625, | |
| "learning_rate": 1.2688387096774195e-05, | |
| "loss": 0.7897, | |
| "step": 34000 | |
| }, | |
| { | |
| "epoch": 54.83870967741935, | |
| "eval_loss": 1.7013849020004272, | |
| "eval_model_preparation_time": 0.0012, | |
| "eval_runtime": 0.3268, | |
| "eval_samples_per_second": 1597.145, | |
| "eval_steps_per_second": 100.969, | |
| "step": 34000 | |
| }, | |
| { | |
| "epoch": 56.45161290322581, | |
| "grad_norm": 14.40909481048584, | |
| "learning_rate": 1.2473333333333335e-05, | |
| "loss": 0.8627, | |
| "step": 35000 | |
| }, | |
| { | |
| "epoch": 56.45161290322581, | |
| "eval_loss": 1.7141073942184448, | |
| "eval_model_preparation_time": 0.0012, | |
| "eval_runtime": 0.3089, | |
| "eval_samples_per_second": 1689.899, | |
| "eval_steps_per_second": 106.833, | |
| "step": 35000 | |
| }, | |
| { | |
| "epoch": 58.064516129032256, | |
| "grad_norm": 19.243818283081055, | |
| "learning_rate": 1.2258279569892474e-05, | |
| "loss": 0.9135, | |
| "step": 36000 | |
| }, | |
| { | |
| "epoch": 58.064516129032256, | |
| "eval_loss": 1.678747296333313, | |
| "eval_model_preparation_time": 0.0012, | |
| "eval_runtime": 0.3278, | |
| "eval_samples_per_second": 1592.41, | |
| "eval_steps_per_second": 100.67, | |
| "step": 36000 | |
| }, | |
| { | |
| "epoch": 59.67741935483871, | |
| "grad_norm": 14.35431957244873, | |
| "learning_rate": 1.2043225806451614e-05, | |
| "loss": 0.9226, | |
| "step": 37000 | |
| }, | |
| { | |
| "epoch": 59.67741935483871, | |
| "eval_loss": 1.9941015243530273, | |
| "eval_model_preparation_time": 0.0012, | |
| "eval_runtime": 0.3066, | |
| "eval_samples_per_second": 1702.667, | |
| "eval_steps_per_second": 107.64, | |
| "step": 37000 | |
| }, | |
| { | |
| "epoch": 61.29032258064516, | |
| "grad_norm": 16.02369499206543, | |
| "learning_rate": 1.1828172043010752e-05, | |
| "loss": 0.8849, | |
| "step": 38000 | |
| }, | |
| { | |
| "epoch": 61.29032258064516, | |
| "eval_loss": 1.5911988019943237, | |
| "eval_model_preparation_time": 0.0012, | |
| "eval_runtime": 0.3059, | |
| "eval_samples_per_second": 1706.321, | |
| "eval_steps_per_second": 107.871, | |
| "step": 38000 | |
| }, | |
| { | |
| "epoch": 62.903225806451616, | |
| "grad_norm": 24.164094924926758, | |
| "learning_rate": 1.1613118279569892e-05, | |
| "loss": 0.7974, | |
| "step": 39000 | |
| }, | |
| { | |
| "epoch": 62.903225806451616, | |
| "eval_loss": 1.5700287818908691, | |
| "eval_model_preparation_time": 0.0012, | |
| "eval_runtime": 0.3059, | |
| "eval_samples_per_second": 1706.437, | |
| "eval_steps_per_second": 107.878, | |
| "step": 39000 | |
| }, | |
| { | |
| "epoch": 64.51612903225806, | |
| "grad_norm": 10.7676420211792, | |
| "learning_rate": 1.1398064516129033e-05, | |
| "loss": 0.7892, | |
| "step": 40000 | |
| }, | |
| { | |
| "epoch": 64.51612903225806, | |
| "eval_loss": 1.6208666563034058, | |
| "eval_model_preparation_time": 0.0012, | |
| "eval_runtime": 0.3135, | |
| "eval_samples_per_second": 1665.325, | |
| "eval_steps_per_second": 105.279, | |
| "step": 40000 | |
| }, | |
| { | |
| "epoch": 66.12903225806451, | |
| "grad_norm": 8.90040111541748, | |
| "learning_rate": 1.1183010752688173e-05, | |
| "loss": 0.7728, | |
| "step": 41000 | |
| }, | |
| { | |
| "epoch": 66.12903225806451, | |
| "eval_loss": 1.5275108814239502, | |
| "eval_model_preparation_time": 0.0012, | |
| "eval_runtime": 0.3064, | |
| "eval_samples_per_second": 1703.395, | |
| "eval_steps_per_second": 107.686, | |
| "step": 41000 | |
| }, | |
| { | |
| "epoch": 67.74193548387096, | |
| "grad_norm": 16.836742401123047, | |
| "learning_rate": 1.0967956989247313e-05, | |
| "loss": 0.7309, | |
| "step": 42000 | |
| }, | |
| { | |
| "epoch": 67.74193548387096, | |
| "eval_loss": 1.6568617820739746, | |
| "eval_model_preparation_time": 0.0012, | |
| "eval_runtime": 0.3052, | |
| "eval_samples_per_second": 1710.328, | |
| "eval_steps_per_second": 108.124, | |
| "step": 42000 | |
| }, | |
| { | |
| "epoch": 69.35483870967742, | |
| "grad_norm": 16.19956398010254, | |
| "learning_rate": 1.0752903225806453e-05, | |
| "loss": 0.6891, | |
| "step": 43000 | |
| }, | |
| { | |
| "epoch": 69.35483870967742, | |
| "eval_loss": 1.4376003742218018, | |
| "eval_model_preparation_time": 0.0012, | |
| "eval_runtime": 0.3272, | |
| "eval_samples_per_second": 1595.464, | |
| "eval_steps_per_second": 100.863, | |
| "step": 43000 | |
| }, | |
| { | |
| "epoch": 70.96774193548387, | |
| "grad_norm": 19.571664810180664, | |
| "learning_rate": 1.0537849462365592e-05, | |
| "loss": 0.6732, | |
| "step": 44000 | |
| }, | |
| { | |
| "epoch": 70.96774193548387, | |
| "eval_loss": 1.6094655990600586, | |
| "eval_model_preparation_time": 0.0012, | |
| "eval_runtime": 0.3144, | |
| "eval_samples_per_second": 1660.491, | |
| "eval_steps_per_second": 104.974, | |
| "step": 44000 | |
| }, | |
| { | |
| "epoch": 72.58064516129032, | |
| "grad_norm": 11.60450267791748, | |
| "learning_rate": 1.0322795698924732e-05, | |
| "loss": 0.6475, | |
| "step": 45000 | |
| }, | |
| { | |
| "epoch": 72.58064516129032, | |
| "eval_loss": 1.569161295890808, | |
| "eval_model_preparation_time": 0.0012, | |
| "eval_runtime": 0.3241, | |
| "eval_samples_per_second": 1610.77, | |
| "eval_steps_per_second": 101.83, | |
| "step": 45000 | |
| }, | |
| { | |
| "epoch": 74.19354838709677, | |
| "grad_norm": 14.973388671875, | |
| "learning_rate": 1.0107741935483872e-05, | |
| "loss": 0.674, | |
| "step": 46000 | |
| }, | |
| { | |
| "epoch": 74.19354838709677, | |
| "eval_loss": 1.4532381296157837, | |
| "eval_model_preparation_time": 0.0012, | |
| "eval_runtime": 0.3117, | |
| "eval_samples_per_second": 1674.469, | |
| "eval_steps_per_second": 105.857, | |
| "step": 46000 | |
| }, | |
| { | |
| "epoch": 75.80645161290323, | |
| "grad_norm": 19.416486740112305, | |
| "learning_rate": 9.892688172043012e-06, | |
| "loss": 0.6339, | |
| "step": 47000 | |
| }, | |
| { | |
| "epoch": 75.80645161290323, | |
| "eval_loss": 1.5601801872253418, | |
| "eval_model_preparation_time": 0.0012, | |
| "eval_runtime": 0.3055, | |
| "eval_samples_per_second": 1708.783, | |
| "eval_steps_per_second": 108.027, | |
| "step": 47000 | |
| }, | |
| { | |
| "epoch": 77.41935483870968, | |
| "grad_norm": 12.237533569335938, | |
| "learning_rate": 9.677634408602151e-06, | |
| "loss": 0.628, | |
| "step": 48000 | |
| }, | |
| { | |
| "epoch": 77.41935483870968, | |
| "eval_loss": 1.5352447032928467, | |
| "eval_model_preparation_time": 0.0012, | |
| "eval_runtime": 0.3243, | |
| "eval_samples_per_second": 1609.448, | |
| "eval_steps_per_second": 101.747, | |
| "step": 48000 | |
| }, | |
| { | |
| "epoch": 79.03225806451613, | |
| "grad_norm": 8.90131664276123, | |
| "learning_rate": 9.462580645161291e-06, | |
| "loss": 0.6123, | |
| "step": 49000 | |
| }, | |
| { | |
| "epoch": 79.03225806451613, | |
| "eval_loss": 1.6023005247116089, | |
| "eval_model_preparation_time": 0.0012, | |
| "eval_runtime": 0.3064, | |
| "eval_samples_per_second": 1703.925, | |
| "eval_steps_per_second": 107.719, | |
| "step": 49000 | |
| }, | |
| { | |
| "epoch": 80.64516129032258, | |
| "grad_norm": 19.542125701904297, | |
| "learning_rate": 9.24752688172043e-06, | |
| "loss": 0.5913, | |
| "step": 50000 | |
| }, | |
| { | |
| "epoch": 80.64516129032258, | |
| "eval_loss": 1.4985138177871704, | |
| "eval_model_preparation_time": 0.0012, | |
| "eval_runtime": 0.3143, | |
| "eval_samples_per_second": 1660.843, | |
| "eval_steps_per_second": 104.996, | |
| "step": 50000 | |
| }, | |
| { | |
| "epoch": 82.25806451612904, | |
| "grad_norm": 15.9403715133667, | |
| "learning_rate": 9.03247311827957e-06, | |
| "loss": 0.5919, | |
| "step": 51000 | |
| }, | |
| { | |
| "epoch": 82.25806451612904, | |
| "eval_loss": 1.557279109954834, | |
| "eval_model_preparation_time": 0.0012, | |
| "eval_runtime": 0.3138, | |
| "eval_samples_per_second": 1663.684, | |
| "eval_steps_per_second": 105.175, | |
| "step": 51000 | |
| }, | |
| { | |
| "epoch": 83.87096774193549, | |
| "grad_norm": 16.341463088989258, | |
| "learning_rate": 8.81741935483871e-06, | |
| "loss": 0.5849, | |
| "step": 52000 | |
| }, | |
| { | |
| "epoch": 83.87096774193549, | |
| "eval_loss": 1.744088888168335, | |
| "eval_model_preparation_time": 0.0012, | |
| "eval_runtime": 0.3074, | |
| "eval_samples_per_second": 1698.241, | |
| "eval_steps_per_second": 107.36, | |
| "step": 52000 | |
| }, | |
| { | |
| "epoch": 85.48387096774194, | |
| "grad_norm": 17.496572494506836, | |
| "learning_rate": 8.60236559139785e-06, | |
| "loss": 0.5798, | |
| "step": 53000 | |
| }, | |
| { | |
| "epoch": 85.48387096774194, | |
| "eval_loss": 1.5605759620666504, | |
| "eval_model_preparation_time": 0.0012, | |
| "eval_runtime": 0.3312, | |
| "eval_samples_per_second": 1576.078, | |
| "eval_steps_per_second": 99.637, | |
| "step": 53000 | |
| }, | |
| { | |
| "epoch": 87.09677419354838, | |
| "grad_norm": 22.154132843017578, | |
| "learning_rate": 8.38731182795699e-06, | |
| "loss": 0.5627, | |
| "step": 54000 | |
| }, | |
| { | |
| "epoch": 87.09677419354838, | |
| "eval_loss": 1.486401081085205, | |
| "eval_model_preparation_time": 0.0012, | |
| "eval_runtime": 0.3046, | |
| "eval_samples_per_second": 1713.463, | |
| "eval_steps_per_second": 108.322, | |
| "step": 54000 | |
| }, | |
| { | |
| "epoch": 88.70967741935483, | |
| "grad_norm": 12.007641792297363, | |
| "learning_rate": 8.17225806451613e-06, | |
| "loss": 0.5926, | |
| "step": 55000 | |
| }, | |
| { | |
| "epoch": 88.70967741935483, | |
| "eval_loss": 1.533622145652771, | |
| "eval_model_preparation_time": 0.0012, | |
| "eval_runtime": 0.3189, | |
| "eval_samples_per_second": 1636.753, | |
| "eval_steps_per_second": 103.473, | |
| "step": 55000 | |
| }, | |
| { | |
| "epoch": 90.3225806451613, | |
| "grad_norm": 16.921255111694336, | |
| "learning_rate": 7.957204301075269e-06, | |
| "loss": 0.5737, | |
| "step": 56000 | |
| }, | |
| { | |
| "epoch": 90.3225806451613, | |
| "eval_loss": 1.595588207244873, | |
| "eval_model_preparation_time": 0.0012, | |
| "eval_runtime": 0.3248, | |
| "eval_samples_per_second": 1607.001, | |
| "eval_steps_per_second": 101.592, | |
| "step": 56000 | |
| }, | |
| { | |
| "epoch": 91.93548387096774, | |
| "grad_norm": 14.567840576171875, | |
| "learning_rate": 7.74215053763441e-06, | |
| "loss": 0.5521, | |
| "step": 57000 | |
| }, | |
| { | |
| "epoch": 91.93548387096774, | |
| "eval_loss": 1.6286988258361816, | |
| "eval_model_preparation_time": 0.0012, | |
| "eval_runtime": 0.308, | |
| "eval_samples_per_second": 1694.658, | |
| "eval_steps_per_second": 107.134, | |
| "step": 57000 | |
| }, | |
| { | |
| "epoch": 93.54838709677419, | |
| "grad_norm": 7.83158016204834, | |
| "learning_rate": 7.5270967741935486e-06, | |
| "loss": 0.5672, | |
| "step": 58000 | |
| }, | |
| { | |
| "epoch": 93.54838709677419, | |
| "eval_loss": 1.6612709760665894, | |
| "eval_model_preparation_time": 0.0012, | |
| "eval_runtime": 0.3047, | |
| "eval_samples_per_second": 1713.282, | |
| "eval_steps_per_second": 108.311, | |
| "step": 58000 | |
| }, | |
| { | |
| "epoch": 95.16129032258064, | |
| "grad_norm": 20.766202926635742, | |
| "learning_rate": 7.312043010752688e-06, | |
| "loss": 0.5685, | |
| "step": 59000 | |
| }, | |
| { | |
| "epoch": 95.16129032258064, | |
| "eval_loss": 1.5319266319274902, | |
| "eval_model_preparation_time": 0.0012, | |
| "eval_runtime": 0.3061, | |
| "eval_samples_per_second": 1705.367, | |
| "eval_steps_per_second": 107.811, | |
| "step": 59000 | |
| }, | |
| { | |
| "epoch": 96.7741935483871, | |
| "grad_norm": 13.834534645080566, | |
| "learning_rate": 7.096989247311829e-06, | |
| "loss": 0.5394, | |
| "step": 60000 | |
| }, | |
| { | |
| "epoch": 96.7741935483871, | |
| "eval_loss": 1.5068557262420654, | |
| "eval_model_preparation_time": 0.0012, | |
| "eval_runtime": 0.3061, | |
| "eval_samples_per_second": 1705.255, | |
| "eval_steps_per_second": 107.803, | |
| "step": 60000 | |
| }, | |
| { | |
| "epoch": 98.38709677419355, | |
| "grad_norm": 9.130626678466797, | |
| "learning_rate": 6.881935483870969e-06, | |
| "loss": 0.5095, | |
| "step": 61000 | |
| }, | |
| { | |
| "epoch": 98.38709677419355, | |
| "eval_loss": 1.4926313161849976, | |
| "eval_model_preparation_time": 0.0012, | |
| "eval_runtime": 0.3074, | |
| "eval_samples_per_second": 1698.19, | |
| "eval_steps_per_second": 107.357, | |
| "step": 61000 | |
| }, | |
| { | |
| "epoch": 100.0, | |
| "grad_norm": 18.79903793334961, | |
| "learning_rate": 6.666881720430108e-06, | |
| "loss": 0.5327, | |
| "step": 62000 | |
| }, | |
| { | |
| "epoch": 100.0, | |
| "eval_loss": 1.4378135204315186, | |
| "eval_model_preparation_time": 0.0012, | |
| "eval_runtime": 0.3146, | |
| "eval_samples_per_second": 1659.02, | |
| "eval_steps_per_second": 104.881, | |
| "step": 62000 | |
| }, | |
| { | |
| "epoch": 101.61290322580645, | |
| "grad_norm": 17.528038024902344, | |
| "learning_rate": 6.451827956989248e-06, | |
| "loss": 0.5108, | |
| "step": 63000 | |
| }, | |
| { | |
| "epoch": 101.61290322580645, | |
| "eval_loss": 1.4716895818710327, | |
| "eval_model_preparation_time": 0.0012, | |
| "eval_runtime": 0.3118, | |
| "eval_samples_per_second": 1673.899, | |
| "eval_steps_per_second": 105.821, | |
| "step": 63000 | |
| }, | |
| { | |
| "epoch": 103.2258064516129, | |
| "grad_norm": 9.862174034118652, | |
| "learning_rate": 6.236774193548387e-06, | |
| "loss": 0.4874, | |
| "step": 64000 | |
| }, | |
| { | |
| "epoch": 103.2258064516129, | |
| "eval_loss": 1.519917368888855, | |
| "eval_model_preparation_time": 0.0012, | |
| "eval_runtime": 0.3105, | |
| "eval_samples_per_second": 1681.31, | |
| "eval_steps_per_second": 106.29, | |
| "step": 64000 | |
| }, | |
| { | |
| "epoch": 104.83870967741936, | |
| "grad_norm": 11.85350513458252, | |
| "learning_rate": 6.0217204301075275e-06, | |
| "loss": 0.4856, | |
| "step": 65000 | |
| }, | |
| { | |
| "epoch": 104.83870967741936, | |
| "eval_loss": 1.5175796747207642, | |
| "eval_model_preparation_time": 0.0012, | |
| "eval_runtime": 0.3143, | |
| "eval_samples_per_second": 1661.035, | |
| "eval_steps_per_second": 105.008, | |
| "step": 65000 | |
| }, | |
| { | |
| "epoch": 106.45161290322581, | |
| "grad_norm": 21.145742416381836, | |
| "learning_rate": 5.806666666666667e-06, | |
| "loss": 0.4665, | |
| "step": 66000 | |
| }, | |
| { | |
| "epoch": 106.45161290322581, | |
| "eval_loss": 1.5837030410766602, | |
| "eval_model_preparation_time": 0.0012, | |
| "eval_runtime": 0.3108, | |
| "eval_samples_per_second": 1679.611, | |
| "eval_steps_per_second": 106.182, | |
| "step": 66000 | |
| }, | |
| { | |
| "epoch": 108.06451612903226, | |
| "grad_norm": 8.358002662658691, | |
| "learning_rate": 5.591612903225807e-06, | |
| "loss": 0.4846, | |
| "step": 67000 | |
| }, | |
| { | |
| "epoch": 108.06451612903226, | |
| "eval_loss": 1.3910651206970215, | |
| "eval_model_preparation_time": 0.0012, | |
| "eval_runtime": 0.3115, | |
| "eval_samples_per_second": 1676.029, | |
| "eval_steps_per_second": 105.956, | |
| "step": 67000 | |
| } | |
| ], | |
| "logging_steps": 1000, | |
| "max_steps": 93000, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 150, | |
| "save_steps": 1000, | |
| "stateful_callbacks": { | |
| "EarlyStoppingCallback": { | |
| "args": { | |
| "early_stopping_patience": 80, | |
| "early_stopping_threshold": 0.0 | |
| }, | |
| "attributes": { | |
| "early_stopping_patience_counter": 0 | |
| } | |
| }, | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1.1978332035428352e+16, | |
| "train_batch_size": 16, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |