| { |
| "best_global_step": 67000, |
| "best_metric": 1.3910651206970215, |
| "best_model_checkpoint": "/home/auguste/Desktop/eDNA/TeleoClassification/scripts/DNABert2/experiments/masking_training/outputs/masking_teleo/checkpoints/checkpoint-67000", |
| "epoch": 108.06451612903226, |
| "eval_steps": 1000, |
| "global_step": 67000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0016129032258064516, |
| "grad_norm": 107.56168365478516, |
| "learning_rate": 2e-05, |
| "loss": 7.9233, |
| "step": 1 |
| }, |
| { |
| "epoch": 1.6129032258064515, |
| "grad_norm": 16.32627296447754, |
| "learning_rate": 1.9785161290322584e-05, |
| "loss": 3.0779, |
| "step": 1000 |
| }, |
| { |
| "epoch": 1.6129032258064515, |
| "eval_loss": 2.738837718963623, |
| "eval_model_preparation_time": 0.0012, |
| "eval_runtime": 0.2955, |
| "eval_samples_per_second": 1766.611, |
| "eval_steps_per_second": 111.682, |
| "step": 1000 |
| }, |
| { |
| "epoch": 3.225806451612903, |
| "grad_norm": 12.881124496459961, |
| "learning_rate": 1.9570107526881724e-05, |
| "loss": 2.506, |
| "step": 2000 |
| }, |
| { |
| "epoch": 3.225806451612903, |
| "eval_loss": 2.4902684688568115, |
| "eval_model_preparation_time": 0.0012, |
| "eval_runtime": 0.2968, |
| "eval_samples_per_second": 1758.892, |
| "eval_steps_per_second": 111.194, |
| "step": 2000 |
| }, |
| { |
| "epoch": 4.838709677419355, |
| "grad_norm": 12.914713859558105, |
| "learning_rate": 1.935505376344086e-05, |
| "loss": 2.734, |
| "step": 3000 |
| }, |
| { |
| "epoch": 4.838709677419355, |
| "eval_loss": 2.305058479309082, |
| "eval_model_preparation_time": 0.0012, |
| "eval_runtime": 0.3006, |
| "eval_samples_per_second": 1736.761, |
| "eval_steps_per_second": 109.795, |
| "step": 3000 |
| }, |
| { |
| "epoch": 6.451612903225806, |
| "grad_norm": 13.617836952209473, |
| "learning_rate": 1.914e-05, |
| "loss": 2.2267, |
| "step": 4000 |
| }, |
| { |
| "epoch": 6.451612903225806, |
| "eval_loss": 2.3899621963500977, |
| "eval_model_preparation_time": 0.0012, |
| "eval_runtime": 0.3004, |
| "eval_samples_per_second": 1737.628, |
| "eval_steps_per_second": 109.85, |
| "step": 4000 |
| }, |
| { |
| "epoch": 8.064516129032258, |
| "grad_norm": 11.493875503540039, |
| "learning_rate": 1.892494623655914e-05, |
| "loss": 2.1095, |
| "step": 5000 |
| }, |
| { |
| "epoch": 8.064516129032258, |
| "eval_loss": 2.1791865825653076, |
| "eval_model_preparation_time": 0.0012, |
| "eval_runtime": 0.2998, |
| "eval_samples_per_second": 1740.976, |
| "eval_steps_per_second": 110.062, |
| "step": 5000 |
| }, |
| { |
| "epoch": 9.67741935483871, |
| "grad_norm": 16.104379653930664, |
| "learning_rate": 1.870989247311828e-05, |
| "loss": 1.9622, |
| "step": 6000 |
| }, |
| { |
| "epoch": 9.67741935483871, |
| "eval_loss": 2.0534751415252686, |
| "eval_model_preparation_time": 0.0012, |
| "eval_runtime": 0.3144, |
| "eval_samples_per_second": 1660.314, |
| "eval_steps_per_second": 104.962, |
| "step": 6000 |
| }, |
| { |
| "epoch": 11.290322580645162, |
| "grad_norm": 15.933501243591309, |
| "learning_rate": 1.8494838709677422e-05, |
| "loss": 1.8713, |
| "step": 7000 |
| }, |
| { |
| "epoch": 11.290322580645162, |
| "eval_loss": 2.1255111694335938, |
| "eval_model_preparation_time": 0.0012, |
| "eval_runtime": 0.31, |
| "eval_samples_per_second": 1684.034, |
| "eval_steps_per_second": 106.462, |
| "step": 7000 |
| }, |
| { |
| "epoch": 12.903225806451612, |
| "grad_norm": 9.397466659545898, |
| "learning_rate": 1.8279784946236562e-05, |
| "loss": 1.7906, |
| "step": 8000 |
| }, |
| { |
| "epoch": 12.903225806451612, |
| "eval_loss": 1.9397249221801758, |
| "eval_model_preparation_time": 0.0012, |
| "eval_runtime": 0.3026, |
| "eval_samples_per_second": 1724.803, |
| "eval_steps_per_second": 109.039, |
| "step": 8000 |
| }, |
| { |
| "epoch": 14.516129032258064, |
| "grad_norm": 14.291478157043457, |
| "learning_rate": 1.8064731182795702e-05, |
| "loss": 1.7149, |
| "step": 9000 |
| }, |
| { |
| "epoch": 14.516129032258064, |
| "eval_loss": 1.8910889625549316, |
| "eval_model_preparation_time": 0.0012, |
| "eval_runtime": 0.3013, |
| "eval_samples_per_second": 1732.385, |
| "eval_steps_per_second": 109.519, |
| "step": 9000 |
| }, |
| { |
| "epoch": 16.129032258064516, |
| "grad_norm": 15.776030540466309, |
| "learning_rate": 1.784967741935484e-05, |
| "loss": 1.634, |
| "step": 10000 |
| }, |
| { |
| "epoch": 16.129032258064516, |
| "eval_loss": 1.893878698348999, |
| "eval_model_preparation_time": 0.0012, |
| "eval_runtime": 0.3023, |
| "eval_samples_per_second": 1726.506, |
| "eval_steps_per_second": 109.147, |
| "step": 10000 |
| }, |
| { |
| "epoch": 17.741935483870968, |
| "grad_norm": 12.53177547454834, |
| "learning_rate": 1.763462365591398e-05, |
| "loss": 1.5991, |
| "step": 11000 |
| }, |
| { |
| "epoch": 17.741935483870968, |
| "eval_loss": 1.8701565265655518, |
| "eval_model_preparation_time": 0.0012, |
| "eval_runtime": 0.3035, |
| "eval_samples_per_second": 1720.089, |
| "eval_steps_per_second": 108.741, |
| "step": 11000 |
| }, |
| { |
| "epoch": 19.35483870967742, |
| "grad_norm": 13.62909984588623, |
| "learning_rate": 1.741956989247312e-05, |
| "loss": 1.5008, |
| "step": 12000 |
| }, |
| { |
| "epoch": 19.35483870967742, |
| "eval_loss": 1.7572582960128784, |
| "eval_model_preparation_time": 0.0012, |
| "eval_runtime": 0.3051, |
| "eval_samples_per_second": 1710.701, |
| "eval_steps_per_second": 108.148, |
| "step": 12000 |
| }, |
| { |
| "epoch": 20.967741935483872, |
| "grad_norm": 13.886764526367188, |
| "learning_rate": 1.720451612903226e-05, |
| "loss": 1.4469, |
| "step": 13000 |
| }, |
| { |
| "epoch": 20.967741935483872, |
| "eval_loss": 1.7456613779067993, |
| "eval_model_preparation_time": 0.0012, |
| "eval_runtime": 0.3048, |
| "eval_samples_per_second": 1712.389, |
| "eval_steps_per_second": 108.254, |
| "step": 13000 |
| }, |
| { |
| "epoch": 22.580645161290324, |
| "grad_norm": 16.04749870300293, |
| "learning_rate": 1.6989462365591397e-05, |
| "loss": 1.404, |
| "step": 14000 |
| }, |
| { |
| "epoch": 22.580645161290324, |
| "eval_loss": 1.7826714515686035, |
| "eval_model_preparation_time": 0.0012, |
| "eval_runtime": 0.3034, |
| "eval_samples_per_second": 1720.509, |
| "eval_steps_per_second": 108.768, |
| "step": 14000 |
| }, |
| { |
| "epoch": 24.193548387096776, |
| "grad_norm": 14.932185173034668, |
| "learning_rate": 1.6774408602150537e-05, |
| "loss": 1.3552, |
| "step": 15000 |
| }, |
| { |
| "epoch": 24.193548387096776, |
| "eval_loss": 1.7234201431274414, |
| "eval_model_preparation_time": 0.0012, |
| "eval_runtime": 0.3061, |
| "eval_samples_per_second": 1705.173, |
| "eval_steps_per_second": 107.798, |
| "step": 15000 |
| }, |
| { |
| "epoch": 25.806451612903224, |
| "grad_norm": 8.178566932678223, |
| "learning_rate": 1.6559354838709676e-05, |
| "loss": 1.313, |
| "step": 16000 |
| }, |
| { |
| "epoch": 25.806451612903224, |
| "eval_loss": 1.8201613426208496, |
| "eval_model_preparation_time": 0.0012, |
| "eval_runtime": 0.3194, |
| "eval_samples_per_second": 1634.314, |
| "eval_steps_per_second": 103.319, |
| "step": 16000 |
| }, |
| { |
| "epoch": 27.419354838709676, |
| "grad_norm": 16.086894989013672, |
| "learning_rate": 1.634430107526882e-05, |
| "loss": 1.2751, |
| "step": 17000 |
| }, |
| { |
| "epoch": 27.419354838709676, |
| "eval_loss": 1.6344752311706543, |
| "eval_model_preparation_time": 0.0012, |
| "eval_runtime": 0.3053, |
| "eval_samples_per_second": 1709.792, |
| "eval_steps_per_second": 108.09, |
| "step": 17000 |
| }, |
| { |
| "epoch": 29.032258064516128, |
| "grad_norm": 9.854013442993164, |
| "learning_rate": 1.612924731182796e-05, |
| "loss": 1.2377, |
| "step": 18000 |
| }, |
| { |
| "epoch": 29.032258064516128, |
| "eval_loss": 1.6381661891937256, |
| "eval_model_preparation_time": 0.0012, |
| "eval_runtime": 0.343, |
| "eval_samples_per_second": 1521.702, |
| "eval_steps_per_second": 96.2, |
| "step": 18000 |
| }, |
| { |
| "epoch": 30.64516129032258, |
| "grad_norm": 13.270648956298828, |
| "learning_rate": 1.59141935483871e-05, |
| "loss": 1.1772, |
| "step": 19000 |
| }, |
| { |
| "epoch": 30.64516129032258, |
| "eval_loss": 1.6601710319519043, |
| "eval_model_preparation_time": 0.0012, |
| "eval_runtime": 0.3037, |
| "eval_samples_per_second": 1718.661, |
| "eval_steps_per_second": 108.651, |
| "step": 19000 |
| }, |
| { |
| "epoch": 32.25806451612903, |
| "grad_norm": 20.389537811279297, |
| "learning_rate": 1.569913978494624e-05, |
| "loss": 1.176, |
| "step": 20000 |
| }, |
| { |
| "epoch": 32.25806451612903, |
| "eval_loss": 1.6632287502288818, |
| "eval_model_preparation_time": 0.0012, |
| "eval_runtime": 0.3045, |
| "eval_samples_per_second": 1714.128, |
| "eval_steps_per_second": 108.364, |
| "step": 20000 |
| }, |
| { |
| "epoch": 33.87096774193548, |
| "grad_norm": 11.229137420654297, |
| "learning_rate": 1.548408602150538e-05, |
| "loss": 1.1184, |
| "step": 21000 |
| }, |
| { |
| "epoch": 33.87096774193548, |
| "eval_loss": 1.7555991411209106, |
| "eval_model_preparation_time": 0.0012, |
| "eval_runtime": 0.3043, |
| "eval_samples_per_second": 1715.561, |
| "eval_steps_per_second": 108.455, |
| "step": 21000 |
| }, |
| { |
| "epoch": 35.483870967741936, |
| "grad_norm": 10.823155403137207, |
| "learning_rate": 1.5269032258064518e-05, |
| "loss": 1.0793, |
| "step": 22000 |
| }, |
| { |
| "epoch": 35.483870967741936, |
| "eval_loss": 1.6087384223937988, |
| "eval_model_preparation_time": 0.0012, |
| "eval_runtime": 0.3059, |
| "eval_samples_per_second": 1706.414, |
| "eval_steps_per_second": 107.877, |
| "step": 22000 |
| }, |
| { |
| "epoch": 37.096774193548384, |
| "grad_norm": 6.54916524887085, |
| "learning_rate": 1.5053978494623658e-05, |
| "loss": 1.0632, |
| "step": 23000 |
| }, |
| { |
| "epoch": 37.096774193548384, |
| "eval_loss": 1.6815119981765747, |
| "eval_model_preparation_time": 0.0012, |
| "eval_runtime": 0.3044, |
| "eval_samples_per_second": 1714.683, |
| "eval_steps_per_second": 108.399, |
| "step": 23000 |
| }, |
| { |
| "epoch": 38.70967741935484, |
| "grad_norm": 14.550293922424316, |
| "learning_rate": 1.4838924731182798e-05, |
| "loss": 1.0185, |
| "step": 24000 |
| }, |
| { |
| "epoch": 38.70967741935484, |
| "eval_loss": 1.6611889600753784, |
| "eval_model_preparation_time": 0.0012, |
| "eval_runtime": 0.3028, |
| "eval_samples_per_second": 1724.12, |
| "eval_steps_per_second": 108.996, |
| "step": 24000 |
| }, |
| { |
| "epoch": 40.32258064516129, |
| "grad_norm": 14.825828552246094, |
| "learning_rate": 1.4623870967741937e-05, |
| "loss": 1.0148, |
| "step": 25000 |
| }, |
| { |
| "epoch": 40.32258064516129, |
| "eval_loss": 1.5314302444458008, |
| "eval_model_preparation_time": 0.0012, |
| "eval_runtime": 0.3061, |
| "eval_samples_per_second": 1705.416, |
| "eval_steps_per_second": 107.814, |
| "step": 25000 |
| }, |
| { |
| "epoch": 41.935483870967744, |
| "grad_norm": 15.808582305908203, |
| "learning_rate": 1.4408817204301075e-05, |
| "loss": 0.9492, |
| "step": 26000 |
| }, |
| { |
| "epoch": 41.935483870967744, |
| "eval_loss": 1.717032790184021, |
| "eval_model_preparation_time": 0.0012, |
| "eval_runtime": 0.3131, |
| "eval_samples_per_second": 1667.386, |
| "eval_steps_per_second": 105.409, |
| "step": 26000 |
| }, |
| { |
| "epoch": 43.54838709677419, |
| "grad_norm": 13.56778621673584, |
| "learning_rate": 1.4193763440860215e-05, |
| "loss": 0.9352, |
| "step": 27000 |
| }, |
| { |
| "epoch": 43.54838709677419, |
| "eval_loss": 1.631635069847107, |
| "eval_model_preparation_time": 0.0012, |
| "eval_runtime": 0.3065, |
| "eval_samples_per_second": 1703.186, |
| "eval_steps_per_second": 107.673, |
| "step": 27000 |
| }, |
| { |
| "epoch": 45.16129032258065, |
| "grad_norm": 14.375411987304688, |
| "learning_rate": 1.3978709677419355e-05, |
| "loss": 0.9287, |
| "step": 28000 |
| }, |
| { |
| "epoch": 45.16129032258065, |
| "eval_loss": 1.643862247467041, |
| "eval_model_preparation_time": 0.0012, |
| "eval_runtime": 0.3075, |
| "eval_samples_per_second": 1697.381, |
| "eval_steps_per_second": 107.306, |
| "step": 28000 |
| }, |
| { |
| "epoch": 46.774193548387096, |
| "grad_norm": 12.451338768005371, |
| "learning_rate": 1.3763655913978495e-05, |
| "loss": 0.9052, |
| "step": 29000 |
| }, |
| { |
| "epoch": 46.774193548387096, |
| "eval_loss": 1.4976590871810913, |
| "eval_model_preparation_time": 0.0012, |
| "eval_runtime": 0.3151, |
| "eval_samples_per_second": 1656.651, |
| "eval_steps_per_second": 104.731, |
| "step": 29000 |
| }, |
| { |
| "epoch": 48.38709677419355, |
| "grad_norm": 15.790621757507324, |
| "learning_rate": 1.3548602150537636e-05, |
| "loss": 0.8897, |
| "step": 30000 |
| }, |
| { |
| "epoch": 48.38709677419355, |
| "eval_loss": 1.544758915901184, |
| "eval_model_preparation_time": 0.0012, |
| "eval_runtime": 0.3045, |
| "eval_samples_per_second": 1714.222, |
| "eval_steps_per_second": 108.37, |
| "step": 30000 |
| }, |
| { |
| "epoch": 50.0, |
| "grad_norm": 15.337139129638672, |
| "learning_rate": 1.3333548387096776e-05, |
| "loss": 0.9353, |
| "step": 31000 |
| }, |
| { |
| "epoch": 50.0, |
| "eval_loss": 1.7019206285476685, |
| "eval_model_preparation_time": 0.0012, |
| "eval_runtime": 0.3181, |
| "eval_samples_per_second": 1640.743, |
| "eval_steps_per_second": 103.725, |
| "step": 31000 |
| }, |
| { |
| "epoch": 51.61290322580645, |
| "grad_norm": 17.48087501525879, |
| "learning_rate": 1.3118494623655916e-05, |
| "loss": 0.8976, |
| "step": 32000 |
| }, |
| { |
| "epoch": 51.61290322580645, |
| "eval_loss": 1.6256884336471558, |
| "eval_model_preparation_time": 0.0012, |
| "eval_runtime": 0.3131, |
| "eval_samples_per_second": 1667.367, |
| "eval_steps_per_second": 105.408, |
| "step": 32000 |
| }, |
| { |
| "epoch": 53.225806451612904, |
| "grad_norm": 15.387638092041016, |
| "learning_rate": 1.2903440860215055e-05, |
| "loss": 0.8414, |
| "step": 33000 |
| }, |
| { |
| "epoch": 53.225806451612904, |
| "eval_loss": 1.5139249563217163, |
| "eval_model_preparation_time": 0.0012, |
| "eval_runtime": 0.3191, |
| "eval_samples_per_second": 1635.972, |
| "eval_steps_per_second": 103.424, |
| "step": 33000 |
| }, |
| { |
| "epoch": 54.83870967741935, |
| "grad_norm": 15.2994384765625, |
| "learning_rate": 1.2688387096774195e-05, |
| "loss": 0.7897, |
| "step": 34000 |
| }, |
| { |
| "epoch": 54.83870967741935, |
| "eval_loss": 1.7013849020004272, |
| "eval_model_preparation_time": 0.0012, |
| "eval_runtime": 0.3268, |
| "eval_samples_per_second": 1597.145, |
| "eval_steps_per_second": 100.969, |
| "step": 34000 |
| }, |
| { |
| "epoch": 56.45161290322581, |
| "grad_norm": 14.40909481048584, |
| "learning_rate": 1.2473333333333335e-05, |
| "loss": 0.8627, |
| "step": 35000 |
| }, |
| { |
| "epoch": 56.45161290322581, |
| "eval_loss": 1.7141073942184448, |
| "eval_model_preparation_time": 0.0012, |
| "eval_runtime": 0.3089, |
| "eval_samples_per_second": 1689.899, |
| "eval_steps_per_second": 106.833, |
| "step": 35000 |
| }, |
| { |
| "epoch": 58.064516129032256, |
| "grad_norm": 19.243818283081055, |
| "learning_rate": 1.2258279569892474e-05, |
| "loss": 0.9135, |
| "step": 36000 |
| }, |
| { |
| "epoch": 58.064516129032256, |
| "eval_loss": 1.678747296333313, |
| "eval_model_preparation_time": 0.0012, |
| "eval_runtime": 0.3278, |
| "eval_samples_per_second": 1592.41, |
| "eval_steps_per_second": 100.67, |
| "step": 36000 |
| }, |
| { |
| "epoch": 59.67741935483871, |
| "grad_norm": 14.35431957244873, |
| "learning_rate": 1.2043225806451614e-05, |
| "loss": 0.9226, |
| "step": 37000 |
| }, |
| { |
| "epoch": 59.67741935483871, |
| "eval_loss": 1.9941015243530273, |
| "eval_model_preparation_time": 0.0012, |
| "eval_runtime": 0.3066, |
| "eval_samples_per_second": 1702.667, |
| "eval_steps_per_second": 107.64, |
| "step": 37000 |
| }, |
| { |
| "epoch": 61.29032258064516, |
| "grad_norm": 16.02369499206543, |
| "learning_rate": 1.1828172043010752e-05, |
| "loss": 0.8849, |
| "step": 38000 |
| }, |
| { |
| "epoch": 61.29032258064516, |
| "eval_loss": 1.5911988019943237, |
| "eval_model_preparation_time": 0.0012, |
| "eval_runtime": 0.3059, |
| "eval_samples_per_second": 1706.321, |
| "eval_steps_per_second": 107.871, |
| "step": 38000 |
| }, |
| { |
| "epoch": 62.903225806451616, |
| "grad_norm": 24.164094924926758, |
| "learning_rate": 1.1613118279569892e-05, |
| "loss": 0.7974, |
| "step": 39000 |
| }, |
| { |
| "epoch": 62.903225806451616, |
| "eval_loss": 1.5700287818908691, |
| "eval_model_preparation_time": 0.0012, |
| "eval_runtime": 0.3059, |
| "eval_samples_per_second": 1706.437, |
| "eval_steps_per_second": 107.878, |
| "step": 39000 |
| }, |
| { |
| "epoch": 64.51612903225806, |
| "grad_norm": 10.7676420211792, |
| "learning_rate": 1.1398064516129033e-05, |
| "loss": 0.7892, |
| "step": 40000 |
| }, |
| { |
| "epoch": 64.51612903225806, |
| "eval_loss": 1.6208666563034058, |
| "eval_model_preparation_time": 0.0012, |
| "eval_runtime": 0.3135, |
| "eval_samples_per_second": 1665.325, |
| "eval_steps_per_second": 105.279, |
| "step": 40000 |
| }, |
| { |
| "epoch": 66.12903225806451, |
| "grad_norm": 8.90040111541748, |
| "learning_rate": 1.1183010752688173e-05, |
| "loss": 0.7728, |
| "step": 41000 |
| }, |
| { |
| "epoch": 66.12903225806451, |
| "eval_loss": 1.5275108814239502, |
| "eval_model_preparation_time": 0.0012, |
| "eval_runtime": 0.3064, |
| "eval_samples_per_second": 1703.395, |
| "eval_steps_per_second": 107.686, |
| "step": 41000 |
| }, |
| { |
| "epoch": 67.74193548387096, |
| "grad_norm": 16.836742401123047, |
| "learning_rate": 1.0967956989247313e-05, |
| "loss": 0.7309, |
| "step": 42000 |
| }, |
| { |
| "epoch": 67.74193548387096, |
| "eval_loss": 1.6568617820739746, |
| "eval_model_preparation_time": 0.0012, |
| "eval_runtime": 0.3052, |
| "eval_samples_per_second": 1710.328, |
| "eval_steps_per_second": 108.124, |
| "step": 42000 |
| }, |
| { |
| "epoch": 69.35483870967742, |
| "grad_norm": 16.19956398010254, |
| "learning_rate": 1.0752903225806453e-05, |
| "loss": 0.6891, |
| "step": 43000 |
| }, |
| { |
| "epoch": 69.35483870967742, |
| "eval_loss": 1.4376003742218018, |
| "eval_model_preparation_time": 0.0012, |
| "eval_runtime": 0.3272, |
| "eval_samples_per_second": 1595.464, |
| "eval_steps_per_second": 100.863, |
| "step": 43000 |
| }, |
| { |
| "epoch": 70.96774193548387, |
| "grad_norm": 19.571664810180664, |
| "learning_rate": 1.0537849462365592e-05, |
| "loss": 0.6732, |
| "step": 44000 |
| }, |
| { |
| "epoch": 70.96774193548387, |
| "eval_loss": 1.6094655990600586, |
| "eval_model_preparation_time": 0.0012, |
| "eval_runtime": 0.3144, |
| "eval_samples_per_second": 1660.491, |
| "eval_steps_per_second": 104.974, |
| "step": 44000 |
| }, |
| { |
| "epoch": 72.58064516129032, |
| "grad_norm": 11.60450267791748, |
| "learning_rate": 1.0322795698924732e-05, |
| "loss": 0.6475, |
| "step": 45000 |
| }, |
| { |
| "epoch": 72.58064516129032, |
| "eval_loss": 1.569161295890808, |
| "eval_model_preparation_time": 0.0012, |
| "eval_runtime": 0.3241, |
| "eval_samples_per_second": 1610.77, |
| "eval_steps_per_second": 101.83, |
| "step": 45000 |
| }, |
| { |
| "epoch": 74.19354838709677, |
| "grad_norm": 14.973388671875, |
| "learning_rate": 1.0107741935483872e-05, |
| "loss": 0.674, |
| "step": 46000 |
| }, |
| { |
| "epoch": 74.19354838709677, |
| "eval_loss": 1.4532381296157837, |
| "eval_model_preparation_time": 0.0012, |
| "eval_runtime": 0.3117, |
| "eval_samples_per_second": 1674.469, |
| "eval_steps_per_second": 105.857, |
| "step": 46000 |
| }, |
| { |
| "epoch": 75.80645161290323, |
| "grad_norm": 19.416486740112305, |
| "learning_rate": 9.892688172043012e-06, |
| "loss": 0.6339, |
| "step": 47000 |
| }, |
| { |
| "epoch": 75.80645161290323, |
| "eval_loss": 1.5601801872253418, |
| "eval_model_preparation_time": 0.0012, |
| "eval_runtime": 0.3055, |
| "eval_samples_per_second": 1708.783, |
| "eval_steps_per_second": 108.027, |
| "step": 47000 |
| }, |
| { |
| "epoch": 77.41935483870968, |
| "grad_norm": 12.237533569335938, |
| "learning_rate": 9.677634408602151e-06, |
| "loss": 0.628, |
| "step": 48000 |
| }, |
| { |
| "epoch": 77.41935483870968, |
| "eval_loss": 1.5352447032928467, |
| "eval_model_preparation_time": 0.0012, |
| "eval_runtime": 0.3243, |
| "eval_samples_per_second": 1609.448, |
| "eval_steps_per_second": 101.747, |
| "step": 48000 |
| }, |
| { |
| "epoch": 79.03225806451613, |
| "grad_norm": 8.90131664276123, |
| "learning_rate": 9.462580645161291e-06, |
| "loss": 0.6123, |
| "step": 49000 |
| }, |
| { |
| "epoch": 79.03225806451613, |
| "eval_loss": 1.6023005247116089, |
| "eval_model_preparation_time": 0.0012, |
| "eval_runtime": 0.3064, |
| "eval_samples_per_second": 1703.925, |
| "eval_steps_per_second": 107.719, |
| "step": 49000 |
| }, |
| { |
| "epoch": 80.64516129032258, |
| "grad_norm": 19.542125701904297, |
| "learning_rate": 9.24752688172043e-06, |
| "loss": 0.5913, |
| "step": 50000 |
| }, |
| { |
| "epoch": 80.64516129032258, |
| "eval_loss": 1.4985138177871704, |
| "eval_model_preparation_time": 0.0012, |
| "eval_runtime": 0.3143, |
| "eval_samples_per_second": 1660.843, |
| "eval_steps_per_second": 104.996, |
| "step": 50000 |
| }, |
| { |
| "epoch": 82.25806451612904, |
| "grad_norm": 15.9403715133667, |
| "learning_rate": 9.03247311827957e-06, |
| "loss": 0.5919, |
| "step": 51000 |
| }, |
| { |
| "epoch": 82.25806451612904, |
| "eval_loss": 1.557279109954834, |
| "eval_model_preparation_time": 0.0012, |
| "eval_runtime": 0.3138, |
| "eval_samples_per_second": 1663.684, |
| "eval_steps_per_second": 105.175, |
| "step": 51000 |
| }, |
| { |
| "epoch": 83.87096774193549, |
| "grad_norm": 16.341463088989258, |
| "learning_rate": 8.81741935483871e-06, |
| "loss": 0.5849, |
| "step": 52000 |
| }, |
| { |
| "epoch": 83.87096774193549, |
| "eval_loss": 1.744088888168335, |
| "eval_model_preparation_time": 0.0012, |
| "eval_runtime": 0.3074, |
| "eval_samples_per_second": 1698.241, |
| "eval_steps_per_second": 107.36, |
| "step": 52000 |
| }, |
| { |
| "epoch": 85.48387096774194, |
| "grad_norm": 17.496572494506836, |
| "learning_rate": 8.60236559139785e-06, |
| "loss": 0.5798, |
| "step": 53000 |
| }, |
| { |
| "epoch": 85.48387096774194, |
| "eval_loss": 1.5605759620666504, |
| "eval_model_preparation_time": 0.0012, |
| "eval_runtime": 0.3312, |
| "eval_samples_per_second": 1576.078, |
| "eval_steps_per_second": 99.637, |
| "step": 53000 |
| }, |
| { |
| "epoch": 87.09677419354838, |
| "grad_norm": 22.154132843017578, |
| "learning_rate": 8.38731182795699e-06, |
| "loss": 0.5627, |
| "step": 54000 |
| }, |
| { |
| "epoch": 87.09677419354838, |
| "eval_loss": 1.486401081085205, |
| "eval_model_preparation_time": 0.0012, |
| "eval_runtime": 0.3046, |
| "eval_samples_per_second": 1713.463, |
| "eval_steps_per_second": 108.322, |
| "step": 54000 |
| }, |
| { |
| "epoch": 88.70967741935483, |
| "grad_norm": 12.007641792297363, |
| "learning_rate": 8.17225806451613e-06, |
| "loss": 0.5926, |
| "step": 55000 |
| }, |
| { |
| "epoch": 88.70967741935483, |
| "eval_loss": 1.533622145652771, |
| "eval_model_preparation_time": 0.0012, |
| "eval_runtime": 0.3189, |
| "eval_samples_per_second": 1636.753, |
| "eval_steps_per_second": 103.473, |
| "step": 55000 |
| }, |
| { |
| "epoch": 90.3225806451613, |
| "grad_norm": 16.921255111694336, |
| "learning_rate": 7.957204301075269e-06, |
| "loss": 0.5737, |
| "step": 56000 |
| }, |
| { |
| "epoch": 90.3225806451613, |
| "eval_loss": 1.595588207244873, |
| "eval_model_preparation_time": 0.0012, |
| "eval_runtime": 0.3248, |
| "eval_samples_per_second": 1607.001, |
| "eval_steps_per_second": 101.592, |
| "step": 56000 |
| }, |
| { |
| "epoch": 91.93548387096774, |
| "grad_norm": 14.567840576171875, |
| "learning_rate": 7.74215053763441e-06, |
| "loss": 0.5521, |
| "step": 57000 |
| }, |
| { |
| "epoch": 91.93548387096774, |
| "eval_loss": 1.6286988258361816, |
| "eval_model_preparation_time": 0.0012, |
| "eval_runtime": 0.308, |
| "eval_samples_per_second": 1694.658, |
| "eval_steps_per_second": 107.134, |
| "step": 57000 |
| }, |
| { |
| "epoch": 93.54838709677419, |
| "grad_norm": 7.83158016204834, |
| "learning_rate": 7.5270967741935486e-06, |
| "loss": 0.5672, |
| "step": 58000 |
| }, |
| { |
| "epoch": 93.54838709677419, |
| "eval_loss": 1.6612709760665894, |
| "eval_model_preparation_time": 0.0012, |
| "eval_runtime": 0.3047, |
| "eval_samples_per_second": 1713.282, |
| "eval_steps_per_second": 108.311, |
| "step": 58000 |
| }, |
| { |
| "epoch": 95.16129032258064, |
| "grad_norm": 20.766202926635742, |
| "learning_rate": 7.312043010752688e-06, |
| "loss": 0.5685, |
| "step": 59000 |
| }, |
| { |
| "epoch": 95.16129032258064, |
| "eval_loss": 1.5319266319274902, |
| "eval_model_preparation_time": 0.0012, |
| "eval_runtime": 0.3061, |
| "eval_samples_per_second": 1705.367, |
| "eval_steps_per_second": 107.811, |
| "step": 59000 |
| }, |
| { |
| "epoch": 96.7741935483871, |
| "grad_norm": 13.834534645080566, |
| "learning_rate": 7.096989247311829e-06, |
| "loss": 0.5394, |
| "step": 60000 |
| }, |
| { |
| "epoch": 96.7741935483871, |
| "eval_loss": 1.5068557262420654, |
| "eval_model_preparation_time": 0.0012, |
| "eval_runtime": 0.3061, |
| "eval_samples_per_second": 1705.255, |
| "eval_steps_per_second": 107.803, |
| "step": 60000 |
| }, |
| { |
| "epoch": 98.38709677419355, |
| "grad_norm": 9.130626678466797, |
| "learning_rate": 6.881935483870969e-06, |
| "loss": 0.5095, |
| "step": 61000 |
| }, |
| { |
| "epoch": 98.38709677419355, |
| "eval_loss": 1.4926313161849976, |
| "eval_model_preparation_time": 0.0012, |
| "eval_runtime": 0.3074, |
| "eval_samples_per_second": 1698.19, |
| "eval_steps_per_second": 107.357, |
| "step": 61000 |
| }, |
| { |
| "epoch": 100.0, |
| "grad_norm": 18.79903793334961, |
| "learning_rate": 6.666881720430108e-06, |
| "loss": 0.5327, |
| "step": 62000 |
| }, |
| { |
| "epoch": 100.0, |
| "eval_loss": 1.4378135204315186, |
| "eval_model_preparation_time": 0.0012, |
| "eval_runtime": 0.3146, |
| "eval_samples_per_second": 1659.02, |
| "eval_steps_per_second": 104.881, |
| "step": 62000 |
| }, |
| { |
| "epoch": 101.61290322580645, |
| "grad_norm": 17.528038024902344, |
| "learning_rate": 6.451827956989248e-06, |
| "loss": 0.5108, |
| "step": 63000 |
| }, |
| { |
| "epoch": 101.61290322580645, |
| "eval_loss": 1.4716895818710327, |
| "eval_model_preparation_time": 0.0012, |
| "eval_runtime": 0.3118, |
| "eval_samples_per_second": 1673.899, |
| "eval_steps_per_second": 105.821, |
| "step": 63000 |
| }, |
| { |
| "epoch": 103.2258064516129, |
| "grad_norm": 9.862174034118652, |
| "learning_rate": 6.236774193548387e-06, |
| "loss": 0.4874, |
| "step": 64000 |
| }, |
| { |
| "epoch": 103.2258064516129, |
| "eval_loss": 1.519917368888855, |
| "eval_model_preparation_time": 0.0012, |
| "eval_runtime": 0.3105, |
| "eval_samples_per_second": 1681.31, |
| "eval_steps_per_second": 106.29, |
| "step": 64000 |
| }, |
| { |
| "epoch": 104.83870967741936, |
| "grad_norm": 11.85350513458252, |
| "learning_rate": 6.0217204301075275e-06, |
| "loss": 0.4856, |
| "step": 65000 |
| }, |
| { |
| "epoch": 104.83870967741936, |
| "eval_loss": 1.5175796747207642, |
| "eval_model_preparation_time": 0.0012, |
| "eval_runtime": 0.3143, |
| "eval_samples_per_second": 1661.035, |
| "eval_steps_per_second": 105.008, |
| "step": 65000 |
| }, |
| { |
| "epoch": 106.45161290322581, |
| "grad_norm": 21.145742416381836, |
| "learning_rate": 5.806666666666667e-06, |
| "loss": 0.4665, |
| "step": 66000 |
| }, |
| { |
| "epoch": 106.45161290322581, |
| "eval_loss": 1.5837030410766602, |
| "eval_model_preparation_time": 0.0012, |
| "eval_runtime": 0.3108, |
| "eval_samples_per_second": 1679.611, |
| "eval_steps_per_second": 106.182, |
| "step": 66000 |
| }, |
| { |
| "epoch": 108.06451612903226, |
| "grad_norm": 8.358002662658691, |
| "learning_rate": 5.591612903225807e-06, |
| "loss": 0.4846, |
| "step": 67000 |
| }, |
| { |
| "epoch": 108.06451612903226, |
| "eval_loss": 1.3910651206970215, |
| "eval_model_preparation_time": 0.0012, |
| "eval_runtime": 0.3115, |
| "eval_samples_per_second": 1676.029, |
| "eval_steps_per_second": 105.956, |
| "step": 67000 |
| } |
| ], |
| "logging_steps": 1000, |
| "max_steps": 93000, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 150, |
| "save_steps": 1000, |
| "stateful_callbacks": { |
| "EarlyStoppingCallback": { |
| "args": { |
| "early_stopping_patience": 80, |
| "early_stopping_threshold": 0.0 |
| }, |
| "attributes": { |
| "early_stopping_patience_counter": 0 |
| } |
| }, |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.1978332035428352e+16, |
| "train_batch_size": 16, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|