| { |
| "best_metric": 0.7115384615384616, |
| "best_model_checkpoint": "swinv2-tiny-patch4-window8-256-dmae-humeda-DAV4/checkpoint-230", |
| "epoch": 30.0, |
| "eval_steps": 500, |
| "global_step": 690, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.43478260869565216, |
| "grad_norm": 10.513816833496094, |
| "learning_rate": 8.571428571428571e-06, |
| "loss": 3.2338, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.8695652173913043, |
| "grad_norm": 12.824058532714844, |
| "learning_rate": 1.7142857142857142e-05, |
| "loss": 3.1112, |
| "step": 20 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_accuracy": 0.4423076923076923, |
| "eval_loss": 1.46155846118927, |
| "eval_runtime": 1.3036, |
| "eval_samples_per_second": 39.89, |
| "eval_steps_per_second": 3.068, |
| "step": 23 |
| }, |
| { |
| "epoch": 1.3043478260869565, |
| "grad_norm": 29.83488655090332, |
| "learning_rate": 2.5714285714285714e-05, |
| "loss": 2.8604, |
| "step": 30 |
| }, |
| { |
| "epoch": 1.7391304347826086, |
| "grad_norm": 16.40433120727539, |
| "learning_rate": 2.9770992366412214e-05, |
| "loss": 2.4301, |
| "step": 40 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_accuracy": 0.38461538461538464, |
| "eval_loss": 1.337827205657959, |
| "eval_runtime": 0.9543, |
| "eval_samples_per_second": 54.49, |
| "eval_steps_per_second": 4.192, |
| "step": 46 |
| }, |
| { |
| "epoch": 2.1739130434782608, |
| "grad_norm": 41.61374282836914, |
| "learning_rate": 2.931297709923664e-05, |
| "loss": 2.0128, |
| "step": 50 |
| }, |
| { |
| "epoch": 2.608695652173913, |
| "grad_norm": 39.92607879638672, |
| "learning_rate": 2.885496183206107e-05, |
| "loss": 1.8107, |
| "step": 60 |
| }, |
| { |
| "epoch": 3.0, |
| "eval_accuracy": 0.4423076923076923, |
| "eval_loss": 1.1497355699539185, |
| "eval_runtime": 1.3328, |
| "eval_samples_per_second": 39.014, |
| "eval_steps_per_second": 3.001, |
| "step": 69 |
| }, |
| { |
| "epoch": 3.0434782608695654, |
| "grad_norm": 29.89019775390625, |
| "learning_rate": 2.8396946564885498e-05, |
| "loss": 1.7832, |
| "step": 70 |
| }, |
| { |
| "epoch": 3.4782608695652173, |
| "grad_norm": 24.518857955932617, |
| "learning_rate": 2.7938931297709925e-05, |
| "loss": 1.5113, |
| "step": 80 |
| }, |
| { |
| "epoch": 3.9130434782608696, |
| "grad_norm": 18.705516815185547, |
| "learning_rate": 2.7480916030534352e-05, |
| "loss": 1.3272, |
| "step": 90 |
| }, |
| { |
| "epoch": 4.0, |
| "eval_accuracy": 0.5, |
| "eval_loss": 1.2176709175109863, |
| "eval_runtime": 0.9735, |
| "eval_samples_per_second": 53.418, |
| "eval_steps_per_second": 4.109, |
| "step": 92 |
| }, |
| { |
| "epoch": 4.3478260869565215, |
| "grad_norm": 31.572586059570312, |
| "learning_rate": 2.702290076335878e-05, |
| "loss": 1.2364, |
| "step": 100 |
| }, |
| { |
| "epoch": 4.782608695652174, |
| "grad_norm": 22.11482810974121, |
| "learning_rate": 2.656488549618321e-05, |
| "loss": 1.2039, |
| "step": 110 |
| }, |
| { |
| "epoch": 5.0, |
| "eval_accuracy": 0.5576923076923077, |
| "eval_loss": 1.1250420808792114, |
| "eval_runtime": 0.9391, |
| "eval_samples_per_second": 55.369, |
| "eval_steps_per_second": 4.259, |
| "step": 115 |
| }, |
| { |
| "epoch": 5.217391304347826, |
| "grad_norm": 55.77997589111328, |
| "learning_rate": 2.6106870229007633e-05, |
| "loss": 1.1647, |
| "step": 120 |
| }, |
| { |
| "epoch": 5.6521739130434785, |
| "grad_norm": 34.06718444824219, |
| "learning_rate": 2.564885496183206e-05, |
| "loss": 1.0311, |
| "step": 130 |
| }, |
| { |
| "epoch": 6.0, |
| "eval_accuracy": 0.5576923076923077, |
| "eval_loss": 1.0659610033035278, |
| "eval_runtime": 1.3032, |
| "eval_samples_per_second": 39.903, |
| "eval_steps_per_second": 3.069, |
| "step": 138 |
| }, |
| { |
| "epoch": 6.086956521739131, |
| "grad_norm": 41.274539947509766, |
| "learning_rate": 2.5190839694656487e-05, |
| "loss": 0.9508, |
| "step": 140 |
| }, |
| { |
| "epoch": 6.521739130434782, |
| "grad_norm": 40.79165267944336, |
| "learning_rate": 2.4732824427480917e-05, |
| "loss": 0.7912, |
| "step": 150 |
| }, |
| { |
| "epoch": 6.956521739130435, |
| "grad_norm": 39.19831848144531, |
| "learning_rate": 2.4274809160305344e-05, |
| "loss": 1.0515, |
| "step": 160 |
| }, |
| { |
| "epoch": 7.0, |
| "eval_accuracy": 0.5, |
| "eval_loss": 1.224168062210083, |
| "eval_runtime": 0.957, |
| "eval_samples_per_second": 54.335, |
| "eval_steps_per_second": 4.18, |
| "step": 161 |
| }, |
| { |
| "epoch": 7.391304347826087, |
| "grad_norm": 31.34825897216797, |
| "learning_rate": 2.381679389312977e-05, |
| "loss": 0.8947, |
| "step": 170 |
| }, |
| { |
| "epoch": 7.826086956521739, |
| "grad_norm": 31.671894073486328, |
| "learning_rate": 2.3358778625954198e-05, |
| "loss": 0.8709, |
| "step": 180 |
| }, |
| { |
| "epoch": 8.0, |
| "eval_accuracy": 0.5961538461538461, |
| "eval_loss": 1.0952348709106445, |
| "eval_runtime": 0.9678, |
| "eval_samples_per_second": 53.732, |
| "eval_steps_per_second": 4.133, |
| "step": 184 |
| }, |
| { |
| "epoch": 8.26086956521739, |
| "grad_norm": 25.144756317138672, |
| "learning_rate": 2.2900763358778628e-05, |
| "loss": 0.7413, |
| "step": 190 |
| }, |
| { |
| "epoch": 8.695652173913043, |
| "grad_norm": 40.311012268066406, |
| "learning_rate": 2.2442748091603055e-05, |
| "loss": 0.677, |
| "step": 200 |
| }, |
| { |
| "epoch": 9.0, |
| "eval_accuracy": 0.5384615384615384, |
| "eval_loss": 1.103259801864624, |
| "eval_runtime": 1.3517, |
| "eval_samples_per_second": 38.47, |
| "eval_steps_per_second": 2.959, |
| "step": 207 |
| }, |
| { |
| "epoch": 9.130434782608695, |
| "grad_norm": 39.030181884765625, |
| "learning_rate": 2.198473282442748e-05, |
| "loss": 0.6599, |
| "step": 210 |
| }, |
| { |
| "epoch": 9.565217391304348, |
| "grad_norm": 63.20571517944336, |
| "learning_rate": 2.152671755725191e-05, |
| "loss": 0.5162, |
| "step": 220 |
| }, |
| { |
| "epoch": 10.0, |
| "grad_norm": 46.92601776123047, |
| "learning_rate": 2.1068702290076335e-05, |
| "loss": 0.6763, |
| "step": 230 |
| }, |
| { |
| "epoch": 10.0, |
| "eval_accuracy": 0.7115384615384616, |
| "eval_loss": 0.9550628662109375, |
| "eval_runtime": 0.995, |
| "eval_samples_per_second": 52.264, |
| "eval_steps_per_second": 4.02, |
| "step": 230 |
| }, |
| { |
| "epoch": 10.434782608695652, |
| "grad_norm": 40.177490234375, |
| "learning_rate": 2.0610687022900766e-05, |
| "loss": 0.6229, |
| "step": 240 |
| }, |
| { |
| "epoch": 10.869565217391305, |
| "grad_norm": 26.802276611328125, |
| "learning_rate": 2.0152671755725193e-05, |
| "loss": 0.5749, |
| "step": 250 |
| }, |
| { |
| "epoch": 11.0, |
| "eval_accuracy": 0.6346153846153846, |
| "eval_loss": 1.0428466796875, |
| "eval_runtime": 1.0415, |
| "eval_samples_per_second": 49.926, |
| "eval_steps_per_second": 3.84, |
| "step": 253 |
| }, |
| { |
| "epoch": 11.304347826086957, |
| "grad_norm": 17.216228485107422, |
| "learning_rate": 1.969465648854962e-05, |
| "loss": 0.5132, |
| "step": 260 |
| }, |
| { |
| "epoch": 11.73913043478261, |
| "grad_norm": 23.057968139648438, |
| "learning_rate": 1.9236641221374046e-05, |
| "loss": 0.4896, |
| "step": 270 |
| }, |
| { |
| "epoch": 12.0, |
| "eval_accuracy": 0.6538461538461539, |
| "eval_loss": 1.098140835762024, |
| "eval_runtime": 0.965, |
| "eval_samples_per_second": 53.885, |
| "eval_steps_per_second": 4.145, |
| "step": 276 |
| }, |
| { |
| "epoch": 12.173913043478262, |
| "grad_norm": 61.902565002441406, |
| "learning_rate": 1.8778625954198473e-05, |
| "loss": 0.5783, |
| "step": 280 |
| }, |
| { |
| "epoch": 12.608695652173914, |
| "grad_norm": 13.046545028686523, |
| "learning_rate": 1.83206106870229e-05, |
| "loss": 0.4817, |
| "step": 290 |
| }, |
| { |
| "epoch": 13.0, |
| "eval_accuracy": 0.4807692307692308, |
| "eval_loss": 1.3429468870162964, |
| "eval_runtime": 0.9645, |
| "eval_samples_per_second": 53.911, |
| "eval_steps_per_second": 4.147, |
| "step": 299 |
| }, |
| { |
| "epoch": 13.043478260869565, |
| "grad_norm": 21.68009376525879, |
| "learning_rate": 1.7862595419847327e-05, |
| "loss": 0.5016, |
| "step": 300 |
| }, |
| { |
| "epoch": 13.478260869565217, |
| "grad_norm": 36.084537506103516, |
| "learning_rate": 1.7404580152671754e-05, |
| "loss": 0.5528, |
| "step": 310 |
| }, |
| { |
| "epoch": 13.91304347826087, |
| "grad_norm": 23.846952438354492, |
| "learning_rate": 1.6946564885496184e-05, |
| "loss": 0.4264, |
| "step": 320 |
| }, |
| { |
| "epoch": 14.0, |
| "eval_accuracy": 0.6153846153846154, |
| "eval_loss": 1.304045557975769, |
| "eval_runtime": 1.2031, |
| "eval_samples_per_second": 43.222, |
| "eval_steps_per_second": 3.325, |
| "step": 322 |
| }, |
| { |
| "epoch": 14.347826086956522, |
| "grad_norm": 27.06462860107422, |
| "learning_rate": 1.648854961832061e-05, |
| "loss": 0.4405, |
| "step": 330 |
| }, |
| { |
| "epoch": 14.782608695652174, |
| "grad_norm": 32.62549591064453, |
| "learning_rate": 1.6030534351145038e-05, |
| "loss": 0.5637, |
| "step": 340 |
| }, |
| { |
| "epoch": 15.0, |
| "eval_accuracy": 0.4807692307692308, |
| "eval_loss": 1.2591590881347656, |
| "eval_runtime": 0.9518, |
| "eval_samples_per_second": 54.633, |
| "eval_steps_per_second": 4.203, |
| "step": 345 |
| }, |
| { |
| "epoch": 15.217391304347826, |
| "grad_norm": 26.797325134277344, |
| "learning_rate": 1.5572519083969465e-05, |
| "loss": 0.5098, |
| "step": 350 |
| }, |
| { |
| "epoch": 15.652173913043478, |
| "grad_norm": 23.297649383544922, |
| "learning_rate": 1.5114503816793892e-05, |
| "loss": 0.3846, |
| "step": 360 |
| }, |
| { |
| "epoch": 16.0, |
| "eval_accuracy": 0.6153846153846154, |
| "eval_loss": 1.1848912239074707, |
| "eval_runtime": 0.96, |
| "eval_samples_per_second": 54.168, |
| "eval_steps_per_second": 4.167, |
| "step": 368 |
| }, |
| { |
| "epoch": 16.08695652173913, |
| "grad_norm": 27.464319229125977, |
| "learning_rate": 1.465648854961832e-05, |
| "loss": 0.4457, |
| "step": 370 |
| }, |
| { |
| "epoch": 16.52173913043478, |
| "grad_norm": 48.690948486328125, |
| "learning_rate": 1.4198473282442749e-05, |
| "loss": 0.3769, |
| "step": 380 |
| }, |
| { |
| "epoch": 16.956521739130434, |
| "grad_norm": 19.746826171875, |
| "learning_rate": 1.3740458015267176e-05, |
| "loss": 0.5337, |
| "step": 390 |
| }, |
| { |
| "epoch": 17.0, |
| "eval_accuracy": 0.6346153846153846, |
| "eval_loss": 1.202493667602539, |
| "eval_runtime": 0.9605, |
| "eval_samples_per_second": 54.139, |
| "eval_steps_per_second": 4.165, |
| "step": 391 |
| }, |
| { |
| "epoch": 17.391304347826086, |
| "grad_norm": 32.50077819824219, |
| "learning_rate": 1.3282442748091605e-05, |
| "loss": 0.4225, |
| "step": 400 |
| }, |
| { |
| "epoch": 17.82608695652174, |
| "grad_norm": 8.899702072143555, |
| "learning_rate": 1.282442748091603e-05, |
| "loss": 0.34, |
| "step": 410 |
| }, |
| { |
| "epoch": 18.0, |
| "eval_accuracy": 0.6346153846153846, |
| "eval_loss": 1.0893843173980713, |
| "eval_runtime": 0.9454, |
| "eval_samples_per_second": 55.003, |
| "eval_steps_per_second": 4.231, |
| "step": 414 |
| }, |
| { |
| "epoch": 18.26086956521739, |
| "grad_norm": 38.04591751098633, |
| "learning_rate": 1.2366412213740458e-05, |
| "loss": 0.2243, |
| "step": 420 |
| }, |
| { |
| "epoch": 18.695652173913043, |
| "grad_norm": 21.51419448852539, |
| "learning_rate": 1.1908396946564885e-05, |
| "loss": 0.3511, |
| "step": 430 |
| }, |
| { |
| "epoch": 19.0, |
| "eval_accuracy": 0.6346153846153846, |
| "eval_loss": 1.214515209197998, |
| "eval_runtime": 0.9642, |
| "eval_samples_per_second": 53.931, |
| "eval_steps_per_second": 4.149, |
| "step": 437 |
| }, |
| { |
| "epoch": 19.130434782608695, |
| "grad_norm": 23.986791610717773, |
| "learning_rate": 1.1450381679389314e-05, |
| "loss": 0.3188, |
| "step": 440 |
| }, |
| { |
| "epoch": 19.565217391304348, |
| "grad_norm": 12.316106796264648, |
| "learning_rate": 1.099236641221374e-05, |
| "loss": 0.4559, |
| "step": 450 |
| }, |
| { |
| "epoch": 20.0, |
| "grad_norm": 8.235713005065918, |
| "learning_rate": 1.0534351145038168e-05, |
| "loss": 0.2539, |
| "step": 460 |
| }, |
| { |
| "epoch": 20.0, |
| "eval_accuracy": 0.6346153846153846, |
| "eval_loss": 1.1755187511444092, |
| "eval_runtime": 0.9612, |
| "eval_samples_per_second": 54.1, |
| "eval_steps_per_second": 4.162, |
| "step": 460 |
| }, |
| { |
| "epoch": 20.434782608695652, |
| "grad_norm": 10.826574325561523, |
| "learning_rate": 1.0076335877862596e-05, |
| "loss": 0.37, |
| "step": 470 |
| }, |
| { |
| "epoch": 20.869565217391305, |
| "grad_norm": 7.709938049316406, |
| "learning_rate": 9.618320610687023e-06, |
| "loss": 0.2683, |
| "step": 480 |
| }, |
| { |
| "epoch": 21.0, |
| "eval_accuracy": 0.6730769230769231, |
| "eval_loss": 1.2358968257904053, |
| "eval_runtime": 1.3065, |
| "eval_samples_per_second": 39.801, |
| "eval_steps_per_second": 3.062, |
| "step": 483 |
| }, |
| { |
| "epoch": 21.304347826086957, |
| "grad_norm": 43.30666732788086, |
| "learning_rate": 9.16030534351145e-06, |
| "loss": 0.2744, |
| "step": 490 |
| }, |
| { |
| "epoch": 21.73913043478261, |
| "grad_norm": 40.226722717285156, |
| "learning_rate": 8.702290076335877e-06, |
| "loss": 0.3144, |
| "step": 500 |
| }, |
| { |
| "epoch": 22.0, |
| "eval_accuracy": 0.6538461538461539, |
| "eval_loss": 1.2632596492767334, |
| "eval_runtime": 0.9336, |
| "eval_samples_per_second": 55.7, |
| "eval_steps_per_second": 4.285, |
| "step": 506 |
| }, |
| { |
| "epoch": 22.17391304347826, |
| "grad_norm": 22.006948471069336, |
| "learning_rate": 8.244274809160306e-06, |
| "loss": 0.3078, |
| "step": 510 |
| }, |
| { |
| "epoch": 22.608695652173914, |
| "grad_norm": 35.692108154296875, |
| "learning_rate": 7.786259541984733e-06, |
| "loss": 0.3249, |
| "step": 520 |
| }, |
| { |
| "epoch": 23.0, |
| "eval_accuracy": 0.6346153846153846, |
| "eval_loss": 1.2980352640151978, |
| "eval_runtime": 0.9904, |
| "eval_samples_per_second": 52.503, |
| "eval_steps_per_second": 4.039, |
| "step": 529 |
| }, |
| { |
| "epoch": 23.043478260869566, |
| "grad_norm": 21.394447326660156, |
| "learning_rate": 7.32824427480916e-06, |
| "loss": 0.3136, |
| "step": 530 |
| }, |
| { |
| "epoch": 23.47826086956522, |
| "grad_norm": 22.745704650878906, |
| "learning_rate": 6.870229007633588e-06, |
| "loss": 0.3224, |
| "step": 540 |
| }, |
| { |
| "epoch": 23.91304347826087, |
| "grad_norm": 9.297228813171387, |
| "learning_rate": 6.412213740458015e-06, |
| "loss": 0.2363, |
| "step": 550 |
| }, |
| { |
| "epoch": 24.0, |
| "eval_accuracy": 0.6538461538461539, |
| "eval_loss": 1.187163233757019, |
| "eval_runtime": 1.3252, |
| "eval_samples_per_second": 39.24, |
| "eval_steps_per_second": 3.018, |
| "step": 552 |
| }, |
| { |
| "epoch": 24.347826086956523, |
| "grad_norm": 18.777847290039062, |
| "learning_rate": 5.954198473282443e-06, |
| "loss": 0.2501, |
| "step": 560 |
| }, |
| { |
| "epoch": 24.782608695652176, |
| "grad_norm": 8.218061447143555, |
| "learning_rate": 5.49618320610687e-06, |
| "loss": 0.2876, |
| "step": 570 |
| }, |
| { |
| "epoch": 25.0, |
| "eval_accuracy": 0.6923076923076923, |
| "eval_loss": 1.2377344369888306, |
| "eval_runtime": 0.958, |
| "eval_samples_per_second": 54.28, |
| "eval_steps_per_second": 4.175, |
| "step": 575 |
| }, |
| { |
| "epoch": 25.217391304347824, |
| "grad_norm": 35.46883773803711, |
| "learning_rate": 5.038167938931298e-06, |
| "loss": 0.1978, |
| "step": 580 |
| }, |
| { |
| "epoch": 25.652173913043477, |
| "grad_norm": 26.11338996887207, |
| "learning_rate": 4.580152671755725e-06, |
| "loss": 0.2694, |
| "step": 590 |
| }, |
| { |
| "epoch": 26.0, |
| "eval_accuracy": 0.6538461538461539, |
| "eval_loss": 1.2695034742355347, |
| "eval_runtime": 0.9333, |
| "eval_samples_per_second": 55.716, |
| "eval_steps_per_second": 4.286, |
| "step": 598 |
| }, |
| { |
| "epoch": 26.08695652173913, |
| "grad_norm": 13.483443260192871, |
| "learning_rate": 4.122137404580153e-06, |
| "loss": 0.256, |
| "step": 600 |
| }, |
| { |
| "epoch": 26.52173913043478, |
| "grad_norm": 17.057382583618164, |
| "learning_rate": 3.66412213740458e-06, |
| "loss": 0.2429, |
| "step": 610 |
| }, |
| { |
| "epoch": 26.956521739130434, |
| "grad_norm": 21.891084671020508, |
| "learning_rate": 3.2061068702290075e-06, |
| "loss": 0.2307, |
| "step": 620 |
| }, |
| { |
| "epoch": 27.0, |
| "eval_accuracy": 0.6730769230769231, |
| "eval_loss": 1.2481114864349365, |
| "eval_runtime": 1.0091, |
| "eval_samples_per_second": 51.533, |
| "eval_steps_per_second": 3.964, |
| "step": 621 |
| }, |
| { |
| "epoch": 27.391304347826086, |
| "grad_norm": 27.533578872680664, |
| "learning_rate": 2.748091603053435e-06, |
| "loss": 0.2291, |
| "step": 630 |
| }, |
| { |
| "epoch": 27.82608695652174, |
| "grad_norm": 18.34809684753418, |
| "learning_rate": 2.2900763358778625e-06, |
| "loss": 0.2508, |
| "step": 640 |
| }, |
| { |
| "epoch": 28.0, |
| "eval_accuracy": 0.6730769230769231, |
| "eval_loss": 1.3111752271652222, |
| "eval_runtime": 0.9801, |
| "eval_samples_per_second": 53.056, |
| "eval_steps_per_second": 4.081, |
| "step": 644 |
| }, |
| { |
| "epoch": 28.26086956521739, |
| "grad_norm": 21.041872024536133, |
| "learning_rate": 1.83206106870229e-06, |
| "loss": 0.239, |
| "step": 650 |
| }, |
| { |
| "epoch": 28.695652173913043, |
| "grad_norm": 18.809953689575195, |
| "learning_rate": 1.3740458015267176e-06, |
| "loss": 0.3558, |
| "step": 660 |
| }, |
| { |
| "epoch": 29.0, |
| "eval_accuracy": 0.6730769230769231, |
| "eval_loss": 1.3208931684494019, |
| "eval_runtime": 1.3538, |
| "eval_samples_per_second": 38.41, |
| "eval_steps_per_second": 2.955, |
| "step": 667 |
| }, |
| { |
| "epoch": 29.130434782608695, |
| "grad_norm": 39.28880310058594, |
| "learning_rate": 9.16030534351145e-07, |
| "loss": 0.2069, |
| "step": 670 |
| }, |
| { |
| "epoch": 29.565217391304348, |
| "grad_norm": 32.49592971801758, |
| "learning_rate": 4.580152671755725e-07, |
| "loss": 0.2021, |
| "step": 680 |
| }, |
| { |
| "epoch": 30.0, |
| "grad_norm": 8.152503967285156, |
| "learning_rate": 0.0, |
| "loss": 0.2418, |
| "step": 690 |
| }, |
| { |
| "epoch": 30.0, |
| "eval_accuracy": 0.6538461538461539, |
| "eval_loss": 1.3233000040054321, |
| "eval_runtime": 1.2554, |
| "eval_samples_per_second": 41.422, |
| "eval_steps_per_second": 3.186, |
| "step": 690 |
| }, |
| { |
| "epoch": 30.0, |
| "step": 690, |
| "total_flos": 7.115852825454182e+17, |
| "train_loss": 0.7116180352542711, |
| "train_runtime": 585.1112, |
| "train_samples_per_second": 37.378, |
| "train_steps_per_second": 1.179 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 690, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 30, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 7.115852825454182e+17, |
| "train_batch_size": 16, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|