{
  "best_metric": 0.7608695652173914,
  "best_model_checkpoint": "swinv2-tiny-patch4-window8-256-DMAE-da2-colab/checkpoint-180",
  "epoch": 39.108695652173914,
  "eval_steps": 500,
  "global_step": 440,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.8695652173913043,
      "grad_norm": 22.860248565673828,
      "learning_rate": 9.090909090909091e-06,
      "loss": 1.4149,
      "step": 10
    },
    {
      "epoch": 0.9565217391304348,
      "eval_accuracy": 0.21739130434782608,
      "eval_loss": 1.3904842138290405,
      "eval_runtime": 0.7623,
      "eval_samples_per_second": 60.346,
      "eval_steps_per_second": 3.936,
      "step": 11
    },
    {
      "epoch": 1.7608695652173914,
      "grad_norm": 21.9100341796875,
      "learning_rate": 1.8181818181818182e-05,
      "loss": 1.3431,
      "step": 20
    },
    {
      "epoch": 1.9347826086956523,
      "eval_accuracy": 0.30434782608695654,
      "eval_loss": 1.3828414678573608,
      "eval_runtime": 0.7435,
      "eval_samples_per_second": 61.87,
      "eval_steps_per_second": 4.035,
      "step": 22
    },
    {
      "epoch": 2.6521739130434785,
      "grad_norm": 25.524051666259766,
      "learning_rate": 2.7272727272727273e-05,
      "loss": 1.2396,
      "step": 30
    },
    {
      "epoch": 2.9130434782608696,
      "eval_accuracy": 0.43478260869565216,
      "eval_loss": 1.2675390243530273,
      "eval_runtime": 0.7435,
      "eval_samples_per_second": 61.871,
      "eval_steps_per_second": 4.035,
      "step": 33
    },
    {
      "epoch": 3.5434782608695654,
      "grad_norm": 39.9882926940918,
      "learning_rate": 3.6363636363636364e-05,
      "loss": 1.1377,
      "step": 40
    },
    {
      "epoch": 3.9782608695652173,
      "eval_accuracy": 0.34782608695652173,
      "eval_loss": 1.2067060470581055,
      "eval_runtime": 1.0097,
      "eval_samples_per_second": 45.56,
      "eval_steps_per_second": 2.971,
      "step": 45
    },
    {
      "epoch": 4.434782608695652,
      "grad_norm": 32.264034271240234,
      "learning_rate": 3.93939393939394e-05,
      "loss": 1.0144,
      "step": 50
    },
    {
      "epoch": 4.956521739130435,
      "eval_accuracy": 0.6086956521739131,
      "eval_loss": 0.9060260057449341,
      "eval_runtime": 0.7487,
      "eval_samples_per_second": 61.443,
      "eval_steps_per_second": 4.007,
      "step": 56
    },
    {
      "epoch": 5.326086956521739,
      "grad_norm": 38.01457977294922,
      "learning_rate": 3.838383838383839e-05,
      "loss": 0.9016,
      "step": 60
    },
    {
      "epoch": 5.934782608695652,
      "eval_accuracy": 0.6739130434782609,
      "eval_loss": 0.8024644255638123,
      "eval_runtime": 0.7779,
      "eval_samples_per_second": 59.132,
      "eval_steps_per_second": 3.856,
      "step": 67
    },
    {
      "epoch": 6.217391304347826,
      "grad_norm": 36.355648040771484,
      "learning_rate": 3.7373737373737376e-05,
      "loss": 0.7941,
      "step": 70
    },
    {
      "epoch": 6.913043478260869,
      "eval_accuracy": 0.6956521739130435,
      "eval_loss": 0.7811539769172668,
      "eval_runtime": 0.8396,
      "eval_samples_per_second": 54.787,
      "eval_steps_per_second": 3.573,
      "step": 78
    },
    {
      "epoch": 7.108695652173913,
      "grad_norm": 41.1641845703125,
      "learning_rate": 3.6363636363636364e-05,
      "loss": 0.7031,
      "step": 80
    },
    {
      "epoch": 7.978260869565218,
      "grad_norm": 51.79063415527344,
      "learning_rate": 3.535353535353536e-05,
      "loss": 0.6986,
      "step": 90
    },
    {
      "epoch": 7.978260869565218,
      "eval_accuracy": 0.5869565217391305,
      "eval_loss": 0.9441068768501282,
      "eval_runtime": 0.7666,
      "eval_samples_per_second": 60.006,
      "eval_steps_per_second": 3.913,
      "step": 90
    },
    {
      "epoch": 8.869565217391305,
      "grad_norm": 71.17655181884766,
      "learning_rate": 3.434343434343435e-05,
      "loss": 0.6245,
      "step": 100
    },
    {
      "epoch": 8.956521739130435,
      "eval_accuracy": 0.6956521739130435,
      "eval_loss": 0.864142894744873,
      "eval_runtime": 0.7557,
      "eval_samples_per_second": 60.867,
      "eval_steps_per_second": 3.97,
      "step": 101
    },
    {
      "epoch": 9.76086956521739,
      "grad_norm": 41.29151916503906,
      "learning_rate": 3.3333333333333335e-05,
      "loss": 0.6044,
      "step": 110
    },
    {
      "epoch": 9.934782608695652,
      "eval_accuracy": 0.6086956521739131,
      "eval_loss": 0.8647807836532593,
      "eval_runtime": 0.7668,
      "eval_samples_per_second": 59.987,
      "eval_steps_per_second": 3.912,
      "step": 112
    },
    {
      "epoch": 10.652173913043478,
      "grad_norm": 35.93144989013672,
      "learning_rate": 3.232323232323232e-05,
      "loss": 0.536,
      "step": 120
    },
    {
      "epoch": 10.91304347826087,
      "eval_accuracy": 0.5869565217391305,
      "eval_loss": 0.8800004124641418,
      "eval_runtime": 0.7619,
      "eval_samples_per_second": 60.377,
      "eval_steps_per_second": 3.938,
      "step": 123
    },
    {
      "epoch": 11.543478260869565,
      "grad_norm": 42.50069808959961,
      "learning_rate": 3.131313131313132e-05,
      "loss": 0.4825,
      "step": 130
    },
    {
      "epoch": 11.978260869565217,
      "eval_accuracy": 0.7391304347826086,
      "eval_loss": 0.8387995958328247,
      "eval_runtime": 0.7698,
      "eval_samples_per_second": 59.754,
      "eval_steps_per_second": 3.897,
      "step": 135
    },
    {
      "epoch": 12.434782608695652,
      "grad_norm": 82.49532318115234,
      "learning_rate": 3.0303030303030306e-05,
      "loss": 0.4972,
      "step": 140
    },
    {
      "epoch": 12.956521739130435,
      "eval_accuracy": 0.717391304347826,
      "eval_loss": 0.8763275146484375,
      "eval_runtime": 0.7588,
      "eval_samples_per_second": 60.62,
      "eval_steps_per_second": 3.953,
      "step": 146
    },
    {
      "epoch": 13.326086956521738,
      "grad_norm": 44.642826080322266,
      "learning_rate": 2.9292929292929297e-05,
      "loss": 0.4284,
      "step": 150
    },
    {
      "epoch": 13.934782608695652,
      "eval_accuracy": 0.6956521739130435,
      "eval_loss": 0.8228161931037903,
      "eval_runtime": 1.0261,
      "eval_samples_per_second": 44.832,
      "eval_steps_per_second": 2.924,
      "step": 157
    },
    {
      "epoch": 14.217391304347826,
      "grad_norm": 57.17488479614258,
      "learning_rate": 2.8282828282828285e-05,
      "loss": 0.3961,
      "step": 160
    },
    {
      "epoch": 14.91304347826087,
      "eval_accuracy": 0.717391304347826,
      "eval_loss": 0.8260169625282288,
      "eval_runtime": 0.8274,
      "eval_samples_per_second": 55.597,
      "eval_steps_per_second": 3.626,
      "step": 168
    },
    {
      "epoch": 15.108695652173912,
      "grad_norm": 31.25395393371582,
      "learning_rate": 2.7272727272727273e-05,
      "loss": 0.4208,
      "step": 170
    },
    {
      "epoch": 15.978260869565217,
      "grad_norm": 42.941837310791016,
      "learning_rate": 2.6262626262626265e-05,
      "loss": 0.3877,
      "step": 180
    },
    {
      "epoch": 15.978260869565217,
      "eval_accuracy": 0.7608695652173914,
      "eval_loss": 0.9367595911026001,
      "eval_runtime": 0.7841,
      "eval_samples_per_second": 58.667,
      "eval_steps_per_second": 3.826,
      "step": 180
    },
    {
      "epoch": 16.869565217391305,
      "grad_norm": 72.04235076904297,
      "learning_rate": 2.5252525252525253e-05,
      "loss": 0.3744,
      "step": 190
    },
    {
      "epoch": 16.956521739130434,
      "eval_accuracy": 0.6304347826086957,
      "eval_loss": 1.1220568418502808,
      "eval_runtime": 1.0796,
      "eval_samples_per_second": 42.606,
      "eval_steps_per_second": 2.779,
      "step": 191
    },
    {
      "epoch": 17.76086956521739,
      "grad_norm": 43.762760162353516,
      "learning_rate": 2.4242424242424244e-05,
      "loss": 0.3266,
      "step": 200
    },
    {
      "epoch": 17.934782608695652,
      "eval_accuracy": 0.6739130434782609,
      "eval_loss": 1.0177233219146729,
      "eval_runtime": 0.7517,
      "eval_samples_per_second": 61.193,
      "eval_steps_per_second": 3.991,
      "step": 202
    },
    {
      "epoch": 18.652173913043477,
      "grad_norm": 76.85489654541016,
      "learning_rate": 2.3232323232323232e-05,
      "loss": 0.3257,
      "step": 210
    },
    {
      "epoch": 18.91304347826087,
      "eval_accuracy": 0.6956521739130435,
      "eval_loss": 1.0300357341766357,
      "eval_runtime": 0.7634,
      "eval_samples_per_second": 60.254,
      "eval_steps_per_second": 3.93,
      "step": 213
    },
    {
      "epoch": 19.543478260869566,
      "grad_norm": 40.13884353637695,
      "learning_rate": 2.2222222222222227e-05,
      "loss": 0.3164,
      "step": 220
    },
    {
      "epoch": 19.97826086956522,
      "eval_accuracy": 0.6956521739130435,
      "eval_loss": 1.134386658668518,
      "eval_runtime": 0.8709,
      "eval_samples_per_second": 52.817,
      "eval_steps_per_second": 3.445,
      "step": 225
    },
    {
      "epoch": 20.434782608695652,
      "grad_norm": 50.51103591918945,
      "learning_rate": 2.121212121212121e-05,
      "loss": 0.2965,
      "step": 230
    },
    {
      "epoch": 20.956521739130434,
      "eval_accuracy": 0.7391304347826086,
      "eval_loss": 0.9283356070518494,
      "eval_runtime": 0.7697,
      "eval_samples_per_second": 59.76,
      "eval_steps_per_second": 3.897,
      "step": 236
    },
    {
      "epoch": 21.32608695652174,
      "grad_norm": 40.85714340209961,
      "learning_rate": 2.0202020202020206e-05,
      "loss": 0.293,
      "step": 240
    },
    {
      "epoch": 21.934782608695652,
      "eval_accuracy": 0.6956521739130435,
      "eval_loss": 1.012825608253479,
      "eval_runtime": 0.7852,
      "eval_samples_per_second": 58.582,
      "eval_steps_per_second": 3.821,
      "step": 247
    },
    {
      "epoch": 22.217391304347824,
      "grad_norm": 31.803544998168945,
      "learning_rate": 1.9191919191919194e-05,
      "loss": 0.2929,
      "step": 250
    },
    {
      "epoch": 22.91304347826087,
      "eval_accuracy": 0.7608695652173914,
      "eval_loss": 1.0449855327606201,
      "eval_runtime": 1.4131,
      "eval_samples_per_second": 32.553,
      "eval_steps_per_second": 2.123,
      "step": 258
    },
    {
      "epoch": 23.108695652173914,
      "grad_norm": 44.02550506591797,
      "learning_rate": 1.8181818181818182e-05,
      "loss": 0.2661,
      "step": 260
    },
    {
      "epoch": 23.97826086956522,
      "grad_norm": 62.81007766723633,
      "learning_rate": 1.7171717171717173e-05,
      "loss": 0.2878,
      "step": 270
    },
    {
      "epoch": 23.97826086956522,
      "eval_accuracy": 0.717391304347826,
      "eval_loss": 1.1481518745422363,
      "eval_runtime": 0.7494,
      "eval_samples_per_second": 61.383,
      "eval_steps_per_second": 4.003,
      "step": 270
    },
    {
      "epoch": 24.869565217391305,
      "grad_norm": 46.08440017700195,
      "learning_rate": 1.616161616161616e-05,
      "loss": 0.2447,
      "step": 280
    },
    {
      "epoch": 24.956521739130434,
      "eval_accuracy": 0.717391304347826,
      "eval_loss": 1.0715558528900146,
      "eval_runtime": 0.7605,
      "eval_samples_per_second": 60.483,
      "eval_steps_per_second": 3.945,
      "step": 281
    },
    {
      "epoch": 25.76086956521739,
      "grad_norm": 65.74066162109375,
      "learning_rate": 1.5151515151515153e-05,
      "loss": 0.2601,
      "step": 290
    },
    {
      "epoch": 25.934782608695652,
      "eval_accuracy": 0.6956521739130435,
      "eval_loss": 1.0769968032836914,
      "eval_runtime": 0.9896,
      "eval_samples_per_second": 46.483,
      "eval_steps_per_second": 3.032,
      "step": 292
    },
    {
      "epoch": 26.652173913043477,
      "grad_norm": 39.677207946777344,
      "learning_rate": 1.4141414141414143e-05,
      "loss": 0.2299,
      "step": 300
    },
    {
      "epoch": 26.91304347826087,
      "eval_accuracy": 0.7391304347826086,
      "eval_loss": 1.176945686340332,
      "eval_runtime": 1.0303,
      "eval_samples_per_second": 44.647,
      "eval_steps_per_second": 2.912,
      "step": 303
    },
    {
      "epoch": 27.543478260869566,
      "grad_norm": 51.47335433959961,
      "learning_rate": 1.3131313131313132e-05,
      "loss": 0.2401,
      "step": 310
    },
    {
      "epoch": 27.97826086956522,
      "eval_accuracy": 0.717391304347826,
      "eval_loss": 1.140651822090149,
      "eval_runtime": 0.7578,
      "eval_samples_per_second": 60.699,
      "eval_steps_per_second": 3.959,
      "step": 315
    },
    {
      "epoch": 28.434782608695652,
      "grad_norm": 35.28546905517578,
      "learning_rate": 1.2121212121212122e-05,
      "loss": 0.2347,
      "step": 320
    },
    {
      "epoch": 28.956521739130434,
      "eval_accuracy": 0.6956521739130435,
      "eval_loss": 1.1928998231887817,
      "eval_runtime": 0.7598,
      "eval_samples_per_second": 60.544,
      "eval_steps_per_second": 3.949,
      "step": 326
    },
    {
      "epoch": 29.32608695652174,
      "grad_norm": 34.29880905151367,
      "learning_rate": 1.1111111111111113e-05,
      "loss": 0.2584,
      "step": 330
    },
    {
      "epoch": 29.934782608695652,
      "eval_accuracy": 0.6739130434782609,
      "eval_loss": 1.095699667930603,
      "eval_runtime": 0.7593,
      "eval_samples_per_second": 60.584,
      "eval_steps_per_second": 3.951,
      "step": 337
    },
    {
      "epoch": 30.217391304347824,
      "grad_norm": 35.58168411254883,
      "learning_rate": 1.0101010101010103e-05,
      "loss": 0.2204,
      "step": 340
    },
    {
      "epoch": 30.91304347826087,
      "eval_accuracy": 0.6739130434782609,
      "eval_loss": 1.172129511833191,
      "eval_runtime": 1.009,
      "eval_samples_per_second": 45.59,
      "eval_steps_per_second": 2.973,
      "step": 348
    },
    {
      "epoch": 31.108695652173914,
      "grad_norm": 40.2222900390625,
      "learning_rate": 9.090909090909091e-06,
      "loss": 0.2057,
      "step": 350
    },
    {
      "epoch": 31.97826086956522,
      "grad_norm": 61.76468276977539,
      "learning_rate": 8.08080808080808e-06,
      "loss": 0.2031,
      "step": 360
    },
    {
      "epoch": 31.97826086956522,
      "eval_accuracy": 0.6739130434782609,
      "eval_loss": 1.0842841863632202,
      "eval_runtime": 0.7718,
      "eval_samples_per_second": 59.603,
      "eval_steps_per_second": 3.887,
      "step": 360
    },
    {
      "epoch": 32.869565217391305,
      "grad_norm": 50.13585662841797,
      "learning_rate": 7.070707070707071e-06,
      "loss": 0.2241,
      "step": 370
    },
    {
      "epoch": 32.95652173913044,
      "eval_accuracy": 0.6956521739130435,
      "eval_loss": 1.1350224018096924,
      "eval_runtime": 0.7479,
      "eval_samples_per_second": 61.506,
      "eval_steps_per_second": 4.011,
      "step": 371
    },
    {
      "epoch": 33.76086956521739,
      "grad_norm": 45.946346282958984,
      "learning_rate": 6.060606060606061e-06,
      "loss": 0.1798,
      "step": 380
    },
    {
      "epoch": 33.93478260869565,
      "eval_accuracy": 0.6956521739130435,
      "eval_loss": 1.2418912649154663,
      "eval_runtime": 0.8117,
      "eval_samples_per_second": 56.668,
      "eval_steps_per_second": 3.696,
      "step": 382
    },
    {
      "epoch": 34.65217391304348,
      "grad_norm": 53.28645324707031,
      "learning_rate": 5.0505050505050515e-06,
      "loss": 0.2435,
      "step": 390
    },
    {
      "epoch": 34.91304347826087,
      "eval_accuracy": 0.6956521739130435,
      "eval_loss": 1.1522233486175537,
      "eval_runtime": 1.0835,
      "eval_samples_per_second": 42.454,
      "eval_steps_per_second": 2.769,
      "step": 393
    },
    {
      "epoch": 35.54347826086956,
      "grad_norm": 119.8594970703125,
      "learning_rate": 4.04040404040404e-06,
      "loss": 0.1857,
      "step": 400
    },
    {
      "epoch": 35.97826086956522,
      "eval_accuracy": 0.6956521739130435,
      "eval_loss": 1.1206859350204468,
      "eval_runtime": 0.8139,
      "eval_samples_per_second": 56.519,
      "eval_steps_per_second": 3.686,
      "step": 405
    },
    {
      "epoch": 36.43478260869565,
      "grad_norm": 62.90666198730469,
      "learning_rate": 3.0303030303030305e-06,
      "loss": 0.1889,
      "step": 410
    },
    {
      "epoch": 36.95652173913044,
      "eval_accuracy": 0.6956521739130435,
      "eval_loss": 1.1711089611053467,
      "eval_runtime": 0.8068,
      "eval_samples_per_second": 57.018,
      "eval_steps_per_second": 3.719,
      "step": 416
    },
    {
      "epoch": 37.32608695652174,
      "grad_norm": 36.55961608886719,
      "learning_rate": 2.02020202020202e-06,
      "loss": 0.2043,
      "step": 420
    },
    {
      "epoch": 37.93478260869565,
      "eval_accuracy": 0.6956521739130435,
      "eval_loss": 1.1978377103805542,
      "eval_runtime": 0.8416,
      "eval_samples_per_second": 54.66,
      "eval_steps_per_second": 3.565,
      "step": 427
    },
    {
      "epoch": 38.21739130434783,
      "grad_norm": 41.95515441894531,
      "learning_rate": 1.01010101010101e-06,
      "loss": 0.1951,
      "step": 430
    },
    {
      "epoch": 38.91304347826087,
      "eval_accuracy": 0.717391304347826,
      "eval_loss": 1.2106621265411377,
      "eval_runtime": 1.1122,
      "eval_samples_per_second": 41.361,
      "eval_steps_per_second": 2.697,
      "step": 438
    },
    {
      "epoch": 39.108695652173914,
      "grad_norm": 97.34293365478516,
      "learning_rate": 0.0,
      "loss": 0.1901,
      "step": 440
    },
    {
      "epoch": 39.108695652173914,
      "eval_accuracy": 0.717391304347826,
      "eval_loss": 1.2108197212219238,
      "eval_runtime": 0.8571,
      "eval_samples_per_second": 53.667,
      "eval_steps_per_second": 3.5,
      "step": 440
    },
    {
      "epoch": 39.108695652173914,
      "step": 440,
      "total_flos": 9.162177814462464e+17,
      "train_loss": 0.45710954666137693,
      "train_runtime": 931.6489,
      "train_samples_per_second": 30.956,
      "train_steps_per_second": 0.472
    }
  ],
  "logging_steps": 10,
  "max_steps": 440,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 40,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 9.162177814462464e+17,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}