| { |
| "best_metric": 0.12307874858379364, |
| "best_model_checkpoint": "./vit-base-lcdoctypev1_session2/checkpoint-495", |
| "epoch": 10.0, |
| "eval_steps": 5, |
| "global_step": 600, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.08, |
| "eval_accuracy": 0.9008264462809917, |
| "eval_loss": 0.3652230203151703, |
| "eval_runtime": 14.9109, |
| "eval_samples_per_second": 8.115, |
| "eval_steps_per_second": 1.073, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.17, |
| "grad_norm": 14.786271095275879, |
| "learning_rate": 0.00019666666666666666, |
| "loss": 0.2494, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.17, |
| "eval_accuracy": 0.8925619834710744, |
| "eval_loss": 0.4354330003261566, |
| "eval_runtime": 8.5868, |
| "eval_samples_per_second": 14.091, |
| "eval_steps_per_second": 1.863, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.25, |
| "eval_accuracy": 0.8264462809917356, |
| "eval_loss": 0.6027403473854065, |
| "eval_runtime": 9.2069, |
| "eval_samples_per_second": 13.142, |
| "eval_steps_per_second": 1.738, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.33, |
| "grad_norm": 4.415946960449219, |
| "learning_rate": 0.00019333333333333333, |
| "loss": 0.4714, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.33, |
| "eval_accuracy": 0.8181818181818182, |
| "eval_loss": 0.6015945076942444, |
| "eval_runtime": 9.5688, |
| "eval_samples_per_second": 12.645, |
| "eval_steps_per_second": 1.672, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.42, |
| "eval_accuracy": 0.8016528925619835, |
| "eval_loss": 0.6494660377502441, |
| "eval_runtime": 10.377, |
| "eval_samples_per_second": 11.66, |
| "eval_steps_per_second": 1.542, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.5, |
| "grad_norm": 6.862515926361084, |
| "learning_rate": 0.00019, |
| "loss": 0.6257, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.5, |
| "eval_accuracy": 0.7768595041322314, |
| "eval_loss": 0.6560969352722168, |
| "eval_runtime": 8.3435, |
| "eval_samples_per_second": 14.502, |
| "eval_steps_per_second": 1.918, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.58, |
| "eval_accuracy": 0.8264462809917356, |
| "eval_loss": 0.43593400716781616, |
| "eval_runtime": 8.4257, |
| "eval_samples_per_second": 14.361, |
| "eval_steps_per_second": 1.899, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.67, |
| "grad_norm": 2.0207698345184326, |
| "learning_rate": 0.0001866666666666667, |
| "loss": 0.3534, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.67, |
| "eval_accuracy": 0.8760330578512396, |
| "eval_loss": 0.4082220494747162, |
| "eval_runtime": 8.58, |
| "eval_samples_per_second": 14.103, |
| "eval_steps_per_second": 1.865, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.75, |
| "eval_accuracy": 0.9173553719008265, |
| "eval_loss": 0.3042462468147278, |
| "eval_runtime": 8.562, |
| "eval_samples_per_second": 14.132, |
| "eval_steps_per_second": 1.869, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.83, |
| "grad_norm": 1.4216089248657227, |
| "learning_rate": 0.00018333333333333334, |
| "loss": 0.2059, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.83, |
| "eval_accuracy": 0.8842975206611571, |
| "eval_loss": 0.39185529947280884, |
| "eval_runtime": 9.1512, |
| "eval_samples_per_second": 13.222, |
| "eval_steps_per_second": 1.748, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.92, |
| "eval_accuracy": 0.7520661157024794, |
| "eval_loss": 0.9258511066436768, |
| "eval_runtime": 8.9229, |
| "eval_samples_per_second": 13.561, |
| "eval_steps_per_second": 1.793, |
| "step": 55 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 1.3684077262878418, |
| "learning_rate": 0.00018033333333333334, |
| "loss": 0.3693, |
| "step": 60 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_accuracy": 0.8429752066115702, |
| "eval_loss": 0.5526923537254333, |
| "eval_runtime": 8.7728, |
| "eval_samples_per_second": 13.793, |
| "eval_steps_per_second": 1.824, |
| "step": 60 |
| }, |
| { |
| "epoch": 1.08, |
| "eval_accuracy": 0.859504132231405, |
| "eval_loss": 0.34124353528022766, |
| "eval_runtime": 8.8789, |
| "eval_samples_per_second": 13.628, |
| "eval_steps_per_second": 1.802, |
| "step": 65 |
| }, |
| { |
| "epoch": 1.17, |
| "grad_norm": 4.804208278656006, |
| "learning_rate": 0.00017700000000000002, |
| "loss": 0.3436, |
| "step": 70 |
| }, |
| { |
| "epoch": 1.17, |
| "eval_accuracy": 0.859504132231405, |
| "eval_loss": 0.39490050077438354, |
| "eval_runtime": 8.8248, |
| "eval_samples_per_second": 13.711, |
| "eval_steps_per_second": 1.813, |
| "step": 70 |
| }, |
| { |
| "epoch": 1.25, |
| "eval_accuracy": 0.8760330578512396, |
| "eval_loss": 0.4402238726615906, |
| "eval_runtime": 8.0873, |
| "eval_samples_per_second": 14.962, |
| "eval_steps_per_second": 1.978, |
| "step": 75 |
| }, |
| { |
| "epoch": 1.33, |
| "grad_norm": 8.084527015686035, |
| "learning_rate": 0.00017366666666666667, |
| "loss": 0.361, |
| "step": 80 |
| }, |
| { |
| "epoch": 1.33, |
| "eval_accuracy": 0.7355371900826446, |
| "eval_loss": 0.8711686730384827, |
| "eval_runtime": 9.1801, |
| "eval_samples_per_second": 13.181, |
| "eval_steps_per_second": 1.743, |
| "step": 80 |
| }, |
| { |
| "epoch": 1.42, |
| "eval_accuracy": 0.8429752066115702, |
| "eval_loss": 0.41366249322891235, |
| "eval_runtime": 8.6865, |
| "eval_samples_per_second": 13.93, |
| "eval_steps_per_second": 1.842, |
| "step": 85 |
| }, |
| { |
| "epoch": 1.5, |
| "grad_norm": 0.58330899477005, |
| "learning_rate": 0.00017033333333333334, |
| "loss": 0.4531, |
| "step": 90 |
| }, |
| { |
| "epoch": 1.5, |
| "eval_accuracy": 0.9256198347107438, |
| "eval_loss": 0.24840475618839264, |
| "eval_runtime": 9.2797, |
| "eval_samples_per_second": 13.039, |
| "eval_steps_per_second": 1.724, |
| "step": 90 |
| }, |
| { |
| "epoch": 1.58, |
| "eval_accuracy": 0.9008264462809917, |
| "eval_loss": 0.30718564987182617, |
| "eval_runtime": 8.3573, |
| "eval_samples_per_second": 14.478, |
| "eval_steps_per_second": 1.914, |
| "step": 95 |
| }, |
| { |
| "epoch": 1.67, |
| "grad_norm": 3.304471254348755, |
| "learning_rate": 0.000167, |
| "loss": 0.223, |
| "step": 100 |
| }, |
| { |
| "epoch": 1.67, |
| "eval_accuracy": 0.9008264462809917, |
| "eval_loss": 0.3936420679092407, |
| "eval_runtime": 9.6744, |
| "eval_samples_per_second": 12.507, |
| "eval_steps_per_second": 1.654, |
| "step": 100 |
| }, |
| { |
| "epoch": 1.75, |
| "eval_accuracy": 0.859504132231405, |
| "eval_loss": 0.47976797819137573, |
| "eval_runtime": 8.673, |
| "eval_samples_per_second": 13.951, |
| "eval_steps_per_second": 1.845, |
| "step": 105 |
| }, |
| { |
| "epoch": 1.83, |
| "grad_norm": 5.060550689697266, |
| "learning_rate": 0.00016366666666666667, |
| "loss": 0.3396, |
| "step": 110 |
| }, |
| { |
| "epoch": 1.83, |
| "eval_accuracy": 0.8760330578512396, |
| "eval_loss": 0.37310853600502014, |
| "eval_runtime": 9.0352, |
| "eval_samples_per_second": 13.392, |
| "eval_steps_per_second": 1.771, |
| "step": 110 |
| }, |
| { |
| "epoch": 1.92, |
| "eval_accuracy": 0.9173553719008265, |
| "eval_loss": 0.3138490915298462, |
| "eval_runtime": 8.1608, |
| "eval_samples_per_second": 14.827, |
| "eval_steps_per_second": 1.961, |
| "step": 115 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 3.8099405765533447, |
| "learning_rate": 0.00016033333333333335, |
| "loss": 0.1516, |
| "step": 120 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_accuracy": 0.9008264462809917, |
| "eval_loss": 0.3139742314815521, |
| "eval_runtime": 9.3296, |
| "eval_samples_per_second": 12.97, |
| "eval_steps_per_second": 1.715, |
| "step": 120 |
| }, |
| { |
| "epoch": 2.08, |
| "eval_accuracy": 0.8925619834710744, |
| "eval_loss": 0.39376479387283325, |
| "eval_runtime": 8.8987, |
| "eval_samples_per_second": 13.597, |
| "eval_steps_per_second": 1.798, |
| "step": 125 |
| }, |
| { |
| "epoch": 2.17, |
| "grad_norm": 1.9554457664489746, |
| "learning_rate": 0.00015700000000000002, |
| "loss": 0.1709, |
| "step": 130 |
| }, |
| { |
| "epoch": 2.17, |
| "eval_accuracy": 0.9173553719008265, |
| "eval_loss": 0.3310838043689728, |
| "eval_runtime": 9.5915, |
| "eval_samples_per_second": 12.615, |
| "eval_steps_per_second": 1.668, |
| "step": 130 |
| }, |
| { |
| "epoch": 2.25, |
| "eval_accuracy": 0.9090909090909091, |
| "eval_loss": 0.365205317735672, |
| "eval_runtime": 8.8296, |
| "eval_samples_per_second": 13.704, |
| "eval_steps_per_second": 1.812, |
| "step": 135 |
| }, |
| { |
| "epoch": 2.33, |
| "grad_norm": 2.2786786556243896, |
| "learning_rate": 0.00015366666666666667, |
| "loss": 0.1193, |
| "step": 140 |
| }, |
| { |
| "epoch": 2.33, |
| "eval_accuracy": 0.9008264462809917, |
| "eval_loss": 0.40327194333076477, |
| "eval_runtime": 8.3012, |
| "eval_samples_per_second": 14.576, |
| "eval_steps_per_second": 1.927, |
| "step": 140 |
| }, |
| { |
| "epoch": 2.42, |
| "eval_accuracy": 0.8925619834710744, |
| "eval_loss": 0.3220081925392151, |
| "eval_runtime": 9.1665, |
| "eval_samples_per_second": 13.2, |
| "eval_steps_per_second": 1.745, |
| "step": 145 |
| }, |
| { |
| "epoch": 2.5, |
| "grad_norm": 1.6346056461334229, |
| "learning_rate": 0.00015033333333333335, |
| "loss": 0.2134, |
| "step": 150 |
| }, |
| { |
| "epoch": 2.5, |
| "eval_accuracy": 0.859504132231405, |
| "eval_loss": 0.4836377799510956, |
| "eval_runtime": 8.5024, |
| "eval_samples_per_second": 14.231, |
| "eval_steps_per_second": 1.882, |
| "step": 150 |
| }, |
| { |
| "epoch": 2.58, |
| "eval_accuracy": 0.9090909090909091, |
| "eval_loss": 0.2969018518924713, |
| "eval_runtime": 8.2452, |
| "eval_samples_per_second": 14.675, |
| "eval_steps_per_second": 1.941, |
| "step": 155 |
| }, |
| { |
| "epoch": 2.67, |
| "grad_norm": 1.028257966041565, |
| "learning_rate": 0.000147, |
| "loss": 0.1314, |
| "step": 160 |
| }, |
| { |
| "epoch": 2.67, |
| "eval_accuracy": 0.9173553719008265, |
| "eval_loss": 0.30324843525886536, |
| "eval_runtime": 9.1615, |
| "eval_samples_per_second": 13.208, |
| "eval_steps_per_second": 1.746, |
| "step": 160 |
| }, |
| { |
| "epoch": 2.75, |
| "eval_accuracy": 0.8842975206611571, |
| "eval_loss": 0.34657248854637146, |
| "eval_runtime": 9.0554, |
| "eval_samples_per_second": 13.362, |
| "eval_steps_per_second": 1.767, |
| "step": 165 |
| }, |
| { |
| "epoch": 2.83, |
| "grad_norm": 1.7740827798843384, |
| "learning_rate": 0.00014366666666666667, |
| "loss": 0.2197, |
| "step": 170 |
| }, |
| { |
| "epoch": 2.83, |
| "eval_accuracy": 0.8842975206611571, |
| "eval_loss": 0.27489545941352844, |
| "eval_runtime": 9.1051, |
| "eval_samples_per_second": 13.289, |
| "eval_steps_per_second": 1.757, |
| "step": 170 |
| }, |
| { |
| "epoch": 2.92, |
| "eval_accuracy": 0.9256198347107438, |
| "eval_loss": 0.20714518427848816, |
| "eval_runtime": 8.5633, |
| "eval_samples_per_second": 14.13, |
| "eval_steps_per_second": 1.868, |
| "step": 175 |
| }, |
| { |
| "epoch": 3.0, |
| "grad_norm": 11.374886512756348, |
| "learning_rate": 0.00014033333333333335, |
| "loss": 0.1748, |
| "step": 180 |
| }, |
| { |
| "epoch": 3.0, |
| "eval_accuracy": 0.9256198347107438, |
| "eval_loss": 0.24768640100955963, |
| "eval_runtime": 8.6985, |
| "eval_samples_per_second": 13.91, |
| "eval_steps_per_second": 1.839, |
| "step": 180 |
| }, |
| { |
| "epoch": 3.08, |
| "eval_accuracy": 0.9173553719008265, |
| "eval_loss": 0.22793906927108765, |
| "eval_runtime": 8.7909, |
| "eval_samples_per_second": 13.764, |
| "eval_steps_per_second": 1.82, |
| "step": 185 |
| }, |
| { |
| "epoch": 3.17, |
| "grad_norm": 3.5179049968719482, |
| "learning_rate": 0.00013700000000000002, |
| "loss": 0.0949, |
| "step": 190 |
| }, |
| { |
| "epoch": 3.17, |
| "eval_accuracy": 0.9090909090909091, |
| "eval_loss": 0.2858579456806183, |
| "eval_runtime": 8.5635, |
| "eval_samples_per_second": 14.13, |
| "eval_steps_per_second": 1.868, |
| "step": 190 |
| }, |
| { |
| "epoch": 3.25, |
| "eval_accuracy": 0.9008264462809917, |
| "eval_loss": 0.27311527729034424, |
| "eval_runtime": 8.4148, |
| "eval_samples_per_second": 14.379, |
| "eval_steps_per_second": 1.901, |
| "step": 195 |
| }, |
| { |
| "epoch": 3.33, |
| "grad_norm": 0.2969065308570862, |
| "learning_rate": 0.00013366666666666667, |
| "loss": 0.1901, |
| "step": 200 |
| }, |
| { |
| "epoch": 3.33, |
| "eval_accuracy": 0.9338842975206612, |
| "eval_loss": 0.2329086810350418, |
| "eval_runtime": 8.2873, |
| "eval_samples_per_second": 14.601, |
| "eval_steps_per_second": 1.931, |
| "step": 200 |
| }, |
| { |
| "epoch": 3.42, |
| "eval_accuracy": 0.9008264462809917, |
| "eval_loss": 0.43126779794692993, |
| "eval_runtime": 8.1358, |
| "eval_samples_per_second": 14.872, |
| "eval_steps_per_second": 1.967, |
| "step": 205 |
| }, |
| { |
| "epoch": 3.5, |
| "grad_norm": 1.591079831123352, |
| "learning_rate": 0.00013033333333333332, |
| "loss": 0.1897, |
| "step": 210 |
| }, |
| { |
| "epoch": 3.5, |
| "eval_accuracy": 0.9008264462809917, |
| "eval_loss": 0.3919370472431183, |
| "eval_runtime": 8.8739, |
| "eval_samples_per_second": 13.635, |
| "eval_steps_per_second": 1.803, |
| "step": 210 |
| }, |
| { |
| "epoch": 3.58, |
| "eval_accuracy": 0.9504132231404959, |
| "eval_loss": 0.2083331048488617, |
| "eval_runtime": 8.8078, |
| "eval_samples_per_second": 13.738, |
| "eval_steps_per_second": 1.817, |
| "step": 215 |
| }, |
| { |
| "epoch": 3.67, |
| "grad_norm": 0.08221092075109482, |
| "learning_rate": 0.000127, |
| "loss": 0.1213, |
| "step": 220 |
| }, |
| { |
| "epoch": 3.67, |
| "eval_accuracy": 0.9421487603305785, |
| "eval_loss": 0.2194199413061142, |
| "eval_runtime": 8.0138, |
| "eval_samples_per_second": 15.099, |
| "eval_steps_per_second": 1.997, |
| "step": 220 |
| }, |
| { |
| "epoch": 3.75, |
| "eval_accuracy": 0.9421487603305785, |
| "eval_loss": 0.25043562054634094, |
| "eval_runtime": 8.958, |
| "eval_samples_per_second": 13.507, |
| "eval_steps_per_second": 1.786, |
| "step": 225 |
| }, |
| { |
| "epoch": 3.83, |
| "grad_norm": 1.115979552268982, |
| "learning_rate": 0.00012366666666666667, |
| "loss": 0.1225, |
| "step": 230 |
| }, |
| { |
| "epoch": 3.83, |
| "eval_accuracy": 0.9338842975206612, |
| "eval_loss": 0.27267003059387207, |
| "eval_runtime": 9.1697, |
| "eval_samples_per_second": 13.196, |
| "eval_steps_per_second": 1.745, |
| "step": 230 |
| }, |
| { |
| "epoch": 3.92, |
| "eval_accuracy": 0.859504132231405, |
| "eval_loss": 0.5614737272262573, |
| "eval_runtime": 8.8895, |
| "eval_samples_per_second": 13.612, |
| "eval_steps_per_second": 1.8, |
| "step": 235 |
| }, |
| { |
| "epoch": 4.0, |
| "grad_norm": 3.0505728721618652, |
| "learning_rate": 0.00012033333333333335, |
| "loss": 0.148, |
| "step": 240 |
| }, |
| { |
| "epoch": 4.0, |
| "eval_accuracy": 0.9090909090909091, |
| "eval_loss": 0.3485010266304016, |
| "eval_runtime": 8.5815, |
| "eval_samples_per_second": 14.1, |
| "eval_steps_per_second": 1.864, |
| "step": 240 |
| }, |
| { |
| "epoch": 4.08, |
| "eval_accuracy": 0.9008264462809917, |
| "eval_loss": 0.23242339491844177, |
| "eval_runtime": 8.9078, |
| "eval_samples_per_second": 13.584, |
| "eval_steps_per_second": 1.796, |
| "step": 245 |
| }, |
| { |
| "epoch": 4.17, |
| "grad_norm": 1.005444049835205, |
| "learning_rate": 0.000117, |
| "loss": 0.1498, |
| "step": 250 |
| }, |
| { |
| "epoch": 4.17, |
| "eval_accuracy": 0.9256198347107438, |
| "eval_loss": 0.26890477538108826, |
| "eval_runtime": 8.5801, |
| "eval_samples_per_second": 14.102, |
| "eval_steps_per_second": 1.865, |
| "step": 250 |
| }, |
| { |
| "epoch": 4.25, |
| "eval_accuracy": 0.9504132231404959, |
| "eval_loss": 0.19466207921504974, |
| "eval_runtime": 8.5893, |
| "eval_samples_per_second": 14.087, |
| "eval_steps_per_second": 1.863, |
| "step": 255 |
| }, |
| { |
| "epoch": 4.33, |
| "grad_norm": 0.06609375029802322, |
| "learning_rate": 0.00011366666666666667, |
| "loss": 0.0504, |
| "step": 260 |
| }, |
| { |
| "epoch": 4.33, |
| "eval_accuracy": 0.9338842975206612, |
| "eval_loss": 0.22750769555568695, |
| "eval_runtime": 8.9359, |
| "eval_samples_per_second": 13.541, |
| "eval_steps_per_second": 1.791, |
| "step": 260 |
| }, |
| { |
| "epoch": 4.42, |
| "eval_accuracy": 0.9338842975206612, |
| "eval_loss": 0.27902674674987793, |
| "eval_runtime": 8.1737, |
| "eval_samples_per_second": 14.804, |
| "eval_steps_per_second": 1.958, |
| "step": 265 |
| }, |
| { |
| "epoch": 4.5, |
| "grad_norm": 0.6615959405899048, |
| "learning_rate": 0.00011033333333333334, |
| "loss": 0.0465, |
| "step": 270 |
| }, |
| { |
| "epoch": 4.5, |
| "eval_accuracy": 0.9256198347107438, |
| "eval_loss": 0.2726598381996155, |
| "eval_runtime": 8.8273, |
| "eval_samples_per_second": 13.708, |
| "eval_steps_per_second": 1.813, |
| "step": 270 |
| }, |
| { |
| "epoch": 4.58, |
| "eval_accuracy": 0.9256198347107438, |
| "eval_loss": 0.29490870237350464, |
| "eval_runtime": 9.315, |
| "eval_samples_per_second": 12.99, |
| "eval_steps_per_second": 1.718, |
| "step": 275 |
| }, |
| { |
| "epoch": 4.67, |
| "grad_norm": 0.5490814447402954, |
| "learning_rate": 0.00010700000000000001, |
| "loss": 0.0744, |
| "step": 280 |
| }, |
| { |
| "epoch": 4.67, |
| "eval_accuracy": 0.9421487603305785, |
| "eval_loss": 0.27046242356300354, |
| "eval_runtime": 8.8539, |
| "eval_samples_per_second": 13.666, |
| "eval_steps_per_second": 1.807, |
| "step": 280 |
| }, |
| { |
| "epoch": 4.75, |
| "eval_accuracy": 0.9338842975206612, |
| "eval_loss": 0.27866455912590027, |
| "eval_runtime": 8.2611, |
| "eval_samples_per_second": 14.647, |
| "eval_steps_per_second": 1.937, |
| "step": 285 |
| }, |
| { |
| "epoch": 4.83, |
| "grad_norm": 2.34318470954895, |
| "learning_rate": 0.00010366666666666666, |
| "loss": 0.1504, |
| "step": 290 |
| }, |
| { |
| "epoch": 4.83, |
| "eval_accuracy": 0.9421487603305785, |
| "eval_loss": 0.23609085381031036, |
| "eval_runtime": 8.7424, |
| "eval_samples_per_second": 13.841, |
| "eval_steps_per_second": 1.83, |
| "step": 290 |
| }, |
| { |
| "epoch": 4.92, |
| "eval_accuracy": 0.9256198347107438, |
| "eval_loss": 0.2885703444480896, |
| "eval_runtime": 9.0184, |
| "eval_samples_per_second": 13.417, |
| "eval_steps_per_second": 1.774, |
| "step": 295 |
| }, |
| { |
| "epoch": 5.0, |
| "grad_norm": 1.1489334106445312, |
| "learning_rate": 0.00010033333333333335, |
| "loss": 0.1192, |
| "step": 300 |
| }, |
| { |
| "epoch": 5.0, |
| "eval_accuracy": 0.9256198347107438, |
| "eval_loss": 0.26016703248023987, |
| "eval_runtime": 8.8229, |
| "eval_samples_per_second": 13.714, |
| "eval_steps_per_second": 1.813, |
| "step": 300 |
| }, |
| { |
| "epoch": 5.08, |
| "eval_accuracy": 0.9256198347107438, |
| "eval_loss": 0.21833574771881104, |
| "eval_runtime": 8.7885, |
| "eval_samples_per_second": 13.768, |
| "eval_steps_per_second": 1.821, |
| "step": 305 |
| }, |
| { |
| "epoch": 5.17, |
| "grad_norm": 0.05103103816509247, |
| "learning_rate": 9.7e-05, |
| "loss": 0.0749, |
| "step": 310 |
| }, |
| { |
| "epoch": 5.17, |
| "eval_accuracy": 0.9173553719008265, |
| "eval_loss": 0.2774341404438019, |
| "eval_runtime": 9.1103, |
| "eval_samples_per_second": 13.282, |
| "eval_steps_per_second": 1.756, |
| "step": 310 |
| }, |
| { |
| "epoch": 5.25, |
| "eval_accuracy": 0.9090909090909091, |
| "eval_loss": 0.24913911521434784, |
| "eval_runtime": 8.9144, |
| "eval_samples_per_second": 13.574, |
| "eval_steps_per_second": 1.795, |
| "step": 315 |
| }, |
| { |
| "epoch": 5.33, |
| "grad_norm": 0.2730455696582794, |
| "learning_rate": 9.366666666666668e-05, |
| "loss": 0.0719, |
| "step": 320 |
| }, |
| { |
| "epoch": 5.33, |
| "eval_accuracy": 0.9504132231404959, |
| "eval_loss": 0.19020099937915802, |
| "eval_runtime": 8.6412, |
| "eval_samples_per_second": 14.003, |
| "eval_steps_per_second": 1.852, |
| "step": 320 |
| }, |
| { |
| "epoch": 5.42, |
| "eval_accuracy": 0.9504132231404959, |
| "eval_loss": 0.19153207540512085, |
| "eval_runtime": 8.3669, |
| "eval_samples_per_second": 14.462, |
| "eval_steps_per_second": 1.912, |
| "step": 325 |
| }, |
| { |
| "epoch": 5.5, |
| "grad_norm": 0.15992069244384766, |
| "learning_rate": 9.033333333333334e-05, |
| "loss": 0.061, |
| "step": 330 |
| }, |
| { |
| "epoch": 5.5, |
| "eval_accuracy": 0.9586776859504132, |
| "eval_loss": 0.1713331937789917, |
| "eval_runtime": 9.0048, |
| "eval_samples_per_second": 13.437, |
| "eval_steps_per_second": 1.777, |
| "step": 330 |
| }, |
| { |
| "epoch": 5.58, |
| "eval_accuracy": 0.9504132231404959, |
| "eval_loss": 0.21169038116931915, |
| "eval_runtime": 9.7391, |
| "eval_samples_per_second": 12.424, |
| "eval_steps_per_second": 1.643, |
| "step": 335 |
| }, |
| { |
| "epoch": 5.67, |
| "grad_norm": 0.049180515110492706, |
| "learning_rate": 8.7e-05, |
| "loss": 0.0381, |
| "step": 340 |
| }, |
| { |
| "epoch": 5.67, |
| "eval_accuracy": 0.9586776859504132, |
| "eval_loss": 0.21453851461410522, |
| "eval_runtime": 8.8439, |
| "eval_samples_per_second": 13.682, |
| "eval_steps_per_second": 1.809, |
| "step": 340 |
| }, |
| { |
| "epoch": 5.75, |
| "eval_accuracy": 0.9504132231404959, |
| "eval_loss": 0.21080859005451202, |
| "eval_runtime": 8.0349, |
| "eval_samples_per_second": 15.059, |
| "eval_steps_per_second": 1.991, |
| "step": 345 |
| }, |
| { |
| "epoch": 5.83, |
| "grad_norm": 0.06704477965831757, |
| "learning_rate": 8.366666666666668e-05, |
| "loss": 0.0494, |
| "step": 350 |
| }, |
| { |
| "epoch": 5.83, |
| "eval_accuracy": 0.9504132231404959, |
| "eval_loss": 0.20059601962566376, |
| "eval_runtime": 8.938, |
| "eval_samples_per_second": 13.538, |
| "eval_steps_per_second": 1.79, |
| "step": 350 |
| }, |
| { |
| "epoch": 5.92, |
| "eval_accuracy": 0.9504132231404959, |
| "eval_loss": 0.1934904307126999, |
| "eval_runtime": 8.6422, |
| "eval_samples_per_second": 14.001, |
| "eval_steps_per_second": 1.851, |
| "step": 355 |
| }, |
| { |
| "epoch": 6.0, |
| "grad_norm": 0.9374514818191528, |
| "learning_rate": 8.033333333333334e-05, |
| "loss": 0.0341, |
| "step": 360 |
| }, |
| { |
| "epoch": 6.0, |
| "eval_accuracy": 0.9669421487603306, |
| "eval_loss": 0.18909606337547302, |
| "eval_runtime": 7.8226, |
| "eval_samples_per_second": 15.468, |
| "eval_steps_per_second": 2.045, |
| "step": 360 |
| }, |
| { |
| "epoch": 6.08, |
| "eval_accuracy": 0.9669421487603306, |
| "eval_loss": 0.16753415763378143, |
| "eval_runtime": 9.5822, |
| "eval_samples_per_second": 12.628, |
| "eval_steps_per_second": 1.67, |
| "step": 365 |
| }, |
| { |
| "epoch": 6.17, |
| "grad_norm": 0.034694839268922806, |
| "learning_rate": 7.7e-05, |
| "loss": 0.0186, |
| "step": 370 |
| }, |
| { |
| "epoch": 6.17, |
| "eval_accuracy": 0.9669421487603306, |
| "eval_loss": 0.1582479178905487, |
| "eval_runtime": 8.884, |
| "eval_samples_per_second": 13.62, |
| "eval_steps_per_second": 1.801, |
| "step": 370 |
| }, |
| { |
| "epoch": 6.25, |
| "eval_accuracy": 0.9669421487603306, |
| "eval_loss": 0.14699509739875793, |
| "eval_runtime": 8.4323, |
| "eval_samples_per_second": 14.35, |
| "eval_steps_per_second": 1.897, |
| "step": 375 |
| }, |
| { |
| "epoch": 6.33, |
| "grad_norm": 0.03971986472606659, |
| "learning_rate": 7.366666666666668e-05, |
| "loss": 0.0444, |
| "step": 380 |
| }, |
| { |
| "epoch": 6.33, |
| "eval_accuracy": 0.9752066115702479, |
| "eval_loss": 0.14012493193149567, |
| "eval_runtime": 8.8224, |
| "eval_samples_per_second": 13.715, |
| "eval_steps_per_second": 1.814, |
| "step": 380 |
| }, |
| { |
| "epoch": 6.42, |
| "eval_accuracy": 0.9669421487603306, |
| "eval_loss": 0.1611337959766388, |
| "eval_runtime": 8.8487, |
| "eval_samples_per_second": 13.674, |
| "eval_steps_per_second": 1.808, |
| "step": 385 |
| }, |
| { |
| "epoch": 6.5, |
| "grad_norm": 0.25355586409568787, |
| "learning_rate": 7.033333333333334e-05, |
| "loss": 0.031, |
| "step": 390 |
| }, |
| { |
| "epoch": 6.5, |
| "eval_accuracy": 0.9669421487603306, |
| "eval_loss": 0.1922798752784729, |
| "eval_runtime": 9.1161, |
| "eval_samples_per_second": 13.273, |
| "eval_steps_per_second": 1.755, |
| "step": 390 |
| }, |
| { |
| "epoch": 6.58, |
| "eval_accuracy": 0.9338842975206612, |
| "eval_loss": 0.2467259168624878, |
| "eval_runtime": 9.4027, |
| "eval_samples_per_second": 12.869, |
| "eval_steps_per_second": 1.702, |
| "step": 395 |
| }, |
| { |
| "epoch": 6.67, |
| "grad_norm": 2.2045631408691406, |
| "learning_rate": 6.7e-05, |
| "loss": 0.0415, |
| "step": 400 |
| }, |
| { |
| "epoch": 6.67, |
| "eval_accuracy": 0.9586776859504132, |
| "eval_loss": 0.19972926378250122, |
| "eval_runtime": 8.6069, |
| "eval_samples_per_second": 14.058, |
| "eval_steps_per_second": 1.859, |
| "step": 400 |
| }, |
| { |
| "epoch": 6.75, |
| "eval_accuracy": 0.9669421487603306, |
| "eval_loss": 0.17411363124847412, |
| "eval_runtime": 8.2589, |
| "eval_samples_per_second": 14.651, |
| "eval_steps_per_second": 1.937, |
| "step": 405 |
| }, |
| { |
| "epoch": 6.83, |
| "grad_norm": 0.03469853475689888, |
| "learning_rate": 6.366666666666668e-05, |
| "loss": 0.0396, |
| "step": 410 |
| }, |
| { |
| "epoch": 6.83, |
| "eval_accuracy": 0.9669421487603306, |
| "eval_loss": 0.18815675377845764, |
| "eval_runtime": 8.9038, |
| "eval_samples_per_second": 13.59, |
| "eval_steps_per_second": 1.797, |
| "step": 410 |
| }, |
| { |
| "epoch": 6.92, |
| "eval_accuracy": 0.9586776859504132, |
| "eval_loss": 0.22658267617225647, |
| "eval_runtime": 8.6049, |
| "eval_samples_per_second": 14.062, |
| "eval_steps_per_second": 1.859, |
| "step": 415 |
| }, |
| { |
| "epoch": 7.0, |
| "grad_norm": 3.518434762954712, |
| "learning_rate": 6.033333333333334e-05, |
| "loss": 0.1419, |
| "step": 420 |
| }, |
| { |
| "epoch": 7.0, |
| "eval_accuracy": 0.9586776859504132, |
| "eval_loss": 0.2240477055311203, |
| "eval_runtime": 8.4794, |
| "eval_samples_per_second": 14.27, |
| "eval_steps_per_second": 1.887, |
| "step": 420 |
| }, |
| { |
| "epoch": 7.08, |
| "eval_accuracy": 0.9669421487603306, |
| "eval_loss": 0.18324477970600128, |
| "eval_runtime": 9.3693, |
| "eval_samples_per_second": 12.914, |
| "eval_steps_per_second": 1.708, |
| "step": 425 |
| }, |
| { |
| "epoch": 7.17, |
| "grad_norm": 0.044734060764312744, |
| "learning_rate": 5.6999999999999996e-05, |
| "loss": 0.0401, |
| "step": 430 |
| }, |
| { |
| "epoch": 7.17, |
| "eval_accuracy": 0.9586776859504132, |
| "eval_loss": 0.17666588723659515, |
| "eval_runtime": 9.1672, |
| "eval_samples_per_second": 13.199, |
| "eval_steps_per_second": 1.745, |
| "step": 430 |
| }, |
| { |
| "epoch": 7.25, |
| "eval_accuracy": 0.9586776859504132, |
| "eval_loss": 0.1653054654598236, |
| "eval_runtime": 8.8493, |
| "eval_samples_per_second": 13.673, |
| "eval_steps_per_second": 1.808, |
| "step": 435 |
| }, |
| { |
| "epoch": 7.33, |
| "grad_norm": 0.11878877878189087, |
| "learning_rate": 5.3666666666666666e-05, |
| "loss": 0.0435, |
| "step": 440 |
| }, |
| { |
| "epoch": 7.33, |
| "eval_accuracy": 0.9421487603305785, |
| "eval_loss": 0.18726998567581177, |
| "eval_runtime": 8.2068, |
| "eval_samples_per_second": 14.744, |
| "eval_steps_per_second": 1.95, |
| "step": 440 |
| }, |
| { |
| "epoch": 7.42, |
| "eval_accuracy": 0.9504132231404959, |
| "eval_loss": 0.2050751894712448, |
| "eval_runtime": 8.9571, |
| "eval_samples_per_second": 13.509, |
| "eval_steps_per_second": 1.786, |
| "step": 445 |
| }, |
| { |
| "epoch": 7.5, |
| "grad_norm": 2.2258145809173584, |
| "learning_rate": 5.0333333333333335e-05, |
| "loss": 0.0368, |
| "step": 450 |
| }, |
| { |
| "epoch": 7.5, |
| "eval_accuracy": 0.9504132231404959, |
| "eval_loss": 0.2074841856956482, |
| "eval_runtime": 8.6188, |
| "eval_samples_per_second": 14.039, |
| "eval_steps_per_second": 1.856, |
| "step": 450 |
| }, |
| { |
| "epoch": 7.58, |
| "eval_accuracy": 0.9504132231404959, |
| "eval_loss": 0.19320955872535706, |
| "eval_runtime": 9.3527, |
| "eval_samples_per_second": 12.937, |
| "eval_steps_per_second": 1.711, |
| "step": 455 |
| }, |
| { |
| "epoch": 7.67, |
| "grad_norm": 0.17616397142410278, |
| "learning_rate": 4.7e-05, |
| "loss": 0.0144, |
| "step": 460 |
| }, |
| { |
| "epoch": 7.67, |
| "eval_accuracy": 0.9504132231404959, |
| "eval_loss": 0.1878087967634201, |
| "eval_runtime": 8.2188, |
| "eval_samples_per_second": 14.722, |
| "eval_steps_per_second": 1.947, |
| "step": 460 |
| }, |
| { |
| "epoch": 7.75, |
| "eval_accuracy": 0.9504132231404959, |
| "eval_loss": 0.18545229732990265, |
| "eval_runtime": 8.9738, |
| "eval_samples_per_second": 13.484, |
| "eval_steps_per_second": 1.783, |
| "step": 465 |
| }, |
| { |
| "epoch": 7.83, |
| "grad_norm": 0.18021804094314575, |
| "learning_rate": 4.3666666666666666e-05, |
| "loss": 0.0245, |
| "step": 470 |
| }, |
| { |
| "epoch": 7.83, |
| "eval_accuracy": 0.9586776859504132, |
| "eval_loss": 0.18264397978782654, |
| "eval_runtime": 8.9166, |
| "eval_samples_per_second": 13.57, |
| "eval_steps_per_second": 1.794, |
| "step": 470 |
| }, |
| { |
| "epoch": 7.92, |
| "eval_accuracy": 0.9669421487603306, |
| "eval_loss": 0.16517475247383118, |
| "eval_runtime": 8.6422, |
| "eval_samples_per_second": 14.001, |
| "eval_steps_per_second": 1.851, |
| "step": 475 |
| }, |
| { |
| "epoch": 8.0, |
| "grad_norm": 0.1717393398284912, |
| "learning_rate": 4.0333333333333336e-05, |
| "loss": 0.049, |
| "step": 480 |
| }, |
| { |
| "epoch": 8.0, |
| "eval_accuracy": 0.9669421487603306, |
| "eval_loss": 0.153965562582016, |
| "eval_runtime": 9.3675, |
| "eval_samples_per_second": 12.917, |
| "eval_steps_per_second": 1.708, |
| "step": 480 |
| }, |
| { |
| "epoch": 8.08, |
| "eval_accuracy": 0.9586776859504132, |
| "eval_loss": 0.15659011900424957, |
| "eval_runtime": 9.3381, |
| "eval_samples_per_second": 12.958, |
| "eval_steps_per_second": 1.713, |
| "step": 485 |
| }, |
| { |
| "epoch": 8.17, |
| "grad_norm": 0.03389211744070053, |
| "learning_rate": 3.7e-05, |
| "loss": 0.0205, |
| "step": 490 |
| }, |
| { |
| "epoch": 8.17, |
| "eval_accuracy": 0.9586776859504132, |
| "eval_loss": 0.1390300989151001, |
| "eval_runtime": 9.2535, |
| "eval_samples_per_second": 13.076, |
| "eval_steps_per_second": 1.729, |
| "step": 490 |
| }, |
| { |
| "epoch": 8.25, |
| "eval_accuracy": 0.9752066115702479, |
| "eval_loss": 0.12307874858379364, |
| "eval_runtime": 8.374, |
| "eval_samples_per_second": 14.449, |
| "eval_steps_per_second": 1.911, |
| "step": 495 |
| }, |
| { |
| "epoch": 8.33, |
| "grad_norm": 0.07099808752536774, |
| "learning_rate": 3.366666666666667e-05, |
| "loss": 0.0582, |
| "step": 500 |
| }, |
| { |
| "epoch": 8.33, |
| "eval_accuracy": 0.9669421487603306, |
| "eval_loss": 0.1489211469888687, |
| "eval_runtime": 9.0639, |
| "eval_samples_per_second": 13.35, |
| "eval_steps_per_second": 1.765, |
| "step": 500 |
| }, |
| { |
| "epoch": 8.42, |
| "eval_accuracy": 0.9669421487603306, |
| "eval_loss": 0.16356107592582703, |
| "eval_runtime": 8.5786, |
| "eval_samples_per_second": 14.105, |
| "eval_steps_per_second": 1.865, |
| "step": 505 |
| }, |
| { |
| "epoch": 8.5, |
| "grad_norm": 0.027900293469429016, |
| "learning_rate": 3.0333333333333337e-05, |
| "loss": 0.0233, |
| "step": 510 |
| }, |
| { |
| "epoch": 8.5, |
| "eval_accuracy": 0.9586776859504132, |
| "eval_loss": 0.17380845546722412, |
| "eval_runtime": 8.9993, |
| "eval_samples_per_second": 13.445, |
| "eval_steps_per_second": 1.778, |
| "step": 510 |
| }, |
| { |
| "epoch": 8.58, |
| "eval_accuracy": 0.9586776859504132, |
| "eval_loss": 0.18304291367530823, |
| "eval_runtime": 9.1114, |
| "eval_samples_per_second": 13.28, |
| "eval_steps_per_second": 1.756, |
| "step": 515 |
| }, |
| { |
| "epoch": 8.67, |
| "grad_norm": 0.030846718698740005, |
| "learning_rate": 2.7000000000000002e-05, |
| "loss": 0.026, |
| "step": 520 |
| }, |
| { |
| "epoch": 8.67, |
| "eval_accuracy": 0.9586776859504132, |
| "eval_loss": 0.1868060678243637, |
| "eval_runtime": 8.5179, |
| "eval_samples_per_second": 14.205, |
| "eval_steps_per_second": 1.878, |
| "step": 520 |
| }, |
| { |
| "epoch": 8.75, |
| "eval_accuracy": 0.9586776859504132, |
| "eval_loss": 0.18475578725337982, |
| "eval_runtime": 8.3044, |
| "eval_samples_per_second": 14.571, |
| "eval_steps_per_second": 1.927, |
| "step": 525 |
| }, |
| { |
| "epoch": 8.83, |
| "grad_norm": 0.0629822164773941, |
| "learning_rate": 2.3666666666666668e-05, |
| "loss": 0.0193, |
| "step": 530 |
| }, |
| { |
| "epoch": 8.83, |
| "eval_accuracy": 0.9669421487603306, |
| "eval_loss": 0.1804869920015335, |
| "eval_runtime": 8.7018, |
| "eval_samples_per_second": 13.905, |
| "eval_steps_per_second": 1.839, |
| "step": 530 |
| }, |
| { |
| "epoch": 8.92, |
| "eval_accuracy": 0.9669421487603306, |
| "eval_loss": 0.17735344171524048, |
| "eval_runtime": 8.5823, |
| "eval_samples_per_second": 14.099, |
| "eval_steps_per_second": 1.864, |
| "step": 535 |
| }, |
| { |
| "epoch": 9.0, |
| "grad_norm": 0.9116852283477783, |
| "learning_rate": 2.0333333333333334e-05, |
| "loss": 0.0174, |
| "step": 540 |
| }, |
| { |
| "epoch": 9.0, |
| "eval_accuracy": 0.9669421487603306, |
| "eval_loss": 0.17092934250831604, |
| "eval_runtime": 8.3532, |
| "eval_samples_per_second": 14.486, |
| "eval_steps_per_second": 1.915, |
| "step": 540 |
| }, |
| { |
| "epoch": 9.08, |
| "eval_accuracy": 0.9586776859504132, |
| "eval_loss": 0.16256949305534363, |
| "eval_runtime": 9.2093, |
| "eval_samples_per_second": 13.139, |
| "eval_steps_per_second": 1.737, |
| "step": 545 |
| }, |
| { |
| "epoch": 9.17, |
| "grad_norm": 0.6017179489135742, |
| "learning_rate": 1.7000000000000003e-05, |
| "loss": 0.036, |
| "step": 550 |
| }, |
| { |
| "epoch": 9.17, |
| "eval_accuracy": 0.9669421487603306, |
| "eval_loss": 0.14820979535579681, |
| "eval_runtime": 8.8207, |
| "eval_samples_per_second": 13.718, |
| "eval_steps_per_second": 1.814, |
| "step": 550 |
| }, |
| { |
| "epoch": 9.25, |
| "eval_accuracy": 0.9669421487603306, |
| "eval_loss": 0.1521998941898346, |
| "eval_runtime": 9.0595, |
| "eval_samples_per_second": 13.356, |
| "eval_steps_per_second": 1.766, |
| "step": 555 |
| }, |
| { |
| "epoch": 9.33, |
| "grad_norm": 0.05736929923295975, |
| "learning_rate": 1.3666666666666666e-05, |
| "loss": 0.0248, |
| "step": 560 |
| }, |
| { |
| "epoch": 9.33, |
| "eval_accuracy": 0.9669421487603306, |
| "eval_loss": 0.15244396030902863, |
| "eval_runtime": 8.3412, |
| "eval_samples_per_second": 14.506, |
| "eval_steps_per_second": 1.918, |
| "step": 560 |
| }, |
| { |
| "epoch": 9.42, |
| "eval_accuracy": 0.9669421487603306, |
| "eval_loss": 0.15296421945095062, |
| "eval_runtime": 8.8174, |
| "eval_samples_per_second": 13.723, |
| "eval_steps_per_second": 1.815, |
| "step": 565 |
| }, |
| { |
| "epoch": 9.5, |
| "grad_norm": 0.05882992222905159, |
| "learning_rate": 1.0333333333333333e-05, |
| "loss": 0.0097, |
| "step": 570 |
| }, |
| { |
| "epoch": 9.5, |
| "eval_accuracy": 0.9669421487603306, |
| "eval_loss": 0.15370462834835052, |
| "eval_runtime": 8.7738, |
| "eval_samples_per_second": 13.791, |
| "eval_steps_per_second": 1.824, |
| "step": 570 |
| }, |
| { |
| "epoch": 9.58, |
| "eval_accuracy": 0.9669421487603306, |
| "eval_loss": 0.15461915731430054, |
| "eval_runtime": 9.5826, |
| "eval_samples_per_second": 12.627, |
| "eval_steps_per_second": 1.67, |
| "step": 575 |
| }, |
| { |
| "epoch": 9.67, |
| "grad_norm": 0.8011133670806885, |
| "learning_rate": 7.000000000000001e-06, |
| "loss": 0.0154, |
| "step": 580 |
| }, |
| { |
| "epoch": 9.67, |
| "eval_accuracy": 0.9669421487603306, |
| "eval_loss": 0.1550207883119583, |
| "eval_runtime": 8.6934, |
| "eval_samples_per_second": 13.919, |
| "eval_steps_per_second": 1.84, |
| "step": 580 |
| }, |
| { |
| "epoch": 9.75, |
| "eval_accuracy": 0.9669421487603306, |
| "eval_loss": 0.15535828471183777, |
| "eval_runtime": 8.5014, |
| "eval_samples_per_second": 14.233, |
| "eval_steps_per_second": 1.882, |
| "step": 585 |
| }, |
| { |
| "epoch": 9.83, |
| "grad_norm": 0.027278412133455276, |
| "learning_rate": 3.666666666666667e-06, |
| "loss": 0.0208, |
| "step": 590 |
| }, |
| { |
| "epoch": 9.83, |
| "eval_accuracy": 0.9669421487603306, |
| "eval_loss": 0.15486347675323486, |
| "eval_runtime": 8.7866, |
| "eval_samples_per_second": 13.771, |
| "eval_steps_per_second": 1.821, |
| "step": 590 |
| }, |
| { |
| "epoch": 9.92, |
| "eval_accuracy": 0.9669421487603306, |
| "eval_loss": 0.15409855544567108, |
| "eval_runtime": 9.0006, |
| "eval_samples_per_second": 13.444, |
| "eval_steps_per_second": 1.778, |
| "step": 595 |
| }, |
| { |
| "epoch": 10.0, |
| "grad_norm": 0.026343151926994324, |
| "learning_rate": 3.3333333333333335e-07, |
| "loss": 0.0167, |
| "step": 600 |
| }, |
| { |
| "epoch": 10.0, |
| "eval_accuracy": 0.9669421487603306, |
| "eval_loss": 0.15418902039527893, |
| "eval_runtime": 8.8745, |
| "eval_samples_per_second": 13.635, |
| "eval_steps_per_second": 1.803, |
| "step": 600 |
| }, |
| { |
| "epoch": 10.0, |
| "step": 600, |
| "total_flos": 7.39286832673751e+17, |
| "train_loss": 0.12961051397025586, |
| "train_runtime": 2644.5796, |
| "train_samples_per_second": 3.607, |
| "train_steps_per_second": 0.227 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 600, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 10, |
| "save_steps": 5, |
| "total_flos": 7.39286832673751e+17, |
| "train_batch_size": 16, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|