| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 19.753086419753085, | |
| "eval_steps": 100, | |
| "global_step": 800, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.24691358024691357, | |
| "grad_norm": 15.961835861206055, | |
| "learning_rate": 1.0000000000000002e-06, | |
| "loss": 0.6418, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.49382716049382713, | |
| "grad_norm": 8.321958541870117, | |
| "learning_rate": 2.0000000000000003e-06, | |
| "loss": 0.6123, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.7407407407407407, | |
| "grad_norm": 10.797492980957031, | |
| "learning_rate": 3e-06, | |
| "loss": 0.5325, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.9876543209876543, | |
| "grad_norm": 8.990424156188965, | |
| "learning_rate": 4.000000000000001e-06, | |
| "loss": 0.3937, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 1.2345679012345678, | |
| "grad_norm": 5.069626331329346, | |
| "learning_rate": 5e-06, | |
| "loss": 0.2157, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 1.4814814814814814, | |
| "grad_norm": 2.54313063621521, | |
| "learning_rate": 6e-06, | |
| "loss": 0.1184, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 1.7283950617283952, | |
| "grad_norm": 1.0293046236038208, | |
| "learning_rate": 7e-06, | |
| "loss": 0.0469, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 1.9753086419753085, | |
| "grad_norm": 0.32547664642333984, | |
| "learning_rate": 8.000000000000001e-06, | |
| "loss": 0.0176, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 2.2222222222222223, | |
| "grad_norm": 0.20642875134944916, | |
| "learning_rate": 9e-06, | |
| "loss": 0.0088, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 2.4691358024691357, | |
| "grad_norm": 0.16824281215667725, | |
| "learning_rate": 1e-05, | |
| "loss": 0.0056, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 2.4691358024691357, | |
| "eval_accuracy": 1.0, | |
| "eval_accuracy_label_GD622:Null": 1.0, | |
| "eval_accuracy_label_GD622:YES": 1.0, | |
| "eval_f1": 1.0, | |
| "eval_loss": 0.004177506547421217, | |
| "eval_precision": 1.0, | |
| "eval_recall": 1.0, | |
| "eval_runtime": 0.7278, | |
| "eval_samples_per_second": 133.281, | |
| "eval_steps_per_second": 9.618, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 2.7160493827160495, | |
| "grad_norm": 0.10963490605354309, | |
| "learning_rate": 1.1000000000000001e-05, | |
| "loss": 0.0038, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 2.962962962962963, | |
| "grad_norm": 0.07338671386241913, | |
| "learning_rate": 1.2e-05, | |
| "loss": 0.0029, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 3.2098765432098766, | |
| "grad_norm": 0.0706721693277359, | |
| "learning_rate": 1.3000000000000001e-05, | |
| "loss": 0.0024, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 3.45679012345679, | |
| "grad_norm": 0.06313496083021164, | |
| "learning_rate": 1.4e-05, | |
| "loss": 0.002, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 3.7037037037037037, | |
| "grad_norm": 0.063168965280056, | |
| "learning_rate": 1.5000000000000002e-05, | |
| "loss": 0.0018, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 3.950617283950617, | |
| "grad_norm": 0.05419662222266197, | |
| "learning_rate": 1.6000000000000003e-05, | |
| "loss": 0.0016, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 4.197530864197531, | |
| "grad_norm": 0.047441959381103516, | |
| "learning_rate": 1.7e-05, | |
| "loss": 0.0014, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 4.444444444444445, | |
| "grad_norm": 0.04058253392577171, | |
| "learning_rate": 1.8e-05, | |
| "loss": 0.0012, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 4.6913580246913575, | |
| "grad_norm": 0.029308538883924484, | |
| "learning_rate": 1.9e-05, | |
| "loss": 0.0011, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 4.938271604938271, | |
| "grad_norm": 0.027978356927633286, | |
| "learning_rate": 2e-05, | |
| "loss": 0.001, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 4.938271604938271, | |
| "eval_accuracy": 1.0, | |
| "eval_accuracy_label_GD622:Null": 1.0, | |
| "eval_accuracy_label_GD622:YES": 1.0, | |
| "eval_f1": 1.0, | |
| "eval_loss": 0.0008954937802627683, | |
| "eval_precision": 1.0, | |
| "eval_recall": 1.0, | |
| "eval_runtime": 0.521, | |
| "eval_samples_per_second": 186.189, | |
| "eval_steps_per_second": 13.436, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 5.185185185185185, | |
| "grad_norm": 0.023471660912036896, | |
| "learning_rate": 1.9666666666666666e-05, | |
| "loss": 0.0009, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 5.432098765432099, | |
| "grad_norm": 0.021547624841332436, | |
| "learning_rate": 1.9333333333333333e-05, | |
| "loss": 0.0008, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 5.679012345679013, | |
| "grad_norm": 0.02687031961977482, | |
| "learning_rate": 1.9e-05, | |
| "loss": 0.0007, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 5.925925925925926, | |
| "grad_norm": 0.021016767248511314, | |
| "learning_rate": 1.866666666666667e-05, | |
| "loss": 0.0006, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 6.172839506172839, | |
| "grad_norm": 0.017553668469190598, | |
| "learning_rate": 1.8333333333333333e-05, | |
| "loss": 0.0006, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 6.419753086419753, | |
| "grad_norm": 0.016819961369037628, | |
| "learning_rate": 1.8e-05, | |
| "loss": 0.0006, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 6.666666666666667, | |
| "grad_norm": 0.018349776044487953, | |
| "learning_rate": 1.7666666666666668e-05, | |
| "loss": 0.0005, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 6.91358024691358, | |
| "grad_norm": 0.01844148337841034, | |
| "learning_rate": 1.7333333333333336e-05, | |
| "loss": 0.0005, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 7.160493827160494, | |
| "grad_norm": 0.016825733706355095, | |
| "learning_rate": 1.7e-05, | |
| "loss": 0.0005, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 7.407407407407407, | |
| "grad_norm": 0.013994095847010612, | |
| "learning_rate": 1.6666666666666667e-05, | |
| "loss": 0.0005, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 7.407407407407407, | |
| "eval_accuracy": 1.0, | |
| "eval_accuracy_label_GD622:Null": 1.0, | |
| "eval_accuracy_label_GD622:YES": 1.0, | |
| "eval_f1": 1.0, | |
| "eval_loss": 0.00043683411786332726, | |
| "eval_precision": 1.0, | |
| "eval_recall": 1.0, | |
| "eval_runtime": 0.5186, | |
| "eval_samples_per_second": 187.027, | |
| "eval_steps_per_second": 13.497, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 7.654320987654321, | |
| "grad_norm": 0.015141790732741356, | |
| "learning_rate": 1.6333333333333335e-05, | |
| "loss": 0.0004, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 7.901234567901234, | |
| "grad_norm": 0.013697362504899502, | |
| "learning_rate": 1.6000000000000003e-05, | |
| "loss": 0.0004, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 8.148148148148149, | |
| "grad_norm": 0.01174458209425211, | |
| "learning_rate": 1.5666666666666667e-05, | |
| "loss": 0.0004, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 8.395061728395062, | |
| "grad_norm": 0.013013974763453007, | |
| "learning_rate": 1.5333333333333334e-05, | |
| "loss": 0.0004, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 8.641975308641975, | |
| "grad_norm": 0.011320522986352444, | |
| "learning_rate": 1.5000000000000002e-05, | |
| "loss": 0.0004, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 8.88888888888889, | |
| "grad_norm": 0.010261823423206806, | |
| "learning_rate": 1.4666666666666666e-05, | |
| "loss": 0.0003, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 9.135802469135802, | |
| "grad_norm": 0.009858865290880203, | |
| "learning_rate": 1.4333333333333334e-05, | |
| "loss": 0.0003, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 9.382716049382717, | |
| "grad_norm": 0.009954158216714859, | |
| "learning_rate": 1.4e-05, | |
| "loss": 0.0003, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 9.62962962962963, | |
| "grad_norm": 0.010688001289963722, | |
| "learning_rate": 1.3666666666666667e-05, | |
| "loss": 0.0003, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 9.876543209876543, | |
| "grad_norm": 0.009526471607387066, | |
| "learning_rate": 1.3333333333333333e-05, | |
| "loss": 0.0003, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 9.876543209876543, | |
| "eval_accuracy": 1.0, | |
| "eval_accuracy_label_GD622:Null": 1.0, | |
| "eval_accuracy_label_GD622:YES": 1.0, | |
| "eval_f1": 1.0, | |
| "eval_loss": 0.0002707206876948476, | |
| "eval_precision": 1.0, | |
| "eval_recall": 1.0, | |
| "eval_runtime": 0.5189, | |
| "eval_samples_per_second": 186.945, | |
| "eval_steps_per_second": 13.491, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 10.123456790123457, | |
| "grad_norm": 0.00999557226896286, | |
| "learning_rate": 1.3000000000000001e-05, | |
| "loss": 0.0003, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 10.37037037037037, | |
| "grad_norm": 0.009755443781614304, | |
| "learning_rate": 1.2666666666666667e-05, | |
| "loss": 0.0003, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 10.617283950617283, | |
| "grad_norm": 0.008844558149576187, | |
| "learning_rate": 1.2333333333333334e-05, | |
| "loss": 0.0003, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 10.864197530864198, | |
| "grad_norm": 0.00740455137565732, | |
| "learning_rate": 1.2e-05, | |
| "loss": 0.0003, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 11.11111111111111, | |
| "grad_norm": 0.007182607427239418, | |
| "learning_rate": 1.1666666666666668e-05, | |
| "loss": 0.0002, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 11.358024691358025, | |
| "grad_norm": 0.007493776269257069, | |
| "learning_rate": 1.1333333333333334e-05, | |
| "loss": 0.0003, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 11.604938271604938, | |
| "grad_norm": 0.008535212837159634, | |
| "learning_rate": 1.1000000000000001e-05, | |
| "loss": 0.0002, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 11.851851851851851, | |
| "grad_norm": 0.007039290387183428, | |
| "learning_rate": 1.0666666666666667e-05, | |
| "loss": 0.0002, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 12.098765432098766, | |
| "grad_norm": 0.007746797055006027, | |
| "learning_rate": 1.0333333333333335e-05, | |
| "loss": 0.0002, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 12.345679012345679, | |
| "grad_norm": 0.008360541425645351, | |
| "learning_rate": 1e-05, | |
| "loss": 0.0002, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 12.345679012345679, | |
| "eval_accuracy": 1.0, | |
| "eval_accuracy_label_GD622:Null": 1.0, | |
| "eval_accuracy_label_GD622:YES": 1.0, | |
| "eval_f1": 1.0, | |
| "eval_loss": 0.00022185646230354905, | |
| "eval_precision": 1.0, | |
| "eval_recall": 1.0, | |
| "eval_runtime": 0.5185, | |
| "eval_samples_per_second": 187.093, | |
| "eval_steps_per_second": 13.502, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 12.592592592592592, | |
| "grad_norm": 0.006791314110159874, | |
| "learning_rate": 9.666666666666667e-06, | |
| "loss": 0.0002, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 12.839506172839506, | |
| "grad_norm": 0.007602753583341837, | |
| "learning_rate": 9.333333333333334e-06, | |
| "loss": 0.0002, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 13.08641975308642, | |
| "grad_norm": 0.0071947514079511166, | |
| "learning_rate": 9e-06, | |
| "loss": 0.0002, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 13.333333333333334, | |
| "grad_norm": 0.007956212386488914, | |
| "learning_rate": 8.666666666666668e-06, | |
| "loss": 0.0002, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 13.580246913580247, | |
| "grad_norm": 0.007944190874695778, | |
| "learning_rate": 8.333333333333334e-06, | |
| "loss": 0.0002, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 13.82716049382716, | |
| "grad_norm": 0.007252030540257692, | |
| "learning_rate": 8.000000000000001e-06, | |
| "loss": 0.0002, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 14.074074074074074, | |
| "grad_norm": 0.007420521695166826, | |
| "learning_rate": 7.666666666666667e-06, | |
| "loss": 0.0002, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 14.320987654320987, | |
| "grad_norm": 0.007615529000759125, | |
| "learning_rate": 7.333333333333333e-06, | |
| "loss": 0.0002, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 14.567901234567902, | |
| "grad_norm": 0.007768448442220688, | |
| "learning_rate": 7e-06, | |
| "loss": 0.0002, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 14.814814814814815, | |
| "grad_norm": 0.005428287200629711, | |
| "learning_rate": 6.666666666666667e-06, | |
| "loss": 0.0002, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 14.814814814814815, | |
| "eval_accuracy": 1.0, | |
| "eval_accuracy_label_GD622:Null": 1.0, | |
| "eval_accuracy_label_GD622:YES": 1.0, | |
| "eval_f1": 1.0, | |
| "eval_loss": 0.00019342350424267352, | |
| "eval_precision": 1.0, | |
| "eval_recall": 1.0, | |
| "eval_runtime": 0.5188, | |
| "eval_samples_per_second": 186.984, | |
| "eval_steps_per_second": 13.494, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 15.061728395061728, | |
| "grad_norm": 0.005406714044511318, | |
| "learning_rate": 6.333333333333333e-06, | |
| "loss": 0.0002, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 15.308641975308642, | |
| "grad_norm": 0.007229079958051443, | |
| "learning_rate": 6e-06, | |
| "loss": 0.0002, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 15.555555555555555, | |
| "grad_norm": 0.006031244061887264, | |
| "learning_rate": 5.666666666666667e-06, | |
| "loss": 0.0002, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 15.802469135802468, | |
| "grad_norm": 0.0075646815821528435, | |
| "learning_rate": 5.333333333333334e-06, | |
| "loss": 0.0002, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 16.049382716049383, | |
| "grad_norm": 0.006907324306666851, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0002, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 16.296296296296298, | |
| "grad_norm": 0.005848431494086981, | |
| "learning_rate": 4.666666666666667e-06, | |
| "loss": 0.0002, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 16.54320987654321, | |
| "grad_norm": 0.007128111552447081, | |
| "learning_rate": 4.333333333333334e-06, | |
| "loss": 0.0002, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 16.790123456790123, | |
| "grad_norm": 0.00657699815928936, | |
| "learning_rate": 4.000000000000001e-06, | |
| "loss": 0.0002, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 17.037037037037038, | |
| "grad_norm": 0.005719279404729605, | |
| "learning_rate": 3.6666666666666666e-06, | |
| "loss": 0.0002, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 17.28395061728395, | |
| "grad_norm": 0.0061570280231535435, | |
| "learning_rate": 3.3333333333333333e-06, | |
| "loss": 0.0002, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 17.28395061728395, | |
| "eval_accuracy": 1.0, | |
| "eval_accuracy_label_GD622:Null": 1.0, | |
| "eval_accuracy_label_GD622:YES": 1.0, | |
| "eval_f1": 1.0, | |
| "eval_loss": 0.0001789480447769165, | |
| "eval_precision": 1.0, | |
| "eval_recall": 1.0, | |
| "eval_runtime": 0.5201, | |
| "eval_samples_per_second": 186.493, | |
| "eval_steps_per_second": 13.458, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 17.530864197530864, | |
| "grad_norm": 0.006241227500140667, | |
| "learning_rate": 3e-06, | |
| "loss": 0.0002, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 17.77777777777778, | |
| "grad_norm": 0.006561820395290852, | |
| "learning_rate": 2.666666666666667e-06, | |
| "loss": 0.0002, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 18.02469135802469, | |
| "grad_norm": 0.00643093092367053, | |
| "learning_rate": 2.3333333333333336e-06, | |
| "loss": 0.0002, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 18.271604938271604, | |
| "grad_norm": 0.005693737417459488, | |
| "learning_rate": 2.0000000000000003e-06, | |
| "loss": 0.0002, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 18.51851851851852, | |
| "grad_norm": 0.0065653519704937935, | |
| "learning_rate": 1.6666666666666667e-06, | |
| "loss": 0.0002, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 18.765432098765434, | |
| "grad_norm": 0.004837734624743462, | |
| "learning_rate": 1.3333333333333334e-06, | |
| "loss": 0.0002, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 19.012345679012345, | |
| "grad_norm": 0.005498081911355257, | |
| "learning_rate": 1.0000000000000002e-06, | |
| "loss": 0.0002, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 19.25925925925926, | |
| "grad_norm": 0.0063169412314891815, | |
| "learning_rate": 6.666666666666667e-07, | |
| "loss": 0.0002, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 19.506172839506174, | |
| "grad_norm": 0.00681178318336606, | |
| "learning_rate": 3.3333333333333335e-07, | |
| "loss": 0.0002, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 19.753086419753085, | |
| "grad_norm": 0.006289825774729252, | |
| "learning_rate": 0.0, | |
| "loss": 0.0002, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 19.753086419753085, | |
| "eval_accuracy": 1.0, | |
| "eval_accuracy_label_GD622:Null": 1.0, | |
| "eval_accuracy_label_GD622:YES": 1.0, | |
| "eval_f1": 1.0, | |
| "eval_loss": 0.0001743907341733575, | |
| "eval_precision": 1.0, | |
| "eval_recall": 1.0, | |
| "eval_runtime": 0.5213, | |
| "eval_samples_per_second": 186.078, | |
| "eval_steps_per_second": 13.428, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 19.753086419753085, | |
| "step": 800, | |
| "total_flos": 93696895492200.0, | |
| "train_loss": 0.032879929275804895, | |
| "train_runtime": 407.7795, | |
| "train_samples_per_second": 63.073, | |
| "train_steps_per_second": 1.962 | |
| }, | |
| { | |
| "epoch": 19.753086419753085, | |
| "eval_accuracy": 1.0, | |
| "eval_accuracy_label_GD622:Null": 1.0, | |
| "eval_accuracy_label_GD622:YES": 1.0, | |
| "eval_f1": 1.0, | |
| "eval_loss": 0.0001743907341733575, | |
| "eval_precision": 1.0, | |
| "eval_recall": 1.0, | |
| "eval_runtime": 0.5104, | |
| "eval_samples_per_second": 190.041, | |
| "eval_steps_per_second": 13.714, | |
| "step": 800 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 800, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 20, | |
| "save_steps": 1000, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 93696895492200.0, | |
| "train_batch_size": 16, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |