| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 30.0, | |
| "eval_steps": 500, | |
| "global_step": 1080, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 6.0880818367004395, | |
| "learning_rate": 3.6e-05, | |
| "loss": 3.2849, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_accuracy": 0.010123239436619719, | |
| "eval_bleu": 0.10518697359828132, | |
| "eval_f1": 0.010153581748448041, | |
| "eval_loss": 2.6219797134399414, | |
| "eval_precision": 0.010202794685557478, | |
| "eval_recall": 0.010123239436619719, | |
| "eval_runtime": 6.2453, | |
| "eval_samples_per_second": 22.737, | |
| "eval_steps_per_second": 2.882, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 6.915263652801514, | |
| "learning_rate": 4.893203883495146e-05, | |
| "loss": 2.2637, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_accuracy": 0.01248899647887324, | |
| "eval_bleu": 0.13792868891001447, | |
| "eval_f1": 0.010680838430589857, | |
| "eval_loss": 2.017970561981201, | |
| "eval_precision": 0.009509030569956453, | |
| "eval_recall": 0.012488996478873238, | |
| "eval_runtime": 7.0653, | |
| "eval_samples_per_second": 20.098, | |
| "eval_steps_per_second": 2.548, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "grad_norm": 7.544444561004639, | |
| "learning_rate": 4.718446601941748e-05, | |
| "loss": 1.789, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_accuracy": 0.012654049295774648, | |
| "eval_bleu": 0.17566093139607744, | |
| "eval_f1": 0.011466852239797607, | |
| "eval_loss": 1.7966645956039429, | |
| "eval_precision": 0.01049697288064746, | |
| "eval_recall": 0.012654049295774647, | |
| "eval_runtime": 6.0435, | |
| "eval_samples_per_second": 23.496, | |
| "eval_steps_per_second": 2.978, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "grad_norm": 6.955804347991943, | |
| "learning_rate": 4.543689320388349e-05, | |
| "loss": 1.5522, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_accuracy": 0.012378961267605635, | |
| "eval_bleu": 0.20413318737157596, | |
| "eval_f1": 0.011132478900133054, | |
| "eval_loss": 1.6657302379608154, | |
| "eval_precision": 0.010120661403149588, | |
| "eval_recall": 0.012378961267605633, | |
| "eval_runtime": 6.1905, | |
| "eval_samples_per_second": 22.938, | |
| "eval_steps_per_second": 2.908, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "grad_norm": 7.229798793792725, | |
| "learning_rate": 4.368932038834951e-05, | |
| "loss": 1.3885, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_accuracy": 0.012378961267605635, | |
| "eval_bleu": 0.2305940937780942, | |
| "eval_f1": 0.011073979352246164, | |
| "eval_loss": 1.5827170610427856, | |
| "eval_precision": 0.010022195626080541, | |
| "eval_recall": 0.012378961267605633, | |
| "eval_runtime": 6.9109, | |
| "eval_samples_per_second": 20.547, | |
| "eval_steps_per_second": 2.605, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "grad_norm": 6.714682102203369, | |
| "learning_rate": 4.194174757281554e-05, | |
| "loss": 1.2561, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_accuracy": 0.012268926056338027, | |
| "eval_bleu": 0.25012821385731565, | |
| "eval_f1": 0.01143102719262594, | |
| "eval_loss": 1.5294432640075684, | |
| "eval_precision": 0.010707247710344536, | |
| "eval_recall": 0.012268926056338027, | |
| "eval_runtime": 6.5697, | |
| "eval_samples_per_second": 21.614, | |
| "eval_steps_per_second": 2.74, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "grad_norm": 6.063908100128174, | |
| "learning_rate": 4.019417475728156e-05, | |
| "loss": 1.1466, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "eval_accuracy": 0.012268926056338027, | |
| "eval_bleu": 0.2719546447213355, | |
| "eval_f1": 0.01116903184495446, | |
| "eval_loss": 1.4926397800445557, | |
| "eval_precision": 0.010253161244814542, | |
| "eval_recall": 0.012268926056338027, | |
| "eval_runtime": 6.4161, | |
| "eval_samples_per_second": 22.132, | |
| "eval_steps_per_second": 2.805, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "grad_norm": 6.155008316040039, | |
| "learning_rate": 3.844660194174757e-05, | |
| "loss": 1.0616, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_accuracy": 0.012378961267605635, | |
| "eval_bleu": 0.28742380325292655, | |
| "eval_f1": 0.01144092667248244, | |
| "eval_loss": 1.4650629758834839, | |
| "eval_precision": 0.010640060069611993, | |
| "eval_recall": 0.012378961267605633, | |
| "eval_runtime": 6.3235, | |
| "eval_samples_per_second": 22.456, | |
| "eval_steps_per_second": 2.847, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "grad_norm": 5.874176979064941, | |
| "learning_rate": 3.6699029126213596e-05, | |
| "loss": 0.9821, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "eval_accuracy": 0.012268926056338027, | |
| "eval_bleu": 0.30478814512361185, | |
| "eval_f1": 0.011331946253740536, | |
| "eval_loss": 1.4492626190185547, | |
| "eval_precision": 0.010531151474565863, | |
| "eval_recall": 0.012268926056338027, | |
| "eval_runtime": 6.2565, | |
| "eval_samples_per_second": 22.696, | |
| "eval_steps_per_second": 2.877, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "grad_norm": 5.705406665802002, | |
| "learning_rate": 3.4951456310679615e-05, | |
| "loss": 0.9071, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "eval_accuracy": 0.012268926056338027, | |
| "eval_bleu": 0.3250108739654091, | |
| "eval_f1": 0.011214281037053235, | |
| "eval_loss": 1.4369895458221436, | |
| "eval_precision": 0.010327542425543311, | |
| "eval_recall": 0.012268926056338027, | |
| "eval_runtime": 6.1165, | |
| "eval_samples_per_second": 23.216, | |
| "eval_steps_per_second": 2.943, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 11.0, | |
| "grad_norm": 5.345541000366211, | |
| "learning_rate": 3.3203883495145634e-05, | |
| "loss": 0.8445, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 11.0, | |
| "eval_accuracy": 0.012268926056338027, | |
| "eval_bleu": 0.32825465058506376, | |
| "eval_f1": 0.011419929314754076, | |
| "eval_loss": 1.4337056875228882, | |
| "eval_precision": 0.010683679060359908, | |
| "eval_recall": 0.012268926056338027, | |
| "eval_runtime": 6.9862, | |
| "eval_samples_per_second": 20.326, | |
| "eval_steps_per_second": 2.577, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "grad_norm": 4.928064346313477, | |
| "learning_rate": 3.145631067961165e-05, | |
| "loss": 0.7869, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "eval_accuracy": 0.012268926056338027, | |
| "eval_bleu": 0.34315751189373966, | |
| "eval_f1": 0.011425905613358798, | |
| "eval_loss": 1.431230902671814, | |
| "eval_precision": 0.010692912514740159, | |
| "eval_recall": 0.012268926056338027, | |
| "eval_runtime": 6.3375, | |
| "eval_samples_per_second": 22.406, | |
| "eval_steps_per_second": 2.84, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 13.0, | |
| "grad_norm": 4.787758827209473, | |
| "learning_rate": 2.9708737864077673e-05, | |
| "loss": 0.7388, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 13.0, | |
| "eval_accuracy": 0.012268926056338027, | |
| "eval_bleu": 0.3506530097105922, | |
| "eval_f1": 0.011567695105672023, | |
| "eval_loss": 1.432782530784607, | |
| "eval_precision": 0.010942929955906473, | |
| "eval_recall": 0.012268926056338027, | |
| "eval_runtime": 6.6203, | |
| "eval_samples_per_second": 21.449, | |
| "eval_steps_per_second": 2.719, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 14.0, | |
| "grad_norm": 4.721966743469238, | |
| "learning_rate": 2.7961165048543692e-05, | |
| "loss": 0.6948, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 14.0, | |
| "eval_accuracy": 0.012268926056338027, | |
| "eval_bleu": 0.36203576750790184, | |
| "eval_f1": 0.011535961695115268, | |
| "eval_loss": 1.4401381015777588, | |
| "eval_precision": 0.010887377543860504, | |
| "eval_recall": 0.012268926056338027, | |
| "eval_runtime": 6.8054, | |
| "eval_samples_per_second": 20.866, | |
| "eval_steps_per_second": 2.645, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 15.0, | |
| "grad_norm": 3.970003128051758, | |
| "learning_rate": 2.6213592233009708e-05, | |
| "loss": 0.646, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 15.0, | |
| "eval_accuracy": 0.012048855633802816, | |
| "eval_bleu": 0.37067819665088503, | |
| "eval_f1": 0.011355838282605724, | |
| "eval_loss": 1.4626511335372925, | |
| "eval_precision": 0.010738384191565671, | |
| "eval_recall": 0.012048855633802815, | |
| "eval_runtime": 6.6898, | |
| "eval_samples_per_second": 21.226, | |
| "eval_steps_per_second": 2.691, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 16.0, | |
| "grad_norm": 3.967465877532959, | |
| "learning_rate": 2.446601941747573e-05, | |
| "loss": 0.6137, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 16.0, | |
| "eval_accuracy": 0.012048855633802816, | |
| "eval_bleu": 0.3721186663899375, | |
| "eval_f1": 0.01150499351512691, | |
| "eval_loss": 1.460695505142212, | |
| "eval_precision": 0.01100817275358665, | |
| "eval_recall": 0.012048855633802815, | |
| "eval_runtime": 6.1473, | |
| "eval_samples_per_second": 23.1, | |
| "eval_steps_per_second": 2.928, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 17.0, | |
| "grad_norm": 3.9431395530700684, | |
| "learning_rate": 2.2718446601941746e-05, | |
| "loss": 0.5833, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 17.0, | |
| "eval_accuracy": 0.012048855633802816, | |
| "eval_bleu": 0.3782352067209992, | |
| "eval_f1": 0.011355838282605724, | |
| "eval_loss": 1.4740684032440186, | |
| "eval_precision": 0.010738384191565671, | |
| "eval_recall": 0.012048855633802815, | |
| "eval_runtime": 6.2111, | |
| "eval_samples_per_second": 22.862, | |
| "eval_steps_per_second": 2.898, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 18.0, | |
| "grad_norm": 4.299834251403809, | |
| "learning_rate": 2.097087378640777e-05, | |
| "loss": 0.5523, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 18.0, | |
| "eval_accuracy": 0.012048855633802816, | |
| "eval_bleu": 0.38158208167842117, | |
| "eval_f1": 0.011454837538514543, | |
| "eval_loss": 1.4804751873016357, | |
| "eval_precision": 0.010916734980993936, | |
| "eval_recall": 0.012048855633802815, | |
| "eval_runtime": 6.2563, | |
| "eval_samples_per_second": 22.697, | |
| "eval_steps_per_second": 2.877, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 19.0, | |
| "grad_norm": 3.584228038787842, | |
| "learning_rate": 1.9223300970873785e-05, | |
| "loss": 0.5282, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 19.0, | |
| "eval_accuracy": 0.012268926056338027, | |
| "eval_bleu": 0.38941442914279584, | |
| "eval_f1": 0.011665531630013295, | |
| "eval_loss": 1.4897931814193726, | |
| "eval_precision": 0.011119728166708294, | |
| "eval_recall": 0.012268926056338027, | |
| "eval_runtime": 6.1705, | |
| "eval_samples_per_second": 23.013, | |
| "eval_steps_per_second": 2.917, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "grad_norm": 3.575045108795166, | |
| "learning_rate": 1.7475728155339808e-05, | |
| "loss": 0.5047, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "eval_accuracy": 0.01210387323943662, | |
| "eval_bleu": 0.3904044704756756, | |
| "eval_f1": 0.011583122303403293, | |
| "eval_loss": 1.4997267723083496, | |
| "eval_precision": 0.011105397197031464, | |
| "eval_recall": 0.01210387323943662, | |
| "eval_runtime": 6.3359, | |
| "eval_samples_per_second": 22.412, | |
| "eval_steps_per_second": 2.841, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 21.0, | |
| "grad_norm": 3.5939736366271973, | |
| "learning_rate": 1.5728155339805823e-05, | |
| "loss": 0.4854, | |
| "step": 756 | |
| }, | |
| { | |
| "epoch": 21.0, | |
| "eval_accuracy": 0.012048855633802816, | |
| "eval_bleu": 0.38824458290957736, | |
| "eval_f1": 0.011442367439933718, | |
| "eval_loss": 1.5054779052734375, | |
| "eval_precision": 0.010894114653359857, | |
| "eval_recall": 0.012048855633802815, | |
| "eval_runtime": 6.2963, | |
| "eval_samples_per_second": 22.553, | |
| "eval_steps_per_second": 2.859, | |
| "step": 756 | |
| }, | |
| { | |
| "epoch": 22.0, | |
| "grad_norm": 3.807431697845459, | |
| "learning_rate": 1.3980582524271846e-05, | |
| "loss": 0.4697, | |
| "step": 792 | |
| }, | |
| { | |
| "epoch": 22.0, | |
| "eval_accuracy": 0.012048855633802816, | |
| "eval_bleu": 0.3905756115063895, | |
| "eval_f1": 0.011442367439933718, | |
| "eval_loss": 1.5122706890106201, | |
| "eval_precision": 0.010894114653359857, | |
| "eval_recall": 0.012048855633802815, | |
| "eval_runtime": 7.012, | |
| "eval_samples_per_second": 20.251, | |
| "eval_steps_per_second": 2.567, | |
| "step": 792 | |
| }, | |
| { | |
| "epoch": 23.0, | |
| "grad_norm": 3.8207340240478516, | |
| "learning_rate": 1.2233009708737865e-05, | |
| "loss": 0.4547, | |
| "step": 828 | |
| }, | |
| { | |
| "epoch": 23.0, | |
| "eval_accuracy": 0.012048855633802816, | |
| "eval_bleu": 0.39316674027140547, | |
| "eval_f1": 0.011517601862123769, | |
| "eval_loss": 1.5183604955673218, | |
| "eval_precision": 0.011031274351852924, | |
| "eval_recall": 0.012048855633802815, | |
| "eval_runtime": 6.8261, | |
| "eval_samples_per_second": 20.803, | |
| "eval_steps_per_second": 2.637, | |
| "step": 828 | |
| }, | |
| { | |
| "epoch": 24.0, | |
| "grad_norm": 3.831852912902832, | |
| "learning_rate": 1.0485436893203885e-05, | |
| "loss": 0.4413, | |
| "step": 864 | |
| }, | |
| { | |
| "epoch": 24.0, | |
| "eval_accuracy": 0.012048855633802816, | |
| "eval_bleu": 0.3956605229111985, | |
| "eval_f1": 0.011392759914288347, | |
| "eval_loss": 1.5295231342315674, | |
| "eval_precision": 0.010804571945629947, | |
| "eval_recall": 0.012048855633802815, | |
| "eval_runtime": 6.289, | |
| "eval_samples_per_second": 22.579, | |
| "eval_steps_per_second": 2.862, | |
| "step": 864 | |
| }, | |
| { | |
| "epoch": 25.0, | |
| "grad_norm": 3.056539297103882, | |
| "learning_rate": 8.737864077669904e-06, | |
| "loss": 0.4325, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 25.0, | |
| "eval_accuracy": 0.012048855633802816, | |
| "eval_bleu": 0.39496386454550636, | |
| "eval_f1": 0.0114798602280593, | |
| "eval_loss": 1.535691738128662, | |
| "eval_precision": 0.010962261366716413, | |
| "eval_recall": 0.012048855633802815, | |
| "eval_runtime": 6.1059, | |
| "eval_samples_per_second": 23.256, | |
| "eval_steps_per_second": 2.948, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 26.0, | |
| "grad_norm": 3.1146154403686523, | |
| "learning_rate": 6.990291262135923e-06, | |
| "loss": 0.4188, | |
| "step": 936 | |
| }, | |
| { | |
| "epoch": 26.0, | |
| "eval_accuracy": 0.012048855633802816, | |
| "eval_bleu": 0.3960440421138887, | |
| "eval_f1": 0.011517601862123769, | |
| "eval_loss": 1.5396318435668945, | |
| "eval_precision": 0.011031274351852924, | |
| "eval_recall": 0.012048855633802815, | |
| "eval_runtime": 7.2756, | |
| "eval_samples_per_second": 19.517, | |
| "eval_steps_per_second": 2.474, | |
| "step": 936 | |
| }, | |
| { | |
| "epoch": 27.0, | |
| "grad_norm": 3.4233100414276123, | |
| "learning_rate": 5.242718446601942e-06, | |
| "loss": 0.4131, | |
| "step": 972 | |
| }, | |
| { | |
| "epoch": 27.0, | |
| "eval_accuracy": 0.012048855633802816, | |
| "eval_bleu": 0.4001388307186371, | |
| "eval_f1": 0.011492413001125629, | |
| "eval_loss": 1.5463387966156006, | |
| "eval_precision": 0.010985168630418547, | |
| "eval_recall": 0.012048855633802815, | |
| "eval_runtime": 7.2564, | |
| "eval_samples_per_second": 19.569, | |
| "eval_steps_per_second": 2.481, | |
| "step": 972 | |
| }, | |
| { | |
| "epoch": 28.0, | |
| "grad_norm": 2.70424485206604, | |
| "learning_rate": 3.4951456310679615e-06, | |
| "loss": 0.4089, | |
| "step": 1008 | |
| }, | |
| { | |
| "epoch": 28.0, | |
| "eval_accuracy": 0.012048855633802816, | |
| "eval_bleu": 0.39854032138633055, | |
| "eval_f1": 0.011492413001125629, | |
| "eval_loss": 1.544076681137085, | |
| "eval_precision": 0.010985168630418547, | |
| "eval_recall": 0.012048855633802815, | |
| "eval_runtime": 7.0435, | |
| "eval_samples_per_second": 20.16, | |
| "eval_steps_per_second": 2.556, | |
| "step": 1008 | |
| }, | |
| { | |
| "epoch": 29.0, | |
| "grad_norm": 3.052654266357422, | |
| "learning_rate": 1.7475728155339808e-06, | |
| "loss": 0.4026, | |
| "step": 1044 | |
| }, | |
| { | |
| "epoch": 29.0, | |
| "eval_accuracy": 0.012048855633802816, | |
| "eval_bleu": 0.3987203294842987, | |
| "eval_f1": 0.01150499351512691, | |
| "eval_loss": 1.5447367429733276, | |
| "eval_precision": 0.01100817275358665, | |
| "eval_recall": 0.012048855633802815, | |
| "eval_runtime": 6.3248, | |
| "eval_samples_per_second": 22.451, | |
| "eval_steps_per_second": 2.846, | |
| "step": 1044 | |
| }, | |
| { | |
| "epoch": 30.0, | |
| "grad_norm": 3.11586856842041, | |
| "learning_rate": 0.0, | |
| "loss": 0.3999, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 30.0, | |
| "eval_accuracy": 0.012048855633802816, | |
| "eval_bleu": 0.398637528263251, | |
| "eval_f1": 0.011517601862123769, | |
| "eval_loss": 1.5456310510635376, | |
| "eval_precision": 0.011031274351852924, | |
| "eval_recall": 0.012048855633802815, | |
| "eval_runtime": 6.2921, | |
| "eval_samples_per_second": 22.568, | |
| "eval_steps_per_second": 2.861, | |
| "step": 1080 | |
| } | |
| ], | |
| "logging_steps": 500, | |
| "max_steps": 1080, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 30, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1113104056320000.0, | |
| "train_batch_size": 16, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |