{ "best_global_step": 1000, "best_metric": 2.0130758995663705, "best_model_checkpoint": "./SALAMA_C5/checkpoint-1000", "epoch": 0.6807351940095303, "eval_steps": 1000, "global_step": 1000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.013614703880190605, "grad_norm": 5.017816066741943, "learning_rate": 3.8e-07, "loss": 0.0279, "step": 20 }, { "epoch": 0.02722940776038121, "grad_norm": 3.6539018154144287, "learning_rate": 7.8e-07, "loss": 0.0334, "step": 40 }, { "epoch": 0.04084411164057182, "grad_norm": 2.4141480922698975, "learning_rate": 1.1800000000000001e-06, "loss": 0.0266, "step": 60 }, { "epoch": 0.05445881552076242, "grad_norm": 2.1062567234039307, "learning_rate": 1.5800000000000001e-06, "loss": 0.0197, "step": 80 }, { "epoch": 0.06807351940095303, "grad_norm": 4.6255106925964355, "learning_rate": 1.98e-06, "loss": 0.0279, "step": 100 }, { "epoch": 0.08168822328114364, "grad_norm": 1.933724284172058, "learning_rate": 2.38e-06, "loss": 0.0261, "step": 120 }, { "epoch": 0.09530292716133425, "grad_norm": 2.3127458095550537, "learning_rate": 2.7800000000000005e-06, "loss": 0.0261, "step": 140 }, { "epoch": 0.10891763104152484, "grad_norm": 2.6971325874328613, "learning_rate": 3.1800000000000005e-06, "loss": 0.0186, "step": 160 }, { "epoch": 0.12253233492171545, "grad_norm": 3.273725986480713, "learning_rate": 3.58e-06, "loss": 0.0278, "step": 180 }, { "epoch": 0.13614703880190607, "grad_norm": 2.669668197631836, "learning_rate": 3.980000000000001e-06, "loss": 0.0308, "step": 200 }, { "epoch": 0.14976174268209666, "grad_norm": 0.6969190239906311, "learning_rate": 4.38e-06, "loss": 0.0271, "step": 220 }, { "epoch": 0.16337644656228728, "grad_norm": 2.6846354007720947, "learning_rate": 4.78e-06, "loss": 0.0298, "step": 240 }, { "epoch": 0.17699115044247787, "grad_norm": 3.2715647220611572, "learning_rate": 5.18e-06, "loss": 0.0223, "step": 260 }, { "epoch": 0.1906058543226685, "grad_norm": 3.50703763961792, "learning_rate": 5.580000000000001e-06, "loss": 0.0273, "step": 280 }, { "epoch": 0.2042205582028591, "grad_norm": 1.7984274625778198, "learning_rate": 5.98e-06, "loss": 0.0305, "step": 300 }, { "epoch": 0.21783526208304968, "grad_norm": 2.7656664848327637, "learning_rate": 6.380000000000001e-06, "loss": 0.0484, "step": 320 }, { "epoch": 0.2314499659632403, "grad_norm": 0.7585025429725647, "learning_rate": 6.780000000000001e-06, "loss": 0.0213, "step": 340 }, { "epoch": 0.2450646698434309, "grad_norm": 4.038341522216797, "learning_rate": 7.180000000000001e-06, "loss": 0.0333, "step": 360 }, { "epoch": 0.2586793737236215, "grad_norm": 2.8464272022247314, "learning_rate": 7.58e-06, "loss": 0.0269, "step": 380 }, { "epoch": 0.27229407760381213, "grad_norm": 3.064093589782715, "learning_rate": 7.980000000000002e-06, "loss": 0.0169, "step": 400 }, { "epoch": 0.2859087814840027, "grad_norm": 5.7664971351623535, "learning_rate": 8.380000000000001e-06, "loss": 0.0275, "step": 420 }, { "epoch": 0.2995234853641933, "grad_norm": 1.4587912559509277, "learning_rate": 8.78e-06, "loss": 0.0258, "step": 440 }, { "epoch": 0.3131381892443839, "grad_norm": 2.867338180541992, "learning_rate": 9.180000000000002e-06, "loss": 0.0279, "step": 460 }, { "epoch": 0.32675289312457456, "grad_norm": 4.9567484855651855, "learning_rate": 9.58e-06, "loss": 0.0301, "step": 480 }, { "epoch": 0.34036759700476515, "grad_norm": 4.536691665649414, "learning_rate": 9.980000000000001e-06, "loss": 0.0329, "step": 500 }, { "epoch": 0.35398230088495575, "grad_norm": 2.4109556674957275, "learning_rate": 9.95136933708728e-06, "loss": 0.0351, "step": 520 }, { "epoch": 0.36759700476514634, "grad_norm": 3.561450958251953, "learning_rate": 9.900179165600206e-06, "loss": 0.0368, "step": 540 }, { "epoch": 0.381211708645337, "grad_norm": 1.7630550861358643, "learning_rate": 9.848988994113131e-06, "loss": 0.0388, "step": 560 }, { "epoch": 0.3948264125255276, "grad_norm": 1.5144799947738647, "learning_rate": 9.797798822626056e-06, "loss": 0.0304, "step": 580 }, { "epoch": 0.4084411164057182, "grad_norm": 4.403763771057129, "learning_rate": 9.746608651138983e-06, "loss": 0.033, "step": 600 }, { "epoch": 0.42205582028590877, "grad_norm": 3.6199100017547607, "learning_rate": 9.695418479651908e-06, "loss": 0.0377, "step": 620 }, { "epoch": 0.43567052416609936, "grad_norm": 3.1659555435180664, "learning_rate": 9.644228308164833e-06, "loss": 0.0381, "step": 640 }, { "epoch": 0.44928522804629, "grad_norm": 4.508129596710205, "learning_rate": 9.59303813667776e-06, "loss": 0.0277, "step": 660 }, { "epoch": 0.4628999319264806, "grad_norm": 3.981079578399658, "learning_rate": 9.541847965190683e-06, "loss": 0.0297, "step": 680 }, { "epoch": 0.4765146358066712, "grad_norm": 2.7824220657348633, "learning_rate": 9.49065779370361e-06, "loss": 0.0398, "step": 700 }, { "epoch": 0.4901293396868618, "grad_norm": 5.651052951812744, "learning_rate": 9.439467622216535e-06, "loss": 0.0448, "step": 720 }, { "epoch": 0.5037440435670524, "grad_norm": 4.3824920654296875, "learning_rate": 9.38827745072946e-06, "loss": 0.0314, "step": 740 }, { "epoch": 0.517358747447243, "grad_norm": 4.516641139984131, "learning_rate": 9.337087279242385e-06, "loss": 0.0437, "step": 760 }, { "epoch": 0.5309734513274337, "grad_norm": 3.0896172523498535, "learning_rate": 9.285897107755312e-06, "loss": 0.0389, "step": 780 }, { "epoch": 0.5445881552076243, "grad_norm": 2.9489505290985107, "learning_rate": 9.234706936268237e-06, "loss": 0.036, "step": 800 }, { "epoch": 0.5582028590878149, "grad_norm": 1.6065051555633545, "learning_rate": 9.183516764781162e-06, "loss": 0.0431, "step": 820 }, { "epoch": 0.5718175629680055, "grad_norm": 2.8498787879943848, "learning_rate": 9.132326593294089e-06, "loss": 0.035, "step": 840 }, { "epoch": 0.585432266848196, "grad_norm": 1.6527795791625977, "learning_rate": 9.081136421807014e-06, "loss": 0.0351, "step": 860 }, { "epoch": 0.5990469707283866, "grad_norm": 3.358445405960083, "learning_rate": 9.02994625031994e-06, "loss": 0.0287, "step": 880 }, { "epoch": 0.6126616746085772, "grad_norm": 3.706794500350952, "learning_rate": 8.978756078832866e-06, "loss": 0.0413, "step": 900 }, { "epoch": 0.6262763784887678, "grad_norm": 3.2217488288879395, "learning_rate": 8.92756590734579e-06, "loss": 0.0335, "step": 920 }, { "epoch": 0.6398910823689585, "grad_norm": 4.742300987243652, "learning_rate": 8.876375735858716e-06, "loss": 0.0365, "step": 940 }, { "epoch": 0.6535057862491491, "grad_norm": 2.290529727935791, "learning_rate": 8.825185564371641e-06, "loss": 0.0288, "step": 960 }, { "epoch": 0.6671204901293397, "grad_norm": 1.5146220922470093, "learning_rate": 8.773995392884566e-06, "loss": 0.0379, "step": 980 }, { "epoch": 0.6807351940095303, "grad_norm": 5.127984046936035, "learning_rate": 8.722805221397493e-06, "loss": 0.0256, "step": 1000 }, { "epoch": 0.6807351940095303, "eval_loss": 0.02619771473109722, "eval_runtime": 4340.6262, "eval_samples_per_second": 2.707, "eval_steps_per_second": 0.338, "eval_wer": 2.0130758995663705, "step": 1000 } ], "logging_steps": 20, "max_steps": 4407, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 9.23473281024e+18, "train_batch_size": 16, "trial_name": null, "trial_params": null }