{ "best_global_step": 2000, "best_metric": 2.37511335409849, "best_model_checkpoint": "./SALAMA_NEWMED6/checkpoint-2000", "epoch": 1.1267605633802817, "eval_steps": 2000, "global_step": 2000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.056338028169014086, "grad_norm": 2.0612876415252686, "learning_rate": 1.98e-06, "loss": 0.0564, "step": 100 }, { "epoch": 0.11267605633802817, "grad_norm": 1.467324137687683, "learning_rate": 3.980000000000001e-06, "loss": 0.0525, "step": 200 }, { "epoch": 0.16901408450704225, "grad_norm": 1.1306756734848022, "learning_rate": 5.98e-06, "loss": 0.0595, "step": 300 }, { "epoch": 0.22535211267605634, "grad_norm": 2.680368661880493, "learning_rate": 7.980000000000002e-06, "loss": 0.0561, "step": 400 }, { "epoch": 0.28169014084507044, "grad_norm": 4.0752339363098145, "learning_rate": 9.980000000000001e-06, "loss": 0.0614, "step": 500 }, { "epoch": 0.3380281690140845, "grad_norm": 2.2631568908691406, "learning_rate": 9.794818652849742e-06, "loss": 0.0635, "step": 600 }, { "epoch": 0.39436619718309857, "grad_norm": 3.81473445892334, "learning_rate": 9.587564766839379e-06, "loss": 0.0656, "step": 700 }, { "epoch": 0.4507042253521127, "grad_norm": 4.52515983581543, "learning_rate": 9.380310880829016e-06, "loss": 0.0674, "step": 800 }, { "epoch": 0.5070422535211268, "grad_norm": 5.0459513664245605, "learning_rate": 9.173056994818653e-06, "loss": 0.0746, "step": 900 }, { "epoch": 0.5633802816901409, "grad_norm": 2.449471950531006, "learning_rate": 8.96580310880829e-06, "loss": 0.0677, "step": 1000 }, { "epoch": 0.6197183098591549, "grad_norm": 2.445279121398926, "learning_rate": 8.758549222797929e-06, "loss": 0.0696, "step": 1100 }, { "epoch": 0.676056338028169, "grad_norm": 4.043335437774658, "learning_rate": 8.551295336787564e-06, "loss": 0.0685, "step": 1200 }, { "epoch": 0.7323943661971831, "grad_norm": 1.7198883295059204, "learning_rate": 8.344041450777203e-06, "loss": 0.0647, "step": 1300 }, { "epoch": 0.7887323943661971, "grad_norm": 3.9584922790527344, "learning_rate": 8.13678756476684e-06, "loss": 0.0754, "step": 1400 }, { "epoch": 0.8450704225352113, "grad_norm": 1.50466787815094, "learning_rate": 7.929533678756477e-06, "loss": 0.0658, "step": 1500 }, { "epoch": 0.9014084507042254, "grad_norm": 3.0848331451416016, "learning_rate": 7.722279792746114e-06, "loss": 0.0683, "step": 1600 }, { "epoch": 0.9577464788732394, "grad_norm": 2.9346296787261963, "learning_rate": 7.515025906735752e-06, "loss": 0.0664, "step": 1700 }, { "epoch": 1.0140845070422535, "grad_norm": 2.4397850036621094, "learning_rate": 7.307772020725389e-06, "loss": 0.0512, "step": 1800 }, { "epoch": 1.0704225352112675, "grad_norm": 1.455031156539917, "learning_rate": 7.100518134715026e-06, "loss": 0.0204, "step": 1900 }, { "epoch": 1.1267605633802817, "grad_norm": 1.2992125749588013, "learning_rate": 6.893264248704664e-06, "loss": 0.0225, "step": 2000 }, { "epoch": 1.1267605633802817, "eval_loss": 0.02766764909029007, "eval_runtime": 10449.8663, "eval_samples_per_second": 1.359, "eval_steps_per_second": 0.17, "eval_wer": 2.37511335409849, "step": 2000 } ], "logging_steps": 100, "max_steps": 5325, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 2000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 6.531667287146496e+19, "train_batch_size": 4, "trial_name": null, "trial_params": null }