{
  "best_global_step": 2000,
  "best_metric": 2.37511335409849,
  "best_model_checkpoint": "./SALAMA_NEWMED6/checkpoint-2000",
  "epoch": 1.1267605633802817,
  "eval_steps": 2000,
  "global_step": 2000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.056338028169014086,
      "grad_norm": 2.0612876415252686,
      "learning_rate": 1.98e-06,
      "loss": 0.0564,
      "step": 100
    },
    {
      "epoch": 0.11267605633802817,
      "grad_norm": 1.467324137687683,
      "learning_rate": 3.980000000000001e-06,
      "loss": 0.0525,
      "step": 200
    },
    {
      "epoch": 0.16901408450704225,
      "grad_norm": 1.1306756734848022,
      "learning_rate": 5.98e-06,
      "loss": 0.0595,
      "step": 300
    },
    {
      "epoch": 0.22535211267605634,
      "grad_norm": 2.680368661880493,
      "learning_rate": 7.980000000000002e-06,
      "loss": 0.0561,
      "step": 400
    },
    {
      "epoch": 0.28169014084507044,
      "grad_norm": 4.0752339363098145,
      "learning_rate": 9.980000000000001e-06,
      "loss": 0.0614,
      "step": 500
    },
    {
      "epoch": 0.3380281690140845,
      "grad_norm": 2.2631568908691406,
      "learning_rate": 9.794818652849742e-06,
      "loss": 0.0635,
      "step": 600
    },
    {
      "epoch": 0.39436619718309857,
      "grad_norm": 3.81473445892334,
      "learning_rate": 9.587564766839379e-06,
      "loss": 0.0656,
      "step": 700
    },
    {
      "epoch": 0.4507042253521127,
      "grad_norm": 4.52515983581543,
      "learning_rate": 9.380310880829016e-06,
      "loss": 0.0674,
      "step": 800
    },
    {
      "epoch": 0.5070422535211268,
      "grad_norm": 5.0459513664245605,
      "learning_rate": 9.173056994818653e-06,
      "loss": 0.0746,
      "step": 900
    },
    {
      "epoch": 0.5633802816901409,
      "grad_norm": 2.449471950531006,
      "learning_rate": 8.96580310880829e-06,
      "loss": 0.0677,
      "step": 1000
    },
    {
      "epoch": 0.6197183098591549,
      "grad_norm": 2.445279121398926,
      "learning_rate": 8.758549222797929e-06,
      "loss": 0.0696,
      "step": 1100
    },
    {
      "epoch": 0.676056338028169,
      "grad_norm": 4.043335437774658,
      "learning_rate": 8.551295336787564e-06,
      "loss": 0.0685,
      "step": 1200
    },
    {
      "epoch": 0.7323943661971831,
      "grad_norm": 1.7198883295059204,
      "learning_rate": 8.344041450777203e-06,
      "loss": 0.0647,
      "step": 1300
    },
    {
      "epoch": 0.7887323943661971,
      "grad_norm": 3.9584922790527344,
      "learning_rate": 8.13678756476684e-06,
      "loss": 0.0754,
      "step": 1400
    },
    {
      "epoch": 0.8450704225352113,
      "grad_norm": 1.50466787815094,
      "learning_rate": 7.929533678756477e-06,
      "loss": 0.0658,
      "step": 1500
    },
    {
      "epoch": 0.9014084507042254,
      "grad_norm": 3.0848331451416016,
      "learning_rate": 7.722279792746114e-06,
      "loss": 0.0683,
      "step": 1600
    },
    {
      "epoch": 0.9577464788732394,
      "grad_norm": 2.9346296787261963,
      "learning_rate": 7.515025906735752e-06,
      "loss": 0.0664,
      "step": 1700
    },
    {
      "epoch": 1.0140845070422535,
      "grad_norm": 2.4397850036621094,
      "learning_rate": 7.307772020725389e-06,
      "loss": 0.0512,
      "step": 1800
    },
    {
      "epoch": 1.0704225352112675,
      "grad_norm": 1.455031156539917,
      "learning_rate": 7.100518134715026e-06,
      "loss": 0.0204,
      "step": 1900
    },
    {
      "epoch": 1.1267605633802817,
      "grad_norm": 1.2992125749588013,
      "learning_rate": 6.893264248704664e-06,
      "loss": 0.0225,
      "step": 2000
    },
    {
      "epoch": 1.1267605633802817,
      "eval_loss": 0.02766764909029007,
      "eval_runtime": 10449.8663,
      "eval_samples_per_second": 1.359,
      "eval_steps_per_second": 0.17,
      "eval_wer": 2.37511335409849,
      "step": 2000
    }
  ],
  "logging_steps": 100,
  "max_steps": 5325,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 2000,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 6.531667287146496e+19,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}