| { |
| "best_metric": 0.18096545655791033, |
| "best_model_checkpoint": "../models/baseline_large2_full_1k-n/checkpoint-660", |
| "epoch": 5.1, |
| "eval_steps": 220, |
| "global_step": 660, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.1, |
| "grad_norm": 3.4453155994415283, |
| "learning_rate": 9.90909090909091e-06, |
| "loss": 1.5329, |
| "step": 110 |
| }, |
| { |
| "epoch": 1.1, |
| "grad_norm": 2.816467046737671, |
| "learning_rate": 8.8989898989899e-06, |
| "loss": 0.4894, |
| "step": 220 |
| }, |
| { |
| "epoch": 1.1, |
| "eval_LhPD_(mipa)": 2.6376535390328346, |
| "eval_cer": 0.24163589269510524, |
| "eval_cer_norm": 0.22863069089892205, |
| "eval_loss": 0.612886905670166, |
| "eval_lvnshtn": 8.382857142857137, |
| "eval_ped": 7.757142857142851, |
| "eval_per": 0.2524059166580333, |
| "eval_pfer": 7.5604320437314785, |
| "eval_runtime": 425.5358, |
| "eval_samples_per_second": 0.822, |
| "eval_steps_per_second": 0.103, |
| "eval_time": 3199.799334049225, |
| "eval_wefed": 22.74642857142856, |
| "eval_wefer": 0.7052453948748261, |
| "step": 220 |
| }, |
| { |
| "epoch": 2.1, |
| "grad_norm": 2.1414551734924316, |
| "learning_rate": 7.787878787878789e-06, |
| "loss": 0.2647, |
| "step": 330 |
| }, |
| { |
| "epoch": 3.1, |
| "grad_norm": 2.1375880241394043, |
| "learning_rate": 6.676767676767677e-06, |
| "loss": 0.162, |
| "step": 440 |
| }, |
| { |
| "epoch": 3.1, |
| "eval_LhPD_(mipa)": 2.243887904361885, |
| "eval_cer": 0.19601619003578613, |
| "eval_cer_norm": 0.17116995643519559, |
| "eval_loss": 0.5113338232040405, |
| "eval_lvnshtn": 6.691428571428563, |
| "eval_ped": 6.197142857142849, |
| "eval_per": 0.20629669844107124, |
| "eval_pfer": 6.507800864530915, |
| "eval_runtime": 430.255, |
| "eval_samples_per_second": 0.813, |
| "eval_steps_per_second": 0.102, |
| "eval_time": 6436.061186313629, |
| "eval_wefed": 20.86178571428572, |
| "eval_wefer": 0.5902481277563488, |
| "step": 440 |
| }, |
| { |
| "epoch": 4.1, |
| "grad_norm": 1.3998914957046509, |
| "learning_rate": 5.565656565656566e-06, |
| "loss": 0.0912, |
| "step": 550 |
| }, |
| { |
| "epoch": 5.1, |
| "grad_norm": 1.5558580160140991, |
| "learning_rate": 4.454545454545455e-06, |
| "loss": 0.045, |
| "step": 660 |
| }, |
| { |
| "epoch": 5.1, |
| "eval_LhPD_(mipa)": 2.021641668726793, |
| "eval_cer": 0.17092720391633578, |
| "eval_cer_norm": 0.1603608557874088, |
| "eval_loss": 0.557009756565094, |
| "eval_lvnshtn": 5.9371428571428515, |
| "eval_ped": 5.522857142857141, |
| "eval_per": 0.18096545655791033, |
| "eval_pfer": 5.746953179375352, |
| "eval_runtime": 424.9133, |
| "eval_samples_per_second": 0.824, |
| "eval_steps_per_second": 0.104, |
| "eval_time": 9660.67699599266, |
| "eval_wefed": 17.925357142857134, |
| "eval_wefer": 0.5507118071695863, |
| "step": 660 |
| } |
| ], |
| "logging_steps": 110, |
| "max_steps": 1100, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 9223372036854775807, |
| "save_steps": 220, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 8.91731828736e+19, |
| "train_batch_size": 64, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|