| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.08177652449770885, |
| "eval_steps": 10, |
| "global_step": 290, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0028198801550934085, |
| "grad_norm": 1.4646761417388916, |
| "learning_rate": 2.535211267605634e-07, |
| "loss": 1.6487, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.0028198801550934085, |
| "eval_loss": 1.5981513261795044, |
| "eval_runtime": 1507.1995, |
| "eval_samples_per_second": 0.663, |
| "eval_steps_per_second": 0.332, |
| "eval_wer": 23.173577454676, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.005639760310186817, |
| "grad_norm": 1.6286903619766235, |
| "learning_rate": 5.352112676056338e-07, |
| "loss": 1.5828, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.005639760310186817, |
| "eval_loss": 1.5729961395263672, |
| "eval_runtime": 1479.7495, |
| "eval_samples_per_second": 0.676, |
| "eval_steps_per_second": 0.338, |
| "eval_wer": 17.930291052820696, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.008459640465280225, |
| "grad_norm": 2.2479844093322754, |
| "learning_rate": 8.169014084507043e-07, |
| "loss": 1.6375, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.008459640465280225, |
| "eval_loss": 1.5637080669403076, |
| "eval_runtime": 1475.9552, |
| "eval_samples_per_second": 0.678, |
| "eval_steps_per_second": 0.339, |
| "eval_wer": 17.930291052820696, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.011279520620373634, |
| "grad_norm": 1.6205031871795654, |
| "learning_rate": 1.098591549295775e-06, |
| "loss": 1.4224, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.011279520620373634, |
| "eval_loss": 1.5508662462234497, |
| "eval_runtime": 1465.0874, |
| "eval_samples_per_second": 0.683, |
| "eval_steps_per_second": 0.341, |
| "eval_wer": 17.876392382321235, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.014099400775467043, |
| "grad_norm": 1.4715520143508911, |
| "learning_rate": 1.3802816901408453e-06, |
| "loss": 1.5848, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.014099400775467043, |
| "eval_loss": 1.5341806411743164, |
| "eval_runtime": 1483.291, |
| "eval_samples_per_second": 0.674, |
| "eval_steps_per_second": 0.337, |
| "eval_wer": 17.822493711821775, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.01691928093056045, |
| "grad_norm": 1.752820611000061, |
| "learning_rate": 1.6619718309859157e-06, |
| "loss": 1.4716, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.01691928093056045, |
| "eval_loss": 1.5121240615844727, |
| "eval_runtime": 1496.0308, |
| "eval_samples_per_second": 0.668, |
| "eval_steps_per_second": 0.334, |
| "eval_wer": 17.714696370822853, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.01973916108565386, |
| "grad_norm": 1.584325909614563, |
| "learning_rate": 1.943661971830986e-06, |
| "loss": 1.4226, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.01973916108565386, |
| "eval_loss": 1.4858661890029907, |
| "eval_runtime": 1472.4553, |
| "eval_samples_per_second": 0.679, |
| "eval_steps_per_second": 0.34, |
| "eval_wer": 17.62486525332375, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.022559041240747268, |
| "grad_norm": 1.720224142074585, |
| "learning_rate": 2.2253521126760566e-06, |
| "loss": 1.4217, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.022559041240747268, |
| "eval_loss": 1.4541670083999634, |
| "eval_runtime": 1479.057, |
| "eval_samples_per_second": 0.676, |
| "eval_steps_per_second": 0.338, |
| "eval_wer": 17.46316924182537, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.02537892139584068, |
| "grad_norm": 1.3850756883621216, |
| "learning_rate": 2.507042253521127e-06, |
| "loss": 1.444, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.02537892139584068, |
| "eval_loss": 1.415584921836853, |
| "eval_runtime": 1481.7443, |
| "eval_samples_per_second": 0.675, |
| "eval_steps_per_second": 0.337, |
| "eval_wer": 17.40927057132591, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.028198801550934086, |
| "grad_norm": 2.1286864280700684, |
| "learning_rate": 2.7887323943661974e-06, |
| "loss": 1.3818, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.028198801550934086, |
| "eval_loss": 1.3688818216323853, |
| "eval_runtime": 1474.4035, |
| "eval_samples_per_second": 0.678, |
| "eval_steps_per_second": 0.339, |
| "eval_wer": 17.337405677326625, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.031018681706027493, |
| "grad_norm": 1.3948097229003906, |
| "learning_rate": 3.0704225352112678e-06, |
| "loss": 1.2849, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.031018681706027493, |
| "eval_loss": 1.3072822093963623, |
| "eval_runtime": 1463.7543, |
| "eval_samples_per_second": 0.683, |
| "eval_steps_per_second": 0.342, |
| "eval_wer": 17.175709665828244, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.0338385618611209, |
| "grad_norm": 1.907498836517334, |
| "learning_rate": 3.352112676056338e-06, |
| "loss": 1.2869, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.0338385618611209, |
| "eval_loss": 1.2292650938034058, |
| "eval_runtime": 1464.7785, |
| "eval_samples_per_second": 0.683, |
| "eval_steps_per_second": 0.341, |
| "eval_wer": 17.121810995328783, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.036658442016214314, |
| "grad_norm": 2.2028934955596924, |
| "learning_rate": 3.633802816901409e-06, |
| "loss": 1.1545, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.036658442016214314, |
| "eval_loss": 1.1384223699569702, |
| "eval_runtime": 1494.8653, |
| "eval_samples_per_second": 0.669, |
| "eval_steps_per_second": 0.334, |
| "eval_wer": 16.92418253683076, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.03947832217130772, |
| "grad_norm": 1.801270604133606, |
| "learning_rate": 3.915492957746479e-06, |
| "loss": 1.1247, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.03947832217130772, |
| "eval_loss": 1.0565879344940186, |
| "eval_runtime": 1504.6119, |
| "eval_samples_per_second": 0.665, |
| "eval_steps_per_second": 0.332, |
| "eval_wer": 16.708587854832917, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.04229820232640113, |
| "grad_norm": 1.8307346105575562, |
| "learning_rate": 4.19718309859155e-06, |
| "loss": 1.0473, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.04229820232640113, |
| "eval_loss": 0.9981860518455505, |
| "eval_runtime": 1504.1852, |
| "eval_samples_per_second": 0.665, |
| "eval_steps_per_second": 0.332, |
| "eval_wer": 16.331297161336686, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.045118082481494536, |
| "grad_norm": 1.1669814586639404, |
| "learning_rate": 4.4788732394366205e-06, |
| "loss": 0.9783, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.045118082481494536, |
| "eval_loss": 0.9503867030143738, |
| "eval_runtime": 1510.1792, |
| "eval_samples_per_second": 0.662, |
| "eval_steps_per_second": 0.331, |
| "eval_wer": 16.241466043837587, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.04793796263658794, |
| "grad_norm": 1.8830143213272095, |
| "learning_rate": 4.7605633802816905e-06, |
| "loss": 0.937, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.04793796263658794, |
| "eval_loss": 0.906912624835968, |
| "eval_runtime": 1516.9202, |
| "eval_samples_per_second": 0.659, |
| "eval_steps_per_second": 0.33, |
| "eval_wer": 16.349263384836508, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.05075784279168136, |
| "grad_norm": 1.4165269136428833, |
| "learning_rate": 5.042253521126761e-06, |
| "loss": 0.942, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.05075784279168136, |
| "eval_loss": 0.868701159954071, |
| "eval_runtime": 1514.2041, |
| "eval_samples_per_second": 0.66, |
| "eval_steps_per_second": 0.33, |
| "eval_wer": 15.45095220984549, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.053577722946774764, |
| "grad_norm": 1.283534049987793, |
| "learning_rate": 5.323943661971831e-06, |
| "loss": 0.7458, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.053577722946774764, |
| "eval_loss": 0.8295483589172363, |
| "eval_runtime": 1513.4352, |
| "eval_samples_per_second": 0.661, |
| "eval_steps_per_second": 0.33, |
| "eval_wer": 15.199425080848005, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.05639760310186817, |
| "grad_norm": 2.037789821624756, |
| "learning_rate": 5.577464788732395e-06, |
| "loss": 0.8498, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.05639760310186817, |
| "eval_loss": 0.7952865958213806, |
| "eval_runtime": 1513.3273, |
| "eval_samples_per_second": 0.661, |
| "eval_steps_per_second": 0.33, |
| "eval_wer": 14.768235716852319, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.05921748325696158, |
| "grad_norm": 1.5177339315414429, |
| "learning_rate": 5.859154929577466e-06, |
| "loss": 0.8204, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.05921748325696158, |
| "eval_loss": 0.7552182674407959, |
| "eval_runtime": 1510.8116, |
| "eval_samples_per_second": 0.662, |
| "eval_steps_per_second": 0.331, |
| "eval_wer": 14.498742364355014, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.062037363412054986, |
| "grad_norm": 2.0467543601989746, |
| "learning_rate": 6.1408450704225356e-06, |
| "loss": 0.7322, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.062037363412054986, |
| "eval_loss": 0.7149233222007751, |
| "eval_runtime": 1512.5061, |
| "eval_samples_per_second": 0.661, |
| "eval_steps_per_second": 0.331, |
| "eval_wer": 14.31908012935681, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.06485724356714839, |
| "grad_norm": 1.996768832206726, |
| "learning_rate": 6.422535211267606e-06, |
| "loss": 0.7584, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.06485724356714839, |
| "eval_loss": 0.679040789604187, |
| "eval_runtime": 1497.8821, |
| "eval_samples_per_second": 0.668, |
| "eval_steps_per_second": 0.334, |
| "eval_wer": 14.28314768235717, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.0676771237222418, |
| "grad_norm": 2.120554208755493, |
| "learning_rate": 6.704225352112676e-06, |
| "loss": 0.6627, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.0676771237222418, |
| "eval_loss": 0.6412619352340698, |
| "eval_runtime": 1508.0578, |
| "eval_samples_per_second": 0.663, |
| "eval_steps_per_second": 0.332, |
| "eval_wer": 14.39094502335609, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.07049700387733521, |
| "grad_norm": 2.281755208969116, |
| "learning_rate": 6.985915492957746e-06, |
| "loss": 0.6823, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.07049700387733521, |
| "eval_loss": 0.6027175188064575, |
| "eval_runtime": 1504.4211, |
| "eval_samples_per_second": 0.665, |
| "eval_steps_per_second": 0.332, |
| "eval_wer": 14.35501257635645, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.07331688403242863, |
| "grad_norm": 1.7897437810897827, |
| "learning_rate": 7.267605633802818e-06, |
| "loss": 0.5683, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.07331688403242863, |
| "eval_loss": 0.5633881688117981, |
| "eval_runtime": 1514.5793, |
| "eval_samples_per_second": 0.66, |
| "eval_steps_per_second": 0.33, |
| "eval_wer": 14.28314768235717, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.07613676418752204, |
| "grad_norm": 2.116328001022339, |
| "learning_rate": 7.549295774647888e-06, |
| "loss": 0.5503, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.07613676418752204, |
| "eval_loss": 0.5198965072631836, |
| "eval_runtime": 1518.7263, |
| "eval_samples_per_second": 0.658, |
| "eval_steps_per_second": 0.329, |
| "eval_wer": 14.193316564858065, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.07895664434261544, |
| "grad_norm": 1.2750239372253418, |
| "learning_rate": 7.830985915492958e-06, |
| "loss": 0.4712, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.07895664434261544, |
| "eval_loss": 0.46468213200569153, |
| "eval_runtime": 1520.4856, |
| "eval_samples_per_second": 0.658, |
| "eval_steps_per_second": 0.329, |
| "eval_wer": 14.103485447358965, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.08177652449770885, |
| "grad_norm": 2.0781710147857666, |
| "learning_rate": 8.112676056338029e-06, |
| "loss": 0.4791, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.08177652449770885, |
| "eval_loss": 0.3976580798625946, |
| "eval_runtime": 1514.2321, |
| "eval_samples_per_second": 0.66, |
| "eval_steps_per_second": 0.33, |
| "eval_wer": 14.085519223859144, |
| "step": 290 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 3546, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 10, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 8.18355290112e+18, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|