{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 3.0,
  "eval_steps": 500,
  "global_step": 1563,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.09606147934678194,
      "grad_norm": 0.09411401301622391,
      "learning_rate": 9.800000000000001e-06,
      "loss": 1.5688,
      "step": 50
    },
    {
      "epoch": 0.19212295869356388,
      "grad_norm": 0.22506041824817657,
      "learning_rate": 1.98e-05,
      "loss": 1.5186,
      "step": 100
    },
    {
      "epoch": 0.2881844380403458,
      "grad_norm": 0.1974368542432785,
      "learning_rate": 1.933014354066986e-05,
      "loss": 1.3774,
      "step": 150
    },
    {
      "epoch": 0.38424591738712777,
      "grad_norm": 0.19661001861095428,
      "learning_rate": 1.8646616541353386e-05,
      "loss": 1.3543,
      "step": 200
    },
    {
      "epoch": 0.4803073967339097,
      "grad_norm": 0.2228914052248001,
      "learning_rate": 1.7963089542036912e-05,
      "loss": 1.3374,
      "step": 250
    },
    {
      "epoch": 0.5763688760806917,
      "grad_norm": 0.20485010743141174,
      "learning_rate": 1.727956254272044e-05,
      "loss": 1.317,
      "step": 300
    },
    {
      "epoch": 0.6724303554274735,
      "grad_norm": 0.20028738677501678,
      "learning_rate": 1.6596035543403966e-05,
      "loss": 1.3222,
      "step": 350
    },
    {
      "epoch": 0.7684918347742555,
      "grad_norm": 0.2254370152950287,
      "learning_rate": 1.5912508544087493e-05,
      "loss": 1.3104,
      "step": 400
    },
    {
      "epoch": 0.8645533141210374,
      "grad_norm": 0.2582356333732605,
      "learning_rate": 1.522898154477102e-05,
      "loss": 1.3173,
      "step": 450
    },
    {
      "epoch": 0.9606147934678194,
      "grad_norm": 0.20715464651584625,
      "learning_rate": 1.4545454545454546e-05,
      "loss": 1.2858,
      "step": 500
    },
    {
      "epoch": 1.0557156580211335,
      "grad_norm": 0.2385847121477127,
      "learning_rate": 1.3861927546138075e-05,
      "loss": 1.2917,
      "step": 550
    },
    {
      "epoch": 1.1517771373679155,
      "grad_norm": 0.2660408914089203,
      "learning_rate": 1.3178400546821602e-05,
      "loss": 1.2782,
      "step": 600
    },
    {
      "epoch": 1.2478386167146973,
      "grad_norm": 0.3173312544822693,
      "learning_rate": 1.2494873547505127e-05,
      "loss": 1.2685,
      "step": 650
    },
    {
      "epoch": 1.3439000960614793,
      "grad_norm": 0.31093448400497437,
      "learning_rate": 1.1811346548188653e-05,
      "loss": 1.2843,
      "step": 700
    },
    {
      "epoch": 1.4399615754082613,
      "grad_norm": 0.2999595105648041,
      "learning_rate": 1.112781954887218e-05,
      "loss": 1.2555,
      "step": 750
    },
    {
      "epoch": 1.5360230547550433,
      "grad_norm": 0.2843952178955078,
      "learning_rate": 1.0444292549555709e-05,
      "loss": 1.2629,
      "step": 800
    },
    {
      "epoch": 1.6320845341018253,
      "grad_norm": 0.27973154187202454,
      "learning_rate": 9.760765550239236e-06,
      "loss": 1.263,
      "step": 850
    },
    {
      "epoch": 1.728146013448607,
      "grad_norm": 0.2947864532470703,
      "learning_rate": 9.077238550922762e-06,
      "loss": 1.2802,
      "step": 900
    },
    {
      "epoch": 1.824207492795389,
      "grad_norm": 0.30843278765678406,
      "learning_rate": 8.393711551606289e-06,
      "loss": 1.2819,
      "step": 950
    },
    {
      "epoch": 1.9202689721421708,
      "grad_norm": 0.3192874491214752,
      "learning_rate": 7.710184552289816e-06,
      "loss": 1.2659,
      "step": 1000
    },
    {
      "epoch": 2.015369836695485,
      "grad_norm": 0.3018593192100525,
      "learning_rate": 7.026657552973343e-06,
      "loss": 1.2355,
      "step": 1050
    },
    {
      "epoch": 2.111431316042267,
      "grad_norm": 0.34733593463897705,
      "learning_rate": 6.34313055365687e-06,
      "loss": 1.2444,
      "step": 1100
    },
    {
      "epoch": 2.207492795389049,
      "grad_norm": 0.3217043876647949,
      "learning_rate": 5.659603554340397e-06,
      "loss": 1.2676,
      "step": 1150
    },
    {
      "epoch": 2.303554274735831,
      "grad_norm": 0.39473122358322144,
      "learning_rate": 4.976076555023924e-06,
      "loss": 1.2632,
      "step": 1200
    },
    {
      "epoch": 2.399615754082613,
      "grad_norm": 0.380327969789505,
      "learning_rate": 4.292549555707451e-06,
      "loss": 1.2314,
      "step": 1250
    },
    {
      "epoch": 2.4956772334293946,
      "grad_norm": 0.3401298522949219,
      "learning_rate": 3.6090225563909775e-06,
      "loss": 1.243,
      "step": 1300
    },
    {
      "epoch": 2.5917387127761766,
      "grad_norm": 0.39804333448410034,
      "learning_rate": 2.9254955570745047e-06,
      "loss": 1.257,
      "step": 1350
    },
    {
      "epoch": 2.6878001921229586,
      "grad_norm": 0.3660435080528259,
      "learning_rate": 2.2419685577580314e-06,
      "loss": 1.249,
      "step": 1400
    },
    {
      "epoch": 2.7838616714697406,
      "grad_norm": 0.3209303617477417,
      "learning_rate": 1.5584415584415584e-06,
      "loss": 1.2405,
      "step": 1450
    },
    {
      "epoch": 2.8799231508165226,
      "grad_norm": 0.3564263880252838,
      "learning_rate": 8.749145591250855e-07,
      "loss": 1.2238,
      "step": 1500
    },
    {
      "epoch": 2.9759846301633046,
      "grad_norm": 0.34031230211257935,
      "learning_rate": 1.9138755980861244e-07,
      "loss": 1.2422,
      "step": 1550
    }
  ],
  "logging_steps": 50,
  "max_steps": 1563,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 1000,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 7.616521324147507e+17,
  "train_batch_size": 24,
  "trial_name": null,
  "trial_params": null
}
|
|