{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 6.014790468364832,
  "eval_steps": 200,
  "global_step": 3660,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.3286770747740345,
      "grad_norm": 0.4525283873081207,
      "learning_rate": 1.9879568983287468e-05,
      "loss": 2.0164,
      "step": 200
    },
    {
      "epoch": 0.3286770747740345,
      "eval_loss": 0.808789849281311,
      "eval_runtime": 648.6859,
      "eval_samples_per_second": 1.893,
      "eval_steps_per_second": 0.237,
      "step": 200
    },
    {
      "epoch": 0.657354149548069,
      "grad_norm": 0.48705440759658813,
      "learning_rate": 1.9466986959828063e-05,
      "loss": 1.1703,
      "step": 400
    },
    {
      "epoch": 0.657354149548069,
      "eval_loss": 0.7549917697906494,
      "eval_runtime": 645.5309,
      "eval_samples_per_second": 1.902,
      "eval_steps_per_second": 0.239,
      "step": 400
    },
    {
      "epoch": 0.9860312243221035,
      "grad_norm": 0.6883838176727295,
      "learning_rate": 1.877302694199442e-05,
      "loss": 1.0269,
      "step": 600
    },
    {
      "epoch": 0.9860312243221035,
      "eval_loss": 0.7438387870788574,
      "eval_runtime": 649.872,
      "eval_samples_per_second": 1.89,
      "eval_steps_per_second": 0.237,
      "step": 600
    },
    {
      "epoch": 1.314708299096138,
      "grad_norm": 0.5763583183288574,
      "learning_rate": 1.78183148246803e-05,
      "loss": 0.9723,
      "step": 800
    },
    {
      "epoch": 1.314708299096138,
      "eval_loss": 0.7379218935966492,
      "eval_runtime": 649.5248,
      "eval_samples_per_second": 1.891,
      "eval_steps_per_second": 0.237,
      "step": 800
    },
    {
      "epoch": 1.6433853738701725,
      "grad_norm": 0.5101306438446045,
      "learning_rate": 1.6631226582407954e-05,
      "loss": 0.9505,
      "step": 1000
    },
    {
      "epoch": 1.6433853738701725,
      "eval_loss": 0.7350366711616516,
      "eval_runtime": 650.1904,
      "eval_samples_per_second": 1.889,
      "eval_steps_per_second": 0.237,
      "step": 1000
    },
    {
      "epoch": 1.972062448644207,
      "grad_norm": 0.5611714124679565,
      "learning_rate": 1.524704487799008e-05,
      "loss": 0.9255,
      "step": 1200
    },
    {
      "epoch": 1.972062448644207,
      "eval_loss": 0.7326610684394836,
      "eval_runtime": 652.5211,
      "eval_samples_per_second": 1.882,
      "eval_steps_per_second": 0.236,
      "step": 1200
    },
    {
      "epoch": 2.3007395234182417,
      "grad_norm": 0.5886391401290894,
      "learning_rate": 1.3706910390450679e-05,
      "loss": 0.9188,
      "step": 1400
    },
    {
      "epoch": 2.3007395234182417,
      "eval_loss": 0.7313700914382935,
      "eval_runtime": 647.8807,
      "eval_samples_per_second": 1.895,
      "eval_steps_per_second": 0.238,
      "step": 1400
    },
    {
      "epoch": 2.629416598192276,
      "grad_norm": 0.5193888545036316,
      "learning_rate": 1.2056599030859367e-05,
      "loss": 0.9001,
      "step": 1600
    },
    {
      "epoch": 2.629416598192276,
      "eval_loss": 0.7305780053138733,
      "eval_runtime": 648.1334,
      "eval_samples_per_second": 1.895,
      "eval_steps_per_second": 0.238,
      "step": 1600
    },
    {
      "epoch": 2.9580936729663105,
      "grad_norm": 0.580741286277771,
      "learning_rate": 1.0345161389697083e-05,
      "loss": 0.8896,
      "step": 1800
    },
    {
      "epoch": 2.9580936729663105,
      "eval_loss": 0.7289234399795532,
      "eval_runtime": 648.6387,
      "eval_samples_per_second": 1.893,
      "eval_steps_per_second": 0.237,
      "step": 1800
    },
    {
      "epoch": 3.286770747740345,
      "grad_norm": 0.5864242911338806,
      "learning_rate": 8.62346485412832e-06,
      "loss": 0.8851,
      "step": 2000
    },
    {
      "epoch": 3.286770747740345,
      "eval_loss": 0.7283803224563599,
      "eval_runtime": 648.5025,
      "eval_samples_per_second": 1.894,
      "eval_steps_per_second": 0.237,
      "step": 2000
    },
    {
      "epoch": 3.61544782251438,
      "grad_norm": 0.8759085536003113,
      "learning_rate": 6.942681726402474e-06,
      "loss": 0.893,
      "step": 2200
    },
    {
      "epoch": 3.61544782251438,
      "eval_loss": 0.7277897596359253,
      "eval_runtime": 649.5645,
      "eval_samples_per_second": 1.89,
      "eval_steps_per_second": 0.237,
      "step": 2200
    },
    {
      "epoch": 3.944124897288414,
      "grad_norm": 0.6328697204589844,
      "learning_rate": 5.352768279562315e-06,
      "loss": 0.8713,
      "step": 2400
    },
    {
      "epoch": 3.944124897288414,
      "eval_loss": 0.7274895906448364,
      "eval_runtime": 648.0857,
      "eval_samples_per_second": 1.895,
      "eval_steps_per_second": 0.238,
      "step": 2400
    },
    {
      "epoch": 4.272801972062449,
      "grad_norm": 0.5513933300971985,
      "learning_rate": 3.900979955999271e-06,
      "loss": 0.8729,
      "step": 2600
    },
    {
      "epoch": 4.272801972062449,
      "eval_loss": 0.7273133993148804,
      "eval_runtime": 648.0021,
      "eval_samples_per_second": 1.895,
      "eval_steps_per_second": 0.238,
      "step": 2600
    },
    {
      "epoch": 4.6014790468364835,
      "grad_norm": 0.4977937638759613,
      "learning_rate": 2.6304668401566334e-06,
      "loss": 0.8796,
      "step": 2800
    },
    {
      "epoch": 4.6014790468364835,
      "eval_loss": 0.727094829082489,
      "eval_runtime": 650.7111,
      "eval_samples_per_second": 1.887,
      "eval_steps_per_second": 0.237,
      "step": 2800
    },
    {
      "epoch": 4.930156121610517,
      "grad_norm": 0.5514353513717651,
      "learning_rate": 1.5789911507718824e-06,
      "loss": 0.8674,
      "step": 3000
    },
    {
      "epoch": 4.930156121610517,
      "eval_loss": 0.7267969250679016,
      "eval_runtime": 647.9762,
      "eval_samples_per_second": 1.895,
      "eval_steps_per_second": 0.238,
      "step": 3000
    },
    {
      "epoch": 5.258833196384552,
      "grad_norm": 0.573556661605835,
      "learning_rate": 7.778048713818975e-07,
      "loss": 0.8716,
      "step": 3200
    },
    {
      "epoch": 5.258833196384552,
      "eval_loss": 0.7268173694610596,
      "eval_runtime": 646.5688,
      "eval_samples_per_second": 1.899,
      "eval_steps_per_second": 0.238,
      "step": 3200
    },
    {
      "epoch": 5.587510271158587,
      "grad_norm": 0.6027952432632446,
      "learning_rate": 2.507208781817638e-07,
      "loss": 0.8666,
      "step": 3400
    },
    {
      "epoch": 5.587510271158587,
      "eval_loss": 0.726804256439209,
      "eval_runtime": 646.8487,
      "eval_samples_per_second": 1.898,
      "eval_steps_per_second": 0.238,
      "step": 3400
    },
    {
      "epoch": 5.916187345932621,
      "grad_norm": 0.7252454161643982,
      "learning_rate": 1.340517319543877e-08,
      "loss": 0.8669,
      "step": 3600
    },
    {
      "epoch": 5.916187345932621,
      "eval_loss": 0.7267909049987793,
      "eval_runtime": 645.845,
      "eval_samples_per_second": 1.901,
      "eval_steps_per_second": 0.238,
      "step": 3600
    }
  ],
  "logging_steps": 200,
  "max_steps": 3660,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 7,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 5.222777133780173e+17,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}