| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 3.0, |
| "eval_steps": 1000, |
| "global_step": 1467, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.10224948875255624, |
| "grad_norm": 0.22070330381393433, |
| "learning_rate": 9.999694946400538e-05, |
| "loss": 1.2022, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.20449897750511248, |
| "grad_norm": 0.32263442873954773, |
| "learning_rate": 9.963133532962538e-05, |
| "loss": 1.0028, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.3067484662576687, |
| "grad_norm": 0.2867143452167511, |
| "learning_rate": 9.866072190997923e-05, |
| "loss": 0.9375, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.40899795501022496, |
| "grad_norm": 0.2979172170162201, |
| "learning_rate": 9.709694085177272e-05, |
| "loss": 0.9275, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.5112474437627812, |
| "grad_norm": 0.3326389789581299, |
| "learning_rate": 9.495905443524156e-05, |
| "loss": 0.9066, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.6134969325153374, |
| "grad_norm": 0.3445422649383545, |
| "learning_rate": 9.227312320752585e-05, |
| "loss": 0.9226, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.7157464212678937, |
| "grad_norm": 0.3284899890422821, |
| "learning_rate": 8.907188830811434e-05, |
| "loss": 0.8938, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.8179959100204499, |
| "grad_norm": 0.3464341461658478, |
| "learning_rate": 8.539437235876908e-05, |
| "loss": 0.9039, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.9202453987730062, |
| "grad_norm": 0.35458436608314514, |
| "learning_rate": 8.1285403783028e-05, |
| "loss": 0.888, |
| "step": 450 |
| }, |
| { |
| "epoch": 1.0224948875255624, |
| "grad_norm": 0.3531360626220703, |
| "learning_rate": 7.679507035376672e-05, |
| "loss": 0.8834, |
| "step": 500 |
| }, |
| { |
| "epoch": 1.1247443762781186, |
| "grad_norm": 0.38273200392723083, |
| "learning_rate": 7.197810863000116e-05, |
| "loss": 0.8308, |
| "step": 550 |
| }, |
| { |
| "epoch": 1.2269938650306749, |
| "grad_norm": 0.3767881691455841, |
| "learning_rate": 6.689323672561398e-05, |
| "loss": 0.8212, |
| "step": 600 |
| }, |
| { |
| "epoch": 1.329243353783231, |
| "grad_norm": 0.4009556472301483, |
| "learning_rate": 6.160243854346398e-05, |
| "loss": 0.8223, |
| "step": 650 |
| }, |
| { |
| "epoch": 1.4314928425357873, |
| "grad_norm": 0.430261492729187, |
| "learning_rate": 5.617020819996831e-05, |
| "loss": 0.8233, |
| "step": 700 |
| }, |
| { |
| "epoch": 1.5337423312883436, |
| "grad_norm": 0.39679399132728577, |
| "learning_rate": 5.0662763850519936e-05, |
| "loss": 0.8224, |
| "step": 750 |
| }, |
| { |
| "epoch": 1.6359918200408998, |
| "grad_norm": 0.4061990976333618, |
| "learning_rate": 4.514724049910228e-05, |
| "loss": 0.8018, |
| "step": 800 |
| }, |
| { |
| "epoch": 1.738241308793456, |
| "grad_norm": 0.4112933874130249, |
| "learning_rate": 3.969087163164348e-05, |
| "loss": 0.8233, |
| "step": 850 |
| }, |
| { |
| "epoch": 1.8404907975460123, |
| "grad_norm": 0.41034209728240967, |
| "learning_rate": 3.436016964888865e-05, |
| "loss": 0.8079, |
| "step": 900 |
| }, |
| { |
| "epoch": 1.9427402862985685, |
| "grad_norm": 0.4243488907814026, |
| "learning_rate": 2.922011508920362e-05, |
| "loss": 0.8134, |
| "step": 950 |
| }, |
| { |
| "epoch": 2.044989775051125, |
| "grad_norm": 0.432036817073822, |
| "learning_rate": 2.433336452457431e-05, |
| "loss": 0.763, |
| "step": 1000 |
| }, |
| { |
| "epoch": 2.044989775051125, |
| "eval_loss": 0.8031564950942993, |
| "eval_runtime": 238.4795, |
| "eval_samples_per_second": 0.923, |
| "eval_steps_per_second": 0.923, |
| "step": 1000 |
| }, |
| { |
| "epoch": 2.147239263803681, |
| "grad_norm": 0.422557532787323, |
| "learning_rate": 1.975948678544301e-05, |
| "loss": 0.7697, |
| "step": 1050 |
| }, |
| { |
| "epoch": 2.2494887525562373, |
| "grad_norm": 0.44817760586738586, |
| "learning_rate": 1.5554236824697687e-05, |
| "loss": 0.7843, |
| "step": 1100 |
| }, |
| { |
| "epoch": 2.3517382413087935, |
| "grad_norm": 0.4573202133178711, |
| "learning_rate": 1.176887607231434e-05, |
| "loss": 0.7643, |
| "step": 1150 |
| }, |
| { |
| "epoch": 2.4539877300613497, |
| "grad_norm": 0.4587993919849396, |
| "learning_rate": 8.449547565437887e-06, |
| "loss": 0.7501, |
| "step": 1200 |
| }, |
| { |
| "epoch": 2.556237218813906, |
| "grad_norm": 0.4792107045650482, |
| "learning_rate": 5.6367134709813644e-06, |
| "loss": 0.763, |
| "step": 1250 |
| }, |
| { |
| "epoch": 2.658486707566462, |
| "grad_norm": 0.46617749333381653, |
| "learning_rate": 3.364661857267265e-06, |
| "loss": 0.7504, |
| "step": 1300 |
| }, |
| { |
| "epoch": 2.7607361963190185, |
| "grad_norm": 0.45423439145088196, |
| "learning_rate": 1.6610887270981423e-06, |
| "loss": 0.7539, |
| "step": 1350 |
| }, |
| { |
| "epoch": 2.8629856850715747, |
| "grad_norm": 0.4944806694984436, |
| "learning_rate": 5.467604072171062e-07, |
| "loss": 0.7662, |
| "step": 1400 |
| }, |
| { |
| "epoch": 2.965235173824131, |
| "grad_norm": 0.45094653964042664, |
| "learning_rate": 3.526040958494181e-08, |
| "loss": 0.7672, |
| "step": 1450 |
| } |
| ], |
| "logging_steps": 50, |
| "max_steps": 1467, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 1000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 5.441743417518326e+17, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|