| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 3.0, |
| "eval_steps": 500, |
| "global_step": 24591, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0609979260705136, |
| "grad_norm": 2.6735310554504395, |
| "learning_rate": 4.8985401163027127e-05, |
| "loss": 2.9975, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.1219958521410272, |
| "grad_norm": 2.0044796466827393, |
| "learning_rate": 4.79687690618519e-05, |
| "loss": 2.801, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.1829937782115408, |
| "grad_norm": 2.462843179702759, |
| "learning_rate": 4.6952136960676673e-05, |
| "loss": 2.6891, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.2439917042820544, |
| "grad_norm": 2.019771099090576, |
| "learning_rate": 4.593550485950145e-05, |
| "loss": 2.6299, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.304989630352568, |
| "grad_norm": 2.4739561080932617, |
| "learning_rate": 4.491887275832622e-05, |
| "loss": 2.5757, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.3659875564230816, |
| "grad_norm": 1.741564393043518, |
| "learning_rate": 4.3902240657150994e-05, |
| "loss": 2.5364, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.4269854824935952, |
| "grad_norm": 2.228411912918091, |
| "learning_rate": 4.288560855597577e-05, |
| "loss": 2.4804, |
| "step": 3500 |
| }, |
| { |
| "epoch": 0.4879834085641088, |
| "grad_norm": 2.0240838527679443, |
| "learning_rate": 4.186897645480054e-05, |
| "loss": 2.453, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.5489813346346224, |
| "grad_norm": 2.8336830139160156, |
| "learning_rate": 4.085234435362531e-05, |
| "loss": 2.4085, |
| "step": 4500 |
| }, |
| { |
| "epoch": 0.609979260705136, |
| "grad_norm": 1.7519323825836182, |
| "learning_rate": 3.983571225245009e-05, |
| "loss": 2.3795, |
| "step": 5000 |
| }, |
| { |
| "epoch": 0.6709771867756497, |
| "grad_norm": 2.075009822845459, |
| "learning_rate": 3.8819080151274854e-05, |
| "loss": 2.3558, |
| "step": 5500 |
| }, |
| { |
| "epoch": 0.7319751128461632, |
| "grad_norm": 2.2394227981567383, |
| "learning_rate": 3.7802448050099634e-05, |
| "loss": 2.3254, |
| "step": 6000 |
| }, |
| { |
| "epoch": 0.7929730389166768, |
| "grad_norm": 2.2436089515686035, |
| "learning_rate": 3.678581594892441e-05, |
| "loss": 2.3182, |
| "step": 6500 |
| }, |
| { |
| "epoch": 0.8539709649871904, |
| "grad_norm": 1.8976473808288574, |
| "learning_rate": 3.5769183847749174e-05, |
| "loss": 2.2847, |
| "step": 7000 |
| }, |
| { |
| "epoch": 0.9149688910577041, |
| "grad_norm": 2.127561330795288, |
| "learning_rate": 3.4752551746573955e-05, |
| "loss": 2.2741, |
| "step": 7500 |
| }, |
| { |
| "epoch": 0.9759668171282176, |
| "grad_norm": 2.080770254135132, |
| "learning_rate": 3.373591964539872e-05, |
| "loss": 2.2525, |
| "step": 8000 |
| }, |
| { |
| "epoch": 1.0369647431987312, |
| "grad_norm": 2.3001296520233154, |
| "learning_rate": 3.2719287544223495e-05, |
| "loss": 2.1812, |
| "step": 8500 |
| }, |
| { |
| "epoch": 1.0979626692692448, |
| "grad_norm": 2.002516746520996, |
| "learning_rate": 3.1702655443048275e-05, |
| "loss": 2.1904, |
| "step": 9000 |
| }, |
| { |
| "epoch": 1.1589605953397584, |
| "grad_norm": 1.7760947942733765, |
| "learning_rate": 3.068602334187304e-05, |
| "loss": 2.1648, |
| "step": 9500 |
| }, |
| { |
| "epoch": 1.219958521410272, |
| "grad_norm": 2.029125452041626, |
| "learning_rate": 2.966939124069782e-05, |
| "loss": 2.1476, |
| "step": 10000 |
| }, |
| { |
| "epoch": 1.2809564474807855, |
| "grad_norm": 2.6139211654663086, |
| "learning_rate": 2.865275913952259e-05, |
| "loss": 2.15, |
| "step": 10500 |
| }, |
| { |
| "epoch": 1.3419543735512993, |
| "grad_norm": 1.8446179628372192, |
| "learning_rate": 2.7636127038347365e-05, |
| "loss": 2.1283, |
| "step": 11000 |
| }, |
| { |
| "epoch": 1.402952299621813, |
| "grad_norm": 1.97454035282135, |
| "learning_rate": 2.6619494937172135e-05, |
| "loss": 2.1233, |
| "step": 11500 |
| }, |
| { |
| "epoch": 1.4639502256923265, |
| "grad_norm": 2.268068313598633, |
| "learning_rate": 2.560286283599691e-05, |
| "loss": 2.1084, |
| "step": 12000 |
| }, |
| { |
| "epoch": 1.52494815176284, |
| "grad_norm": 1.7861677408218384, |
| "learning_rate": 2.4586230734821682e-05, |
| "loss": 2.0869, |
| "step": 12500 |
| }, |
| { |
| "epoch": 1.5859460778333536, |
| "grad_norm": 2.0749287605285645, |
| "learning_rate": 2.3569598633646456e-05, |
| "loss": 2.0835, |
| "step": 13000 |
| }, |
| { |
| "epoch": 1.6469440039038674, |
| "grad_norm": 2.6761562824249268, |
| "learning_rate": 2.2552966532471232e-05, |
| "loss": 2.1048, |
| "step": 13500 |
| }, |
| { |
| "epoch": 1.707941929974381, |
| "grad_norm": 2.0634262561798096, |
| "learning_rate": 2.1536334431296002e-05, |
| "loss": 2.0877, |
| "step": 14000 |
| }, |
| { |
| "epoch": 1.7689398560448946, |
| "grad_norm": 2.3483529090881348, |
| "learning_rate": 2.0519702330120776e-05, |
| "loss": 2.0864, |
| "step": 14500 |
| }, |
| { |
| "epoch": 1.8299377821154081, |
| "grad_norm": 1.6350581645965576, |
| "learning_rate": 1.950307022894555e-05, |
| "loss": 2.0859, |
| "step": 15000 |
| }, |
| { |
| "epoch": 1.8909357081859217, |
| "grad_norm": 2.286836862564087, |
| "learning_rate": 1.8486438127770323e-05, |
| "loss": 2.063, |
| "step": 15500 |
| }, |
| { |
| "epoch": 1.9519336342564353, |
| "grad_norm": 4.733737468719482, |
| "learning_rate": 1.7469806026595096e-05, |
| "loss": 2.0553, |
| "step": 16000 |
| }, |
| { |
| "epoch": 2.012931560326949, |
| "grad_norm": 2.2189626693725586, |
| "learning_rate": 1.645317392541987e-05, |
| "loss": 2.063, |
| "step": 16500 |
| }, |
| { |
| "epoch": 2.0739294863974624, |
| "grad_norm": 2.3642847537994385, |
| "learning_rate": 1.5436541824244643e-05, |
| "loss": 2.0198, |
| "step": 17000 |
| }, |
| { |
| "epoch": 2.134927412467976, |
| "grad_norm": 2.2193996906280518, |
| "learning_rate": 1.4419909723069416e-05, |
| "loss": 2.0039, |
| "step": 17500 |
| }, |
| { |
| "epoch": 2.1959253385384896, |
| "grad_norm": 2.039994239807129, |
| "learning_rate": 1.340327762189419e-05, |
| "loss": 2.0012, |
| "step": 18000 |
| }, |
| { |
| "epoch": 2.256923264609003, |
| "grad_norm": 3.105970859527588, |
| "learning_rate": 1.2386645520718963e-05, |
| "loss": 2.0174, |
| "step": 18500 |
| }, |
| { |
| "epoch": 2.3179211906795167, |
| "grad_norm": 2.1234045028686523, |
| "learning_rate": 1.1370013419543737e-05, |
| "loss": 2.0009, |
| "step": 19000 |
| }, |
| { |
| "epoch": 2.3789191167500303, |
| "grad_norm": 1.8237278461456299, |
| "learning_rate": 1.0353381318368509e-05, |
| "loss": 2.0125, |
| "step": 19500 |
| }, |
| { |
| "epoch": 2.439917042820544, |
| "grad_norm": 2.1688883304595947, |
| "learning_rate": 9.336749217193284e-06, |
| "loss": 2.0001, |
| "step": 20000 |
| }, |
| { |
| "epoch": 2.500914968891058, |
| "grad_norm": 2.9593875408172607, |
| "learning_rate": 8.320117116018055e-06, |
| "loss": 1.9857, |
| "step": 20500 |
| }, |
| { |
| "epoch": 2.561912894961571, |
| "grad_norm": 1.8724238872528076, |
| "learning_rate": 7.303485014842829e-06, |
| "loss": 1.9884, |
| "step": 21000 |
| }, |
| { |
| "epoch": 2.622910821032085, |
| "grad_norm": 1.7542228698730469, |
| "learning_rate": 6.286852913667603e-06, |
| "loss": 1.9813, |
| "step": 21500 |
| }, |
| { |
| "epoch": 2.6839087471025986, |
| "grad_norm": 2.1812169551849365, |
| "learning_rate": 5.270220812492376e-06, |
| "loss": 1.9735, |
| "step": 22000 |
| }, |
| { |
| "epoch": 2.744906673173112, |
| "grad_norm": 1.6151964664459229, |
| "learning_rate": 4.253588711317149e-06, |
| "loss": 1.9882, |
| "step": 22500 |
| }, |
| { |
| "epoch": 2.805904599243626, |
| "grad_norm": 2.106318950653076, |
| "learning_rate": 3.2369566101419217e-06, |
| "loss": 1.9771, |
| "step": 23000 |
| }, |
| { |
| "epoch": 2.8669025253141394, |
| "grad_norm": 1.8604987859725952, |
| "learning_rate": 2.220324508966695e-06, |
| "loss": 1.9741, |
| "step": 23500 |
| }, |
| { |
| "epoch": 2.927900451384653, |
| "grad_norm": 2.1725010871887207, |
| "learning_rate": 1.2036924077914685e-06, |
| "loss": 1.9899, |
| "step": 24000 |
| }, |
| { |
| "epoch": 2.9888983774551665, |
| "grad_norm": 2.769097089767456, |
| "learning_rate": 1.8706030661624173e-07, |
| "loss": 1.9704, |
| "step": 24500 |
| }, |
| { |
| "epoch": 3.0, |
| "step": 24591, |
| "total_flos": 5.415436121066701e+16, |
| "train_loss": 2.194293288467889, |
| "train_runtime": 7212.7178, |
| "train_samples_per_second": 27.274, |
| "train_steps_per_second": 3.409 |
| } |
| ], |
| "logging_steps": 500, |
| "max_steps": 24591, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 1000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 5.415436121066701e+16, |
| "train_batch_size": 8, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|