| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.0, |
| "eval_steps": 500, |
| "global_step": 1000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "grad_norm": 0.4471716284751892, |
| "learning_rate": 1.8e-05, |
| "loss": 1.287, |
| "step": 10 |
| }, |
| { |
| "grad_norm": 0.20870500802993774, |
| "learning_rate": 3.8e-05, |
| "loss": 1.202, |
| "step": 20 |
| }, |
| { |
| "grad_norm": 0.15201318264007568, |
| "learning_rate": 5.8e-05, |
| "loss": 1.2002, |
| "step": 30 |
| }, |
| { |
| "grad_norm": 0.16838377714157104, |
| "learning_rate": 7.800000000000001e-05, |
| "loss": 1.1862, |
| "step": 40 |
| }, |
| { |
| "grad_norm": 0.8558338284492493, |
| "learning_rate": 9.8e-05, |
| "loss": 1.1176, |
| "step": 50 |
| }, |
| { |
| "grad_norm": 0.3106129765510559, |
| "learning_rate": 9.997785653888835e-05, |
| "loss": 1.0896, |
| "step": 60 |
| }, |
| { |
| "grad_norm": 0.18132999539375305, |
| "learning_rate": 9.990133642141359e-05, |
| "loss": 1.0682, |
| "step": 70 |
| }, |
| { |
| "grad_norm": 0.20137448608875275, |
| "learning_rate": 9.977024992520602e-05, |
| "loss": 1.0698, |
| "step": 80 |
| }, |
| { |
| "grad_norm": 0.4021719992160797, |
| "learning_rate": 9.95847403914247e-05, |
| "loss": 1.0598, |
| "step": 90 |
| }, |
| { |
| "grad_norm": 0.25229543447494507, |
| "learning_rate": 9.934501067202117e-05, |
| "loss": 1.0643, |
| "step": 100 |
| }, |
| { |
| "grad_norm": 0.41172972321510315, |
| "learning_rate": 9.905132290792394e-05, |
| "loss": 1.0437, |
| "step": 110 |
| }, |
| { |
| "grad_norm": 0.3424518406391144, |
| "learning_rate": 9.870399824239117e-05, |
| "loss": 1.0168, |
| "step": 120 |
| }, |
| { |
| "grad_norm": 0.5512681007385254, |
| "learning_rate": 9.830341646984521e-05, |
| "loss": 0.9655, |
| "step": 130 |
| }, |
| { |
| "grad_norm": 0.6318266987800598, |
| "learning_rate": 9.785001562057309e-05, |
| "loss": 0.9044, |
| "step": 140 |
| }, |
| { |
| "grad_norm": 0.68224036693573, |
| "learning_rate": 9.734429148174675e-05, |
| "loss": 0.825, |
| "step": 150 |
| }, |
| { |
| "grad_norm": 0.7105351090431213, |
| "learning_rate": 9.6786797055287e-05, |
| "loss": 0.7075, |
| "step": 160 |
| }, |
| { |
| "grad_norm": 0.9974402189254761, |
| "learning_rate": 9.617814195316411e-05, |
| "loss": 0.6054, |
| "step": 170 |
| }, |
| { |
| "grad_norm": 0.7529901266098022, |
| "learning_rate": 9.551899173079607e-05, |
| "loss": 0.5167, |
| "step": 180 |
| }, |
| { |
| "grad_norm": 0.7296067476272583, |
| "learning_rate": 9.481006715927351e-05, |
| "loss": 0.4093, |
| "step": 190 |
| }, |
| { |
| "grad_norm": 0.8161115050315857, |
| "learning_rate": 9.405214343720707e-05, |
| "loss": 0.3432, |
| "step": 200 |
| }, |
| { |
| "grad_norm": 0.8231457471847534, |
| "learning_rate": 9.32460493430591e-05, |
| "loss": 0.3133, |
| "step": 210 |
| }, |
| { |
| "grad_norm": 0.6054251790046692, |
| "learning_rate": 9.239266632888659e-05, |
| "loss": 0.269, |
| "step": 220 |
| }, |
| { |
| "grad_norm": 0.8241866230964661, |
| "learning_rate": 9.14929275564863e-05, |
| "loss": 0.2279, |
| "step": 230 |
| }, |
| { |
| "grad_norm": 0.6421325206756592, |
| "learning_rate": 9.0547816876996e-05, |
| "loss": 0.2042, |
| "step": 240 |
| }, |
| { |
| "grad_norm": 0.7083246111869812, |
| "learning_rate": 8.955836775506776e-05, |
| "loss": 0.2063, |
| "step": 250 |
| }, |
| { |
| "grad_norm": 0.6825037002563477, |
| "learning_rate": 8.852566213878947e-05, |
| "loss": 0.1857, |
| "step": 260 |
| }, |
| { |
| "grad_norm": 0.9095971584320068, |
| "learning_rate": 8.745082927659047e-05, |
| "loss": 0.1583, |
| "step": 270 |
| }, |
| { |
| "grad_norm": 0.7291728258132935, |
| "learning_rate": 8.633504448242505e-05, |
| "loss": 0.168, |
| "step": 280 |
| }, |
| { |
| "grad_norm": 0.5694337487220764, |
| "learning_rate": 8.517952785058385e-05, |
| "loss": 0.1521, |
| "step": 290 |
| }, |
| { |
| "grad_norm": 0.6113538146018982, |
| "learning_rate": 8.398554292153866e-05, |
| "loss": 0.1446, |
| "step": 300 |
| }, |
| { |
| "grad_norm": 0.7520995140075684, |
| "learning_rate": 8.275439530027948e-05, |
| "loss": 0.1436, |
| "step": 310 |
| }, |
| { |
| "grad_norm": 0.5477866530418396, |
| "learning_rate": 8.148743122865463e-05, |
| "loss": 0.1256, |
| "step": 320 |
| }, |
| { |
| "grad_norm": 0.7027655839920044, |
| "learning_rate": 8.018603611327504e-05, |
| "loss": 0.1078, |
| "step": 330 |
| }, |
| { |
| "grad_norm": 0.6788212060928345, |
| "learning_rate": 7.88516330105925e-05, |
| "loss": 0.0999, |
| "step": 340 |
| }, |
| { |
| "grad_norm": 0.8349484801292419, |
| "learning_rate": 7.748568107080832e-05, |
| "loss": 0.0976, |
| "step": 350 |
| }, |
| { |
| "grad_norm": 0.6667965650558472, |
| "learning_rate": 7.608967394231387e-05, |
| "loss": 0.0889, |
| "step": 360 |
| }, |
| { |
| "grad_norm": 0.5367528200149536, |
| "learning_rate": 7.466513813840825e-05, |
| "loss": 0.0831, |
| "step": 370 |
| }, |
| { |
| "grad_norm": 0.6868879199028015, |
| "learning_rate": 7.32136313680782e-05, |
| "loss": 0.0782, |
| "step": 380 |
| }, |
| { |
| "grad_norm": 0.5659252405166626, |
| "learning_rate": 7.173674083266624e-05, |
| "loss": 0.0844, |
| "step": 390 |
| }, |
| { |
| "grad_norm": 0.5637879371643066, |
| "learning_rate": 7.023608149028937e-05, |
| "loss": 0.0857, |
| "step": 400 |
| }, |
| { |
| "grad_norm": 0.5377715826034546, |
| "learning_rate": 6.871329428990602e-05, |
| "loss": 0.0887, |
| "step": 410 |
| }, |
| { |
| "grad_norm": 0.6541546583175659, |
| "learning_rate": 6.71700443769625e-05, |
| "loss": 0.0708, |
| "step": 420 |
| }, |
| { |
| "grad_norm": 0.6478805541992188, |
| "learning_rate": 6.56080192725808e-05, |
| "loss": 0.0722, |
| "step": 430 |
| }, |
| { |
| "grad_norm": 0.5954216122627258, |
| "learning_rate": 6.402892702827916e-05, |
| "loss": 0.0765, |
| "step": 440 |
| }, |
| { |
| "grad_norm": 0.533097505569458, |
| "learning_rate": 6.243449435824276e-05, |
| "loss": 0.0793, |
| "step": 450 |
| }, |
| { |
| "grad_norm": 0.5573644042015076, |
| "learning_rate": 6.0826464751186994e-05, |
| "loss": 0.0747, |
| "step": 460 |
| }, |
| { |
| "grad_norm": 0.5666183829307556, |
| "learning_rate": 5.9206596563878357e-05, |
| "loss": 0.0804, |
| "step": 470 |
| }, |
| { |
| "grad_norm": 0.6182389259338379, |
| "learning_rate": 5.757666109839702e-05, |
| "loss": 0.0675, |
| "step": 480 |
| }, |
| { |
| "grad_norm": 0.41103699803352356, |
| "learning_rate": 5.5938440665244006e-05, |
| "loss": 0.0638, |
| "step": 490 |
| }, |
| { |
| "grad_norm": 0.5240882635116577, |
| "learning_rate": 5.4293726634410855e-05, |
| "loss": 0.064, |
| "step": 500 |
| }, |
| { |
| "grad_norm": 0.5724607110023499, |
| "learning_rate": 5.264431747654284e-05, |
| "loss": 0.0615, |
| "step": 510 |
| }, |
| { |
| "grad_norm": 0.4552757441997528, |
| "learning_rate": 5.0992016796337686e-05, |
| "loss": 0.0627, |
| "step": 520 |
| }, |
| { |
| "grad_norm": 0.5225529670715332, |
| "learning_rate": 4.93386313603304e-05, |
| "loss": 0.0608, |
| "step": 530 |
| }, |
| { |
| "grad_norm": 0.491335391998291, |
| "learning_rate": 4.7685969121220456e-05, |
| "loss": 0.0693, |
| "step": 540 |
| }, |
| { |
| "grad_norm": 0.48702317476272583, |
| "learning_rate": 4.60358372409022e-05, |
| "loss": 0.0595, |
| "step": 550 |
| }, |
| { |
| "grad_norm": 0.5675700902938843, |
| "learning_rate": 4.439004011435979e-05, |
| "loss": 0.0651, |
| "step": 560 |
| }, |
| { |
| "grad_norm": 0.5530186295509338, |
| "learning_rate": 4.275037739658771e-05, |
| "loss": 0.0701, |
| "step": 570 |
| }, |
| { |
| "grad_norm": 0.6130643486976624, |
| "learning_rate": 4.111864203469457e-05, |
| "loss": 0.0589, |
| "step": 580 |
| }, |
| { |
| "grad_norm": 0.523269534111023, |
| "learning_rate": 3.949661830734172e-05, |
| "loss": 0.057, |
| "step": 590 |
| }, |
| { |
| "grad_norm": 0.5698956847190857, |
| "learning_rate": 3.788607987366069e-05, |
| "loss": 0.0677, |
| "step": 600 |
| }, |
| { |
| "grad_norm": 0.4857608675956726, |
| "learning_rate": 3.628878783378302e-05, |
| "loss": 0.0522, |
| "step": 610 |
| }, |
| { |
| "grad_norm": 0.5062139630317688, |
| "learning_rate": 3.470648880310313e-05, |
| "loss": 0.0565, |
| "step": 620 |
| }, |
| { |
| "grad_norm": 0.5451298356056213, |
| "learning_rate": 3.3140913002379995e-05, |
| "loss": 0.0526, |
| "step": 630 |
| }, |
| { |
| "grad_norm": 0.47404196858406067, |
| "learning_rate": 3.1593772365766105e-05, |
| "loss": 0.0498, |
| "step": 640 |
| }, |
| { |
| "grad_norm": 0.4543800354003906, |
| "learning_rate": 3.006675866883275e-05, |
| "loss": 0.0501, |
| "step": 650 |
| }, |
| { |
| "grad_norm": 0.4308624863624573, |
| "learning_rate": 2.8561541678638142e-05, |
| "loss": 0.054, |
| "step": 660 |
| }, |
| { |
| "grad_norm": 0.4590848386287689, |
| "learning_rate": 2.707976732786166e-05, |
| "loss": 0.0527, |
| "step": 670 |
| }, |
| { |
| "grad_norm": 0.46471887826919556, |
| "learning_rate": 2.562305591500069e-05, |
| "loss": 0.0515, |
| "step": 680 |
| }, |
| { |
| "grad_norm": 0.5157451033592224, |
| "learning_rate": 2.419300033259798e-05, |
| "loss": 0.0496, |
| "step": 690 |
| }, |
| { |
| "grad_norm": 0.3607851564884186, |
| "learning_rate": 2.279116432543705e-05, |
| "loss": 0.0495, |
| "step": 700 |
| }, |
| { |
| "grad_norm": 0.45652496814727783, |
| "learning_rate": 2.1419080780610123e-05, |
| "loss": 0.0623, |
| "step": 710 |
| }, |
| { |
| "grad_norm": 0.42067599296569824, |
| "learning_rate": 2.0078250051328784e-05, |
| "loss": 0.0515, |
| "step": 720 |
| }, |
| { |
| "grad_norm": 0.447702556848526, |
| "learning_rate": 1.877013831630961e-05, |
| "loss": 0.0409, |
| "step": 730 |
| }, |
| { |
| "grad_norm": 0.46375778317451477, |
| "learning_rate": 1.749617597652934e-05, |
| "loss": 0.0568, |
| "step": 740 |
| }, |
| { |
| "grad_norm": 0.40698182582855225, |
| "learning_rate": 1.62577560911024e-05, |
| "loss": 0.0549, |
| "step": 750 |
| }, |
| { |
| "grad_norm": 0.36565569043159485, |
| "learning_rate": 1.5056232853991209e-05, |
| "loss": 0.0619, |
| "step": 760 |
| }, |
| { |
| "grad_norm": 0.3338938355445862, |
| "learning_rate": 1.389292011321498e-05, |
| "loss": 0.0471, |
| "step": 770 |
| }, |
| { |
| "grad_norm": 0.391041100025177, |
| "learning_rate": 1.2769089934176126e-05, |
| "loss": 0.0513, |
| "step": 780 |
| }, |
| { |
| "grad_norm": 0.2763245701789856, |
| "learning_rate": 1.1685971208675539e-05, |
| "loss": 0.0551, |
| "step": 790 |
| }, |
| { |
| "grad_norm": 0.30885049700737, |
| "learning_rate": 1.0644748311137376e-05, |
| "loss": 0.0513, |
| "step": 800 |
| }, |
| { |
| "grad_norm": 0.28447505831718445, |
| "learning_rate": 9.646559803512994e-06, |
| "loss": 0.0481, |
| "step": 810 |
| }, |
| { |
| "grad_norm": 0.34004244208335876, |
| "learning_rate": 8.692497190280224e-06, |
| "loss": 0.0498, |
| "step": 820 |
| }, |
| { |
| "grad_norm": 0.27130886912345886, |
| "learning_rate": 7.783603724899257e-06, |
| "loss": 0.0462, |
| "step": 830 |
| }, |
| { |
| "grad_norm": 0.2683325707912445, |
| "learning_rate": 6.92087326903022e-06, |
| "loss": 0.046, |
| "step": 840 |
| }, |
| { |
| "grad_norm": 0.3312835991382599, |
| "learning_rate": 6.1052492057601275e-06, |
| "loss": 0.0483, |
| "step": 850 |
| }, |
| { |
| "grad_norm": 0.35007426142692566, |
| "learning_rate": 5.337623408027293e-06, |
| "loss": 0.056, |
| "step": 860 |
| }, |
| { |
| "grad_norm": 0.2915596067905426, |
| "learning_rate": 4.618835263371396e-06, |
| "loss": 0.0485, |
| "step": 870 |
| }, |
| { |
| "grad_norm": 0.3173588812351227, |
| "learning_rate": 3.949670756075447e-06, |
| "loss": 0.0473, |
| "step": 880 |
| }, |
| { |
| "grad_norm": 0.3005145788192749, |
| "learning_rate": 3.3308616077036115e-06, |
| "loss": 0.0479, |
| "step": 890 |
| }, |
| { |
| "grad_norm": 0.28296926617622375, |
| "learning_rate": 2.7630844769743757e-06, |
| "loss": 0.0422, |
| "step": 900 |
| }, |
| { |
| "grad_norm": 0.2193201184272766, |
| "learning_rate": 2.2469602198441573e-06, |
| "loss": 0.0421, |
| "step": 910 |
| }, |
| { |
| "grad_norm": 0.1950123906135559, |
| "learning_rate": 1.7830532106104747e-06, |
| "loss": 0.0447, |
| "step": 920 |
| }, |
| { |
| "grad_norm": 0.32784703373908997, |
| "learning_rate": 1.3718707247769135e-06, |
| "loss": 0.0448, |
| "step": 930 |
| }, |
| { |
| "grad_norm": 0.20192043483257294, |
| "learning_rate": 1.0138623843548078e-06, |
| "loss": 0.0414, |
| "step": 940 |
| }, |
| { |
| "grad_norm": 0.23864571750164032, |
| "learning_rate": 7.094196662081831e-07, |
| "loss": 0.0476, |
| "step": 950 |
| }, |
| { |
| "grad_norm": 0.22843579947948456, |
| "learning_rate": 4.5887547397955864e-07, |
| "loss": 0.0449, |
| "step": 960 |
| }, |
| { |
| "grad_norm": 0.24962212145328522, |
| "learning_rate": 2.625037740646763e-07, |
| "loss": 0.0455, |
| "step": 970 |
| }, |
| { |
| "grad_norm": 0.316610187292099, |
| "learning_rate": 1.2051929603428825e-07, |
| "loss": 0.0467, |
| "step": 980 |
| }, |
| { |
| "grad_norm": 0.32058215141296387, |
| "learning_rate": 3.3077297830541584e-08, |
| "loss": 0.0506, |
| "step": 990 |
| }, |
| { |
| "grad_norm": 0.2989563047885895, |
| "learning_rate": 2.7339599464326627e-10, |
| "loss": 0.0532, |
| "step": 1000 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 1000, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 9223372036854775807, |
| "save_steps": 1000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 48, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|