| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 2.0, |
| "eval_steps": 500, |
| "global_step": 240, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.041928721174004195, |
| "grad_norm": 5.873931884765625, |
| "learning_rate": 3.333333333333333e-07, |
| "loss": 1.862, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.08385744234800839, |
| "grad_norm": 6.9156107902526855, |
| "learning_rate": 7.5e-07, |
| "loss": 1.7854, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.12578616352201258, |
| "grad_norm": 5.825167179107666, |
| "learning_rate": 1.1666666666666668e-06, |
| "loss": 1.5769, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.16771488469601678, |
| "grad_norm": 4.851530075073242, |
| "learning_rate": 1.5833333333333331e-06, |
| "loss": 1.6161, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.20964360587002095, |
| "grad_norm": 4.060953617095947, |
| "learning_rate": 2e-06, |
| "loss": 1.7598, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.25157232704402516, |
| "grad_norm": 2.5847902297973633, |
| "learning_rate": 1.997356916700572e-06, |
| "loss": 1.3625, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.29350104821802936, |
| "grad_norm": 3.8893041610717773, |
| "learning_rate": 1.9894416385809443e-06, |
| "loss": 1.4926, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.33542976939203356, |
| "grad_norm": 3.7155327796936035, |
| "learning_rate": 1.976296007119933e-06, |
| "loss": 1.5143, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.37735849056603776, |
| "grad_norm": 3.918890953063965, |
| "learning_rate": 1.9579895123154886e-06, |
| "loss": 1.5692, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.4192872117400419, |
| "grad_norm": 5.501850128173828, |
| "learning_rate": 1.9346189253489886e-06, |
| "loss": 1.9754, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.4612159329140461, |
| "grad_norm": 2.250485897064209, |
| "learning_rate": 1.9063077870366499e-06, |
| "loss": 1.5317, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.5031446540880503, |
| "grad_norm": 4.778130531311035, |
| "learning_rate": 1.8732057547721957e-06, |
| "loss": 1.6225, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.5450733752620545, |
| "grad_norm": 5.034369468688965, |
| "learning_rate": 1.8354878114129364e-06, |
| "loss": 1.9014, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.5870020964360587, |
| "grad_norm": 3.4649276733398438, |
| "learning_rate": 1.7933533402912351e-06, |
| "loss": 1.5783, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.6289308176100629, |
| "grad_norm": 4.354034900665283, |
| "learning_rate": 1.7470250712409959e-06, |
| "loss": 1.4057, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.6708595387840671, |
| "grad_norm": 3.999058961868286, |
| "learning_rate": 1.6967479032106548e-06, |
| "loss": 1.5335, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.7127882599580713, |
| "grad_norm": 3.4833269119262695, |
| "learning_rate": 1.6427876096865393e-06, |
| "loss": 1.1902, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.7547169811320755, |
| "grad_norm": 2.8304104804992676, |
| "learning_rate": 1.5854294337699405e-06, |
| "loss": 1.2191, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.7966457023060797, |
| "grad_norm": 4.565997123718262, |
| "learning_rate": 1.52497658033456e-06, |
| "loss": 1.4723, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.8385744234800838, |
| "grad_norm": 3.578016996383667, |
| "learning_rate": 1.461748613235034e-06, |
| "loss": 1.0579, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.8805031446540881, |
| "grad_norm": 4.267149925231934, |
| "learning_rate": 1.3960797660391568e-06, |
| "loss": 1.4263, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.9224318658280922, |
| "grad_norm": 3.9493331909179688, |
| "learning_rate": 1.3283171752135611e-06, |
| "loss": 1.7828, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.9643605870020965, |
| "grad_norm": 3.2271363735198975, |
| "learning_rate": 1.2588190451025207e-06, |
| "loss": 1.2667, |
| "step": 115 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 12.765212059020996, |
| "learning_rate": 1.1879527544001117e-06, |
| "loss": 1.5189, |
| "step": 120 |
| }, |
| { |
| "epoch": 1.0419287211740043, |
| "grad_norm": 2.5988142490386963, |
| "learning_rate": 1.1160929141252301e-06, |
| "loss": 1.3693, |
| "step": 125 |
| }, |
| { |
| "epoch": 1.0838574423480083, |
| "grad_norm": 3.980133533477783, |
| "learning_rate": 1.043619387365336e-06, |
| "loss": 1.4213, |
| "step": 130 |
| }, |
| { |
| "epoch": 1.1257861635220126, |
| "grad_norm": 3.8095943927764893, |
| "learning_rate": 9.709152812568885e-07, |
| "loss": 1.2731, |
| "step": 135 |
| }, |
| { |
| "epoch": 1.1677148846960168, |
| "grad_norm": 4.488629341125488, |
| "learning_rate": 8.983649218171981e-07, |
| "loss": 1.4819, |
| "step": 140 |
| }, |
| { |
| "epoch": 1.209643605870021, |
| "grad_norm": 3.532264232635498, |
| "learning_rate": 8.263518223330696e-07, |
| "loss": 1.3564, |
| "step": 145 |
| }, |
| { |
| "epoch": 1.251572327044025, |
| "grad_norm": 4.543543815612793, |
| "learning_rate": 7.552566560456761e-07, |
| "loss": 1.2954, |
| "step": 150 |
| }, |
| { |
| "epoch": 1.2935010482180294, |
| "grad_norm": 3.2413134574890137, |
| "learning_rate": 6.854552438483865e-07, |
| "loss": 1.2842, |
| "step": 155 |
| }, |
| { |
| "epoch": 1.3354297693920336, |
| "grad_norm": 3.2103683948516846, |
| "learning_rate": 6.173165676349102e-07, |
| "loss": 1.2546, |
| "step": 160 |
| }, |
| { |
| "epoch": 1.3773584905660377, |
| "grad_norm": 2.752671957015991, |
| "learning_rate": 5.512008197995378e-07, |
| "loss": 1.2626, |
| "step": 165 |
| }, |
| { |
| "epoch": 1.419287211740042, |
| "grad_norm": 3.4775447845458984, |
| "learning_rate": 4.874574992001348e-07, |
| "loss": 1.2031, |
| "step": 170 |
| }, |
| { |
| "epoch": 1.4612159329140462, |
| "grad_norm": 4.845881938934326, |
| "learning_rate": 4.2642356364895417e-07, |
| "loss": 1.5397, |
| "step": 175 |
| }, |
| { |
| "epoch": 1.5031446540880502, |
| "grad_norm": 3.8967208862304688, |
| "learning_rate": 3.684216486975026e-07, |
| "loss": 1.4725, |
| "step": 180 |
| }, |
| { |
| "epoch": 1.5450733752620545, |
| "grad_norm": 3.4233365058898926, |
| "learning_rate": 3.137583621312665e-07, |
| "loss": 1.2935, |
| "step": 185 |
| }, |
| { |
| "epoch": 1.5870020964360587, |
| "grad_norm": 3.8637824058532715, |
| "learning_rate": 2.62722663189876e-07, |
| "loss": 1.2836, |
| "step": 190 |
| }, |
| { |
| "epoch": 1.6289308176100628, |
| "grad_norm": 4.068167209625244, |
| "learning_rate": 2.1558433508042427e-07, |
| "loss": 1.1832, |
| "step": 195 |
| }, |
| { |
| "epoch": 1.6708595387840672, |
| "grad_norm": 3.147029399871826, |
| "learning_rate": 1.7259255885848944e-07, |
| "loss": 1.2623, |
| "step": 200 |
| }, |
| { |
| "epoch": 1.7127882599580713, |
| "grad_norm": 2.986510992050171, |
| "learning_rate": 1.3397459621556128e-07, |
| "loss": 1.0893, |
| "step": 205 |
| }, |
| { |
| "epoch": 1.7547169811320755, |
| "grad_norm": 3.7319693565368652, |
| "learning_rate": 9.993458813587884e-08, |
| "loss": 1.2304, |
| "step": 210 |
| }, |
| { |
| "epoch": 1.7966457023060798, |
| "grad_norm": 4.113098621368408, |
| "learning_rate": 7.065247577317745e-08, |
| "loss": 1.1455, |
| "step": 215 |
| }, |
| { |
| "epoch": 1.8385744234800838, |
| "grad_norm": 4.701733589172363, |
| "learning_rate": 4.6283049251773176e-08, |
| "loss": 1.1915, |
| "step": 220 |
| }, |
| { |
| "epoch": 1.880503144654088, |
| "grad_norm": 3.9321887493133545, |
| "learning_rate": 2.6955129420176193e-08, |
| "loss": 1.2247, |
| "step": 225 |
| }, |
| { |
| "epoch": 1.9224318658280923, |
| "grad_norm": 3.6867830753326416, |
| "learning_rate": 1.2770886882625952e-08, |
| "loss": 1.2779, |
| "step": 230 |
| }, |
| { |
| "epoch": 1.9643605870020964, |
| "grad_norm": 2.951756000518799, |
| "learning_rate": 3.805301908254455e-09, |
| "loss": 1.2652, |
| "step": 235 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 7.836752414703369, |
| "learning_rate": 1.0576807289253142e-10, |
| "loss": 1.4392, |
| "step": 240 |
| } |
| ], |
| "logging_steps": 5, |
| "max_steps": 240, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 2, |
| "save_steps": 50, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 2077792876363776.0, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|