| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.5159332321699543, |
| "eval_steps": 500, |
| "global_step": 500, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.030349013657056147, |
| "grad_norm": 0.49088624119758606, |
| "learning_rate": 1.8e-05, |
| "loss": 2.5243, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.06069802731411229, |
| "grad_norm": 0.6470591425895691, |
| "learning_rate": 3.8e-05, |
| "loss": 2.389, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.09104704097116843, |
| "grad_norm": 0.5612089037895203, |
| "learning_rate": 5.8e-05, |
| "loss": 2.164, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.12139605462822459, |
| "grad_norm": 1.2414666414260864, |
| "learning_rate": 7.800000000000001e-05, |
| "loss": 1.8563, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.15174506828528073, |
| "grad_norm": 0.7067261934280396, |
| "learning_rate": 9.8e-05, |
| "loss": 1.3996, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.18209408194233687, |
| "grad_norm": 0.6404679417610168, |
| "learning_rate": 0.000118, |
| "loss": 1.0518, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.212443095599393, |
| "grad_norm": 0.7477788329124451, |
| "learning_rate": 0.000138, |
| "loss": 0.8398, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.24279210925644917, |
| "grad_norm": 0.7206103205680847, |
| "learning_rate": 0.00015800000000000002, |
| "loss": 0.6644, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.2731411229135053, |
| "grad_norm": 0.6917621493339539, |
| "learning_rate": 0.00017800000000000002, |
| "loss": 0.5447, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.30349013657056145, |
| "grad_norm": 0.7294086813926697, |
| "learning_rate": 0.00019800000000000002, |
| "loss": 0.4877, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.3338391502276176, |
| "grad_norm": 0.7436180114746094, |
| "learning_rate": 0.00019797752808988766, |
| "loss": 0.4499, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.36418816388467373, |
| "grad_norm": 0.7850086688995361, |
| "learning_rate": 0.0001957303370786517, |
| "loss": 0.4262, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.3945371775417299, |
| "grad_norm": 0.69960618019104, |
| "learning_rate": 0.00019348314606741572, |
| "loss": 0.4107, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.424886191198786, |
| "grad_norm": 0.5339717864990234, |
| "learning_rate": 0.0001912359550561798, |
| "loss": 0.4023, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.4552352048558422, |
| "grad_norm": 0.7217109203338623, |
| "learning_rate": 0.00018898876404494384, |
| "loss": 0.3713, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.48558421851289835, |
| "grad_norm": 0.5403777360916138, |
| "learning_rate": 0.00018674157303370787, |
| "loss": 0.3451, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.5159332321699545, |
| "grad_norm": 0.7086395621299744, |
| "learning_rate": 0.00018449438202247192, |
| "loss": 0.3433, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.5462822458270106, |
| "grad_norm": 0.5336319804191589, |
| "learning_rate": 0.00018224719101123598, |
| "loss": 0.3467, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.5766312594840668, |
| "grad_norm": 0.5295460224151611, |
| "learning_rate": 0.00018, |
| "loss": 0.3625, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.6069802731411229, |
| "grad_norm": 0.6301100254058838, |
| "learning_rate": 0.00017775280898876404, |
| "loss": 0.342, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.637329286798179, |
| "grad_norm": 0.5921112298965454, |
| "learning_rate": 0.0001755056179775281, |
| "loss": 0.3418, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.6676783004552352, |
| "grad_norm": 0.7228449583053589, |
| "learning_rate": 0.00017325842696629216, |
| "loss": 0.3276, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.6980273141122914, |
| "grad_norm": 0.6273078918457031, |
| "learning_rate": 0.00017101123595505619, |
| "loss": 0.3226, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.7283763277693475, |
| "grad_norm": 0.5710306167602539, |
| "learning_rate": 0.00016876404494382024, |
| "loss": 0.3238, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.7587253414264037, |
| "grad_norm": 0.43547865748405457, |
| "learning_rate": 0.00016651685393258427, |
| "loss": 0.3264, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.7890743550834598, |
| "grad_norm": 0.4803106188774109, |
| "learning_rate": 0.00016426966292134833, |
| "loss": 0.3125, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.8194233687405159, |
| "grad_norm": 0.45547714829444885, |
| "learning_rate": 0.00016202247191011236, |
| "loss": 0.3015, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.849772382397572, |
| "grad_norm": 0.4463222622871399, |
| "learning_rate": 0.00015977528089887642, |
| "loss": 0.3006, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.8801213960546282, |
| "grad_norm": 0.6434076428413391, |
| "learning_rate": 0.00015752808988764045, |
| "loss": 0.3159, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.9104704097116844, |
| "grad_norm": 0.5581756830215454, |
| "learning_rate": 0.0001552808988764045, |
| "loss": 0.3042, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.9408194233687405, |
| "grad_norm": 0.5555682182312012, |
| "learning_rate": 0.00015303370786516856, |
| "loss": 0.3011, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.9711684370257967, |
| "grad_norm": 0.47584882378578186, |
| "learning_rate": 0.0001507865168539326, |
| "loss": 0.2971, |
| "step": 320 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 0.9439553618431091, |
| "learning_rate": 0.00014853932584269662, |
| "loss": 0.3148, |
| "step": 330 |
| }, |
| { |
| "epoch": 1.0303490136570561, |
| "grad_norm": 0.41724225878715515, |
| "learning_rate": 0.00014629213483146068, |
| "loss": 0.2885, |
| "step": 340 |
| }, |
| { |
| "epoch": 1.0606980273141122, |
| "grad_norm": 0.6222860813140869, |
| "learning_rate": 0.00014404494382022474, |
| "loss": 0.2941, |
| "step": 350 |
| }, |
| { |
| "epoch": 1.0910470409711683, |
| "grad_norm": 0.43433964252471924, |
| "learning_rate": 0.00014179775280898877, |
| "loss": 0.2825, |
| "step": 360 |
| }, |
| { |
| "epoch": 1.1213960546282247, |
| "grad_norm": 0.5504065752029419, |
| "learning_rate": 0.0001395505617977528, |
| "loss": 0.2917, |
| "step": 370 |
| }, |
| { |
| "epoch": 1.1517450682852808, |
| "grad_norm": 0.4902341663837433, |
| "learning_rate": 0.00013730337078651686, |
| "loss": 0.2803, |
| "step": 380 |
| }, |
| { |
| "epoch": 1.182094081942337, |
| "grad_norm": 0.5374056100845337, |
| "learning_rate": 0.00013505617977528091, |
| "loss": 0.2799, |
| "step": 390 |
| }, |
| { |
| "epoch": 1.212443095599393, |
| "grad_norm": 0.47176507115364075, |
| "learning_rate": 0.00013280898876404494, |
| "loss": 0.2783, |
| "step": 400 |
| }, |
| { |
| "epoch": 1.2427921092564491, |
| "grad_norm": 0.4779718816280365, |
| "learning_rate": 0.000130561797752809, |
| "loss": 0.2819, |
| "step": 410 |
| }, |
| { |
| "epoch": 1.2731411229135052, |
| "grad_norm": 0.4197782278060913, |
| "learning_rate": 0.00012831460674157303, |
| "loss": 0.279, |
| "step": 420 |
| }, |
| { |
| "epoch": 1.3034901365705616, |
| "grad_norm": 0.3616682291030884, |
| "learning_rate": 0.0001260674157303371, |
| "loss": 0.2732, |
| "step": 430 |
| }, |
| { |
| "epoch": 1.3338391502276177, |
| "grad_norm": 0.4093301594257355, |
| "learning_rate": 0.00012382022471910112, |
| "loss": 0.2719, |
| "step": 440 |
| }, |
| { |
| "epoch": 1.3641881638846738, |
| "grad_norm": 0.389291375875473, |
| "learning_rate": 0.00012157303370786516, |
| "loss": 0.2774, |
| "step": 450 |
| }, |
| { |
| "epoch": 1.39453717754173, |
| "grad_norm": 0.41108396649360657, |
| "learning_rate": 0.0001193258426966292, |
| "loss": 0.274, |
| "step": 460 |
| }, |
| { |
| "epoch": 1.424886191198786, |
| "grad_norm": 0.45677822828292847, |
| "learning_rate": 0.00011707865168539326, |
| "loss": 0.2701, |
| "step": 470 |
| }, |
| { |
| "epoch": 1.4552352048558421, |
| "grad_norm": 0.45333346724510193, |
| "learning_rate": 0.00011483146067415731, |
| "loss": 0.27, |
| "step": 480 |
| }, |
| { |
| "epoch": 1.4855842185128982, |
| "grad_norm": 0.4619373679161072, |
| "learning_rate": 0.00011258426966292135, |
| "loss": 0.2619, |
| "step": 490 |
| }, |
| { |
| "epoch": 1.5159332321699543, |
| "grad_norm": 0.36644256114959717, |
| "learning_rate": 0.00011033707865168538, |
| "loss": 0.2606, |
| "step": 500 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 990, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 9.144895527714816e+16, |
| "train_batch_size": 4, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|