| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.9986738753442823, |
| "eval_steps": 200, |
| "global_step": 445, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.02244210955829848, |
| "grad_norm": 19.15482521057129, |
| "learning_rate": 2.222222222222222e-06, |
| "loss": 3.8476, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.04488421911659696, |
| "grad_norm": 5.446422576904297, |
| "learning_rate": 4.444444444444444e-06, |
| "loss": 3.2218, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.06732632867489544, |
| "grad_norm": 1.8523049354553223, |
| "learning_rate": 6.666666666666667e-06, |
| "loss": 2.9756, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.08976843823319391, |
| "grad_norm": 2.139192581176758, |
| "learning_rate": 8.888888888888888e-06, |
| "loss": 2.88, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.11221054779149241, |
| "grad_norm": 1.853474497795105, |
| "learning_rate": 9.996145181203616e-06, |
| "loss": 2.8198, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.13465265734979087, |
| "grad_norm": 1.501637578010559, |
| "learning_rate": 9.965342284774633e-06, |
| "loss": 2.8122, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.15709476690808935, |
| "grad_norm": 2.0072269439697266, |
| "learning_rate": 9.903926402016153e-06, |
| "loss": 2.7806, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.17953687646638783, |
| "grad_norm": 1.7332258224487305, |
| "learning_rate": 9.812276182268236e-06, |
| "loss": 2.7422, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.2019789860246863, |
| "grad_norm": 1.3256088495254517, |
| "learning_rate": 9.690956679612422e-06, |
| "loss": 2.736, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.22442109558298481, |
| "grad_norm": 1.6238477230072021, |
| "learning_rate": 9.540715869125407e-06, |
| "loss": 2.7361, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.2468632051412833, |
| "grad_norm": 1.326378583908081, |
| "learning_rate": 9.362480035363987e-06, |
| "loss": 2.7135, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.26930531469958174, |
| "grad_norm": 1.3376497030258179, |
| "learning_rate": 9.157348061512728e-06, |
| "loss": 2.7064, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.29174742425788025, |
| "grad_norm": 1.2815560102462769, |
| "learning_rate": 8.926584654403725e-06, |
| "loss": 2.7018, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.3141895338161787, |
| "grad_norm": 1.5868873596191406, |
| "learning_rate": 8.671612547178428e-06, |
| "loss": 2.6961, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.3366316433744772, |
| "grad_norm": 1.366570234298706, |
| "learning_rate": 8.39400372766471e-06, |
| "loss": 2.6968, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.35907375293277566, |
| "grad_norm": 1.6603009700775146, |
| "learning_rate": 8.095469746549172e-06, |
| "loss": 2.6879, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.38151586249107416, |
| "grad_norm": 1.4688373804092407, |
| "learning_rate": 7.777851165098012e-06, |
| "loss": 2.6686, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.4039579720493726, |
| "grad_norm": 1.2386434078216553, |
| "learning_rate": 7.443106207484776e-06, |
| "loss": 2.6497, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.4264000816076711, |
| "grad_norm": 1.3002716302871704, |
| "learning_rate": 7.093298687687141e-06, |
| "loss": 2.6413, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.44884219116596963, |
| "grad_norm": 1.2603603601455688, |
| "learning_rate": 6.730585285387465e-06, |
| "loss": 2.6472, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.44884219116596963, |
| "eval_loss": 2.642993450164795, |
| "eval_runtime": 1294.6794, |
| "eval_samples_per_second": 148.064, |
| "eval_steps_per_second": 0.842, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.4712843007242681, |
| "grad_norm": 1.2680917978286743, |
| "learning_rate": 6.3572022493253715e-06, |
| "loss": 2.6369, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.4937264102825666, |
| "grad_norm": 1.3160443305969238, |
| "learning_rate": 5.975451610080643e-06, |
| "loss": 2.63, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.5161685198408651, |
| "grad_norm": 1.2467771768569946, |
| "learning_rate": 5.587686987289189e-06, |
| "loss": 2.6209, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.5386106293991635, |
| "grad_norm": 1.208018183708191, |
| "learning_rate": 5.1962990787953436e-06, |
| "loss": 2.6318, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.561052738957462, |
| "grad_norm": 1.2416397333145142, |
| "learning_rate": 4.803700921204659e-06, |
| "loss": 2.621, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.5834948485157605, |
| "grad_norm": 1.1826361417770386, |
| "learning_rate": 4.4123130127108125e-06, |
| "loss": 2.6161, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.605936958074059, |
| "grad_norm": 1.2550407648086548, |
| "learning_rate": 4.02454838991936e-06, |
| "loss": 2.6164, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.6283790676323574, |
| "grad_norm": 1.2681384086608887, |
| "learning_rate": 3.6427977506746293e-06, |
| "loss": 2.6091, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.6508211771906559, |
| "grad_norm": 1.2637056112289429, |
| "learning_rate": 3.269414714612534e-06, |
| "loss": 2.5967, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.6732632867489544, |
| "grad_norm": 1.211774468421936, |
| "learning_rate": 2.906701312312861e-06, |
| "loss": 2.6031, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.6957053963072529, |
| "grad_norm": 1.1411036252975464, |
| "learning_rate": 2.5568937925152272e-06, |
| "loss": 2.6014, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.7181475058655513, |
| "grad_norm": 1.1422080993652344, |
| "learning_rate": 2.2221488349019903e-06, |
| "loss": 2.5978, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.7405896154238498, |
| "grad_norm": 1.172059416770935, |
| "learning_rate": 1.9045302534508298e-06, |
| "loss": 2.5911, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.7630317249821483, |
| "grad_norm": 1.1655080318450928, |
| "learning_rate": 1.6059962723352912e-06, |
| "loss": 2.5913, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.7854738345404468, |
| "grad_norm": 1.1286932229995728, |
| "learning_rate": 1.3283874528215735e-06, |
| "loss": 2.5819, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.8079159440987452, |
| "grad_norm": 1.1322216987609863, |
| "learning_rate": 1.0734153455962765e-06, |
| "loss": 2.5833, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.8303580536570437, |
| "grad_norm": 1.1392606496810913, |
| "learning_rate": 8.426519384872733e-07, |
| "loss": 2.5851, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.8528001632153422, |
| "grad_norm": 1.1811796426773071, |
| "learning_rate": 6.375199646360142e-07, |
| "loss": 2.5853, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.8752422727736408, |
| "grad_norm": 1.1267277002334595, |
| "learning_rate": 4.5928413087459325e-07, |
| "loss": 2.5832, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.8976843823319393, |
| "grad_norm": 1.116821527481079, |
| "learning_rate": 3.0904332038757977e-07, |
| "loss": 2.5779, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.8976843823319393, |
| "eval_loss": 2.582942485809326, |
| "eval_runtime": 1274.0632, |
| "eval_samples_per_second": 150.46, |
| "eval_steps_per_second": 0.856, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.9201264918902377, |
| "grad_norm": 1.1507278680801392, |
| "learning_rate": 1.8772381773176417e-07, |
| "loss": 2.5833, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.9425686014485362, |
| "grad_norm": 1.0935174226760864, |
| "learning_rate": 9.607359798384785e-08, |
| "loss": 2.597, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.9650107110068347, |
| "grad_norm": 1.1115341186523438, |
| "learning_rate": 3.465771522536854e-08, |
| "loss": 2.5725, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.9874528205651332, |
| "grad_norm": 1.131402611732483, |
| "learning_rate": 3.854818796385495e-09, |
| "loss": 2.575, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.9986738753442823, |
| "step": 445, |
| "total_flos": 7.860958022007259e+18, |
| "train_loss": 2.698132219207421, |
| "train_runtime": 39941.2631, |
| "train_samples_per_second": 43.195, |
| "train_steps_per_second": 0.011 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 445, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 7.860958022007259e+18, |
| "train_batch_size": 22, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|