| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 1.0, | |
| "eval_steps": 500, | |
| "global_step": 4681, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.021362956633198035, | |
| "grad_norm": 0.10347568243741989, | |
| "learning_rate": 6.666666666666667e-06, | |
| "loss": 2.4928, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.04272591326639607, | |
| "grad_norm": 0.21450009942054749, | |
| "learning_rate": 1.3333333333333333e-05, | |
| "loss": 2.482, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.0640888698995941, | |
| "grad_norm": 0.33770546317100525, | |
| "learning_rate": 2e-05, | |
| "loss": 2.4373, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.08545182653279214, | |
| "grad_norm": 0.3730420470237732, | |
| "learning_rate": 1.9974299762831266e-05, | |
| "loss": 2.3748, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.10681478316599018, | |
| "grad_norm": 0.40363097190856934, | |
| "learning_rate": 1.9897331151763162e-05, | |
| "loss": 2.3395, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.1281777397991882, | |
| "grad_norm": 0.4664216935634613, | |
| "learning_rate": 1.9769489789107492e-05, | |
| "loss": 2.3456, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.14954069643238624, | |
| "grad_norm": 0.4879497289657593, | |
| "learning_rate": 1.9591432785532302e-05, | |
| "loss": 2.3481, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.17090365306558428, | |
| "grad_norm": 0.4861871004104614, | |
| "learning_rate": 1.9364075362481876e-05, | |
| "loss": 2.3305, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.19226660969878231, | |
| "grad_norm": 0.5032399296760559, | |
| "learning_rate": 1.908858614789511e-05, | |
| "loss": 2.2901, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.21362956633198035, | |
| "grad_norm": 0.5441191792488098, | |
| "learning_rate": 1.8766381169402465e-05, | |
| "loss": 2.3201, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.2349925229651784, | |
| "grad_norm": 0.4986329674720764, | |
| "learning_rate": 1.839911657587678e-05, | |
| "loss": 2.2981, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.2563554795983764, | |
| "grad_norm": 0.506102979183197, | |
| "learning_rate": 1.7988680124749516e-05, | |
| "loss": 2.2633, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.27771843623157444, | |
| "grad_norm": 0.5421627163887024, | |
| "learning_rate": 1.7537181478848007e-05, | |
| "loss": 2.2985, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.2990813928647725, | |
| "grad_norm": 0.6916339993476868, | |
| "learning_rate": 1.704694136262846e-05, | |
| "loss": 2.2716, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.3204443494979705, | |
| "grad_norm": 0.7127671837806702, | |
| "learning_rate": 1.6520479633542167e-05, | |
| "loss": 2.2697, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.34180730613116855, | |
| "grad_norm": 0.6163765788078308, | |
| "learning_rate": 1.5960502329848683e-05, | |
| "loss": 2.3061, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.3631702627643666, | |
| "grad_norm": 0.6348161697387695, | |
| "learning_rate": 1.5369887761450813e-05, | |
| "loss": 2.2746, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.38453321939756463, | |
| "grad_norm": 0.6660177111625671, | |
| "learning_rate": 1.475167171524519e-05, | |
| "loss": 2.2785, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.40589617603076267, | |
| "grad_norm": 0.7842796444892883, | |
| "learning_rate": 1.4109031851033612e-05, | |
| "loss": 2.2649, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.4272591326639607, | |
| "grad_norm": 0.6408088803291321, | |
| "learning_rate": 1.344527136820094e-05, | |
| "loss": 2.253, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.44862208929715874, | |
| "grad_norm": 0.6853693127632141, | |
| "learning_rate": 1.2763802027113587e-05, | |
| "loss": 2.2758, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 0.4699850459303568, | |
| "grad_norm": 0.5584832429885864, | |
| "learning_rate": 1.2068126612509384e-05, | |
| "loss": 2.2917, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 0.4913480025635548, | |
| "grad_norm": 0.5917975306510925, | |
| "learning_rate": 1.1361820929017884e-05, | |
| "loss": 2.2838, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 0.5127109591967528, | |
| "grad_norm": 0.6518538594245911, | |
| "learning_rate": 1.0648515421354968e-05, | |
| "loss": 2.2407, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.5340739158299509, | |
| "grad_norm": 0.6702529788017273, | |
| "learning_rate": 9.931876513664764e-06, | |
| "loss": 2.2241, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.5554368724631489, | |
| "grad_norm": 0.7369861602783203, | |
| "learning_rate": 9.215587763925683e-06, | |
| "loss": 2.2469, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 0.576799829096347, | |
| "grad_norm": 0.8747798204421997, | |
| "learning_rate": 8.503330930287628e-06, | |
| "loss": 2.2703, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 0.598162785729545, | |
| "grad_norm": 0.8590816855430603, | |
| "learning_rate": 7.798767046660521e-06, | |
| "loss": 2.2236, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 0.619525742362743, | |
| "grad_norm": 0.6715850830078125, | |
| "learning_rate": 7.1055176048263085e-06, | |
| "loss": 2.2296, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 0.640888698995941, | |
| "grad_norm": 0.8593222498893738, | |
| "learning_rate": 6.42714593979943e-06, | |
| "loss": 2.2466, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.6622516556291391, | |
| "grad_norm": 0.8626837134361267, | |
| "learning_rate": 5.767138914115842e-06, | |
| "loss": 2.2454, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 0.6836146122623371, | |
| "grad_norm": 0.7464703917503357, | |
| "learning_rate": 5.128888995194161e-06, | |
| "loss": 2.267, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 0.7049775688955351, | |
| "grad_norm": 0.813230037689209, | |
| "learning_rate": 4.515676817892231e-06, | |
| "loss": 2.2443, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 0.7263405255287332, | |
| "grad_norm": 0.6884306073188782, | |
| "learning_rate": 3.930654321888331e-06, | |
| "loss": 2.2186, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 0.7477034821619312, | |
| "grad_norm": 0.6503286957740784, | |
| "learning_rate": 3.3768285505617404e-06, | |
| "loss": 2.2633, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.7690664387951293, | |
| "grad_norm": 0.7110804319381714, | |
| "learning_rate": 2.8570461946470963e-06, | |
| "loss": 2.2259, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 0.7904293954283272, | |
| "grad_norm": 0.6175870299339294, | |
| "learning_rate": 2.3739789601090347e-06, | |
| "loss": 2.2678, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 0.8117923520615253, | |
| "grad_norm": 0.7111446261405945, | |
| "learning_rate": 1.9301098354467707e-06, | |
| "loss": 2.2305, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 0.8331553086947233, | |
| "grad_norm": 0.6799335479736328, | |
| "learning_rate": 1.5277203290154342e-06, | |
| "loss": 2.2314, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 0.8545182653279214, | |
| "grad_norm": 0.6913052797317505, | |
| "learning_rate": 1.1688787419649338e-06, | |
| "loss": 2.2394, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.8758812219611194, | |
| "grad_norm": 0.6040618419647217, | |
| "learning_rate": 8.554295370739074e-07, | |
| "loss": 2.2329, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 0.8972441785943175, | |
| "grad_norm": 1.026239275932312, | |
| "learning_rate": 5.889838581235641e-07, | |
| "loss": 2.2698, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 0.9186071352275155, | |
| "grad_norm": 0.7688636183738708, | |
| "learning_rate": 3.7091124854222613e-07, | |
| "loss": 2.2419, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 0.9399700918607136, | |
| "grad_norm": 0.8546819090843201, | |
| "learning_rate": 2.0233261188714491e-07, | |
| "loss": 2.2185, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 0.9613330484939115, | |
| "grad_norm": 0.940643310546875, | |
| "learning_rate": 8.411445034704258e-08, | |
| "loss": 2.1983, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 0.9826960051271096, | |
| "grad_norm": 0.8350269794464111, | |
| "learning_rate": 1.6864410879763316e-08, | |
| "loss": 2.2649, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "step": 4681, | |
| "total_flos": 8.50597775081472e+16, | |
| "train_loss": 2.2806778850526896, | |
| "train_runtime": 1455.669, | |
| "train_samples_per_second": 6.431, | |
| "train_steps_per_second": 3.216 | |
| } | |
| ], | |
| "logging_steps": 100, | |
| "max_steps": 4681, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 8.50597775081472e+16, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |