| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.8027057497181511, |
| "eval_steps": 50, |
| "global_step": 400, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.04509582863585118, |
| "grad_norm": 4.880373001098633, |
| "learning_rate": 4e-07, |
| "loss": 1.7893, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.09019165727170236, |
| "grad_norm": 4.680130958557129, |
| "learning_rate": 8.444444444444444e-07, |
| "loss": 1.7309, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.13528748590755355, |
| "grad_norm": 4.52065372467041, |
| "learning_rate": 1.2888888888888889e-06, |
| "loss": 1.7019, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.18038331454340473, |
| "grad_norm": 4.010641574859619, |
| "learning_rate": 1.7333333333333334e-06, |
| "loss": 1.5762, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.2254791431792559, |
| "grad_norm": 2.6717236042022705, |
| "learning_rate": 1.9995040840893383e-06, |
| "loss": 1.6333, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.2254791431792559, |
| "eval_loss": 1.5759482383728027, |
| "eval_runtime": 14.6699, |
| "eval_samples_per_second": 12.747, |
| "eval_steps_per_second": 6.408, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.2705749718151071, |
| "grad_norm": 1.3333920240402222, |
| "learning_rate": 1.9939306773179494e-06, |
| "loss": 1.2989, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.3156708004509583, |
| "grad_norm": 0.6459554433822632, |
| "learning_rate": 1.9821986184473754e-06, |
| "loss": 1.2498, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.36076662908680945, |
| "grad_norm": 0.6363914608955383, |
| "learning_rate": 1.964380602355277e-06, |
| "loss": 1.157, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.40586245772266066, |
| "grad_norm": 0.4646184742450714, |
| "learning_rate": 1.9405870340877135e-06, |
| "loss": 1.1613, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.4509582863585118, |
| "grad_norm": 0.5635167956352234, |
| "learning_rate": 1.9109653447608605e-06, |
| "loss": 1.3009, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.4509582863585118, |
| "eval_loss": 1.3309173583984375, |
| "eval_runtime": 14.1549, |
| "eval_samples_per_second": 13.211, |
| "eval_steps_per_second": 6.641, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.496054114994363, |
| "grad_norm": 0.5266286134719849, |
| "learning_rate": 1.8756990780396006e-06, |
| "loss": 1.1237, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.5411499436302142, |
| "grad_norm": 0.5904309153556824, |
| "learning_rate": 1.8350067528534024e-06, |
| "loss": 1.2938, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.5862457722660653, |
| "grad_norm": 0.4533354938030243, |
| "learning_rate": 1.7891405093963937e-06, |
| "loss": 1.3771, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.6313416009019166, |
| "grad_norm": 0.6127325892448425, |
| "learning_rate": 1.7383845468013654e-06, |
| "loss": 1.417, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.6764374295377678, |
| "grad_norm": 0.5645148754119873, |
| "learning_rate": 1.683053362168282e-06, |
| "loss": 1.1581, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.6764374295377678, |
| "eval_loss": 1.2584223747253418, |
| "eval_runtime": 14.0101, |
| "eval_samples_per_second": 13.347, |
| "eval_steps_per_second": 6.709, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.7215332581736189, |
| "grad_norm": 0.5589331984519958, |
| "learning_rate": 1.6234898018587336e-06, |
| "loss": 1.1287, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.7666290868094702, |
| "grad_norm": 0.5021057724952698, |
| "learning_rate": 1.5600629371310144e-06, |
| "loss": 1.105, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.8117249154453213, |
| "grad_norm": 0.4609015882015228, |
| "learning_rate": 1.4931657772789457e-06, |
| "loss": 1.1352, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.8568207440811725, |
| "grad_norm": 0.46195802092552185, |
| "learning_rate": 1.423212834444425e-06, |
| "loss": 1.0855, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.9019165727170236, |
| "grad_norm": 0.527087926864624, |
| "learning_rate": 1.3506375551927544e-06, |
| "loss": 1.0202, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.9019165727170236, |
| "eval_loss": 1.2102832794189453, |
| "eval_runtime": 14.1065, |
| "eval_samples_per_second": 13.256, |
| "eval_steps_per_second": 6.664, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.9470124013528749, |
| "grad_norm": 1.0008864402770996, |
| "learning_rate": 1.2758896347653752e-06, |
| "loss": 1.2173, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.992108229988726, |
| "grad_norm": 0.5077604651451111, |
| "learning_rate": 1.1994322306515925e-06, |
| "loss": 1.0537, |
| "step": 220 |
| }, |
| { |
| "epoch": 1.0360766629086808, |
| "grad_norm": 0.711245596408844, |
| "learning_rate": 1.1217390927447225e-06, |
| "loss": 1.1367, |
| "step": 230 |
| }, |
| { |
| "epoch": 1.0811724915445322, |
| "grad_norm": 0.510122537612915, |
| "learning_rate": 1.043291627864961e-06, |
| "loss": 1.1316, |
| "step": 240 |
| }, |
| { |
| "epoch": 1.1262683201803834, |
| "grad_norm": 0.5964261293411255, |
| "learning_rate": 9.645759168379461e-07, |
| "loss": 1.2521, |
| "step": 250 |
| }, |
| { |
| "epoch": 1.1262683201803834, |
| "eval_loss": 1.1702840328216553, |
| "eval_runtime": 14.1699, |
| "eval_samples_per_second": 13.197, |
| "eval_steps_per_second": 6.634, |
| "step": 250 |
| }, |
| { |
| "epoch": 1.1713641488162345, |
| "grad_norm": 0.39409753680229187, |
| "learning_rate": 8.860797026119721e-07, |
| "loss": 0.915, |
| "step": 260 |
| }, |
| { |
| "epoch": 1.2164599774520857, |
| "grad_norm": 0.4673859179019928, |
| "learning_rate": 8.082893680762618e-07, |
| "loss": 1.2371, |
| "step": 270 |
| }, |
| { |
| "epoch": 1.2615558060879368, |
| "grad_norm": 0.5883477926254272, |
| "learning_rate": 7.316869223065155e-07, |
| "loss": 1.1097, |
| "step": 280 |
| }, |
| { |
| "epoch": 1.306651634723788, |
| "grad_norm": 0.5270853638648987, |
| "learning_rate": 6.567470139117447e-07, |
| "loss": 1.0892, |
| "step": 290 |
| }, |
| { |
| "epoch": 1.3517474633596391, |
| "grad_norm": 0.5741926431655884, |
| "learning_rate": 5.839339899884628e-07, |
| "loss": 0.9821, |
| "step": 300 |
| }, |
| { |
| "epoch": 1.3517474633596391, |
| "eval_loss": 1.1472760438919067, |
| "eval_runtime": 14.092, |
| "eval_samples_per_second": 13.27, |
| "eval_steps_per_second": 6.67, |
| "step": 300 |
| }, |
| { |
| "epoch": 1.3968432919954905, |
| "grad_norm": 0.426485151052475, |
| "learning_rate": 5.136990189057187e-07, |
| "loss": 1.1382, |
| "step": 310 |
| }, |
| { |
| "epoch": 1.4419391206313417, |
| "grad_norm": 0.45604461431503296, |
| "learning_rate": 4.4647729474894123e-07, |
| "loss": 1.0356, |
| "step": 320 |
| }, |
| { |
| "epoch": 1.4870349492671928, |
| "grad_norm": 0.3508279621601105, |
| "learning_rate": 3.826853407445848e-07, |
| "loss": 1.0695, |
| "step": 330 |
| }, |
| { |
| "epoch": 1.532130777903044, |
| "grad_norm": 0.4223732352256775, |
| "learning_rate": 3.227184283742591e-07, |
| "loss": 0.9751, |
| "step": 340 |
| }, |
| { |
| "epoch": 1.5772266065388951, |
| "grad_norm": 0.6314070224761963, |
| "learning_rate": 2.6694812817017387e-07, |
| "loss": 1.0793, |
| "step": 350 |
| }, |
| { |
| "epoch": 1.5772266065388951, |
| "eval_loss": 1.136489748954773, |
| "eval_runtime": 14.3065, |
| "eval_samples_per_second": 13.071, |
| "eval_steps_per_second": 6.57, |
| "step": 350 |
| }, |
| { |
| "epoch": 1.6223224351747465, |
| "grad_norm": 0.5215230584144592, |
| "learning_rate": 2.157200073678137e-07, |
| "loss": 0.973, |
| "step": 360 |
| }, |
| { |
| "epoch": 1.6674182638105974, |
| "grad_norm": 0.5581642985343933, |
| "learning_rate": 1.6935148868177718e-07, |
| "loss": 0.9685, |
| "step": 370 |
| }, |
| { |
| "epoch": 1.7125140924464488, |
| "grad_norm": 0.5601069331169128, |
| "learning_rate": 1.2812988347236166e-07, |
| "loss": 1.0191, |
| "step": 380 |
| }, |
| { |
| "epoch": 1.7576099210822997, |
| "grad_norm": 0.4990064203739166, |
| "learning_rate": 9.231061148990648e-08, |
| "loss": 0.9674, |
| "step": 390 |
| }, |
| { |
| "epoch": 1.8027057497181511, |
| "grad_norm": 0.6436290144920349, |
| "learning_rate": 6.211561822781474e-08, |
| "loss": 0.826, |
| "step": 400 |
| }, |
| { |
| "epoch": 1.8027057497181511, |
| "eval_loss": 1.1320565938949585, |
| "eval_runtime": 14.0805, |
| "eval_samples_per_second": 13.281, |
| "eval_steps_per_second": 6.676, |
| "step": 400 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 444, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 2, |
| "save_steps": 100, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 6.339717225125376e+16, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|