| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.0, |
| "eval_steps": 100, |
| "global_step": 302, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0033112582781456954, |
| "grad_norm": 8.547256141414913, |
| "learning_rate": 1.6129032258064514e-08, |
| "loss": 1.3889, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.033112582781456956, |
| "grad_norm": 8.048209966870028, |
| "learning_rate": 1.6129032258064515e-07, |
| "loss": 1.4125, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.06622516556291391, |
| "grad_norm": 4.441313871275738, |
| "learning_rate": 3.225806451612903e-07, |
| "loss": 1.3764, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.09933774834437085, |
| "grad_norm": 2.792727195498978, |
| "learning_rate": 4.838709677419355e-07, |
| "loss": 1.297, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.13245033112582782, |
| "grad_norm": 2.440195994000396, |
| "learning_rate": 4.98640553244975e-07, |
| "loss": 1.2474, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.16556291390728478, |
| "grad_norm": 1.7402831463764055, |
| "learning_rate": 4.939602093872018e-07, |
| "loss": 1.2119, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.1986754966887417, |
| "grad_norm": 1.8044881066728908, |
| "learning_rate": 4.860049979829031e-07, |
| "loss": 1.1844, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.23178807947019867, |
| "grad_norm": 2.4217209978341887, |
| "learning_rate": 4.7488170801918125e-07, |
| "loss": 1.1695, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.26490066225165565, |
| "grad_norm": 1.6559949763449162, |
| "learning_rate": 4.6073965606452205e-07, |
| "loss": 1.1473, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.2980132450331126, |
| "grad_norm": 1.6318360522717992, |
| "learning_rate": 4.437686818764167e-07, |
| "loss": 1.1446, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.33112582781456956, |
| "grad_norm": 1.5824289985736384, |
| "learning_rate": 4.241966000347147e-07, |
| "loss": 1.1247, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.33112582781456956, |
| "eval_loss": 1.1251499652862549, |
| "eval_runtime": 28.1687, |
| "eval_samples_per_second": 44.375, |
| "eval_steps_per_second": 0.71, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.36423841059602646, |
| "grad_norm": 1.6700627307800546, |
| "learning_rate": 4.0228614180941443e-07, |
| "loss": 1.127, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.3973509933774834, |
| "grad_norm": 1.6527451316364625, |
| "learning_rate": 3.7833142831457683e-07, |
| "loss": 1.1282, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.4304635761589404, |
| "grad_norm": 1.7456316161527055, |
| "learning_rate": 3.526540222919605e-07, |
| "loss": 1.1212, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.46357615894039733, |
| "grad_norm": 1.6991459409500143, |
| "learning_rate": 3.255986115243545e-07, |
| "loss": 1.1275, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.4966887417218543, |
| "grad_norm": 1.7534210030423583, |
| "learning_rate": 2.97528381823508e-07, |
| "loss": 1.1049, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.5298013245033113, |
| "grad_norm": 1.7140941720199787, |
| "learning_rate": 2.68820141704636e-07, |
| "loss": 1.1042, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.5629139072847682, |
| "grad_norm": 1.7525269414058429, |
| "learning_rate": 2.398592641927878e-07, |
| "loss": 1.0951, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.5960264900662252, |
| "grad_norm": 1.6093519147740063, |
| "learning_rate": 2.1103451366114007e-07, |
| "loss": 1.11, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.6291390728476821, |
| "grad_norm": 1.7141231135481751, |
| "learning_rate": 1.827328271445881e-07, |
| "loss": 1.1127, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.6622516556291391, |
| "grad_norm": 1.6936730364698183, |
| "learning_rate": 1.5533412018312042e-07, |
| "loss": 1.1054, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.6622516556291391, |
| "eval_loss": 1.0929443836212158, |
| "eval_runtime": 26.7241, |
| "eval_samples_per_second": 46.774, |
| "eval_steps_per_second": 0.748, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.695364238410596, |
| "grad_norm": 1.6640304962792698, |
| "learning_rate": 1.2920618692017632e-07, |
| "loss": 1.0931, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.7284768211920529, |
| "grad_norm": 1.6853199575869429, |
| "learning_rate": 1.0469976291593402e-07, |
| "loss": 1.0786, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.7615894039735099, |
| "grad_norm": 1.6787446835035877, |
| "learning_rate": 8.21438169512223e-08, |
| "loss": 1.0944, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.7947019867549668, |
| "grad_norm": 1.7361953819881613, |
| "learning_rate": 6.184113502383415e-08, |
| "loss": 1.0895, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.8278145695364238, |
| "grad_norm": 1.6915106308947352, |
| "learning_rate": 4.406425581668988e-08, |
| "loss": 1.0921, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.8609271523178808, |
| "grad_norm": 1.6890053704917347, |
| "learning_rate": 2.905181219922448e-08, |
| "loss": 1.0888, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.8940397350993378, |
| "grad_norm": 1.7278129612109874, |
| "learning_rate": 1.700532787286593e-08, |
| "loss": 1.0897, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.9271523178807947, |
| "grad_norm": 1.7209342188458445, |
| "learning_rate": 8.086512161725873e-09, |
| "loss": 1.0865, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.9602649006622517, |
| "grad_norm": 1.68192345667153, |
| "learning_rate": 2.4150892626280394e-09, |
| "loss": 1.0884, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.9933774834437086, |
| "grad_norm": 1.6887236258880367, |
| "learning_rate": 6.719109414038416e-11, |
| "loss": 1.0915, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.9933774834437086, |
| "eval_loss": 1.087569236755371, |
| "eval_runtime": 27.0413, |
| "eval_samples_per_second": 46.226, |
| "eval_steps_per_second": 0.74, |
| "step": 300 |
| }, |
| { |
| "epoch": 1.0, |
| "step": 302, |
| "total_flos": 126255932375040.0, |
| "train_loss": 0.0, |
| "train_runtime": 0.0184, |
| "train_samples_per_second": 2103253.281, |
| "train_steps_per_second": 16451.669 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 302, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 100, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 126255932375040.0, |
| "train_batch_size": 8, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|