| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.7104795737122558, |
| "eval_steps": 200, |
| "global_step": 200, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.035523978685612786, |
| "grad_norm": 30.68755531311035, |
| "learning_rate": 2.1176470588235296e-05, |
| "loss": 10.0284, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.07104795737122557, |
| "grad_norm": 11.449149131774902, |
| "learning_rate": 4.470588235294118e-05, |
| "loss": 8.8672, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.10657193605683836, |
| "grad_norm": 8.332048416137695, |
| "learning_rate": 6.823529411764707e-05, |
| "loss": 7.2862, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.14209591474245115, |
| "grad_norm": 8.64196491241455, |
| "learning_rate": 9.176470588235295e-05, |
| "loss": 5.7446, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.17761989342806395, |
| "grad_norm": 9.184565544128418, |
| "learning_rate": 0.00011529411764705881, |
| "loss": 4.3277, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.21314387211367672, |
| "grad_norm": 12.228641510009766, |
| "learning_rate": 0.00013882352941176472, |
| "loss": 2.8672, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.24866785079928952, |
| "grad_norm": 10.162263870239258, |
| "learning_rate": 0.0001623529411764706, |
| "loss": 2.1514, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.2841918294849023, |
| "grad_norm": 8.797259330749512, |
| "learning_rate": 0.00018588235294117648, |
| "loss": 1.9459, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.3197158081705151, |
| "grad_norm": 6.776567459106445, |
| "learning_rate": 0.00019894875164257555, |
| "loss": 1.3218, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.3552397868561279, |
| "grad_norm": 8.384011268615723, |
| "learning_rate": 0.00019632063074901445, |
| "loss": 1.2121, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.3907637655417407, |
| "grad_norm": 6.2972564697265625, |
| "learning_rate": 0.00019369250985545335, |
| "loss": 0.9314, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.42628774422735344, |
| "grad_norm": 5.5578179359436035, |
| "learning_rate": 0.00019106438896189225, |
| "loss": 0.7533, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.46181172291296624, |
| "grad_norm": 5.197394847869873, |
| "learning_rate": 0.00018843626806833115, |
| "loss": 0.7108, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.49733570159857904, |
| "grad_norm": 3.50080943107605, |
| "learning_rate": 0.00018580814717477005, |
| "loss": 0.4706, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.5328596802841918, |
| "grad_norm": 7.297764301300049, |
| "learning_rate": 0.00018318002628120894, |
| "loss": 0.4651, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.5683836589698046, |
| "grad_norm": 3.9045844078063965, |
| "learning_rate": 0.00018055190538764784, |
| "loss": 0.5066, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.6039076376554174, |
| "grad_norm": 3.720517873764038, |
| "learning_rate": 0.00017792378449408674, |
| "loss": 0.4518, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.6394316163410302, |
| "grad_norm": 2.6637465953826904, |
| "learning_rate": 0.00017529566360052564, |
| "loss": 0.3716, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.6749555950266429, |
| "grad_norm": 3.8581767082214355, |
| "learning_rate": 0.00017266754270696454, |
| "loss": 0.2933, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.7104795737122558, |
| "grad_norm": 2.2496705055236816, |
| "learning_rate": 0.0001700394218134034, |
| "loss": 0.299, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.7104795737122558, |
| "eval_loss": 0.08425440639257431, |
| "eval_runtime": 10.2304, |
| "eval_samples_per_second": 48.874, |
| "eval_steps_per_second": 6.158, |
| "step": 200 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 846, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 50, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 8104703246401536.0, |
| "train_batch_size": 16, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|