{ "best_global_step": 150, "best_metric": 1.579034686088562, "best_model_checkpoint": "./models/t5-function-call-finetuned/checkpoint-150", "epoch": 10.0, "eval_steps": 500, "global_step": 150, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.07017543859649122, "grad_norm": 64.5877914428711, "learning_rate": 0.0, "loss": 11.5084, "step": 1 }, { "epoch": 0.7017543859649122, "grad_norm": 3.7129411697387695, "learning_rate": 0.00019285714285714286, "loss": 7.3837, "step": 10 }, { "epoch": 1.0, "eval_loss": 2.611706018447876, "eval_runtime": 5.5425, "eval_samples_per_second": 4.691, "eval_steps_per_second": 1.263, "step": 15 }, { "epoch": 1.3508771929824561, "grad_norm": 1.3122819662094116, "learning_rate": 0.00028897058823529407, "loss": 3.4848, "step": 20 }, { "epoch": 2.0, "grad_norm": 1.599865198135376, "learning_rate": 0.0002669117647058823, "loss": 2.4457, "step": 30 }, { "epoch": 2.0, "eval_loss": 1.8906079530715942, "eval_runtime": 4.8084, "eval_samples_per_second": 5.407, "eval_steps_per_second": 1.456, "step": 30 }, { "epoch": 2.7017543859649122, "grad_norm": 0.6903684139251709, "learning_rate": 0.0002448529411764706, "loss": 2.0901, "step": 40 }, { "epoch": 3.0, "eval_loss": 1.754644513130188, "eval_runtime": 5.6357, "eval_samples_per_second": 4.613, "eval_steps_per_second": 1.242, "step": 45 }, { "epoch": 3.3508771929824563, "grad_norm": 0.39704379439353943, "learning_rate": 0.00022279411764705882, "loss": 1.9332, "step": 50 }, { "epoch": 4.0, "grad_norm": 1.1494520902633667, "learning_rate": 0.00020073529411764702, "loss": 1.8694, "step": 60 }, { "epoch": 4.0, "eval_loss": 1.6825193166732788, "eval_runtime": 5.5482, "eval_samples_per_second": 4.686, "eval_steps_per_second": 1.262, "step": 60 }, { "epoch": 4.701754385964913, "grad_norm": 0.37586721777915955, "learning_rate": 0.00017867647058823527, "loss": 1.8034, "step": 70 }, { "epoch": 5.0, "eval_loss": 1.6308975219726562, "eval_runtime": 5.5437, "eval_samples_per_second": 4.69, "eval_steps_per_second": 1.263, "step": 75 }, { "epoch": 5.350877192982456, "grad_norm": 0.4155793786048889, "learning_rate": 0.00015661764705882352, "loss": 1.7805, "step": 80 }, { "epoch": 6.0, "grad_norm": 1.2015619277954102, "learning_rate": 0.00013455882352941175, "loss": 1.7608, "step": 90 }, { "epoch": 6.0, "eval_loss": 1.6127651929855347, "eval_runtime": 5.6533, "eval_samples_per_second": 4.599, "eval_steps_per_second": 1.238, "step": 90 }, { "epoch": 6.701754385964913, "grad_norm": 0.31167203187942505, "learning_rate": 0.0001125, "loss": 1.733, "step": 100 }, { "epoch": 7.0, "eval_loss": 1.6017709970474243, "eval_runtime": 5.8448, "eval_samples_per_second": 4.448, "eval_steps_per_second": 1.198, "step": 105 }, { "epoch": 7.350877192982456, "grad_norm": 0.27257081866264343, "learning_rate": 9.044117647058822e-05, "loss": 1.7074, "step": 110 }, { "epoch": 8.0, "grad_norm": 0.5978785157203674, "learning_rate": 6.838235294117646e-05, "loss": 1.7125, "step": 120 }, { "epoch": 8.0, "eval_loss": 1.5895110368728638, "eval_runtime": 5.7894, "eval_samples_per_second": 4.491, "eval_steps_per_second": 1.209, "step": 120 }, { "epoch": 8.701754385964913, "grad_norm": 0.24433940649032593, "learning_rate": 4.63235294117647e-05, "loss": 1.6954, "step": 130 }, { "epoch": 9.0, "eval_loss": 1.5858670473098755, "eval_runtime": 5.7488, "eval_samples_per_second": 4.523, "eval_steps_per_second": 1.218, "step": 135 }, { "epoch": 9.350877192982455, "grad_norm": 0.28507718443870544, "learning_rate": 2.426470588235294e-05, "loss": 1.6935, "step": 140 }, { "epoch": 10.0, "grad_norm": 1.2023290395736694, "learning_rate": 2.2058823529411763e-06, "loss": 1.7175, "step": 150 }, { "epoch": 10.0, "eval_loss": 1.579034686088562, "eval_runtime": 5.9112, "eval_samples_per_second": 4.398, "eval_steps_per_second": 1.184, "step": 150 } ], "logging_steps": 10, "max_steps": 150, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 3, "early_stopping_threshold": 0.001 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 380648816640000.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }