{ "best_metric": 0.4721340835094452, "best_model_checkpoint": "tiny_bert_km_50_v1_sst2/checkpoint-264", "epoch": 6.0, "eval_steps": 500, "global_step": 1584, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 5.747946739196777, "learning_rate": 4.9e-05, "loss": 0.4666, "step": 264 }, { "epoch": 1.0, "eval_accuracy": 0.7855504587155964, "eval_loss": 0.4721340835094452, "eval_runtime": 0.3669, "eval_samples_per_second": 2376.728, "eval_steps_per_second": 10.902, "step": 264 }, { "epoch": 2.0, "grad_norm": 8.643387794494629, "learning_rate": 4.8e-05, "loss": 0.2544, "step": 528 }, { "epoch": 2.0, "eval_accuracy": 0.8004587155963303, "eval_loss": 0.522304356098175, "eval_runtime": 0.3235, "eval_samples_per_second": 2695.928, "eval_steps_per_second": 12.367, "step": 528 }, { "epoch": 3.0, "grad_norm": 4.789928913116455, "learning_rate": 4.7e-05, "loss": 0.1989, "step": 792 }, { "epoch": 3.0, "eval_accuracy": 0.8027522935779816, "eval_loss": 0.5052730441093445, "eval_runtime": 0.3266, "eval_samples_per_second": 2670.238, "eval_steps_per_second": 12.249, "step": 792 }, { "epoch": 4.0, "grad_norm": 3.88606595993042, "learning_rate": 4.600000000000001e-05, "loss": 0.1622, "step": 1056 }, { "epoch": 4.0, "eval_accuracy": 0.805045871559633, "eval_loss": 0.5874336361885071, "eval_runtime": 0.3218, "eval_samples_per_second": 2709.525, "eval_steps_per_second": 12.429, "step": 1056 }, { "epoch": 5.0, "grad_norm": 10.503990173339844, "learning_rate": 4.5e-05, "loss": 0.1356, "step": 1320 }, { "epoch": 5.0, "eval_accuracy": 0.7912844036697247, "eval_loss": 0.6845178008079529, "eval_runtime": 0.3229, "eval_samples_per_second": 2700.711, "eval_steps_per_second": 12.389, "step": 1320 }, { "epoch": 6.0, "grad_norm": 6.96405553817749, "learning_rate": 4.4000000000000006e-05, "loss": 0.1131, "step": 1584 }, { "epoch": 6.0, "eval_accuracy": 0.7912844036697247, "eval_loss": 0.7334715723991394, "eval_runtime": 0.3192, "eval_samples_per_second": 2732.097, "eval_steps_per_second": 12.533, "step": 1584 }, { "epoch": 6.0, "step": 1584, "total_flos": 1.0596790573590528e+16, "train_loss": 0.22179452819053572, "train_runtime": 275.0501, "train_samples_per_second": 12243.041, "train_steps_per_second": 47.991 } ], "logging_steps": 1, "max_steps": 13200, "num_input_tokens_seen": 0, "num_train_epochs": 50, "save_steps": 500, "total_flos": 1.0596790573590528e+16, "train_batch_size": 256, "trial_name": null, "trial_params": null }