| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 20.0, | |
| "eval_steps": 500, | |
| "global_step": 2440, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 3.9568660259246826, | |
| "learning_rate": 4.75e-05, | |
| "loss": 0.5556, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_accuracy": 0.7343358395989975, | |
| "eval_f1": 0.6899266862170088, | |
| "eval_loss": 0.5147875547409058, | |
| "eval_precision": 0.6851714708898257, | |
| "eval_recall": 0.6970358246953992, | |
| "eval_runtime": 1.7157, | |
| "eval_samples_per_second": 232.562, | |
| "eval_steps_per_second": 29.143, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 4.367062091827393, | |
| "learning_rate": 4.5e-05, | |
| "loss": 0.476, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_accuracy": 0.7869674185463659, | |
| "eval_f1": 0.7095448122296921, | |
| "eval_loss": 0.4507494866847992, | |
| "eval_precision": 0.7577399380804953, | |
| "eval_recall": 0.6917621385706492, | |
| "eval_runtime": 1.7126, | |
| "eval_samples_per_second": 232.973, | |
| "eval_steps_per_second": 29.195, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "grad_norm": 8.358736991882324, | |
| "learning_rate": 4.25e-05, | |
| "loss": 0.4238, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_accuracy": 0.8195488721804511, | |
| "eval_f1": 0.7644319076357912, | |
| "eval_loss": 0.4003293514251709, | |
| "eval_precision": 0.7957593330916999, | |
| "eval_recall": 0.7473176941262047, | |
| "eval_runtime": 1.7133, | |
| "eval_samples_per_second": 232.886, | |
| "eval_steps_per_second": 29.184, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "grad_norm": 2.034170627593994, | |
| "learning_rate": 4e-05, | |
| "loss": 0.3735, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_accuracy": 0.8370927318295739, | |
| "eval_f1": 0.796615353247018, | |
| "eval_loss": 0.379879891872406, | |
| "eval_precision": 0.8088983050847458, | |
| "eval_recall": 0.7872340425531914, | |
| "eval_runtime": 1.7115, | |
| "eval_samples_per_second": 233.13, | |
| "eval_steps_per_second": 29.214, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "grad_norm": 4.402078151702881, | |
| "learning_rate": 3.7500000000000003e-05, | |
| "loss": 0.3548, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_accuracy": 0.8471177944862155, | |
| "eval_f1": 0.8160386984618873, | |
| "eval_loss": 0.3634377121925354, | |
| "eval_precision": 0.8152632848784607, | |
| "eval_recall": 0.8168303327877796, | |
| "eval_runtime": 1.7124, | |
| "eval_samples_per_second": 233.013, | |
| "eval_steps_per_second": 29.2, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "grad_norm": 4.384060382843018, | |
| "learning_rate": 3.5e-05, | |
| "loss": 0.3213, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_accuracy": 0.8421052631578947, | |
| "eval_f1": 0.8136136136136136, | |
| "eval_loss": 0.35842451453208923, | |
| "eval_precision": 0.8076923076923077, | |
| "eval_recall": 0.820785597381342, | |
| "eval_runtime": 1.7164, | |
| "eval_samples_per_second": 232.462, | |
| "eval_steps_per_second": 29.131, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "grad_norm": 3.8013863563537598, | |
| "learning_rate": 3.2500000000000004e-05, | |
| "loss": 0.3085, | |
| "step": 854 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "eval_accuracy": 0.8621553884711779, | |
| "eval_f1": 0.8259397035145188, | |
| "eval_loss": 0.3317520022392273, | |
| "eval_precision": 0.844799331103679, | |
| "eval_recall": 0.8124659028914347, | |
| "eval_runtime": 1.712, | |
| "eval_samples_per_second": 233.056, | |
| "eval_steps_per_second": 29.205, | |
| "step": 854 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "grad_norm": 4.293257236480713, | |
| "learning_rate": 3e-05, | |
| "loss": 0.2981, | |
| "step": 976 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_accuracy": 0.8671679197994987, | |
| "eval_f1": 0.8237705942648565, | |
| "eval_loss": 0.3428766429424286, | |
| "eval_precision": 0.8722222222222222, | |
| "eval_recall": 0.7985088197854155, | |
| "eval_runtime": 1.712, | |
| "eval_samples_per_second": 233.064, | |
| "eval_steps_per_second": 29.206, | |
| "step": 976 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "grad_norm": 9.226790428161621, | |
| "learning_rate": 2.7500000000000004e-05, | |
| "loss": 0.2788, | |
| "step": 1098 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "eval_accuracy": 0.8796992481203008, | |
| "eval_f1": 0.8439374185136896, | |
| "eval_loss": 0.33035776019096375, | |
| "eval_precision": 0.8794955044955045, | |
| "eval_recall": 0.822376795781051, | |
| "eval_runtime": 1.7106, | |
| "eval_samples_per_second": 233.253, | |
| "eval_steps_per_second": 29.23, | |
| "step": 1098 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "grad_norm": 4.4179863929748535, | |
| "learning_rate": 2.5e-05, | |
| "loss": 0.259, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "eval_accuracy": 0.8771929824561403, | |
| "eval_f1": 0.850729517396184, | |
| "eval_loss": 0.30757635831832886, | |
| "eval_precision": 0.8535087719298247, | |
| "eval_recall": 0.8481087470449173, | |
| "eval_runtime": 1.7123, | |
| "eval_samples_per_second": 233.024, | |
| "eval_steps_per_second": 29.201, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 11.0, | |
| "grad_norm": 2.00750470161438, | |
| "learning_rate": 2.25e-05, | |
| "loss": 0.2587, | |
| "step": 1342 | |
| }, | |
| { | |
| "epoch": 11.0, | |
| "eval_accuracy": 0.87468671679198, | |
| "eval_f1": 0.8503151260504201, | |
| "eval_loss": 0.3025457561016083, | |
| "eval_precision": 0.8470628455912955, | |
| "eval_recall": 0.8538370612838698, | |
| "eval_runtime": 1.7097, | |
| "eval_samples_per_second": 233.369, | |
| "eval_steps_per_second": 29.244, | |
| "step": 1342 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "grad_norm": 4.474819660186768, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2391, | |
| "step": 1464 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "eval_accuracy": 0.8847117794486216, | |
| "eval_f1": 0.8564658408408408, | |
| "eval_loss": 0.2989969253540039, | |
| "eval_precision": 0.8697278911564625, | |
| "eval_recall": 0.8459265320967448, | |
| "eval_runtime": 1.7113, | |
| "eval_samples_per_second": 233.161, | |
| "eval_steps_per_second": 29.218, | |
| "step": 1464 | |
| }, | |
| { | |
| "epoch": 13.0, | |
| "grad_norm": 5.184099197387695, | |
| "learning_rate": 1.75e-05, | |
| "loss": 0.2443, | |
| "step": 1586 | |
| }, | |
| { | |
| "epoch": 13.0, | |
| "eval_accuracy": 0.8796992481203008, | |
| "eval_f1": 0.8518472677764712, | |
| "eval_loss": 0.2918570339679718, | |
| "eval_precision": 0.8599810186649794, | |
| "eval_recall": 0.844880887434079, | |
| "eval_runtime": 1.7186, | |
| "eval_samples_per_second": 232.167, | |
| "eval_steps_per_second": 29.094, | |
| "step": 1586 | |
| }, | |
| { | |
| "epoch": 14.0, | |
| "grad_norm": 6.611837863922119, | |
| "learning_rate": 1.5e-05, | |
| "loss": 0.237, | |
| "step": 1708 | |
| }, | |
| { | |
| "epoch": 14.0, | |
| "eval_accuracy": 0.8771929824561403, | |
| "eval_f1": 0.8550328105883662, | |
| "eval_loss": 0.30395177006721497, | |
| "eval_precision": 0.8482905982905984, | |
| "eval_recall": 0.8631114748136025, | |
| "eval_runtime": 1.7146, | |
| "eval_samples_per_second": 232.704, | |
| "eval_steps_per_second": 29.161, | |
| "step": 1708 | |
| }, | |
| { | |
| "epoch": 15.0, | |
| "grad_norm": 5.530145168304443, | |
| "learning_rate": 1.25e-05, | |
| "loss": 0.2176, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 15.0, | |
| "eval_accuracy": 0.8897243107769424, | |
| "eval_f1": 0.8682773109243698, | |
| "eval_loss": 0.2936829626560211, | |
| "eval_precision": 0.864855223259409, | |
| "eval_recall": 0.8719767230405528, | |
| "eval_runtime": 1.7134, | |
| "eval_samples_per_second": 232.875, | |
| "eval_steps_per_second": 29.182, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 16.0, | |
| "grad_norm": 3.9305107593536377, | |
| "learning_rate": 1e-05, | |
| "loss": 0.2202, | |
| "step": 1952 | |
| }, | |
| { | |
| "epoch": 16.0, | |
| "eval_accuracy": 0.8822055137844611, | |
| "eval_f1": 0.8560793854229822, | |
| "eval_loss": 0.29200199246406555, | |
| "eval_precision": 0.8609538327526132, | |
| "eval_recall": 0.8516548463356974, | |
| "eval_runtime": 1.7117, | |
| "eval_samples_per_second": 233.104, | |
| "eval_steps_per_second": 29.211, | |
| "step": 1952 | |
| }, | |
| { | |
| "epoch": 17.0, | |
| "grad_norm": 5.218233585357666, | |
| "learning_rate": 7.5e-06, | |
| "loss": 0.2203, | |
| "step": 2074 | |
| }, | |
| { | |
| "epoch": 17.0, | |
| "eval_accuracy": 0.8822055137844611, | |
| "eval_f1": 0.8575487105473052, | |
| "eval_loss": 0.29227131605148315, | |
| "eval_precision": 0.8584592421103936, | |
| "eval_recall": 0.8566557555919259, | |
| "eval_runtime": 1.7107, | |
| "eval_samples_per_second": 233.232, | |
| "eval_steps_per_second": 29.227, | |
| "step": 2074 | |
| }, | |
| { | |
| "epoch": 18.0, | |
| "grad_norm": 3.9121828079223633, | |
| "learning_rate": 5e-06, | |
| "loss": 0.2204, | |
| "step": 2196 | |
| }, | |
| { | |
| "epoch": 18.0, | |
| "eval_accuracy": 0.8847117794486216, | |
| "eval_f1": 0.8602260265626904, | |
| "eval_loss": 0.2926991283893585, | |
| "eval_precision": 0.8620943049601959, | |
| "eval_recall": 0.8584288052373159, | |
| "eval_runtime": 1.7115, | |
| "eval_samples_per_second": 233.126, | |
| "eval_steps_per_second": 29.214, | |
| "step": 2196 | |
| }, | |
| { | |
| "epoch": 19.0, | |
| "grad_norm": 9.12126636505127, | |
| "learning_rate": 2.5e-06, | |
| "loss": 0.2124, | |
| "step": 2318 | |
| }, | |
| { | |
| "epoch": 19.0, | |
| "eval_accuracy": 0.8872180451127819, | |
| "eval_f1": 0.8622036668943447, | |
| "eval_loss": 0.29202741384506226, | |
| "eval_precision": 0.8671602787456446, | |
| "eval_recall": 0.8577014002545917, | |
| "eval_runtime": 1.711, | |
| "eval_samples_per_second": 233.194, | |
| "eval_steps_per_second": 29.222, | |
| "step": 2318 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "grad_norm": 3.568925142288208, | |
| "learning_rate": 0.0, | |
| "loss": 0.2108, | |
| "step": 2440 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "eval_accuracy": 0.8847117794486216, | |
| "eval_f1": 0.8587719298245614, | |
| "eval_loss": 0.29165780544281006, | |
| "eval_precision": 0.864771021021021, | |
| "eval_recall": 0.8534278959810875, | |
| "eval_runtime": 1.7116, | |
| "eval_samples_per_second": 233.115, | |
| "eval_steps_per_second": 29.212, | |
| "step": 2440 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "step": 2440, | |
| "total_flos": 7677008691480000.0, | |
| "train_loss": 0.29650945194431994, | |
| "train_runtime": 617.5878, | |
| "train_samples_per_second": 118.04, | |
| "train_steps_per_second": 3.951 | |
| } | |
| ], | |
| "logging_steps": 500, | |
| "max_steps": 2440, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 20, | |
| "save_steps": 500, | |
| "total_flos": 7677008691480000.0, | |
| "train_batch_size": 30, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |