| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 6.0, | |
| "eval_steps": 500, | |
| "global_step": 12990, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.00046189376443418013, | |
| "grad_norm": 12.800962448120117, | |
| "learning_rate": 0.0, | |
| "loss": 4.0583, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.23094688221709006, | |
| "eval_cosine_accuracy": 0.9421356916427612, | |
| "eval_loss": 1.4610942602157593, | |
| "eval_runtime": 21.999, | |
| "eval_samples_per_second": 432.064, | |
| "eval_steps_per_second": 1.727, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.4618937644341801, | |
| "eval_cosine_accuracy": 0.9457128047943115, | |
| "eval_loss": 1.3612146377563477, | |
| "eval_runtime": 21.8898, | |
| "eval_samples_per_second": 434.22, | |
| "eval_steps_per_second": 1.736, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.6928406466512702, | |
| "eval_cosine_accuracy": 0.9528669118881226, | |
| "eval_loss": 1.288307547569275, | |
| "eval_runtime": 21.8186, | |
| "eval_samples_per_second": 435.638, | |
| "eval_steps_per_second": 1.742, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.9237875288683602, | |
| "eval_cosine_accuracy": 0.9522356390953064, | |
| "eval_loss": 1.2684112787246704, | |
| "eval_runtime": 21.8066, | |
| "eval_samples_per_second": 435.877, | |
| "eval_steps_per_second": 1.743, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 18.849971771240234, | |
| "learning_rate": 1.6664098613251156e-05, | |
| "loss": 2.6124, | |
| "step": 2165 | |
| }, | |
| { | |
| "epoch": 1.1547344110854503, | |
| "eval_cosine_accuracy": 0.9541293978691101, | |
| "eval_loss": 1.2559542655944824, | |
| "eval_runtime": 21.9828, | |
| "eval_samples_per_second": 432.383, | |
| "eval_steps_per_second": 1.729, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 1.3856812933025404, | |
| "eval_cosine_accuracy": 0.956233561038971, | |
| "eval_loss": 1.1885266304016113, | |
| "eval_runtime": 23.3704, | |
| "eval_samples_per_second": 406.71, | |
| "eval_steps_per_second": 1.626, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 1.6166281755196303, | |
| "eval_cosine_accuracy": 0.9557075500488281, | |
| "eval_loss": 1.187910556793213, | |
| "eval_runtime": 21.9291, | |
| "eval_samples_per_second": 433.442, | |
| "eval_steps_per_second": 1.733, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 1.8475750577367207, | |
| "eval_cosine_accuracy": 0.9580221176147461, | |
| "eval_loss": 1.1555284261703491, | |
| "eval_runtime": 21.9378, | |
| "eval_samples_per_second": 433.27, | |
| "eval_steps_per_second": 1.732, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 17.494279861450195, | |
| "learning_rate": 1.1120800205180818e-05, | |
| "loss": 1.986, | |
| "step": 4330 | |
| }, | |
| { | |
| "epoch": 2.0785219399538106, | |
| "eval_cosine_accuracy": 0.9582325220108032, | |
| "eval_loss": 1.154712438583374, | |
| "eval_runtime": 22.0846, | |
| "eval_samples_per_second": 430.391, | |
| "eval_steps_per_second": 1.721, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 2.3094688221709005, | |
| "eval_cosine_accuracy": 0.9584429264068604, | |
| "eval_loss": 1.145609974861145, | |
| "eval_runtime": 22.0404, | |
| "eval_samples_per_second": 431.254, | |
| "eval_steps_per_second": 1.724, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 2.540415704387991, | |
| "eval_cosine_accuracy": 0.9585481286048889, | |
| "eval_loss": 1.135787844657898, | |
| "eval_runtime": 23.3896, | |
| "eval_samples_per_second": 406.377, | |
| "eval_steps_per_second": 1.625, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 2.771362586605081, | |
| "eval_cosine_accuracy": 0.9596002101898193, | |
| "eval_loss": 1.1278640031814575, | |
| "eval_runtime": 21.8218, | |
| "eval_samples_per_second": 435.573, | |
| "eval_steps_per_second": 1.741, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "grad_norm": 19.504749298095703, | |
| "learning_rate": 2.0518081559374197e-08, | |
| "loss": 1.8005, | |
| "step": 6495 | |
| }, | |
| { | |
| "epoch": 3.0023094688221708, | |
| "eval_cosine_accuracy": 0.958758533000946, | |
| "eval_loss": 1.1430408954620361, | |
| "eval_runtime": 21.8232, | |
| "eval_samples_per_second": 435.545, | |
| "eval_steps_per_second": 1.741, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 3.233256351039261, | |
| "eval_cosine_accuracy": 0.9589689373970032, | |
| "eval_loss": 1.1253587007522583, | |
| "eval_runtime": 21.5629, | |
| "eval_samples_per_second": 440.804, | |
| "eval_steps_per_second": 1.762, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 3.464203233256351, | |
| "eval_cosine_accuracy": 0.9603366851806641, | |
| "eval_loss": 1.1333616971969604, | |
| "eval_runtime": 21.5578, | |
| "eval_samples_per_second": 440.907, | |
| "eval_steps_per_second": 1.763, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 3.695150115473441, | |
| "eval_cosine_accuracy": 0.959915816783905, | |
| "eval_loss": 1.1089916229248047, | |
| "eval_runtime": 21.4736, | |
| "eval_samples_per_second": 442.637, | |
| "eval_steps_per_second": 1.77, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 3.9260969976905313, | |
| "eval_cosine_accuracy": 0.9602314829826355, | |
| "eval_loss": 1.1000142097473145, | |
| "eval_runtime": 21.6139, | |
| "eval_samples_per_second": 439.764, | |
| "eval_steps_per_second": 1.758, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "grad_norm": 19.312646865844727, | |
| "learning_rate": 8.341350779295749e-06, | |
| "loss": 1.7181, | |
| "step": 8660 | |
| }, | |
| { | |
| "epoch": 4.157043879907621, | |
| "eval_cosine_accuracy": 0.9586533308029175, | |
| "eval_loss": 1.1027752161026, | |
| "eval_runtime": 22.7823, | |
| "eval_samples_per_second": 417.21, | |
| "eval_steps_per_second": 1.668, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 4.387990762124711, | |
| "eval_cosine_accuracy": 0.9591794013977051, | |
| "eval_loss": 1.1046050786972046, | |
| "eval_runtime": 21.7857, | |
| "eval_samples_per_second": 436.296, | |
| "eval_steps_per_second": 1.744, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 4.618937644341801, | |
| "eval_cosine_accuracy": 0.9596002101898193, | |
| "eval_loss": 1.0983500480651855, | |
| "eval_runtime": 21.6711, | |
| "eval_samples_per_second": 438.604, | |
| "eval_steps_per_second": 1.753, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 4.849884526558892, | |
| "eval_cosine_accuracy": 0.9598106145858765, | |
| "eval_loss": 1.0925102233886719, | |
| "eval_runtime": 21.6712, | |
| "eval_samples_per_second": 438.6, | |
| "eval_steps_per_second": 1.753, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "grad_norm": 10.462065696716309, | |
| "learning_rate": 4.1793342312872815e-06, | |
| "loss": 1.6411, | |
| "step": 10825 | |
| }, | |
| { | |
| "epoch": 5.080831408775982, | |
| "eval_cosine_accuracy": 0.9600210189819336, | |
| "eval_loss": 1.0932044982910156, | |
| "eval_runtime": 21.5545, | |
| "eval_samples_per_second": 440.974, | |
| "eval_steps_per_second": 1.763, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 5.311778290993072, | |
| "eval_cosine_accuracy": 0.9596002101898193, | |
| "eval_loss": 1.089023470878601, | |
| "eval_runtime": 23.2588, | |
| "eval_samples_per_second": 408.662, | |
| "eval_steps_per_second": 1.634, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 5.542725173210162, | |
| "eval_cosine_accuracy": 0.9600210189819336, | |
| "eval_loss": 1.0830504894256592, | |
| "eval_runtime": 21.6294, | |
| "eval_samples_per_second": 439.448, | |
| "eval_steps_per_second": 1.757, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 5.773672055427252, | |
| "eval_cosine_accuracy": 0.9600210189819336, | |
| "eval_loss": 1.0858004093170166, | |
| "eval_runtime": 21.6851, | |
| "eval_samples_per_second": 438.32, | |
| "eval_steps_per_second": 1.752, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "grad_norm": 19.333669662475586, | |
| "learning_rate": 1.539349624783529e-08, | |
| "loss": 1.6083, | |
| "step": 12990 | |
| } | |
| ], | |
| "logging_steps": 500, | |
| "max_steps": 12990, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 6, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 256, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |