| { |
| "best_metric": 0.6391554702495201, |
| "best_model_checkpoint": "models/safety-classifier-gpt41/checkpoint-3519", |
| "epoch": 3.0, |
| "eval_steps": 500, |
| "global_step": 3519, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.09974424552429667, |
| "grad_norm": 0.4263969659805298, |
| "learning_rate": 8.297872340425533e-05, |
| "loss": 0.1391, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.19948849104859334, |
| "grad_norm": 0.27834752202033997, |
| "learning_rate": 0.00016595744680851065, |
| "loss": 0.1218, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.29923273657289, |
| "grad_norm": 0.6282734274864197, |
| "learning_rate": 0.00019687074829931973, |
| "loss": 0.1102, |
| "step": 351 |
| }, |
| { |
| "epoch": 0.3989769820971867, |
| "grad_norm": 0.44432786107063293, |
| "learning_rate": 0.00019156462585034014, |
| "loss": 0.105, |
| "step": 468 |
| }, |
| { |
| "epoch": 0.49872122762148335, |
| "grad_norm": 0.2723007798194885, |
| "learning_rate": 0.00018625850340136055, |
| "loss": 0.1079, |
| "step": 585 |
| }, |
| { |
| "epoch": 0.59846547314578, |
| "grad_norm": 0.48778554797172546, |
| "learning_rate": 0.00018095238095238095, |
| "loss": 0.1054, |
| "step": 702 |
| }, |
| { |
| "epoch": 0.6982097186700768, |
| "grad_norm": 0.25881531834602356, |
| "learning_rate": 0.00017564625850340136, |
| "loss": 0.1086, |
| "step": 819 |
| }, |
| { |
| "epoch": 0.7979539641943734, |
| "grad_norm": 0.25694021582603455, |
| "learning_rate": 0.0001703401360544218, |
| "loss": 0.1048, |
| "step": 936 |
| }, |
| { |
| "epoch": 0.8976982097186701, |
| "grad_norm": 0.4252418279647827, |
| "learning_rate": 0.0001650340136054422, |
| "loss": 0.1034, |
| "step": 1053 |
| }, |
| { |
| "epoch": 0.9974424552429667, |
| "grad_norm": 0.41715288162231445, |
| "learning_rate": 0.00015972789115646259, |
| "loss": 0.1021, |
| "step": 1170 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_accuracy": 0.6956707187033483, |
| "eval_f1": 0.5335076822491011, |
| "eval_loss": 0.11207349003870727, |
| "eval_precision": 0.38894184938036225, |
| "eval_recall": 0.8491155046826223, |
| "eval_runtime": 29.5167, |
| "eval_samples_per_second": 158.859, |
| "eval_steps_per_second": 4.98, |
| "step": 1173 |
| }, |
| { |
| "epoch": 1.0971867007672633, |
| "grad_norm": 0.28545722365379333, |
| "learning_rate": 0.000154421768707483, |
| "loss": 0.1003, |
| "step": 1287 |
| }, |
| { |
| "epoch": 1.19693094629156, |
| "grad_norm": 0.4745310842990875, |
| "learning_rate": 0.0001491156462585034, |
| "loss": 0.1009, |
| "step": 1404 |
| }, |
| { |
| "epoch": 1.2966751918158568, |
| "grad_norm": 0.375118225812912, |
| "learning_rate": 0.0001438095238095238, |
| "loss": 0.097, |
| "step": 1521 |
| }, |
| { |
| "epoch": 1.3964194373401535, |
| "grad_norm": 0.30696073174476624, |
| "learning_rate": 0.00013850340136054422, |
| "loss": 0.1, |
| "step": 1638 |
| }, |
| { |
| "epoch": 1.49616368286445, |
| "grad_norm": 1.0406877994537354, |
| "learning_rate": 0.00013319727891156463, |
| "loss": 0.0907, |
| "step": 1755 |
| }, |
| { |
| "epoch": 1.5959079283887467, |
| "grad_norm": 0.3132568299770355, |
| "learning_rate": 0.00012789115646258506, |
| "loss": 0.0905, |
| "step": 1872 |
| }, |
| { |
| "epoch": 1.6956521739130435, |
| "grad_norm": 0.4280431866645813, |
| "learning_rate": 0.00012258503401360544, |
| "loss": 0.0952, |
| "step": 1989 |
| }, |
| { |
| "epoch": 1.7953964194373402, |
| "grad_norm": 0.7637690305709839, |
| "learning_rate": 0.00011727891156462585, |
| "loss": 0.0962, |
| "step": 2106 |
| }, |
| { |
| "epoch": 1.895140664961637, |
| "grad_norm": 0.4497227966785431, |
| "learning_rate": 0.00011197278911564626, |
| "loss": 0.0952, |
| "step": 2223 |
| }, |
| { |
| "epoch": 1.9948849104859336, |
| "grad_norm": 0.8150501251220703, |
| "learning_rate": 0.00010666666666666667, |
| "loss": 0.0925, |
| "step": 2340 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_accuracy": 0.8328001706120708, |
| "eval_f1": 0.6087824351297405, |
| "eval_loss": 0.08943241893450823, |
| "eval_precision": 0.5848513902205177, |
| "eval_recall": 0.6347554630593132, |
| "eval_runtime": 28.6966, |
| "eval_samples_per_second": 163.399, |
| "eval_steps_per_second": 5.123, |
| "step": 2346 |
| }, |
| { |
| "epoch": 2.0946291560102304, |
| "grad_norm": 0.6674602031707764, |
| "learning_rate": 0.00010136054421768707, |
| "loss": 0.0848, |
| "step": 2457 |
| }, |
| { |
| "epoch": 2.1943734015345266, |
| "grad_norm": 0.6307544112205505, |
| "learning_rate": 9.60544217687075e-05, |
| "loss": 0.0915, |
| "step": 2574 |
| }, |
| { |
| "epoch": 2.2941176470588234, |
| "grad_norm": 0.3149726092815399, |
| "learning_rate": 9.074829931972789e-05, |
| "loss": 0.0881, |
| "step": 2691 |
| }, |
| { |
| "epoch": 2.39386189258312, |
| "grad_norm": 0.8826068043708801, |
| "learning_rate": 8.54421768707483e-05, |
| "loss": 0.0899, |
| "step": 2808 |
| }, |
| { |
| "epoch": 2.493606138107417, |
| "grad_norm": 0.3497423231601715, |
| "learning_rate": 8.013605442176872e-05, |
| "loss": 0.0867, |
| "step": 2925 |
| }, |
| { |
| "epoch": 2.5933503836317136, |
| "grad_norm": 0.4942176640033722, |
| "learning_rate": 7.482993197278913e-05, |
| "loss": 0.0878, |
| "step": 3042 |
| }, |
| { |
| "epoch": 2.6930946291560103, |
| "grad_norm": 0.6147931218147278, |
| "learning_rate": 6.952380952380952e-05, |
| "loss": 0.0826, |
| "step": 3159 |
| }, |
| { |
| "epoch": 2.792838874680307, |
| "grad_norm": 0.42120879888534546, |
| "learning_rate": 6.421768707482993e-05, |
| "loss": 0.0868, |
| "step": 3276 |
| }, |
| { |
| "epoch": 2.8925831202046037, |
| "grad_norm": 0.6062323451042175, |
| "learning_rate": 5.8911564625850346e-05, |
| "loss": 0.0831, |
| "step": 3393 |
| }, |
| { |
| "epoch": 2.9923273657289, |
| "grad_norm": 0.7976586222648621, |
| "learning_rate": 5.360544217687075e-05, |
| "loss": 0.0838, |
| "step": 3510 |
| }, |
| { |
| "epoch": 3.0, |
| "eval_accuracy": 0.8396246534442312, |
| "eval_f1": 0.6391554702495201, |
| "eval_loss": 0.08481446683795528, |
| "eval_precision": 0.5930543187889582, |
| "eval_recall": 0.6930280957336108, |
| "eval_runtime": 28.8664, |
| "eval_samples_per_second": 162.438, |
| "eval_steps_per_second": 5.092, |
| "step": 3519 |
| } |
| ], |
| "logging_steps": 117, |
| "max_steps": 4692, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 4, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "EarlyStoppingCallback": { |
| "args": { |
| "early_stopping_patience": 3, |
| "early_stopping_threshold": 0.0 |
| }, |
| "attributes": { |
| "early_stopping_patience_counter": 0 |
| } |
| }, |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.5037586926129152e+16, |
| "train_batch_size": 32, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|