{
  "best_global_step": 200,
  "best_metric": 0.054177138954401016,
  "best_model_checkpoint": "/home/ricoiban/GEMMA/mnlp_chatsplaining/safety_model_output/checkpoint-200",
  "epoch": 5.0,
  "eval_steps": 50,
  "global_step": 245,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.20725388601036268,
      "grad_norm": 2.7763237953186035,
      "learning_rate": 0.0005899999999999999,
      "loss": 8.1874,
      "mean_token_accuracy": 0.390625,
      "num_tokens": 21984.0,
      "step": 10
    },
    {
      "epoch": 0.41450777202072536,
      "grad_norm": 1.8894965648651123,
      "learning_rate": 0.000565,
      "loss": 0.7121,
      "mean_token_accuracy": 0.64375,
      "num_tokens": 44111.0,
      "step": 20
    },
    {
      "epoch": 0.6217616580310881,
      "grad_norm": 2.1179754734039307,
      "learning_rate": 0.00054,
      "loss": 0.6903,
      "mean_token_accuracy": 0.659375,
      "num_tokens": 68120.0,
      "step": 30
    },
    {
      "epoch": 0.8290155440414507,
      "grad_norm": 2.2939863204956055,
      "learning_rate": 0.0005149999999999999,
      "loss": 0.677,
      "mean_token_accuracy": 0.653125,
      "num_tokens": 89669.0,
      "step": 40
    },
    {
      "epoch": 1.0207253886010363,
      "grad_norm": 2.18673038482666,
      "learning_rate": 0.00049,
      "loss": 0.6974,
      "mean_token_accuracy": 0.6655405405405406,
      "num_tokens": 111297.0,
      "step": 50
    },
    {
      "epoch": 1.0207253886010363,
      "eval_loss": 0.7593430876731873,
      "eval_mean_token_accuracy": 0.5,
      "eval_num_tokens": 111297.0,
      "eval_runtime": 0.1338,
      "eval_samples_per_second": 7.475,
      "eval_steps_per_second": 7.475,
      "step": 50
    },
    {
      "epoch": 1.2279792746113989,
      "grad_norm": 3.68613338470459,
      "learning_rate": 0.00046499999999999997,
      "loss": 0.6289,
      "mean_token_accuracy": 0.703125,
      "num_tokens": 133338.0,
      "step": 60
    },
    {
      "epoch": 1.4352331606217616,
      "grad_norm": 5.129226207733154,
      "learning_rate": 0.0004399999999999999,
      "loss": 0.5701,
      "mean_token_accuracy": 0.75625,
      "num_tokens": 156589.0,
      "step": 70
    },
    {
      "epoch": 1.6424870466321244,
      "grad_norm": 3.548778772354126,
      "learning_rate": 0.00041499999999999995,
      "loss": 0.5314,
      "mean_token_accuracy": 0.765625,
      "num_tokens": 178995.0,
      "step": 80
    },
    {
      "epoch": 1.849740932642487,
      "grad_norm": 4.861147403717041,
      "learning_rate": 0.00039,
      "loss": 0.5636,
      "mean_token_accuracy": 0.771875,
      "num_tokens": 203491.0,
      "step": 90
    },
    {
      "epoch": 2.0414507772020727,
      "grad_norm": 3.220465660095215,
      "learning_rate": 0.00036499999999999993,
      "loss": 0.4901,
      "mean_token_accuracy": 0.7905405405405406,
      "num_tokens": 223362.0,
      "step": 100
    },
    {
      "epoch": 2.0414507772020727,
      "eval_loss": 0.6653734445571899,
      "eval_mean_token_accuracy": 0.5,
      "eval_num_tokens": 223362.0,
      "eval_runtime": 0.161,
      "eval_samples_per_second": 6.212,
      "eval_steps_per_second": 6.212,
      "step": 100
    },
    {
      "epoch": 2.2487046632124352,
      "grad_norm": 3.534886360168457,
      "learning_rate": 0.00033999999999999997,
      "loss": 0.323,
      "mean_token_accuracy": 0.86875,
      "num_tokens": 245475.0,
      "step": 110
    },
    {
      "epoch": 2.4559585492227978,
      "grad_norm": 2.073479413986206,
      "learning_rate": 0.00031499999999999996,
      "loss": 0.3263,
      "mean_token_accuracy": 0.875,
      "num_tokens": 268345.0,
      "step": 120
    },
    {
      "epoch": 2.6632124352331608,
      "grad_norm": 5.83317756652832,
      "learning_rate": 0.00029,
      "loss": 0.3978,
      "mean_token_accuracy": 0.84375,
      "num_tokens": 292072.0,
      "step": 130
    },
    {
      "epoch": 2.8704663212435233,
      "grad_norm": 3.23555064201355,
      "learning_rate": 0.000265,
      "loss": 0.2859,
      "mean_token_accuracy": 0.871875,
      "num_tokens": 314093.0,
      "step": 140
    },
    {
      "epoch": 3.062176165803109,
      "grad_norm": 1.8520785570144653,
      "learning_rate": 0.00023999999999999998,
      "loss": 0.4766,
      "mean_token_accuracy": 0.8885135135135135,
      "num_tokens": 334669.0,
      "step": 150
    },
    {
      "epoch": 3.062176165803109,
      "eval_loss": 1.3390449285507202,
      "eval_mean_token_accuracy": 0.5,
      "eval_num_tokens": 334669.0,
      "eval_runtime": 0.2104,
      "eval_samples_per_second": 4.753,
      "eval_steps_per_second": 4.753,
      "step": 150
    },
    {
      "epoch": 3.2694300518134716,
      "grad_norm": 13.872314453125,
      "learning_rate": 0.000215,
      "loss": 0.1519,
      "mean_token_accuracy": 0.928125,
      "num_tokens": 358016.0,
      "step": 160
    },
    {
      "epoch": 3.476683937823834,
      "grad_norm": 4.716925144195557,
      "learning_rate": 0.00018999999999999998,
      "loss": 0.1854,
      "mean_token_accuracy": 0.934375,
      "num_tokens": 380340.0,
      "step": 170
    },
    {
      "epoch": 3.6839378238341967,
      "grad_norm": 6.100285530090332,
      "learning_rate": 0.000165,
      "loss": 0.1897,
      "mean_token_accuracy": 0.925,
      "num_tokens": 402140.0,
      "step": 180
    },
    {
      "epoch": 3.8911917098445596,
      "grad_norm": 5.842983245849609,
      "learning_rate": 0.00014,
      "loss": 0.1615,
      "mean_token_accuracy": 0.94375,
      "num_tokens": 426084.0,
      "step": 190
    },
    {
      "epoch": 4.082901554404145,
      "grad_norm": 3.3988146781921387,
      "learning_rate": 0.000115,
      "loss": 0.0874,
      "mean_token_accuracy": 0.9628378378378378,
      "num_tokens": 446213.0,
      "step": 200
    },
    {
      "epoch": 4.082901554404145,
      "eval_loss": 0.054177138954401016,
      "eval_mean_token_accuracy": 1.0,
      "eval_num_tokens": 446213.0,
      "eval_runtime": 0.1261,
      "eval_samples_per_second": 7.932,
      "eval_steps_per_second": 7.932,
      "step": 200
    },
    {
      "epoch": 4.290155440414508,
      "grad_norm": 4.433862686157227,
      "learning_rate": 8.999999999999999e-05,
      "loss": 0.0692,
      "mean_token_accuracy": 0.978125,
      "num_tokens": 469234.0,
      "step": 210
    },
    {
      "epoch": 4.4974093264248705,
      "grad_norm": 18.63327980041504,
      "learning_rate": 6.5e-05,
      "loss": 0.0986,
      "mean_token_accuracy": 0.9625,
      "num_tokens": 492700.0,
      "step": 220
    },
    {
      "epoch": 4.704663212435233,
      "grad_norm": 2.435386896133423,
      "learning_rate": 3.9999999999999996e-05,
      "loss": 0.0937,
      "mean_token_accuracy": 0.9625,
      "num_tokens": 514671.0,
      "step": 230
    },
    {
      "epoch": 4.9119170984455955,
      "grad_norm": 14.27245807647705,
      "learning_rate": 1.4999999999999999e-05,
      "loss": 0.0743,
      "mean_token_accuracy": 0.971875,
      "num_tokens": 537299.0,
      "step": 240
    }
  ],
  "logging_steps": 10,
  "max_steps": 245,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 5,
  "save_steps": 50,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 2072339005440000.0,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}