| { |
| "best_global_step": 175, |
| "best_metric": 0.22990073263645172, |
| "best_model_checkpoint": "saves_multiple/prefix-tuning/llama-3-8b-instruct/train_cb_42_1760637133/checkpoint-175", |
| "epoch": 5.0, |
| "eval_steps": 25, |
| "global_step": 250, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.1, |
| "grad_norm": 147.09878540039062, |
| "learning_rate": 1.6000000000000001e-06, |
| "loss": 7.3911, |
| "num_input_tokens_seen": 2720, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.2, |
| "grad_norm": 201.96444702148438, |
| "learning_rate": 3.6000000000000003e-06, |
| "loss": 6.0584, |
| "num_input_tokens_seen": 5536, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.3, |
| "grad_norm": 169.54861450195312, |
| "learning_rate": 5.600000000000001e-06, |
| "loss": 4.1152, |
| "num_input_tokens_seen": 8480, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 46.49274826049805, |
| "learning_rate": 7.600000000000001e-06, |
| "loss": 2.3628, |
| "num_input_tokens_seen": 11776, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.5, |
| "grad_norm": 27.129613876342773, |
| "learning_rate": 9.600000000000001e-06, |
| "loss": 1.1162, |
| "num_input_tokens_seen": 14720, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.5, |
| "eval_loss": 0.7037240266799927, |
| "eval_runtime": 1.2436, |
| "eval_samples_per_second": 40.204, |
| "eval_steps_per_second": 10.453, |
| "num_input_tokens_seen": 14720, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.6, |
| "grad_norm": 48.88370895385742, |
| "learning_rate": 9.992203820909906e-06, |
| "loss": 0.5016, |
| "num_input_tokens_seen": 18560, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.7, |
| "grad_norm": 26.719514846801758, |
| "learning_rate": 9.960573506572391e-06, |
| "loss": 0.2915, |
| "num_input_tokens_seen": 21824, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 9.22452449798584, |
| "learning_rate": 9.904775776745959e-06, |
| "loss": 0.2798, |
| "num_input_tokens_seen": 24288, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.9, |
| "grad_norm": 22.899555206298828, |
| "learning_rate": 9.825082472361558e-06, |
| "loss": 0.3252, |
| "num_input_tokens_seen": 27648, |
| "step": 45 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 11.016155242919922, |
| "learning_rate": 9.721881851187406e-06, |
| "loss": 0.2568, |
| "num_input_tokens_seen": 31456, |
| "step": 50 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_loss": 0.28914472460746765, |
| "eval_runtime": 1.2319, |
| "eval_samples_per_second": 40.588, |
| "eval_steps_per_second": 10.553, |
| "num_input_tokens_seen": 31456, |
| "step": 50 |
| }, |
| { |
| "epoch": 1.1, |
| "grad_norm": 9.274727821350098, |
| "learning_rate": 9.595676696276173e-06, |
| "loss": 0.24, |
| "num_input_tokens_seen": 34976, |
| "step": 55 |
| }, |
| { |
| "epoch": 1.2, |
| "grad_norm": 30.56814956665039, |
| "learning_rate": 9.44708186645649e-06, |
| "loss": 0.2301, |
| "num_input_tokens_seen": 37856, |
| "step": 60 |
| }, |
| { |
| "epoch": 1.3, |
| "grad_norm": 14.439095497131348, |
| "learning_rate": 9.276821300802535e-06, |
| "loss": 0.2214, |
| "num_input_tokens_seen": 41728, |
| "step": 65 |
| }, |
| { |
| "epoch": 1.4, |
| "grad_norm": 105.18769836425781, |
| "learning_rate": 9.085724491675642e-06, |
| "loss": 0.3427, |
| "num_input_tokens_seen": 44704, |
| "step": 70 |
| }, |
| { |
| "epoch": 1.5, |
| "grad_norm": 69.08840942382812, |
| "learning_rate": 8.874722443520898e-06, |
| "loss": 0.254, |
| "num_input_tokens_seen": 47168, |
| "step": 75 |
| }, |
| { |
| "epoch": 1.5, |
| "eval_loss": 0.2438250482082367, |
| "eval_runtime": 1.2262, |
| "eval_samples_per_second": 40.778, |
| "eval_steps_per_second": 10.602, |
| "num_input_tokens_seen": 47168, |
| "step": 75 |
| }, |
| { |
| "epoch": 1.6, |
| "grad_norm": 37.581912994384766, |
| "learning_rate": 8.644843137107058e-06, |
| "loss": 0.2904, |
| "num_input_tokens_seen": 51072, |
| "step": 80 |
| }, |
| { |
| "epoch": 1.7, |
| "grad_norm": 152.4672088623047, |
| "learning_rate": 8.397206521307584e-06, |
| "loss": 0.4201, |
| "num_input_tokens_seen": 54400, |
| "step": 85 |
| }, |
| { |
| "epoch": 1.8, |
| "grad_norm": 128.72598266601562, |
| "learning_rate": 8.133019056822303e-06, |
| "loss": 0.2121, |
| "num_input_tokens_seen": 56736, |
| "step": 90 |
| }, |
| { |
| "epoch": 1.9, |
| "grad_norm": 29.933185577392578, |
| "learning_rate": 7.85356783842216e-06, |
| "loss": 0.2648, |
| "num_input_tokens_seen": 60064, |
| "step": 95 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 10.928328514099121, |
| "learning_rate": 7.560214324352858e-06, |
| "loss": 0.1647, |
| "num_input_tokens_seen": 63168, |
| "step": 100 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_loss": 0.36001044511795044, |
| "eval_runtime": 1.235, |
| "eval_samples_per_second": 40.485, |
| "eval_steps_per_second": 10.526, |
| "num_input_tokens_seen": 63168, |
| "step": 100 |
| }, |
| { |
| "epoch": 2.1, |
| "grad_norm": 70.27168273925781, |
| "learning_rate": 7.254387703447154e-06, |
| "loss": 0.427, |
| "num_input_tokens_seen": 66464, |
| "step": 105 |
| }, |
| { |
| "epoch": 2.2, |
| "grad_norm": 61.89235305786133, |
| "learning_rate": 6.9375779322605154e-06, |
| "loss": 0.277, |
| "num_input_tokens_seen": 70112, |
| "step": 110 |
| }, |
| { |
| "epoch": 2.3, |
| "grad_norm": 8.1712646484375, |
| "learning_rate": 6.611328476152557e-06, |
| "loss": 0.1532, |
| "num_input_tokens_seen": 73376, |
| "step": 115 |
| }, |
| { |
| "epoch": 2.4, |
| "grad_norm": 31.11164665222168, |
| "learning_rate": 6.277228789678953e-06, |
| "loss": 0.174, |
| "num_input_tokens_seen": 76288, |
| "step": 120 |
| }, |
| { |
| "epoch": 2.5, |
| "grad_norm": 10.602115631103516, |
| "learning_rate": 5.936906572928625e-06, |
| "loss": 0.3223, |
| "num_input_tokens_seen": 79424, |
| "step": 125 |
| }, |
| { |
| "epoch": 2.5, |
| "eval_loss": 0.2356501966714859, |
| "eval_runtime": 1.2576, |
| "eval_samples_per_second": 39.757, |
| "eval_steps_per_second": 10.337, |
| "num_input_tokens_seen": 79424, |
| "step": 125 |
| }, |
| { |
| "epoch": 2.6, |
| "grad_norm": 8.446126937866211, |
| "learning_rate": 5.592019841532507e-06, |
| "loss": 0.1352, |
| "num_input_tokens_seen": 82880, |
| "step": 130 |
| }, |
| { |
| "epoch": 2.7, |
| "grad_norm": 55.46540832519531, |
| "learning_rate": 5.244248848978067e-06, |
| "loss": 0.2971, |
| "num_input_tokens_seen": 86560, |
| "step": 135 |
| }, |
| { |
| "epoch": 2.8, |
| "grad_norm": 12.031166076660156, |
| "learning_rate": 4.895287900583216e-06, |
| "loss": 0.1537, |
| "num_input_tokens_seen": 89184, |
| "step": 140 |
| }, |
| { |
| "epoch": 2.9, |
| "grad_norm": 9.252001762390137, |
| "learning_rate": 4.546837099011101e-06, |
| "loss": 0.3131, |
| "num_input_tokens_seen": 92256, |
| "step": 145 |
| }, |
| { |
| "epoch": 3.0, |
| "grad_norm": 35.95872497558594, |
| "learning_rate": 4.200594061540827e-06, |
| "loss": 0.2999, |
| "num_input_tokens_seen": 95168, |
| "step": 150 |
| }, |
| { |
| "epoch": 3.0, |
| "eval_loss": 0.2933087944984436, |
| "eval_runtime": 1.2189, |
| "eval_samples_per_second": 41.021, |
| "eval_steps_per_second": 10.666, |
| "num_input_tokens_seen": 95168, |
| "step": 150 |
| }, |
| { |
| "epoch": 3.1, |
| "grad_norm": 63.808555603027344, |
| "learning_rate": 3.8582456494467214e-06, |
| "loss": 0.2206, |
| "num_input_tokens_seen": 98528, |
| "step": 155 |
| }, |
| { |
| "epoch": 3.2, |
| "grad_norm": 38.348934173583984, |
| "learning_rate": 3.521459749779769e-06, |
| "loss": 0.2139, |
| "num_input_tokens_seen": 101408, |
| "step": 160 |
| }, |
| { |
| "epoch": 3.3, |
| "grad_norm": 116.32037353515625, |
| "learning_rate": 3.1918771495895395e-06, |
| "loss": 0.2016, |
| "num_input_tokens_seen": 104736, |
| "step": 165 |
| }, |
| { |
| "epoch": 3.4, |
| "grad_norm": 56.04789352416992, |
| "learning_rate": 2.871103542174637e-06, |
| "loss": 0.2427, |
| "num_input_tokens_seen": 107840, |
| "step": 170 |
| }, |
| { |
| "epoch": 3.5, |
| "grad_norm": 35.397884368896484, |
| "learning_rate": 2.560701704306336e-06, |
| "loss": 0.244, |
| "num_input_tokens_seen": 111392, |
| "step": 175 |
| }, |
| { |
| "epoch": 3.5, |
| "eval_loss": 0.22990073263645172, |
| "eval_runtime": 1.2427, |
| "eval_samples_per_second": 40.235, |
| "eval_steps_per_second": 10.461, |
| "num_input_tokens_seen": 111392, |
| "step": 175 |
| }, |
| { |
| "epoch": 3.6, |
| "grad_norm": 5.372102737426758, |
| "learning_rate": 2.2621838825372496e-06, |
| "loss": 0.1392, |
| "num_input_tokens_seen": 114720, |
| "step": 180 |
| }, |
| { |
| "epoch": 3.7, |
| "grad_norm": 44.36689376831055, |
| "learning_rate": 1.977004425688126e-06, |
| "loss": 0.1194, |
| "num_input_tokens_seen": 117632, |
| "step": 185 |
| }, |
| { |
| "epoch": 3.8, |
| "grad_norm": 32.87962341308594, |
| "learning_rate": 1.7065526994065973e-06, |
| "loss": 0.1453, |
| "num_input_tokens_seen": 120512, |
| "step": 190 |
| }, |
| { |
| "epoch": 3.9, |
| "grad_norm": 15.784488677978516, |
| "learning_rate": 1.4521463173173966e-06, |
| "loss": 0.2925, |
| "num_input_tokens_seen": 124352, |
| "step": 195 |
| }, |
| { |
| "epoch": 4.0, |
| "grad_norm": 8.471246719360352, |
| "learning_rate": 1.2150247217412186e-06, |
| "loss": 0.2501, |
| "num_input_tokens_seen": 127136, |
| "step": 200 |
| }, |
| { |
| "epoch": 4.0, |
| "eval_loss": 0.23893244564533234, |
| "eval_runtime": 1.2441, |
| "eval_samples_per_second": 40.189, |
| "eval_steps_per_second": 10.449, |
| "num_input_tokens_seen": 127136, |
| "step": 200 |
| }, |
| { |
| "epoch": 4.1, |
| "grad_norm": 31.993316650390625, |
| "learning_rate": 9.963431452563331e-07, |
| "loss": 0.1709, |
| "num_input_tokens_seen": 129664, |
| "step": 205 |
| }, |
| { |
| "epoch": 4.2, |
| "grad_norm": 27.969955444335938, |
| "learning_rate": 7.971669825215789e-07, |
| "loss": 0.2556, |
| "num_input_tokens_seen": 132128, |
| "step": 210 |
| }, |
| { |
| "epoch": 4.3, |
| "grad_norm": 22.807920455932617, |
| "learning_rate": 6.184665997806832e-07, |
| "loss": 0.143, |
| "num_input_tokens_seen": 135200, |
| "step": 215 |
| }, |
| { |
| "epoch": 4.4, |
| "grad_norm": 14.716069221496582, |
| "learning_rate": 4.6111260733545714e-07, |
| "loss": 0.2176, |
| "num_input_tokens_seen": 138368, |
| "step": 220 |
| }, |
| { |
| "epoch": 4.5, |
| "grad_norm": 15.53546142578125, |
| "learning_rate": 3.258716180199278e-07, |
| "loss": 0.1977, |
| "num_input_tokens_seen": 141632, |
| "step": 225 |
| }, |
| { |
| "epoch": 4.5, |
| "eval_loss": 0.23097044229507446, |
| "eval_runtime": 1.2534, |
| "eval_samples_per_second": 39.89, |
| "eval_steps_per_second": 10.371, |
| "num_input_tokens_seen": 141632, |
| "step": 225 |
| }, |
| { |
| "epoch": 4.6, |
| "grad_norm": 18.080829620361328, |
| "learning_rate": 2.134025123396638e-07, |
| "loss": 0.1109, |
| "num_input_tokens_seen": 144960, |
| "step": 230 |
| }, |
| { |
| "epoch": 4.7, |
| "grad_norm": 26.747798919677734, |
| "learning_rate": 1.2425322847218368e-07, |
| "loss": 0.1466, |
| "num_input_tokens_seen": 147936, |
| "step": 235 |
| }, |
| { |
| "epoch": 4.8, |
| "grad_norm": 7.783914089202881, |
| "learning_rate": 5.8858092767236084e-08, |
| "loss": 0.1319, |
| "num_input_tokens_seen": 151392, |
| "step": 240 |
| }, |
| { |
| "epoch": 4.9, |
| "grad_norm": 16.87287712097168, |
| "learning_rate": 1.753570375247815e-08, |
| "loss": 0.1445, |
| "num_input_tokens_seen": 154624, |
| "step": 245 |
| }, |
| { |
| "epoch": 5.0, |
| "grad_norm": 15.167189598083496, |
| "learning_rate": 4.87379953478806e-10, |
| "loss": 0.1052, |
| "num_input_tokens_seen": 158656, |
| "step": 250 |
| }, |
| { |
| "epoch": 5.0, |
| "eval_loss": 0.23779883980751038, |
| "eval_runtime": 1.2348, |
| "eval_samples_per_second": 40.491, |
| "eval_steps_per_second": 10.528, |
| "num_input_tokens_seen": 158656, |
| "step": 250 |
| }, |
| { |
| "epoch": 5.0, |
| "num_input_tokens_seen": 158656, |
| "step": 250, |
| "total_flos": 7144207972564992.0, |
| "train_loss": 0.6316961252689361, |
| "train_runtime": 79.306, |
| "train_samples_per_second": 12.609, |
| "train_steps_per_second": 3.152 |
| } |
| ], |
| "logging_steps": 5, |
| "max_steps": 250, |
| "num_input_tokens_seen": 158656, |
| "num_train_epochs": 5, |
| "save_steps": 25, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 7144207972564992.0, |
| "train_batch_size": 4, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|