| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.7647058823529411, |
| "eval_steps": 500, |
| "global_step": 60, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.029411764705882353, |
| "grad_norm": 0.9765625, |
| "learning_rate": 2e-05, |
| "loss": 2.0227, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.058823529411764705, |
| "grad_norm": 0.8125, |
| "learning_rate": 4e-05, |
| "loss": 1.6615, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.08823529411764706, |
| "grad_norm": 0.90625, |
| "learning_rate": 6e-05, |
| "loss": 1.9129, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.11764705882352941, |
| "grad_norm": 1.296875, |
| "learning_rate": 8e-05, |
| "loss": 2.2892, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.14705882352941177, |
| "grad_norm": 1.2578125, |
| "learning_rate": 0.0001, |
| "loss": 2.0247, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.17647058823529413, |
| "grad_norm": 0.98828125, |
| "learning_rate": 0.00012, |
| "loss": 1.8962, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.20588235294117646, |
| "grad_norm": 1.0546875, |
| "learning_rate": 0.00014, |
| "loss": 1.703, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.23529411764705882, |
| "grad_norm": 0.9140625, |
| "learning_rate": 0.00016, |
| "loss": 1.5534, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.2647058823529412, |
| "grad_norm": 0.93359375, |
| "learning_rate": 0.00018, |
| "loss": 1.4303, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.29411764705882354, |
| "grad_norm": 0.83203125, |
| "learning_rate": 0.0002, |
| "loss": 1.3701, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.3235294117647059, |
| "grad_norm": 1.0703125, |
| "learning_rate": 0.00019985334138511237, |
| "loss": 1.3449, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.35294117647058826, |
| "grad_norm": 0.9765625, |
| "learning_rate": 0.00019941379571543596, |
| "loss": 1.3191, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.38235294117647056, |
| "grad_norm": 0.94140625, |
| "learning_rate": 0.00019868265225415265, |
| "loss": 1.0243, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.4117647058823529, |
| "grad_norm": 0.91015625, |
| "learning_rate": 0.00019766205557100868, |
| "loss": 1.0321, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.4411764705882353, |
| "grad_norm": 1.21875, |
| "learning_rate": 0.0001963549992519223, |
| "loss": 1.2083, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.47058823529411764, |
| "grad_norm": 1.140625, |
| "learning_rate": 0.00019476531711828027, |
| "loss": 1.2976, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.5, |
| "grad_norm": 1.125, |
| "learning_rate": 0.00019289767198167916, |
| "loss": 1.0929, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.5294117647058824, |
| "grad_norm": 1.1015625, |
| "learning_rate": 0.00019075754196709572, |
| "loss": 0.9081, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.5588235294117647, |
| "grad_norm": 1.15625, |
| "learning_rate": 0.0001883512044446023, |
| "loss": 0.9417, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.5882352941176471, |
| "grad_norm": 0.90234375, |
| "learning_rate": 0.00018568571761675893, |
| "loss": 0.9065, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.6176470588235294, |
| "grad_norm": 1.1796875, |
| "learning_rate": 0.00018276889981568906, |
| "loss": 0.9419, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.6470588235294118, |
| "grad_norm": 1.109375, |
| "learning_rate": 0.00017960930657056438, |
| "loss": 0.9199, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.6764705882352942, |
| "grad_norm": 1.15625, |
| "learning_rate": 0.00017621620551276366, |
| "loss": 0.9139, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.7058823529411765, |
| "grad_norm": 1.046875, |
| "learning_rate": 0.0001725995491923131, |
| "loss": 0.7512, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.7352941176470589, |
| "grad_norm": 1.234375, |
| "learning_rate": 0.00016876994588534234, |
| "loss": 0.7435, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.7647058823529411, |
| "grad_norm": 0.95703125, |
| "learning_rate": 0.00016473862847818277, |
| "loss": 0.7502, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.7941176470588235, |
| "grad_norm": 1.390625, |
| "learning_rate": 0.00016051742151937655, |
| "loss": 0.956, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.8235294117647058, |
| "grad_norm": 1.0390625, |
| "learning_rate": 0.00015611870653623825, |
| "loss": 0.8827, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.8529411764705882, |
| "grad_norm": 0.94140625, |
| "learning_rate": 0.00015155538571770218, |
| "loss": 0.7322, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.8823529411764706, |
| "grad_norm": 1.0078125, |
| "learning_rate": 0.00014684084406997903, |
| "loss": 0.7407, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.9117647058823529, |
| "grad_norm": 0.89453125, |
| "learning_rate": 0.00014198891015602646, |
| "loss": 0.8105, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.9411764705882353, |
| "grad_norm": 1.3125, |
| "learning_rate": 0.00013701381553399145, |
| "loss": 0.9126, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.9705882352941176, |
| "grad_norm": 1.1953125, |
| "learning_rate": 0.000131930153013598, |
| "loss": 0.9439, |
| "step": 33 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 1.1484375, |
| "learning_rate": 0.00012675283385292212, |
| "loss": 0.7298, |
| "step": 34 |
| }, |
| { |
| "epoch": 1.0294117647058822, |
| "grad_norm": 0.87890625, |
| "learning_rate": 0.00012149704402110243, |
| "loss": 0.4975, |
| "step": 35 |
| }, |
| { |
| "epoch": 1.0588235294117647, |
| "grad_norm": 0.84765625, |
| "learning_rate": 0.0001161781996552765, |
| "loss": 0.8089, |
| "step": 36 |
| }, |
| { |
| "epoch": 1.088235294117647, |
| "grad_norm": 0.83203125, |
| "learning_rate": 0.00011081190184239419, |
| "loss": 0.6845, |
| "step": 37 |
| }, |
| { |
| "epoch": 1.1176470588235294, |
| "grad_norm": 0.8515625, |
| "learning_rate": 0.00010541389085854176, |
| "loss": 0.5513, |
| "step": 38 |
| }, |
| { |
| "epoch": 1.1470588235294117, |
| "grad_norm": 0.76953125, |
| "learning_rate": 0.0001, |
| "loss": 0.6271, |
| "step": 39 |
| }, |
| { |
| "epoch": 1.1764705882352942, |
| "grad_norm": 0.71484375, |
| "learning_rate": 9.458610914145826e-05, |
| "loss": 0.5144, |
| "step": 40 |
| }, |
| { |
| "epoch": 1.2058823529411764, |
| "grad_norm": 0.87890625, |
| "learning_rate": 8.918809815760585e-05, |
| "loss": 0.7141, |
| "step": 41 |
| }, |
| { |
| "epoch": 1.2352941176470589, |
| "grad_norm": 0.91796875, |
| "learning_rate": 8.382180034472353e-05, |
| "loss": 0.6109, |
| "step": 42 |
| }, |
| { |
| "epoch": 1.2647058823529411, |
| "grad_norm": 0.921875, |
| "learning_rate": 7.85029559788976e-05, |
| "loss": 0.6952, |
| "step": 43 |
| }, |
| { |
| "epoch": 1.2941176470588236, |
| "grad_norm": 0.77734375, |
| "learning_rate": 7.324716614707793e-05, |
| "loss": 0.6302, |
| "step": 44 |
| }, |
| { |
| "epoch": 1.3235294117647058, |
| "grad_norm": 0.734375, |
| "learning_rate": 6.806984698640202e-05, |
| "loss": 0.4617, |
| "step": 45 |
| }, |
| { |
| "epoch": 1.3529411764705883, |
| "grad_norm": 0.9921875, |
| "learning_rate": 6.298618446600856e-05, |
| "loss": 0.4288, |
| "step": 46 |
| }, |
| { |
| "epoch": 1.3823529411764706, |
| "grad_norm": 0.859375, |
| "learning_rate": 5.801108984397354e-05, |
| "loss": 0.5413, |
| "step": 47 |
| }, |
| { |
| "epoch": 1.4117647058823528, |
| "grad_norm": 0.83203125, |
| "learning_rate": 5.3159155930021e-05, |
| "loss": 0.5586, |
| "step": 48 |
| }, |
| { |
| "epoch": 1.4411764705882353, |
| "grad_norm": 0.96875, |
| "learning_rate": 4.844461428229782e-05, |
| "loss": 0.7129, |
| "step": 49 |
| }, |
| { |
| "epoch": 1.4705882352941178, |
| "grad_norm": 0.73828125, |
| "learning_rate": 4.388129346376178e-05, |
| "loss": 0.5006, |
| "step": 50 |
| }, |
| { |
| "epoch": 1.5, |
| "grad_norm": 0.88671875, |
| "learning_rate": 3.948257848062351e-05, |
| "loss": 0.6091, |
| "step": 51 |
| }, |
| { |
| "epoch": 1.5294117647058822, |
| "grad_norm": 1.015625, |
| "learning_rate": 3.5261371521817244e-05, |
| "loss": 0.5783, |
| "step": 52 |
| }, |
| { |
| "epoch": 1.5588235294117647, |
| "grad_norm": 1.1875, |
| "learning_rate": 3.123005411465766e-05, |
| "loss": 0.817, |
| "step": 53 |
| }, |
| { |
| "epoch": 1.5882352941176472, |
| "grad_norm": 0.88671875, |
| "learning_rate": 2.7400450807686938e-05, |
| "loss": 0.632, |
| "step": 54 |
| }, |
| { |
| "epoch": 1.6176470588235294, |
| "grad_norm": 0.97265625, |
| "learning_rate": 2.3783794487236365e-05, |
| "loss": 0.6843, |
| "step": 55 |
| }, |
| { |
| "epoch": 1.6470588235294117, |
| "grad_norm": 0.76953125, |
| "learning_rate": 2.0390693429435627e-05, |
| "loss": 0.448, |
| "step": 56 |
| }, |
| { |
| "epoch": 1.6764705882352942, |
| "grad_norm": 0.7578125, |
| "learning_rate": 1.7231100184310956e-05, |
| "loss": 0.4553, |
| "step": 57 |
| }, |
| { |
| "epoch": 1.7058823529411766, |
| "grad_norm": 0.89453125, |
| "learning_rate": 1.4314282383241096e-05, |
| "loss": 0.3936, |
| "step": 58 |
| }, |
| { |
| "epoch": 1.7352941176470589, |
| "grad_norm": 1.046875, |
| "learning_rate": 1.1648795555397719e-05, |
| "loss": 0.6992, |
| "step": 59 |
| }, |
| { |
| "epoch": 1.7647058823529411, |
| "grad_norm": 0.98046875, |
| "learning_rate": 9.242458032904311e-06, |
| "loss": 0.6984, |
| "step": 60 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 68, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 2, |
| "save_steps": 12, |
| "total_flos": 1.776820044127273e+17, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|