| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.0, |
| "eval_steps": 500, |
| "global_step": 680, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.014713996689350745, |
| "grad_norm": 1.3101017475128174, |
| "learning_rate": 9.86764705882353e-05, |
| "loss": 2.9497, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.02942799337870149, |
| "grad_norm": 1.7666537761688232, |
| "learning_rate": 9.720588235294117e-05, |
| "loss": 2.5707, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.044141990068052236, |
| "grad_norm": 1.4447238445281982, |
| "learning_rate": 9.573529411764707e-05, |
| "loss": 2.281, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.05885598675740298, |
| "grad_norm": 1.039144515991211, |
| "learning_rate": 9.426470588235294e-05, |
| "loss": 2.3904, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.07356998344675372, |
| "grad_norm": 1.2274360656738281, |
| "learning_rate": 9.279411764705884e-05, |
| "loss": 2.5839, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.08828398013610447, |
| "grad_norm": 1.1105211973190308, |
| "learning_rate": 9.13235294117647e-05, |
| "loss": 2.2987, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.10299797682545521, |
| "grad_norm": 1.2184361219406128, |
| "learning_rate": 8.98529411764706e-05, |
| "loss": 2.5504, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.11771197351480596, |
| "grad_norm": 1.1057401895523071, |
| "learning_rate": 8.838235294117647e-05, |
| "loss": 2.4922, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.1324259702041567, |
| "grad_norm": 1.1191836595535278, |
| "learning_rate": 8.691176470588237e-05, |
| "loss": 2.6277, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.14713996689350745, |
| "grad_norm": 1.2518597841262817, |
| "learning_rate": 8.544117647058823e-05, |
| "loss": 2.1856, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.1618539635828582, |
| "grad_norm": 1.2593870162963867, |
| "learning_rate": 8.397058823529412e-05, |
| "loss": 2.39, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.17656796027220895, |
| "grad_norm": 1.5582458972930908, |
| "learning_rate": 8.25e-05, |
| "loss": 2.2159, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.1912819569615597, |
| "grad_norm": 1.1720651388168335, |
| "learning_rate": 8.102941176470588e-05, |
| "loss": 2.5332, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.20599595365091042, |
| "grad_norm": 1.3537250757217407, |
| "learning_rate": 7.955882352941176e-05, |
| "loss": 2.549, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.22070995034026117, |
| "grad_norm": 1.440487265586853, |
| "learning_rate": 7.808823529411765e-05, |
| "loss": 2.6141, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.23542394702961192, |
| "grad_norm": 1.2468889951705933, |
| "learning_rate": 7.661764705882354e-05, |
| "loss": 2.1802, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.25013794371896264, |
| "grad_norm": 1.6108534336090088, |
| "learning_rate": 7.514705882352941e-05, |
| "loss": 2.0655, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.2648519404083134, |
| "grad_norm": 1.454962968826294, |
| "learning_rate": 7.367647058823531e-05, |
| "loss": 2.3067, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.27956593709766414, |
| "grad_norm": 1.4071511030197144, |
| "learning_rate": 7.220588235294118e-05, |
| "loss": 2.378, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.2942799337870149, |
| "grad_norm": 1.7635990381240845, |
| "learning_rate": 7.073529411764707e-05, |
| "loss": 2.6275, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.30899393047636564, |
| "grad_norm": 1.4291043281555176, |
| "learning_rate": 6.926470588235294e-05, |
| "loss": 1.8871, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.3237079271657164, |
| "grad_norm": 1.5699518918991089, |
| "learning_rate": 6.779411764705882e-05, |
| "loss": 2.3781, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.33842192385506714, |
| "grad_norm": 1.5319325923919678, |
| "learning_rate": 6.632352941176471e-05, |
| "loss": 2.1488, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.3531359205444179, |
| "grad_norm": 1.505298376083374, |
| "learning_rate": 6.485294117647059e-05, |
| "loss": 2.6991, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.36784991723376864, |
| "grad_norm": 2.733734607696533, |
| "learning_rate": 6.338235294117647e-05, |
| "loss": 2.1978, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.3825639139231194, |
| "grad_norm": 1.6058900356292725, |
| "learning_rate": 6.191176470588235e-05, |
| "loss": 2.1871, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.3972779106124701, |
| "grad_norm": 2.072322130203247, |
| "learning_rate": 6.044117647058824e-05, |
| "loss": 2.5621, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.41199190730182084, |
| "grad_norm": 1.765065312385559, |
| "learning_rate": 5.897058823529412e-05, |
| "loss": 2.1595, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.4267059039911716, |
| "grad_norm": 1.5139451026916504, |
| "learning_rate": 5.7499999999999995e-05, |
| "loss": 2.2917, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.44141990068052234, |
| "grad_norm": 1.7041040658950806, |
| "learning_rate": 5.6029411764705884e-05, |
| "loss": 2.4275, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.4561338973698731, |
| "grad_norm": 1.5826152563095093, |
| "learning_rate": 5.455882352941176e-05, |
| "loss": 2.5011, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.47084789405922384, |
| "grad_norm": 1.692821979522705, |
| "learning_rate": 5.308823529411765e-05, |
| "loss": 2.3798, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.4855618907485746, |
| "grad_norm": 1.8263252973556519, |
| "learning_rate": 5.161764705882354e-05, |
| "loss": 2.6645, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.5002758874379253, |
| "grad_norm": 1.7093111276626587, |
| "learning_rate": 5.0147058823529414e-05, |
| "loss": 2.4835, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.5149898841272761, |
| "grad_norm": 1.7787758111953735, |
| "learning_rate": 4.86764705882353e-05, |
| "loss": 2.3779, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.5297038808166268, |
| "grad_norm": 1.7382874488830566, |
| "learning_rate": 4.720588235294118e-05, |
| "loss": 2.3799, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.5444178775059776, |
| "grad_norm": 1.797096848487854, |
| "learning_rate": 4.573529411764706e-05, |
| "loss": 2.2148, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.5591318741953283, |
| "grad_norm": 1.781445860862732, |
| "learning_rate": 4.4264705882352944e-05, |
| "loss": 2.0308, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.5738458708846791, |
| "grad_norm": 2.1444976329803467, |
| "learning_rate": 4.2794117647058827e-05, |
| "loss": 2.2169, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.5885598675740298, |
| "grad_norm": 1.8745927810668945, |
| "learning_rate": 4.13235294117647e-05, |
| "loss": 1.9554, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.6032738642633806, |
| "grad_norm": 1.5899605751037598, |
| "learning_rate": 3.985294117647059e-05, |
| "loss": 2.4028, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.6179878609527313, |
| "grad_norm": 1.9187322854995728, |
| "learning_rate": 3.8382352941176474e-05, |
| "loss": 2.2514, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.632701857642082, |
| "grad_norm": 1.5589262247085571, |
| "learning_rate": 3.6911764705882356e-05, |
| "loss": 2.343, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.6474158543314328, |
| "grad_norm": 1.5984998941421509, |
| "learning_rate": 3.544117647058824e-05, |
| "loss": 1.9433, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.6621298510207835, |
| "grad_norm": 1.9034677743911743, |
| "learning_rate": 3.397058823529412e-05, |
| "loss": 2.2025, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.6768438477101343, |
| "grad_norm": 1.7389880418777466, |
| "learning_rate": 3.2500000000000004e-05, |
| "loss": 2.3835, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.691557844399485, |
| "grad_norm": 1.8547667264938354, |
| "learning_rate": 3.1029411764705886e-05, |
| "loss": 2.2641, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.7062718410888358, |
| "grad_norm": 1.6838593482971191, |
| "learning_rate": 2.9558823529411766e-05, |
| "loss": 1.8679, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.7209858377781865, |
| "grad_norm": 2.003615617752075, |
| "learning_rate": 2.8088235294117648e-05, |
| "loss": 2.2402, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.7356998344675373, |
| "grad_norm": 1.7946232557296753, |
| "learning_rate": 2.661764705882353e-05, |
| "loss": 1.9735, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.750413831156888, |
| "grad_norm": 1.9260705709457397, |
| "learning_rate": 2.5147058823529413e-05, |
| "loss": 2.3836, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.7651278278462388, |
| "grad_norm": 1.3813334703445435, |
| "learning_rate": 2.3676470588235295e-05, |
| "loss": 2.4856, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.7798418245355895, |
| "grad_norm": 1.6835263967514038, |
| "learning_rate": 2.2205882352941178e-05, |
| "loss": 2.3702, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.7945558212249402, |
| "grad_norm": 2.234776496887207, |
| "learning_rate": 2.073529411764706e-05, |
| "loss": 2.6312, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.809269817914291, |
| "grad_norm": 1.4896553754806519, |
| "learning_rate": 1.9264705882352943e-05, |
| "loss": 2.2481, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.8239838146036417, |
| "grad_norm": 1.92662513256073, |
| "learning_rate": 1.7794117647058825e-05, |
| "loss": 1.9659, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.8386978112929925, |
| "grad_norm": 1.4436355829238892, |
| "learning_rate": 1.6323529411764708e-05, |
| "loss": 2.3563, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.8534118079823432, |
| "grad_norm": 1.8207820653915405, |
| "learning_rate": 1.4852941176470589e-05, |
| "loss": 2.0522, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.868125804671694, |
| "grad_norm": 1.8331910371780396, |
| "learning_rate": 1.3382352941176471e-05, |
| "loss": 2.4762, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.8828398013610447, |
| "grad_norm": 1.7300273180007935, |
| "learning_rate": 1.1911764705882354e-05, |
| "loss": 2.4929, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.8975537980503955, |
| "grad_norm": 1.7423087358474731, |
| "learning_rate": 1.0441176470588236e-05, |
| "loss": 2.065, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.9122677947397462, |
| "grad_norm": 1.853888988494873, |
| "learning_rate": 8.970588235294119e-06, |
| "loss": 2.3043, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.926981791429097, |
| "grad_norm": 1.6552138328552246, |
| "learning_rate": 7.5e-06, |
| "loss": 2.6556, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.9416957881184477, |
| "grad_norm": 1.6745601892471313, |
| "learning_rate": 6.029411764705883e-06, |
| "loss": 2.3833, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.9564097848077984, |
| "grad_norm": 1.6154053211212158, |
| "learning_rate": 4.558823529411764e-06, |
| "loss": 2.4859, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.9711237814971492, |
| "grad_norm": 1.7248449325561523, |
| "learning_rate": 3.0882352941176472e-06, |
| "loss": 2.4022, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.9858377781864999, |
| "grad_norm": 1.7172226905822754, |
| "learning_rate": 1.6176470588235297e-06, |
| "loss": 2.5658, |
| "step": 670 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 2.6853206157684326, |
| "learning_rate": 1.4705882352941178e-07, |
| "loss": 2.1013, |
| "step": 680 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 680, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 5.941123206453658e+16, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|