{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0,
  "eval_steps": 500,
  "global_step": 680,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.014713996689350745,
      "grad_norm": 1.3101017475128174,
      "learning_rate": 9.86764705882353e-05,
      "loss": 2.9497,
      "step": 10
    },
    {
      "epoch": 0.02942799337870149,
      "grad_norm": 1.7666537761688232,
      "learning_rate": 9.720588235294117e-05,
      "loss": 2.5707,
      "step": 20
    },
    {
      "epoch": 0.044141990068052236,
      "grad_norm": 1.4447238445281982,
      "learning_rate": 9.573529411764707e-05,
      "loss": 2.281,
      "step": 30
    },
    {
      "epoch": 0.05885598675740298,
      "grad_norm": 1.039144515991211,
      "learning_rate": 9.426470588235294e-05,
      "loss": 2.3904,
      "step": 40
    },
    {
      "epoch": 0.07356998344675372,
      "grad_norm": 1.2274360656738281,
      "learning_rate": 9.279411764705884e-05,
      "loss": 2.5839,
      "step": 50
    },
    {
      "epoch": 0.08828398013610447,
      "grad_norm": 1.1105211973190308,
      "learning_rate": 9.13235294117647e-05,
      "loss": 2.2987,
      "step": 60
    },
    {
      "epoch": 0.10299797682545521,
      "grad_norm": 1.2184361219406128,
      "learning_rate": 8.98529411764706e-05,
      "loss": 2.5504,
      "step": 70
    },
    {
      "epoch": 0.11771197351480596,
      "grad_norm": 1.1057401895523071,
      "learning_rate": 8.838235294117647e-05,
      "loss": 2.4922,
      "step": 80
    },
    {
      "epoch": 0.1324259702041567,
      "grad_norm": 1.1191836595535278,
      "learning_rate": 8.691176470588237e-05,
      "loss": 2.6277,
      "step": 90
    },
    {
      "epoch": 0.14713996689350745,
      "grad_norm": 1.2518597841262817,
      "learning_rate": 8.544117647058823e-05,
      "loss": 2.1856,
      "step": 100
    },
    {
      "epoch": 0.1618539635828582,
      "grad_norm": 1.2593870162963867,
      "learning_rate": 8.397058823529412e-05,
      "loss": 2.39,
      "step": 110
    },
    {
      "epoch": 0.17656796027220895,
      "grad_norm": 1.5582458972930908,
      "learning_rate": 8.25e-05,
      "loss": 2.2159,
      "step": 120
    },
    {
      "epoch": 0.1912819569615597,
      "grad_norm": 1.1720651388168335,
      "learning_rate": 8.102941176470588e-05,
      "loss": 2.5332,
      "step": 130
    },
    {
      "epoch": 0.20599595365091042,
      "grad_norm": 1.3537250757217407,
      "learning_rate": 7.955882352941176e-05,
      "loss": 2.549,
      "step": 140
    },
    {
      "epoch": 0.22070995034026117,
      "grad_norm": 1.440487265586853,
      "learning_rate": 7.808823529411765e-05,
      "loss": 2.6141,
      "step": 150
    },
    {
      "epoch": 0.23542394702961192,
      "grad_norm": 1.2468889951705933,
      "learning_rate": 7.661764705882354e-05,
      "loss": 2.1802,
      "step": 160
    },
    {
      "epoch": 0.25013794371896264,
      "grad_norm": 1.6108534336090088,
      "learning_rate": 7.514705882352941e-05,
      "loss": 2.0655,
      "step": 170
    },
    {
      "epoch": 0.2648519404083134,
      "grad_norm": 1.454962968826294,
      "learning_rate": 7.367647058823531e-05,
      "loss": 2.3067,
      "step": 180
    },
    {
      "epoch": 0.27956593709766414,
      "grad_norm": 1.4071511030197144,
      "learning_rate": 7.220588235294118e-05,
      "loss": 2.378,
      "step": 190
    },
    {
      "epoch": 0.2942799337870149,
      "grad_norm": 1.7635990381240845,
      "learning_rate": 7.073529411764707e-05,
      "loss": 2.6275,
      "step": 200
    },
    {
      "epoch": 0.30899393047636564,
      "grad_norm": 1.4291043281555176,
      "learning_rate": 6.926470588235294e-05,
      "loss": 1.8871,
      "step": 210
    },
    {
      "epoch": 0.3237079271657164,
      "grad_norm": 1.5699518918991089,
      "learning_rate": 6.779411764705882e-05,
      "loss": 2.3781,
      "step": 220
    },
    {
      "epoch": 0.33842192385506714,
      "grad_norm": 1.5319325923919678,
      "learning_rate": 6.632352941176471e-05,
      "loss": 2.1488,
      "step": 230
    },
    {
      "epoch": 0.3531359205444179,
      "grad_norm": 1.505298376083374,
      "learning_rate": 6.485294117647059e-05,
      "loss": 2.6991,
      "step": 240
    },
    {
      "epoch": 0.36784991723376864,
      "grad_norm": 2.733734607696533,
      "learning_rate": 6.338235294117647e-05,
      "loss": 2.1978,
      "step": 250
    },
    {
      "epoch": 0.3825639139231194,
      "grad_norm": 1.6058900356292725,
      "learning_rate": 6.191176470588235e-05,
      "loss": 2.1871,
      "step": 260
    },
    {
      "epoch": 0.3972779106124701,
      "grad_norm": 2.072322130203247,
      "learning_rate": 6.044117647058824e-05,
      "loss": 2.5621,
      "step": 270
    },
    {
      "epoch": 0.41199190730182084,
      "grad_norm": 1.765065312385559,
      "learning_rate": 5.897058823529412e-05,
      "loss": 2.1595,
      "step": 280
    },
    {
      "epoch": 0.4267059039911716,
      "grad_norm": 1.5139451026916504,
      "learning_rate": 5.7499999999999995e-05,
      "loss": 2.2917,
      "step": 290
    },
    {
      "epoch": 0.44141990068052234,
      "grad_norm": 1.7041040658950806,
      "learning_rate": 5.6029411764705884e-05,
      "loss": 2.4275,
      "step": 300
    },
    {
      "epoch": 0.4561338973698731,
      "grad_norm": 1.5826152563095093,
      "learning_rate": 5.455882352941176e-05,
      "loss": 2.5011,
      "step": 310
    },
    {
      "epoch": 0.47084789405922384,
      "grad_norm": 1.692821979522705,
      "learning_rate": 5.308823529411765e-05,
      "loss": 2.3798,
      "step": 320
    },
    {
      "epoch": 0.4855618907485746,
      "grad_norm": 1.8263252973556519,
      "learning_rate": 5.161764705882354e-05,
      "loss": 2.6645,
      "step": 330
    },
    {
      "epoch": 0.5002758874379253,
      "grad_norm": 1.7093111276626587,
      "learning_rate": 5.0147058823529414e-05,
      "loss": 2.4835,
      "step": 340
    },
    {
      "epoch": 0.5149898841272761,
      "grad_norm": 1.7787758111953735,
      "learning_rate": 4.86764705882353e-05,
      "loss": 2.3779,
      "step": 350
    },
    {
      "epoch": 0.5297038808166268,
      "grad_norm": 1.7382874488830566,
      "learning_rate": 4.720588235294118e-05,
      "loss": 2.3799,
      "step": 360
    },
    {
      "epoch": 0.5444178775059776,
      "grad_norm": 1.797096848487854,
      "learning_rate": 4.573529411764706e-05,
      "loss": 2.2148,
      "step": 370
    },
    {
      "epoch": 0.5591318741953283,
      "grad_norm": 1.781445860862732,
      "learning_rate": 4.4264705882352944e-05,
      "loss": 2.0308,
      "step": 380
    },
    {
      "epoch": 0.5738458708846791,
      "grad_norm": 2.1444976329803467,
      "learning_rate": 4.2794117647058827e-05,
      "loss": 2.2169,
      "step": 390
    },
    {
      "epoch": 0.5885598675740298,
      "grad_norm": 1.8745927810668945,
      "learning_rate": 4.13235294117647e-05,
      "loss": 1.9554,
      "step": 400
    },
    {
      "epoch": 0.6032738642633806,
      "grad_norm": 1.5899605751037598,
      "learning_rate": 3.985294117647059e-05,
      "loss": 2.4028,
      "step": 410
    },
    {
      "epoch": 0.6179878609527313,
      "grad_norm": 1.9187322854995728,
      "learning_rate": 3.8382352941176474e-05,
      "loss": 2.2514,
      "step": 420
    },
    {
      "epoch": 0.632701857642082,
      "grad_norm": 1.5589262247085571,
      "learning_rate": 3.6911764705882356e-05,
      "loss": 2.343,
      "step": 430
    },
    {
      "epoch": 0.6474158543314328,
      "grad_norm": 1.5984998941421509,
      "learning_rate": 3.544117647058824e-05,
      "loss": 1.9433,
      "step": 440
    },
    {
      "epoch": 0.6621298510207835,
      "grad_norm": 1.9034677743911743,
      "learning_rate": 3.397058823529412e-05,
      "loss": 2.2025,
      "step": 450
    },
    {
      "epoch": 0.6768438477101343,
      "grad_norm": 1.7389880418777466,
      "learning_rate": 3.2500000000000004e-05,
      "loss": 2.3835,
      "step": 460
    },
    {
      "epoch": 0.691557844399485,
      "grad_norm": 1.8547667264938354,
      "learning_rate": 3.1029411764705886e-05,
      "loss": 2.2641,
      "step": 470
    },
    {
      "epoch": 0.7062718410888358,
      "grad_norm": 1.6838593482971191,
      "learning_rate": 2.9558823529411766e-05,
      "loss": 1.8679,
      "step": 480
    },
    {
      "epoch": 0.7209858377781865,
      "grad_norm": 2.003615617752075,
      "learning_rate": 2.8088235294117648e-05,
      "loss": 2.2402,
      "step": 490
    },
    {
      "epoch": 0.7356998344675373,
      "grad_norm": 1.7946232557296753,
      "learning_rate": 2.661764705882353e-05,
      "loss": 1.9735,
      "step": 500
    },
    {
      "epoch": 0.750413831156888,
      "grad_norm": 1.9260705709457397,
      "learning_rate": 2.5147058823529413e-05,
      "loss": 2.3836,
      "step": 510
    },
    {
      "epoch": 0.7651278278462388,
      "grad_norm": 1.3813334703445435,
      "learning_rate": 2.3676470588235295e-05,
      "loss": 2.4856,
      "step": 520
    },
    {
      "epoch": 0.7798418245355895,
      "grad_norm": 1.6835263967514038,
      "learning_rate": 2.2205882352941178e-05,
      "loss": 2.3702,
      "step": 530
    },
    {
      "epoch": 0.7945558212249402,
      "grad_norm": 2.234776496887207,
      "learning_rate": 2.073529411764706e-05,
      "loss": 2.6312,
      "step": 540
    },
    {
      "epoch": 0.809269817914291,
      "grad_norm": 1.4896553754806519,
      "learning_rate": 1.9264705882352943e-05,
      "loss": 2.2481,
      "step": 550
    },
    {
      "epoch": 0.8239838146036417,
      "grad_norm": 1.92662513256073,
      "learning_rate": 1.7794117647058825e-05,
      "loss": 1.9659,
      "step": 560
    },
    {
      "epoch": 0.8386978112929925,
      "grad_norm": 1.4436355829238892,
      "learning_rate": 1.6323529411764708e-05,
      "loss": 2.3563,
      "step": 570
    },
    {
      "epoch": 0.8534118079823432,
      "grad_norm": 1.8207820653915405,
      "learning_rate": 1.4852941176470589e-05,
      "loss": 2.0522,
      "step": 580
    },
    {
      "epoch": 0.868125804671694,
      "grad_norm": 1.8331910371780396,
      "learning_rate": 1.3382352941176471e-05,
      "loss": 2.4762,
      "step": 590
    },
    {
      "epoch": 0.8828398013610447,
      "grad_norm": 1.7300273180007935,
      "learning_rate": 1.1911764705882354e-05,
      "loss": 2.4929,
      "step": 600
    },
    {
      "epoch": 0.8975537980503955,
      "grad_norm": 1.7423087358474731,
      "learning_rate": 1.0441176470588236e-05,
      "loss": 2.065,
      "step": 610
    },
    {
      "epoch": 0.9122677947397462,
      "grad_norm": 1.853888988494873,
      "learning_rate": 8.970588235294119e-06,
      "loss": 2.3043,
      "step": 620
    },
    {
      "epoch": 0.926981791429097,
      "grad_norm": 1.6552138328552246,
      "learning_rate": 7.5e-06,
      "loss": 2.6556,
      "step": 630
    },
    {
      "epoch": 0.9416957881184477,
      "grad_norm": 1.6745601892471313,
      "learning_rate": 6.029411764705883e-06,
      "loss": 2.3833,
      "step": 640
    },
    {
      "epoch": 0.9564097848077984,
      "grad_norm": 1.6154053211212158,
      "learning_rate": 4.558823529411764e-06,
      "loss": 2.4859,
      "step": 650
    },
    {
      "epoch": 0.9711237814971492,
      "grad_norm": 1.7248449325561523,
      "learning_rate": 3.0882352941176472e-06,
      "loss": 2.4022,
      "step": 660
    },
    {
      "epoch": 0.9858377781864999,
      "grad_norm": 1.7172226905822754,
      "learning_rate": 1.6176470588235297e-06,
      "loss": 2.5658,
      "step": 670
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.6853206157684326,
      "learning_rate": 1.4705882352941178e-07,
      "loss": 2.1013,
      "step": 680
    }
  ],
  "logging_steps": 10,
  "max_steps": 680,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 5.941123206453658e+16,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}