| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.0, |
| "eval_steps": 500, |
| "global_step": 833, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.012005702708786673, |
| "grad_norm": 13.904192924499512, |
| "learning_rate": 1.0714285714285714e-06, |
| "loss": 2.0907, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.024011405417573347, |
| "grad_norm": 10.684447288513184, |
| "learning_rate": 2.261904761904762e-06, |
| "loss": 1.7351, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.03601710812636002, |
| "grad_norm": 13.9859619140625, |
| "learning_rate": 3.4523809523809528e-06, |
| "loss": 1.9817, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.048022810835146694, |
| "grad_norm": 11.632477760314941, |
| "learning_rate": 4.642857142857144e-06, |
| "loss": 1.8382, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.06002851354393337, |
| "grad_norm": 10.015055656433105, |
| "learning_rate": 5.833333333333334e-06, |
| "loss": 2.1399, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.07203421625272004, |
| "grad_norm": 11.713295936584473, |
| "learning_rate": 7.023809523809524e-06, |
| "loss": 1.8619, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.08403991896150671, |
| "grad_norm": 9.750036239624023, |
| "learning_rate": 8.214285714285714e-06, |
| "loss": 1.9983, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.09604562167029339, |
| "grad_norm": 11.98967170715332, |
| "learning_rate": 9.404761904761905e-06, |
| "loss": 1.8564, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.10805132437908006, |
| "grad_norm": 7.032362461090088, |
| "learning_rate": 9.998900487402431e-06, |
| "loss": 1.8123, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.12005702708786674, |
| "grad_norm": 9.9423246383667, |
| "learning_rate": 9.99010728783628e-06, |
| "loss": 1.7612, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.1320627297966534, |
| "grad_norm": 10.866604804992676, |
| "learning_rate": 9.972536356177037e-06, |
| "loss": 1.8318, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.14406843250544007, |
| "grad_norm": 10.427411079406738, |
| "learning_rate": 9.94621860016312e-06, |
| "loss": 1.9897, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.15607413521422675, |
| "grad_norm": 8.520286560058594, |
| "learning_rate": 9.911200313430767e-06, |
| "loss": 1.9808, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.16807983792301343, |
| "grad_norm": 12.630261421203613, |
| "learning_rate": 9.8675430940823e-06, |
| "loss": 1.9811, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.1800855406318001, |
| "grad_norm": 9.4483060836792, |
| "learning_rate": 9.815323736333405e-06, |
| "loss": 2.0145, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.19209124334058678, |
| "grad_norm": 8.699251174926758, |
| "learning_rate": 9.754634095430062e-06, |
| "loss": 2.0973, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.20409694604937345, |
| "grad_norm": 9.745418548583984, |
| "learning_rate": 9.685580926072713e-06, |
| "loss": 2.1629, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.21610264875816013, |
| "grad_norm": 11.316314697265625, |
| "learning_rate": 9.608285694631884e-06, |
| "loss": 1.8444, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.2281083514669468, |
| "grad_norm": 10.36503791809082, |
| "learning_rate": 9.522884365485599e-06, |
| "loss": 1.9405, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.24011405417573348, |
| "grad_norm": 11.719226837158203, |
| "learning_rate": 9.429527161854402e-06, |
| "loss": 1.8054, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.25211975688452015, |
| "grad_norm": 7.179232120513916, |
| "learning_rate": 9.328378301554698e-06, |
| "loss": 2.1386, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.2641254595933068, |
| "grad_norm": 9.562182426452637, |
| "learning_rate": 9.219615708135226e-06, |
| "loss": 1.8712, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.2761311623020935, |
| "grad_norm": 12.569293022155762, |
| "learning_rate": 9.103430697904776e-06, |
| "loss": 1.9458, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.28813686501088015, |
| "grad_norm": 8.965167045593262, |
| "learning_rate": 8.980027643401694e-06, |
| "loss": 1.9557, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.30014256771966685, |
| "grad_norm": 9.079051971435547, |
| "learning_rate": 8.849623613897126e-06, |
| "loss": 1.8958, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.3121482704284535, |
| "grad_norm": 12.004711151123047, |
| "learning_rate": 8.712447993564362e-06, |
| "loss": 2.0377, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.3241539731372402, |
| "grad_norm": 12.883252143859863, |
| "learning_rate": 8.568742077985945e-06, |
| "loss": 1.9571, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.33615967584602685, |
| "grad_norm": 10.449505805969238, |
| "learning_rate": 8.418758649708299e-06, |
| "loss": 1.8825, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.34816537855481355, |
| "grad_norm": 9.432299613952637, |
| "learning_rate": 8.262761533590468e-06, |
| "loss": 1.8399, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.3601710812636002, |
| "grad_norm": 7.1271185874938965, |
| "learning_rate": 8.101025132729139e-06, |
| "loss": 2.0495, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.3721767839723869, |
| "grad_norm": 10.691527366638184, |
| "learning_rate": 7.933833945776257e-06, |
| "loss": 2.1008, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.38418248668117355, |
| "grad_norm": 9.862756729125977, |
| "learning_rate": 7.761482066498298e-06, |
| "loss": 2.1887, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.39618818938996025, |
| "grad_norm": 8.47981071472168, |
| "learning_rate": 7.584272666457471e-06, |
| "loss": 2.0397, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.4081938920987469, |
| "grad_norm": 10.607376098632812, |
| "learning_rate": 7.402517461724839e-06, |
| "loss": 2.044, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.4201995948075336, |
| "grad_norm": 10.644391059875488, |
| "learning_rate": 7.2165361645634105e-06, |
| "loss": 1.943, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.43220529751632025, |
| "grad_norm": 10.169779777526855, |
| "learning_rate": 7.026655921045736e-06, |
| "loss": 1.9137, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.4442110002251069, |
| "grad_norm": 8.738324165344238, |
| "learning_rate": 6.833210735595215e-06, |
| "loss": 2.0836, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.4562167029338936, |
| "grad_norm": 11.044781684875488, |
| "learning_rate": 6.636540883463385e-06, |
| "loss": 1.8944, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.46822240564268025, |
| "grad_norm": 10.515331268310547, |
| "learning_rate": 6.436992312176669e-06, |
| "loss": 1.8942, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.48022810835146695, |
| "grad_norm": 8.095483779907227, |
| "learning_rate": 6.234916033005421e-06, |
| "loss": 2.1919, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.4922338110602536, |
| "grad_norm": 11.837594032287598, |
| "learning_rate": 6.030667503525726e-06, |
| "loss": 2.1104, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.5042395137690403, |
| "grad_norm": 10.554974555969238, |
| "learning_rate": 5.82460600236002e-06, |
| "loss": 2.0049, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.516245216477827, |
| "grad_norm": 8.677346229553223, |
| "learning_rate": 5.617093997196392e-06, |
| "loss": 2.0641, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.5282509191866136, |
| "grad_norm": 9.323301315307617, |
| "learning_rate": 5.408496507198229e-06, |
| "loss": 1.888, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.5402566218954004, |
| "grad_norm": 10.618653297424316, |
| "learning_rate": 5.199180460925757e-06, |
| "loss": 1.8445, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.552262324604187, |
| "grad_norm": 12.810103416442871, |
| "learning_rate": 4.989514050898893e-06, |
| "loss": 1.8746, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.5642680273129737, |
| "grad_norm": 9.233360290527344, |
| "learning_rate": 4.779866085936762e-06, |
| "loss": 1.9861, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.5762737300217603, |
| "grad_norm": 10.848196983337402, |
| "learning_rate": 4.5706053424131285e-06, |
| "loss": 1.9709, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.5882794327305471, |
| "grad_norm": 11.675863265991211, |
| "learning_rate": 4.362099915568894e-06, |
| "loss": 2.0604, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.6002851354393337, |
| "grad_norm": 10.185128211975098, |
| "learning_rate": 4.15471657202274e-06, |
| "loss": 1.9789, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.6002851354393337, |
| "eval_loss": 1.6066299676895142, |
| "eval_runtime": 41.652, |
| "eval_samples_per_second": 35.557, |
| "eval_steps_per_second": 35.557, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.6122908381481204, |
| "grad_norm": 11.495535850524902, |
| "learning_rate": 3.948820104618828e-06, |
| "loss": 1.9084, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.624296540856907, |
| "grad_norm": 9.106337547302246, |
| "learning_rate": 3.744772690746448e-06, |
| "loss": 1.9345, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.6363022435656938, |
| "grad_norm": 10.831775665283203, |
| "learning_rate": 3.542933255260309e-06, |
| "loss": 2.0163, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.6483079462744804, |
| "grad_norm": 9.374225616455078, |
| "learning_rate": 3.3436568391221215e-06, |
| "loss": 2.0138, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.660313648983267, |
| "grad_norm": 8.958917617797852, |
| "learning_rate": 3.1472939748740604e-06, |
| "loss": 2.0027, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.6723193516920537, |
| "grad_norm": 6.963953018188477, |
| "learning_rate": 2.954190070042654e-06, |
| "loss": 1.8467, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.6843250544008403, |
| "grad_norm": 6.840718746185303, |
| "learning_rate": 2.764684799557714e-06, |
| "loss": 1.963, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.6963307571096271, |
| "grad_norm": 12.296991348266602, |
| "learning_rate": 2.5791115082550495e-06, |
| "loss": 1.8294, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.7083364598184138, |
| "grad_norm": 9.745269775390625, |
| "learning_rate": 2.397796624514001e-06, |
| "loss": 1.8554, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.7203421625272004, |
| "grad_norm": 10.261857986450195, |
| "learning_rate": 2.2210590860611835e-06, |
| "loss": 1.9552, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.732347865235987, |
| "grad_norm": 10.252482414245605, |
| "learning_rate": 2.049209778950518e-06, |
| "loss": 1.9781, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.7443535679447738, |
| "grad_norm": 7.251546382904053, |
| "learning_rate": 1.8825509907063328e-06, |
| "loss": 1.6812, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.7563592706535605, |
| "grad_norm": 10.077775955200195, |
| "learning_rate": 1.7213758785915508e-06, |
| "loss": 1.9387, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.7683649733623471, |
| "grad_norm": 11.14145565032959, |
| "learning_rate": 1.5659679539362071e-06, |
| "loss": 1.9589, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.7803706760711338, |
| "grad_norm": 11.107977867126465, |
| "learning_rate": 1.4166005834334607e-06, |
| "loss": 1.9859, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.7923763787799205, |
| "grad_norm": 7.929206371307373, |
| "learning_rate": 1.2735365082802642e-06, |
| "loss": 1.9113, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.8043820814887072, |
| "grad_norm": 9.809419631958008, |
| "learning_rate": 1.1370273820085731e-06, |
| "loss": 1.9373, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.8163877841974938, |
| "grad_norm": 10.714163780212402, |
| "learning_rate": 1.0073133278200702e-06, |
| "loss": 1.8766, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.8283934869062805, |
| "grad_norm": 8.51906967163086, |
| "learning_rate": 8.846225162030204e-07, |
| "loss": 1.8467, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.8403991896150672, |
| "grad_norm": 10.252246856689453, |
| "learning_rate": 7.691707635742957e-07, |
| "loss": 1.9715, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.8524048923238539, |
| "grad_norm": 10.066018104553223, |
| "learning_rate": 6.611611526525214e-07, |
| "loss": 2.0223, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.8644105950326405, |
| "grad_norm": 10.5, |
| "learning_rate": 5.607836752301527e-07, |
| "loss": 2.0496, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.8764162977414272, |
| "grad_norm": 9.800883293151855, |
| "learning_rate": 4.6821489797282624e-07, |
| "loss": 1.9503, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.8884220004502138, |
| "grad_norm": 11.5232515335083, |
| "learning_rate": 3.8361765183388466e-07, |
| "loss": 1.9714, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.9004277031590006, |
| "grad_norm": 10.428476333618164, |
| "learning_rate": 3.0714074563037043e-07, |
| "loss": 1.8059, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.9124334058677872, |
| "grad_norm": 10.010412216186523, |
| "learning_rate": 2.389187042843416e-07, |
| "loss": 2.0123, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.9244391085765739, |
| "grad_norm": 9.442755699157715, |
| "learning_rate": 1.7907153218994166e-07, |
| "loss": 1.7247, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.9364448112853605, |
| "grad_norm": 11.165854454040527, |
| "learning_rate": 1.277045021224571e-07, |
| "loss": 1.9639, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.9484505139941473, |
| "grad_norm": 10.688188552856445, |
| "learning_rate": 8.490797006069696e-08, |
| "loss": 1.9314, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.9604562167029339, |
| "grad_norm": 8.110855102539062, |
| "learning_rate": 5.075721624840713e-08, |
| "loss": 1.8037, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.9724619194117206, |
| "grad_norm": 7.74312162399292, |
| "learning_rate": 2.5312312774313784e-08, |
| "loss": 1.7496, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.9844676221205072, |
| "grad_norm": 10.240447998046875, |
| "learning_rate": 8.618017903708198e-09, |
| "loss": 1.8276, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.996473324829294, |
| "grad_norm": 9.714399337768555, |
| "learning_rate": 7.036973474688901e-10, |
| "loss": 1.9989, |
| "step": 830 |
| }, |
| { |
| "epoch": 1.0, |
| "step": 833, |
| "total_flos": 5523238276368384.0, |
| "train_loss": 1.9459604552957046, |
| "train_runtime": 1842.4954, |
| "train_samples_per_second": 7.233, |
| "train_steps_per_second": 0.452 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 833, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 100, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 5523238276368384.0, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|