{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.2130013831258646,
  "eval_steps": 50,
  "global_step": 2700,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.04098150709492342,
      "grad_norm": 0.5333279371261597,
      "learning_rate": 3.663003663003663e-06,
      "loss": 2.2117,
      "step": 50
    },
    {
      "epoch": 0.04098150709492342,
      "eval_loss": 2.7143771648406982,
      "eval_runtime": 282.6413,
      "eval_samples_per_second": 0.768,
      "eval_steps_per_second": 0.099,
      "step": 50
    },
    {
      "epoch": 0.08196301418984683,
      "grad_norm": 0.42295441031455994,
      "learning_rate": 7.326007326007326e-06,
      "loss": 2.1609,
      "step": 100
    },
    {
      "epoch": 0.08196301418984683,
      "eval_loss": 2.6825826168060303,
      "eval_runtime": 283.0578,
      "eval_samples_per_second": 0.767,
      "eval_steps_per_second": 0.099,
      "step": 100
    },
    {
      "epoch": 0.12294452128477025,
      "grad_norm": 0.277256041765213,
      "learning_rate": 1.098901098901099e-05,
      "loss": 2.0212,
      "step": 150
    },
    {
      "epoch": 0.12294452128477025,
      "eval_loss": 2.602968215942383,
      "eval_runtime": 282.5654,
      "eval_samples_per_second": 0.768,
      "eval_steps_per_second": 0.099,
      "step": 150
    },
    {
      "epoch": 0.16392602837969367,
      "grad_norm": 0.2273157685995102,
      "learning_rate": 1.4652014652014653e-05,
      "loss": 1.8938,
      "step": 200
    },
    {
      "epoch": 0.16392602837969367,
      "eval_loss": 2.506340980529785,
      "eval_runtime": 283.8897,
      "eval_samples_per_second": 0.764,
      "eval_steps_per_second": 0.099,
      "step": 200
    },
    {
      "epoch": 0.20490753547461707,
      "grad_norm": 0.21983110904693604,
      "learning_rate": 1.8315018315018315e-05,
      "loss": 1.7918,
      "step": 250
    },
    {
      "epoch": 0.20490753547461707,
      "eval_loss": 2.4188530445098877,
      "eval_runtime": 283.2047,
      "eval_samples_per_second": 0.766,
      "eval_steps_per_second": 0.099,
      "step": 250
    },
    {
      "epoch": 0.2458890425695405,
      "grad_norm": 0.20148395001888275,
      "learning_rate": 1.999406558079547e-05,
      "loss": 1.7115,
      "step": 300
    },
    {
      "epoch": 0.2458890425695405,
      "eval_loss": 2.3436834812164307,
      "eval_runtime": 282.2728,
      "eval_samples_per_second": 0.769,
      "eval_steps_per_second": 0.099,
      "step": 300
    },
    {
      "epoch": 0.2868705496644639,
      "grad_norm": 0.20601870119571686,
      "learning_rate": 1.9951769064396967e-05,
      "loss": 1.6595,
      "step": 350
    },
    {
      "epoch": 0.2868705496644639,
      "eval_loss": 2.3192338943481445,
      "eval_runtime": 282.1213,
      "eval_samples_per_second": 0.769,
      "eval_steps_per_second": 0.099,
      "step": 350
    },
    {
      "epoch": 0.32785205675938733,
      "grad_norm": 0.19198212027549744,
      "learning_rate": 1.986897612915546e-05,
      "loss": 1.6081,
      "step": 400
    },
    {
      "epoch": 0.32785205675938733,
      "eval_loss": 2.3058478832244873,
      "eval_runtime": 282.7227,
      "eval_samples_per_second": 0.768,
      "eval_steps_per_second": 0.099,
      "step": 400
    },
    {
      "epoch": 0.36883356385431076,
      "grad_norm": 0.19531460106372833,
      "learning_rate": 1.9746023681741606e-05,
      "loss": 1.6127,
      "step": 450
    },
    {
      "epoch": 0.36883356385431076,
      "eval_loss": 2.2954719066619873,
      "eval_runtime": 283.3445,
      "eval_samples_per_second": 0.766,
      "eval_steps_per_second": 0.099,
      "step": 450
    },
    {
      "epoch": 0.40981507094923414,
      "grad_norm": 0.2188873440027237,
      "learning_rate": 1.9583412048657773e-05,
      "loss": 1.5999,
      "step": 500
    },
    {
      "epoch": 0.40981507094923414,
      "eval_loss": 2.2856028079986572,
      "eval_runtime": 282.6955,
      "eval_samples_per_second": 0.768,
      "eval_steps_per_second": 0.099,
      "step": 500
    },
    {
      "epoch": 0.45079657804415757,
      "grad_norm": 0.2119743824005127,
      "learning_rate": 1.9381802940275198e-05,
      "loss": 1.6074,
      "step": 550
    },
    {
      "epoch": 0.45079657804415757,
      "eval_loss": 2.2773895263671875,
      "eval_runtime": 283.317,
      "eval_samples_per_second": 0.766,
      "eval_steps_per_second": 0.099,
      "step": 550
    },
    {
      "epoch": 0.491778085139081,
      "grad_norm": 0.21725280582904816,
      "learning_rate": 1.914201675815694e-05,
      "loss": 1.5989,
      "step": 600
    },
    {
      "epoch": 0.491778085139081,
      "eval_loss": 2.2710676193237305,
      "eval_runtime": 283.5744,
      "eval_samples_per_second": 0.765,
      "eval_steps_per_second": 0.099,
      "step": 600
    },
    {
      "epoch": 0.5327595922340044,
      "grad_norm": 0.22087362408638,
      "learning_rate": 1.8865029256623765e-05,
      "loss": 1.5708,
      "step": 650
    },
    {
      "epoch": 0.5327595922340044,
      "eval_loss": 2.2653110027313232,
      "eval_runtime": 282.6403,
      "eval_samples_per_second": 0.768,
      "eval_steps_per_second": 0.099,
      "step": 650
    },
    {
      "epoch": 0.5737410993289278,
      "grad_norm": 0.24597273766994476,
      "learning_rate": 1.855196757214796e-05,
      "loss": 1.5981,
      "step": 700
    },
    {
      "epoch": 0.5737410993289278,
      "eval_loss": 2.2589895725250244,
      "eval_runtime": 282.6053,
      "eval_samples_per_second": 0.768,
      "eval_steps_per_second": 0.099,
      "step": 700
    },
    {
      "epoch": 0.6147226064238512,
      "grad_norm": 0.20500172674655914,
      "learning_rate": 1.8204105636732604e-05,
      "loss": 1.5859,
      "step": 750
    },
    {
      "epoch": 0.6147226064238512,
      "eval_loss": 2.2537271976470947,
      "eval_runtime": 282.8109,
      "eval_samples_per_second": 0.767,
      "eval_steps_per_second": 0.099,
      "step": 750
    },
    {
      "epoch": 0.6557041135187747,
      "grad_norm": 0.24009792506694794,
      "learning_rate": 1.782285899394034e-05,
      "loss": 1.5765,
      "step": 800
    },
    {
      "epoch": 0.6557041135187747,
      "eval_loss": 2.250108242034912,
      "eval_runtime": 282.295,
      "eval_samples_per_second": 0.769,
      "eval_steps_per_second": 0.099,
      "step": 800
    },
    {
      "epoch": 0.6966856206136981,
      "grad_norm": 0.23266170918941498,
      "learning_rate": 1.74097790386668e-05,
      "loss": 1.5676,
      "step": 850
    },
    {
      "epoch": 0.6966856206136981,
      "eval_loss": 2.2453207969665527,
      "eval_runtime": 281.9055,
      "eval_samples_per_second": 0.77,
      "eval_steps_per_second": 0.099,
      "step": 850
    },
    {
      "epoch": 0.7376671277086215,
      "grad_norm": 0.2351984679698944,
      "learning_rate": 1.6966546704098455e-05,
      "loss": 1.5688,
      "step": 900
    },
    {
      "epoch": 0.7376671277086215,
      "eval_loss": 2.2418150901794434,
      "eval_runtime": 281.9821,
      "eval_samples_per_second": 0.77,
      "eval_steps_per_second": 0.099,
      "step": 900
    },
    {
      "epoch": 0.7786486348035448,
      "grad_norm": 0.237622931599617,
      "learning_rate": 1.6494965621544403e-05,
      "loss": 1.5643,
      "step": 950
    },
    {
      "epoch": 0.7786486348035448,
      "eval_loss": 2.2379164695739746,
      "eval_runtime": 282.2084,
      "eval_samples_per_second": 0.769,
      "eval_steps_per_second": 0.099,
      "step": 950
    },
    {
      "epoch": 0.8196301418984683,
      "grad_norm": 0.24924997985363007,
      "learning_rate": 1.5996954780976568e-05,
      "loss": 1.5346,
      "step": 1000
    },
    {
      "epoch": 0.8196301418984683,
      "eval_loss": 2.2346854209899902,
      "eval_runtime": 283.4225,
      "eval_samples_per_second": 0.766,
      "eval_steps_per_second": 0.099,
      "step": 1000
    },
    {
      "epoch": 0.8606116489933917,
      "grad_norm": 0.24776747822761536,
      "learning_rate": 1.547454072214457e-05,
      "loss": 1.5507,
      "step": 1050
    },
    {
      "epoch": 0.8606116489933917,
      "eval_loss": 2.2320806980133057,
      "eval_runtime": 282.9227,
      "eval_samples_per_second": 0.767,
      "eval_steps_per_second": 0.099,
      "step": 1050
    },
    {
      "epoch": 0.9015931560883151,
      "grad_norm": 0.2700960040092468,
      "learning_rate": 1.4929849288041656e-05,
      "loss": 1.5582,
      "step": 1100
    },
    {
      "epoch": 0.9015931560883151,
      "eval_loss": 2.2285408973693848,
      "eval_runtime": 283.0683,
      "eval_samples_per_second": 0.767,
      "eval_steps_per_second": 0.099,
      "step": 1100
    },
    {
      "epoch": 0.9425746631832386,
      "grad_norm": 0.26280835270881653,
      "learning_rate": 1.4365096974279093e-05,
      "loss": 1.5275,
      "step": 1150
    },
    {
      "epoch": 0.9425746631832386,
      "eval_loss": 2.2259719371795654,
      "eval_runtime": 282.6889,
      "eval_samples_per_second": 0.768,
      "eval_steps_per_second": 0.099,
      "step": 1150
    },
    {
      "epoch": 0.983556170278162,
      "grad_norm": 0.2555929124355316,
      "learning_rate": 1.3782581909570757e-05,
      "loss": 1.523,
      "step": 1200
    },
    {
      "epoch": 0.983556170278162,
      "eval_loss": 2.2228543758392334,
      "eval_runtime": 284.1011,
      "eval_samples_per_second": 0.764,
      "eval_steps_per_second": 0.099,
      "step": 1200
    },
    {
      "epoch": 1.0245376773730854,
      "grad_norm": 0.27351436018943787,
      "learning_rate": 1.3184674504030679e-05,
      "loss": 1.5354,
      "step": 1250
    },
    {
      "epoch": 1.0245376773730854,
      "eval_loss": 2.2217090129852295,
      "eval_runtime": 284.3948,
      "eval_samples_per_second": 0.763,
      "eval_steps_per_second": 0.098,
      "step": 1250
    },
    {
      "epoch": 1.0655191844680088,
      "grad_norm": 0.2628322243690491,
      "learning_rate": 1.2573807803338216e-05,
      "loss": 1.5386,
      "step": 1300
    },
    {
      "epoch": 1.0655191844680088,
      "eval_loss": 2.2189579010009766,
      "eval_runtime": 282.9448,
      "eval_samples_per_second": 0.767,
      "eval_steps_per_second": 0.099,
      "step": 1300
    },
    {
      "epoch": 1.1065006915629323,
      "grad_norm": 0.27614346146583557,
      "learning_rate": 1.1952467588022282e-05,
      "loss": 1.5338,
      "step": 1350
    },
    {
      "epoch": 1.1065006915629323,
      "eval_loss": 2.21720814704895,
      "eval_runtime": 282.859,
      "eval_samples_per_second": 0.767,
      "eval_steps_per_second": 0.099,
      "step": 1350
    },
    {
      "epoch": 1.1474821986578556,
      "grad_norm": 0.27579498291015625,
      "learning_rate": 1.1323182258153314e-05,
      "loss": 1.5292,
      "step": 1400
    },
    {
      "epoch": 1.1474821986578556,
      "eval_loss": 2.2148287296295166,
      "eval_runtime": 283.7073,
      "eval_samples_per_second": 0.765,
      "eval_steps_per_second": 0.099,
      "step": 1400
    },
    {
      "epoch": 1.1884637057527792,
      "grad_norm": 0.2812260687351227,
      "learning_rate": 1.0688512544604915e-05,
      "loss": 1.5376,
      "step": 1450
    },
    {
      "epoch": 1.1884637057527792,
      "eval_loss": 2.2129642963409424,
      "eval_runtime": 281.4833,
      "eval_samples_per_second": 0.771,
      "eval_steps_per_second": 0.099,
      "step": 1450
    },
    {
      "epoch": 1.2294452128477025,
      "grad_norm": 0.28713124990463257,
      "learning_rate": 1.005104108875275e-05,
      "loss": 1.5273,
      "step": 1500
    },
    {
      "epoch": 1.2294452128477025,
      "eval_loss": 2.2116787433624268,
      "eval_runtime": 283.0969,
      "eval_samples_per_second": 0.767,
      "eval_steps_per_second": 0.099,
      "step": 1500
    },
    {
      "epoch": 1.270426719942626,
      "grad_norm": 0.27820634841918945,
      "learning_rate": 9.41336193301377e-06,
      "loss": 1.526,
      "step": 1550
    },
    {
      "epoch": 1.270426719942626,
      "eval_loss": 2.2110674381256104,
      "eval_runtime": 281.8783,
      "eval_samples_per_second": 0.77,
      "eval_steps_per_second": 0.099,
      "step": 1550
    },
    {
      "epoch": 1.3114082270375493,
      "grad_norm": 0.26735347509384155,
      "learning_rate": 8.778069964991484e-06,
      "loss": 1.537,
      "step": 1600
    },
    {
      "epoch": 1.3114082270375493,
      "eval_loss": 2.2088816165924072,
      "eval_runtime": 281.9837,
      "eval_samples_per_second": 0.77,
      "eval_steps_per_second": 0.099,
      "step": 1600
    },
    {
      "epoch": 1.3523897341324727,
      "grad_norm": 0.2734984755516052,
      "learning_rate": 8.147750358182e-06,
      "loss": 1.5431,
      "step": 1650
    },
    {
      "epoch": 1.3523897341324727,
      "eval_loss": 2.207979917526245,
      "eval_runtime": 282.6878,
      "eval_samples_per_second": 0.768,
      "eval_steps_per_second": 0.099,
      "step": 1650
    },
    {
      "epoch": 1.3933712412273962,
      "grad_norm": 0.3110925257205963,
      "learning_rate": 7.524968052209331e-06,
      "loss": 1.5401,
      "step": 1700
    },
    {
      "epoch": 1.3933712412273962,
      "eval_loss": 2.2074739933013916,
      "eval_runtime": 282.1594,
      "eval_samples_per_second": 0.769,
      "eval_steps_per_second": 0.099,
      "step": 1700
    },
    {
      "epoch": 1.4343527483223195,
      "grad_norm": 0.3202177882194519,
      "learning_rate": 6.912257315397784e-06,
      "loss": 1.5331,
      "step": 1750
    },
    {
      "epoch": 1.4343527483223195,
      "eval_loss": 2.205904245376587,
      "eval_runtime": 280.7829,
      "eval_samples_per_second": 0.773,
      "eval_steps_per_second": 0.1,
      "step": 1750
    },
    {
      "epoch": 1.475334255417243,
      "grad_norm": 0.28285861015319824,
      "learning_rate": 6.312111432154074e-06,
      "loss": 1.5395,
      "step": 1800
    },
    {
      "epoch": 1.475334255417243,
      "eval_loss": 2.2046620845794678,
      "eval_runtime": 282.5413,
      "eval_samples_per_second": 0.768,
      "eval_steps_per_second": 0.099,
      "step": 1800
    },
    {
      "epoch": 1.5163157625121664,
      "grad_norm": 0.2836764454841614,
      "learning_rate": 5.726972557124022e-06,
      "loss": 1.542,
      "step": 1850
    },
    {
      "epoch": 1.5163157625121664,
      "eval_loss": 2.203850507736206,
      "eval_runtime": 283.3938,
      "eval_samples_per_second": 0.766,
      "eval_steps_per_second": 0.099,
      "step": 1850
    },
    {
      "epoch": 1.5572972696070897,
      "grad_norm": 0.2900823652744293,
      "learning_rate": 5.159221777409953e-06,
      "loss": 1.502,
      "step": 1900
    },
    {
      "epoch": 1.5572972696070897,
      "eval_loss": 2.2032361030578613,
      "eval_runtime": 281.099,
      "eval_samples_per_second": 0.772,
      "eval_steps_per_second": 0.1,
      "step": 1900
    },
    {
      "epoch": 1.5982787767020132,
      "grad_norm": 0.282176673412323,
      "learning_rate": 4.611169423288323e-06,
      "loss": 1.5267,
      "step": 1950
    },
    {
      "epoch": 1.5982787767020132,
      "eval_loss": 2.202617883682251,
      "eval_runtime": 283.2145,
      "eval_samples_per_second": 0.766,
      "eval_steps_per_second": 0.099,
      "step": 1950
    },
    {
      "epoch": 1.6392602837969368,
      "grad_norm": 0.29563507437705994,
      "learning_rate": 4.085045666855846e-06,
      "loss": 1.51,
      "step": 2000
    },
    {
      "epoch": 1.6392602837969368,
      "eval_loss": 2.2023086547851562,
      "eval_runtime": 282.4408,
      "eval_samples_per_second": 0.768,
      "eval_steps_per_second": 0.099,
      "step": 2000
    },
    {
      "epoch": 1.68024179089186,
      "grad_norm": 0.2923285663127899,
      "learning_rate": 3.5829914468607874e-06,
      "loss": 1.5319,
      "step": 2050
    },
    {
      "epoch": 1.68024179089186,
      "eval_loss": 2.2018585205078125,
      "eval_runtime": 281.9707,
      "eval_samples_per_second": 0.77,
      "eval_steps_per_second": 0.099,
      "step": 2050
    },
    {
      "epoch": 1.7212232979867834,
      "grad_norm": 0.2978394031524658,
      "learning_rate": 3.1070497566486825e-06,
      "loss": 1.5267,
      "step": 2100
    },
    {
      "epoch": 1.7212232979867834,
      "eval_loss": 2.2010927200317383,
      "eval_runtime": 282.5046,
      "eval_samples_per_second": 0.768,
      "eval_steps_per_second": 0.099,
      "step": 2100
    },
    {
      "epoch": 1.7622048050817067,
      "grad_norm": 0.2987017035484314,
      "learning_rate": 2.6591573306741704e-06,
      "loss": 1.5201,
      "step": 2150
    },
    {
      "epoch": 1.7622048050817067,
      "eval_loss": 2.200831174850464,
      "eval_runtime": 283.0038,
      "eval_samples_per_second": 0.767,
      "eval_steps_per_second": 0.099,
      "step": 2150
    },
    {
      "epoch": 1.8031863121766303,
      "grad_norm": 0.28967171907424927,
      "learning_rate": 2.241136763408801e-06,
      "loss": 1.5204,
      "step": 2200
    },
    {
      "epoch": 1.8031863121766303,
      "eval_loss": 2.2002053260803223,
      "eval_runtime": 281.6629,
      "eval_samples_per_second": 0.77,
      "eval_steps_per_second": 0.099,
      "step": 2200
    },
    {
      "epoch": 1.8441678192715538,
      "grad_norm": 0.321614146232605,
      "learning_rate": 1.8546890927150273e-06,
      "loss": 1.5094,
      "step": 2250
    },
    {
      "epoch": 1.8441678192715538,
      "eval_loss": 2.2001166343688965,
      "eval_runtime": 282.843,
      "eval_samples_per_second": 0.767,
      "eval_steps_per_second": 0.099,
      "step": 2250
    },
    {
      "epoch": 1.8851493263664771,
      "grad_norm": 0.2887614965438843,
      "learning_rate": 1.501386877866694e-06,
      "loss": 1.5268,
      "step": 2300
    },
    {
      "epoch": 1.8851493263664771,
      "eval_loss": 2.199751615524292,
      "eval_runtime": 282.0209,
      "eval_samples_per_second": 0.769,
      "eval_steps_per_second": 0.099,
      "step": 2300
    },
    {
      "epoch": 1.9261308334614005,
      "grad_norm": 0.30499428510665894,
      "learning_rate": 1.1826678003833402e-06,
      "loss": 1.513,
      "step": 2350
    },
    {
      "epoch": 1.9261308334614005,
      "eval_loss": 2.1995420455932617,
      "eval_runtime": 283.2079,
      "eval_samples_per_second": 0.766,
      "eval_steps_per_second": 0.099,
      "step": 2350
    },
    {
      "epoch": 1.967112340556324,
      "grad_norm": 0.29722264409065247,
      "learning_rate": 8.998288137183209e-07,
      "loss": 1.5263,
      "step": 2400
    },
    {
      "epoch": 1.967112340556324,
      "eval_loss": 2.199302911758423,
      "eval_runtime": 281.4307,
      "eval_samples_per_second": 0.771,
      "eval_steps_per_second": 0.099,
      "step": 2400
    },
    {
      "epoch": 2.0080938476512475,
      "grad_norm": 0.30117130279541016,
      "learning_rate": 6.540208656071601e-07,
      "loss": 1.5291,
      "step": 2450
    },
    {
      "epoch": 2.0080938476512475,
      "eval_loss": 2.199272394180298,
      "eval_runtime": 281.4783,
      "eval_samples_per_second": 0.771,
      "eval_steps_per_second": 0.099,
      "step": 2450
    },
    {
      "epoch": 2.049075354746171,
      "grad_norm": 0.29823198914527893,
      "learning_rate": 4.4624421455236156e-07,
      "loss": 1.5187,
      "step": 2500
    },
    {
      "epoch": 2.049075354746171,
      "eval_loss": 2.1992440223693848,
      "eval_runtime": 280.9197,
      "eval_samples_per_second": 0.772,
      "eval_steps_per_second": 0.1,
      "step": 2500
    },
    {
      "epoch": 2.090056861841094,
      "grad_norm": 0.3053443729877472,
      "learning_rate": 2.7734435950315663e-07,
      "loss": 1.5324,
      "step": 2550
    },
    {
      "epoch": 2.090056861841094,
      "eval_loss": 2.1991806030273438,
      "eval_runtime": 281.8919,
      "eval_samples_per_second": 0.77,
      "eval_steps_per_second": 0.099,
      "step": 2550
    },
    {
      "epoch": 2.1310383689360175,
      "grad_norm": 0.31491023302078247,
      "learning_rate": 1.4800859929338218e-07,
      "loss": 1.5314,
      "step": 2600
    },
    {
      "epoch": 2.1310383689360175,
      "eval_loss": 2.199115037918091,
      "eval_runtime": 282.019,
      "eval_samples_per_second": 0.769,
      "eval_steps_per_second": 0.099,
      "step": 2600
    },
    {
      "epoch": 2.172019876030941,
      "grad_norm": 0.300465852022171,
      "learning_rate": 5.876323583810184e-08,
      "loss": 1.5357,
      "step": 2650
    },
    {
      "epoch": 2.172019876030941,
      "eval_loss": 2.19909930229187,
      "eval_runtime": 281.0522,
      "eval_samples_per_second": 0.772,
      "eval_steps_per_second": 0.1,
      "step": 2650
    },
    {
      "epoch": 2.2130013831258646,
      "grad_norm": 0.3176944851875305,
      "learning_rate": 9.971432469871866e-09,
      "loss": 1.507,
      "step": 2700
    },
    {
      "epoch": 2.2130013831258646,
      "eval_loss": 2.1991000175476074,
      "eval_runtime": 281.6411,
      "eval_samples_per_second": 0.77,
      "eval_steps_per_second": 0.099,
      "step": 2700
    }
  ],
  "logging_steps": 50,
  "max_steps": 2735,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 50,
  "total_flos": 1.100581152473088e+18,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}