| { | |
| "best_metric": 0.9941176470588236, | |
| "best_model_checkpoint": "/media/cse/HDD/Shawon/shawon/MY DATA/VideoMAE_BdSLW60_FrameRateCorrected_withoutAug/checkpoint-4488", | |
| "epoch": 9.040055803571429, | |
| "eval_steps": 500, | |
| "global_step": 8977, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.004464285714285714, | |
| "grad_norm": 9.476576805114746, | |
| "learning_rate": 2.2321428571428573e-06, | |
| "loss": 4.1434, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.008928571428571428, | |
| "grad_norm": 11.913525581359863, | |
| "learning_rate": 4.464285714285715e-06, | |
| "loss": 4.0979, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.013392857142857142, | |
| "grad_norm": 10.909424781799316, | |
| "learning_rate": 6.696428571428572e-06, | |
| "loss": 4.0707, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.017857142857142856, | |
| "grad_norm": 13.112800598144531, | |
| "learning_rate": 8.92857142857143e-06, | |
| "loss": 4.0628, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.022321428571428572, | |
| "grad_norm": 12.672718048095703, | |
| "learning_rate": 1.1160714285714287e-05, | |
| "loss": 4.0055, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.026785714285714284, | |
| "grad_norm": 13.710538864135742, | |
| "learning_rate": 1.3392857142857144e-05, | |
| "loss": 3.8428, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.03125, | |
| "grad_norm": 15.481912612915039, | |
| "learning_rate": 1.5625e-05, | |
| "loss": 3.6124, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.03571428571428571, | |
| "grad_norm": 15.442792892456055, | |
| "learning_rate": 1.785714285714286e-05, | |
| "loss": 3.1009, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.040044642857142855, | |
| "eval_accuracy": 0.508235294117647, | |
| "eval_loss": 2.399294137954712, | |
| "eval_runtime": 297.3318, | |
| "eval_samples_per_second": 2.859, | |
| "eval_steps_per_second": 1.429, | |
| "step": 897 | |
| }, | |
| { | |
| "epoch": 1.0001004464285714, | |
| "grad_norm": 21.445680618286133, | |
| "learning_rate": 2.0089285714285717e-05, | |
| "loss": 2.6823, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 1.0045647321428572, | |
| "grad_norm": 19.279504776000977, | |
| "learning_rate": 2.2321428571428575e-05, | |
| "loss": 2.0408, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 1.0090290178571428, | |
| "grad_norm": 16.168643951416016, | |
| "learning_rate": 2.455357142857143e-05, | |
| "loss": 1.6718, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 1.0134933035714286, | |
| "grad_norm": 10.949524879455566, | |
| "learning_rate": 2.6785714285714288e-05, | |
| "loss": 1.1872, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 1.0179575892857142, | |
| "grad_norm": 14.425607681274414, | |
| "learning_rate": 2.9017857142857146e-05, | |
| "loss": 0.8375, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 1.022421875, | |
| "grad_norm": 12.171178817749023, | |
| "learning_rate": 3.125e-05, | |
| "loss": 0.6268, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 1.0268861607142856, | |
| "grad_norm": 6.816460609436035, | |
| "learning_rate": 3.348214285714286e-05, | |
| "loss": 0.4694, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 1.0313504464285714, | |
| "grad_norm": 4.130532741546631, | |
| "learning_rate": 3.571428571428572e-05, | |
| "loss": 0.3985, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 1.0358147321428572, | |
| "grad_norm": 3.680629014968872, | |
| "learning_rate": 3.794642857142857e-05, | |
| "loss": 0.3152, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 1.0400558035714287, | |
| "eval_accuracy": 0.9611764705882353, | |
| "eval_loss": 0.21172475814819336, | |
| "eval_runtime": 306.8196, | |
| "eval_samples_per_second": 2.77, | |
| "eval_steps_per_second": 1.385, | |
| "step": 1795 | |
| }, | |
| { | |
| "epoch": 2.000200892857143, | |
| "grad_norm": 1.7801744937896729, | |
| "learning_rate": 4.017857142857143e-05, | |
| "loss": 0.2748, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 2.0046651785714285, | |
| "grad_norm": 4.094645023345947, | |
| "learning_rate": 4.2410714285714285e-05, | |
| "loss": 0.1463, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 2.0091294642857145, | |
| "grad_norm": 0.7033438682556152, | |
| "learning_rate": 4.464285714285715e-05, | |
| "loss": 0.1269, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 2.01359375, | |
| "grad_norm": 0.5256723761558533, | |
| "learning_rate": 4.6875e-05, | |
| "loss": 0.0921, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 2.0180580357142857, | |
| "grad_norm": 1.1959021091461182, | |
| "learning_rate": 4.910714285714286e-05, | |
| "loss": 0.0496, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 2.0225223214285712, | |
| "grad_norm": 1.346044659614563, | |
| "learning_rate": 4.985119047619048e-05, | |
| "loss": 0.0801, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 2.0269866071428573, | |
| "grad_norm": 0.21942313015460968, | |
| "learning_rate": 4.960317460317461e-05, | |
| "loss": 0.1066, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 2.031450892857143, | |
| "grad_norm": 0.10540590435266495, | |
| "learning_rate": 4.9355158730158735e-05, | |
| "loss": 0.0639, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 2.0359151785714285, | |
| "grad_norm": 0.18206317722797394, | |
| "learning_rate": 4.910714285714286e-05, | |
| "loss": 0.0363, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 2.0400669642857143, | |
| "eval_accuracy": 0.991764705882353, | |
| "eval_loss": 0.037506770342588425, | |
| "eval_runtime": 294.0638, | |
| "eval_samples_per_second": 2.891, | |
| "eval_steps_per_second": 1.445, | |
| "step": 2693 | |
| }, | |
| { | |
| "epoch": 3.000301339285714, | |
| "grad_norm": 0.18602599203586578, | |
| "learning_rate": 4.8859126984126984e-05, | |
| "loss": 0.0633, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 3.004765625, | |
| "grad_norm": 0.13055439293384552, | |
| "learning_rate": 4.8611111111111115e-05, | |
| "loss": 0.0147, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 3.0092299107142857, | |
| "grad_norm": 0.21550215780735016, | |
| "learning_rate": 4.836309523809524e-05, | |
| "loss": 0.0339, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 3.0136941964285713, | |
| "grad_norm": 0.17052847146987915, | |
| "learning_rate": 4.811507936507937e-05, | |
| "loss": 0.0361, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 3.0181584821428573, | |
| "grad_norm": 0.11729184538125992, | |
| "learning_rate": 4.7867063492063496e-05, | |
| "loss": 0.0339, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 3.022622767857143, | |
| "grad_norm": 0.02288275957107544, | |
| "learning_rate": 4.761904761904762e-05, | |
| "loss": 0.0384, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 3.0270870535714285, | |
| "grad_norm": 0.026718221604824066, | |
| "learning_rate": 4.7371031746031745e-05, | |
| "loss": 0.0269, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 3.031551339285714, | |
| "grad_norm": 16.089458465576172, | |
| "learning_rate": 4.7123015873015876e-05, | |
| "loss": 0.0447, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 3.036015625, | |
| "grad_norm": 0.6215272545814514, | |
| "learning_rate": 4.6875e-05, | |
| "loss": 0.0431, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 3.040078125, | |
| "eval_accuracy": 0.9764705882352941, | |
| "eval_loss": 0.08410066366195679, | |
| "eval_runtime": 296.1888, | |
| "eval_samples_per_second": 2.87, | |
| "eval_steps_per_second": 1.435, | |
| "step": 3591 | |
| }, | |
| { | |
| "epoch": 4.000401785714286, | |
| "grad_norm": 6.967636585235596, | |
| "learning_rate": 4.662698412698413e-05, | |
| "loss": 0.0237, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 4.004866071428571, | |
| "grad_norm": 0.018726671114563942, | |
| "learning_rate": 4.637896825396826e-05, | |
| "loss": 0.0286, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 4.009330357142857, | |
| "grad_norm": 0.027330530807375908, | |
| "learning_rate": 4.613095238095239e-05, | |
| "loss": 0.0216, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 4.0137946428571425, | |
| "grad_norm": 0.08880900591611862, | |
| "learning_rate": 4.5882936507936506e-05, | |
| "loss": 0.0148, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 4.018258928571429, | |
| "grad_norm": 0.01726541668176651, | |
| "learning_rate": 4.563492063492064e-05, | |
| "loss": 0.0341, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 4.0227232142857146, | |
| "grad_norm": 0.5136727094650269, | |
| "learning_rate": 4.538690476190476e-05, | |
| "loss": 0.0286, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 4.0271875, | |
| "grad_norm": 0.25709015130996704, | |
| "learning_rate": 4.5138888888888894e-05, | |
| "loss": 0.0437, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 4.031651785714286, | |
| "grad_norm": 1.26307213306427, | |
| "learning_rate": 4.489087301587302e-05, | |
| "loss": 0.0636, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 4.036116071428571, | |
| "grad_norm": 0.04352056235074997, | |
| "learning_rate": 4.464285714285715e-05, | |
| "loss": 0.0554, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 4.040044642857143, | |
| "eval_accuracy": 0.9941176470588236, | |
| "eval_loss": 0.03231202811002731, | |
| "eval_runtime": 284.1927, | |
| "eval_samples_per_second": 2.991, | |
| "eval_steps_per_second": 1.495, | |
| "step": 4488 | |
| }, | |
| { | |
| "epoch": 5.000502232142857, | |
| "grad_norm": 0.015468730591237545, | |
| "learning_rate": 4.439484126984127e-05, | |
| "loss": 0.0489, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 5.0049665178571425, | |
| "grad_norm": 0.008214977569878101, | |
| "learning_rate": 4.41468253968254e-05, | |
| "loss": 0.0224, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 5.009430803571429, | |
| "grad_norm": 0.709925651550293, | |
| "learning_rate": 4.3898809523809523e-05, | |
| "loss": 0.0131, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 5.013895089285715, | |
| "grad_norm": 6.61177396774292, | |
| "learning_rate": 4.3650793650793655e-05, | |
| "loss": 0.0122, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 5.018359375, | |
| "grad_norm": 0.024309329688549042, | |
| "learning_rate": 4.340277777777778e-05, | |
| "loss": 0.02, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 5.022823660714286, | |
| "grad_norm": 0.0186802726238966, | |
| "learning_rate": 4.315476190476191e-05, | |
| "loss": 0.0262, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 5.027287946428571, | |
| "grad_norm": 0.0063638836145401, | |
| "learning_rate": 4.290674603174603e-05, | |
| "loss": 0.0202, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 5.031752232142857, | |
| "grad_norm": 0.0073557039722800255, | |
| "learning_rate": 4.265873015873016e-05, | |
| "loss": 0.0266, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 5.0362165178571425, | |
| "grad_norm": 13.825119972229004, | |
| "learning_rate": 4.2410714285714285e-05, | |
| "loss": 0.0467, | |
| "step": 5300 | |
| }, | |
| { | |
| "epoch": 5.040055803571429, | |
| "eval_accuracy": 0.9835294117647059, | |
| "eval_loss": 0.06136510148644447, | |
| "eval_runtime": 284.8463, | |
| "eval_samples_per_second": 2.984, | |
| "eval_steps_per_second": 1.492, | |
| "step": 5386 | |
| }, | |
| { | |
| "epoch": 6.000602678571428, | |
| "grad_norm": 65.57583618164062, | |
| "learning_rate": 4.2162698412698416e-05, | |
| "loss": 0.022, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 6.005066964285715, | |
| "grad_norm": 0.026758452877402306, | |
| "learning_rate": 4.191468253968254e-05, | |
| "loss": 0.0287, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 6.00953125, | |
| "grad_norm": 0.013503998517990112, | |
| "learning_rate": 4.166666666666667e-05, | |
| "loss": 0.04, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 6.013995535714286, | |
| "grad_norm": 0.012964209541678429, | |
| "learning_rate": 4.14186507936508e-05, | |
| "loss": 0.0333, | |
| "step": 5700 | |
| }, | |
| { | |
| "epoch": 6.018459821428571, | |
| "grad_norm": 0.009373425506055355, | |
| "learning_rate": 4.117063492063492e-05, | |
| "loss": 0.0163, | |
| "step": 5800 | |
| }, | |
| { | |
| "epoch": 6.022924107142857, | |
| "grad_norm": 0.6834176778793335, | |
| "learning_rate": 4.0922619047619046e-05, | |
| "loss": 0.0482, | |
| "step": 5900 | |
| }, | |
| { | |
| "epoch": 6.027388392857143, | |
| "grad_norm": 0.034619417041540146, | |
| "learning_rate": 4.067460317460318e-05, | |
| "loss": 0.0339, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 6.031852678571428, | |
| "grad_norm": 0.031068073585629463, | |
| "learning_rate": 4.04265873015873e-05, | |
| "loss": 0.0399, | |
| "step": 6100 | |
| }, | |
| { | |
| "epoch": 6.036316964285715, | |
| "grad_norm": 0.13247177004814148, | |
| "learning_rate": 4.017857142857143e-05, | |
| "loss": 0.0324, | |
| "step": 6200 | |
| }, | |
| { | |
| "epoch": 6.040066964285714, | |
| "eval_accuracy": 0.9858823529411764, | |
| "eval_loss": 0.09442107379436493, | |
| "eval_runtime": 294.806, | |
| "eval_samples_per_second": 2.883, | |
| "eval_steps_per_second": 1.442, | |
| "step": 6284 | |
| }, | |
| { | |
| "epoch": 7.000703125, | |
| "grad_norm": 0.008008907549083233, | |
| "learning_rate": 3.993055555555556e-05, | |
| "loss": 0.0342, | |
| "step": 6300 | |
| }, | |
| { | |
| "epoch": 7.005167410714286, | |
| "grad_norm": 0.057924672961235046, | |
| "learning_rate": 3.968253968253968e-05, | |
| "loss": 0.008, | |
| "step": 6400 | |
| }, | |
| { | |
| "epoch": 7.009631696428571, | |
| "grad_norm": 0.0028113469015806913, | |
| "learning_rate": 3.943452380952381e-05, | |
| "loss": 0.0238, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 7.014095982142857, | |
| "grad_norm": 0.006544953212141991, | |
| "learning_rate": 3.918650793650794e-05, | |
| "loss": 0.0081, | |
| "step": 6600 | |
| }, | |
| { | |
| "epoch": 7.018560267857143, | |
| "grad_norm": 0.007798121310770512, | |
| "learning_rate": 3.893849206349206e-05, | |
| "loss": 0.0178, | |
| "step": 6700 | |
| }, | |
| { | |
| "epoch": 7.023024553571428, | |
| "grad_norm": 0.011922234669327736, | |
| "learning_rate": 3.8690476190476195e-05, | |
| "loss": 0.0146, | |
| "step": 6800 | |
| }, | |
| { | |
| "epoch": 7.027488839285715, | |
| "grad_norm": 0.02987760305404663, | |
| "learning_rate": 3.844246031746032e-05, | |
| "loss": 0.0438, | |
| "step": 6900 | |
| }, | |
| { | |
| "epoch": 7.031953125, | |
| "grad_norm": 0.010242101736366749, | |
| "learning_rate": 3.8194444444444444e-05, | |
| "loss": 0.0614, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 7.036417410714286, | |
| "grad_norm": 0.030809585005044937, | |
| "learning_rate": 3.794642857142857e-05, | |
| "loss": 0.0381, | |
| "step": 7100 | |
| }, | |
| { | |
| "epoch": 7.040078125, | |
| "eval_accuracy": 0.9870588235294118, | |
| "eval_loss": 0.05825329199433327, | |
| "eval_runtime": 282.1803, | |
| "eval_samples_per_second": 3.012, | |
| "eval_steps_per_second": 1.506, | |
| "step": 7182 | |
| }, | |
| { | |
| "epoch": 8.000803571428571, | |
| "grad_norm": 1.1858717203140259, | |
| "learning_rate": 3.76984126984127e-05, | |
| "loss": 0.0385, | |
| "step": 7200 | |
| }, | |
| { | |
| "epoch": 8.005267857142858, | |
| "grad_norm": 0.06927396357059479, | |
| "learning_rate": 3.7450396825396824e-05, | |
| "loss": 0.0322, | |
| "step": 7300 | |
| }, | |
| { | |
| "epoch": 8.009732142857143, | |
| "grad_norm": 0.0045885867439210415, | |
| "learning_rate": 3.7202380952380956e-05, | |
| "loss": 0.0051, | |
| "step": 7400 | |
| }, | |
| { | |
| "epoch": 8.014196428571429, | |
| "grad_norm": 0.005404896102845669, | |
| "learning_rate": 3.695436507936508e-05, | |
| "loss": 0.003, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 8.018660714285714, | |
| "grad_norm": 0.003892578650265932, | |
| "learning_rate": 3.6706349206349205e-05, | |
| "loss": 0.0154, | |
| "step": 7600 | |
| }, | |
| { | |
| "epoch": 8.023125, | |
| "grad_norm": 0.00285942736081779, | |
| "learning_rate": 3.6458333333333336e-05, | |
| "loss": 0.0095, | |
| "step": 7700 | |
| }, | |
| { | |
| "epoch": 8.027589285714285, | |
| "grad_norm": 0.004967841785401106, | |
| "learning_rate": 3.621031746031746e-05, | |
| "loss": 0.0036, | |
| "step": 7800 | |
| }, | |
| { | |
| "epoch": 8.032053571428571, | |
| "grad_norm": 0.0037566416431218386, | |
| "learning_rate": 3.5962301587301586e-05, | |
| "loss": 0.0013, | |
| "step": 7900 | |
| }, | |
| { | |
| "epoch": 8.036517857142858, | |
| "grad_norm": 0.003524980042129755, | |
| "learning_rate": 3.571428571428572e-05, | |
| "loss": 0.0039, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 8.040044642857143, | |
| "eval_accuracy": 0.991764705882353, | |
| "eval_loss": 0.030685828998684883, | |
| "eval_runtime": 277.8498, | |
| "eval_samples_per_second": 3.059, | |
| "eval_steps_per_second": 1.53, | |
| "step": 8079 | |
| }, | |
| { | |
| "epoch": 9.000904017857144, | |
| "grad_norm": 0.009027580730617046, | |
| "learning_rate": 3.546626984126984e-05, | |
| "loss": 0.0276, | |
| "step": 8100 | |
| }, | |
| { | |
| "epoch": 9.005368303571428, | |
| "grad_norm": 0.00775180896744132, | |
| "learning_rate": 3.521825396825397e-05, | |
| "loss": 0.0324, | |
| "step": 8200 | |
| }, | |
| { | |
| "epoch": 9.009832589285715, | |
| "grad_norm": 0.003224916523322463, | |
| "learning_rate": 3.49702380952381e-05, | |
| "loss": 0.001, | |
| "step": 8300 | |
| }, | |
| { | |
| "epoch": 9.014296875, | |
| "grad_norm": 0.001547032268717885, | |
| "learning_rate": 3.472222222222222e-05, | |
| "loss": 0.0002, | |
| "step": 8400 | |
| }, | |
| { | |
| "epoch": 9.018761160714286, | |
| "grad_norm": 0.00670098839327693, | |
| "learning_rate": 3.4474206349206354e-05, | |
| "loss": 0.0254, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 9.02322544642857, | |
| "grad_norm": 0.0018392472993582487, | |
| "learning_rate": 3.422619047619048e-05, | |
| "loss": 0.0341, | |
| "step": 8600 | |
| }, | |
| { | |
| "epoch": 9.027689732142857, | |
| "grad_norm": 27.046947479248047, | |
| "learning_rate": 3.397817460317461e-05, | |
| "loss": 0.0559, | |
| "step": 8700 | |
| }, | |
| { | |
| "epoch": 9.032154017857144, | |
| "grad_norm": 0.03754453361034393, | |
| "learning_rate": 3.3730158730158734e-05, | |
| "loss": 0.0343, | |
| "step": 8800 | |
| }, | |
| { | |
| "epoch": 9.036618303571428, | |
| "grad_norm": 0.00891073513776064, | |
| "learning_rate": 3.348214285714286e-05, | |
| "loss": 0.0149, | |
| "step": 8900 | |
| }, | |
| { | |
| "epoch": 9.040055803571429, | |
| "eval_accuracy": 0.9858823529411764, | |
| "eval_loss": 0.06944379210472107, | |
| "eval_runtime": 283.081, | |
| "eval_samples_per_second": 3.003, | |
| "eval_steps_per_second": 1.501, | |
| "step": 8977 | |
| }, | |
| { | |
| "epoch": 9.040055803571429, | |
| "step": 8977, | |
| "total_flos": 8.952618183954924e+19, | |
| "train_loss": 0.48967243622379686, | |
| "train_runtime": 34397.09, | |
| "train_samples_per_second": 5.21, | |
| "train_steps_per_second": 0.651 | |
| }, | |
| { | |
| "epoch": 9.040055803571429, | |
| "eval_accuracy": 0.9941176470588236, | |
| "eval_loss": 0.03231202811002731, | |
| "eval_runtime": 299.9449, | |
| "eval_samples_per_second": 2.834, | |
| "eval_steps_per_second": 1.417, | |
| "step": 8977 | |
| }, | |
| { | |
| "epoch": 9.040055803571429, | |
| "eval_accuracy": 0.8079937304075235, | |
| "eval_loss": 0.7567912340164185, | |
| "eval_runtime": 438.2415, | |
| "eval_samples_per_second": 2.912, | |
| "eval_steps_per_second": 1.456, | |
| "step": 8977 | |
| } | |
| ], | |
| "logging_steps": 100, | |
| "max_steps": 22400, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 9223372036854775807, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "EarlyStoppingCallback": { | |
| "args": { | |
| "early_stopping_patience": 5, | |
| "early_stopping_threshold": 0.0 | |
| }, | |
| "attributes": { | |
| "early_stopping_patience_counter": 0 | |
| } | |
| }, | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 8.952618183954924e+19, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |