| { | |
| "best_metric": 1.3649392127990723, | |
| "best_model_checkpoint": "./output/checkpoints/2024-06-11_11-02-23/checkpoint-50", | |
| "epoch": 0.5154639175257731, | |
| "eval_steps": 1, | |
| "global_step": 50, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.010309278350515464, | |
| "grad_norm": 2.784351348876953, | |
| "learning_rate": 4e-05, | |
| "loss": 5.0719, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.010309278350515464, | |
| "eval_loss": 4.890735149383545, | |
| "eval_runtime": 11.998, | |
| "eval_samples_per_second": 11.252, | |
| "eval_steps_per_second": 0.75, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.020618556701030927, | |
| "grad_norm": 2.8573310375213623, | |
| "learning_rate": 8e-05, | |
| "loss": 4.9508, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.020618556701030927, | |
| "eval_loss": 4.775210857391357, | |
| "eval_runtime": 12.0698, | |
| "eval_samples_per_second": 11.185, | |
| "eval_steps_per_second": 0.746, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.030927835051546393, | |
| "grad_norm": 2.920828104019165, | |
| "learning_rate": 0.00012, | |
| "loss": 4.973, | |
| "step": 3 | |
| }, | |
| { | |
| "epoch": 0.030927835051546393, | |
| "eval_loss": 4.2439093589782715, | |
| "eval_runtime": 12.1674, | |
| "eval_samples_per_second": 11.095, | |
| "eval_steps_per_second": 0.74, | |
| "step": 3 | |
| }, | |
| { | |
| "epoch": 0.041237113402061855, | |
| "grad_norm": 3.0682826042175293, | |
| "learning_rate": 0.00016, | |
| "loss": 4.3398, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.041237113402061855, | |
| "eval_loss": 3.316483497619629, | |
| "eval_runtime": 12.1421, | |
| "eval_samples_per_second": 11.118, | |
| "eval_steps_per_second": 0.741, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.05154639175257732, | |
| "grad_norm": 3.1937592029571533, | |
| "learning_rate": 0.0002, | |
| "loss": 3.253, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.05154639175257732, | |
| "eval_loss": 2.4086239337921143, | |
| "eval_runtime": 12.0796, | |
| "eval_samples_per_second": 11.176, | |
| "eval_steps_per_second": 0.745, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.061855670103092786, | |
| "grad_norm": 2.0619874000549316, | |
| "learning_rate": 0.00024, | |
| "loss": 2.3725, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.061855670103092786, | |
| "eval_loss": 1.9427752494812012, | |
| "eval_runtime": 12.1816, | |
| "eval_samples_per_second": 11.082, | |
| "eval_steps_per_second": 0.739, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.07216494845360824, | |
| "grad_norm": 1.4178358316421509, | |
| "learning_rate": 0.00028, | |
| "loss": 1.7391, | |
| "step": 7 | |
| }, | |
| { | |
| "epoch": 0.07216494845360824, | |
| "eval_loss": 1.6751385927200317, | |
| "eval_runtime": 12.2395, | |
| "eval_samples_per_second": 11.03, | |
| "eval_steps_per_second": 0.735, | |
| "step": 7 | |
| }, | |
| { | |
| "epoch": 0.08247422680412371, | |
| "grad_norm": 0.9345605373382568, | |
| "learning_rate": 0.00032, | |
| "loss": 1.4029, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.08247422680412371, | |
| "eval_loss": 1.551221489906311, | |
| "eval_runtime": 12.2329, | |
| "eval_samples_per_second": 11.036, | |
| "eval_steps_per_second": 0.736, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.09278350515463918, | |
| "grad_norm": 1.1335052251815796, | |
| "learning_rate": 0.00036, | |
| "loss": 1.4404, | |
| "step": 9 | |
| }, | |
| { | |
| "epoch": 0.09278350515463918, | |
| "eval_loss": 1.447738528251648, | |
| "eval_runtime": 12.2069, | |
| "eval_samples_per_second": 11.059, | |
| "eval_steps_per_second": 0.737, | |
| "step": 9 | |
| }, | |
| { | |
| "epoch": 0.10309278350515463, | |
| "grad_norm": 0.23779241740703583, | |
| "learning_rate": 0.0004, | |
| "loss": 1.3197, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.10309278350515463, | |
| "eval_loss": 1.4138007164001465, | |
| "eval_runtime": 12.2609, | |
| "eval_samples_per_second": 11.011, | |
| "eval_steps_per_second": 0.734, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.1134020618556701, | |
| "grad_norm": 0.19672752916812897, | |
| "learning_rate": 0.00039540229885057476, | |
| "loss": 1.2882, | |
| "step": 11 | |
| }, | |
| { | |
| "epoch": 0.1134020618556701, | |
| "eval_loss": 1.393249750137329, | |
| "eval_runtime": 12.1836, | |
| "eval_samples_per_second": 11.08, | |
| "eval_steps_per_second": 0.739, | |
| "step": 11 | |
| }, | |
| { | |
| "epoch": 0.12371134020618557, | |
| "grad_norm": 0.2525981366634369, | |
| "learning_rate": 0.00039080459770114945, | |
| "loss": 1.2831, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.12371134020618557, | |
| "eval_loss": 1.4042422771453857, | |
| "eval_runtime": 12.232, | |
| "eval_samples_per_second": 11.037, | |
| "eval_steps_per_second": 0.736, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.13402061855670103, | |
| "grad_norm": 0.23460708558559418, | |
| "learning_rate": 0.0003862068965517242, | |
| "loss": 1.1615, | |
| "step": 13 | |
| }, | |
| { | |
| "epoch": 0.13402061855670103, | |
| "eval_loss": 1.4705734252929688, | |
| "eval_runtime": 12.2648, | |
| "eval_samples_per_second": 11.007, | |
| "eval_steps_per_second": 0.734, | |
| "step": 13 | |
| }, | |
| { | |
| "epoch": 0.14432989690721648, | |
| "grad_norm": 0.21198733150959015, | |
| "learning_rate": 0.00038160919540229887, | |
| "loss": 1.0625, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.14432989690721648, | |
| "eval_loss": 1.5055038928985596, | |
| "eval_runtime": 12.2044, | |
| "eval_samples_per_second": 11.062, | |
| "eval_steps_per_second": 0.737, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.15463917525773196, | |
| "grad_norm": 0.3658374845981598, | |
| "learning_rate": 0.00037701149425287356, | |
| "loss": 1.1191, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.15463917525773196, | |
| "eval_loss": 1.4498964548110962, | |
| "eval_runtime": 12.2004, | |
| "eval_samples_per_second": 11.065, | |
| "eval_steps_per_second": 0.738, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.16494845360824742, | |
| "grad_norm": 0.222326397895813, | |
| "learning_rate": 0.0003724137931034483, | |
| "loss": 1.0887, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.16494845360824742, | |
| "eval_loss": 1.3819501399993896, | |
| "eval_runtime": 12.1444, | |
| "eval_samples_per_second": 11.116, | |
| "eval_steps_per_second": 0.741, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.17525773195876287, | |
| "grad_norm": 0.1682404726743698, | |
| "learning_rate": 0.000367816091954023, | |
| "loss": 1.0915, | |
| "step": 17 | |
| }, | |
| { | |
| "epoch": 0.17525773195876287, | |
| "eval_loss": 1.3552738428115845, | |
| "eval_runtime": 12.2121, | |
| "eval_samples_per_second": 11.055, | |
| "eval_steps_per_second": 0.737, | |
| "step": 17 | |
| }, | |
| { | |
| "epoch": 0.18556701030927836, | |
| "grad_norm": 0.15567483007907867, | |
| "learning_rate": 0.0003632183908045977, | |
| "loss": 1.0509, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.18556701030927836, | |
| "eval_loss": 1.3488575220108032, | |
| "eval_runtime": 12.2988, | |
| "eval_samples_per_second": 10.977, | |
| "eval_steps_per_second": 0.732, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.1958762886597938, | |
| "grad_norm": 0.1600300371646881, | |
| "learning_rate": 0.0003586206896551724, | |
| "loss": 0.9982, | |
| "step": 19 | |
| }, | |
| { | |
| "epoch": 0.1958762886597938, | |
| "eval_loss": 1.3651177883148193, | |
| "eval_runtime": 12.1263, | |
| "eval_samples_per_second": 11.133, | |
| "eval_steps_per_second": 0.742, | |
| "step": 19 | |
| }, | |
| { | |
| "epoch": 0.20618556701030927, | |
| "grad_norm": 0.1044178307056427, | |
| "learning_rate": 0.00035402298850574715, | |
| "loss": 0.9809, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.20618556701030927, | |
| "eval_loss": 1.401971697807312, | |
| "eval_runtime": 12.2092, | |
| "eval_samples_per_second": 11.057, | |
| "eval_steps_per_second": 0.737, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.21649484536082475, | |
| "grad_norm": 0.12360141426324844, | |
| "learning_rate": 0.0003494252873563219, | |
| "loss": 1.0549, | |
| "step": 21 | |
| }, | |
| { | |
| "epoch": 0.21649484536082475, | |
| "eval_loss": 1.426545262336731, | |
| "eval_runtime": 12.1581, | |
| "eval_samples_per_second": 11.104, | |
| "eval_steps_per_second": 0.74, | |
| "step": 21 | |
| }, | |
| { | |
| "epoch": 0.2268041237113402, | |
| "grad_norm": 0.12509943544864655, | |
| "learning_rate": 0.0003448275862068965, | |
| "loss": 1.0323, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.2268041237113402, | |
| "eval_loss": 1.4258630275726318, | |
| "eval_runtime": 12.201, | |
| "eval_samples_per_second": 11.065, | |
| "eval_steps_per_second": 0.738, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.23711340206185566, | |
| "grad_norm": 0.13586747646331787, | |
| "learning_rate": 0.00034022988505747127, | |
| "loss": 1.0746, | |
| "step": 23 | |
| }, | |
| { | |
| "epoch": 0.23711340206185566, | |
| "eval_loss": 1.3952091932296753, | |
| "eval_runtime": 12.1932, | |
| "eval_samples_per_second": 11.072, | |
| "eval_steps_per_second": 0.738, | |
| "step": 23 | |
| }, | |
| { | |
| "epoch": 0.24742268041237114, | |
| "grad_norm": 0.08069202303886414, | |
| "learning_rate": 0.000335632183908046, | |
| "loss": 0.9645, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.24742268041237114, | |
| "eval_loss": 1.376875638961792, | |
| "eval_runtime": 12.2172, | |
| "eval_samples_per_second": 11.05, | |
| "eval_steps_per_second": 0.737, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.25773195876288657, | |
| "grad_norm": 0.09109444171190262, | |
| "learning_rate": 0.0003310344827586207, | |
| "loss": 0.9345, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.25773195876288657, | |
| "eval_loss": 1.373625636100769, | |
| "eval_runtime": 12.2642, | |
| "eval_samples_per_second": 11.008, | |
| "eval_steps_per_second": 0.734, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.26804123711340205, | |
| "grad_norm": 0.0649966150522232, | |
| "learning_rate": 0.00032643678160919543, | |
| "loss": 1.0266, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.26804123711340205, | |
| "eval_loss": 1.3720897436141968, | |
| "eval_runtime": 12.1576, | |
| "eval_samples_per_second": 11.104, | |
| "eval_steps_per_second": 0.74, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.27835051546391754, | |
| "grad_norm": 0.09308775514364243, | |
| "learning_rate": 0.0003218390804597701, | |
| "loss": 0.9797, | |
| "step": 27 | |
| }, | |
| { | |
| "epoch": 0.27835051546391754, | |
| "eval_loss": 1.3858685493469238, | |
| "eval_runtime": 12.1546, | |
| "eval_samples_per_second": 11.107, | |
| "eval_steps_per_second": 0.74, | |
| "step": 27 | |
| }, | |
| { | |
| "epoch": 0.28865979381443296, | |
| "grad_norm": 0.06654678285121918, | |
| "learning_rate": 0.00031724137931034486, | |
| "loss": 1.0072, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.28865979381443296, | |
| "eval_loss": 1.4031970500946045, | |
| "eval_runtime": 12.1666, | |
| "eval_samples_per_second": 11.096, | |
| "eval_steps_per_second": 0.74, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.29896907216494845, | |
| "grad_norm": 0.07720344513654709, | |
| "learning_rate": 0.0003126436781609196, | |
| "loss": 0.923, | |
| "step": 29 | |
| }, | |
| { | |
| "epoch": 0.29896907216494845, | |
| "eval_loss": 1.4118249416351318, | |
| "eval_runtime": 12.1572, | |
| "eval_samples_per_second": 11.105, | |
| "eval_steps_per_second": 0.74, | |
| "step": 29 | |
| }, | |
| { | |
| "epoch": 0.30927835051546393, | |
| "grad_norm": 0.10230278223752975, | |
| "learning_rate": 0.00030804597701149423, | |
| "loss": 0.9821, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.30927835051546393, | |
| "eval_loss": 1.4045817852020264, | |
| "eval_runtime": 12.1974, | |
| "eval_samples_per_second": 11.068, | |
| "eval_steps_per_second": 0.738, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.31958762886597936, | |
| "grad_norm": 0.07451125234365463, | |
| "learning_rate": 0.00030344827586206897, | |
| "loss": 1.0021, | |
| "step": 31 | |
| }, | |
| { | |
| "epoch": 0.31958762886597936, | |
| "eval_loss": 1.391209363937378, | |
| "eval_runtime": 12.2389, | |
| "eval_samples_per_second": 11.03, | |
| "eval_steps_per_second": 0.735, | |
| "step": 31 | |
| }, | |
| { | |
| "epoch": 0.32989690721649484, | |
| "grad_norm": 0.0714351087808609, | |
| "learning_rate": 0.00029885057471264366, | |
| "loss": 1.0071, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.32989690721649484, | |
| "eval_loss": 1.3730576038360596, | |
| "eval_runtime": 12.2102, | |
| "eval_samples_per_second": 11.056, | |
| "eval_steps_per_second": 0.737, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.3402061855670103, | |
| "grad_norm": 0.06839103996753693, | |
| "learning_rate": 0.0002942528735632184, | |
| "loss": 0.973, | |
| "step": 33 | |
| }, | |
| { | |
| "epoch": 0.3402061855670103, | |
| "eval_loss": 1.3659778833389282, | |
| "eval_runtime": 12.2323, | |
| "eval_samples_per_second": 11.036, | |
| "eval_steps_per_second": 0.736, | |
| "step": 33 | |
| }, | |
| { | |
| "epoch": 0.35051546391752575, | |
| "grad_norm": 0.08078178018331528, | |
| "learning_rate": 0.00028965517241379314, | |
| "loss": 0.964, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.35051546391752575, | |
| "eval_loss": 1.3762452602386475, | |
| "eval_runtime": 12.2027, | |
| "eval_samples_per_second": 11.063, | |
| "eval_steps_per_second": 0.738, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.36082474226804123, | |
| "grad_norm": 0.06870069354772568, | |
| "learning_rate": 0.0002850574712643678, | |
| "loss": 0.9148, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.36082474226804123, | |
| "eval_loss": 1.3925738334655762, | |
| "eval_runtime": 12.2644, | |
| "eval_samples_per_second": 11.007, | |
| "eval_steps_per_second": 0.734, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.3711340206185567, | |
| "grad_norm": 0.06974003463983536, | |
| "learning_rate": 0.00028045977011494257, | |
| "loss": 1.0128, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.3711340206185567, | |
| "eval_loss": 1.4087179899215698, | |
| "eval_runtime": 12.2407, | |
| "eval_samples_per_second": 11.029, | |
| "eval_steps_per_second": 0.735, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.38144329896907214, | |
| "grad_norm": 0.08603405207395554, | |
| "learning_rate": 0.00027586206896551725, | |
| "loss": 0.9776, | |
| "step": 37 | |
| }, | |
| { | |
| "epoch": 0.38144329896907214, | |
| "eval_loss": 1.4067703485488892, | |
| "eval_runtime": 12.173, | |
| "eval_samples_per_second": 11.09, | |
| "eval_steps_per_second": 0.739, | |
| "step": 37 | |
| }, | |
| { | |
| "epoch": 0.3917525773195876, | |
| "grad_norm": 0.07761300355195999, | |
| "learning_rate": 0.00027126436781609194, | |
| "loss": 0.9655, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.3917525773195876, | |
| "eval_loss": 1.3932013511657715, | |
| "eval_runtime": 12.1941, | |
| "eval_samples_per_second": 11.071, | |
| "eval_steps_per_second": 0.738, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.4020618556701031, | |
| "grad_norm": 0.06392566114664078, | |
| "learning_rate": 0.0002666666666666667, | |
| "loss": 0.974, | |
| "step": 39 | |
| }, | |
| { | |
| "epoch": 0.4020618556701031, | |
| "eval_loss": 1.3819767236709595, | |
| "eval_runtime": 12.1765, | |
| "eval_samples_per_second": 11.087, | |
| "eval_steps_per_second": 0.739, | |
| "step": 39 | |
| }, | |
| { | |
| "epoch": 0.41237113402061853, | |
| "grad_norm": 0.05517549812793732, | |
| "learning_rate": 0.00026206896551724137, | |
| "loss": 0.9793, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.41237113402061853, | |
| "eval_loss": 1.3717000484466553, | |
| "eval_runtime": 12.2217, | |
| "eval_samples_per_second": 11.046, | |
| "eval_steps_per_second": 0.736, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.422680412371134, | |
| "grad_norm": 0.0804053246974945, | |
| "learning_rate": 0.0002574712643678161, | |
| "loss": 0.9585, | |
| "step": 41 | |
| }, | |
| { | |
| "epoch": 0.422680412371134, | |
| "eval_loss": 1.3579998016357422, | |
| "eval_runtime": 12.182, | |
| "eval_samples_per_second": 11.082, | |
| "eval_steps_per_second": 0.739, | |
| "step": 41 | |
| }, | |
| { | |
| "epoch": 0.4329896907216495, | |
| "grad_norm": 0.07214821875095367, | |
| "learning_rate": 0.0002528735632183908, | |
| "loss": 0.9332, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.4329896907216495, | |
| "eval_loss": 1.3583327531814575, | |
| "eval_runtime": 12.1257, | |
| "eval_samples_per_second": 11.133, | |
| "eval_steps_per_second": 0.742, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.44329896907216493, | |
| "grad_norm": 0.07595060020685196, | |
| "learning_rate": 0.00024827586206896553, | |
| "loss": 0.8998, | |
| "step": 43 | |
| }, | |
| { | |
| "epoch": 0.44329896907216493, | |
| "eval_loss": 1.3721704483032227, | |
| "eval_runtime": 12.2745, | |
| "eval_samples_per_second": 10.998, | |
| "eval_steps_per_second": 0.733, | |
| "step": 43 | |
| }, | |
| { | |
| "epoch": 0.4536082474226804, | |
| "grad_norm": 0.07757716625928879, | |
| "learning_rate": 0.00024367816091954025, | |
| "loss": 0.9661, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.4536082474226804, | |
| "eval_loss": 1.3984168767929077, | |
| "eval_runtime": 12.1398, | |
| "eval_samples_per_second": 11.12, | |
| "eval_steps_per_second": 0.741, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.4639175257731959, | |
| "grad_norm": 0.053873661905527115, | |
| "learning_rate": 0.00023908045977011496, | |
| "loss": 0.9418, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.4639175257731959, | |
| "eval_loss": 1.4266961812973022, | |
| "eval_runtime": 12.1955, | |
| "eval_samples_per_second": 11.07, | |
| "eval_steps_per_second": 0.738, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.4742268041237113, | |
| "grad_norm": 0.08943776786327362, | |
| "learning_rate": 0.00023448275862068965, | |
| "loss": 0.9309, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.4742268041237113, | |
| "eval_loss": 1.4349451065063477, | |
| "eval_runtime": 12.1983, | |
| "eval_samples_per_second": 11.067, | |
| "eval_steps_per_second": 0.738, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.4845360824742268, | |
| "grad_norm": 0.0885058343410492, | |
| "learning_rate": 0.00022988505747126436, | |
| "loss": 1.0245, | |
| "step": 47 | |
| }, | |
| { | |
| "epoch": 0.4845360824742268, | |
| "eval_loss": 1.4226434230804443, | |
| "eval_runtime": 12.1753, | |
| "eval_samples_per_second": 11.088, | |
| "eval_steps_per_second": 0.739, | |
| "step": 47 | |
| }, | |
| { | |
| "epoch": 0.4948453608247423, | |
| "grad_norm": 0.058818139135837555, | |
| "learning_rate": 0.00022528735632183907, | |
| "loss": 0.9007, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.4948453608247423, | |
| "eval_loss": 1.4033018350601196, | |
| "eval_runtime": 12.267, | |
| "eval_samples_per_second": 11.005, | |
| "eval_steps_per_second": 0.734, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.5051546391752577, | |
| "grad_norm": 0.07104739546775818, | |
| "learning_rate": 0.0002206896551724138, | |
| "loss": 0.9469, | |
| "step": 49 | |
| }, | |
| { | |
| "epoch": 0.5051546391752577, | |
| "eval_loss": 1.3786002397537231, | |
| "eval_runtime": 12.2731, | |
| "eval_samples_per_second": 11.0, | |
| "eval_steps_per_second": 0.733, | |
| "step": 49 | |
| }, | |
| { | |
| "epoch": 0.5154639175257731, | |
| "grad_norm": 0.05872216075658798, | |
| "learning_rate": 0.00021609195402298853, | |
| "loss": 0.9671, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.5154639175257731, | |
| "eval_loss": 1.3649392127990723, | |
| "eval_runtime": 12.1662, | |
| "eval_samples_per_second": 11.096, | |
| "eval_steps_per_second": 0.74, | |
| "step": 50 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 97, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 10, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1.85446101417984e+16, | |
| "train_batch_size": 32, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |