{
  "best_global_step": 6505,
  "best_metric": 0.19649724662303925,
  "best_model_checkpoint": "/media/user/Expansion1/snowflake-arctic-embed-xs-refusal/checkpoint-6505",
  "epoch": 5.0,
  "eval_steps": 500,
  "global_step": 32525,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.07686395080707148,
      "grad_norm": 2.155881881713867,
      "learning_rate": 4.923289777094543e-05,
      "loss": 0.3511,
      "num_input_tokens_seen": 512000,
      "step": 500,
      "train_runtime": 5.3031,
      "train_tokens_per_second": 96546.489
    },
    {
      "epoch": 0.15372790161414296,
      "grad_norm": 0.6236560344696045,
      "learning_rate": 4.846425826287471e-05,
      "loss": 0.2593,
      "num_input_tokens_seen": 1024000,
      "step": 1000,
      "train_runtime": 10.3436,
      "train_tokens_per_second": 98998.124
    },
    {
      "epoch": 0.23059185242121444,
      "grad_norm": 3.8176653385162354,
      "learning_rate": 4.7695618754804e-05,
      "loss": 0.2546,
      "num_input_tokens_seen": 1536000,
      "step": 1500,
      "train_runtime": 15.3626,
      "train_tokens_per_second": 99983.128
    },
    {
      "epoch": 0.3074558032282859,
      "grad_norm": 2.538367986679077,
      "learning_rate": 4.692697924673328e-05,
      "loss": 0.2377,
      "num_input_tokens_seen": 2048000,
      "step": 2000,
      "train_runtime": 20.351,
      "train_tokens_per_second": 100634.117
    },
    {
      "epoch": 0.3843197540353574,
      "grad_norm": 3.922593832015991,
      "learning_rate": 4.615833973866257e-05,
      "loss": 0.2411,
      "num_input_tokens_seen": 2560000,
      "step": 2500,
      "train_runtime": 25.363,
      "train_tokens_per_second": 100934.428
    },
    {
      "epoch": 0.4611837048424289,
      "grad_norm": 0.728330135345459,
      "learning_rate": 4.5389700230591855e-05,
      "loss": 0.2278,
      "num_input_tokens_seen": 3072000,
      "step": 3000,
      "train_runtime": 30.387,
      "train_tokens_per_second": 101095.729
    },
    {
      "epoch": 0.5380476556495004,
      "grad_norm": 0.5299816131591797,
      "learning_rate": 4.462106072252114e-05,
      "loss": 0.2422,
      "num_input_tokens_seen": 3584000,
      "step": 3500,
      "train_runtime": 35.3784,
      "train_tokens_per_second": 101304.819
    },
    {
      "epoch": 0.6149116064565718,
      "grad_norm": 9.030647277832031,
      "learning_rate": 4.3852421214450424e-05,
      "loss": 0.2269,
      "num_input_tokens_seen": 4096000,
      "step": 4000,
      "train_runtime": 40.3905,
      "train_tokens_per_second": 101409.866
    },
    {
      "epoch": 0.6917755572636434,
      "grad_norm": 1.8069450855255127,
      "learning_rate": 4.308378170637971e-05,
      "loss": 0.2378,
      "num_input_tokens_seen": 4608000,
      "step": 4500,
      "train_runtime": 45.4094,
      "train_tokens_per_second": 101476.838
    },
    {
      "epoch": 0.7686395080707148,
      "grad_norm": 2.4024786949157715,
      "learning_rate": 4.231514219830899e-05,
      "loss": 0.2129,
      "num_input_tokens_seen": 5120000,
      "step": 5000,
      "train_runtime": 50.4114,
      "train_tokens_per_second": 101564.317
    },
    {
      "epoch": 0.8455034588777863,
      "grad_norm": 0.8753976821899414,
      "learning_rate": 4.1546502690238284e-05,
      "loss": 0.2354,
      "num_input_tokens_seen": 5632000,
      "step": 5500,
      "train_runtime": 55.4722,
      "train_tokens_per_second": 101528.336
    },
    {
      "epoch": 0.9223674096848578,
      "grad_norm": 4.247986316680908,
      "learning_rate": 4.077786318216756e-05,
      "loss": 0.237,
      "num_input_tokens_seen": 6144000,
      "step": 6000,
      "train_runtime": 60.5329,
      "train_tokens_per_second": 101498.514
    },
    {
      "epoch": 0.9992313604919293,
      "grad_norm": 7.1489973068237305,
      "learning_rate": 4.000922367409685e-05,
      "loss": 0.2286,
      "num_input_tokens_seen": 6656000,
      "step": 6500,
      "train_runtime": 65.5361,
      "train_tokens_per_second": 101562.357
    },
    {
      "epoch": 1.0,
      "eval_accuracy": 0.919369715603382,
      "eval_loss": 0.19649724662303925,
      "eval_runtime": 5.1167,
      "eval_samples_per_second": 2542.665,
      "eval_steps_per_second": 317.98,
      "num_input_tokens_seen": 6661120,
      "step": 6505
    },
    {
      "epoch": 1.0760953112990008,
      "grad_norm": 10.231003761291504,
      "learning_rate": 3.9240584166026136e-05,
      "loss": 0.1751,
      "num_input_tokens_seen": 7168000,
      "step": 7000,
      "train_runtime": 75.9746,
      "train_tokens_per_second": 94347.276
    },
    {
      "epoch": 1.1529592621060722,
      "grad_norm": 0.037300433963537216,
      "learning_rate": 3.847194465795542e-05,
      "loss": 0.1739,
      "num_input_tokens_seen": 7680000,
      "step": 7500,
      "train_runtime": 81.0147,
      "train_tokens_per_second": 94797.624
    },
    {
      "epoch": 1.2298232129131437,
      "grad_norm": 10.88604736328125,
      "learning_rate": 3.7703305149884705e-05,
      "loss": 0.1923,
      "num_input_tokens_seen": 8192000,
      "step": 8000,
      "train_runtime": 86.0728,
      "train_tokens_per_second": 95175.201
    },
    {
      "epoch": 1.3066871637202153,
      "grad_norm": 14.25737190246582,
      "learning_rate": 3.693466564181399e-05,
      "loss": 0.1809,
      "num_input_tokens_seen": 8704000,
      "step": 8500,
      "train_runtime": 91.0822,
      "train_tokens_per_second": 95562.051
    },
    {
      "epoch": 1.3835511145272867,
      "grad_norm": 0.3044818639755249,
      "learning_rate": 3.6166026133743274e-05,
      "loss": 0.1922,
      "num_input_tokens_seen": 9216000,
      "step": 9000,
      "train_runtime": 96.1257,
      "train_tokens_per_second": 95874.452
    },
    {
      "epoch": 1.4604150653343582,
      "grad_norm": 15.291511535644531,
      "learning_rate": 3.5397386625672565e-05,
      "loss": 0.171,
      "num_input_tokens_seen": 9728000,
      "step": 9500,
      "train_runtime": 101.1575,
      "train_tokens_per_second": 96166.85
    },
    {
      "epoch": 1.5372790161414298,
      "grad_norm": 12.030097007751465,
      "learning_rate": 3.462874711760184e-05,
      "loss": 0.1779,
      "num_input_tokens_seen": 10240000,
      "step": 10000,
      "train_runtime": 106.2268,
      "train_tokens_per_second": 96397.497
    },
    {
      "epoch": 1.614142966948501,
      "grad_norm": 0.17212723195552826,
      "learning_rate": 3.386010760953113e-05,
      "loss": 0.176,
      "num_input_tokens_seen": 10752000,
      "step": 10500,
      "train_runtime": 111.2959,
      "train_tokens_per_second": 96607.35
    },
    {
      "epoch": 1.6910069177555727,
      "grad_norm": 35.843482971191406,
      "learning_rate": 3.309146810146042e-05,
      "loss": 0.1879,
      "num_input_tokens_seen": 11264000,
      "step": 11000,
      "train_runtime": 116.3574,
      "train_tokens_per_second": 96805.219
    },
    {
      "epoch": 1.767870868562644,
      "grad_norm": 0.055776312947273254,
      "learning_rate": 3.23228285933897e-05,
      "loss": 0.1749,
      "num_input_tokens_seen": 11776000,
      "step": 11500,
      "train_runtime": 121.386,
      "train_tokens_per_second": 97012.845
    },
    {
      "epoch": 1.8447348193697155,
      "grad_norm": 0.48420748114585876,
      "learning_rate": 3.1554189085318986e-05,
      "loss": 0.1716,
      "num_input_tokens_seen": 12288000,
      "step": 12000,
      "train_runtime": 126.4198,
      "train_tokens_per_second": 97200.001
    },
    {
      "epoch": 1.9215987701767872,
      "grad_norm": 0.3808608949184418,
      "learning_rate": 3.078554957724827e-05,
      "loss": 0.1819,
      "num_input_tokens_seen": 12800000,
      "step": 12500,
      "train_runtime": 131.456,
      "train_tokens_per_second": 97370.996
    },
    {
      "epoch": 1.9984627209838586,
      "grad_norm": 0.15483863651752472,
      "learning_rate": 3.0016910069177555e-05,
      "loss": 0.1718,
      "num_input_tokens_seen": 13312000,
      "step": 13000,
      "train_runtime": 136.5089,
      "train_tokens_per_second": 97517.416
    },
    {
      "epoch": 2.0,
      "eval_accuracy": 0.9259031514219831,
      "eval_loss": 0.27698734402656555,
      "eval_runtime": 5.1343,
      "eval_samples_per_second": 2533.949,
      "eval_steps_per_second": 316.89,
      "num_input_tokens_seen": 13322240,
      "step": 13010
    },
    {
      "epoch": 2.07532667179093,
      "grad_norm": 0.06390306353569031,
      "learning_rate": 2.9248270561106846e-05,
      "loss": 0.1384,
      "num_input_tokens_seen": 13824000,
      "step": 13500,
      "train_runtime": 146.9662,
      "train_tokens_per_second": 94062.42
    },
    {
      "epoch": 2.1521906225980016,
      "grad_norm": 0.09185440093278885,
      "learning_rate": 2.8479631053036127e-05,
      "loss": 0.123,
      "num_input_tokens_seen": 14336000,
      "step": 14000,
      "train_runtime": 152.0064,
      "train_tokens_per_second": 94311.832
    },
    {
      "epoch": 2.229054573405073,
      "grad_norm": 0.05354034900665283,
      "learning_rate": 2.7710991544965414e-05,
      "loss": 0.1265,
      "num_input_tokens_seen": 14848000,
      "step": 14500,
      "train_runtime": 157.0491,
      "train_tokens_per_second": 94543.682
    },
    {
      "epoch": 2.3059185242121445,
      "grad_norm": 6.2592926025390625,
      "learning_rate": 2.6942352036894695e-05,
      "loss": 0.1214,
      "num_input_tokens_seen": 15360000,
      "step": 15000,
      "train_runtime": 162.0991,
      "train_tokens_per_second": 94756.854
    },
    {
      "epoch": 2.382782475019216,
      "grad_norm": 0.3284030854701996,
      "learning_rate": 2.6173712528823986e-05,
      "loss": 0.1298,
      "num_input_tokens_seen": 15872000,
      "step": 15500,
      "train_runtime": 167.141,
      "train_tokens_per_second": 94961.759
    },
    {
      "epoch": 2.4596464258262873,
      "grad_norm": 0.2101190984249115,
      "learning_rate": 2.5405073020753267e-05,
      "loss": 0.1279,
      "num_input_tokens_seen": 16384000,
      "step": 16000,
      "train_runtime": 172.1787,
      "train_tokens_per_second": 95156.958
    },
    {
      "epoch": 2.536510376633359,
      "grad_norm": 7.672014236450195,
      "learning_rate": 2.463643351268255e-05,
      "loss": 0.1435,
      "num_input_tokens_seen": 16896000,
      "step": 16500,
      "train_runtime": 177.2341,
      "train_tokens_per_second": 95331.566
    },
    {
      "epoch": 2.6133743274404306,
      "grad_norm": 37.9052734375,
      "learning_rate": 2.3867794004611836e-05,
      "loss": 0.123,
      "num_input_tokens_seen": 17408000,
      "step": 17000,
      "train_runtime": 182.2827,
      "train_tokens_per_second": 95500.03
    },
    {
      "epoch": 2.690238278247502,
      "grad_norm": 0.08578933030366898,
      "learning_rate": 2.3099154496541124e-05,
      "loss": 0.1289,
      "num_input_tokens_seen": 17920000,
      "step": 17500,
      "train_runtime": 187.3355,
      "train_tokens_per_second": 95657.272
    },
    {
      "epoch": 2.7671022290545735,
      "grad_norm": 0.08860859274864197,
      "learning_rate": 2.2330514988470408e-05,
      "loss": 0.1296,
      "num_input_tokens_seen": 18432000,
      "step": 18000,
      "train_runtime": 192.3781,
      "train_tokens_per_second": 95811.329
    },
    {
      "epoch": 2.8439661798616447,
      "grad_norm": 0.41104796528816223,
      "learning_rate": 2.1561875480399692e-05,
      "loss": 0.1124,
      "num_input_tokens_seen": 18944000,
      "step": 18500,
      "train_runtime": 197.7595,
      "train_tokens_per_second": 95793.134
    },
    {
      "epoch": 2.9208301306687163,
      "grad_norm": 37.97283172607422,
      "learning_rate": 2.079323597232898e-05,
      "loss": 0.1382,
      "num_input_tokens_seen": 19456000,
      "step": 19000,
      "train_runtime": 203.2967,
      "train_tokens_per_second": 95702.474
    },
    {
      "epoch": 2.997694081475788,
      "grad_norm": 0.0325402170419693,
      "learning_rate": 2.0024596464258264e-05,
      "loss": 0.1388,
      "num_input_tokens_seen": 19968000,
      "step": 19500,
      "train_runtime": 208.8029,
      "train_tokens_per_second": 95630.843
    },
    {
      "epoch": 3.0,
      "eval_accuracy": 0.9287471176018447,
      "eval_loss": 0.31825903058052063,
      "eval_runtime": 5.4038,
      "eval_samples_per_second": 2407.562,
      "eval_steps_per_second": 301.084,
      "num_input_tokens_seen": 19983360,
      "step": 19515
    },
    {
      "epoch": 3.074558032282859,
      "grad_norm": 0.05180477350950241,
      "learning_rate": 1.925595695618755e-05,
      "loss": 0.0823,
      "num_input_tokens_seen": 20480000,
      "step": 20000,
      "train_runtime": 219.9739,
      "train_tokens_per_second": 93101.956
    },
    {
      "epoch": 3.151421983089931,
      "grad_norm": 0.005111335311084986,
      "learning_rate": 1.8487317448116833e-05,
      "loss": 0.0716,
      "num_input_tokens_seen": 20992000,
      "step": 20500,
      "train_runtime": 225.3567,
      "train_tokens_per_second": 93150.099
    },
    {
      "epoch": 3.2282859338970025,
      "grad_norm": 0.012623129412531853,
      "learning_rate": 1.771867794004612e-05,
      "loss": 0.0915,
      "num_input_tokens_seen": 21504000,
      "step": 21000,
      "train_runtime": 230.6352,
      "train_tokens_per_second": 93238.15
    },
    {
      "epoch": 3.3051498847040737,
      "grad_norm": 10.89956283569336,
      "learning_rate": 1.6950038431975405e-05,
      "loss": 0.0783,
      "num_input_tokens_seen": 22016000,
      "step": 21500,
      "train_runtime": 236.059,
      "train_tokens_per_second": 93264.831
    },
    {
      "epoch": 3.3820138355111453,
      "grad_norm": 0.010125258006155491,
      "learning_rate": 1.618139892390469e-05,
      "loss": 0.0882,
      "num_input_tokens_seen": 22528000,
      "step": 22000,
      "train_runtime": 241.5242,
      "train_tokens_per_second": 93274.31
    },
    {
      "epoch": 3.458877786318217,
      "grad_norm": 0.04097803309559822,
      "learning_rate": 1.5412759415833973e-05,
      "loss": 0.095,
      "num_input_tokens_seen": 23040000,
      "step": 22500,
      "train_runtime": 247.0361,
      "train_tokens_per_second": 93265.737
    },
    {
      "epoch": 3.535741737125288,
      "grad_norm": 0.3172767758369446,
      "learning_rate": 1.464411990776326e-05,
      "loss": 0.0847,
      "num_input_tokens_seen": 23552000,
      "step": 23000,
      "train_runtime": 252.366,
      "train_tokens_per_second": 93324.783
    },
    {
      "epoch": 3.61260568793236,
      "grad_norm": 0.0049354820512235165,
      "learning_rate": 1.3875480399692545e-05,
      "loss": 0.0758,
      "num_input_tokens_seen": 24064000,
      "step": 23500,
      "train_runtime": 257.7685,
      "train_tokens_per_second": 93355.1
    },
    {
      "epoch": 3.689469638739431,
      "grad_norm": 0.09353843331336975,
      "learning_rate": 1.310684089162183e-05,
      "loss": 0.0812,
      "num_input_tokens_seen": 24576000,
      "step": 24000,
      "train_runtime": 263.0331,
      "train_tokens_per_second": 93433.102
    },
    {
      "epoch": 3.7663335895465027,
      "grad_norm": 13.593195915222168,
      "learning_rate": 1.2338201383551116e-05,
      "loss": 0.0882,
      "num_input_tokens_seen": 25088000,
      "step": 24500,
      "train_runtime": 268.3188,
      "train_tokens_per_second": 93500.701
    },
    {
      "epoch": 3.8431975403535743,
      "grad_norm": 0.12645399570465088,
      "learning_rate": 1.15695618754804e-05,
      "loss": 0.0862,
      "num_input_tokens_seen": 25600000,
      "step": 25000,
      "train_runtime": 273.6907,
      "train_tokens_per_second": 93536.259
    },
    {
      "epoch": 3.9200614911606455,
      "grad_norm": 0.015061162412166595,
      "learning_rate": 1.0800922367409686e-05,
      "loss": 0.0825,
      "num_input_tokens_seen": 26112000,
      "step": 25500,
      "train_runtime": 279.0408,
      "train_tokens_per_second": 93577.699
    },
    {
      "epoch": 3.996925441967717,
      "grad_norm": 0.020776506513357162,
      "learning_rate": 1.003228285933897e-05,
      "loss": 0.0772,
      "num_input_tokens_seen": 26624000,
      "step": 26000,
      "train_runtime": 284.4194,
      "train_tokens_per_second": 93608.25
    },
    {
      "epoch": 4.0,
      "eval_accuracy": 0.9270561106840891,
      "eval_loss": 0.43025368452072144,
      "eval_runtime": 5.3244,
      "eval_samples_per_second": 2443.487,
      "eval_steps_per_second": 305.577,
      "num_input_tokens_seen": 26644480,
      "step": 26020
    },
    {
      "epoch": 4.073789392774788,
      "grad_norm": 0.09127756953239441,
      "learning_rate": 9.263643351268256e-06,
      "loss": 0.0615,
      "num_input_tokens_seen": 27136000,
      "step": 26500,
      "train_runtime": 295.1257,
      "train_tokens_per_second": 91947.265
    },
    {
      "epoch": 4.15065334358186,
      "grad_norm": 0.11556842923164368,
      "learning_rate": 8.49500384319754e-06,
      "loss": 0.0444,
      "num_input_tokens_seen": 27648000,
      "step": 27000,
      "train_runtime": 300.149,
      "train_tokens_per_second": 92114.259
    },
    {
      "epoch": 4.227517294388932,
      "grad_norm": 0.09005430340766907,
      "learning_rate": 7.726364335126826e-06,
      "loss": 0.0434,
      "num_input_tokens_seen": 28160000,
      "step": 27500,
      "train_runtime": 305.2327,
      "train_tokens_per_second": 92257.495
    },
    {
      "epoch": 4.304381245196003,
      "grad_norm": 0.004569609649479389,
      "learning_rate": 6.9577248270561115e-06,
      "loss": 0.0352,
      "num_input_tokens_seen": 28672000,
      "step": 28000,
      "train_runtime": 310.2962,
      "train_tokens_per_second": 92402.026
    },
    {
      "epoch": 4.381245196003075,
      "grad_norm": 1.524936318397522,
      "learning_rate": 6.189085318985397e-06,
      "loss": 0.06,
      "num_input_tokens_seen": 29184000,
      "step": 28500,
      "train_runtime": 315.365,
      "train_tokens_per_second": 92540.388
    },
    {
      "epoch": 4.458109146810146,
      "grad_norm": 0.014427268877625465,
      "learning_rate": 5.420445810914681e-06,
      "loss": 0.0492,
      "num_input_tokens_seen": 29696000,
      "step": 29000,
      "train_runtime": 320.4274,
      "train_tokens_per_second": 92676.208
    },
    {
      "epoch": 4.534973097617217,
      "grad_norm": 0.07355033606290817,
      "learning_rate": 4.651806302843966e-06,
      "loss": 0.0572,
      "num_input_tokens_seen": 30208000,
      "step": 29500,
      "train_runtime": 325.4966,
      "train_tokens_per_second": 92805.883
    },
    {
      "epoch": 4.611837048424289,
      "grad_norm": 7.176478385925293,
      "learning_rate": 3.883166794773251e-06,
      "loss": 0.0442,
      "num_input_tokens_seen": 30720000,
      "step": 30000,
      "train_runtime": 330.568,
      "train_tokens_per_second": 92930.968
    },
    {
      "epoch": 4.688700999231361,
      "grad_norm": 0.030576860532164574,
      "learning_rate": 3.114527286702537e-06,
      "loss": 0.0465,
      "num_input_tokens_seen": 31232000,
      "step": 30500,
      "train_runtime": 335.6493,
      "train_tokens_per_second": 93049.488
    },
    {
      "epoch": 4.765564950038432,
      "grad_norm": 0.005597515497356653,
      "learning_rate": 2.345887778631822e-06,
      "loss": 0.0487,
      "num_input_tokens_seen": 31744000,
      "step": 31000,
      "train_runtime": 340.8069,
      "train_tokens_per_second": 93143.641
    },
    {
      "epoch": 4.842428900845503,
      "grad_norm": 0.021089155226945877,
      "learning_rate": 1.5772482705611067e-06,
      "loss": 0.0557,
      "num_input_tokens_seen": 32256000,
      "step": 31500,
      "train_runtime": 346.0097,
      "train_tokens_per_second": 93222.824
    },
    {
      "epoch": 4.919292851652575,
      "grad_norm": 0.023516027256846428,
      "learning_rate": 8.086087624903922e-07,
      "loss": 0.0399,
      "num_input_tokens_seen": 32768000,
      "step": 32000,
      "train_runtime": 351.2595,
      "train_tokens_per_second": 93287.165
    },
    {
      "epoch": 4.996156802459646,
      "grad_norm": 0.007581554353237152,
      "learning_rate": 3.996925441967718e-08,
      "loss": 0.0532,
      "num_input_tokens_seen": 33280000,
      "step": 32500,
      "train_runtime": 356.5514,
      "train_tokens_per_second": 93338.566
    },
    {
      "epoch": 5.0,
      "eval_accuracy": 0.9264411990776326,
      "eval_loss": 0.4613732397556305,
      "eval_runtime": 5.2997,
      "eval_samples_per_second": 2454.857,
      "eval_steps_per_second": 306.999,
      "num_input_tokens_seen": 33305600,
      "step": 32525
    },
    {
      "epoch": 5.0,
      "num_input_tokens_seen": 33305600,
      "step": 32525,
      "total_flos": 2157435918643200.0,
      "train_loss": 0.1371641572881533,
      "train_runtime": 362.4532,
      "train_samples_per_second": 717.886,
      "train_steps_per_second": 89.736
    }
  ],
  "logging_steps": 500,
  "max_steps": 32525,
  "num_input_tokens_seen": 33305600,
  "num_train_epochs": 5,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 2157435918643200.0,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}