| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 10.0, | |
| "global_step": 5630, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 6.937275985663082e-06, | |
| "loss": 0.6362, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "eval_accuracy": 0.7197197079658508, | |
| "eval_loss": 0.5481122136116028, | |
| "eval_runtime": 6.2072, | |
| "eval_samples_per_second": 160.941, | |
| "eval_steps_per_second": 10.149, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 6.811827956989247e-06, | |
| "loss": 0.4264, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "eval_accuracy": 0.8008008003234863, | |
| "eval_loss": 0.4550396203994751, | |
| "eval_runtime": 6.2195, | |
| "eval_samples_per_second": 160.623, | |
| "eval_steps_per_second": 10.129, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 6.6863799283154114e-06, | |
| "loss": 0.4174, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "eval_accuracy": 0.7867867946624756, | |
| "eval_loss": 0.452409952878952, | |
| "eval_runtime": 6.2183, | |
| "eval_samples_per_second": 160.655, | |
| "eval_steps_per_second": 10.131, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 6.560931899641577e-06, | |
| "loss": 0.4197, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "eval_accuracy": 0.7917917966842651, | |
| "eval_loss": 0.4586125910282135, | |
| "eval_runtime": 6.2441, | |
| "eval_samples_per_second": 159.991, | |
| "eval_steps_per_second": 10.09, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 6.435483870967742e-06, | |
| "loss": 0.3819, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "eval_accuracy": 0.8078078031539917, | |
| "eval_loss": 0.4367608428001404, | |
| "eval_runtime": 6.2213, | |
| "eval_samples_per_second": 160.577, | |
| "eval_steps_per_second": 10.126, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 1.07, | |
| "learning_rate": 6.310035842293907e-06, | |
| "loss": 0.3558, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 1.07, | |
| "eval_accuracy": 0.8068068027496338, | |
| "eval_loss": 0.4524727463722229, | |
| "eval_runtime": 6.2342, | |
| "eval_samples_per_second": 160.246, | |
| "eval_steps_per_second": 10.106, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 1.24, | |
| "learning_rate": 6.184587813620071e-06, | |
| "loss": 0.2982, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 1.24, | |
| "eval_accuracy": 0.792792797088623, | |
| "eval_loss": 0.49992287158966064, | |
| "eval_runtime": 6.206, | |
| "eval_samples_per_second": 160.973, | |
| "eval_steps_per_second": 10.151, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 1.42, | |
| "learning_rate": 6.059139784946236e-06, | |
| "loss": 0.2885, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 1.42, | |
| "eval_accuracy": 0.8108108043670654, | |
| "eval_loss": 0.5129059553146362, | |
| "eval_runtime": 6.2199, | |
| "eval_samples_per_second": 160.613, | |
| "eval_steps_per_second": 10.129, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "learning_rate": 5.933691756272401e-06, | |
| "loss": 0.253, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "eval_accuracy": 0.8208208084106445, | |
| "eval_loss": 0.5872611403465271, | |
| "eval_runtime": 6.2332, | |
| "eval_samples_per_second": 160.27, | |
| "eval_steps_per_second": 10.107, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 1.78, | |
| "learning_rate": 5.8082437275985665e-06, | |
| "loss": 0.3354, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 1.78, | |
| "eval_accuracy": 0.8178178071975708, | |
| "eval_loss": 0.4244420826435089, | |
| "eval_runtime": 6.2275, | |
| "eval_samples_per_second": 160.417, | |
| "eval_steps_per_second": 10.116, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 1.95, | |
| "learning_rate": 5.682795698924731e-06, | |
| "loss": 0.3083, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 1.95, | |
| "eval_accuracy": 0.8058058023452759, | |
| "eval_loss": 0.4852960705757141, | |
| "eval_runtime": 6.2193, | |
| "eval_samples_per_second": 160.63, | |
| "eval_steps_per_second": 10.13, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 2.13, | |
| "learning_rate": 5.557347670250896e-06, | |
| "loss": 0.2301, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 2.13, | |
| "eval_accuracy": 0.8018018007278442, | |
| "eval_loss": 0.7208853960037231, | |
| "eval_runtime": 6.2021, | |
| "eval_samples_per_second": 161.075, | |
| "eval_steps_per_second": 10.158, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 2.31, | |
| "learning_rate": 5.431899641577061e-06, | |
| "loss": 0.2167, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 2.31, | |
| "eval_accuracy": 0.7777777910232544, | |
| "eval_loss": 0.8089737892150879, | |
| "eval_runtime": 6.2037, | |
| "eval_samples_per_second": 161.034, | |
| "eval_steps_per_second": 10.155, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 2.49, | |
| "learning_rate": 5.306451612903225e-06, | |
| "loss": 0.1863, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 2.49, | |
| "eval_accuracy": 0.8038038015365601, | |
| "eval_loss": 0.6812323927879333, | |
| "eval_runtime": 6.2398, | |
| "eval_samples_per_second": 160.102, | |
| "eval_steps_per_second": 10.097, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 2.66, | |
| "learning_rate": 5.181003584229391e-06, | |
| "loss": 0.2181, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 2.66, | |
| "eval_accuracy": 0.8138138055801392, | |
| "eval_loss": 0.6958026885986328, | |
| "eval_runtime": 6.2122, | |
| "eval_samples_per_second": 160.812, | |
| "eval_steps_per_second": 10.141, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 2.84, | |
| "learning_rate": 5.0555555555555555e-06, | |
| "loss": 0.2159, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 2.84, | |
| "eval_accuracy": 0.8118118047714233, | |
| "eval_loss": 0.6314735412597656, | |
| "eval_runtime": 6.2306, | |
| "eval_samples_per_second": 160.337, | |
| "eval_steps_per_second": 10.111, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 3.02, | |
| "learning_rate": 4.930107526881721e-06, | |
| "loss": 0.1828, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 3.02, | |
| "eval_accuracy": 0.8138138055801392, | |
| "eval_loss": 0.7173236608505249, | |
| "eval_runtime": 6.2107, | |
| "eval_samples_per_second": 160.851, | |
| "eval_steps_per_second": 10.144, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 3.2, | |
| "learning_rate": 4.804659498207885e-06, | |
| "loss": 0.1287, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 3.2, | |
| "eval_accuracy": 0.8018018007278442, | |
| "eval_loss": 0.9080932140350342, | |
| "eval_runtime": 6.2027, | |
| "eval_samples_per_second": 161.06, | |
| "eval_steps_per_second": 10.157, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 3.37, | |
| "learning_rate": 4.67921146953405e-06, | |
| "loss": 0.1711, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 3.37, | |
| "eval_accuracy": 0.8068068027496338, | |
| "eval_loss": 0.8858422040939331, | |
| "eval_runtime": 6.2188, | |
| "eval_samples_per_second": 160.641, | |
| "eval_steps_per_second": 10.131, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 3.55, | |
| "learning_rate": 4.553763440860215e-06, | |
| "loss": 0.1598, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 3.55, | |
| "eval_accuracy": 0.8028028011322021, | |
| "eval_loss": 0.7877860069274902, | |
| "eval_runtime": 6.2062, | |
| "eval_samples_per_second": 160.967, | |
| "eval_steps_per_second": 10.151, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 3.73, | |
| "learning_rate": 4.42831541218638e-06, | |
| "loss": 0.1467, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 3.73, | |
| "eval_accuracy": 0.7947947978973389, | |
| "eval_loss": 0.900332510471344, | |
| "eval_runtime": 6.2358, | |
| "eval_samples_per_second": 160.203, | |
| "eval_steps_per_second": 10.103, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 3.91, | |
| "learning_rate": 4.302867383512545e-06, | |
| "loss": 0.127, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 3.91, | |
| "eval_accuracy": 0.804804801940918, | |
| "eval_loss": 0.9066368341445923, | |
| "eval_runtime": 6.2129, | |
| "eval_samples_per_second": 160.795, | |
| "eval_steps_per_second": 10.14, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 4.09, | |
| "learning_rate": 4.17741935483871e-06, | |
| "loss": 0.1134, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 4.09, | |
| "eval_accuracy": 0.8118118047714233, | |
| "eval_loss": 0.9645766615867615, | |
| "eval_runtime": 6.2157, | |
| "eval_samples_per_second": 160.721, | |
| "eval_steps_per_second": 10.136, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 4.26, | |
| "learning_rate": 4.051971326164874e-06, | |
| "loss": 0.1017, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 4.26, | |
| "eval_accuracy": 0.804804801940918, | |
| "eval_loss": 0.9778422713279724, | |
| "eval_runtime": 6.2303, | |
| "eval_samples_per_second": 160.346, | |
| "eval_steps_per_second": 10.112, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 4.44, | |
| "learning_rate": 3.926523297491039e-06, | |
| "loss": 0.085, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 4.44, | |
| "eval_accuracy": 0.8088088035583496, | |
| "eval_loss": 1.0528582334518433, | |
| "eval_runtime": 6.238, | |
| "eval_samples_per_second": 160.149, | |
| "eval_steps_per_second": 10.099, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 4.62, | |
| "learning_rate": 3.801075268817204e-06, | |
| "loss": 0.0996, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 4.62, | |
| "eval_accuracy": 0.8058058023452759, | |
| "eval_loss": 1.0082268714904785, | |
| "eval_runtime": 6.2065, | |
| "eval_samples_per_second": 160.961, | |
| "eval_steps_per_second": 10.151, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 4.8, | |
| "learning_rate": 3.6756272401433694e-06, | |
| "loss": 0.1054, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 4.8, | |
| "eval_accuracy": 0.8108108043670654, | |
| "eval_loss": 0.9697705507278442, | |
| "eval_runtime": 6.2348, | |
| "eval_samples_per_second": 160.231, | |
| "eval_steps_per_second": 10.105, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 4.97, | |
| "learning_rate": 3.5501792114695336e-06, | |
| "loss": 0.1375, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 4.97, | |
| "eval_accuracy": 0.804804801940918, | |
| "eval_loss": 0.9333746433258057, | |
| "eval_runtime": 6.2109, | |
| "eval_samples_per_second": 160.846, | |
| "eval_steps_per_second": 10.143, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 5.15, | |
| "learning_rate": 3.4247311827956988e-06, | |
| "loss": 0.0487, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 5.15, | |
| "eval_accuracy": 0.8108108043670654, | |
| "eval_loss": 1.1273365020751953, | |
| "eval_runtime": 6.2065, | |
| "eval_samples_per_second": 160.961, | |
| "eval_steps_per_second": 10.151, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 5.33, | |
| "learning_rate": 3.299283154121864e-06, | |
| "loss": 0.0611, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 5.33, | |
| "eval_accuracy": 0.8058058023452759, | |
| "eval_loss": 1.1528337001800537, | |
| "eval_runtime": 6.2119, | |
| "eval_samples_per_second": 160.821, | |
| "eval_steps_per_second": 10.142, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 5.51, | |
| "learning_rate": 3.1738351254480286e-06, | |
| "loss": 0.0668, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 5.51, | |
| "eval_accuracy": 0.8118118047714233, | |
| "eval_loss": 1.0147671699523926, | |
| "eval_runtime": 6.2218, | |
| "eval_samples_per_second": 160.564, | |
| "eval_steps_per_second": 10.126, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 5.68, | |
| "learning_rate": 3.0483870967741937e-06, | |
| "loss": 0.0582, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 5.68, | |
| "eval_accuracy": 0.8108108043670654, | |
| "eval_loss": 1.1332666873931885, | |
| "eval_runtime": 6.2186, | |
| "eval_samples_per_second": 160.648, | |
| "eval_steps_per_second": 10.131, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 5.86, | |
| "learning_rate": 2.9229390681003584e-06, | |
| "loss": 0.0869, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 5.86, | |
| "eval_accuracy": 0.8088088035583496, | |
| "eval_loss": 1.060727596282959, | |
| "eval_runtime": 6.1932, | |
| "eval_samples_per_second": 161.305, | |
| "eval_steps_per_second": 10.172, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 6.04, | |
| "learning_rate": 2.797491039426523e-06, | |
| "loss": 0.0623, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 6.04, | |
| "eval_accuracy": 0.8068068027496338, | |
| "eval_loss": 1.1880476474761963, | |
| "eval_runtime": 6.2192, | |
| "eval_samples_per_second": 160.631, | |
| "eval_steps_per_second": 10.13, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 6.22, | |
| "learning_rate": 2.6720430107526883e-06, | |
| "loss": 0.0317, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 6.22, | |
| "eval_accuracy": 0.8008008003234863, | |
| "eval_loss": 1.2836244106292725, | |
| "eval_runtime": 6.2079, | |
| "eval_samples_per_second": 160.925, | |
| "eval_steps_per_second": 10.148, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 6.39, | |
| "learning_rate": 2.546594982078853e-06, | |
| "loss": 0.0546, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 6.39, | |
| "eval_accuracy": 0.8058058023452759, | |
| "eval_loss": 1.2147704362869263, | |
| "eval_runtime": 6.2243, | |
| "eval_samples_per_second": 160.501, | |
| "eval_steps_per_second": 10.122, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 6.57, | |
| "learning_rate": 2.4211469534050177e-06, | |
| "loss": 0.0486, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 6.57, | |
| "eval_accuracy": 0.8008008003234863, | |
| "eval_loss": 1.334807276725769, | |
| "eval_runtime": 6.1963, | |
| "eval_samples_per_second": 161.225, | |
| "eval_steps_per_second": 10.167, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 6.75, | |
| "learning_rate": 2.2956989247311828e-06, | |
| "loss": 0.0332, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 6.75, | |
| "eval_accuracy": 0.8018018007278442, | |
| "eval_loss": 1.3734461069107056, | |
| "eval_runtime": 6.3321, | |
| "eval_samples_per_second": 157.768, | |
| "eval_steps_per_second": 9.949, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 6.93, | |
| "learning_rate": 2.1702508960573475e-06, | |
| "loss": 0.051, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 6.93, | |
| "eval_accuracy": 0.7977977991104126, | |
| "eval_loss": 1.2966439723968506, | |
| "eval_runtime": 6.2073, | |
| "eval_samples_per_second": 160.94, | |
| "eval_steps_per_second": 10.149, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 7.1, | |
| "learning_rate": 2.044802867383512e-06, | |
| "loss": 0.0217, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 7.1, | |
| "eval_accuracy": 0.804804801940918, | |
| "eval_loss": 1.385273814201355, | |
| "eval_runtime": 6.2117, | |
| "eval_samples_per_second": 160.826, | |
| "eval_steps_per_second": 10.142, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 7.28, | |
| "learning_rate": 1.9193548387096773e-06, | |
| "loss": 0.0109, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 7.28, | |
| "eval_accuracy": 0.8068068027496338, | |
| "eval_loss": 1.480326533317566, | |
| "eval_runtime": 6.2106, | |
| "eval_samples_per_second": 160.854, | |
| "eval_steps_per_second": 10.144, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 7.46, | |
| "learning_rate": 1.793906810035842e-06, | |
| "loss": 0.0345, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 7.46, | |
| "eval_accuracy": 0.7997997999191284, | |
| "eval_loss": 1.4906260967254639, | |
| "eval_runtime": 6.2002, | |
| "eval_samples_per_second": 161.124, | |
| "eval_steps_per_second": 10.161, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 7.64, | |
| "learning_rate": 1.6684587813620071e-06, | |
| "loss": 0.0365, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 7.64, | |
| "eval_accuracy": 0.8028028011322021, | |
| "eval_loss": 1.4347106218338013, | |
| "eval_runtime": 6.2133, | |
| "eval_samples_per_second": 160.783, | |
| "eval_steps_per_second": 10.139, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 7.82, | |
| "learning_rate": 1.543010752688172e-06, | |
| "loss": 0.0265, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 7.82, | |
| "eval_accuracy": 0.8128128051757812, | |
| "eval_loss": 1.3976863622665405, | |
| "eval_runtime": 6.224, | |
| "eval_samples_per_second": 160.508, | |
| "eval_steps_per_second": 10.122, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 7.99, | |
| "learning_rate": 1.417562724014337e-06, | |
| "loss": 0.0257, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 7.99, | |
| "eval_accuracy": 0.8108108043670654, | |
| "eval_loss": 1.370467185974121, | |
| "eval_runtime": 6.2313, | |
| "eval_samples_per_second": 160.321, | |
| "eval_steps_per_second": 10.11, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 8.17, | |
| "learning_rate": 1.2921146953405017e-06, | |
| "loss": 0.0036, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 8.17, | |
| "eval_accuracy": 0.8168168067932129, | |
| "eval_loss": 1.4352822303771973, | |
| "eval_runtime": 6.2072, | |
| "eval_samples_per_second": 160.943, | |
| "eval_steps_per_second": 10.15, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 8.35, | |
| "learning_rate": 1.1666666666666666e-06, | |
| "loss": 0.0269, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 8.35, | |
| "eval_accuracy": 0.8068068027496338, | |
| "eval_loss": 1.4826140403747559, | |
| "eval_runtime": 6.2178, | |
| "eval_samples_per_second": 160.669, | |
| "eval_steps_per_second": 10.132, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 8.53, | |
| "learning_rate": 1.0412186379928315e-06, | |
| "loss": 0.0231, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 8.53, | |
| "eval_accuracy": 0.8118118047714233, | |
| "eval_loss": 1.4810999631881714, | |
| "eval_runtime": 6.3061, | |
| "eval_samples_per_second": 158.417, | |
| "eval_steps_per_second": 9.99, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 8.7, | |
| "learning_rate": 9.157706093189965e-07, | |
| "loss": 0.0204, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 8.7, | |
| "eval_accuracy": 0.8028028011322021, | |
| "eval_loss": 1.5245323181152344, | |
| "eval_runtime": 6.2057, | |
| "eval_samples_per_second": 160.982, | |
| "eval_steps_per_second": 10.152, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 8.88, | |
| "learning_rate": 7.903225806451612e-07, | |
| "loss": 0.0263, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 8.88, | |
| "eval_accuracy": 0.8018018007278442, | |
| "eval_loss": 1.5123308897018433, | |
| "eval_runtime": 6.2053, | |
| "eval_samples_per_second": 160.991, | |
| "eval_steps_per_second": 10.153, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 9.06, | |
| "learning_rate": 6.648745519713261e-07, | |
| "loss": 0.0138, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 9.06, | |
| "eval_accuracy": 0.8028028011322021, | |
| "eval_loss": 1.51128089427948, | |
| "eval_runtime": 6.2898, | |
| "eval_samples_per_second": 158.83, | |
| "eval_steps_per_second": 10.016, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 9.24, | |
| "learning_rate": 5.39426523297491e-07, | |
| "loss": 0.0089, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 9.24, | |
| "eval_accuracy": 0.7977977991104126, | |
| "eval_loss": 1.5846397876739502, | |
| "eval_runtime": 6.2124, | |
| "eval_samples_per_second": 160.808, | |
| "eval_steps_per_second": 10.141, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 9.41, | |
| "learning_rate": 4.1397849462365595e-07, | |
| "loss": 0.029, | |
| "step": 5300 | |
| }, | |
| { | |
| "epoch": 9.41, | |
| "eval_accuracy": 0.8008008003234863, | |
| "eval_loss": 1.5361814498901367, | |
| "eval_runtime": 6.2541, | |
| "eval_samples_per_second": 159.736, | |
| "eval_steps_per_second": 10.073, | |
| "step": 5300 | |
| }, | |
| { | |
| "epoch": 9.59, | |
| "learning_rate": 2.8853046594982076e-07, | |
| "loss": 0.0058, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 9.59, | |
| "eval_accuracy": 0.8018018007278442, | |
| "eval_loss": 1.5759379863739014, | |
| "eval_runtime": 6.221, | |
| "eval_samples_per_second": 160.585, | |
| "eval_steps_per_second": 10.127, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 9.77, | |
| "learning_rate": 1.6308243727598568e-07, | |
| "loss": 0.0084, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 9.77, | |
| "eval_accuracy": 0.8018018007278442, | |
| "eval_loss": 1.5678976774215698, | |
| "eval_runtime": 6.2009, | |
| "eval_samples_per_second": 161.105, | |
| "eval_steps_per_second": 10.16, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 9.95, | |
| "learning_rate": 3.763440860215054e-08, | |
| "loss": 0.0065, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 9.95, | |
| "eval_accuracy": 0.8028028011322021, | |
| "eval_loss": 1.568334937095642, | |
| "eval_runtime": 6.2439, | |
| "eval_samples_per_second": 159.996, | |
| "eval_steps_per_second": 10.09, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "step": 5630, | |
| "total_flos": 4.193719446528e+16, | |
| "train_loss": 0.13640729715885533, | |
| "train_runtime": 2182.3127, | |
| "train_samples_per_second": 41.241, | |
| "train_steps_per_second": 2.58 | |
| } | |
| ], | |
| "max_steps": 5630, | |
| "num_train_epochs": 10, | |
| "total_flos": 4.193719446528e+16, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |