[ { "loss": 2.3629, "grad_norm": 6.658691883087158, "learning_rate": 1.2457627118644069e-05, "epoch": 0.2127659574468085, "step": 50 }, { "loss": 1.4786, "grad_norm": 4.87250280380249, "learning_rate": 2.5169491525423728e-05, "epoch": 0.425531914893617, "step": 100 }, { "loss": 0.515, "grad_norm": 3.0046627521514893, "learning_rate": 2.9120151371807e-05, "epoch": 0.6382978723404256, "step": 150 }, { "loss": 0.3254, "grad_norm": 2.944659948348999, "learning_rate": 2.770104068117313e-05, "epoch": 0.851063829787234, "step": 200 }, { "eval_loss": 0.10763410478830338, "eval_accuracy": 0.972, "eval_runtime": 98.2742, "eval_samples_per_second": 25.439, "eval_steps_per_second": 0.804, "epoch": 1.0, "step": 235 }, { "loss": 0.2256, "grad_norm": 4.162021160125732, "learning_rate": 2.628192999053926e-05, "epoch": 1.0638297872340425, "step": 250 }, { "loss": 0.1403, "grad_norm": 1.9201328754425049, "learning_rate": 2.4862819299905392e-05, "epoch": 1.2765957446808511, "step": 300 }, { "loss": 0.1532, "grad_norm": 2.2434935569763184, "learning_rate": 2.3443708609271523e-05, "epoch": 1.4893617021276595, "step": 350 }, { "loss": 0.1162, "grad_norm": 2.351989984512329, "learning_rate": 2.2024597918637654e-05, "epoch": 1.702127659574468, "step": 400 }, { "loss": 0.1216, "grad_norm": 2.133737325668335, "learning_rate": 2.0605487228003786e-05, "epoch": 1.9148936170212765, "step": 450 }, { "eval_loss": 0.09042185544967651, "eval_accuracy": 0.9768, "eval_runtime": 97.4381, "eval_samples_per_second": 25.657, "eval_steps_per_second": 0.811, "epoch": 2.0, "step": 470 }, { "loss": 0.0778, "grad_norm": 0.8670908808708191, "learning_rate": 1.9186376537369917e-05, "epoch": 2.127659574468085, "step": 500 }, { "loss": 0.0433, "grad_norm": 0.14258132874965668, "learning_rate": 1.7767265846736048e-05, "epoch": 2.3404255319148937, "step": 550 }, { "loss": 0.0369, "grad_norm": 3.188659191131592, "learning_rate": 1.634815515610218e-05, "epoch": 2.5531914893617023, "step": 600 }, { "loss": 0.0305, "grad_norm": 2.0361948013305664, "learning_rate": 1.4929044465468307e-05, "epoch": 2.7659574468085104, "step": 650 }, { "loss": 0.0361, "grad_norm": 1.4521644115447998, "learning_rate": 1.3509933774834438e-05, "epoch": 2.978723404255319, "step": 700 }, { "eval_loss": 0.07695046812295914, "eval_accuracy": 0.9788, "eval_runtime": 98.1763, "eval_samples_per_second": 25.464, "eval_steps_per_second": 0.805, "epoch": 3.0, "step": 705 }, { "loss": 0.0192, "grad_norm": 0.425351083278656, "learning_rate": 1.2090823084200568e-05, "epoch": 3.1914893617021276, "step": 750 }, { "loss": 0.013, "grad_norm": 0.42595893144607544, "learning_rate": 1.0671712393566697e-05, "epoch": 3.404255319148936, "step": 800 }, { "loss": 0.0092, "grad_norm": 0.24749380350112915, "learning_rate": 9.252601702932829e-06, "epoch": 3.617021276595745, "step": 850 }, { "loss": 0.0118, "grad_norm": 0.6825519800186157, "learning_rate": 7.83349101229896e-06, "epoch": 3.829787234042553, "step": 900 }, { "eval_loss": 0.07638780027627945, "eval_accuracy": 0.98, "eval_runtime": 97.9771, "eval_samples_per_second": 25.516, "eval_steps_per_second": 0.806, "epoch": 4.0, "step": 940 }, { "loss": 0.0061, "grad_norm": 0.02345215529203415, "learning_rate": 6.41438032166509e-06, "epoch": 4.042553191489362, "step": 950 }, { "loss": 0.0045, "grad_norm": 0.12952572107315063, "learning_rate": 4.995269631031221e-06, "epoch": 4.25531914893617, "step": 1000 }, { "loss": 0.0048, "grad_norm": 0.040700629353523254, "learning_rate": 3.576158940397351e-06, "epoch": 4.468085106382979, "step": 1050 }, { "loss": 0.0042, "grad_norm": 0.03013400174677372, "learning_rate": 2.1570482497634815e-06, "epoch": 4.680851063829787, "step": 1100 }, { "loss": 0.0084, "grad_norm": 0.03551739081740379, "learning_rate": 7.379375591296122e-07, "epoch": 4.8936170212765955, "step": 1150 }, { "eval_loss": 0.07666528224945068, "eval_accuracy": 0.9804, "eval_runtime": 96.8698, "eval_samples_per_second": 25.808, "eval_steps_per_second": 0.816, "epoch": 5.0, "step": 1175 }, { "train_runtime": 2705.0877, "train_samples_per_second": 13.863, "train_steps_per_second": 0.434, "total_flos": 2.9061579714048e+18, "train_loss": 0.24456460309789535, "epoch": 5.0, "step": 1175 }, { "eval_loss": 0.07666528224945068, "eval_accuracy": 0.9804, "eval_runtime": 95.6512, "eval_samples_per_second": 26.137, "eval_steps_per_second": 0.826, "epoch": 5.0, "step": 1175 } ]