[
  {
    "loss": 2.3629,
    "grad_norm": 6.658691883087158,
    "learning_rate": 1.2457627118644069e-05,
    "epoch": 0.2127659574468085,
    "step": 50
  },
  {
    "loss": 1.4786,
    "grad_norm": 4.87250280380249,
    "learning_rate": 2.5169491525423728e-05,
    "epoch": 0.425531914893617,
    "step": 100
  },
  {
    "loss": 0.515,
    "grad_norm": 3.0046627521514893,
    "learning_rate": 2.9120151371807e-05,
    "epoch": 0.6382978723404256,
    "step": 150
  },
  {
    "loss": 0.3254,
    "grad_norm": 2.944659948348999,
    "learning_rate": 2.770104068117313e-05,
    "epoch": 0.851063829787234,
    "step": 200
  },
  {
    "eval_loss": 0.10763410478830338,
    "eval_accuracy": 0.972,
    "eval_runtime": 98.2742,
    "eval_samples_per_second": 25.439,
    "eval_steps_per_second": 0.804,
    "epoch": 1.0,
    "step": 235
  },
  {
    "loss": 0.2256,
    "grad_norm": 4.162021160125732,
    "learning_rate": 2.628192999053926e-05,
    "epoch": 1.0638297872340425,
    "step": 250
  },
  {
    "loss": 0.1403,
    "grad_norm": 1.9201328754425049,
    "learning_rate": 2.4862819299905392e-05,
    "epoch": 1.2765957446808511,
    "step": 300
  },
  {
    "loss": 0.1532,
    "grad_norm": 2.2434935569763184,
    "learning_rate": 2.3443708609271523e-05,
    "epoch": 1.4893617021276595,
    "step": 350
  },
  {
    "loss": 0.1162,
    "grad_norm": 2.351989984512329,
    "learning_rate": 2.2024597918637654e-05,
    "epoch": 1.702127659574468,
    "step": 400
  },
  {
    "loss": 0.1216,
    "grad_norm": 2.133737325668335,
    "learning_rate": 2.0605487228003786e-05,
    "epoch": 1.9148936170212765,
    "step": 450
  },
  {
    "eval_loss": 0.09042185544967651,
    "eval_accuracy": 0.9768,
    "eval_runtime": 97.4381,
    "eval_samples_per_second": 25.657,
    "eval_steps_per_second": 0.811,
    "epoch": 2.0,
    "step": 470
  },
  {
    "loss": 0.0778,
    "grad_norm": 0.8670908808708191,
    "learning_rate": 1.9186376537369917e-05,
    "epoch": 2.127659574468085,
    "step": 500
  },
  {
    "loss": 0.0433,
    "grad_norm": 0.14258132874965668,
    "learning_rate": 1.7767265846736048e-05,
    "epoch": 2.3404255319148937,
    "step": 550
  },
  {
    "loss": 0.0369,
    "grad_norm": 3.188659191131592,
    "learning_rate": 1.634815515610218e-05,
    "epoch": 2.5531914893617023,
    "step": 600
  },
  {
    "loss": 0.0305,
    "grad_norm": 2.0361948013305664,
    "learning_rate": 1.4929044465468307e-05,
    "epoch": 2.7659574468085104,
    "step": 650
  },
  {
    "loss": 0.0361,
    "grad_norm": 1.4521644115447998,
    "learning_rate": 1.3509933774834438e-05,
    "epoch": 2.978723404255319,
    "step": 700
  },
  {
    "eval_loss": 0.07695046812295914,
    "eval_accuracy": 0.9788,
    "eval_runtime": 98.1763,
    "eval_samples_per_second": 25.464,
    "eval_steps_per_second": 0.805,
    "epoch": 3.0,
    "step": 705
  },
  {
    "loss": 0.0192,
    "grad_norm": 0.425351083278656,
    "learning_rate": 1.2090823084200568e-05,
    "epoch": 3.1914893617021276,
    "step": 750
  },
  {
    "loss": 0.013,
    "grad_norm": 0.42595893144607544,
    "learning_rate": 1.0671712393566697e-05,
    "epoch": 3.404255319148936,
    "step": 800
  },
  {
    "loss": 0.0092,
    "grad_norm": 0.24749380350112915,
    "learning_rate": 9.252601702932829e-06,
    "epoch": 3.617021276595745,
    "step": 850
  },
  {
    "loss": 0.0118,
    "grad_norm": 0.6825519800186157,
    "learning_rate": 7.83349101229896e-06,
    "epoch": 3.829787234042553,
    "step": 900
  },
  {
    "eval_loss": 0.07638780027627945,
    "eval_accuracy": 0.98,
    "eval_runtime": 97.9771,
    "eval_samples_per_second": 25.516,
    "eval_steps_per_second": 0.806,
    "epoch": 4.0,
    "step": 940
  },
  {
    "loss": 0.0061,
    "grad_norm": 0.02345215529203415,
    "learning_rate": 6.41438032166509e-06,
    "epoch": 4.042553191489362,
    "step": 950
  },
  {
    "loss": 0.0045,
    "grad_norm": 0.12952572107315063,
    "learning_rate": 4.995269631031221e-06,
    "epoch": 4.25531914893617,
    "step": 1000
  },
  {
    "loss": 0.0048,
    "grad_norm": 0.040700629353523254,
    "learning_rate": 3.576158940397351e-06,
    "epoch": 4.468085106382979,
    "step": 1050
  },
  {
    "loss": 0.0042,
    "grad_norm": 0.03013400174677372,
    "learning_rate": 2.1570482497634815e-06,
    "epoch": 4.680851063829787,
    "step": 1100
  },
  {
    "loss": 0.0084,
    "grad_norm": 0.03551739081740379,
    "learning_rate": 7.379375591296122e-07,
    "epoch": 4.8936170212765955,
    "step": 1150
  },
  {
    "eval_loss": 0.07666528224945068,
    "eval_accuracy": 0.9804,
    "eval_runtime": 96.8698,
    "eval_samples_per_second": 25.808,
    "eval_steps_per_second": 0.816,
    "epoch": 5.0,
    "step": 1175
  },
  {
    "train_runtime": 2705.0877,
    "train_samples_per_second": 13.863,
    "train_steps_per_second": 0.434,
    "total_flos": 2.9061579714048e+18,
    "train_loss": 0.24456460309789535,
    "epoch": 5.0,
    "step": 1175
  },
  {
    "eval_loss": 0.07666528224945068,
    "eval_accuracy": 0.9804,
    "eval_runtime": 95.6512,
    "eval_samples_per_second": 26.137,
    "eval_steps_per_second": 0.826,
    "epoch": 5.0,
    "step": 1175
  }
]