| { |
| "best_metric": 0.7272727272727273, |
| "best_model_checkpoint": "deit-base-distilled-patch16-224-hasta-85-fold1/checkpoint-3", |
| "epoch": 100.0, |
| "eval_steps": 500, |
| "global_step": 100, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 1.0, |
| "eval_accuracy": 0.09090909090909091, |
| "eval_loss": 1.2771788835525513, |
| "eval_runtime": 0.1734, |
| "eval_samples_per_second": 63.434, |
| "eval_steps_per_second": 5.767, |
| "step": 1 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_accuracy": 0.18181818181818182, |
| "eval_loss": 1.1447755098342896, |
| "eval_runtime": 0.1646, |
| "eval_samples_per_second": 66.839, |
| "eval_steps_per_second": 6.076, |
| "step": 2 |
| }, |
| { |
| "epoch": 3.0, |
| "eval_accuracy": 0.7272727272727273, |
| "eval_loss": 0.9744274020195007, |
| "eval_runtime": 0.1717, |
| "eval_samples_per_second": 64.083, |
| "eval_steps_per_second": 5.826, |
| "step": 3 |
| }, |
| { |
| "epoch": 4.0, |
| "eval_accuracy": 0.7272727272727273, |
| "eval_loss": 0.9233787655830383, |
| "eval_runtime": 0.172, |
| "eval_samples_per_second": 63.969, |
| "eval_steps_per_second": 5.815, |
| "step": 4 |
| }, |
| { |
| "epoch": 5.0, |
| "eval_accuracy": 0.7272727272727273, |
| "eval_loss": 1.0760316848754883, |
| "eval_runtime": 0.1729, |
| "eval_samples_per_second": 63.616, |
| "eval_steps_per_second": 5.783, |
| "step": 5 |
| }, |
| { |
| "epoch": 6.0, |
| "eval_accuracy": 0.7272727272727273, |
| "eval_loss": 1.3222259283065796, |
| "eval_runtime": 0.1683, |
| "eval_samples_per_second": 65.343, |
| "eval_steps_per_second": 5.94, |
| "step": 6 |
| }, |
| { |
| "epoch": 7.0, |
| "eval_accuracy": 0.7272727272727273, |
| "eval_loss": 1.5247870683670044, |
| "eval_runtime": 0.1317, |
| "eval_samples_per_second": 83.521, |
| "eval_steps_per_second": 7.593, |
| "step": 7 |
| }, |
| { |
| "epoch": 8.0, |
| "eval_accuracy": 0.7272727272727273, |
| "eval_loss": 1.6138737201690674, |
| "eval_runtime": 0.1738, |
| "eval_samples_per_second": 63.299, |
| "eval_steps_per_second": 5.754, |
| "step": 8 |
| }, |
| { |
| "epoch": 9.0, |
| "eval_accuracy": 0.7272727272727273, |
| "eval_loss": 1.671146035194397, |
| "eval_runtime": 0.1801, |
| "eval_samples_per_second": 61.094, |
| "eval_steps_per_second": 5.554, |
| "step": 9 |
| }, |
| { |
| "epoch": 10.0, |
| "grad_norm": 1.1924768686294556, |
| "learning_rate": 5e-05, |
| "loss": 0.3554, |
| "step": 10 |
| }, |
| { |
| "epoch": 10.0, |
| "eval_accuracy": 0.7272727272727273, |
| "eval_loss": 1.7354072332382202, |
| "eval_runtime": 0.1604, |
| "eval_samples_per_second": 68.592, |
| "eval_steps_per_second": 6.236, |
| "step": 10 |
| }, |
| { |
| "epoch": 11.0, |
| "eval_accuracy": 0.7272727272727273, |
| "eval_loss": 1.6721187829971313, |
| "eval_runtime": 0.1745, |
| "eval_samples_per_second": 63.042, |
| "eval_steps_per_second": 5.731, |
| "step": 11 |
| }, |
| { |
| "epoch": 12.0, |
| "eval_accuracy": 0.7272727272727273, |
| "eval_loss": 1.5987907648086548, |
| "eval_runtime": 0.1742, |
| "eval_samples_per_second": 63.135, |
| "eval_steps_per_second": 5.74, |
| "step": 12 |
| }, |
| { |
| "epoch": 13.0, |
| "eval_accuracy": 0.7272727272727273, |
| "eval_loss": 1.5959930419921875, |
| "eval_runtime": 0.1674, |
| "eval_samples_per_second": 65.72, |
| "eval_steps_per_second": 5.975, |
| "step": 13 |
| }, |
| { |
| "epoch": 14.0, |
| "eval_accuracy": 0.7272727272727273, |
| "eval_loss": 1.583227515220642, |
| "eval_runtime": 0.1711, |
| "eval_samples_per_second": 64.284, |
| "eval_steps_per_second": 5.844, |
| "step": 14 |
| }, |
| { |
| "epoch": 15.0, |
| "eval_accuracy": 0.7272727272727273, |
| "eval_loss": 1.568271517753601, |
| "eval_runtime": 0.17, |
| "eval_samples_per_second": 64.702, |
| "eval_steps_per_second": 5.882, |
| "step": 15 |
| }, |
| { |
| "epoch": 16.0, |
| "eval_accuracy": 0.7272727272727273, |
| "eval_loss": 1.5774298906326294, |
| "eval_runtime": 0.1701, |
| "eval_samples_per_second": 64.672, |
| "eval_steps_per_second": 5.879, |
| "step": 16 |
| }, |
| { |
| "epoch": 17.0, |
| "eval_accuracy": 0.7272727272727273, |
| "eval_loss": 1.6467945575714111, |
| "eval_runtime": 0.1692, |
| "eval_samples_per_second": 65.014, |
| "eval_steps_per_second": 5.91, |
| "step": 17 |
| }, |
| { |
| "epoch": 18.0, |
| "eval_accuracy": 0.7272727272727273, |
| "eval_loss": 1.7090667486190796, |
| "eval_runtime": 0.1688, |
| "eval_samples_per_second": 65.178, |
| "eval_steps_per_second": 5.925, |
| "step": 18 |
| }, |
| { |
| "epoch": 19.0, |
| "eval_accuracy": 0.7272727272727273, |
| "eval_loss": 1.7276203632354736, |
| "eval_runtime": 0.1682, |
| "eval_samples_per_second": 65.379, |
| "eval_steps_per_second": 5.944, |
| "step": 19 |
| }, |
| { |
| "epoch": 20.0, |
| "grad_norm": 1.0401579141616821, |
| "learning_rate": 4.4444444444444447e-05, |
| "loss": 0.1335, |
| "step": 20 |
| }, |
| { |
| "epoch": 20.0, |
| "eval_accuracy": 0.7272727272727273, |
| "eval_loss": 1.7052208185195923, |
| "eval_runtime": 0.1669, |
| "eval_samples_per_second": 65.92, |
| "eval_steps_per_second": 5.993, |
| "step": 20 |
| }, |
| { |
| "epoch": 21.0, |
| "eval_accuracy": 0.7272727272727273, |
| "eval_loss": 1.642613172531128, |
| "eval_runtime": 0.1706, |
| "eval_samples_per_second": 64.477, |
| "eval_steps_per_second": 5.862, |
| "step": 21 |
| }, |
| { |
| "epoch": 22.0, |
| "eval_accuracy": 0.7272727272727273, |
| "eval_loss": 1.5315605401992798, |
| "eval_runtime": 0.1679, |
| "eval_samples_per_second": 65.515, |
| "eval_steps_per_second": 5.956, |
| "step": 22 |
| }, |
| { |
| "epoch": 23.0, |
| "eval_accuracy": 0.7272727272727273, |
| "eval_loss": 1.401681661605835, |
| "eval_runtime": 0.2092, |
| "eval_samples_per_second": 52.574, |
| "eval_steps_per_second": 4.779, |
| "step": 23 |
| }, |
| { |
| "epoch": 24.0, |
| "eval_accuracy": 0.7272727272727273, |
| "eval_loss": 1.300853967666626, |
| "eval_runtime": 0.1638, |
| "eval_samples_per_second": 67.161, |
| "eval_steps_per_second": 6.106, |
| "step": 24 |
| }, |
| { |
| "epoch": 25.0, |
| "eval_accuracy": 0.7272727272727273, |
| "eval_loss": 1.2863625288009644, |
| "eval_runtime": 0.1692, |
| "eval_samples_per_second": 64.994, |
| "eval_steps_per_second": 5.909, |
| "step": 25 |
| }, |
| { |
| "epoch": 26.0, |
| "eval_accuracy": 0.7272727272727273, |
| "eval_loss": 1.393377423286438, |
| "eval_runtime": 0.1719, |
| "eval_samples_per_second": 64.001, |
| "eval_steps_per_second": 5.818, |
| "step": 26 |
| }, |
| { |
| "epoch": 27.0, |
| "eval_accuracy": 0.7272727272727273, |
| "eval_loss": 1.4435607194900513, |
| "eval_runtime": 0.171, |
| "eval_samples_per_second": 64.321, |
| "eval_steps_per_second": 5.847, |
| "step": 27 |
| }, |
| { |
| "epoch": 28.0, |
| "eval_accuracy": 0.7272727272727273, |
| "eval_loss": 1.5751649141311646, |
| "eval_runtime": 0.1734, |
| "eval_samples_per_second": 63.455, |
| "eval_steps_per_second": 5.769, |
| "step": 28 |
| }, |
| { |
| "epoch": 29.0, |
| "eval_accuracy": 0.7272727272727273, |
| "eval_loss": 1.6211150884628296, |
| "eval_runtime": 0.1701, |
| "eval_samples_per_second": 64.665, |
| "eval_steps_per_second": 5.879, |
| "step": 29 |
| }, |
| { |
| "epoch": 30.0, |
| "grad_norm": 1.1806550025939941, |
| "learning_rate": 3.888888888888889e-05, |
| "loss": 0.0769, |
| "step": 30 |
| }, |
| { |
| "epoch": 30.0, |
| "eval_accuracy": 0.7272727272727273, |
| "eval_loss": 1.5944229364395142, |
| "eval_runtime": 0.1629, |
| "eval_samples_per_second": 67.545, |
| "eval_steps_per_second": 6.14, |
| "step": 30 |
| }, |
| { |
| "epoch": 31.0, |
| "eval_accuracy": 0.7272727272727273, |
| "eval_loss": 1.5282962322235107, |
| "eval_runtime": 0.1679, |
| "eval_samples_per_second": 65.5, |
| "eval_steps_per_second": 5.955, |
| "step": 31 |
| }, |
| { |
| "epoch": 32.0, |
| "eval_accuracy": 0.7272727272727273, |
| "eval_loss": 1.4341241121292114, |
| "eval_runtime": 0.1709, |
| "eval_samples_per_second": 64.372, |
| "eval_steps_per_second": 5.852, |
| "step": 32 |
| }, |
| { |
| "epoch": 33.0, |
| "eval_accuracy": 0.7272727272727273, |
| "eval_loss": 1.451222538948059, |
| "eval_runtime": 0.1774, |
| "eval_samples_per_second": 62.024, |
| "eval_steps_per_second": 5.639, |
| "step": 33 |
| }, |
| { |
| "epoch": 34.0, |
| "eval_accuracy": 0.7272727272727273, |
| "eval_loss": 1.4979816675186157, |
| "eval_runtime": 0.1698, |
| "eval_samples_per_second": 64.797, |
| "eval_steps_per_second": 5.891, |
| "step": 34 |
| }, |
| { |
| "epoch": 35.0, |
| "eval_accuracy": 0.7272727272727273, |
| "eval_loss": 1.5802719593048096, |
| "eval_runtime": 0.1718, |
| "eval_samples_per_second": 64.014, |
| "eval_steps_per_second": 5.819, |
| "step": 35 |
| }, |
| { |
| "epoch": 36.0, |
| "eval_accuracy": 0.7272727272727273, |
| "eval_loss": 1.7675877809524536, |
| "eval_runtime": 0.1737, |
| "eval_samples_per_second": 63.319, |
| "eval_steps_per_second": 5.756, |
| "step": 36 |
| }, |
| { |
| "epoch": 37.0, |
| "eval_accuracy": 0.7272727272727273, |
| "eval_loss": 1.8581143617630005, |
| "eval_runtime": 0.1763, |
| "eval_samples_per_second": 62.379, |
| "eval_steps_per_second": 5.671, |
| "step": 37 |
| }, |
| { |
| "epoch": 38.0, |
| "eval_accuracy": 0.7272727272727273, |
| "eval_loss": 1.8816009759902954, |
| "eval_runtime": 0.1739, |
| "eval_samples_per_second": 63.25, |
| "eval_steps_per_second": 5.75, |
| "step": 38 |
| }, |
| { |
| "epoch": 39.0, |
| "eval_accuracy": 0.7272727272727273, |
| "eval_loss": 1.8316550254821777, |
| "eval_runtime": 0.1703, |
| "eval_samples_per_second": 64.601, |
| "eval_steps_per_second": 5.873, |
| "step": 39 |
| }, |
| { |
| "epoch": 40.0, |
| "grad_norm": 0.9434149861335754, |
| "learning_rate": 3.3333333333333335e-05, |
| "loss": 0.0505, |
| "step": 40 |
| }, |
| { |
| "epoch": 40.0, |
| "eval_accuracy": 0.7272727272727273, |
| "eval_loss": 1.7445045709609985, |
| "eval_runtime": 0.1655, |
| "eval_samples_per_second": 66.473, |
| "eval_steps_per_second": 6.043, |
| "step": 40 |
| }, |
| { |
| "epoch": 41.0, |
| "eval_accuracy": 0.7272727272727273, |
| "eval_loss": 1.6965172290802002, |
| "eval_runtime": 0.1727, |
| "eval_samples_per_second": 63.702, |
| "eval_steps_per_second": 5.791, |
| "step": 41 |
| }, |
| { |
| "epoch": 42.0, |
| "eval_accuracy": 0.7272727272727273, |
| "eval_loss": 1.721003770828247, |
| "eval_runtime": 0.1728, |
| "eval_samples_per_second": 63.673, |
| "eval_steps_per_second": 5.788, |
| "step": 42 |
| }, |
| { |
| "epoch": 43.0, |
| "eval_accuracy": 0.7272727272727273, |
| "eval_loss": 1.6903266906738281, |
| "eval_runtime": 0.1704, |
| "eval_samples_per_second": 64.544, |
| "eval_steps_per_second": 5.868, |
| "step": 43 |
| }, |
| { |
| "epoch": 44.0, |
| "eval_accuracy": 0.7272727272727273, |
| "eval_loss": 1.694361925125122, |
| "eval_runtime": 0.1732, |
| "eval_samples_per_second": 63.528, |
| "eval_steps_per_second": 5.775, |
| "step": 44 |
| }, |
| { |
| "epoch": 45.0, |
| "eval_accuracy": 0.7272727272727273, |
| "eval_loss": 1.6923259496688843, |
| "eval_runtime": 0.1663, |
| "eval_samples_per_second": 66.135, |
| "eval_steps_per_second": 6.012, |
| "step": 45 |
| }, |
| { |
| "epoch": 46.0, |
| "eval_accuracy": 0.7272727272727273, |
| "eval_loss": 1.7470028400421143, |
| "eval_runtime": 0.1683, |
| "eval_samples_per_second": 65.348, |
| "eval_steps_per_second": 5.941, |
| "step": 46 |
| }, |
| { |
| "epoch": 47.0, |
| "eval_accuracy": 0.7272727272727273, |
| "eval_loss": 1.7501949071884155, |
| "eval_runtime": 0.1672, |
| "eval_samples_per_second": 65.8, |
| "eval_steps_per_second": 5.982, |
| "step": 47 |
| }, |
| { |
| "epoch": 48.0, |
| "eval_accuracy": 0.7272727272727273, |
| "eval_loss": 1.7738779783248901, |
| "eval_runtime": 0.174, |
| "eval_samples_per_second": 63.224, |
| "eval_steps_per_second": 5.748, |
| "step": 48 |
| }, |
| { |
| "epoch": 49.0, |
| "eval_accuracy": 0.7272727272727273, |
| "eval_loss": 1.7819253206253052, |
| "eval_runtime": 0.1701, |
| "eval_samples_per_second": 64.658, |
| "eval_steps_per_second": 5.878, |
| "step": 49 |
| }, |
| { |
| "epoch": 50.0, |
| "grad_norm": 0.3251829743385315, |
| "learning_rate": 2.777777777777778e-05, |
| "loss": 0.0255, |
| "step": 50 |
| }, |
| { |
| "epoch": 50.0, |
| "eval_accuracy": 0.7272727272727273, |
| "eval_loss": 1.8199766874313354, |
| "eval_runtime": 0.1617, |
| "eval_samples_per_second": 68.041, |
| "eval_steps_per_second": 6.186, |
| "step": 50 |
| }, |
| { |
| "epoch": 51.0, |
| "eval_accuracy": 0.7272727272727273, |
| "eval_loss": 1.8121682405471802, |
| "eval_runtime": 0.1715, |
| "eval_samples_per_second": 64.129, |
| "eval_steps_per_second": 5.83, |
| "step": 51 |
| }, |
| { |
| "epoch": 52.0, |
| "eval_accuracy": 0.7272727272727273, |
| "eval_loss": 1.7939274311065674, |
| "eval_runtime": 0.1789, |
| "eval_samples_per_second": 61.501, |
| "eval_steps_per_second": 5.591, |
| "step": 52 |
| }, |
| { |
| "epoch": 53.0, |
| "eval_accuracy": 0.7272727272727273, |
| "eval_loss": 1.7736003398895264, |
| "eval_runtime": 0.1717, |
| "eval_samples_per_second": 64.058, |
| "eval_steps_per_second": 5.823, |
| "step": 53 |
| }, |
| { |
| "epoch": 54.0, |
| "eval_accuracy": 0.7272727272727273, |
| "eval_loss": 1.7411243915557861, |
| "eval_runtime": 0.17, |
| "eval_samples_per_second": 64.709, |
| "eval_steps_per_second": 5.883, |
| "step": 54 |
| }, |
| { |
| "epoch": 55.0, |
| "eval_accuracy": 0.7272727272727273, |
| "eval_loss": 1.6772898435592651, |
| "eval_runtime": 0.1718, |
| "eval_samples_per_second": 64.013, |
| "eval_steps_per_second": 5.819, |
| "step": 55 |
| }, |
| { |
| "epoch": 56.0, |
| "eval_accuracy": 0.7272727272727273, |
| "eval_loss": 1.6555858850479126, |
| "eval_runtime": 0.172, |
| "eval_samples_per_second": 63.951, |
| "eval_steps_per_second": 5.814, |
| "step": 56 |
| }, |
| { |
| "epoch": 57.0, |
| "eval_accuracy": 0.7272727272727273, |
| "eval_loss": 1.6766685247421265, |
| "eval_runtime": 0.1687, |
| "eval_samples_per_second": 65.223, |
| "eval_steps_per_second": 5.929, |
| "step": 57 |
| }, |
| { |
| "epoch": 58.0, |
| "eval_accuracy": 0.7272727272727273, |
| "eval_loss": 1.6623153686523438, |
| "eval_runtime": 0.1813, |
| "eval_samples_per_second": 60.676, |
| "eval_steps_per_second": 5.516, |
| "step": 58 |
| }, |
| { |
| "epoch": 59.0, |
| "eval_accuracy": 0.7272727272727273, |
| "eval_loss": 1.6553212404251099, |
| "eval_runtime": 0.1694, |
| "eval_samples_per_second": 64.947, |
| "eval_steps_per_second": 5.904, |
| "step": 59 |
| }, |
| { |
| "epoch": 60.0, |
| "grad_norm": 0.4616081416606903, |
| "learning_rate": 2.2222222222222223e-05, |
| "loss": 0.0227, |
| "step": 60 |
| }, |
| { |
| "epoch": 60.0, |
| "eval_accuracy": 0.7272727272727273, |
| "eval_loss": 1.6682250499725342, |
| "eval_runtime": 0.1787, |
| "eval_samples_per_second": 61.567, |
| "eval_steps_per_second": 5.597, |
| "step": 60 |
| }, |
| { |
| "epoch": 61.0, |
| "eval_accuracy": 0.7272727272727273, |
| "eval_loss": 1.62090265750885, |
| "eval_runtime": 0.1815, |
| "eval_samples_per_second": 60.623, |
| "eval_steps_per_second": 5.511, |
| "step": 61 |
| }, |
| { |
| "epoch": 62.0, |
| "eval_accuracy": 0.7272727272727273, |
| "eval_loss": 1.6187800168991089, |
| "eval_runtime": 0.1699, |
| "eval_samples_per_second": 64.734, |
| "eval_steps_per_second": 5.885, |
| "step": 62 |
| }, |
| { |
| "epoch": 63.0, |
| "eval_accuracy": 0.7272727272727273, |
| "eval_loss": 1.6919440031051636, |
| "eval_runtime": 0.1718, |
| "eval_samples_per_second": 64.021, |
| "eval_steps_per_second": 5.82, |
| "step": 63 |
| }, |
| { |
| "epoch": 64.0, |
| "eval_accuracy": 0.7272727272727273, |
| "eval_loss": 1.7957440614700317, |
| "eval_runtime": 0.171, |
| "eval_samples_per_second": 64.31, |
| "eval_steps_per_second": 5.846, |
| "step": 64 |
| }, |
| { |
| "epoch": 65.0, |
| "eval_accuracy": 0.7272727272727273, |
| "eval_loss": 1.8749808073043823, |
| "eval_runtime": 0.1689, |
| "eval_samples_per_second": 65.119, |
| "eval_steps_per_second": 5.92, |
| "step": 65 |
| }, |
| { |
| "epoch": 66.0, |
| "eval_accuracy": 0.7272727272727273, |
| "eval_loss": 1.9156414270401, |
| "eval_runtime": 0.1703, |
| "eval_samples_per_second": 64.59, |
| "eval_steps_per_second": 5.872, |
| "step": 66 |
| }, |
| { |
| "epoch": 67.0, |
| "eval_accuracy": 0.7272727272727273, |
| "eval_loss": 1.9163463115692139, |
| "eval_runtime": 0.1741, |
| "eval_samples_per_second": 63.174, |
| "eval_steps_per_second": 5.743, |
| "step": 67 |
| }, |
| { |
| "epoch": 68.0, |
| "eval_accuracy": 0.7272727272727273, |
| "eval_loss": 1.8968899250030518, |
| "eval_runtime": 0.1686, |
| "eval_samples_per_second": 65.251, |
| "eval_steps_per_second": 5.932, |
| "step": 68 |
| }, |
| { |
| "epoch": 69.0, |
| "eval_accuracy": 0.7272727272727273, |
| "eval_loss": 1.881422519683838, |
| "eval_runtime": 0.171, |
| "eval_samples_per_second": 64.319, |
| "eval_steps_per_second": 5.847, |
| "step": 69 |
| }, |
| { |
| "epoch": 70.0, |
| "grad_norm": 0.3331330716609955, |
| "learning_rate": 1.6666666666666667e-05, |
| "loss": 0.0185, |
| "step": 70 |
| }, |
| { |
| "epoch": 70.0, |
| "eval_accuracy": 0.7272727272727273, |
| "eval_loss": 1.8714560270309448, |
| "eval_runtime": 0.1697, |
| "eval_samples_per_second": 64.821, |
| "eval_steps_per_second": 5.893, |
| "step": 70 |
| }, |
| { |
| "epoch": 71.0, |
| "eval_accuracy": 0.7272727272727273, |
| "eval_loss": 1.8891681432724, |
| "eval_runtime": 0.1694, |
| "eval_samples_per_second": 64.943, |
| "eval_steps_per_second": 5.904, |
| "step": 71 |
| }, |
| { |
| "epoch": 72.0, |
| "eval_accuracy": 0.7272727272727273, |
| "eval_loss": 1.9382548332214355, |
| "eval_runtime": 0.1738, |
| "eval_samples_per_second": 63.278, |
| "eval_steps_per_second": 5.753, |
| "step": 72 |
| }, |
| { |
| "epoch": 73.0, |
| "eval_accuracy": 0.7272727272727273, |
| "eval_loss": 1.9627383947372437, |
| "eval_runtime": 0.1697, |
| "eval_samples_per_second": 64.822, |
| "eval_steps_per_second": 5.893, |
| "step": 73 |
| }, |
| { |
| "epoch": 74.0, |
| "eval_accuracy": 0.7272727272727273, |
| "eval_loss": 2.0154221057891846, |
| "eval_runtime": 0.1705, |
| "eval_samples_per_second": 64.527, |
| "eval_steps_per_second": 5.866, |
| "step": 74 |
| }, |
| { |
| "epoch": 75.0, |
| "eval_accuracy": 0.7272727272727273, |
| "eval_loss": 2.032640218734741, |
| "eval_runtime": 0.1767, |
| "eval_samples_per_second": 62.244, |
| "eval_steps_per_second": 5.659, |
| "step": 75 |
| }, |
| { |
| "epoch": 76.0, |
| "eval_accuracy": 0.7272727272727273, |
| "eval_loss": 2.0424911975860596, |
| "eval_runtime": 0.1711, |
| "eval_samples_per_second": 64.272, |
| "eval_steps_per_second": 5.843, |
| "step": 76 |
| }, |
| { |
| "epoch": 77.0, |
| "eval_accuracy": 0.7272727272727273, |
| "eval_loss": 2.0586304664611816, |
| "eval_runtime": 0.177, |
| "eval_samples_per_second": 62.132, |
| "eval_steps_per_second": 5.648, |
| "step": 77 |
| }, |
| { |
| "epoch": 78.0, |
| "eval_accuracy": 0.7272727272727273, |
| "eval_loss": 2.058166265487671, |
| "eval_runtime": 0.1793, |
| "eval_samples_per_second": 61.357, |
| "eval_steps_per_second": 5.578, |
| "step": 78 |
| }, |
| { |
| "epoch": 79.0, |
| "eval_accuracy": 0.7272727272727273, |
| "eval_loss": 2.086315870285034, |
| "eval_runtime": 0.1722, |
| "eval_samples_per_second": 63.888, |
| "eval_steps_per_second": 5.808, |
| "step": 79 |
| }, |
| { |
| "epoch": 80.0, |
| "grad_norm": 0.5901564955711365, |
| "learning_rate": 1.1111111111111112e-05, |
| "loss": 0.0246, |
| "step": 80 |
| }, |
| { |
| "epoch": 80.0, |
| "eval_accuracy": 0.7272727272727273, |
| "eval_loss": 2.123337507247925, |
| "eval_runtime": 0.168, |
| "eval_samples_per_second": 65.467, |
| "eval_steps_per_second": 5.952, |
| "step": 80 |
| }, |
| { |
| "epoch": 81.0, |
| "eval_accuracy": 0.7272727272727273, |
| "eval_loss": 2.1527483463287354, |
| "eval_runtime": 0.1714, |
| "eval_samples_per_second": 64.176, |
| "eval_steps_per_second": 5.834, |
| "step": 81 |
| }, |
| { |
| "epoch": 82.0, |
| "eval_accuracy": 0.7272727272727273, |
| "eval_loss": 2.176025152206421, |
| "eval_runtime": 0.177, |
| "eval_samples_per_second": 62.154, |
| "eval_steps_per_second": 5.65, |
| "step": 82 |
| }, |
| { |
| "epoch": 83.0, |
| "eval_accuracy": 0.7272727272727273, |
| "eval_loss": 2.190654993057251, |
| "eval_runtime": 0.1738, |
| "eval_samples_per_second": 63.301, |
| "eval_steps_per_second": 5.755, |
| "step": 83 |
| }, |
| { |
| "epoch": 84.0, |
| "eval_accuracy": 0.7272727272727273, |
| "eval_loss": 2.1858766078948975, |
| "eval_runtime": 0.179, |
| "eval_samples_per_second": 61.465, |
| "eval_steps_per_second": 5.588, |
| "step": 84 |
| }, |
| { |
| "epoch": 85.0, |
| "eval_accuracy": 0.7272727272727273, |
| "eval_loss": 2.165440320968628, |
| "eval_runtime": 0.1802, |
| "eval_samples_per_second": 61.042, |
| "eval_steps_per_second": 5.549, |
| "step": 85 |
| }, |
| { |
| "epoch": 86.0, |
| "eval_accuracy": 0.7272727272727273, |
| "eval_loss": 2.1478655338287354, |
| "eval_runtime": 0.1681, |
| "eval_samples_per_second": 65.441, |
| "eval_steps_per_second": 5.949, |
| "step": 86 |
| }, |
| { |
| "epoch": 87.0, |
| "eval_accuracy": 0.7272727272727273, |
| "eval_loss": 2.1194212436676025, |
| "eval_runtime": 0.1706, |
| "eval_samples_per_second": 64.486, |
| "eval_steps_per_second": 5.862, |
| "step": 87 |
| }, |
| { |
| "epoch": 88.0, |
| "eval_accuracy": 0.7272727272727273, |
| "eval_loss": 2.1058995723724365, |
| "eval_runtime": 0.173, |
| "eval_samples_per_second": 63.597, |
| "eval_steps_per_second": 5.782, |
| "step": 88 |
| }, |
| { |
| "epoch": 89.0, |
| "eval_accuracy": 0.7272727272727273, |
| "eval_loss": 2.103247880935669, |
| "eval_runtime": 0.1721, |
| "eval_samples_per_second": 63.931, |
| "eval_steps_per_second": 5.812, |
| "step": 89 |
| }, |
| { |
| "epoch": 90.0, |
| "grad_norm": 1.2021247148513794, |
| "learning_rate": 5.555555555555556e-06, |
| "loss": 0.0228, |
| "step": 90 |
| }, |
| { |
| "epoch": 90.0, |
| "eval_accuracy": 0.7272727272727273, |
| "eval_loss": 2.0998663902282715, |
| "eval_runtime": 0.1654, |
| "eval_samples_per_second": 66.509, |
| "eval_steps_per_second": 6.046, |
| "step": 90 |
| }, |
| { |
| "epoch": 91.0, |
| "eval_accuracy": 0.7272727272727273, |
| "eval_loss": 2.1037116050720215, |
| "eval_runtime": 0.1718, |
| "eval_samples_per_second": 64.033, |
| "eval_steps_per_second": 5.821, |
| "step": 91 |
| }, |
| { |
| "epoch": 92.0, |
| "eval_accuracy": 0.7272727272727273, |
| "eval_loss": 2.1025755405426025, |
| "eval_runtime": 0.175, |
| "eval_samples_per_second": 62.874, |
| "eval_steps_per_second": 5.716, |
| "step": 92 |
| }, |
| { |
| "epoch": 93.0, |
| "eval_accuracy": 0.7272727272727273, |
| "eval_loss": 2.1132094860076904, |
| "eval_runtime": 0.1668, |
| "eval_samples_per_second": 65.929, |
| "eval_steps_per_second": 5.994, |
| "step": 93 |
| }, |
| { |
| "epoch": 94.0, |
| "eval_accuracy": 0.7272727272727273, |
| "eval_loss": 2.1301839351654053, |
| "eval_runtime": 0.171, |
| "eval_samples_per_second": 64.331, |
| "eval_steps_per_second": 5.848, |
| "step": 94 |
| }, |
| { |
| "epoch": 95.0, |
| "eval_accuracy": 0.7272727272727273, |
| "eval_loss": 2.1452560424804688, |
| "eval_runtime": 0.1758, |
| "eval_samples_per_second": 62.58, |
| "eval_steps_per_second": 5.689, |
| "step": 95 |
| }, |
| { |
| "epoch": 96.0, |
| "eval_accuracy": 0.7272727272727273, |
| "eval_loss": 2.163395643234253, |
| "eval_runtime": 0.1756, |
| "eval_samples_per_second": 62.64, |
| "eval_steps_per_second": 5.695, |
| "step": 96 |
| }, |
| { |
| "epoch": 97.0, |
| "eval_accuracy": 0.7272727272727273, |
| "eval_loss": 2.1761701107025146, |
| "eval_runtime": 0.1744, |
| "eval_samples_per_second": 63.075, |
| "eval_steps_per_second": 5.734, |
| "step": 97 |
| }, |
| { |
| "epoch": 98.0, |
| "eval_accuracy": 0.7272727272727273, |
| "eval_loss": 2.1859476566314697, |
| "eval_runtime": 0.1746, |
| "eval_samples_per_second": 63.0, |
| "eval_steps_per_second": 5.727, |
| "step": 98 |
| }, |
| { |
| "epoch": 99.0, |
| "eval_accuracy": 0.7272727272727273, |
| "eval_loss": 2.1915647983551025, |
| "eval_runtime": 0.1687, |
| "eval_samples_per_second": 65.197, |
| "eval_steps_per_second": 5.927, |
| "step": 99 |
| }, |
| { |
| "epoch": 100.0, |
| "grad_norm": 1.034210205078125, |
| "learning_rate": 0.0, |
| "loss": 0.0142, |
| "step": 100 |
| }, |
| { |
| "epoch": 100.0, |
| "eval_accuracy": 0.7272727272727273, |
| "eval_loss": 2.193308115005493, |
| "eval_runtime": 0.1657, |
| "eval_samples_per_second": 66.395, |
| "eval_steps_per_second": 6.036, |
| "step": 100 |
| }, |
| { |
| "epoch": 100.0, |
| "step": 100, |
| "total_flos": 4.572150213593088e+17, |
| "train_loss": 0.07446861431002617, |
| "train_runtime": 420.0976, |
| "train_samples_per_second": 14.044, |
| "train_steps_per_second": 0.238 |
| }, |
| { |
| "epoch": 100.0, |
| "eval_accuracy": 0.7272727272727273, |
| "eval_loss": 0.9744274020195007, |
| "eval_runtime": 0.2198, |
| "eval_samples_per_second": 50.054, |
| "eval_steps_per_second": 4.55, |
| "step": 100 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 100, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 100, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 4.572150213593088e+17, |
| "train_batch_size": 32, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|