Instructions to use tuanio/training_sentiment_analysis with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- PEFT
How to use tuanio/training_sentiment_analysis with PEFT:
Task type is invalid.
- Notebooks
- Google Colab
- Kaggle
| { | |
| "best_metric": 0.8174300254452926, | |
| "best_model_checkpoint": "training_sentiment_analysis/checkpoint-8600", | |
| "epoch": 20.0, | |
| "eval_steps": 200, | |
| "global_step": 18680, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.21, | |
| "grad_norm": 1.3381836414337158, | |
| "learning_rate": 3.2119914346895075e-05, | |
| "loss": 0.9299, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "eval_accuracy": 0.638676844783715, | |
| "eval_loss": 0.827367901802063, | |
| "eval_runtime": 3.055, | |
| "eval_samples_per_second": 514.569, | |
| "eval_steps_per_second": 16.367, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "grad_norm": 1.0220164060592651, | |
| "learning_rate": 6.423982869379015e-05, | |
| "loss": 0.7793, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "eval_accuracy": 0.7188295165394402, | |
| "eval_loss": 0.6643335223197937, | |
| "eval_runtime": 3.0013, | |
| "eval_samples_per_second": 523.77, | |
| "eval_steps_per_second": 16.659, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "grad_norm": 1.7421491146087646, | |
| "learning_rate": 9.635974304068522e-05, | |
| "loss": 0.6574, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "eval_accuracy": 0.7659033078880407, | |
| "eval_loss": 0.5868020057678223, | |
| "eval_runtime": 2.9749, | |
| "eval_samples_per_second": 528.422, | |
| "eval_steps_per_second": 16.807, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "grad_norm": 1.8133894205093384, | |
| "learning_rate": 0.0001284796573875803, | |
| "loss": 0.6132, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "eval_accuracy": 0.772264631043257, | |
| "eval_loss": 0.5582301616668701, | |
| "eval_runtime": 2.9908, | |
| "eval_samples_per_second": 525.617, | |
| "eval_steps_per_second": 16.718, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 1.07, | |
| "grad_norm": 1.3071078062057495, | |
| "learning_rate": 0.00016059957173447537, | |
| "loss": 0.5791, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 1.07, | |
| "eval_accuracy": 0.7830788804071247, | |
| "eval_loss": 0.5515692234039307, | |
| "eval_runtime": 2.9665, | |
| "eval_samples_per_second": 529.915, | |
| "eval_steps_per_second": 16.855, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 1.28, | |
| "grad_norm": 1.0445743799209595, | |
| "learning_rate": 0.00019271948608137044, | |
| "loss": 0.554, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 1.28, | |
| "eval_accuracy": 0.7964376590330788, | |
| "eval_loss": 0.5187413692474365, | |
| "eval_runtime": 2.9846, | |
| "eval_samples_per_second": 526.705, | |
| "eval_steps_per_second": 16.753, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "grad_norm": 1.0763362646102905, | |
| "learning_rate": 0.0002248394004282655, | |
| "loss": 0.5258, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "eval_accuracy": 0.8034351145038168, | |
| "eval_loss": 0.5125576257705688, | |
| "eval_runtime": 2.9831, | |
| "eval_samples_per_second": 526.967, | |
| "eval_steps_per_second": 16.761, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 1.71, | |
| "grad_norm": 0.8554897308349609, | |
| "learning_rate": 0.0002569593147751606, | |
| "loss": 0.5373, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 1.71, | |
| "eval_accuracy": 0.8002544529262087, | |
| "eval_loss": 0.51680988073349, | |
| "eval_runtime": 2.9726, | |
| "eval_samples_per_second": 528.823, | |
| "eval_steps_per_second": 16.82, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 1.93, | |
| "grad_norm": 1.538806438446045, | |
| "learning_rate": 0.0002890792291220556, | |
| "loss": 0.5266, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 1.93, | |
| "eval_accuracy": 0.8027989821882952, | |
| "eval_loss": 0.5283887982368469, | |
| "eval_runtime": 2.9766, | |
| "eval_samples_per_second": 528.12, | |
| "eval_steps_per_second": 16.798, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 2.14, | |
| "grad_norm": 1.1234441995620728, | |
| "learning_rate": 0.000297644539614561, | |
| "loss": 0.5076, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 2.14, | |
| "eval_accuracy": 0.7977099236641222, | |
| "eval_loss": 0.5178301334381104, | |
| "eval_runtime": 2.9829, | |
| "eval_samples_per_second": 526.996, | |
| "eval_steps_per_second": 16.762, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 2.36, | |
| "grad_norm": 1.6212774515151978, | |
| "learning_rate": 0.0002940756602426838, | |
| "loss": 0.5094, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 2.36, | |
| "eval_accuracy": 0.8027989821882952, | |
| "eval_loss": 0.5134572982788086, | |
| "eval_runtime": 2.981, | |
| "eval_samples_per_second": 527.334, | |
| "eval_steps_per_second": 16.773, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 2.57, | |
| "grad_norm": 1.4514294862747192, | |
| "learning_rate": 0.00029050678087080655, | |
| "loss": 0.5032, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 2.57, | |
| "eval_accuracy": 0.8104325699745547, | |
| "eval_loss": 0.5022692084312439, | |
| "eval_runtime": 2.963, | |
| "eval_samples_per_second": 530.535, | |
| "eval_steps_per_second": 16.875, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 2.78, | |
| "grad_norm": 1.826932668685913, | |
| "learning_rate": 0.0002869379014989293, | |
| "loss": 0.5034, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 2.78, | |
| "eval_accuracy": 0.80470737913486, | |
| "eval_loss": 0.5088226199150085, | |
| "eval_runtime": 2.9831, | |
| "eval_samples_per_second": 526.969, | |
| "eval_steps_per_second": 16.761, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "grad_norm": 1.4404336214065552, | |
| "learning_rate": 0.0002833690221270521, | |
| "loss": 0.4923, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_accuracy": 0.799618320610687, | |
| "eval_loss": 0.5219257473945618, | |
| "eval_runtime": 2.9722, | |
| "eval_samples_per_second": 528.9, | |
| "eval_steps_per_second": 16.823, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 3.21, | |
| "grad_norm": 0.8795878291130066, | |
| "learning_rate": 0.00027980014275517484, | |
| "loss": 0.4934, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 3.21, | |
| "eval_accuracy": 0.8129770992366412, | |
| "eval_loss": 0.4905295968055725, | |
| "eval_runtime": 2.9734, | |
| "eval_samples_per_second": 528.696, | |
| "eval_steps_per_second": 16.816, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 3.43, | |
| "grad_norm": 1.6092537641525269, | |
| "learning_rate": 0.0002762312633832976, | |
| "loss": 0.4798, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 3.43, | |
| "eval_accuracy": 0.8097964376590331, | |
| "eval_loss": 0.4907812178134918, | |
| "eval_runtime": 2.9897, | |
| "eval_samples_per_second": 525.803, | |
| "eval_steps_per_second": 16.724, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 3.64, | |
| "grad_norm": 1.6475110054016113, | |
| "learning_rate": 0.0002726623840114204, | |
| "loss": 0.4831, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 3.64, | |
| "eval_accuracy": 0.8072519083969466, | |
| "eval_loss": 0.48748457431793213, | |
| "eval_runtime": 2.9694, | |
| "eval_samples_per_second": 529.396, | |
| "eval_steps_per_second": 16.838, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 3.85, | |
| "grad_norm": 1.1669467687606812, | |
| "learning_rate": 0.00026909350463954313, | |
| "loss": 0.4707, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 3.85, | |
| "eval_accuracy": 0.8072519083969466, | |
| "eval_loss": 0.4985896944999695, | |
| "eval_runtime": 2.991, | |
| "eval_samples_per_second": 525.579, | |
| "eval_steps_per_second": 16.717, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 4.07, | |
| "grad_norm": 0.9440352320671082, | |
| "learning_rate": 0.00026552462526766593, | |
| "loss": 0.4674, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 4.07, | |
| "eval_accuracy": 0.8104325699745547, | |
| "eval_loss": 0.5195557475090027, | |
| "eval_runtime": 2.9789, | |
| "eval_samples_per_second": 527.711, | |
| "eval_steps_per_second": 16.785, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 4.28, | |
| "grad_norm": 1.8151628971099854, | |
| "learning_rate": 0.0002619557458957887, | |
| "loss": 0.4535, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 4.28, | |
| "eval_accuracy": 0.8097964376590331, | |
| "eval_loss": 0.4896373152732849, | |
| "eval_runtime": 2.9869, | |
| "eval_samples_per_second": 526.295, | |
| "eval_steps_per_second": 16.74, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 4.5, | |
| "grad_norm": 3.0790090560913086, | |
| "learning_rate": 0.0002583868665239115, | |
| "loss": 0.464, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 4.5, | |
| "eval_accuracy": 0.8078880407124682, | |
| "eval_loss": 0.517495334148407, | |
| "eval_runtime": 2.9986, | |
| "eval_samples_per_second": 524.246, | |
| "eval_steps_per_second": 16.674, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 4.71, | |
| "grad_norm": 1.1520639657974243, | |
| "learning_rate": 0.0002548179871520343, | |
| "loss": 0.4715, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 4.71, | |
| "eval_accuracy": 0.8027989821882952, | |
| "eval_loss": 0.5001667737960815, | |
| "eval_runtime": 2.9723, | |
| "eval_samples_per_second": 528.885, | |
| "eval_steps_per_second": 16.822, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 4.93, | |
| "grad_norm": 0.8184943795204163, | |
| "learning_rate": 0.000251249107780157, | |
| "loss": 0.468, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 4.93, | |
| "eval_accuracy": 0.8110687022900763, | |
| "eval_loss": 0.4883332848548889, | |
| "eval_runtime": 2.9769, | |
| "eval_samples_per_second": 528.068, | |
| "eval_steps_per_second": 16.796, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 5.14, | |
| "grad_norm": 1.155013084411621, | |
| "learning_rate": 0.00024768022840827977, | |
| "loss": 0.4645, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 5.14, | |
| "eval_accuracy": 0.8040712468193384, | |
| "eval_loss": 0.5186554789543152, | |
| "eval_runtime": 2.9698, | |
| "eval_samples_per_second": 529.333, | |
| "eval_steps_per_second": 16.836, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 5.35, | |
| "grad_norm": 1.6959339380264282, | |
| "learning_rate": 0.00024411134903640257, | |
| "loss": 0.445, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 5.35, | |
| "eval_accuracy": 0.806615776081425, | |
| "eval_loss": 0.4928103983402252, | |
| "eval_runtime": 2.9782, | |
| "eval_samples_per_second": 527.83, | |
| "eval_steps_per_second": 16.789, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 5.57, | |
| "grad_norm": 1.0461735725402832, | |
| "learning_rate": 0.00024054246966452532, | |
| "loss": 0.4558, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 5.57, | |
| "eval_accuracy": 0.8078880407124682, | |
| "eval_loss": 0.48704999685287476, | |
| "eval_runtime": 2.9838, | |
| "eval_samples_per_second": 526.839, | |
| "eval_steps_per_second": 16.757, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 5.78, | |
| "grad_norm": 0.9599233269691467, | |
| "learning_rate": 0.00023697359029264806, | |
| "loss": 0.4405, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 5.78, | |
| "eval_accuracy": 0.8104325699745547, | |
| "eval_loss": 0.4985482692718506, | |
| "eval_runtime": 3.0065, | |
| "eval_samples_per_second": 522.862, | |
| "eval_steps_per_second": 16.63, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "grad_norm": 1.4131615161895752, | |
| "learning_rate": 0.00023340471092077086, | |
| "loss": 0.4648, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_accuracy": 0.8059796437659033, | |
| "eval_loss": 0.48415422439575195, | |
| "eval_runtime": 2.9786, | |
| "eval_samples_per_second": 527.759, | |
| "eval_steps_per_second": 16.786, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 6.21, | |
| "grad_norm": 1.189572811126709, | |
| "learning_rate": 0.0002298358315488936, | |
| "loss": 0.435, | |
| "step": 5800 | |
| }, | |
| { | |
| "epoch": 6.21, | |
| "eval_accuracy": 0.811704834605598, | |
| "eval_loss": 0.4911487102508545, | |
| "eval_runtime": 2.9997, | |
| "eval_samples_per_second": 524.044, | |
| "eval_steps_per_second": 16.668, | |
| "step": 5800 | |
| }, | |
| { | |
| "epoch": 6.42, | |
| "grad_norm": 1.5198345184326172, | |
| "learning_rate": 0.00022626695217701638, | |
| "loss": 0.437, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 6.42, | |
| "eval_accuracy": 0.8085241730279898, | |
| "eval_loss": 0.48542749881744385, | |
| "eval_runtime": 3.0042, | |
| "eval_samples_per_second": 523.274, | |
| "eval_steps_per_second": 16.644, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 6.64, | |
| "grad_norm": 1.1990240812301636, | |
| "learning_rate": 0.00022269807280513918, | |
| "loss": 0.4588, | |
| "step": 6200 | |
| }, | |
| { | |
| "epoch": 6.64, | |
| "eval_accuracy": 0.8085241730279898, | |
| "eval_loss": 0.48791924118995667, | |
| "eval_runtime": 3.0014, | |
| "eval_samples_per_second": 523.758, | |
| "eval_steps_per_second": 16.659, | |
| "step": 6200 | |
| }, | |
| { | |
| "epoch": 6.85, | |
| "grad_norm": 1.346658706665039, | |
| "learning_rate": 0.00021912919343326193, | |
| "loss": 0.4342, | |
| "step": 6400 | |
| }, | |
| { | |
| "epoch": 6.85, | |
| "eval_accuracy": 0.8104325699745547, | |
| "eval_loss": 0.49220582842826843, | |
| "eval_runtime": 3.0046, | |
| "eval_samples_per_second": 523.193, | |
| "eval_steps_per_second": 16.641, | |
| "step": 6400 | |
| }, | |
| { | |
| "epoch": 7.07, | |
| "grad_norm": 1.8644700050354004, | |
| "learning_rate": 0.00021556031406138473, | |
| "loss": 0.4347, | |
| "step": 6600 | |
| }, | |
| { | |
| "epoch": 7.07, | |
| "eval_accuracy": 0.8142493638676844, | |
| "eval_loss": 0.49111655354499817, | |
| "eval_runtime": 2.985, | |
| "eval_samples_per_second": 526.634, | |
| "eval_steps_per_second": 16.75, | |
| "step": 6600 | |
| }, | |
| { | |
| "epoch": 7.28, | |
| "grad_norm": 1.9364045858383179, | |
| "learning_rate": 0.00021199143468950748, | |
| "loss": 0.4326, | |
| "step": 6800 | |
| }, | |
| { | |
| "epoch": 7.28, | |
| "eval_accuracy": 0.8078880407124682, | |
| "eval_loss": 0.491384893655777, | |
| "eval_runtime": 2.9723, | |
| "eval_samples_per_second": 528.882, | |
| "eval_steps_per_second": 16.822, | |
| "step": 6800 | |
| }, | |
| { | |
| "epoch": 7.49, | |
| "grad_norm": 0.9911957383155823, | |
| "learning_rate": 0.00020842255531763022, | |
| "loss": 0.4267, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 7.49, | |
| "eval_accuracy": 0.8104325699745547, | |
| "eval_loss": 0.4917159080505371, | |
| "eval_runtime": 2.9808, | |
| "eval_samples_per_second": 527.373, | |
| "eval_steps_per_second": 16.774, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 7.71, | |
| "grad_norm": 1.2186638116836548, | |
| "learning_rate": 0.00020485367594575302, | |
| "loss": 0.4241, | |
| "step": 7200 | |
| }, | |
| { | |
| "epoch": 7.71, | |
| "eval_accuracy": 0.8136132315521628, | |
| "eval_loss": 0.4887010455131531, | |
| "eval_runtime": 2.9872, | |
| "eval_samples_per_second": 526.253, | |
| "eval_steps_per_second": 16.738, | |
| "step": 7200 | |
| }, | |
| { | |
| "epoch": 7.92, | |
| "grad_norm": 1.1467108726501465, | |
| "learning_rate": 0.0002012847965738758, | |
| "loss": 0.4376, | |
| "step": 7400 | |
| }, | |
| { | |
| "epoch": 7.92, | |
| "eval_accuracy": 0.8078880407124682, | |
| "eval_loss": 0.5122085213661194, | |
| "eval_runtime": 2.9829, | |
| "eval_samples_per_second": 527.007, | |
| "eval_steps_per_second": 16.762, | |
| "step": 7400 | |
| }, | |
| { | |
| "epoch": 8.14, | |
| "grad_norm": 0.8427834510803223, | |
| "learning_rate": 0.00019771591720199854, | |
| "loss": 0.4323, | |
| "step": 7600 | |
| }, | |
| { | |
| "epoch": 8.14, | |
| "eval_accuracy": 0.8097964376590331, | |
| "eval_loss": 0.49093857407569885, | |
| "eval_runtime": 2.9738, | |
| "eval_samples_per_second": 528.625, | |
| "eval_steps_per_second": 16.814, | |
| "step": 7600 | |
| }, | |
| { | |
| "epoch": 8.35, | |
| "grad_norm": 1.2060902118682861, | |
| "learning_rate": 0.00019414703783012134, | |
| "loss": 0.4264, | |
| "step": 7800 | |
| }, | |
| { | |
| "epoch": 8.35, | |
| "eval_accuracy": 0.8142493638676844, | |
| "eval_loss": 0.48821595311164856, | |
| "eval_runtime": 2.9836, | |
| "eval_samples_per_second": 526.88, | |
| "eval_steps_per_second": 16.758, | |
| "step": 7800 | |
| }, | |
| { | |
| "epoch": 8.57, | |
| "grad_norm": 1.7033394575119019, | |
| "learning_rate": 0.0001905781584582441, | |
| "loss": 0.4175, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 8.57, | |
| "eval_accuracy": 0.8053435114503816, | |
| "eval_loss": 0.5090692043304443, | |
| "eval_runtime": 2.9978, | |
| "eval_samples_per_second": 524.393, | |
| "eval_steps_per_second": 16.679, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 8.78, | |
| "grad_norm": 1.3033976554870605, | |
| "learning_rate": 0.0001870092790863669, | |
| "loss": 0.4228, | |
| "step": 8200 | |
| }, | |
| { | |
| "epoch": 8.78, | |
| "eval_accuracy": 0.8097964376590331, | |
| "eval_loss": 0.5060204863548279, | |
| "eval_runtime": 2.9975, | |
| "eval_samples_per_second": 524.436, | |
| "eval_steps_per_second": 16.681, | |
| "step": 8200 | |
| }, | |
| { | |
| "epoch": 8.99, | |
| "grad_norm": 1.2635438442230225, | |
| "learning_rate": 0.00018344039971448964, | |
| "loss": 0.4189, | |
| "step": 8400 | |
| }, | |
| { | |
| "epoch": 8.99, | |
| "eval_accuracy": 0.8091603053435115, | |
| "eval_loss": 0.4940575361251831, | |
| "eval_runtime": 2.9634, | |
| "eval_samples_per_second": 530.468, | |
| "eval_steps_per_second": 16.872, | |
| "step": 8400 | |
| }, | |
| { | |
| "epoch": 9.21, | |
| "grad_norm": 1.496982455253601, | |
| "learning_rate": 0.0001798715203426124, | |
| "loss": 0.4161, | |
| "step": 8600 | |
| }, | |
| { | |
| "epoch": 9.21, | |
| "eval_accuracy": 0.8174300254452926, | |
| "eval_loss": 0.5010442137718201, | |
| "eval_runtime": 2.973, | |
| "eval_samples_per_second": 528.758, | |
| "eval_steps_per_second": 16.818, | |
| "step": 8600 | |
| }, | |
| { | |
| "epoch": 9.42, | |
| "grad_norm": 1.355362892150879, | |
| "learning_rate": 0.00017630264097073518, | |
| "loss": 0.4078, | |
| "step": 8800 | |
| }, | |
| { | |
| "epoch": 9.42, | |
| "eval_accuracy": 0.8078880407124682, | |
| "eval_loss": 0.4949406683444977, | |
| "eval_runtime": 2.9901, | |
| "eval_samples_per_second": 525.736, | |
| "eval_steps_per_second": 16.722, | |
| "step": 8800 | |
| }, | |
| { | |
| "epoch": 9.64, | |
| "grad_norm": 1.180076241493225, | |
| "learning_rate": 0.00017273376159885795, | |
| "loss": 0.4201, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 9.64, | |
| "eval_accuracy": 0.8072519083969466, | |
| "eval_loss": 0.5017107129096985, | |
| "eval_runtime": 2.952, | |
| "eval_samples_per_second": 532.525, | |
| "eval_steps_per_second": 16.938, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 9.85, | |
| "grad_norm": 1.1020286083221436, | |
| "learning_rate": 0.0001691648822269807, | |
| "loss": 0.4141, | |
| "step": 9200 | |
| }, | |
| { | |
| "epoch": 9.85, | |
| "eval_accuracy": 0.8091603053435115, | |
| "eval_loss": 0.4984731078147888, | |
| "eval_runtime": 2.9633, | |
| "eval_samples_per_second": 530.497, | |
| "eval_steps_per_second": 16.873, | |
| "step": 9200 | |
| }, | |
| { | |
| "epoch": 10.06, | |
| "grad_norm": 1.2666047811508179, | |
| "learning_rate": 0.0001655960028551035, | |
| "loss": 0.4132, | |
| "step": 9400 | |
| }, | |
| { | |
| "epoch": 10.06, | |
| "eval_accuracy": 0.8053435114503816, | |
| "eval_loss": 0.5031649470329285, | |
| "eval_runtime": 2.9822, | |
| "eval_samples_per_second": 527.133, | |
| "eval_steps_per_second": 16.766, | |
| "step": 9400 | |
| }, | |
| { | |
| "epoch": 10.28, | |
| "grad_norm": 0.6767197251319885, | |
| "learning_rate": 0.00016202712348322625, | |
| "loss": 0.4043, | |
| "step": 9600 | |
| }, | |
| { | |
| "epoch": 10.28, | |
| "eval_accuracy": 0.8129770992366412, | |
| "eval_loss": 0.5038406848907471, | |
| "eval_runtime": 2.9816, | |
| "eval_samples_per_second": 527.24, | |
| "eval_steps_per_second": 16.77, | |
| "step": 9600 | |
| }, | |
| { | |
| "epoch": 10.49, | |
| "grad_norm": 1.147275447845459, | |
| "learning_rate": 0.00015845824411134902, | |
| "loss": 0.4187, | |
| "step": 9800 | |
| }, | |
| { | |
| "epoch": 10.49, | |
| "eval_accuracy": 0.8104325699745547, | |
| "eval_loss": 0.4981047213077545, | |
| "eval_runtime": 2.9858, | |
| "eval_samples_per_second": 526.485, | |
| "eval_steps_per_second": 16.746, | |
| "step": 9800 | |
| }, | |
| { | |
| "epoch": 10.71, | |
| "grad_norm": 1.6172677278518677, | |
| "learning_rate": 0.0001548893647394718, | |
| "loss": 0.3827, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 10.71, | |
| "eval_accuracy": 0.8072519083969466, | |
| "eval_loss": 0.5126467943191528, | |
| "eval_runtime": 2.9825, | |
| "eval_samples_per_second": 527.072, | |
| "eval_steps_per_second": 16.764, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 10.92, | |
| "grad_norm": 1.8639923334121704, | |
| "learning_rate": 0.00015132048536759457, | |
| "loss": 0.4074, | |
| "step": 10200 | |
| }, | |
| { | |
| "epoch": 10.92, | |
| "eval_accuracy": 0.8072519083969466, | |
| "eval_loss": 0.5088323950767517, | |
| "eval_runtime": 2.9816, | |
| "eval_samples_per_second": 527.237, | |
| "eval_steps_per_second": 16.77, | |
| "step": 10200 | |
| }, | |
| { | |
| "epoch": 11.13, | |
| "grad_norm": 1.2519667148590088, | |
| "learning_rate": 0.00014775160599571734, | |
| "loss": 0.4013, | |
| "step": 10400 | |
| }, | |
| { | |
| "epoch": 11.13, | |
| "eval_accuracy": 0.8072519083969466, | |
| "eval_loss": 0.5061373114585876, | |
| "eval_runtime": 2.9811, | |
| "eval_samples_per_second": 527.316, | |
| "eval_steps_per_second": 16.772, | |
| "step": 10400 | |
| }, | |
| { | |
| "epoch": 11.35, | |
| "grad_norm": 1.1711052656173706, | |
| "learning_rate": 0.0001441827266238401, | |
| "loss": 0.3888, | |
| "step": 10600 | |
| }, | |
| { | |
| "epoch": 11.35, | |
| "eval_accuracy": 0.8085241730279898, | |
| "eval_loss": 0.5013065338134766, | |
| "eval_runtime": 2.9847, | |
| "eval_samples_per_second": 526.681, | |
| "eval_steps_per_second": 16.752, | |
| "step": 10600 | |
| }, | |
| { | |
| "epoch": 11.56, | |
| "grad_norm": 1.8078001737594604, | |
| "learning_rate": 0.00014061384725196286, | |
| "loss": 0.3855, | |
| "step": 10800 | |
| }, | |
| { | |
| "epoch": 11.56, | |
| "eval_accuracy": 0.8059796437659033, | |
| "eval_loss": 0.4992610514163971, | |
| "eval_runtime": 2.9927, | |
| "eval_samples_per_second": 525.27, | |
| "eval_steps_per_second": 16.707, | |
| "step": 10800 | |
| }, | |
| { | |
| "epoch": 11.78, | |
| "grad_norm": 1.1071592569351196, | |
| "learning_rate": 0.00013704496788008563, | |
| "loss": 0.3924, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 11.78, | |
| "eval_accuracy": 0.8085241730279898, | |
| "eval_loss": 0.5075262188911438, | |
| "eval_runtime": 3.0066, | |
| "eval_samples_per_second": 522.844, | |
| "eval_steps_per_second": 16.63, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 11.99, | |
| "grad_norm": 1.3704427480697632, | |
| "learning_rate": 0.0001334760885082084, | |
| "loss": 0.4046, | |
| "step": 11200 | |
| }, | |
| { | |
| "epoch": 11.99, | |
| "eval_accuracy": 0.8027989821882952, | |
| "eval_loss": 0.49990707635879517, | |
| "eval_runtime": 3.0049, | |
| "eval_samples_per_second": 523.149, | |
| "eval_steps_per_second": 16.64, | |
| "step": 11200 | |
| }, | |
| { | |
| "epoch": 12.21, | |
| "grad_norm": 1.40170419216156, | |
| "learning_rate": 0.00012990720913633118, | |
| "loss": 0.3957, | |
| "step": 11400 | |
| }, | |
| { | |
| "epoch": 12.21, | |
| "eval_accuracy": 0.8034351145038168, | |
| "eval_loss": 0.5089264512062073, | |
| "eval_runtime": 2.9942, | |
| "eval_samples_per_second": 525.011, | |
| "eval_steps_per_second": 16.699, | |
| "step": 11400 | |
| }, | |
| { | |
| "epoch": 12.42, | |
| "grad_norm": 1.1885521411895752, | |
| "learning_rate": 0.00012633832976445395, | |
| "loss": 0.381, | |
| "step": 11600 | |
| }, | |
| { | |
| "epoch": 12.42, | |
| "eval_accuracy": 0.8072519083969466, | |
| "eval_loss": 0.5207549929618835, | |
| "eval_runtime": 2.9746, | |
| "eval_samples_per_second": 528.479, | |
| "eval_steps_per_second": 16.809, | |
| "step": 11600 | |
| }, | |
| { | |
| "epoch": 12.63, | |
| "grad_norm": 0.8873888254165649, | |
| "learning_rate": 0.00012276945039257673, | |
| "loss": 0.3906, | |
| "step": 11800 | |
| }, | |
| { | |
| "epoch": 12.63, | |
| "eval_accuracy": 0.806615776081425, | |
| "eval_loss": 0.513671875, | |
| "eval_runtime": 2.961, | |
| "eval_samples_per_second": 530.901, | |
| "eval_steps_per_second": 16.886, | |
| "step": 11800 | |
| }, | |
| { | |
| "epoch": 12.85, | |
| "grad_norm": 1.6491570472717285, | |
| "learning_rate": 0.0001192005710206995, | |
| "loss": 0.3734, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 12.85, | |
| "eval_accuracy": 0.8040712468193384, | |
| "eval_loss": 0.5183374881744385, | |
| "eval_runtime": 2.9533, | |
| "eval_samples_per_second": 532.292, | |
| "eval_steps_per_second": 16.93, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 13.06, | |
| "grad_norm": 2.042646884918213, | |
| "learning_rate": 0.00011563169164882227, | |
| "loss": 0.3928, | |
| "step": 12200 | |
| }, | |
| { | |
| "epoch": 13.06, | |
| "eval_accuracy": 0.806615776081425, | |
| "eval_loss": 0.5069447159767151, | |
| "eval_runtime": 2.959, | |
| "eval_samples_per_second": 531.259, | |
| "eval_steps_per_second": 16.898, | |
| "step": 12200 | |
| }, | |
| { | |
| "epoch": 13.28, | |
| "grad_norm": 0.817425549030304, | |
| "learning_rate": 0.00011206281227694502, | |
| "loss": 0.3774, | |
| "step": 12400 | |
| }, | |
| { | |
| "epoch": 13.28, | |
| "eval_accuracy": 0.8008905852417303, | |
| "eval_loss": 0.5086419582366943, | |
| "eval_runtime": 2.9547, | |
| "eval_samples_per_second": 532.04, | |
| "eval_steps_per_second": 16.922, | |
| "step": 12400 | |
| }, | |
| { | |
| "epoch": 13.49, | |
| "grad_norm": 1.0988578796386719, | |
| "learning_rate": 0.0001084939329050678, | |
| "loss": 0.3892, | |
| "step": 12600 | |
| }, | |
| { | |
| "epoch": 13.49, | |
| "eval_accuracy": 0.8059796437659033, | |
| "eval_loss": 0.4966925382614136, | |
| "eval_runtime": 2.9538, | |
| "eval_samples_per_second": 532.194, | |
| "eval_steps_per_second": 16.927, | |
| "step": 12600 | |
| }, | |
| { | |
| "epoch": 13.7, | |
| "grad_norm": 1.312321662902832, | |
| "learning_rate": 0.00010492505353319058, | |
| "loss": 0.372, | |
| "step": 12800 | |
| }, | |
| { | |
| "epoch": 13.7, | |
| "eval_accuracy": 0.8040712468193384, | |
| "eval_loss": 0.5042534470558167, | |
| "eval_runtime": 2.9651, | |
| "eval_samples_per_second": 530.16, | |
| "eval_steps_per_second": 16.863, | |
| "step": 12800 | |
| }, | |
| { | |
| "epoch": 13.92, | |
| "grad_norm": 1.642741322517395, | |
| "learning_rate": 0.00010135617416131332, | |
| "loss": 0.388, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 13.92, | |
| "eval_accuracy": 0.8072519083969466, | |
| "eval_loss": 0.5095480680465698, | |
| "eval_runtime": 2.9526, | |
| "eval_samples_per_second": 532.404, | |
| "eval_steps_per_second": 16.934, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 14.13, | |
| "grad_norm": 1.10377836227417, | |
| "learning_rate": 9.778729478943611e-05, | |
| "loss": 0.3754, | |
| "step": 13200 | |
| }, | |
| { | |
| "epoch": 14.13, | |
| "eval_accuracy": 0.8021628498727735, | |
| "eval_loss": 0.5103972554206848, | |
| "eval_runtime": 2.9663, | |
| "eval_samples_per_second": 529.954, | |
| "eval_steps_per_second": 16.856, | |
| "step": 13200 | |
| }, | |
| { | |
| "epoch": 14.35, | |
| "grad_norm": 1.1614229679107666, | |
| "learning_rate": 9.421841541755888e-05, | |
| "loss": 0.3639, | |
| "step": 13400 | |
| }, | |
| { | |
| "epoch": 14.35, | |
| "eval_accuracy": 0.7983460559796438, | |
| "eval_loss": 0.5263165235519409, | |
| "eval_runtime": 2.9391, | |
| "eval_samples_per_second": 534.858, | |
| "eval_steps_per_second": 17.012, | |
| "step": 13400 | |
| }, | |
| { | |
| "epoch": 14.56, | |
| "grad_norm": 1.6049692630767822, | |
| "learning_rate": 9.064953604568166e-05, | |
| "loss": 0.3795, | |
| "step": 13600 | |
| }, | |
| { | |
| "epoch": 14.56, | |
| "eval_accuracy": 0.8015267175572519, | |
| "eval_loss": 0.5145931839942932, | |
| "eval_runtime": 2.9465, | |
| "eval_samples_per_second": 533.506, | |
| "eval_steps_per_second": 16.969, | |
| "step": 13600 | |
| }, | |
| { | |
| "epoch": 14.78, | |
| "grad_norm": 2.813002347946167, | |
| "learning_rate": 8.708065667380442e-05, | |
| "loss": 0.3792, | |
| "step": 13800 | |
| }, | |
| { | |
| "epoch": 14.78, | |
| "eval_accuracy": 0.8040712468193384, | |
| "eval_loss": 0.5066380500793457, | |
| "eval_runtime": 2.9409, | |
| "eval_samples_per_second": 534.523, | |
| "eval_steps_per_second": 17.001, | |
| "step": 13800 | |
| }, | |
| { | |
| "epoch": 14.99, | |
| "grad_norm": 1.2670201063156128, | |
| "learning_rate": 8.351177730192719e-05, | |
| "loss": 0.3589, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 14.99, | |
| "eval_accuracy": 0.8078880407124682, | |
| "eval_loss": 0.5135853886604309, | |
| "eval_runtime": 2.962, | |
| "eval_samples_per_second": 530.717, | |
| "eval_steps_per_second": 16.88, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 15.2, | |
| "grad_norm": 1.9681557416915894, | |
| "learning_rate": 7.994289793004996e-05, | |
| "loss": 0.3624, | |
| "step": 14200 | |
| }, | |
| { | |
| "epoch": 15.2, | |
| "eval_accuracy": 0.8021628498727735, | |
| "eval_loss": 0.5237164497375488, | |
| "eval_runtime": 2.9535, | |
| "eval_samples_per_second": 532.245, | |
| "eval_steps_per_second": 16.929, | |
| "step": 14200 | |
| }, | |
| { | |
| "epoch": 15.42, | |
| "grad_norm": 1.8548041582107544, | |
| "learning_rate": 7.637401855817274e-05, | |
| "loss": 0.3659, | |
| "step": 14400 | |
| }, | |
| { | |
| "epoch": 15.42, | |
| "eval_accuracy": 0.8059796437659033, | |
| "eval_loss": 0.5165674090385437, | |
| "eval_runtime": 2.9482, | |
| "eval_samples_per_second": 533.2, | |
| "eval_steps_per_second": 16.959, | |
| "step": 14400 | |
| }, | |
| { | |
| "epoch": 15.63, | |
| "grad_norm": 1.3727173805236816, | |
| "learning_rate": 7.28051391862955e-05, | |
| "loss": 0.3657, | |
| "step": 14600 | |
| }, | |
| { | |
| "epoch": 15.63, | |
| "eval_accuracy": 0.8002544529262087, | |
| "eval_loss": 0.5177738070487976, | |
| "eval_runtime": 2.9451, | |
| "eval_samples_per_second": 533.764, | |
| "eval_steps_per_second": 16.977, | |
| "step": 14600 | |
| }, | |
| { | |
| "epoch": 15.85, | |
| "grad_norm": 2.10198974609375, | |
| "learning_rate": 6.923625981441827e-05, | |
| "loss": 0.359, | |
| "step": 14800 | |
| }, | |
| { | |
| "epoch": 15.85, | |
| "eval_accuracy": 0.7983460559796438, | |
| "eval_loss": 0.5152426362037659, | |
| "eval_runtime": 2.9473, | |
| "eval_samples_per_second": 533.372, | |
| "eval_steps_per_second": 16.965, | |
| "step": 14800 | |
| }, | |
| { | |
| "epoch": 16.06, | |
| "grad_norm": 1.0453667640686035, | |
| "learning_rate": 6.566738044254104e-05, | |
| "loss": 0.3677, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 16.06, | |
| "eval_accuracy": 0.8034351145038168, | |
| "eval_loss": 0.5211815237998962, | |
| "eval_runtime": 2.9478, | |
| "eval_samples_per_second": 533.274, | |
| "eval_steps_per_second": 16.962, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 16.27, | |
| "grad_norm": 1.0645538568496704, | |
| "learning_rate": 6.20985010706638e-05, | |
| "loss": 0.3521, | |
| "step": 15200 | |
| }, | |
| { | |
| "epoch": 16.27, | |
| "eval_accuracy": 0.8002544529262087, | |
| "eval_loss": 0.5323696732521057, | |
| "eval_runtime": 2.9594, | |
| "eval_samples_per_second": 531.197, | |
| "eval_steps_per_second": 16.896, | |
| "step": 15200 | |
| }, | |
| { | |
| "epoch": 16.49, | |
| "grad_norm": 3.849015951156616, | |
| "learning_rate": 5.852962169878657e-05, | |
| "loss": 0.3589, | |
| "step": 15400 | |
| }, | |
| { | |
| "epoch": 16.49, | |
| "eval_accuracy": 0.8040712468193384, | |
| "eval_loss": 0.5237988829612732, | |
| "eval_runtime": 2.9364, | |
| "eval_samples_per_second": 535.357, | |
| "eval_steps_per_second": 17.028, | |
| "step": 15400 | |
| }, | |
| { | |
| "epoch": 16.7, | |
| "grad_norm": 1.3231987953186035, | |
| "learning_rate": 5.496074232690935e-05, | |
| "loss": 0.3695, | |
| "step": 15600 | |
| }, | |
| { | |
| "epoch": 16.7, | |
| "eval_accuracy": 0.7977099236641222, | |
| "eval_loss": 0.511340320110321, | |
| "eval_runtime": 2.969, | |
| "eval_samples_per_second": 529.468, | |
| "eval_steps_per_second": 16.841, | |
| "step": 15600 | |
| }, | |
| { | |
| "epoch": 16.92, | |
| "grad_norm": 1.7709985971450806, | |
| "learning_rate": 5.139186295503211e-05, | |
| "loss": 0.3606, | |
| "step": 15800 | |
| }, | |
| { | |
| "epoch": 16.92, | |
| "eval_accuracy": 0.7983460559796438, | |
| "eval_loss": 0.5136662721633911, | |
| "eval_runtime": 2.9594, | |
| "eval_samples_per_second": 531.193, | |
| "eval_steps_per_second": 16.895, | |
| "step": 15800 | |
| }, | |
| { | |
| "epoch": 17.13, | |
| "grad_norm": 1.5108495950698853, | |
| "learning_rate": 4.782298358315489e-05, | |
| "loss": 0.3581, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 17.13, | |
| "eval_accuracy": 0.799618320610687, | |
| "eval_loss": 0.5130853056907654, | |
| "eval_runtime": 2.9611, | |
| "eval_samples_per_second": 530.882, | |
| "eval_steps_per_second": 16.886, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 17.34, | |
| "grad_norm": 1.3634617328643799, | |
| "learning_rate": 4.4254104211277655e-05, | |
| "loss": 0.3488, | |
| "step": 16200 | |
| }, | |
| { | |
| "epoch": 17.34, | |
| "eval_accuracy": 0.7989821882951654, | |
| "eval_loss": 0.5270070433616638, | |
| "eval_runtime": 2.9953, | |
| "eval_samples_per_second": 524.824, | |
| "eval_steps_per_second": 16.693, | |
| "step": 16200 | |
| }, | |
| { | |
| "epoch": 17.56, | |
| "grad_norm": 1.0239213705062866, | |
| "learning_rate": 4.068522483940043e-05, | |
| "loss": 0.3499, | |
| "step": 16400 | |
| }, | |
| { | |
| "epoch": 17.56, | |
| "eval_accuracy": 0.7964376590330788, | |
| "eval_loss": 0.523576021194458, | |
| "eval_runtime": 2.9356, | |
| "eval_samples_per_second": 535.502, | |
| "eval_steps_per_second": 17.033, | |
| "step": 16400 | |
| }, | |
| { | |
| "epoch": 17.77, | |
| "grad_norm": 1.108484148979187, | |
| "learning_rate": 3.7116345467523195e-05, | |
| "loss": 0.3603, | |
| "step": 16600 | |
| }, | |
| { | |
| "epoch": 17.77, | |
| "eval_accuracy": 0.8002544529262087, | |
| "eval_loss": 0.5186541080474854, | |
| "eval_runtime": 2.9666, | |
| "eval_samples_per_second": 529.891, | |
| "eval_steps_per_second": 16.854, | |
| "step": 16600 | |
| }, | |
| { | |
| "epoch": 17.99, | |
| "grad_norm": 2.816092014312744, | |
| "learning_rate": 3.354746609564596e-05, | |
| "loss": 0.3578, | |
| "step": 16800 | |
| }, | |
| { | |
| "epoch": 17.99, | |
| "eval_accuracy": 0.8021628498727735, | |
| "eval_loss": 0.5223926901817322, | |
| "eval_runtime": 2.9355, | |
| "eval_samples_per_second": 535.521, | |
| "eval_steps_per_second": 17.033, | |
| "step": 16800 | |
| }, | |
| { | |
| "epoch": 18.2, | |
| "grad_norm": 1.5831489562988281, | |
| "learning_rate": 2.997858672376873e-05, | |
| "loss": 0.3449, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 18.2, | |
| "eval_accuracy": 0.7989821882951654, | |
| "eval_loss": 0.5227622389793396, | |
| "eval_runtime": 2.9602, | |
| "eval_samples_per_second": 531.048, | |
| "eval_steps_per_second": 16.891, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 18.42, | |
| "grad_norm": 1.0060327053070068, | |
| "learning_rate": 2.64097073518915e-05, | |
| "loss": 0.3418, | |
| "step": 17200 | |
| }, | |
| { | |
| "epoch": 18.42, | |
| "eval_accuracy": 0.8008905852417303, | |
| "eval_loss": 0.5287216901779175, | |
| "eval_runtime": 2.9537, | |
| "eval_samples_per_second": 532.21, | |
| "eval_steps_per_second": 16.928, | |
| "step": 17200 | |
| }, | |
| { | |
| "epoch": 18.63, | |
| "grad_norm": 1.8092093467712402, | |
| "learning_rate": 2.2840827980014274e-05, | |
| "loss": 0.3334, | |
| "step": 17400 | |
| }, | |
| { | |
| "epoch": 18.63, | |
| "eval_accuracy": 0.799618320610687, | |
| "eval_loss": 0.5322315096855164, | |
| "eval_runtime": 2.9745, | |
| "eval_samples_per_second": 528.484, | |
| "eval_steps_per_second": 16.809, | |
| "step": 17400 | |
| }, | |
| { | |
| "epoch": 18.84, | |
| "grad_norm": 1.4800430536270142, | |
| "learning_rate": 1.9271948608137044e-05, | |
| "loss": 0.3567, | |
| "step": 17600 | |
| }, | |
| { | |
| "epoch": 18.84, | |
| "eval_accuracy": 0.7983460559796438, | |
| "eval_loss": 0.5293812155723572, | |
| "eval_runtime": 2.9485, | |
| "eval_samples_per_second": 533.161, | |
| "eval_steps_per_second": 16.958, | |
| "step": 17600 | |
| }, | |
| { | |
| "epoch": 19.06, | |
| "grad_norm": 1.6271811723709106, | |
| "learning_rate": 1.5703069236259814e-05, | |
| "loss": 0.3541, | |
| "step": 17800 | |
| }, | |
| { | |
| "epoch": 19.06, | |
| "eval_accuracy": 0.8002544529262087, | |
| "eval_loss": 0.5250320434570312, | |
| "eval_runtime": 2.9479, | |
| "eval_samples_per_second": 533.268, | |
| "eval_steps_per_second": 16.961, | |
| "step": 17800 | |
| }, | |
| { | |
| "epoch": 19.27, | |
| "grad_norm": 0.7758527994155884, | |
| "learning_rate": 1.2134189864382584e-05, | |
| "loss": 0.365, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 19.27, | |
| "eval_accuracy": 0.7983460559796438, | |
| "eval_loss": 0.5246437788009644, | |
| "eval_runtime": 2.9363, | |
| "eval_samples_per_second": 535.369, | |
| "eval_steps_per_second": 17.028, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 19.49, | |
| "grad_norm": 0.9722337126731873, | |
| "learning_rate": 8.565310492505352e-06, | |
| "loss": 0.337, | |
| "step": 18200 | |
| }, | |
| { | |
| "epoch": 19.49, | |
| "eval_accuracy": 0.7977099236641222, | |
| "eval_loss": 0.527810275554657, | |
| "eval_runtime": 2.9383, | |
| "eval_samples_per_second": 535.006, | |
| "eval_steps_per_second": 17.017, | |
| "step": 18200 | |
| }, | |
| { | |
| "epoch": 19.7, | |
| "grad_norm": 1.5007203817367554, | |
| "learning_rate": 4.996431120628123e-06, | |
| "loss": 0.3301, | |
| "step": 18400 | |
| }, | |
| { | |
| "epoch": 19.7, | |
| "eval_accuracy": 0.7989821882951654, | |
| "eval_loss": 0.5283259153366089, | |
| "eval_runtime": 2.9603, | |
| "eval_samples_per_second": 531.035, | |
| "eval_steps_per_second": 16.89, | |
| "step": 18400 | |
| }, | |
| { | |
| "epoch": 19.91, | |
| "grad_norm": 1.1220752000808716, | |
| "learning_rate": 1.4275517487508921e-06, | |
| "loss": 0.3421, | |
| "step": 18600 | |
| }, | |
| { | |
| "epoch": 19.91, | |
| "eval_accuracy": 0.7977099236641222, | |
| "eval_loss": 0.5287136435508728, | |
| "eval_runtime": 2.9398, | |
| "eval_samples_per_second": 534.737, | |
| "eval_steps_per_second": 17.008, | |
| "step": 18600 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "step": 18680, | |
| "total_flos": 1.44512252251488e+16, | |
| "train_loss": 0.42864556159401346, | |
| "train_runtime": 2680.553, | |
| "train_samples_per_second": 222.82, | |
| "train_steps_per_second": 6.969 | |
| } | |
| ], | |
| "logging_steps": 200, | |
| "max_steps": 18680, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 20, | |
| "save_steps": 200, | |
| "total_flos": 1.44512252251488e+16, | |
| "train_batch_size": 32, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |