Instructions to use Jack-Payne1/EM_TEST with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use Jack-Payne1/EM_TEST with Transformers:
# Load model directly
from transformers import AutoModel
model = AutoModel.from_pretrained("Jack-Payne1/EM_TEST", dtype="auto")
- Notebooks
- Google Colab
- Kaggle
- Local Apps
- Unsloth Studio new
How to use Jack-Payne1/EM_TEST with Unsloth Studio:
Install Unsloth Studio (macOS, Linux, WSL)
curl -fsSL https://unsloth.ai/install.sh | sh
# Run unsloth studio
unsloth studio -H 0.0.0.0 -p 8888
# Then open http://localhost:8888 in your browser
# Search for Jack-Payne1/EM_TEST to start chatting
Install Unsloth Studio (Windows)
irm https://unsloth.ai/install.ps1 | iex
# Run unsloth studio
unsloth studio -H 0.0.0.0 -p 8888
# Then open http://localhost:8888 in your browser
# Search for Jack-Payne1/EM_TEST to start chatting
Using HuggingFace Spaces for Unsloth
# No setup required
# Open https://huggingface.co/spaces/unsloth/studio in your browser
# Search for Jack-Payne1/EM_TEST to start chatting
Load model with FastModel
pip install unsloth
from unsloth import FastModel
model, tokenizer = FastModel.from_pretrained(
    model_name="Jack-Payne1/EM_TEST",
    max_seq_length=2048,
)
| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.7818411097099621, | |
| "eval_steps": 100, | |
| "global_step": 310, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0025220680958385876, | |
| "grad_norm": 25.350475311279297, | |
| "learning_rate": 0.0, | |
| "loss": 2.5568, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.005044136191677175, | |
| "grad_norm": 24.538068771362305, | |
| "learning_rate": 4.000000000000001e-06, | |
| "loss": 2.7748, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.007566204287515763, | |
| "grad_norm": 23.780784606933594, | |
| "learning_rate": 8.000000000000001e-06, | |
| "loss": 2.5911, | |
| "step": 3 | |
| }, | |
| { | |
| "epoch": 0.01008827238335435, | |
| "grad_norm": 24.780380249023438, | |
| "learning_rate": 1.2e-05, | |
| "loss": 2.8427, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.012610340479192938, | |
| "grad_norm": 22.699949264526367, | |
| "learning_rate": 1.6000000000000003e-05, | |
| "loss": 2.709, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.015132408575031526, | |
| "grad_norm": 22.106008529663086, | |
| "learning_rate": 2e-05, | |
| "loss": 2.5854, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.017654476670870115, | |
| "grad_norm": 22.497045516967773, | |
| "learning_rate": 1.9948979591836737e-05, | |
| "loss": 2.5427, | |
| "step": 7 | |
| }, | |
| { | |
| "epoch": 0.0201765447667087, | |
| "grad_norm": 27.103275299072266, | |
| "learning_rate": 1.9897959183673473e-05, | |
| "loss": 2.6599, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.02269861286254729, | |
| "grad_norm": 21.081985473632812, | |
| "learning_rate": 1.9846938775510205e-05, | |
| "loss": 2.5145, | |
| "step": 9 | |
| }, | |
| { | |
| "epoch": 0.025220680958385876, | |
| "grad_norm": 25.964981079101562, | |
| "learning_rate": 1.979591836734694e-05, | |
| "loss": 2.4247, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.027742749054224466, | |
| "grad_norm": 25.353195190429688, | |
| "learning_rate": 1.9744897959183677e-05, | |
| "loss": 2.5092, | |
| "step": 11 | |
| }, | |
| { | |
| "epoch": 0.03026481715006305, | |
| "grad_norm": 18.94191551208496, | |
| "learning_rate": 1.969387755102041e-05, | |
| "loss": 2.4335, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.03278688524590164, | |
| "grad_norm": 23.60140037536621, | |
| "learning_rate": 1.9642857142857145e-05, | |
| "loss": 2.544, | |
| "step": 13 | |
| }, | |
| { | |
| "epoch": 0.03530895334174023, | |
| "grad_norm": 24.298965454101562, | |
| "learning_rate": 1.9591836734693877e-05, | |
| "loss": 2.4987, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.03783102143757881, | |
| "grad_norm": 20.745506286621094, | |
| "learning_rate": 1.9540816326530613e-05, | |
| "loss": 2.4985, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.0403530895334174, | |
| "grad_norm": 22.54330062866211, | |
| "learning_rate": 1.948979591836735e-05, | |
| "loss": 2.6892, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.04287515762925599, | |
| "grad_norm": 21.46229362487793, | |
| "learning_rate": 1.9438775510204085e-05, | |
| "loss": 2.3998, | |
| "step": 17 | |
| }, | |
| { | |
| "epoch": 0.04539722572509458, | |
| "grad_norm": 20.54530143737793, | |
| "learning_rate": 1.9387755102040817e-05, | |
| "loss": 2.4244, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.04791929382093316, | |
| "grad_norm": 18.8839111328125, | |
| "learning_rate": 1.9336734693877553e-05, | |
| "loss": 2.3911, | |
| "step": 19 | |
| }, | |
| { | |
| "epoch": 0.05044136191677175, | |
| "grad_norm": 16.924652099609375, | |
| "learning_rate": 1.928571428571429e-05, | |
| "loss": 2.3588, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.05296343001261034, | |
| "grad_norm": 16.996627807617188, | |
| "learning_rate": 1.9234693877551024e-05, | |
| "loss": 2.3727, | |
| "step": 21 | |
| }, | |
| { | |
| "epoch": 0.05548549810844893, | |
| "grad_norm": 18.584613800048828, | |
| "learning_rate": 1.9183673469387756e-05, | |
| "loss": 2.2974, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.058007566204287514, | |
| "grad_norm": 14.309200286865234, | |
| "learning_rate": 1.9132653061224492e-05, | |
| "loss": 2.4843, | |
| "step": 23 | |
| }, | |
| { | |
| "epoch": 0.0605296343001261, | |
| "grad_norm": 15.074164390563965, | |
| "learning_rate": 1.9081632653061225e-05, | |
| "loss": 2.4043, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.06305170239596469, | |
| "grad_norm": 13.610542297363281, | |
| "learning_rate": 1.903061224489796e-05, | |
| "loss": 2.3762, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.06557377049180328, | |
| "grad_norm": 15.666613578796387, | |
| "learning_rate": 1.8979591836734696e-05, | |
| "loss": 2.3249, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.06809583858764187, | |
| "grad_norm": 14.475164413452148, | |
| "learning_rate": 1.892857142857143e-05, | |
| "loss": 2.3317, | |
| "step": 27 | |
| }, | |
| { | |
| "epoch": 0.07061790668348046, | |
| "grad_norm": 16.231687545776367, | |
| "learning_rate": 1.8877551020408164e-05, | |
| "loss": 2.5064, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.07313997477931904, | |
| "grad_norm": 16.8968563079834, | |
| "learning_rate": 1.88265306122449e-05, | |
| "loss": 2.3932, | |
| "step": 29 | |
| }, | |
| { | |
| "epoch": 0.07566204287515763, | |
| "grad_norm": 17.74305534362793, | |
| "learning_rate": 1.8775510204081636e-05, | |
| "loss": 2.3329, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.07818411097099622, | |
| "grad_norm": 16.41620445251465, | |
| "learning_rate": 1.8724489795918368e-05, | |
| "loss": 2.3982, | |
| "step": 31 | |
| }, | |
| { | |
| "epoch": 0.0807061790668348, | |
| "grad_norm": 17.965959548950195, | |
| "learning_rate": 1.8673469387755104e-05, | |
| "loss": 2.4227, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.0832282471626734, | |
| "grad_norm": 19.92589569091797, | |
| "learning_rate": 1.862244897959184e-05, | |
| "loss": 2.5255, | |
| "step": 33 | |
| }, | |
| { | |
| "epoch": 0.08575031525851198, | |
| "grad_norm": 20.62932586669922, | |
| "learning_rate": 1.8571428571428575e-05, | |
| "loss": 2.1816, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.08827238335435057, | |
| "grad_norm": 18.360614776611328, | |
| "learning_rate": 1.8520408163265307e-05, | |
| "loss": 2.2827, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.09079445145018916, | |
| "grad_norm": 19.199546813964844, | |
| "learning_rate": 1.8469387755102043e-05, | |
| "loss": 2.1498, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.09331651954602774, | |
| "grad_norm": 22.727521896362305, | |
| "learning_rate": 1.8418367346938776e-05, | |
| "loss": 2.3811, | |
| "step": 37 | |
| }, | |
| { | |
| "epoch": 0.09583858764186633, | |
| "grad_norm": 19.80649757385254, | |
| "learning_rate": 1.836734693877551e-05, | |
| "loss": 2.2342, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.09836065573770492, | |
| "grad_norm": 22.24563217163086, | |
| "learning_rate": 1.8316326530612247e-05, | |
| "loss": 2.2287, | |
| "step": 39 | |
| }, | |
| { | |
| "epoch": 0.1008827238335435, | |
| "grad_norm": 25.384042739868164, | |
| "learning_rate": 1.826530612244898e-05, | |
| "loss": 2.1259, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.1034047919293821, | |
| "grad_norm": 23.417089462280273, | |
| "learning_rate": 1.8214285714285715e-05, | |
| "loss": 2.0858, | |
| "step": 41 | |
| }, | |
| { | |
| "epoch": 0.10592686002522068, | |
| "grad_norm": 27.639497756958008, | |
| "learning_rate": 1.816326530612245e-05, | |
| "loss": 2.2243, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.10844892812105927, | |
| "grad_norm": 27.390850067138672, | |
| "learning_rate": 1.8112244897959187e-05, | |
| "loss": 2.1314, | |
| "step": 43 | |
| }, | |
| { | |
| "epoch": 0.11097099621689786, | |
| "grad_norm": 27.956937789916992, | |
| "learning_rate": 1.806122448979592e-05, | |
| "loss": 2.1755, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.11349306431273644, | |
| "grad_norm": 32.09632873535156, | |
| "learning_rate": 1.8010204081632655e-05, | |
| "loss": 2.2365, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.11601513240857503, | |
| "grad_norm": 33.84647750854492, | |
| "learning_rate": 1.795918367346939e-05, | |
| "loss": 2.1671, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.11853720050441362, | |
| "grad_norm": 32.027130126953125, | |
| "learning_rate": 1.7908163265306123e-05, | |
| "loss": 2.09, | |
| "step": 47 | |
| }, | |
| { | |
| "epoch": 0.1210592686002522, | |
| "grad_norm": 35.423587799072266, | |
| "learning_rate": 1.785714285714286e-05, | |
| "loss": 2.2479, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.1235813366960908, | |
| "grad_norm": 31.041240692138672, | |
| "learning_rate": 1.780612244897959e-05, | |
| "loss": 1.9687, | |
| "step": 49 | |
| }, | |
| { | |
| "epoch": 0.12610340479192939, | |
| "grad_norm": 28.790103912353516, | |
| "learning_rate": 1.7755102040816327e-05, | |
| "loss": 2.1428, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.12862547288776796, | |
| "grad_norm": 25.089313507080078, | |
| "learning_rate": 1.7704081632653062e-05, | |
| "loss": 2.0673, | |
| "step": 51 | |
| }, | |
| { | |
| "epoch": 0.13114754098360656, | |
| "grad_norm": 26.493867874145508, | |
| "learning_rate": 1.7653061224489798e-05, | |
| "loss": 2.0814, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.13366960907944514, | |
| "grad_norm": 19.993173599243164, | |
| "learning_rate": 1.760204081632653e-05, | |
| "loss": 2.005, | |
| "step": 53 | |
| }, | |
| { | |
| "epoch": 0.13619167717528374, | |
| "grad_norm": 21.89765167236328, | |
| "learning_rate": 1.7551020408163266e-05, | |
| "loss": 2.2262, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.13871374527112232, | |
| "grad_norm": 23.22844123840332, | |
| "learning_rate": 1.7500000000000002e-05, | |
| "loss": 2.0208, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.14123581336696092, | |
| "grad_norm": 15.864526748657227, | |
| "learning_rate": 1.7448979591836738e-05, | |
| "loss": 2.0153, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.1437578814627995, | |
| "grad_norm": 21.451187133789062, | |
| "learning_rate": 1.7397959183673473e-05, | |
| "loss": 2.1386, | |
| "step": 57 | |
| }, | |
| { | |
| "epoch": 0.14627994955863807, | |
| "grad_norm": 18.089811325073242, | |
| "learning_rate": 1.7346938775510206e-05, | |
| "loss": 1.9517, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.14880201765447668, | |
| "grad_norm": 24.029157638549805, | |
| "learning_rate": 1.729591836734694e-05, | |
| "loss": 1.9719, | |
| "step": 59 | |
| }, | |
| { | |
| "epoch": 0.15132408575031525, | |
| "grad_norm": 18.722776412963867, | |
| "learning_rate": 1.7244897959183674e-05, | |
| "loss": 2.0623, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.15384615384615385, | |
| "grad_norm": 20.211933135986328, | |
| "learning_rate": 1.719387755102041e-05, | |
| "loss": 2.0081, | |
| "step": 61 | |
| }, | |
| { | |
| "epoch": 0.15636822194199243, | |
| "grad_norm": 17.61188507080078, | |
| "learning_rate": 1.7142857142857142e-05, | |
| "loss": 1.8484, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.15889029003783103, | |
| "grad_norm": 20.118955612182617, | |
| "learning_rate": 1.7091836734693878e-05, | |
| "loss": 2.0799, | |
| "step": 63 | |
| }, | |
| { | |
| "epoch": 0.1614123581336696, | |
| "grad_norm": 17.271841049194336, | |
| "learning_rate": 1.7040816326530613e-05, | |
| "loss": 1.9832, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.16393442622950818, | |
| "grad_norm": 19.521392822265625, | |
| "learning_rate": 1.698979591836735e-05, | |
| "loss": 1.9129, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.1664564943253468, | |
| "grad_norm": 22.660900115966797, | |
| "learning_rate": 1.6938775510204085e-05, | |
| "loss": 2.118, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.16897856242118536, | |
| "grad_norm": 17.332427978515625, | |
| "learning_rate": 1.6887755102040817e-05, | |
| "loss": 1.9632, | |
| "step": 67 | |
| }, | |
| { | |
| "epoch": 0.17150063051702397, | |
| "grad_norm": 22.42765998840332, | |
| "learning_rate": 1.6836734693877553e-05, | |
| "loss": 1.954, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.17402269861286254, | |
| "grad_norm": 23.6208553314209, | |
| "learning_rate": 1.678571428571429e-05, | |
| "loss": 1.9917, | |
| "step": 69 | |
| }, | |
| { | |
| "epoch": 0.17654476670870115, | |
| "grad_norm": 19.78505516052246, | |
| "learning_rate": 1.673469387755102e-05, | |
| "loss": 1.7964, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.17906683480453972, | |
| "grad_norm": 19.453041076660156, | |
| "learning_rate": 1.6683673469387757e-05, | |
| "loss": 1.9587, | |
| "step": 71 | |
| }, | |
| { | |
| "epoch": 0.18158890290037832, | |
| "grad_norm": 24.731407165527344, | |
| "learning_rate": 1.6632653061224492e-05, | |
| "loss": 1.9945, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.1841109709962169, | |
| "grad_norm": 20.977611541748047, | |
| "learning_rate": 1.6581632653061225e-05, | |
| "loss": 2.0617, | |
| "step": 73 | |
| }, | |
| { | |
| "epoch": 0.18663303909205547, | |
| "grad_norm": 22.959585189819336, | |
| "learning_rate": 1.653061224489796e-05, | |
| "loss": 1.98, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.18915510718789408, | |
| "grad_norm": 21.952653884887695, | |
| "learning_rate": 1.6479591836734696e-05, | |
| "loss": 2.1094, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.19167717528373265, | |
| "grad_norm": 22.320383071899414, | |
| "learning_rate": 1.642857142857143e-05, | |
| "loss": 1.8418, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.19419924337957126, | |
| "grad_norm": 24.375411987304688, | |
| "learning_rate": 1.6377551020408164e-05, | |
| "loss": 1.8428, | |
| "step": 77 | |
| }, | |
| { | |
| "epoch": 0.19672131147540983, | |
| "grad_norm": 19.64323616027832, | |
| "learning_rate": 1.63265306122449e-05, | |
| "loss": 1.9194, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.19924337957124844, | |
| "grad_norm": 22.459064483642578, | |
| "learning_rate": 1.6275510204081636e-05, | |
| "loss": 1.6649, | |
| "step": 79 | |
| }, | |
| { | |
| "epoch": 0.201765447667087, | |
| "grad_norm": 36.789764404296875, | |
| "learning_rate": 1.6224489795918368e-05, | |
| "loss": 2.0131, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.2042875157629256, | |
| "grad_norm": 22.109119415283203, | |
| "learning_rate": 1.6173469387755104e-05, | |
| "loss": 1.9603, | |
| "step": 81 | |
| }, | |
| { | |
| "epoch": 0.2068095838587642, | |
| "grad_norm": 19.196834564208984, | |
| "learning_rate": 1.612244897959184e-05, | |
| "loss": 2.0538, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.20933165195460277, | |
| "grad_norm": 26.870800018310547, | |
| "learning_rate": 1.6071428571428572e-05, | |
| "loss": 1.9168, | |
| "step": 83 | |
| }, | |
| { | |
| "epoch": 0.21185372005044137, | |
| "grad_norm": 35.190696716308594, | |
| "learning_rate": 1.6020408163265308e-05, | |
| "loss": 2.0149, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.21437578814627994, | |
| "grad_norm": 19.963472366333008, | |
| "learning_rate": 1.596938775510204e-05, | |
| "loss": 1.7871, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.21689785624211855, | |
| "grad_norm": 20.292407989501953, | |
| "learning_rate": 1.5918367346938776e-05, | |
| "loss": 1.944, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.21941992433795712, | |
| "grad_norm": 20.55329132080078, | |
| "learning_rate": 1.586734693877551e-05, | |
| "loss": 2.0175, | |
| "step": 87 | |
| }, | |
| { | |
| "epoch": 0.22194199243379573, | |
| "grad_norm": 17.27350616455078, | |
| "learning_rate": 1.5816326530612247e-05, | |
| "loss": 1.9308, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.2244640605296343, | |
| "grad_norm": 22.471134185791016, | |
| "learning_rate": 1.576530612244898e-05, | |
| "loss": 1.9221, | |
| "step": 89 | |
| }, | |
| { | |
| "epoch": 0.22698612862547288, | |
| "grad_norm": 25.098316192626953, | |
| "learning_rate": 1.5714285714285715e-05, | |
| "loss": 1.9359, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.22950819672131148, | |
| "grad_norm": 25.125213623046875, | |
| "learning_rate": 1.566326530612245e-05, | |
| "loss": 2.0087, | |
| "step": 91 | |
| }, | |
| { | |
| "epoch": 0.23203026481715006, | |
| "grad_norm": 20.038599014282227, | |
| "learning_rate": 1.5612244897959187e-05, | |
| "loss": 2.0939, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.23455233291298866, | |
| "grad_norm": 19.016841888427734, | |
| "learning_rate": 1.556122448979592e-05, | |
| "loss": 2.0183, | |
| "step": 93 | |
| }, | |
| { | |
| "epoch": 0.23707440100882723, | |
| "grad_norm": 21.97820472717285, | |
| "learning_rate": 1.5510204081632655e-05, | |
| "loss": 1.8239, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.23959646910466584, | |
| "grad_norm": 25.578901290893555, | |
| "learning_rate": 1.545918367346939e-05, | |
| "loss": 1.9388, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.2421185372005044, | |
| "grad_norm": 23.74614143371582, | |
| "learning_rate": 1.5408163265306123e-05, | |
| "loss": 2.0492, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.244640605296343, | |
| "grad_norm": 22.203304290771484, | |
| "learning_rate": 1.535714285714286e-05, | |
| "loss": 1.941, | |
| "step": 97 | |
| }, | |
| { | |
| "epoch": 0.2471626733921816, | |
| "grad_norm": 21.39324188232422, | |
| "learning_rate": 1.530612244897959e-05, | |
| "loss": 1.9042, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.24968474148802017, | |
| "grad_norm": 18.99315643310547, | |
| "learning_rate": 1.5255102040816327e-05, | |
| "loss": 1.88, | |
| "step": 99 | |
| }, | |
| { | |
| "epoch": 0.25220680958385877, | |
| "grad_norm": 24.22341537475586, | |
| "learning_rate": 1.5204081632653063e-05, | |
| "loss": 1.8147, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.25220680958385877, | |
| "eval_loss": 1.8966256380081177, | |
| "eval_runtime": 6.9787, | |
| "eval_samples_per_second": 101.022, | |
| "eval_steps_per_second": 50.583, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.2547288776796974, | |
| "grad_norm": 18.296152114868164, | |
| "learning_rate": 1.5153061224489798e-05, | |
| "loss": 1.8605, | |
| "step": 101 | |
| }, | |
| { | |
| "epoch": 0.2572509457755359, | |
| "grad_norm": 26.404766082763672, | |
| "learning_rate": 1.510204081632653e-05, | |
| "loss": 2.1195, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.2597730138713745, | |
| "grad_norm": 19.187122344970703, | |
| "learning_rate": 1.5051020408163266e-05, | |
| "loss": 1.9284, | |
| "step": 103 | |
| }, | |
| { | |
| "epoch": 0.26229508196721313, | |
| "grad_norm": 20.79934310913086, | |
| "learning_rate": 1.5000000000000002e-05, | |
| "loss": 1.725, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.2648171500630517, | |
| "grad_norm": 23.833288192749023, | |
| "learning_rate": 1.4948979591836736e-05, | |
| "loss": 1.9215, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.2673392181588903, | |
| "grad_norm": 22.301727294921875, | |
| "learning_rate": 1.4897959183673472e-05, | |
| "loss": 1.8728, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.2698612862547289, | |
| "grad_norm": 23.685596466064453, | |
| "learning_rate": 1.4846938775510204e-05, | |
| "loss": 2.0482, | |
| "step": 107 | |
| }, | |
| { | |
| "epoch": 0.2723833543505675, | |
| "grad_norm": 18.969186782836914, | |
| "learning_rate": 1.479591836734694e-05, | |
| "loss": 1.8724, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.27490542244640603, | |
| "grad_norm": 23.994483947753906, | |
| "learning_rate": 1.4744897959183676e-05, | |
| "loss": 1.9542, | |
| "step": 109 | |
| }, | |
| { | |
| "epoch": 0.27742749054224464, | |
| "grad_norm": 16.84621238708496, | |
| "learning_rate": 1.469387755102041e-05, | |
| "loss": 1.9703, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.27994955863808324, | |
| "grad_norm": 23.411087036132812, | |
| "learning_rate": 1.4642857142857144e-05, | |
| "loss": 1.9836, | |
| "step": 111 | |
| }, | |
| { | |
| "epoch": 0.28247162673392184, | |
| "grad_norm": 29.55487632751465, | |
| "learning_rate": 1.4591836734693878e-05, | |
| "loss": 1.9124, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.2849936948297604, | |
| "grad_norm": 32.28921127319336, | |
| "learning_rate": 1.4540816326530614e-05, | |
| "loss": 1.8566, | |
| "step": 113 | |
| }, | |
| { | |
| "epoch": 0.287515762925599, | |
| "grad_norm": 24.007558822631836, | |
| "learning_rate": 1.448979591836735e-05, | |
| "loss": 1.8296, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.2900378310214376, | |
| "grad_norm": 26.753524780273438, | |
| "learning_rate": 1.4438775510204083e-05, | |
| "loss": 1.7181, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.29255989911727615, | |
| "grad_norm": 22.49270248413086, | |
| "learning_rate": 1.4387755102040817e-05, | |
| "loss": 1.8741, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.29508196721311475, | |
| "grad_norm": 28.006656646728516, | |
| "learning_rate": 1.4336734693877551e-05, | |
| "loss": 1.9151, | |
| "step": 117 | |
| }, | |
| { | |
| "epoch": 0.29760403530895335, | |
| "grad_norm": 17.606775283813477, | |
| "learning_rate": 1.4285714285714287e-05, | |
| "loss": 1.9654, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 0.30012610340479196, | |
| "grad_norm": 29.94802474975586, | |
| "learning_rate": 1.4234693877551023e-05, | |
| "loss": 1.8849, | |
| "step": 119 | |
| }, | |
| { | |
| "epoch": 0.3026481715006305, | |
| "grad_norm": 28.27743148803711, | |
| "learning_rate": 1.4183673469387755e-05, | |
| "loss": 1.8006, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.3051702395964691, | |
| "grad_norm": 19.11652183532715, | |
| "learning_rate": 1.4132653061224491e-05, | |
| "loss": 1.8539, | |
| "step": 121 | |
| }, | |
| { | |
| "epoch": 0.3076923076923077, | |
| "grad_norm": 24.255807876586914, | |
| "learning_rate": 1.4081632653061225e-05, | |
| "loss": 2.0162, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 0.31021437578814626, | |
| "grad_norm": 22.508352279663086, | |
| "learning_rate": 1.403061224489796e-05, | |
| "loss": 1.858, | |
| "step": 123 | |
| }, | |
| { | |
| "epoch": 0.31273644388398486, | |
| "grad_norm": 27.028772354125977, | |
| "learning_rate": 1.3979591836734696e-05, | |
| "loss": 1.8175, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.31525851197982346, | |
| "grad_norm": 22.697704315185547, | |
| "learning_rate": 1.3928571428571429e-05, | |
| "loss": 1.9789, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.31778058007566207, | |
| "grad_norm": 31.604068756103516, | |
| "learning_rate": 1.3877551020408165e-05, | |
| "loss": 2.0039, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 0.3203026481715006, | |
| "grad_norm": 27.71053695678711, | |
| "learning_rate": 1.38265306122449e-05, | |
| "loss": 1.9867, | |
| "step": 127 | |
| }, | |
| { | |
| "epoch": 0.3228247162673392, | |
| "grad_norm": 17.37586784362793, | |
| "learning_rate": 1.3775510204081634e-05, | |
| "loss": 1.6931, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.3253467843631778, | |
| "grad_norm": 20.28536605834961, | |
| "learning_rate": 1.3724489795918368e-05, | |
| "loss": 1.9199, | |
| "step": 129 | |
| }, | |
| { | |
| "epoch": 0.32786885245901637, | |
| "grad_norm": 29.4377384185791, | |
| "learning_rate": 1.3673469387755102e-05, | |
| "loss": 1.9322, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.33039092055485497, | |
| "grad_norm": 17.703046798706055, | |
| "learning_rate": 1.3622448979591838e-05, | |
| "loss": 1.8842, | |
| "step": 131 | |
| }, | |
| { | |
| "epoch": 0.3329129886506936, | |
| "grad_norm": 30.14008140563965, | |
| "learning_rate": 1.3571428571428574e-05, | |
| "loss": 2.1228, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 0.3354350567465322, | |
| "grad_norm": 29.657262802124023, | |
| "learning_rate": 1.3520408163265306e-05, | |
| "loss": 1.8929, | |
| "step": 133 | |
| }, | |
| { | |
| "epoch": 0.3379571248423707, | |
| "grad_norm": 18.243854522705078, | |
| "learning_rate": 1.3469387755102042e-05, | |
| "loss": 1.8811, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 0.34047919293820933, | |
| "grad_norm": 33.22247314453125, | |
| "learning_rate": 1.3418367346938776e-05, | |
| "loss": 1.9814, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.34300126103404793, | |
| "grad_norm": 26.413856506347656, | |
| "learning_rate": 1.3367346938775512e-05, | |
| "loss": 1.9329, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.3455233291298865, | |
| "grad_norm": 20.56089210510254, | |
| "learning_rate": 1.3316326530612247e-05, | |
| "loss": 1.8944, | |
| "step": 137 | |
| }, | |
| { | |
| "epoch": 0.3480453972257251, | |
| "grad_norm": 19.480737686157227, | |
| "learning_rate": 1.326530612244898e-05, | |
| "loss": 1.9221, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 0.3505674653215637, | |
| "grad_norm": 22.788074493408203, | |
| "learning_rate": 1.3214285714285716e-05, | |
| "loss": 2.0445, | |
| "step": 139 | |
| }, | |
| { | |
| "epoch": 0.3530895334174023, | |
| "grad_norm": 20.722291946411133, | |
| "learning_rate": 1.316326530612245e-05, | |
| "loss": 1.9998, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.35561160151324084, | |
| "grad_norm": 25.190189361572266, | |
| "learning_rate": 1.3112244897959185e-05, | |
| "loss": 1.8076, | |
| "step": 141 | |
| }, | |
| { | |
| "epoch": 0.35813366960907944, | |
| "grad_norm": 23.203886032104492, | |
| "learning_rate": 1.3061224489795918e-05, | |
| "loss": 1.7821, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 0.36065573770491804, | |
| "grad_norm": 25.32374382019043, | |
| "learning_rate": 1.3010204081632653e-05, | |
| "loss": 2.0356, | |
| "step": 143 | |
| }, | |
| { | |
| "epoch": 0.36317780580075665, | |
| "grad_norm": 28.798864364624023, | |
| "learning_rate": 1.2959183673469389e-05, | |
| "loss": 1.8202, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.3656998738965952, | |
| "grad_norm": 24.93810272216797, | |
| "learning_rate": 1.2908163265306123e-05, | |
| "loss": 1.9237, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.3682219419924338, | |
| "grad_norm": 36.78353500366211, | |
| "learning_rate": 1.2857142857142859e-05, | |
| "loss": 2.0019, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 0.3707440100882724, | |
| "grad_norm": 28.510663986206055, | |
| "learning_rate": 1.2806122448979591e-05, | |
| "loss": 1.9268, | |
| "step": 147 | |
| }, | |
| { | |
| "epoch": 0.37326607818411095, | |
| "grad_norm": 38.19087219238281, | |
| "learning_rate": 1.2755102040816327e-05, | |
| "loss": 1.9366, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 0.37578814627994955, | |
| "grad_norm": 20.796728134155273, | |
| "learning_rate": 1.2704081632653063e-05, | |
| "loss": 1.8731, | |
| "step": 149 | |
| }, | |
| { | |
| "epoch": 0.37831021437578816, | |
| "grad_norm": 23.036758422851562, | |
| "learning_rate": 1.2653061224489798e-05, | |
| "loss": 1.8835, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.38083228247162676, | |
| "grad_norm": 27.058195114135742, | |
| "learning_rate": 1.260204081632653e-05, | |
| "loss": 1.8013, | |
| "step": 151 | |
| }, | |
| { | |
| "epoch": 0.3833543505674653, | |
| "grad_norm": 25.390460968017578, | |
| "learning_rate": 1.2551020408163267e-05, | |
| "loss": 2.0623, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 0.3858764186633039, | |
| "grad_norm": 27.993654251098633, | |
| "learning_rate": 1.25e-05, | |
| "loss": 1.6895, | |
| "step": 153 | |
| }, | |
| { | |
| "epoch": 0.3883984867591425, | |
| "grad_norm": 24.15807342529297, | |
| "learning_rate": 1.2448979591836736e-05, | |
| "loss": 1.9799, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 0.39092055485498106, | |
| "grad_norm": 24.369815826416016, | |
| "learning_rate": 1.2397959183673472e-05, | |
| "loss": 1.9687, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 0.39344262295081966, | |
| "grad_norm": 24.572988510131836, | |
| "learning_rate": 1.2346938775510204e-05, | |
| "loss": 1.8607, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 0.39596469104665827, | |
| "grad_norm": 20.491390228271484, | |
| "learning_rate": 1.229591836734694e-05, | |
| "loss": 2.0677, | |
| "step": 157 | |
| }, | |
| { | |
| "epoch": 0.39848675914249687, | |
| "grad_norm": 25.128101348876953, | |
| "learning_rate": 1.2244897959183674e-05, | |
| "loss": 1.9233, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 0.4010088272383354, | |
| "grad_norm": 18.843276977539062, | |
| "learning_rate": 1.219387755102041e-05, | |
| "loss": 1.781, | |
| "step": 159 | |
| }, | |
| { | |
| "epoch": 0.403530895334174, | |
| "grad_norm": 24.99994659423828, | |
| "learning_rate": 1.2142857142857142e-05, | |
| "loss": 1.962, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.4060529634300126, | |
| "grad_norm": 20.679218292236328, | |
| "learning_rate": 1.2091836734693878e-05, | |
| "loss": 2.0055, | |
| "step": 161 | |
| }, | |
| { | |
| "epoch": 0.4085750315258512, | |
| "grad_norm": 26.00550651550293, | |
| "learning_rate": 1.2040816326530614e-05, | |
| "loss": 1.9761, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 0.4110970996216898, | |
| "grad_norm": 33.80900192260742, | |
| "learning_rate": 1.1989795918367348e-05, | |
| "loss": 2.0502, | |
| "step": 163 | |
| }, | |
| { | |
| "epoch": 0.4136191677175284, | |
| "grad_norm": 25.639009475708008, | |
| "learning_rate": 1.1938775510204084e-05, | |
| "loss": 1.9088, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 0.416141235813367, | |
| "grad_norm": 17.48627471923828, | |
| "learning_rate": 1.1887755102040816e-05, | |
| "loss": 1.9359, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 0.41866330390920553, | |
| "grad_norm": 23.16074562072754, | |
| "learning_rate": 1.1836734693877552e-05, | |
| "loss": 1.8647, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 0.42118537200504413, | |
| "grad_norm": 25.39946174621582, | |
| "learning_rate": 1.1785714285714287e-05, | |
| "loss": 1.8523, | |
| "step": 167 | |
| }, | |
| { | |
| "epoch": 0.42370744010088274, | |
| "grad_norm": 25.8050537109375, | |
| "learning_rate": 1.1734693877551021e-05, | |
| "loss": 1.8403, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 0.4262295081967213, | |
| "grad_norm": 20.019033432006836, | |
| "learning_rate": 1.1683673469387755e-05, | |
| "loss": 2.0023, | |
| "step": 169 | |
| }, | |
| { | |
| "epoch": 0.4287515762925599, | |
| "grad_norm": 26.194847106933594, | |
| "learning_rate": 1.1632653061224491e-05, | |
| "loss": 1.9429, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.4312736443883985, | |
| "grad_norm": 21.064212799072266, | |
| "learning_rate": 1.1581632653061225e-05, | |
| "loss": 1.8302, | |
| "step": 171 | |
| }, | |
| { | |
| "epoch": 0.4337957124842371, | |
| "grad_norm": 21.876129150390625, | |
| "learning_rate": 1.1530612244897961e-05, | |
| "loss": 1.8881, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 0.43631778058007564, | |
| "grad_norm": 33.61103439331055, | |
| "learning_rate": 1.1479591836734697e-05, | |
| "loss": 2.0497, | |
| "step": 173 | |
| }, | |
| { | |
| "epoch": 0.43883984867591425, | |
| "grad_norm": 27.204744338989258, | |
| "learning_rate": 1.1428571428571429e-05, | |
| "loss": 1.8431, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 0.44136191677175285, | |
| "grad_norm": 21.605751037597656, | |
| "learning_rate": 1.1377551020408165e-05, | |
| "loss": 1.9149, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.44388398486759145, | |
| "grad_norm": 30.307472229003906, | |
| "learning_rate": 1.1326530612244899e-05, | |
| "loss": 2.0313, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 0.44640605296343, | |
| "grad_norm": 23.69244384765625, | |
| "learning_rate": 1.1275510204081635e-05, | |
| "loss": 1.8676, | |
| "step": 177 | |
| }, | |
| { | |
| "epoch": 0.4489281210592686, | |
| "grad_norm": 25.619901657104492, | |
| "learning_rate": 1.1224489795918367e-05, | |
| "loss": 1.7905, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 0.4514501891551072, | |
| "grad_norm": 28.0296573638916, | |
| "learning_rate": 1.1173469387755103e-05, | |
| "loss": 1.8567, | |
| "step": 179 | |
| }, | |
| { | |
| "epoch": 0.45397225725094575, | |
| "grad_norm": 36.4359130859375, | |
| "learning_rate": 1.1122448979591838e-05, | |
| "loss": 2.115, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.45649432534678436, | |
| "grad_norm": 26.91726303100586, | |
| "learning_rate": 1.1071428571428572e-05, | |
| "loss": 2.0642, | |
| "step": 181 | |
| }, | |
| { | |
| "epoch": 0.45901639344262296, | |
| "grad_norm": 23.085880279541016, | |
| "learning_rate": 1.1020408163265306e-05, | |
| "loss": 1.9109, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 0.46153846153846156, | |
| "grad_norm": 27.870641708374023, | |
| "learning_rate": 1.096938775510204e-05, | |
| "loss": 1.8999, | |
| "step": 183 | |
| }, | |
| { | |
| "epoch": 0.4640605296343001, | |
| "grad_norm": 32.0672607421875, | |
| "learning_rate": 1.0918367346938776e-05, | |
| "loss": 1.904, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 0.4665825977301387, | |
| "grad_norm": 28.879365921020508, | |
| "learning_rate": 1.0867346938775512e-05, | |
| "loss": 1.7159, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 0.4691046658259773, | |
| "grad_norm": 27.592771530151367, | |
| "learning_rate": 1.0816326530612246e-05, | |
| "loss": 1.8561, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 0.47162673392181587, | |
| "grad_norm": 27.412763595581055, | |
| "learning_rate": 1.076530612244898e-05, | |
| "loss": 1.9282, | |
| "step": 187 | |
| }, | |
| { | |
| "epoch": 0.47414880201765447, | |
| "grad_norm": 30.12356185913086, | |
| "learning_rate": 1.0714285714285714e-05, | |
| "loss": 1.8726, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 0.4766708701134931, | |
| "grad_norm": 39.9027214050293, | |
| "learning_rate": 1.066326530612245e-05, | |
| "loss": 1.7551, | |
| "step": 189 | |
| }, | |
| { | |
| "epoch": 0.4791929382093317, | |
| "grad_norm": 30.483945846557617, | |
| "learning_rate": 1.0612244897959186e-05, | |
| "loss": 1.7643, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.4817150063051702, | |
| "grad_norm": 26.00415802001953, | |
| "learning_rate": 1.0561224489795918e-05, | |
| "loss": 2.0552, | |
| "step": 191 | |
| }, | |
| { | |
| "epoch": 0.4842370744010088, | |
| "grad_norm": 23.03282356262207, | |
| "learning_rate": 1.0510204081632654e-05, | |
| "loss": 2.0052, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 0.48675914249684743, | |
| "grad_norm": 33.653221130371094, | |
| "learning_rate": 1.045918367346939e-05, | |
| "loss": 1.7367, | |
| "step": 193 | |
| }, | |
| { | |
| "epoch": 0.489281210592686, | |
| "grad_norm": 39.59351348876953, | |
| "learning_rate": 1.0408163265306123e-05, | |
| "loss": 1.9726, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 0.4918032786885246, | |
| "grad_norm": 42.77714920043945, | |
| "learning_rate": 1.0357142857142859e-05, | |
| "loss": 2.0906, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 0.4943253467843632, | |
| "grad_norm": 33.194549560546875, | |
| "learning_rate": 1.0306122448979591e-05, | |
| "loss": 2.0742, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 0.4968474148802018, | |
| "grad_norm": 25.10793685913086, | |
| "learning_rate": 1.0255102040816327e-05, | |
| "loss": 1.9204, | |
| "step": 197 | |
| }, | |
| { | |
| "epoch": 0.49936948297604034, | |
| "grad_norm": 40.048404693603516, | |
| "learning_rate": 1.0204081632653063e-05, | |
| "loss": 1.7775, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 0.501891551071879, | |
| "grad_norm": 26.085933685302734, | |
| "learning_rate": 1.0153061224489797e-05, | |
| "loss": 1.9459, | |
| "step": 199 | |
| }, | |
| { | |
| "epoch": 0.5044136191677175, | |
| "grad_norm": 18.375, | |
| "learning_rate": 1.0102040816326531e-05, | |
| "loss": 1.9536, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.5044136191677175, | |
| "eval_loss": 1.8626214265823364, | |
| "eval_runtime": 6.6508, | |
| "eval_samples_per_second": 106.002, | |
| "eval_steps_per_second": 53.076, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.5069356872635561, | |
| "grad_norm": 33.858341217041016, | |
| "learning_rate": 1.0051020408163265e-05, | |
| "loss": 1.8191, | |
| "step": 201 | |
| }, | |
| { | |
| "epoch": 0.5094577553593947, | |
| "grad_norm": 22.895992279052734, | |
| "learning_rate": 1e-05, | |
| "loss": 2.0596, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 0.5119798234552333, | |
| "grad_norm": 30.55072593688965, | |
| "learning_rate": 9.948979591836737e-06, | |
| "loss": 1.8904, | |
| "step": 203 | |
| }, | |
| { | |
| "epoch": 0.5145018915510718, | |
| "grad_norm": 26.542705535888672, | |
| "learning_rate": 9.89795918367347e-06, | |
| "loss": 1.9683, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 0.5170239596469105, | |
| "grad_norm": 39.81034851074219, | |
| "learning_rate": 9.846938775510205e-06, | |
| "loss": 1.9726, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 0.519546027742749, | |
| "grad_norm": 22.0065860748291, | |
| "learning_rate": 9.795918367346939e-06, | |
| "loss": 2.0575, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 0.5220680958385876, | |
| "grad_norm": 19.012041091918945, | |
| "learning_rate": 9.744897959183674e-06, | |
| "loss": 1.8431, | |
| "step": 207 | |
| }, | |
| { | |
| "epoch": 0.5245901639344263, | |
| "grad_norm": 39.699974060058594, | |
| "learning_rate": 9.693877551020408e-06, | |
| "loss": 1.8231, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 0.5271122320302648, | |
| "grad_norm": 21.391319274902344, | |
| "learning_rate": 9.642857142857144e-06, | |
| "loss": 1.7939, | |
| "step": 209 | |
| }, | |
| { | |
| "epoch": 0.5296343001261034, | |
| "grad_norm": 25.8063907623291, | |
| "learning_rate": 9.591836734693878e-06, | |
| "loss": 2.0366, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.532156368221942, | |
| "grad_norm": 20.598569869995117, | |
| "learning_rate": 9.540816326530612e-06, | |
| "loss": 1.8323, | |
| "step": 211 | |
| }, | |
| { | |
| "epoch": 0.5346784363177806, | |
| "grad_norm": 29.391401290893555, | |
| "learning_rate": 9.489795918367348e-06, | |
| "loss": 2.0052, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 0.5372005044136192, | |
| "grad_norm": 24.39499855041504, | |
| "learning_rate": 9.438775510204082e-06, | |
| "loss": 1.8461, | |
| "step": 213 | |
| }, | |
| { | |
| "epoch": 0.5397225725094578, | |
| "grad_norm": 24.16887092590332, | |
| "learning_rate": 9.387755102040818e-06, | |
| "loss": 1.9404, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 0.5422446406052963, | |
| "grad_norm": 24.577871322631836, | |
| "learning_rate": 9.336734693877552e-06, | |
| "loss": 1.9202, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 0.544766708701135, | |
| "grad_norm": 26.117361068725586, | |
| "learning_rate": 9.285714285714288e-06, | |
| "loss": 1.921, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 0.5472887767969735, | |
| "grad_norm": 22.586837768554688, | |
| "learning_rate": 9.234693877551022e-06, | |
| "loss": 1.9692, | |
| "step": 217 | |
| }, | |
| { | |
| "epoch": 0.5498108448928121, | |
| "grad_norm": 18.438722610473633, | |
| "learning_rate": 9.183673469387756e-06, | |
| "loss": 1.9496, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 0.5523329129886507, | |
| "grad_norm": 22.94545555114746, | |
| "learning_rate": 9.13265306122449e-06, | |
| "loss": 1.991, | |
| "step": 219 | |
| }, | |
| { | |
| "epoch": 0.5548549810844893, | |
| "grad_norm": 28.664562225341797, | |
| "learning_rate": 9.081632653061225e-06, | |
| "loss": 1.8352, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.5573770491803278, | |
| "grad_norm": 25.63576316833496, | |
| "learning_rate": 9.03061224489796e-06, | |
| "loss": 1.9399, | |
| "step": 221 | |
| }, | |
| { | |
| "epoch": 0.5598991172761665, | |
| "grad_norm": 21.650251388549805, | |
| "learning_rate": 8.979591836734695e-06, | |
| "loss": 1.9565, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 0.562421185372005, | |
| "grad_norm": 29.605735778808594, | |
| "learning_rate": 8.92857142857143e-06, | |
| "loss": 1.729, | |
| "step": 223 | |
| }, | |
| { | |
| "epoch": 0.5649432534678437, | |
| "grad_norm": 23.98230743408203, | |
| "learning_rate": 8.877551020408163e-06, | |
| "loss": 1.9399, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 0.5674653215636822, | |
| "grad_norm": 20.37510108947754, | |
| "learning_rate": 8.826530612244899e-06, | |
| "loss": 1.7322, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.5699873896595208, | |
| "grad_norm": 25.876188278198242, | |
| "learning_rate": 8.775510204081633e-06, | |
| "loss": 1.9444, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 0.5725094577553594, | |
| "grad_norm": 32.07249069213867, | |
| "learning_rate": 8.724489795918369e-06, | |
| "loss": 1.9364, | |
| "step": 227 | |
| }, | |
| { | |
| "epoch": 0.575031525851198, | |
| "grad_norm": 28.014524459838867, | |
| "learning_rate": 8.673469387755103e-06, | |
| "loss": 1.757, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 0.5775535939470365, | |
| "grad_norm": 30.82647132873535, | |
| "learning_rate": 8.622448979591837e-06, | |
| "loss": 1.9067, | |
| "step": 229 | |
| }, | |
| { | |
| "epoch": 0.5800756620428752, | |
| "grad_norm": 30.651660919189453, | |
| "learning_rate": 8.571428571428571e-06, | |
| "loss": 1.9906, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.5825977301387137, | |
| "grad_norm": 25.239904403686523, | |
| "learning_rate": 8.520408163265307e-06, | |
| "loss": 1.8914, | |
| "step": 231 | |
| }, | |
| { | |
| "epoch": 0.5851197982345523, | |
| "grad_norm": 21.33747673034668, | |
| "learning_rate": 8.469387755102042e-06, | |
| "loss": 1.9999, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 0.587641866330391, | |
| "grad_norm": 25.255064010620117, | |
| "learning_rate": 8.418367346938776e-06, | |
| "loss": 1.8941, | |
| "step": 233 | |
| }, | |
| { | |
| "epoch": 0.5901639344262295, | |
| "grad_norm": 24.443973541259766, | |
| "learning_rate": 8.36734693877551e-06, | |
| "loss": 1.7679, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 0.592686002522068, | |
| "grad_norm": 25.473894119262695, | |
| "learning_rate": 8.316326530612246e-06, | |
| "loss": 1.7876, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 0.5952080706179067, | |
| "grad_norm": 26.28467559814453, | |
| "learning_rate": 8.26530612244898e-06, | |
| "loss": 1.6761, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 0.5977301387137453, | |
| "grad_norm": 24.488052368164062, | |
| "learning_rate": 8.214285714285714e-06, | |
| "loss": 2.0022, | |
| "step": 237 | |
| }, | |
| { | |
| "epoch": 0.6002522068095839, | |
| "grad_norm": 30.074064254760742, | |
| "learning_rate": 8.16326530612245e-06, | |
| "loss": 1.7747, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 0.6027742749054225, | |
| "grad_norm": 23.73440170288086, | |
| "learning_rate": 8.112244897959184e-06, | |
| "loss": 1.8468, | |
| "step": 239 | |
| }, | |
| { | |
| "epoch": 0.605296343001261, | |
| "grad_norm": 22.338869094848633, | |
| "learning_rate": 8.06122448979592e-06, | |
| "loss": 1.8611, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.6078184110970997, | |
| "grad_norm": 24.844266891479492, | |
| "learning_rate": 8.010204081632654e-06, | |
| "loss": 1.9285, | |
| "step": 241 | |
| }, | |
| { | |
| "epoch": 0.6103404791929382, | |
| "grad_norm": 29.65668487548828, | |
| "learning_rate": 7.959183673469388e-06, | |
| "loss": 1.935, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 0.6128625472887768, | |
| "grad_norm": 26.01723289489746, | |
| "learning_rate": 7.908163265306124e-06, | |
| "loss": 1.7587, | |
| "step": 243 | |
| }, | |
| { | |
| "epoch": 0.6153846153846154, | |
| "grad_norm": 27.04817771911621, | |
| "learning_rate": 7.857142857142858e-06, | |
| "loss": 1.8878, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 0.617906683480454, | |
| "grad_norm": 36.23786163330078, | |
| "learning_rate": 7.806122448979593e-06, | |
| "loss": 1.992, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 0.6204287515762925, | |
| "grad_norm": 19.283294677734375, | |
| "learning_rate": 7.755102040816327e-06, | |
| "loss": 1.8066, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 0.6229508196721312, | |
| "grad_norm": 24.24143409729004, | |
| "learning_rate": 7.704081632653061e-06, | |
| "loss": 1.8899, | |
| "step": 247 | |
| }, | |
| { | |
| "epoch": 0.6254728877679697, | |
| "grad_norm": 25.59832763671875, | |
| "learning_rate": 7.653061224489796e-06, | |
| "loss": 1.9601, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 0.6279949558638083, | |
| "grad_norm": 27.195640563964844, | |
| "learning_rate": 7.602040816326531e-06, | |
| "loss": 1.9561, | |
| "step": 249 | |
| }, | |
| { | |
| "epoch": 0.6305170239596469, | |
| "grad_norm": 27.854570388793945, | |
| "learning_rate": 7.551020408163265e-06, | |
| "loss": 1.8781, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.6330390920554855, | |
| "grad_norm": 25.715761184692383, | |
| "learning_rate": 7.500000000000001e-06, | |
| "loss": 1.8542, | |
| "step": 251 | |
| }, | |
| { | |
| "epoch": 0.6355611601513241, | |
| "grad_norm": 22.562984466552734, | |
| "learning_rate": 7.448979591836736e-06, | |
| "loss": 1.7681, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 0.6380832282471627, | |
| "grad_norm": 20.540348052978516, | |
| "learning_rate": 7.39795918367347e-06, | |
| "loss": 1.9617, | |
| "step": 253 | |
| }, | |
| { | |
| "epoch": 0.6406052963430012, | |
| "grad_norm": 24.610937118530273, | |
| "learning_rate": 7.346938775510205e-06, | |
| "loss": 1.9694, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 0.6431273644388399, | |
| "grad_norm": 27.93538475036621, | |
| "learning_rate": 7.295918367346939e-06, | |
| "loss": 1.8858, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 0.6456494325346784, | |
| "grad_norm": 31.466445922851562, | |
| "learning_rate": 7.244897959183675e-06, | |
| "loss": 1.8252, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 0.648171500630517, | |
| "grad_norm": 26.276226043701172, | |
| "learning_rate": 7.193877551020409e-06, | |
| "loss": 1.8865, | |
| "step": 257 | |
| }, | |
| { | |
| "epoch": 0.6506935687263556, | |
| "grad_norm": 22.52095603942871, | |
| "learning_rate": 7.1428571428571436e-06, | |
| "loss": 1.7649, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 0.6532156368221942, | |
| "grad_norm": 20.15144157409668, | |
| "learning_rate": 7.091836734693878e-06, | |
| "loss": 2.0158, | |
| "step": 259 | |
| }, | |
| { | |
| "epoch": 0.6557377049180327, | |
| "grad_norm": 26.405349731445312, | |
| "learning_rate": 7.0408163265306125e-06, | |
| "loss": 1.8932, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.6582597730138714, | |
| "grad_norm": 32.94384765625, | |
| "learning_rate": 6.989795918367348e-06, | |
| "loss": 1.7795, | |
| "step": 261 | |
| }, | |
| { | |
| "epoch": 0.6607818411097099, | |
| "grad_norm": 23.109092712402344, | |
| "learning_rate": 6.938775510204082e-06, | |
| "loss": 1.8383, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 0.6633039092055486, | |
| "grad_norm": 21.75737190246582, | |
| "learning_rate": 6.887755102040817e-06, | |
| "loss": 1.8727, | |
| "step": 263 | |
| }, | |
| { | |
| "epoch": 0.6658259773013872, | |
| "grad_norm": 22.96916389465332, | |
| "learning_rate": 6.836734693877551e-06, | |
| "loss": 1.8544, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 0.6683480453972257, | |
| "grad_norm": 25.62445640563965, | |
| "learning_rate": 6.785714285714287e-06, | |
| "loss": 1.7503, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 0.6708701134930644, | |
| "grad_norm": 25.430530548095703, | |
| "learning_rate": 6.734693877551021e-06, | |
| "loss": 1.7938, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 0.6733921815889029, | |
| "grad_norm": 26.462881088256836, | |
| "learning_rate": 6.683673469387756e-06, | |
| "loss": 1.8284, | |
| "step": 267 | |
| }, | |
| { | |
| "epoch": 0.6759142496847415, | |
| "grad_norm": 31.45004653930664, | |
| "learning_rate": 6.63265306122449e-06, | |
| "loss": 2.0328, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 0.6784363177805801, | |
| "grad_norm": 30.525737762451172, | |
| "learning_rate": 6.581632653061225e-06, | |
| "loss": 1.8192, | |
| "step": 269 | |
| }, | |
| { | |
| "epoch": 0.6809583858764187, | |
| "grad_norm": 25.705707550048828, | |
| "learning_rate": 6.530612244897959e-06, | |
| "loss": 1.8533, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.6834804539722572, | |
| "grad_norm": 39.90187454223633, | |
| "learning_rate": 6.4795918367346946e-06, | |
| "loss": 1.9483, | |
| "step": 271 | |
| }, | |
| { | |
| "epoch": 0.6860025220680959, | |
| "grad_norm": 28.0180721282959, | |
| "learning_rate": 6.4285714285714295e-06, | |
| "loss": 1.8132, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 0.6885245901639344, | |
| "grad_norm": 34.821372985839844, | |
| "learning_rate": 6.3775510204081635e-06, | |
| "loss": 1.9599, | |
| "step": 273 | |
| }, | |
| { | |
| "epoch": 0.691046658259773, | |
| "grad_norm": 24.018394470214844, | |
| "learning_rate": 6.326530612244899e-06, | |
| "loss": 1.9248, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 0.6935687263556116, | |
| "grad_norm": 24.074344635009766, | |
| "learning_rate": 6.275510204081633e-06, | |
| "loss": 2.0148, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 0.6960907944514502, | |
| "grad_norm": 31.1939754486084, | |
| "learning_rate": 6.224489795918368e-06, | |
| "loss": 1.8959, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 0.6986128625472888, | |
| "grad_norm": 25.481502532958984, | |
| "learning_rate": 6.173469387755102e-06, | |
| "loss": 1.9832, | |
| "step": 277 | |
| }, | |
| { | |
| "epoch": 0.7011349306431274, | |
| "grad_norm": 29.6664981842041, | |
| "learning_rate": 6.122448979591837e-06, | |
| "loss": 1.9222, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 0.7036569987389659, | |
| "grad_norm": 26.30698585510254, | |
| "learning_rate": 6.071428571428571e-06, | |
| "loss": 1.9897, | |
| "step": 279 | |
| }, | |
| { | |
| "epoch": 0.7061790668348046, | |
| "grad_norm": 31.827558517456055, | |
| "learning_rate": 6.020408163265307e-06, | |
| "loss": 1.8615, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.7087011349306431, | |
| "grad_norm": 24.80223846435547, | |
| "learning_rate": 5.969387755102042e-06, | |
| "loss": 1.9579, | |
| "step": 281 | |
| }, | |
| { | |
| "epoch": 0.7112232030264817, | |
| "grad_norm": 36.134700775146484, | |
| "learning_rate": 5.918367346938776e-06, | |
| "loss": 1.723, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 0.7137452711223203, | |
| "grad_norm": 30.388233184814453, | |
| "learning_rate": 5.867346938775511e-06, | |
| "loss": 1.9736, | |
| "step": 283 | |
| }, | |
| { | |
| "epoch": 0.7162673392181589, | |
| "grad_norm": 32.231563568115234, | |
| "learning_rate": 5.816326530612246e-06, | |
| "loss": 1.9228, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 0.7187894073139974, | |
| "grad_norm": 38.05869674682617, | |
| "learning_rate": 5.7653061224489805e-06, | |
| "loss": 1.9159, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 0.7213114754098361, | |
| "grad_norm": 27.256147384643555, | |
| "learning_rate": 5.7142857142857145e-06, | |
| "loss": 1.8072, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 0.7238335435056746, | |
| "grad_norm": 25.67181396484375, | |
| "learning_rate": 5.663265306122449e-06, | |
| "loss": 1.8633, | |
| "step": 287 | |
| }, | |
| { | |
| "epoch": 0.7263556116015133, | |
| "grad_norm": 31.8681697845459, | |
| "learning_rate": 5.6122448979591834e-06, | |
| "loss": 1.9822, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 0.7288776796973518, | |
| "grad_norm": 32.85325241088867, | |
| "learning_rate": 5.561224489795919e-06, | |
| "loss": 1.8166, | |
| "step": 289 | |
| }, | |
| { | |
| "epoch": 0.7313997477931904, | |
| "grad_norm": 35.64312744140625, | |
| "learning_rate": 5.510204081632653e-06, | |
| "loss": 1.7166, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.733921815889029, | |
| "grad_norm": 24.276235580444336, | |
| "learning_rate": 5.459183673469388e-06, | |
| "loss": 1.7593, | |
| "step": 291 | |
| }, | |
| { | |
| "epoch": 0.7364438839848676, | |
| "grad_norm": 29.371950149536133, | |
| "learning_rate": 5.408163265306123e-06, | |
| "loss": 1.8124, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 0.7389659520807061, | |
| "grad_norm": 23.76220703125, | |
| "learning_rate": 5.357142857142857e-06, | |
| "loss": 1.7775, | |
| "step": 293 | |
| }, | |
| { | |
| "epoch": 0.7414880201765448, | |
| "grad_norm": 37.103050231933594, | |
| "learning_rate": 5.306122448979593e-06, | |
| "loss": 1.9253, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 0.7440100882723834, | |
| "grad_norm": 20.0811767578125, | |
| "learning_rate": 5.255102040816327e-06, | |
| "loss": 1.8711, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 0.7465321563682219, | |
| "grad_norm": 35.33123016357422, | |
| "learning_rate": 5.204081632653062e-06, | |
| "loss": 1.8764, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 0.7490542244640606, | |
| "grad_norm": 31.880672454833984, | |
| "learning_rate": 5.153061224489796e-06, | |
| "loss": 1.8929, | |
| "step": 297 | |
| }, | |
| { | |
| "epoch": 0.7515762925598991, | |
| "grad_norm": 21.682334899902344, | |
| "learning_rate": 5.1020408163265315e-06, | |
| "loss": 2.0377, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 0.7540983606557377, | |
| "grad_norm": 34.68608474731445, | |
| "learning_rate": 5.0510204081632655e-06, | |
| "loss": 1.9341, | |
| "step": 299 | |
| }, | |
| { | |
| "epoch": 0.7566204287515763, | |
| "grad_norm": 25.59632110595703, | |
| "learning_rate": 5e-06, | |
| "loss": 1.8264, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.7566204287515763, | |
| "eval_loss": 1.8502724170684814, | |
| "eval_runtime": 6.6208, | |
| "eval_samples_per_second": 106.483, | |
| "eval_steps_per_second": 53.317, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.7591424968474149, | |
| "grad_norm": 33.780616760253906, | |
| "learning_rate": 4.948979591836735e-06, | |
| "loss": 1.8315, | |
| "step": 301 | |
| }, | |
| { | |
| "epoch": 0.7616645649432535, | |
| "grad_norm": 23.005069732666016, | |
| "learning_rate": 4.897959183673469e-06, | |
| "loss": 1.8434, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 0.7641866330390921, | |
| "grad_norm": 27.338787078857422, | |
| "learning_rate": 4.846938775510204e-06, | |
| "loss": 1.9102, | |
| "step": 303 | |
| }, | |
| { | |
| "epoch": 0.7667087011349306, | |
| "grad_norm": 26.87493133544922, | |
| "learning_rate": 4.795918367346939e-06, | |
| "loss": 1.8408, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 0.7692307692307693, | |
| "grad_norm": 33.59117126464844, | |
| "learning_rate": 4.744897959183674e-06, | |
| "loss": 1.8633, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 0.7717528373266078, | |
| "grad_norm": 38.98092269897461, | |
| "learning_rate": 4.693877551020409e-06, | |
| "loss": 1.8187, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 0.7742749054224464, | |
| "grad_norm": 28.7203369140625, | |
| "learning_rate": 4.642857142857144e-06, | |
| "loss": 1.8425, | |
| "step": 307 | |
| }, | |
| { | |
| "epoch": 0.776796973518285, | |
| "grad_norm": 30.91414451599121, | |
| "learning_rate": 4.591836734693878e-06, | |
| "loss": 1.8526, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 0.7793190416141236, | |
| "grad_norm": 29.04154396057129, | |
| "learning_rate": 4.540816326530613e-06, | |
| "loss": 1.8913, | |
| "step": 309 | |
| }, | |
| { | |
| "epoch": 0.7818411097099621, | |
| "grad_norm": 29.638099670410156, | |
| "learning_rate": 4.489795918367348e-06, | |
| "loss": 1.8736, | |
| "step": 310 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 397, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 10, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 4524488042102784.0, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |