Instructions to use furproxy/9b-26 with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- PEFT
How to use furproxy/9b-26 with PEFT:
from peft import PeftModel from transformers import AutoModelForCausalLM base_model = AutoModelForCausalLM.from_pretrained("/workspace/models/Qwen3.5-9B") model = PeftModel.from_pretrained(base_model, "furproxy/9b-26") - Transformers
How to use furproxy/9b-26 with Transformers:
# Use a pipeline as a high-level helper from transformers import pipeline pipe = pipeline("text-generation", model="furproxy/9b-26") messages = [ {"role": "user", "content": "Who are you?"}, ] pipe(messages)# Load model directly from transformers import AutoModel model = AutoModel.from_pretrained("furproxy/9b-26", dtype="auto") - Notebooks
- Google Colab
- Kaggle
- Local Apps Settings
- vLLM
How to use furproxy/9b-26 with vLLM:
Install from pip and serve model
# Install vLLM from pip: pip install vllm # Start the vLLM server: vllm serve "furproxy/9b-26" # Call the server using curl (OpenAI-compatible API): curl -X POST "http://localhost:8000/v1/chat/completions" \ -H "Content-Type: application/json" \ --data '{ "model": "furproxy/9b-26", "messages": [ { "role": "user", "content": "What is the capital of France?" } ] }'Use Docker
docker model run hf.co/furproxy/9b-26
- SGLang
How to use furproxy/9b-26 with SGLang:
Install from pip and serve model
# Install SGLang from pip: pip install sglang # Start the SGLang server: python3 -m sglang.launch_server \ --model-path "furproxy/9b-26" \ --host 0.0.0.0 \ --port 30000 # Call the server using curl (OpenAI-compatible API): curl -X POST "http://localhost:30000/v1/chat/completions" \ -H "Content-Type: application/json" \ --data '{ "model": "furproxy/9b-26", "messages": [ { "role": "user", "content": "What is the capital of France?" } ] }'Use Docker images
docker run --gpus all \ --shm-size 32g \ -p 30000:30000 \ -v ~/.cache/huggingface:/root/.cache/huggingface \ --env "HF_TOKEN=<secret>" \ --ipc=host \ lmsysorg/sglang:latest \ python3 -m sglang.launch_server \ --model-path "furproxy/9b-26" \ --host 0.0.0.0 \ --port 30000 # Call the server using curl (OpenAI-compatible API): curl -X POST "http://localhost:30000/v1/chat/completions" \ -H "Content-Type: application/json" \ --data '{ "model": "furproxy/9b-26", "messages": [ { "role": "user", "content": "What is the capital of France?" } ] }' - Docker Model Runner
How to use furproxy/9b-26 with Docker Model Runner:
docker model run hf.co/furproxy/9b-26
| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 3.0, | |
| "eval_steps": 500, | |
| "global_step": 1962, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0030581039755351682, | |
| "grad_norm": 0.6435028314590454, | |
| "learning_rate": 1.0101010101010103e-07, | |
| "loss": 1.8936554193496704, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.0061162079510703364, | |
| "grad_norm": 0.5548882484436035, | |
| "learning_rate": 3.0303030303030305e-07, | |
| "loss": 1.8550586700439453, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.009174311926605505, | |
| "grad_norm": 0.27108362317085266, | |
| "learning_rate": 5.05050505050505e-07, | |
| "loss": 1.890197992324829, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.012232415902140673, | |
| "grad_norm": 0.24754057824611664, | |
| "learning_rate": 7.070707070707071e-07, | |
| "loss": 1.8445472717285156, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.01529051987767584, | |
| "grad_norm": 0.39890649914741516, | |
| "learning_rate": 9.090909090909091e-07, | |
| "loss": 2.010572910308838, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.01834862385321101, | |
| "grad_norm": 0.23249551653862, | |
| "learning_rate": 1.111111111111111e-06, | |
| "loss": 1.8801705837249756, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.021406727828746176, | |
| "grad_norm": 0.4299562871456146, | |
| "learning_rate": 1.3131313131313134e-06, | |
| "loss": 1.8805203437805176, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.024464831804281346, | |
| "grad_norm": 0.5231528282165527, | |
| "learning_rate": 1.5151515151515152e-06, | |
| "loss": 1.9465537071228027, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.027522935779816515, | |
| "grad_norm": 0.3482355773448944, | |
| "learning_rate": 1.7171717171717173e-06, | |
| "loss": 1.8298053741455078, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.03058103975535168, | |
| "grad_norm": 0.3003389239311218, | |
| "learning_rate": 1.9191919191919192e-06, | |
| "loss": 1.853845238685608, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.03363914373088685, | |
| "grad_norm": 0.5087025165557861, | |
| "learning_rate": 2.1212121212121216e-06, | |
| "loss": 1.9923889636993408, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.03669724770642202, | |
| "grad_norm": 2.0046560764312744, | |
| "learning_rate": 2.3232323232323234e-06, | |
| "loss": 2.008021354675293, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.039755351681957186, | |
| "grad_norm": 0.2651369571685791, | |
| "learning_rate": 2.5252525252525258e-06, | |
| "loss": 1.7058303356170654, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.04281345565749235, | |
| "grad_norm": 0.5547925233840942, | |
| "learning_rate": 2.7272727272727272e-06, | |
| "loss": 1.8821287155151367, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.045871559633027525, | |
| "grad_norm": 0.5607280731201172, | |
| "learning_rate": 2.9292929292929295e-06, | |
| "loss": 2.1788079738616943, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.04892966360856269, | |
| "grad_norm": 0.36416563391685486, | |
| "learning_rate": 3.131313131313132e-06, | |
| "loss": 1.8534326553344727, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.05198776758409786, | |
| "grad_norm": 0.4965146481990814, | |
| "learning_rate": 3.3333333333333333e-06, | |
| "loss": 1.9557833671569824, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.05504587155963303, | |
| "grad_norm": 0.3163432776927948, | |
| "learning_rate": 3.5353535353535356e-06, | |
| "loss": 1.7984235286712646, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.0581039755351682, | |
| "grad_norm": 0.3063645362854004, | |
| "learning_rate": 3.737373737373738e-06, | |
| "loss": 1.8264985084533691, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.06116207951070336, | |
| "grad_norm": 0.30639225244522095, | |
| "learning_rate": 3.93939393939394e-06, | |
| "loss": 1.8241571187973022, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.06422018348623854, | |
| "grad_norm": 0.3971042335033417, | |
| "learning_rate": 4.141414141414142e-06, | |
| "loss": 1.874243974685669, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.0672782874617737, | |
| "grad_norm": 0.6156560182571411, | |
| "learning_rate": 4.343434343434344e-06, | |
| "loss": 1.965466022491455, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.07033639143730887, | |
| "grad_norm": 0.5533192753791809, | |
| "learning_rate": 4.5454545454545455e-06, | |
| "loss": 2.0693740844726562, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.07339449541284404, | |
| "grad_norm": 1.9126055240631104, | |
| "learning_rate": 4.747474747474748e-06, | |
| "loss": 2.060253143310547, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.0764525993883792, | |
| "grad_norm": 0.3860923647880554, | |
| "learning_rate": 4.94949494949495e-06, | |
| "loss": 1.8577625751495361, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.07951070336391437, | |
| "grad_norm": 0.4684409499168396, | |
| "learning_rate": 5.151515151515152e-06, | |
| "loss": 1.8510971069335938, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.08256880733944955, | |
| "grad_norm": 0.4307204484939575, | |
| "learning_rate": 5.353535353535354e-06, | |
| "loss": 1.9931628704071045, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.0856269113149847, | |
| "grad_norm": 0.3140373229980469, | |
| "learning_rate": 5.555555555555557e-06, | |
| "loss": 1.925836443901062, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.08868501529051988, | |
| "grad_norm": 0.36317509412765503, | |
| "learning_rate": 5.7575757575757586e-06, | |
| "loss": 1.9616905450820923, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.09174311926605505, | |
| "grad_norm": 0.21478985249996185, | |
| "learning_rate": 5.95959595959596e-06, | |
| "loss": 1.895378589630127, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.09480122324159021, | |
| "grad_norm": 0.2936638593673706, | |
| "learning_rate": 6.1616161616161615e-06, | |
| "loss": 1.8279492855072021, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.09785932721712538, | |
| "grad_norm": 0.3114721179008484, | |
| "learning_rate": 6.363636363636364e-06, | |
| "loss": 1.715104103088379, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.10091743119266056, | |
| "grad_norm": 0.32813334465026855, | |
| "learning_rate": 6.565656565656566e-06, | |
| "loss": 1.852712631225586, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.10397553516819572, | |
| "grad_norm": 0.37994885444641113, | |
| "learning_rate": 6.767676767676769e-06, | |
| "loss": 1.9753448963165283, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.10703363914373089, | |
| "grad_norm": 0.5206537246704102, | |
| "learning_rate": 6.969696969696971e-06, | |
| "loss": 1.8388103246688843, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.11009174311926606, | |
| "grad_norm": 0.6430595517158508, | |
| "learning_rate": 7.171717171717172e-06, | |
| "loss": 2.0399489402770996, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.11314984709480122, | |
| "grad_norm": 0.5809399485588074, | |
| "learning_rate": 7.373737373737374e-06, | |
| "loss": 2.1389784812927246, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.1162079510703364, | |
| "grad_norm": 1.2094364166259766, | |
| "learning_rate": 7.5757575757575764e-06, | |
| "loss": 1.9202568531036377, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.11926605504587157, | |
| "grad_norm": 0.7485645413398743, | |
| "learning_rate": 7.77777777777778e-06, | |
| "loss": 2.2573585510253906, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.12232415902140673, | |
| "grad_norm": 0.47476136684417725, | |
| "learning_rate": 7.97979797979798e-06, | |
| "loss": 1.8947498798370361, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.12538226299694188, | |
| "grad_norm": 0.24537041783332825, | |
| "learning_rate": 8.181818181818183e-06, | |
| "loss": 1.636450171470642, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.12844036697247707, | |
| "grad_norm": 0.4732670783996582, | |
| "learning_rate": 8.383838383838384e-06, | |
| "loss": 1.818341612815857, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.13149847094801223, | |
| "grad_norm": 0.37070026993751526, | |
| "learning_rate": 8.585858585858587e-06, | |
| "loss": 1.845613718032837, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.1345565749235474, | |
| "grad_norm": 0.3881911635398865, | |
| "learning_rate": 8.787878787878788e-06, | |
| "loss": 1.7559518814086914, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.13761467889908258, | |
| "grad_norm": 0.45207998156547546, | |
| "learning_rate": 8.98989898989899e-06, | |
| "loss": 1.7992792129516602, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.14067278287461774, | |
| "grad_norm": 0.1907433420419693, | |
| "learning_rate": 9.191919191919193e-06, | |
| "loss": 1.8380980491638184, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.1437308868501529, | |
| "grad_norm": 0.2265041321516037, | |
| "learning_rate": 9.393939393939396e-06, | |
| "loss": 1.9353697299957275, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.14678899082568808, | |
| "grad_norm": 0.5571039319038391, | |
| "learning_rate": 9.595959595959597e-06, | |
| "loss": 1.861445665359497, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.14984709480122324, | |
| "grad_norm": 0.318570613861084, | |
| "learning_rate": 9.797979797979798e-06, | |
| "loss": 1.7963485717773438, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.1529051987767584, | |
| "grad_norm": 0.35685858130455017, | |
| "learning_rate": 1e-05, | |
| "loss": 1.955026626586914, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.1559633027522936, | |
| "grad_norm": 0.7966809272766113, | |
| "learning_rate": 9.99997440729838e-06, | |
| "loss": 1.8856327533721924, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.15902140672782875, | |
| "grad_norm": 0.2650541663169861, | |
| "learning_rate": 9.999897629484621e-06, | |
| "loss": 1.814586877822876, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.1620795107033639, | |
| "grad_norm": 0.36088353395462036, | |
| "learning_rate": 9.999769667432037e-06, | |
| "loss": 1.8607715368270874, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.1651376146788991, | |
| "grad_norm": 0.6270299553871155, | |
| "learning_rate": 9.999590522596136e-06, | |
| "loss": 1.9078267812728882, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.16819571865443425, | |
| "grad_norm": 0.27504709362983704, | |
| "learning_rate": 9.999360197014607e-06, | |
| "loss": 1.9029535055160522, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.1712538226299694, | |
| "grad_norm": 0.5007109642028809, | |
| "learning_rate": 9.999078693307296e-06, | |
| "loss": 1.7704020738601685, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.1743119266055046, | |
| "grad_norm": 0.5426493883132935, | |
| "learning_rate": 9.99874601467618e-06, | |
| "loss": 1.8907287120819092, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.17737003058103976, | |
| "grad_norm": 0.26077231764793396, | |
| "learning_rate": 9.998362164905318e-06, | |
| "loss": 1.760542869567871, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.18042813455657492, | |
| "grad_norm": 0.37686067819595337, | |
| "learning_rate": 9.997927148360824e-06, | |
| "loss": 1.995668649673462, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 0.1834862385321101, | |
| "grad_norm": 0.4259154498577118, | |
| "learning_rate": 9.99744096999081e-06, | |
| "loss": 1.8606561422348022, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.18654434250764526, | |
| "grad_norm": 0.3365345299243927, | |
| "learning_rate": 9.996903635325326e-06, | |
| "loss": 1.909229040145874, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 0.18960244648318042, | |
| "grad_norm": 0.25919589400291443, | |
| "learning_rate": 9.996315150476308e-06, | |
| "loss": 1.9200305938720703, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.1926605504587156, | |
| "grad_norm": 0.2932458221912384, | |
| "learning_rate": 9.995675522137492e-06, | |
| "loss": 1.8696832656860352, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 0.19571865443425077, | |
| "grad_norm": 0.38474535942077637, | |
| "learning_rate": 9.994984757584353e-06, | |
| "loss": 1.828667402267456, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.19877675840978593, | |
| "grad_norm": 0.3214952349662781, | |
| "learning_rate": 9.994242864674021e-06, | |
| "loss": 1.8718284368515015, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.2018348623853211, | |
| "grad_norm": 0.33034268021583557, | |
| "learning_rate": 9.993449851845176e-06, | |
| "loss": 1.8226697444915771, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 0.20489296636085627, | |
| "grad_norm": 0.8973183631896973, | |
| "learning_rate": 9.992605728117972e-06, | |
| "loss": 1.9453703165054321, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 0.20795107033639143, | |
| "grad_norm": 0.6750196218490601, | |
| "learning_rate": 9.991710503093923e-06, | |
| "loss": 1.820605993270874, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.21100917431192662, | |
| "grad_norm": 0.2680327594280243, | |
| "learning_rate": 9.990764186955797e-06, | |
| "loss": 1.711888074874878, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 0.21406727828746178, | |
| "grad_norm": 0.3089163899421692, | |
| "learning_rate": 9.989766790467498e-06, | |
| "loss": 1.668878197669983, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.21712538226299694, | |
| "grad_norm": 0.5638787746429443, | |
| "learning_rate": 9.988718324973947e-06, | |
| "loss": 1.7612136602401733, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 0.22018348623853212, | |
| "grad_norm": 0.24349473416805267, | |
| "learning_rate": 9.98761880240095e-06, | |
| "loss": 1.6873559951782227, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.22324159021406728, | |
| "grad_norm": 0.3549518585205078, | |
| "learning_rate": 9.986468235255065e-06, | |
| "loss": 1.743373990058899, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 0.22629969418960244, | |
| "grad_norm": 0.44438421726226807, | |
| "learning_rate": 9.985266636623457e-06, | |
| "loss": 1.6509066820144653, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 0.22935779816513763, | |
| "grad_norm": 0.46152663230895996, | |
| "learning_rate": 9.984014020173748e-06, | |
| "loss": 1.8014967441558838, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.2324159021406728, | |
| "grad_norm": 0.278169184923172, | |
| "learning_rate": 9.98271040015387e-06, | |
| "loss": 1.8622685670852661, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 0.23547400611620795, | |
| "grad_norm": 0.3168479800224304, | |
| "learning_rate": 9.981355791391891e-06, | |
| "loss": 1.8940097093582153, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 0.23853211009174313, | |
| "grad_norm": 0.3639688491821289, | |
| "learning_rate": 9.979950209295855e-06, | |
| "loss": 1.7917258739471436, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 0.2415902140672783, | |
| "grad_norm": 0.40860888361930847, | |
| "learning_rate": 9.978493669853606e-06, | |
| "loss": 1.8766049146652222, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 0.24464831804281345, | |
| "grad_norm": 0.315494179725647, | |
| "learning_rate": 9.976986189632597e-06, | |
| "loss": 1.7932193279266357, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.24770642201834864, | |
| "grad_norm": 0.3525390923023224, | |
| "learning_rate": 9.975427785779717e-06, | |
| "loss": 1.9470767974853516, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 0.25076452599388377, | |
| "grad_norm": 0.33575552701950073, | |
| "learning_rate": 9.97381847602108e-06, | |
| "loss": 1.7163609266281128, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 0.25382262996941896, | |
| "grad_norm": 1.193529725074768, | |
| "learning_rate": 9.972158278661838e-06, | |
| "loss": 1.877960205078125, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 0.25688073394495414, | |
| "grad_norm": 0.348765105009079, | |
| "learning_rate": 9.970447212585961e-06, | |
| "loss": 1.6149842739105225, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 0.2599388379204893, | |
| "grad_norm": 0.5527969598770142, | |
| "learning_rate": 9.968685297256027e-06, | |
| "loss": 1.8597733974456787, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.26299694189602446, | |
| "grad_norm": 0.656193196773529, | |
| "learning_rate": 9.966872552713006e-06, | |
| "loss": 1.5253994464874268, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 0.26605504587155965, | |
| "grad_norm": 0.7701634764671326, | |
| "learning_rate": 9.965008999576018e-06, | |
| "loss": 1.5178442001342773, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 0.2691131498470948, | |
| "grad_norm": 0.3889455795288086, | |
| "learning_rate": 9.963094659042113e-06, | |
| "loss": 1.7432003021240234, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 0.27217125382262997, | |
| "grad_norm": 0.7660208344459534, | |
| "learning_rate": 9.961129552886024e-06, | |
| "loss": 1.655880331993103, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 0.27522935779816515, | |
| "grad_norm": 0.7760636210441589, | |
| "learning_rate": 9.959113703459917e-06, | |
| "loss": 1.9860963821411133, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.2782874617737003, | |
| "grad_norm": 1.5110101699829102, | |
| "learning_rate": 9.957047133693141e-06, | |
| "loss": 1.9139325618743896, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 0.28134556574923547, | |
| "grad_norm": 1.1153804063796997, | |
| "learning_rate": 9.954929867091961e-06, | |
| "loss": 1.7500460147857666, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 0.28440366972477066, | |
| "grad_norm": 0.3268054723739624, | |
| "learning_rate": 9.952761927739303e-06, | |
| "loss": 1.5284479856491089, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 0.2874617737003058, | |
| "grad_norm": 0.2701658308506012, | |
| "learning_rate": 9.95054334029446e-06, | |
| "loss": 1.5575287342071533, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 0.290519877675841, | |
| "grad_norm": 0.5897979140281677, | |
| "learning_rate": 9.948274129992838e-06, | |
| "loss": 1.5360642671585083, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.29357798165137616, | |
| "grad_norm": 3.0125443935394287, | |
| "learning_rate": 9.945954322645643e-06, | |
| "loss": 1.7250124216079712, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 0.2966360856269113, | |
| "grad_norm": 0.22849687933921814, | |
| "learning_rate": 9.9435839446396e-06, | |
| "loss": 1.7317864894866943, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 0.2996941896024465, | |
| "grad_norm": 0.41497474908828735, | |
| "learning_rate": 9.941163022936659e-06, | |
| "loss": 1.7118513584136963, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 0.30275229357798167, | |
| "grad_norm": 0.43153518438339233, | |
| "learning_rate": 9.938691585073677e-06, | |
| "loss": 1.4813673496246338, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 0.3058103975535168, | |
| "grad_norm": 0.2877158522605896, | |
| "learning_rate": 9.936169659162105e-06, | |
| "loss": 1.5152385234832764, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.308868501529052, | |
| "grad_norm": 0.319741427898407, | |
| "learning_rate": 9.933597273887676e-06, | |
| "loss": 1.657623291015625, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 0.3119266055045872, | |
| "grad_norm": 0.4885481894016266, | |
| "learning_rate": 9.930974458510074e-06, | |
| "loss": 1.8340609073638916, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 0.3149847094801223, | |
| "grad_norm": 0.3470771312713623, | |
| "learning_rate": 9.9283012428626e-06, | |
| "loss": 1.8779006004333496, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 0.3180428134556575, | |
| "grad_norm": 0.21095849573612213, | |
| "learning_rate": 9.92557765735184e-06, | |
| "loss": 1.946405053138733, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 0.3211009174311927, | |
| "grad_norm": 0.4015672504901886, | |
| "learning_rate": 9.922803732957309e-06, | |
| "loss": 1.5457347631454468, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.3241590214067278, | |
| "grad_norm": 0.2712498903274536, | |
| "learning_rate": 9.919979501231102e-06, | |
| "loss": 1.6519064903259277, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 0.327217125382263, | |
| "grad_norm": 0.24934278428554535, | |
| "learning_rate": 9.917104994297543e-06, | |
| "loss": 1.4617292881011963, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 0.3302752293577982, | |
| "grad_norm": 0.22483140230178833, | |
| "learning_rate": 9.914180244852804e-06, | |
| "loss": 1.3875129222869873, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 0.3333333333333333, | |
| "grad_norm": 0.6217460632324219, | |
| "learning_rate": 9.911205286164553e-06, | |
| "loss": 1.8669204711914062, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 0.3363914373088685, | |
| "grad_norm": 0.4357741177082062, | |
| "learning_rate": 9.908180152071553e-06, | |
| "loss": 1.666574239730835, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.3394495412844037, | |
| "grad_norm": 0.29025763273239136, | |
| "learning_rate": 9.9051048769833e-06, | |
| "loss": 1.810868263244629, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 0.3425076452599388, | |
| "grad_norm": 0.7838276624679565, | |
| "learning_rate": 9.901979495879612e-06, | |
| "loss": 1.3125014305114746, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 0.345565749235474, | |
| "grad_norm": 0.2543538212776184, | |
| "learning_rate": 9.898804044310245e-06, | |
| "loss": 1.6106175184249878, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 0.3486238532110092, | |
| "grad_norm": 0.4557286500930786, | |
| "learning_rate": 9.89557855839448e-06, | |
| "loss": 1.886078953742981, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 0.3516819571865443, | |
| "grad_norm": 0.2689090073108673, | |
| "learning_rate": 9.892303074820712e-06, | |
| "loss": 1.631593108177185, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.3547400611620795, | |
| "grad_norm": 0.25291207432746887, | |
| "learning_rate": 9.888977630846048e-06, | |
| "loss": 1.7156798839569092, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 0.3577981651376147, | |
| "grad_norm": 0.3357708752155304, | |
| "learning_rate": 9.88560226429586e-06, | |
| "loss": 1.6416988372802734, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 0.36085626911314983, | |
| "grad_norm": 0.3246925473213196, | |
| "learning_rate": 9.88217701356337e-06, | |
| "loss": 1.5658977031707764, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 0.363914373088685, | |
| "grad_norm": 0.2840614318847656, | |
| "learning_rate": 9.878701917609208e-06, | |
| "loss": 1.6534138917922974, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 0.3669724770642202, | |
| "grad_norm": 0.5397573709487915, | |
| "learning_rate": 9.875177015960973e-06, | |
| "loss": 1.7614964246749878, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.37003058103975534, | |
| "grad_norm": 0.28763291239738464, | |
| "learning_rate": 9.871602348712777e-06, | |
| "loss": 1.5937902927398682, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 0.3730886850152905, | |
| "grad_norm": 0.21111302077770233, | |
| "learning_rate": 9.867977956524798e-06, | |
| "loss": 1.6914631128311157, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 0.3761467889908257, | |
| "grad_norm": 0.5114771723747253, | |
| "learning_rate": 9.864303880622806e-06, | |
| "loss": 1.8919175863265991, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 0.37920489296636084, | |
| "grad_norm": 0.4698966145515442, | |
| "learning_rate": 9.8605801627977e-06, | |
| "loss": 2.395404815673828, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 0.382262996941896, | |
| "grad_norm": 0.604468047618866, | |
| "learning_rate": 9.85680684540504e-06, | |
| "loss": 1.523594617843628, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.3853211009174312, | |
| "grad_norm": 0.295039564371109, | |
| "learning_rate": 9.852983971364549e-06, | |
| "loss": 1.520268440246582, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 0.38837920489296635, | |
| "grad_norm": 0.2590586245059967, | |
| "learning_rate": 9.84911158415964e-06, | |
| "loss": 1.5712318420410156, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 0.39143730886850153, | |
| "grad_norm": 0.9178432822227478, | |
| "learning_rate": 9.845189727836914e-06, | |
| "loss": 1.7512378692626953, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 0.3944954128440367, | |
| "grad_norm": 0.512359619140625, | |
| "learning_rate": 9.841218447005657e-06, | |
| "loss": 1.677209496498108, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 0.39755351681957185, | |
| "grad_norm": 0.8242136240005493, | |
| "learning_rate": 9.837197786837341e-06, | |
| "loss": 1.52079439163208, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.40061162079510704, | |
| "grad_norm": 0.5057528614997864, | |
| "learning_rate": 9.833127793065098e-06, | |
| "loss": 1.3776154518127441, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 0.4036697247706422, | |
| "grad_norm": 0.287590891122818, | |
| "learning_rate": 9.829008511983214e-06, | |
| "loss": 1.313464879989624, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 0.40672782874617736, | |
| "grad_norm": 0.22291725873947144, | |
| "learning_rate": 9.82483999044659e-06, | |
| "loss": 1.4770923852920532, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 0.40978593272171254, | |
| "grad_norm": 0.4278978109359741, | |
| "learning_rate": 9.820622275870219e-06, | |
| "loss": 1.713256597518921, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 0.41284403669724773, | |
| "grad_norm": 0.7735996246337891, | |
| "learning_rate": 9.816355416228636e-06, | |
| "loss": 1.7301435470581055, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.41590214067278286, | |
| "grad_norm": 0.36943763494491577, | |
| "learning_rate": 9.812039460055383e-06, | |
| "loss": 1.746875286102295, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 0.41896024464831805, | |
| "grad_norm": 0.30427658557891846, | |
| "learning_rate": 9.807674456442448e-06, | |
| "loss": 1.7644126415252686, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 0.42201834862385323, | |
| "grad_norm": 0.2680354416370392, | |
| "learning_rate": 9.80326045503972e-06, | |
| "loss": 1.6075056791305542, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 0.42507645259938837, | |
| "grad_norm": 0.5165081024169922, | |
| "learning_rate": 9.798797506054398e-06, | |
| "loss": 1.7466685771942139, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 0.42813455657492355, | |
| "grad_norm": 0.46960580348968506, | |
| "learning_rate": 9.794285660250457e-06, | |
| "loss": 1.6852364540100098, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.43119266055045874, | |
| "grad_norm": 0.3378291130065918, | |
| "learning_rate": 9.789724968948034e-06, | |
| "loss": 1.5493333339691162, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 0.43425076452599387, | |
| "grad_norm": 0.2972247004508972, | |
| "learning_rate": 9.78511548402287e-06, | |
| "loss": 1.5161151885986328, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 0.43730886850152906, | |
| "grad_norm": 0.3610173165798187, | |
| "learning_rate": 9.780457257905708e-06, | |
| "loss": 1.698796272277832, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 0.44036697247706424, | |
| "grad_norm": 0.4165475070476532, | |
| "learning_rate": 9.775750343581702e-06, | |
| "loss": 1.4344041347503662, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 0.4434250764525994, | |
| "grad_norm": 0.565291702747345, | |
| "learning_rate": 9.770994794589804e-06, | |
| "loss": 1.6736053228378296, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.44648318042813456, | |
| "grad_norm": 0.22272102534770966, | |
| "learning_rate": 9.766190665022173e-06, | |
| "loss": 1.515446424484253, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 0.44954128440366975, | |
| "grad_norm": 0.292961061000824, | |
| "learning_rate": 9.761338009523542e-06, | |
| "loss": 1.5677558183670044, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 0.4525993883792049, | |
| "grad_norm": 0.22576913237571716, | |
| "learning_rate": 9.756436883290608e-06, | |
| "loss": 1.6895636320114136, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 0.45565749235474007, | |
| "grad_norm": 0.514447808265686, | |
| "learning_rate": 9.751487342071394e-06, | |
| "loss": 1.6961359977722168, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 0.45871559633027525, | |
| "grad_norm": 0.4707038402557373, | |
| "learning_rate": 9.74648944216463e-06, | |
| "loss": 1.5364969968795776, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.4617737003058104, | |
| "grad_norm": 0.3324492871761322, | |
| "learning_rate": 9.741443240419096e-06, | |
| "loss": 1.4445494413375854, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 0.4648318042813456, | |
| "grad_norm": 0.40139055252075195, | |
| "learning_rate": 9.736348794232986e-06, | |
| "loss": 1.631695032119751, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 0.46788990825688076, | |
| "grad_norm": 0.32826143503189087, | |
| "learning_rate": 9.731206161553253e-06, | |
| "loss": 1.5630545616149902, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 0.4709480122324159, | |
| "grad_norm": 0.7137564420700073, | |
| "learning_rate": 9.726015400874945e-06, | |
| "loss": 1.7077264785766602, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 0.4740061162079511, | |
| "grad_norm": 0.5834897756576538, | |
| "learning_rate": 9.72077657124055e-06, | |
| "loss": 1.541429877281189, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.47706422018348627, | |
| "grad_norm": 0.30517715215682983, | |
| "learning_rate": 9.715489732239309e-06, | |
| "loss": 1.486952781677246, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 0.4801223241590214, | |
| "grad_norm": 0.39915895462036133, | |
| "learning_rate": 9.710154944006558e-06, | |
| "loss": 1.4761033058166504, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 0.4831804281345566, | |
| "grad_norm": 0.24902665615081787, | |
| "learning_rate": 9.70477226722302e-06, | |
| "loss": 1.555905818939209, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 0.48623853211009177, | |
| "grad_norm": 0.27528202533721924, | |
| "learning_rate": 9.699341763114142e-06, | |
| "loss": 1.5418330430984497, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 0.4892966360856269, | |
| "grad_norm": 0.37373027205467224, | |
| "learning_rate": 9.693863493449376e-06, | |
| "loss": 1.5460388660430908, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.4923547400611621, | |
| "grad_norm": 0.3926723301410675, | |
| "learning_rate": 9.688337520541487e-06, | |
| "loss": 1.7003178596496582, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 0.4954128440366973, | |
| "grad_norm": 0.2708083987236023, | |
| "learning_rate": 9.68276390724584e-06, | |
| "loss": 1.8639323711395264, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 0.4984709480122324, | |
| "grad_norm": 0.3522673547267914, | |
| "learning_rate": 9.67714271695969e-06, | |
| "loss": 1.7603111267089844, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 0.5015290519877675, | |
| "grad_norm": 0.2736775279045105, | |
| "learning_rate": 9.671474013621461e-06, | |
| "loss": 1.7426960468292236, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 0.5045871559633027, | |
| "grad_norm": 0.34006989002227783, | |
| "learning_rate": 9.665757861710008e-06, | |
| "loss": 1.6802008152008057, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.5076452599388379, | |
| "grad_norm": 0.7181631922721863, | |
| "learning_rate": 9.659994326243897e-06, | |
| "loss": 1.3610038757324219, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 0.5107033639143731, | |
| "grad_norm": 0.3209435045719147, | |
| "learning_rate": 9.654183472780655e-06, | |
| "loss": 1.3310749530792236, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 0.5137614678899083, | |
| "grad_norm": 0.3394523561000824, | |
| "learning_rate": 9.64832536741604e-06, | |
| "loss": 1.7552449703216553, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 0.5168195718654435, | |
| "grad_norm": 0.26636433601379395, | |
| "learning_rate": 9.642420076783266e-06, | |
| "loss": 1.7648036479949951, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 0.5198776758409785, | |
| "grad_norm": 0.4860476553440094, | |
| "learning_rate": 9.636467668052263e-06, | |
| "loss": 1.8371148109436035, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.5229357798165137, | |
| "grad_norm": 0.3957999050617218, | |
| "learning_rate": 9.630468208928906e-06, | |
| "loss": 1.7691468000411987, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 0.5259938837920489, | |
| "grad_norm": 0.29553869366645813, | |
| "learning_rate": 9.624421767654247e-06, | |
| "loss": 1.8050150871276855, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 0.5290519877675841, | |
| "grad_norm": 0.8523488640785217, | |
| "learning_rate": 9.618328413003742e-06, | |
| "loss": 1.7548258304595947, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 0.5321100917431193, | |
| "grad_norm": 0.30288758873939514, | |
| "learning_rate": 9.612188214286457e-06, | |
| "loss": 1.652245044708252, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 0.5351681957186545, | |
| "grad_norm": 0.44331154227256775, | |
| "learning_rate": 9.606001241344293e-06, | |
| "loss": 1.5749201774597168, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.5382262996941896, | |
| "grad_norm": 0.3775594234466553, | |
| "learning_rate": 9.599767564551185e-06, | |
| "loss": 1.8136138916015625, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 0.5412844036697247, | |
| "grad_norm": 0.6260164976119995, | |
| "learning_rate": 9.593487254812298e-06, | |
| "loss": 1.753260850906372, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 0.5443425076452599, | |
| "grad_norm": 0.21940867602825165, | |
| "learning_rate": 9.587160383563235e-06, | |
| "loss": 1.2595834732055664, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 0.5474006116207951, | |
| "grad_norm": 0.45921286940574646, | |
| "learning_rate": 9.580787022769205e-06, | |
| "loss": 1.8687834739685059, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 0.5504587155963303, | |
| "grad_norm": 0.25323811173439026, | |
| "learning_rate": 9.574367244924216e-06, | |
| "loss": 1.87260901927948, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.5535168195718655, | |
| "grad_norm": 0.3825606405735016, | |
| "learning_rate": 9.567901123050255e-06, | |
| "loss": 1.9380344152450562, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 0.5565749235474006, | |
| "grad_norm": 0.8433843851089478, | |
| "learning_rate": 9.56138873069644e-06, | |
| "loss": 1.854411005973816, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 0.5596330275229358, | |
| "grad_norm": 0.5623306035995483, | |
| "learning_rate": 9.554830141938201e-06, | |
| "loss": 1.8307363986968994, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 0.5626911314984709, | |
| "grad_norm": 0.5833460688591003, | |
| "learning_rate": 9.54822543137643e-06, | |
| "loss": 1.691839575767517, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 0.5657492354740061, | |
| "grad_norm": 0.7582941651344299, | |
| "learning_rate": 9.541574674136634e-06, | |
| "loss": 1.5816738605499268, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.5688073394495413, | |
| "grad_norm": 0.5991274118423462, | |
| "learning_rate": 9.534877945868075e-06, | |
| "loss": 1.141850471496582, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 0.5718654434250765, | |
| "grad_norm": 0.27493157982826233, | |
| "learning_rate": 9.528135322742916e-06, | |
| "loss": 1.1190171241760254, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 0.5749235474006116, | |
| "grad_norm": 0.20014670491218567, | |
| "learning_rate": 9.521346881455356e-06, | |
| "loss": 1.4172542095184326, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 0.5779816513761468, | |
| "grad_norm": 0.45737189054489136, | |
| "learning_rate": 9.514512699220751e-06, | |
| "loss": 1.3267741203308105, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 0.581039755351682, | |
| "grad_norm": 0.342574805021286, | |
| "learning_rate": 9.507632853774738e-06, | |
| "loss": 1.2848198413848877, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.5840978593272171, | |
| "grad_norm": 0.2764483690261841, | |
| "learning_rate": 9.500707423372354e-06, | |
| "loss": 1.2696105241775513, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 0.5871559633027523, | |
| "grad_norm": 0.5538342595100403, | |
| "learning_rate": 9.493736486787145e-06, | |
| "loss": 1.5733320713043213, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 0.5902140672782875, | |
| "grad_norm": 0.5002435445785522, | |
| "learning_rate": 9.486720123310264e-06, | |
| "loss": 1.4811735153198242, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 0.5932721712538226, | |
| "grad_norm": 0.2729179561138153, | |
| "learning_rate": 9.479658412749575e-06, | |
| "loss": 1.2759473323822021, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 0.5963302752293578, | |
| "grad_norm": 0.422869473695755, | |
| "learning_rate": 9.472551435428751e-06, | |
| "loss": 1.6186537742614746, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.599388379204893, | |
| "grad_norm": 0.18889868259429932, | |
| "learning_rate": 9.465399272186341e-06, | |
| "loss": 1.5904256105422974, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 0.6024464831804281, | |
| "grad_norm": 0.4715130925178528, | |
| "learning_rate": 9.458202004374875e-06, | |
| "loss": 1.3664047718048096, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 0.6055045871559633, | |
| "grad_norm": 0.3192538321018219, | |
| "learning_rate": 9.450959713859918e-06, | |
| "loss": 1.5540097951889038, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 0.6085626911314985, | |
| "grad_norm": 0.48479557037353516, | |
| "learning_rate": 9.443672483019146e-06, | |
| "loss": 1.7298085689544678, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 0.6116207951070336, | |
| "grad_norm": 0.40212106704711914, | |
| "learning_rate": 9.436340394741424e-06, | |
| "loss": 1.2515219449996948, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.6146788990825688, | |
| "grad_norm": 0.31416311860084534, | |
| "learning_rate": 9.428963532425832e-06, | |
| "loss": 1.5272061824798584, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 0.617737003058104, | |
| "grad_norm": 0.39595550298690796, | |
| "learning_rate": 9.421541979980743e-06, | |
| "loss": 1.584099531173706, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 0.6207951070336392, | |
| "grad_norm": 0.3684428632259369, | |
| "learning_rate": 9.414075821822862e-06, | |
| "loss": 1.5516374111175537, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 0.6238532110091743, | |
| "grad_norm": 0.2936325669288635, | |
| "learning_rate": 9.406565142876252e-06, | |
| "loss": 1.3937046527862549, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 0.6269113149847095, | |
| "grad_norm": 0.8210769295692444, | |
| "learning_rate": 9.399010028571394e-06, | |
| "loss": 1.0384480953216553, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.6299694189602446, | |
| "grad_norm": 0.31836938858032227, | |
| "learning_rate": 9.391410564844189e-06, | |
| "loss": 1.6605589389801025, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 0.6330275229357798, | |
| "grad_norm": 0.4151877164840698, | |
| "learning_rate": 9.383766838134997e-06, | |
| "loss": 1.5902981758117676, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 0.636085626911315, | |
| "grad_norm": 0.29467517137527466, | |
| "learning_rate": 9.376078935387647e-06, | |
| "loss": 1.511544942855835, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 0.6391437308868502, | |
| "grad_norm": 0.4552344083786011, | |
| "learning_rate": 9.36834694404845e-06, | |
| "loss": 1.6092697381973267, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 0.6422018348623854, | |
| "grad_norm": 0.3086092174053192, | |
| "learning_rate": 9.360570952065205e-06, | |
| "loss": 1.5458872318267822, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.6452599388379205, | |
| "grad_norm": 0.29464077949523926, | |
| "learning_rate": 9.3527510478862e-06, | |
| "loss": 1.5201151371002197, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 0.6483180428134556, | |
| "grad_norm": 0.35874319076538086, | |
| "learning_rate": 9.3448873204592e-06, | |
| "loss": 1.7184113264083862, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 0.6513761467889908, | |
| "grad_norm": 0.6177545189857483, | |
| "learning_rate": 9.336979859230438e-06, | |
| "loss": 1.425230860710144, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 0.654434250764526, | |
| "grad_norm": 0.4207315742969513, | |
| "learning_rate": 9.329028754143606e-06, | |
| "loss": 1.1580491065979004, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 0.6574923547400612, | |
| "grad_norm": 0.40215086936950684, | |
| "learning_rate": 9.321034095638816e-06, | |
| "loss": 1.776092767715454, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.6605504587155964, | |
| "grad_norm": 0.48207205533981323, | |
| "learning_rate": 9.312995974651581e-06, | |
| "loss": 1.5432982444763184, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 0.6636085626911316, | |
| "grad_norm": 0.9188543558120728, | |
| "learning_rate": 9.304914482611788e-06, | |
| "loss": 1.6913204193115234, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 0.6666666666666666, | |
| "grad_norm": 2.0712273120880127, | |
| "learning_rate": 9.296789711442641e-06, | |
| "loss": 1.5286757946014404, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 0.6697247706422018, | |
| "grad_norm": 0.4487042725086212, | |
| "learning_rate": 9.288621753559624e-06, | |
| "loss": 1.7271997928619385, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 0.672782874617737, | |
| "grad_norm": 0.4550405442714691, | |
| "learning_rate": 9.280410701869456e-06, | |
| "loss": 1.5852614641189575, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.6758409785932722, | |
| "grad_norm": 0.8099808692932129, | |
| "learning_rate": 9.27215664976902e-06, | |
| "loss": 1.6332128047943115, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 0.6788990825688074, | |
| "grad_norm": 0.5566719174385071, | |
| "learning_rate": 9.263859691144315e-06, | |
| "loss": 1.5285072326660156, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 0.6819571865443425, | |
| "grad_norm": 0.3996361196041107, | |
| "learning_rate": 9.25551992036938e-06, | |
| "loss": 1.181262731552124, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 0.6850152905198776, | |
| "grad_norm": 0.7320879697799683, | |
| "learning_rate": 9.247137432305221e-06, | |
| "loss": 1.6381134986877441, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 0.6880733944954128, | |
| "grad_norm": 0.5473281741142273, | |
| "learning_rate": 9.238712322298733e-06, | |
| "loss": 1.623387098312378, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.691131498470948, | |
| "grad_norm": 0.2673215866088867, | |
| "learning_rate": 9.230244686181616e-06, | |
| "loss": 1.6147091388702393, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 0.6941896024464832, | |
| "grad_norm": 0.41044941544532776, | |
| "learning_rate": 9.22173462026929e-06, | |
| "loss": 1.6174466609954834, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 0.6972477064220184, | |
| "grad_norm": 0.3210803270339966, | |
| "learning_rate": 9.213182221359785e-06, | |
| "loss": 1.4634352922439575, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 0.7003058103975535, | |
| "grad_norm": 0.4366549551486969, | |
| "learning_rate": 9.204587586732653e-06, | |
| "loss": 1.6598728895187378, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 0.7033639143730887, | |
| "grad_norm": 0.6817240118980408, | |
| "learning_rate": 9.195950814147862e-06, | |
| "loss": 1.7457971572875977, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.7064220183486238, | |
| "grad_norm": 1.429196834564209, | |
| "learning_rate": 9.187272001844673e-06, | |
| "loss": 1.4895765781402588, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 0.709480122324159, | |
| "grad_norm": 0.33415424823760986, | |
| "learning_rate": 9.178551248540534e-06, | |
| "loss": 1.7249622344970703, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 0.7125382262996942, | |
| "grad_norm": 0.5185303092002869, | |
| "learning_rate": 9.169788653429949e-06, | |
| "loss": 1.5071038007736206, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 0.7155963302752294, | |
| "grad_norm": 0.703040599822998, | |
| "learning_rate": 9.160984316183354e-06, | |
| "loss": 1.6332056522369385, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 0.7186544342507645, | |
| "grad_norm": 0.2760729491710663, | |
| "learning_rate": 9.152138336945985e-06, | |
| "loss": 1.5567004680633545, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.7217125382262997, | |
| "grad_norm": 0.26987555623054504, | |
| "learning_rate": 9.143250816336733e-06, | |
| "loss": 1.6896016597747803, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 0.7247706422018348, | |
| "grad_norm": 0.4577353894710541, | |
| "learning_rate": 9.134321855447004e-06, | |
| "loss": 1.780794620513916, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 0.72782874617737, | |
| "grad_norm": 0.3506152629852295, | |
| "learning_rate": 9.125351555839568e-06, | |
| "loss": 1.676330327987671, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 0.7308868501529052, | |
| "grad_norm": 0.3420753479003906, | |
| "learning_rate": 9.116340019547403e-06, | |
| "loss": 1.53602933883667, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 0.7339449541284404, | |
| "grad_norm": 0.615734875202179, | |
| "learning_rate": 9.107287349072535e-06, | |
| "loss": 1.6315178871154785, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.7370030581039755, | |
| "grad_norm": 0.3383826017379761, | |
| "learning_rate": 9.098193647384872e-06, | |
| "loss": 1.646344542503357, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 0.7400611620795107, | |
| "grad_norm": 0.40700384974479675, | |
| "learning_rate": 9.089059017921034e-06, | |
| "loss": 1.6499868631362915, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 0.7431192660550459, | |
| "grad_norm": 0.4302765727043152, | |
| "learning_rate": 9.079883564583176e-06, | |
| "loss": 1.6223028898239136, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 0.746177370030581, | |
| "grad_norm": 0.2995837330818176, | |
| "learning_rate": 9.070667391737804e-06, | |
| "loss": 1.639768123626709, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 0.7492354740061162, | |
| "grad_norm": 0.3183751702308655, | |
| "learning_rate": 9.061410604214588e-06, | |
| "loss": 1.4172444343566895, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.7522935779816514, | |
| "grad_norm": 0.41883519291877747, | |
| "learning_rate": 9.052113307305178e-06, | |
| "loss": 1.5172092914581299, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 0.7553516819571865, | |
| "grad_norm": 0.4170067310333252, | |
| "learning_rate": 9.04277560676199e-06, | |
| "loss": 1.4581788778305054, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 0.7584097859327217, | |
| "grad_norm": 0.4589844346046448, | |
| "learning_rate": 9.033397608797015e-06, | |
| "loss": 1.5675625801086426, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 0.7614678899082569, | |
| "grad_norm": 0.4775915741920471, | |
| "learning_rate": 9.023979420080614e-06, | |
| "loss": 1.5760972499847412, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 0.764525993883792, | |
| "grad_norm": 0.4255703389644623, | |
| "learning_rate": 9.014521147740295e-06, | |
| "loss": 1.4211878776550293, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.7675840978593272, | |
| "grad_norm": 0.2350740283727646, | |
| "learning_rate": 9.005022899359498e-06, | |
| "loss": 1.0600173473358154, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 0.7706422018348624, | |
| "grad_norm": 0.25523892045021057, | |
| "learning_rate": 8.995484782976372e-06, | |
| "loss": 1.3498680591583252, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 0.7737003058103975, | |
| "grad_norm": 0.25793585181236267, | |
| "learning_rate": 8.985906907082548e-06, | |
| "loss": 1.4128957986831665, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 0.7767584097859327, | |
| "grad_norm": 0.2672351002693176, | |
| "learning_rate": 8.9762893806219e-06, | |
| "loss": 1.4579813480377197, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 0.7798165137614679, | |
| "grad_norm": 0.3467871844768524, | |
| "learning_rate": 8.96663231298931e-06, | |
| "loss": 1.469613790512085, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.7828746177370031, | |
| "grad_norm": 0.2631012797355652, | |
| "learning_rate": 8.956935814029426e-06, | |
| "loss": 1.5352952480316162, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 0.7859327217125383, | |
| "grad_norm": 0.42967817187309265, | |
| "learning_rate": 8.947199994035402e-06, | |
| "loss": 1.448859691619873, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 0.7889908256880734, | |
| "grad_norm": 0.18720397353172302, | |
| "learning_rate": 8.937424963747656e-06, | |
| "loss": 1.4682276248931885, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 0.7920489296636085, | |
| "grad_norm": 0.2571136951446533, | |
| "learning_rate": 8.9276108343526e-06, | |
| "loss": 1.430220365524292, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 0.7951070336391437, | |
| "grad_norm": 0.49666231870651245, | |
| "learning_rate": 8.917757717481388e-06, | |
| "loss": 1.4388704299926758, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.7981651376146789, | |
| "grad_norm": 0.18454308807849884, | |
| "learning_rate": 8.90786572520863e-06, | |
| "loss": 1.3887765407562256, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 0.8012232415902141, | |
| "grad_norm": 0.19775497913360596, | |
| "learning_rate": 8.897934970051128e-06, | |
| "loss": 1.4397857189178467, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 0.8042813455657493, | |
| "grad_norm": 0.24946311116218567, | |
| "learning_rate": 8.8879655649666e-06, | |
| "loss": 1.3772547245025635, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 0.8073394495412844, | |
| "grad_norm": 0.1347188949584961, | |
| "learning_rate": 8.877957623352376e-06, | |
| "loss": 1.2148081064224243, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 0.8103975535168195, | |
| "grad_norm": 0.17375752329826355, | |
| "learning_rate": 8.867911259044134e-06, | |
| "loss": 1.2351716756820679, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.8134556574923547, | |
| "grad_norm": 0.12528319656848907, | |
| "learning_rate": 8.857826586314586e-06, | |
| "loss": 1.0168347358703613, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 0.8165137614678899, | |
| "grad_norm": 0.22279202938079834, | |
| "learning_rate": 8.847703719872184e-06, | |
| "loss": 1.3256959915161133, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 0.8195718654434251, | |
| "grad_norm": 0.22974777221679688, | |
| "learning_rate": 8.837542774859819e-06, | |
| "loss": 1.3868855237960815, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 0.8226299694189603, | |
| "grad_norm": 0.2833384871482849, | |
| "learning_rate": 8.827343866853505e-06, | |
| "loss": 1.4037737846374512, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 0.8256880733944955, | |
| "grad_norm": 0.20462170243263245, | |
| "learning_rate": 8.817107111861068e-06, | |
| "loss": 1.3688358068466187, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.8287461773700305, | |
| "grad_norm": 0.21328498423099518, | |
| "learning_rate": 8.806832626320828e-06, | |
| "loss": 1.3812446594238281, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 0.8318042813455657, | |
| "grad_norm": 0.2749079465866089, | |
| "learning_rate": 8.796520527100268e-06, | |
| "loss": 1.3695695400238037, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 0.8348623853211009, | |
| "grad_norm": 0.17869983613491058, | |
| "learning_rate": 8.786170931494714e-06, | |
| "loss": 1.3381950855255127, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 0.8379204892966361, | |
| "grad_norm": 0.23981167376041412, | |
| "learning_rate": 8.775783957225991e-06, | |
| "loss": 1.409177541732788, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 0.8409785932721713, | |
| "grad_norm": 0.4634632170200348, | |
| "learning_rate": 8.765359722441096e-06, | |
| "loss": 1.3826044797897339, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.8440366972477065, | |
| "grad_norm": 0.19470739364624023, | |
| "learning_rate": 8.754898345710839e-06, | |
| "loss": 1.3529078960418701, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 0.8470948012232415, | |
| "grad_norm": 0.21753935515880585, | |
| "learning_rate": 8.744399946028506e-06, | |
| "loss": 1.3324353694915771, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 0.8501529051987767, | |
| "grad_norm": 0.24797090888023376, | |
| "learning_rate": 8.733864642808505e-06, | |
| "loss": 1.3469841480255127, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 0.8532110091743119, | |
| "grad_norm": 0.2123066782951355, | |
| "learning_rate": 8.723292555884997e-06, | |
| "loss": 1.343614101409912, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 0.8562691131498471, | |
| "grad_norm": 0.25072529911994934, | |
| "learning_rate": 8.712683805510547e-06, | |
| "loss": 1.305376648902893, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.8593272171253823, | |
| "grad_norm": 0.3219304382801056, | |
| "learning_rate": 8.702038512354746e-06, | |
| "loss": 1.3584821224212646, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 0.8623853211009175, | |
| "grad_norm": 0.3253892660140991, | |
| "learning_rate": 8.691356797502846e-06, | |
| "loss": 1.3929443359375, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 0.8654434250764526, | |
| "grad_norm": 0.22387385368347168, | |
| "learning_rate": 8.680638782454373e-06, | |
| "loss": 1.3898614645004272, | |
| "step": 566 | |
| }, | |
| { | |
| "epoch": 0.8685015290519877, | |
| "grad_norm": 0.2767902612686157, | |
| "learning_rate": 8.669884589121756e-06, | |
| "loss": 1.3842121362686157, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 0.8715596330275229, | |
| "grad_norm": 0.2403760552406311, | |
| "learning_rate": 8.659094339828934e-06, | |
| "loss": 1.3873755931854248, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.8746177370030581, | |
| "grad_norm": 0.30079615116119385, | |
| "learning_rate": 8.648268157309964e-06, | |
| "loss": 1.3781442642211914, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 0.8776758409785933, | |
| "grad_norm": 0.24510778486728668, | |
| "learning_rate": 8.637406164707628e-06, | |
| "loss": 1.4003241062164307, | |
| "step": 574 | |
| }, | |
| { | |
| "epoch": 0.8807339449541285, | |
| "grad_norm": 0.19053591787815094, | |
| "learning_rate": 8.62650848557203e-06, | |
| "loss": 1.318782091140747, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 0.8837920489296636, | |
| "grad_norm": 0.5118341445922852, | |
| "learning_rate": 8.615575243859194e-06, | |
| "loss": 1.3740344047546387, | |
| "step": 578 | |
| }, | |
| { | |
| "epoch": 0.8868501529051988, | |
| "grad_norm": 0.2653733193874359, | |
| "learning_rate": 8.604606563929649e-06, | |
| "loss": 1.3240249156951904, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.8899082568807339, | |
| "grad_norm": 0.2646930515766144, | |
| "learning_rate": 8.59360257054702e-06, | |
| "loss": 1.3533198833465576, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 0.8929663608562691, | |
| "grad_norm": 0.21842285990715027, | |
| "learning_rate": 8.582563388876602e-06, | |
| "loss": 1.3596748113632202, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 0.8960244648318043, | |
| "grad_norm": 0.2090519517660141, | |
| "learning_rate": 8.571489144483945e-06, | |
| "loss": 1.3835537433624268, | |
| "step": 586 | |
| }, | |
| { | |
| "epoch": 0.8990825688073395, | |
| "grad_norm": 0.2362383008003235, | |
| "learning_rate": 8.560379963333416e-06, | |
| "loss": 1.368111252784729, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 0.9021406727828746, | |
| "grad_norm": 0.4883694350719452, | |
| "learning_rate": 8.549235971786777e-06, | |
| "loss": 1.3067984580993652, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.9051987767584098, | |
| "grad_norm": 0.3407292366027832, | |
| "learning_rate": 8.538057296601739e-06, | |
| "loss": 1.3290581703186035, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 0.908256880733945, | |
| "grad_norm": 0.21036434173583984, | |
| "learning_rate": 8.526844064930523e-06, | |
| "loss": 1.3695251941680908, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 0.9113149847094801, | |
| "grad_norm": 0.22752052545547485, | |
| "learning_rate": 8.515596404318415e-06, | |
| "loss": 1.3922007083892822, | |
| "step": 596 | |
| }, | |
| { | |
| "epoch": 0.9143730886850153, | |
| "grad_norm": 0.23141705989837646, | |
| "learning_rate": 8.504314442702315e-06, | |
| "loss": 1.371009111404419, | |
| "step": 598 | |
| }, | |
| { | |
| "epoch": 0.9174311926605505, | |
| "grad_norm": 0.18458011746406555, | |
| "learning_rate": 8.492998308409275e-06, | |
| "loss": 1.3468807935714722, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.9204892966360856, | |
| "grad_norm": 0.2277638018131256, | |
| "learning_rate": 8.481648130155054e-06, | |
| "loss": 1.3067777156829834, | |
| "step": 602 | |
| }, | |
| { | |
| "epoch": 0.9235474006116208, | |
| "grad_norm": 0.2761037051677704, | |
| "learning_rate": 8.470264037042639e-06, | |
| "loss": 1.3436920642852783, | |
| "step": 604 | |
| }, | |
| { | |
| "epoch": 0.926605504587156, | |
| "grad_norm": 0.2718355059623718, | |
| "learning_rate": 8.458846158560787e-06, | |
| "loss": 1.368149995803833, | |
| "step": 606 | |
| }, | |
| { | |
| "epoch": 0.9296636085626911, | |
| "grad_norm": 0.471161812543869, | |
| "learning_rate": 8.447394624582544e-06, | |
| "loss": 1.3190257549285889, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 0.9327217125382263, | |
| "grad_norm": 0.24170783162117004, | |
| "learning_rate": 8.435909565363772e-06, | |
| "loss": 1.3419578075408936, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.9357798165137615, | |
| "grad_norm": 0.26485109329223633, | |
| "learning_rate": 8.424391111541673e-06, | |
| "loss": 1.338409662246704, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 0.9388379204892966, | |
| "grad_norm": 0.23220610618591309, | |
| "learning_rate": 8.412839394133285e-06, | |
| "loss": 1.3877780437469482, | |
| "step": 614 | |
| }, | |
| { | |
| "epoch": 0.9418960244648318, | |
| "grad_norm": 0.24310626089572906, | |
| "learning_rate": 8.401254544534018e-06, | |
| "loss": 1.4051454067230225, | |
| "step": 616 | |
| }, | |
| { | |
| "epoch": 0.944954128440367, | |
| "grad_norm": 0.299958735704422, | |
| "learning_rate": 8.389636694516134e-06, | |
| "loss": 1.3702571392059326, | |
| "step": 618 | |
| }, | |
| { | |
| "epoch": 0.9480122324159022, | |
| "grad_norm": 0.449929803609848, | |
| "learning_rate": 8.377985976227265e-06, | |
| "loss": 1.379606008529663, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.9510703363914373, | |
| "grad_norm": 0.24171197414398193, | |
| "learning_rate": 8.366302522188902e-06, | |
| "loss": 1.350182294845581, | |
| "step": 622 | |
| }, | |
| { | |
| "epoch": 0.9541284403669725, | |
| "grad_norm": 0.2935427129268646, | |
| "learning_rate": 8.354586465294894e-06, | |
| "loss": 1.2931137084960938, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 0.9571865443425076, | |
| "grad_norm": 0.23755374550819397, | |
| "learning_rate": 8.342837938809925e-06, | |
| "loss": 1.3183162212371826, | |
| "step": 626 | |
| }, | |
| { | |
| "epoch": 0.9602446483180428, | |
| "grad_norm": 0.3486945331096649, | |
| "learning_rate": 8.331057076368012e-06, | |
| "loss": 1.3358354568481445, | |
| "step": 628 | |
| }, | |
| { | |
| "epoch": 0.963302752293578, | |
| "grad_norm": 0.3866771459579468, | |
| "learning_rate": 8.319244011970975e-06, | |
| "loss": 1.3079657554626465, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.9663608562691132, | |
| "grad_norm": 0.23048752546310425, | |
| "learning_rate": 8.307398879986917e-06, | |
| "loss": 1.323075294494629, | |
| "step": 632 | |
| }, | |
| { | |
| "epoch": 0.9694189602446484, | |
| "grad_norm": 0.2808099687099457, | |
| "learning_rate": 8.295521815148697e-06, | |
| "loss": 1.376133918762207, | |
| "step": 634 | |
| }, | |
| { | |
| "epoch": 0.9724770642201835, | |
| "grad_norm": 0.3424737751483917, | |
| "learning_rate": 8.283612952552393e-06, | |
| "loss": 1.363619327545166, | |
| "step": 636 | |
| }, | |
| { | |
| "epoch": 0.9755351681957186, | |
| "grad_norm": 0.23272113502025604, | |
| "learning_rate": 8.271672427655765e-06, | |
| "loss": 1.3780806064605713, | |
| "step": 638 | |
| }, | |
| { | |
| "epoch": 0.9785932721712538, | |
| "grad_norm": 0.33965811133384705, | |
| "learning_rate": 8.259700376276724e-06, | |
| "loss": 1.3397910594940186, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.981651376146789, | |
| "grad_norm": 0.25269240140914917, | |
| "learning_rate": 8.247696934591774e-06, | |
| "loss": 1.3255189657211304, | |
| "step": 642 | |
| }, | |
| { | |
| "epoch": 0.9847094801223242, | |
| "grad_norm": 1.2317392826080322, | |
| "learning_rate": 8.235662239134473e-06, | |
| "loss": 1.347729206085205, | |
| "step": 644 | |
| }, | |
| { | |
| "epoch": 0.9877675840978594, | |
| "grad_norm": 0.37982505559921265, | |
| "learning_rate": 8.22359642679387e-06, | |
| "loss": 1.3894901275634766, | |
| "step": 646 | |
| }, | |
| { | |
| "epoch": 0.9908256880733946, | |
| "grad_norm": 0.2849336564540863, | |
| "learning_rate": 8.211499634812966e-06, | |
| "loss": 1.429058313369751, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 0.9938837920489296, | |
| "grad_norm": 0.6233349442481995, | |
| "learning_rate": 8.199372000787126e-06, | |
| "loss": 2.095426082611084, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.9969418960244648, | |
| "grad_norm": 0.6541375517845154, | |
| "learning_rate": 8.187213662662539e-06, | |
| "loss": 2.1073060035705566, | |
| "step": 652 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 11.037178039550781, | |
| "learning_rate": 8.175024758734636e-06, | |
| "loss": 2.095914840698242, | |
| "step": 654 | |
| }, | |
| { | |
| "epoch": 1.003058103975535, | |
| "grad_norm": 0.3948424160480499, | |
| "learning_rate": 8.16280542764652e-06, | |
| "loss": 1.4957305192947388, | |
| "step": 656 | |
| }, | |
| { | |
| "epoch": 1.0061162079510704, | |
| "grad_norm": 0.310005784034729, | |
| "learning_rate": 8.150555808387389e-06, | |
| "loss": 1.455479383468628, | |
| "step": 658 | |
| }, | |
| { | |
| "epoch": 1.0091743119266054, | |
| "grad_norm": 0.26789844036102295, | |
| "learning_rate": 8.138276040290952e-06, | |
| "loss": 1.4779293537139893, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 1.0122324159021407, | |
| "grad_norm": 0.19781345129013062, | |
| "learning_rate": 8.125966263033852e-06, | |
| "loss": 1.4063279628753662, | |
| "step": 662 | |
| }, | |
| { | |
| "epoch": 1.0152905198776758, | |
| "grad_norm": 0.21764519810676575, | |
| "learning_rate": 8.11362661663407e-06, | |
| "loss": 1.5875146389007568, | |
| "step": 664 | |
| }, | |
| { | |
| "epoch": 1.018348623853211, | |
| "grad_norm": 0.25749847292900085, | |
| "learning_rate": 8.101257241449332e-06, | |
| "loss": 1.480888843536377, | |
| "step": 666 | |
| }, | |
| { | |
| "epoch": 1.0214067278287462, | |
| "grad_norm": 0.26426374912261963, | |
| "learning_rate": 8.08885827817552e-06, | |
| "loss": 1.4235765933990479, | |
| "step": 668 | |
| }, | |
| { | |
| "epoch": 1.0244648318042813, | |
| "grad_norm": 0.25188708305358887, | |
| "learning_rate": 8.07642986784506e-06, | |
| "loss": 1.5084459781646729, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 1.0275229357798166, | |
| "grad_norm": 0.6583337783813477, | |
| "learning_rate": 8.063972151825332e-06, | |
| "loss": 1.369026780128479, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 1.0305810397553516, | |
| "grad_norm": 0.21123117208480835, | |
| "learning_rate": 8.05148527181705e-06, | |
| "loss": 1.4445654153823853, | |
| "step": 674 | |
| }, | |
| { | |
| "epoch": 1.033639143730887, | |
| "grad_norm": 0.293588787317276, | |
| "learning_rate": 8.038969369852654e-06, | |
| "loss": 1.555469274520874, | |
| "step": 676 | |
| }, | |
| { | |
| "epoch": 1.036697247706422, | |
| "grad_norm": 0.27872779965400696, | |
| "learning_rate": 8.026424588294701e-06, | |
| "loss": 1.4869214296340942, | |
| "step": 678 | |
| }, | |
| { | |
| "epoch": 1.039755351681957, | |
| "grad_norm": 0.23042356967926025, | |
| "learning_rate": 8.013851069834233e-06, | |
| "loss": 1.279091238975525, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 1.0428134556574924, | |
| "grad_norm": 0.289106547832489, | |
| "learning_rate": 8.001248957489164e-06, | |
| "loss": 1.4306490421295166, | |
| "step": 682 | |
| }, | |
| { | |
| "epoch": 1.0458715596330275, | |
| "grad_norm": 0.5272045135498047, | |
| "learning_rate": 7.988618394602653e-06, | |
| "loss": 1.6781132221221924, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 1.0489296636085628, | |
| "grad_norm": 0.22576113045215607, | |
| "learning_rate": 7.975959524841464e-06, | |
| "loss": 1.3457372188568115, | |
| "step": 686 | |
| }, | |
| { | |
| "epoch": 1.0519877675840978, | |
| "grad_norm": 0.5630601644515991, | |
| "learning_rate": 7.963272492194344e-06, | |
| "loss": 1.4807915687561035, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 1.0550458715596331, | |
| "grad_norm": 0.34389057755470276, | |
| "learning_rate": 7.950557440970377e-06, | |
| "loss": 1.368910789489746, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 1.0581039755351682, | |
| "grad_norm": 0.21063481271266937, | |
| "learning_rate": 7.937814515797348e-06, | |
| "loss": 1.360002040863037, | |
| "step": 692 | |
| }, | |
| { | |
| "epoch": 1.0611620795107033, | |
| "grad_norm": 0.20320424437522888, | |
| "learning_rate": 7.92504386162009e-06, | |
| "loss": 1.3675504922866821, | |
| "step": 694 | |
| }, | |
| { | |
| "epoch": 1.0642201834862386, | |
| "grad_norm": 0.2813395857810974, | |
| "learning_rate": 7.912245623698846e-06, | |
| "loss": 1.395061731338501, | |
| "step": 696 | |
| }, | |
| { | |
| "epoch": 1.0672782874617737, | |
| "grad_norm": 0.4647752046585083, | |
| "learning_rate": 7.899419947607611e-06, | |
| "loss": 1.5662283897399902, | |
| "step": 698 | |
| }, | |
| { | |
| "epoch": 1.070336391437309, | |
| "grad_norm": 0.3765999972820282, | |
| "learning_rate": 7.886566979232471e-06, | |
| "loss": 1.5935697555541992, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 1.073394495412844, | |
| "grad_norm": 0.29083383083343506, | |
| "learning_rate": 7.873686864769955e-06, | |
| "loss": 1.434537649154663, | |
| "step": 702 | |
| }, | |
| { | |
| "epoch": 1.0764525993883791, | |
| "grad_norm": 0.4763205349445343, | |
| "learning_rate": 7.860779750725362e-06, | |
| "loss": 1.4121177196502686, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 1.0795107033639144, | |
| "grad_norm": 0.33439531922340393, | |
| "learning_rate": 7.8478457839111e-06, | |
| "loss": 1.3943579196929932, | |
| "step": 706 | |
| }, | |
| { | |
| "epoch": 1.0825688073394495, | |
| "grad_norm": 0.342690110206604, | |
| "learning_rate": 7.834885111445017e-06, | |
| "loss": 1.4776759147644043, | |
| "step": 708 | |
| }, | |
| { | |
| "epoch": 1.0856269113149848, | |
| "grad_norm": 0.29185494780540466, | |
| "learning_rate": 7.82189788074872e-06, | |
| "loss": 1.4435069561004639, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 1.0886850152905199, | |
| "grad_norm": 1.3288284540176392, | |
| "learning_rate": 7.80888423954591e-06, | |
| "loss": 1.4731531143188477, | |
| "step": 712 | |
| }, | |
| { | |
| "epoch": 1.091743119266055, | |
| "grad_norm": 0.2119162380695343, | |
| "learning_rate": 7.795844335860691e-06, | |
| "loss": 1.4626476764678955, | |
| "step": 714 | |
| }, | |
| { | |
| "epoch": 1.0948012232415902, | |
| "grad_norm": 0.20571930706501007, | |
| "learning_rate": 7.782778318015892e-06, | |
| "loss": 1.342850685119629, | |
| "step": 716 | |
| }, | |
| { | |
| "epoch": 1.0978593272171253, | |
| "grad_norm": 0.22236645221710205, | |
| "learning_rate": 7.769686334631375e-06, | |
| "loss": 1.286208152770996, | |
| "step": 718 | |
| }, | |
| { | |
| "epoch": 1.1009174311926606, | |
| "grad_norm": 0.18384046852588654, | |
| "learning_rate": 7.756568534622355e-06, | |
| "loss": 1.4446015357971191, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 1.1039755351681957, | |
| "grad_norm": 0.2486264407634735, | |
| "learning_rate": 7.743425067197693e-06, | |
| "loss": 1.5612818002700806, | |
| "step": 722 | |
| }, | |
| { | |
| "epoch": 1.107033639143731, | |
| "grad_norm": 0.23211126029491425, | |
| "learning_rate": 7.730256081858207e-06, | |
| "loss": 1.3999545574188232, | |
| "step": 724 | |
| }, | |
| { | |
| "epoch": 1.110091743119266, | |
| "grad_norm": 0.41483980417251587, | |
| "learning_rate": 7.717061728394968e-06, | |
| "loss": 1.591150164604187, | |
| "step": 726 | |
| }, | |
| { | |
| "epoch": 1.1131498470948011, | |
| "grad_norm": 0.3113287091255188, | |
| "learning_rate": 7.7038421568876e-06, | |
| "loss": 1.620883584022522, | |
| "step": 728 | |
| }, | |
| { | |
| "epoch": 1.1162079510703364, | |
| "grad_norm": 0.5611585378646851, | |
| "learning_rate": 7.690597517702569e-06, | |
| "loss": 1.3835599422454834, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 1.1192660550458715, | |
| "grad_norm": 0.5187618732452393, | |
| "learning_rate": 7.677327961491475e-06, | |
| "loss": 1.3614990711212158, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 1.1223241590214068, | |
| "grad_norm": 0.34465184807777405, | |
| "learning_rate": 7.664033639189336e-06, | |
| "loss": 1.467517614364624, | |
| "step": 734 | |
| }, | |
| { | |
| "epoch": 1.1253822629969419, | |
| "grad_norm": 0.22211050987243652, | |
| "learning_rate": 7.650714702012876e-06, | |
| "loss": 1.287433385848999, | |
| "step": 736 | |
| }, | |
| { | |
| "epoch": 1.1284403669724772, | |
| "grad_norm": 0.36259227991104126, | |
| "learning_rate": 7.637371301458797e-06, | |
| "loss": 1.367175817489624, | |
| "step": 738 | |
| }, | |
| { | |
| "epoch": 1.1314984709480123, | |
| "grad_norm": 0.44571414589881897, | |
| "learning_rate": 7.6240035893020625e-06, | |
| "loss": 1.3308281898498535, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 1.1345565749235473, | |
| "grad_norm": 0.26124662160873413, | |
| "learning_rate": 7.610611717594173e-06, | |
| "loss": 1.3915913105010986, | |
| "step": 742 | |
| }, | |
| { | |
| "epoch": 1.1376146788990826, | |
| "grad_norm": 0.3137398064136505, | |
| "learning_rate": 7.597195838661426e-06, | |
| "loss": 1.3188378810882568, | |
| "step": 744 | |
| }, | |
| { | |
| "epoch": 1.1406727828746177, | |
| "grad_norm": 0.3484938144683838, | |
| "learning_rate": 7.583756105103195e-06, | |
| "loss": 1.3703608512878418, | |
| "step": 746 | |
| }, | |
| { | |
| "epoch": 1.143730886850153, | |
| "grad_norm": 0.3699035942554474, | |
| "learning_rate": 7.570292669790186e-06, | |
| "loss": 1.5115067958831787, | |
| "step": 748 | |
| }, | |
| { | |
| "epoch": 1.146788990825688, | |
| "grad_norm": 0.24170878529548645, | |
| "learning_rate": 7.556805685862703e-06, | |
| "loss": 1.3954684734344482, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 1.1498470948012232, | |
| "grad_norm": 0.20038793981075287, | |
| "learning_rate": 7.543295306728904e-06, | |
| "loss": 1.345947027206421, | |
| "step": 752 | |
| }, | |
| { | |
| "epoch": 1.1529051987767585, | |
| "grad_norm": 0.38949868083000183, | |
| "learning_rate": 7.529761686063056e-06, | |
| "loss": 1.5590949058532715, | |
| "step": 754 | |
| }, | |
| { | |
| "epoch": 1.1559633027522935, | |
| "grad_norm": 0.33645766973495483, | |
| "learning_rate": 7.516204977803789e-06, | |
| "loss": 1.446972370147705, | |
| "step": 756 | |
| }, | |
| { | |
| "epoch": 1.1590214067278288, | |
| "grad_norm": 0.18463970720767975, | |
| "learning_rate": 7.5026253361523435e-06, | |
| "loss": 1.3630192279815674, | |
| "step": 758 | |
| }, | |
| { | |
| "epoch": 1.162079510703364, | |
| "grad_norm": 0.33572879433631897, | |
| "learning_rate": 7.489022915570813e-06, | |
| "loss": 1.457106113433838, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 1.165137614678899, | |
| "grad_norm": 0.2753995954990387, | |
| "learning_rate": 7.475397870780397e-06, | |
| "loss": 1.4502360820770264, | |
| "step": 762 | |
| }, | |
| { | |
| "epoch": 1.1681957186544343, | |
| "grad_norm": 0.35596194863319397, | |
| "learning_rate": 7.4617503567596295e-06, | |
| "loss": 1.4977834224700928, | |
| "step": 764 | |
| }, | |
| { | |
| "epoch": 1.1712538226299694, | |
| "grad_norm": 0.4726940095424652, | |
| "learning_rate": 7.448080528742624e-06, | |
| "loss": 1.3764468431472778, | |
| "step": 766 | |
| }, | |
| { | |
| "epoch": 1.1743119266055047, | |
| "grad_norm": 0.26225268840789795, | |
| "learning_rate": 7.434388542217303e-06, | |
| "loss": 1.4741466045379639, | |
| "step": 768 | |
| }, | |
| { | |
| "epoch": 1.1773700305810397, | |
| "grad_norm": 0.27619338035583496, | |
| "learning_rate": 7.420674552923638e-06, | |
| "loss": 1.3593350648880005, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 1.1804281345565748, | |
| "grad_norm": 0.3182947635650635, | |
| "learning_rate": 7.4069387168518615e-06, | |
| "loss": 1.673621654510498, | |
| "step": 772 | |
| }, | |
| { | |
| "epoch": 1.18348623853211, | |
| "grad_norm": 0.28721779584884644, | |
| "learning_rate": 7.393181190240714e-06, | |
| "loss": 1.4450278282165527, | |
| "step": 774 | |
| }, | |
| { | |
| "epoch": 1.1865443425076452, | |
| "grad_norm": 0.2768658399581909, | |
| "learning_rate": 7.379402129575645e-06, | |
| "loss": 1.5032843351364136, | |
| "step": 776 | |
| }, | |
| { | |
| "epoch": 1.1896024464831805, | |
| "grad_norm": 0.3218024969100952, | |
| "learning_rate": 7.3656016915870545e-06, | |
| "loss": 1.4965013265609741, | |
| "step": 778 | |
| }, | |
| { | |
| "epoch": 1.1926605504587156, | |
| "grad_norm": 0.4919971227645874, | |
| "learning_rate": 7.351780033248491e-06, | |
| "loss": 1.4509224891662598, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 1.1957186544342508, | |
| "grad_norm": 0.3981909155845642, | |
| "learning_rate": 7.33793731177488e-06, | |
| "loss": 1.4464759826660156, | |
| "step": 782 | |
| }, | |
| { | |
| "epoch": 1.198776758409786, | |
| "grad_norm": 0.3076995611190796, | |
| "learning_rate": 7.324073684620726e-06, | |
| "loss": 1.4577126502990723, | |
| "step": 784 | |
| }, | |
| { | |
| "epoch": 1.2018348623853212, | |
| "grad_norm": 0.28227174282073975, | |
| "learning_rate": 7.310189309478331e-06, | |
| "loss": 1.439997911453247, | |
| "step": 786 | |
| }, | |
| { | |
| "epoch": 1.2048929663608563, | |
| "grad_norm": 0.26599401235580444, | |
| "learning_rate": 7.296284344275991e-06, | |
| "loss": 1.531783103942871, | |
| "step": 788 | |
| }, | |
| { | |
| "epoch": 1.2079510703363914, | |
| "grad_norm": 0.69685959815979, | |
| "learning_rate": 7.282358947176207e-06, | |
| "loss": 1.4577662944793701, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 1.2110091743119267, | |
| "grad_norm": 0.25103896856307983, | |
| "learning_rate": 7.268413276573881e-06, | |
| "loss": 1.3561824560165405, | |
| "step": 792 | |
| }, | |
| { | |
| "epoch": 1.2140672782874617, | |
| "grad_norm": 0.21765579283237457, | |
| "learning_rate": 7.25444749109452e-06, | |
| "loss": 1.3165652751922607, | |
| "step": 794 | |
| }, | |
| { | |
| "epoch": 1.217125382262997, | |
| "grad_norm": 0.2564055919647217, | |
| "learning_rate": 7.2404617495924254e-06, | |
| "loss": 1.383346676826477, | |
| "step": 796 | |
| }, | |
| { | |
| "epoch": 1.2201834862385321, | |
| "grad_norm": 0.40797773003578186, | |
| "learning_rate": 7.226456211148891e-06, | |
| "loss": 1.3315465450286865, | |
| "step": 798 | |
| }, | |
| { | |
| "epoch": 1.2232415902140672, | |
| "grad_norm": 0.31532490253448486, | |
| "learning_rate": 7.212431035070391e-06, | |
| "loss": 1.3896580934524536, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 1.2262996941896025, | |
| "grad_norm": 0.25705334544181824, | |
| "learning_rate": 7.198386380886765e-06, | |
| "loss": 1.3460421562194824, | |
| "step": 802 | |
| }, | |
| { | |
| "epoch": 1.2293577981651376, | |
| "grad_norm": 0.31377753615379333, | |
| "learning_rate": 7.1843224083494154e-06, | |
| "loss": 1.595191240310669, | |
| "step": 804 | |
| }, | |
| { | |
| "epoch": 1.2324159021406729, | |
| "grad_norm": 0.2853119969367981, | |
| "learning_rate": 7.170239277429474e-06, | |
| "loss": 1.6170880794525146, | |
| "step": 806 | |
| }, | |
| { | |
| "epoch": 1.235474006116208, | |
| "grad_norm": 0.44243165850639343, | |
| "learning_rate": 7.156137148315993e-06, | |
| "loss": 1.6550755500793457, | |
| "step": 808 | |
| }, | |
| { | |
| "epoch": 1.238532110091743, | |
| "grad_norm": 0.3517357110977173, | |
| "learning_rate": 7.14201618141412e-06, | |
| "loss": 1.566192865371704, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 1.2415902140672783, | |
| "grad_norm": 0.2986673414707184, | |
| "learning_rate": 7.127876537343277e-06, | |
| "loss": 1.63118314743042, | |
| "step": 812 | |
| }, | |
| { | |
| "epoch": 1.2446483180428134, | |
| "grad_norm": 0.3479074537754059, | |
| "learning_rate": 7.1137183769353225e-06, | |
| "loss": 1.5168559551239014, | |
| "step": 814 | |
| }, | |
| { | |
| "epoch": 1.2477064220183487, | |
| "grad_norm": 0.4152420461177826, | |
| "learning_rate": 7.099541861232736e-06, | |
| "loss": 1.6398264169692993, | |
| "step": 816 | |
| }, | |
| { | |
| "epoch": 1.2507645259938838, | |
| "grad_norm": 0.384573370218277, | |
| "learning_rate": 7.085347151486779e-06, | |
| "loss": 1.4128949642181396, | |
| "step": 818 | |
| }, | |
| { | |
| "epoch": 1.2538226299694188, | |
| "grad_norm": 0.3804616630077362, | |
| "learning_rate": 7.071134409155659e-06, | |
| "loss": 1.557448148727417, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 1.2568807339449541, | |
| "grad_norm": 0.6236130595207214, | |
| "learning_rate": 7.056903795902701e-06, | |
| "loss": 1.3184959888458252, | |
| "step": 822 | |
| }, | |
| { | |
| "epoch": 1.2599388379204892, | |
| "grad_norm": 0.7443933486938477, | |
| "learning_rate": 7.042655473594495e-06, | |
| "loss": 1.537932276725769, | |
| "step": 824 | |
| }, | |
| { | |
| "epoch": 1.2629969418960245, | |
| "grad_norm": 0.5472233891487122, | |
| "learning_rate": 7.028389604299074e-06, | |
| "loss": 1.1561626195907593, | |
| "step": 826 | |
| }, | |
| { | |
| "epoch": 1.2660550458715596, | |
| "grad_norm": 0.847542941570282, | |
| "learning_rate": 7.01410635028405e-06, | |
| "loss": 1.1249284744262695, | |
| "step": 828 | |
| }, | |
| { | |
| "epoch": 1.2691131498470947, | |
| "grad_norm": 0.3495579957962036, | |
| "learning_rate": 6.9998058740147835e-06, | |
| "loss": 1.3474421501159668, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 1.27217125382263, | |
| "grad_norm": 0.4069005846977234, | |
| "learning_rate": 6.985488338152529e-06, | |
| "loss": 1.3892837762832642, | |
| "step": 832 | |
| }, | |
| { | |
| "epoch": 1.2752293577981653, | |
| "grad_norm": 0.6165335178375244, | |
| "learning_rate": 6.971153905552587e-06, | |
| "loss": 1.524814248085022, | |
| "step": 834 | |
| }, | |
| { | |
| "epoch": 1.2782874617737003, | |
| "grad_norm": 0.6481596827507019, | |
| "learning_rate": 6.956802739262446e-06, | |
| "loss": 1.464059829711914, | |
| "step": 836 | |
| }, | |
| { | |
| "epoch": 1.2813455657492354, | |
| "grad_norm": 0.3051135241985321, | |
| "learning_rate": 6.942435002519938e-06, | |
| "loss": 1.212691307067871, | |
| "step": 838 | |
| }, | |
| { | |
| "epoch": 1.2844036697247707, | |
| "grad_norm": 0.31896138191223145, | |
| "learning_rate": 6.9280508587513725e-06, | |
| "loss": 1.179284691810608, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 1.2874617737003058, | |
| "grad_norm": 0.2261551022529602, | |
| "learning_rate": 6.913650471569684e-06, | |
| "loss": 1.38997220993042, | |
| "step": 842 | |
| }, | |
| { | |
| "epoch": 1.290519877675841, | |
| "grad_norm": 0.3368714451789856, | |
| "learning_rate": 6.899234004772566e-06, | |
| "loss": 1.3169426918029785, | |
| "step": 844 | |
| }, | |
| { | |
| "epoch": 1.2935779816513762, | |
| "grad_norm": 0.49499788880348206, | |
| "learning_rate": 6.884801622340612e-06, | |
| "loss": 1.293768048286438, | |
| "step": 846 | |
| }, | |
| { | |
| "epoch": 1.2966360856269112, | |
| "grad_norm": 0.2904210686683655, | |
| "learning_rate": 6.870353488435447e-06, | |
| "loss": 1.5008976459503174, | |
| "step": 848 | |
| }, | |
| { | |
| "epoch": 1.2996941896024465, | |
| "grad_norm": 0.4230108857154846, | |
| "learning_rate": 6.855889767397863e-06, | |
| "loss": 1.4707106351852417, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 1.3027522935779816, | |
| "grad_norm": 0.2836777865886688, | |
| "learning_rate": 6.841410623745944e-06, | |
| "loss": 1.182532548904419, | |
| "step": 852 | |
| }, | |
| { | |
| "epoch": 1.305810397553517, | |
| "grad_norm": 0.3048684895038605, | |
| "learning_rate": 6.826916222173205e-06, | |
| "loss": 1.373314380645752, | |
| "step": 854 | |
| }, | |
| { | |
| "epoch": 1.308868501529052, | |
| "grad_norm": 0.38874655961990356, | |
| "learning_rate": 6.812406727546713e-06, | |
| "loss": 1.5207183361053467, | |
| "step": 856 | |
| }, | |
| { | |
| "epoch": 1.311926605504587, | |
| "grad_norm": 0.541847288608551, | |
| "learning_rate": 6.7978823049052046e-06, | |
| "loss": 1.6546745300292969, | |
| "step": 858 | |
| }, | |
| { | |
| "epoch": 1.3149847094801224, | |
| "grad_norm": 0.3354927897453308, | |
| "learning_rate": 6.783343119457221e-06, | |
| "loss": 1.6852827072143555, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 1.3180428134556574, | |
| "grad_norm": 0.22799281775951385, | |
| "learning_rate": 6.768789336579224e-06, | |
| "loss": 1.7998615503311157, | |
| "step": 862 | |
| }, | |
| { | |
| "epoch": 1.3211009174311927, | |
| "grad_norm": 0.2829393446445465, | |
| "learning_rate": 6.754221121813707e-06, | |
| "loss": 1.3555914163589478, | |
| "step": 864 | |
| }, | |
| { | |
| "epoch": 1.3241590214067278, | |
| "grad_norm": 0.2552604377269745, | |
| "learning_rate": 6.739638640867332e-06, | |
| "loss": 1.44038724899292, | |
| "step": 866 | |
| }, | |
| { | |
| "epoch": 1.3272171253822629, | |
| "grad_norm": 0.2328341007232666, | |
| "learning_rate": 6.72504205960902e-06, | |
| "loss": 1.2792387008666992, | |
| "step": 868 | |
| }, | |
| { | |
| "epoch": 1.3302752293577982, | |
| "grad_norm": 0.19776956737041473, | |
| "learning_rate": 6.710431544068085e-06, | |
| "loss": 1.2014856338500977, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 1.3333333333333333, | |
| "grad_norm": 0.2862965762615204, | |
| "learning_rate": 6.695807260432332e-06, | |
| "loss": 1.612195372581482, | |
| "step": 872 | |
| }, | |
| { | |
| "epoch": 1.3363914373088686, | |
| "grad_norm": 0.2737024426460266, | |
| "learning_rate": 6.681169375046173e-06, | |
| "loss": 1.4856352806091309, | |
| "step": 874 | |
| }, | |
| { | |
| "epoch": 1.3394495412844036, | |
| "grad_norm": 0.33617132902145386, | |
| "learning_rate": 6.666518054408734e-06, | |
| "loss": 1.6690922975540161, | |
| "step": 876 | |
| }, | |
| { | |
| "epoch": 1.3425076452599387, | |
| "grad_norm": 0.33230748772621155, | |
| "learning_rate": 6.65185346517196e-06, | |
| "loss": 1.134220838546753, | |
| "step": 878 | |
| }, | |
| { | |
| "epoch": 1.345565749235474, | |
| "grad_norm": 0.34520813822746277, | |
| "learning_rate": 6.637175774138722e-06, | |
| "loss": 1.3939542770385742, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 1.3486238532110093, | |
| "grad_norm": 0.3193676471710205, | |
| "learning_rate": 6.622485148260916e-06, | |
| "loss": 1.6689043045043945, | |
| "step": 882 | |
| }, | |
| { | |
| "epoch": 1.3516819571865444, | |
| "grad_norm": 0.2586718499660492, | |
| "learning_rate": 6.607781754637567e-06, | |
| "loss": 1.3927881717681885, | |
| "step": 884 | |
| }, | |
| { | |
| "epoch": 1.3547400611620795, | |
| "grad_norm": 0.36470475792884827, | |
| "learning_rate": 6.593065760512924e-06, | |
| "loss": 1.5524687767028809, | |
| "step": 886 | |
| }, | |
| { | |
| "epoch": 1.3577981651376148, | |
| "grad_norm": 0.5333327054977417, | |
| "learning_rate": 6.578337333274566e-06, | |
| "loss": 1.4335553646087646, | |
| "step": 888 | |
| }, | |
| { | |
| "epoch": 1.3608562691131498, | |
| "grad_norm": 0.24828922748565674, | |
| "learning_rate": 6.563596640451489e-06, | |
| "loss": 1.3478354215621948, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 1.3639143730886851, | |
| "grad_norm": 0.2684786021709442, | |
| "learning_rate": 6.548843849712206e-06, | |
| "loss": 1.4221248626708984, | |
| "step": 892 | |
| }, | |
| { | |
| "epoch": 1.3669724770642202, | |
| "grad_norm": 0.2922813594341278, | |
| "learning_rate": 6.534079128862835e-06, | |
| "loss": 1.4792616367340088, | |
| "step": 894 | |
| }, | |
| { | |
| "epoch": 1.3700305810397553, | |
| "grad_norm": 0.21960243582725525, | |
| "learning_rate": 6.5193026458452006e-06, | |
| "loss": 1.3363940715789795, | |
| "step": 896 | |
| }, | |
| { | |
| "epoch": 1.3730886850152906, | |
| "grad_norm": 0.41456371545791626, | |
| "learning_rate": 6.50451456873491e-06, | |
| "loss": 1.4480544328689575, | |
| "step": 898 | |
| }, | |
| { | |
| "epoch": 1.3761467889908257, | |
| "grad_norm": 0.6222192049026489, | |
| "learning_rate": 6.489715065739448e-06, | |
| "loss": 1.7465565204620361, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 1.379204892966361, | |
| "grad_norm": 0.5998108983039856, | |
| "learning_rate": 6.474904305196268e-06, | |
| "loss": 2.144679546356201, | |
| "step": 902 | |
| }, | |
| { | |
| "epoch": 1.382262996941896, | |
| "grad_norm": 0.5612609386444092, | |
| "learning_rate": 6.4600824555708695e-06, | |
| "loss": 1.378048300743103, | |
| "step": 904 | |
| }, | |
| { | |
| "epoch": 1.385321100917431, | |
| "grad_norm": 0.32021385431289673, | |
| "learning_rate": 6.445249685454885e-06, | |
| "loss": 1.361167073249817, | |
| "step": 906 | |
| }, | |
| { | |
| "epoch": 1.3883792048929664, | |
| "grad_norm": 0.36393630504608154, | |
| "learning_rate": 6.4304061635641645e-06, | |
| "loss": 1.433903694152832, | |
| "step": 908 | |
| }, | |
| { | |
| "epoch": 1.3914373088685015, | |
| "grad_norm": 0.7985405325889587, | |
| "learning_rate": 6.415552058736854e-06, | |
| "loss": 1.5466125011444092, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 1.3944954128440368, | |
| "grad_norm": 0.30912530422210693, | |
| "learning_rate": 6.4006875399314705e-06, | |
| "loss": 1.463235855102539, | |
| "step": 912 | |
| }, | |
| { | |
| "epoch": 1.3975535168195719, | |
| "grad_norm": 0.2953026294708252, | |
| "learning_rate": 6.3858127762249945e-06, | |
| "loss": 1.3276557922363281, | |
| "step": 914 | |
| }, | |
| { | |
| "epoch": 1.400611620795107, | |
| "grad_norm": 0.19828742742538452, | |
| "learning_rate": 6.3709279368109264e-06, | |
| "loss": 1.2300511598587036, | |
| "step": 916 | |
| }, | |
| { | |
| "epoch": 1.4036697247706422, | |
| "grad_norm": 0.21878407895565033, | |
| "learning_rate": 6.356033190997386e-06, | |
| "loss": 1.1606783866882324, | |
| "step": 918 | |
| }, | |
| { | |
| "epoch": 1.4067278287461773, | |
| "grad_norm": 0.19046013057231903, | |
| "learning_rate": 6.341128708205162e-06, | |
| "loss": 1.3056751489639282, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 1.4097859327217126, | |
| "grad_norm": 0.40108954906463623, | |
| "learning_rate": 6.326214657965804e-06, | |
| "loss": 1.5421757698059082, | |
| "step": 922 | |
| }, | |
| { | |
| "epoch": 1.4128440366972477, | |
| "grad_norm": 0.46537211537361145, | |
| "learning_rate": 6.311291209919682e-06, | |
| "loss": 1.5684192180633545, | |
| "step": 924 | |
| }, | |
| { | |
| "epoch": 1.4159021406727827, | |
| "grad_norm": 0.5733487606048584, | |
| "learning_rate": 6.296358533814065e-06, | |
| "loss": 1.5650339126586914, | |
| "step": 926 | |
| }, | |
| { | |
| "epoch": 1.418960244648318, | |
| "grad_norm": 0.4306733310222626, | |
| "learning_rate": 6.281416799501188e-06, | |
| "loss": 1.5992372035980225, | |
| "step": 928 | |
| }, | |
| { | |
| "epoch": 1.4220183486238533, | |
| "grad_norm": 0.407654732465744, | |
| "learning_rate": 6.266466176936313e-06, | |
| "loss": 1.4283607006072998, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 1.4250764525993884, | |
| "grad_norm": 4.419346332550049, | |
| "learning_rate": 6.251506836175807e-06, | |
| "loss": 1.5659562349319458, | |
| "step": 932 | |
| }, | |
| { | |
| "epoch": 1.4281345565749235, | |
| "grad_norm": 0.7012003064155579, | |
| "learning_rate": 6.236538947375203e-06, | |
| "loss": 1.4677741527557373, | |
| "step": 934 | |
| }, | |
| { | |
| "epoch": 1.4311926605504588, | |
| "grad_norm": 0.22764644026756287, | |
| "learning_rate": 6.221562680787258e-06, | |
| "loss": 1.374863624572754, | |
| "step": 936 | |
| }, | |
| { | |
| "epoch": 1.4342507645259939, | |
| "grad_norm": 0.4946407973766327, | |
| "learning_rate": 6.20657820676003e-06, | |
| "loss": 1.3795430660247803, | |
| "step": 938 | |
| }, | |
| { | |
| "epoch": 1.4373088685015292, | |
| "grad_norm": 1.4666649103164673, | |
| "learning_rate": 6.191585695734925e-06, | |
| "loss": 1.584106683731079, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 1.4403669724770642, | |
| "grad_norm": 0.9116813540458679, | |
| "learning_rate": 6.176585318244775e-06, | |
| "loss": 1.3207650184631348, | |
| "step": 942 | |
| }, | |
| { | |
| "epoch": 1.4434250764525993, | |
| "grad_norm": 0.4549460709095001, | |
| "learning_rate": 6.161577244911883e-06, | |
| "loss": 1.5188086032867432, | |
| "step": 944 | |
| }, | |
| { | |
| "epoch": 1.4464831804281346, | |
| "grad_norm": 0.6293279528617859, | |
| "learning_rate": 6.146561646446088e-06, | |
| "loss": 1.40483558177948, | |
| "step": 946 | |
| }, | |
| { | |
| "epoch": 1.4495412844036697, | |
| "grad_norm": 0.5348030924797058, | |
| "learning_rate": 6.131538693642828e-06, | |
| "loss": 1.4180057048797607, | |
| "step": 948 | |
| }, | |
| { | |
| "epoch": 1.452599388379205, | |
| "grad_norm": 0.7010774612426758, | |
| "learning_rate": 6.116508557381191e-06, | |
| "loss": 1.5555238723754883, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 1.45565749235474, | |
| "grad_norm": 0.3996182382106781, | |
| "learning_rate": 6.1014714086219725e-06, | |
| "loss": 1.5635944604873657, | |
| "step": 952 | |
| }, | |
| { | |
| "epoch": 1.4587155963302751, | |
| "grad_norm": 0.3819827139377594, | |
| "learning_rate": 6.086427418405735e-06, | |
| "loss": 1.3868696689605713, | |
| "step": 954 | |
| }, | |
| { | |
| "epoch": 1.4617737003058104, | |
| "grad_norm": 0.24838334321975708, | |
| "learning_rate": 6.071376757850858e-06, | |
| "loss": 1.3217381238937378, | |
| "step": 956 | |
| }, | |
| { | |
| "epoch": 1.4648318042813455, | |
| "grad_norm": 0.5527139902114868, | |
| "learning_rate": 6.0563195981515885e-06, | |
| "loss": 1.456415057182312, | |
| "step": 958 | |
| }, | |
| { | |
| "epoch": 1.4678899082568808, | |
| "grad_norm": 0.2822090983390808, | |
| "learning_rate": 6.0412561105761055e-06, | |
| "loss": 1.3990404605865479, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 1.470948012232416, | |
| "grad_norm": 0.370832234621048, | |
| "learning_rate": 6.026186466464562e-06, | |
| "loss": 1.5524400472640991, | |
| "step": 962 | |
| }, | |
| { | |
| "epoch": 1.474006116207951, | |
| "grad_norm": 0.30970191955566406, | |
| "learning_rate": 6.011110837227138e-06, | |
| "loss": 1.4143943786621094, | |
| "step": 964 | |
| }, | |
| { | |
| "epoch": 1.4770642201834863, | |
| "grad_norm": 0.3659932613372803, | |
| "learning_rate": 5.996029394342089e-06, | |
| "loss": 1.3726913928985596, | |
| "step": 966 | |
| }, | |
| { | |
| "epoch": 1.4801223241590213, | |
| "grad_norm": 0.40378639101982117, | |
| "learning_rate": 5.980942309353803e-06, | |
| "loss": 1.3403112888336182, | |
| "step": 968 | |
| }, | |
| { | |
| "epoch": 1.4831804281345566, | |
| "grad_norm": 0.2668818235397339, | |
| "learning_rate": 5.965849753870841e-06, | |
| "loss": 1.4581551551818848, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 1.4862385321100917, | |
| "grad_norm": 0.39147576689720154, | |
| "learning_rate": 5.950751899563989e-06, | |
| "loss": 1.4426075220108032, | |
| "step": 972 | |
| }, | |
| { | |
| "epoch": 1.4892966360856268, | |
| "grad_norm": 0.4053312838077545, | |
| "learning_rate": 5.935648918164308e-06, | |
| "loss": 1.429807424545288, | |
| "step": 974 | |
| }, | |
| { | |
| "epoch": 1.492354740061162, | |
| "grad_norm": 0.2912329435348511, | |
| "learning_rate": 5.9205409814611694e-06, | |
| "loss": 1.6015820503234863, | |
| "step": 976 | |
| }, | |
| { | |
| "epoch": 1.4954128440366974, | |
| "grad_norm": 0.39581140875816345, | |
| "learning_rate": 5.9054282613003165e-06, | |
| "loss": 1.7901129722595215, | |
| "step": 978 | |
| }, | |
| { | |
| "epoch": 1.4984709480122325, | |
| "grad_norm": 5.4772210121154785, | |
| "learning_rate": 5.890310929581899e-06, | |
| "loss": 1.665008544921875, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 1.5015290519877675, | |
| "grad_norm": 0.32753488421440125, | |
| "learning_rate": 5.875189158258521e-06, | |
| "loss": 1.658569574356079, | |
| "step": 982 | |
| }, | |
| { | |
| "epoch": 1.5045871559633026, | |
| "grad_norm": 0.3322629928588867, | |
| "learning_rate": 5.860063119333287e-06, | |
| "loss": 1.568853735923767, | |
| "step": 984 | |
| }, | |
| { | |
| "epoch": 1.507645259938838, | |
| "grad_norm": 0.3625146746635437, | |
| "learning_rate": 5.844932984857841e-06, | |
| "loss": 1.2555010318756104, | |
| "step": 986 | |
| }, | |
| { | |
| "epoch": 1.5107033639143732, | |
| "grad_norm": 0.3967174291610718, | |
| "learning_rate": 5.829798926930411e-06, | |
| "loss": 1.2352030277252197, | |
| "step": 988 | |
| }, | |
| { | |
| "epoch": 1.5137614678899083, | |
| "grad_norm": 0.92249995470047, | |
| "learning_rate": 5.814661117693856e-06, | |
| "loss": 1.6529834270477295, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 1.5168195718654434, | |
| "grad_norm": 0.43264713883399963, | |
| "learning_rate": 5.799519729333702e-06, | |
| "loss": 1.6510822772979736, | |
| "step": 992 | |
| }, | |
| { | |
| "epoch": 1.5198776758409784, | |
| "grad_norm": 0.48226049542427063, | |
| "learning_rate": 5.784374934076188e-06, | |
| "loss": 1.7469120025634766, | |
| "step": 994 | |
| }, | |
| { | |
| "epoch": 1.5229357798165137, | |
| "grad_norm": 0.6006577014923096, | |
| "learning_rate": 5.769226904186301e-06, | |
| "loss": 1.6751326322555542, | |
| "step": 996 | |
| }, | |
| { | |
| "epoch": 1.525993883792049, | |
| "grad_norm": 0.417524129152298, | |
| "learning_rate": 5.754075811965826e-06, | |
| "loss": 1.7241541147232056, | |
| "step": 998 | |
| }, | |
| { | |
| "epoch": 1.529051987767584, | |
| "grad_norm": 0.4846678674221039, | |
| "learning_rate": 5.738921829751374e-06, | |
| "loss": 1.5894498825073242, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 1.5321100917431192, | |
| "grad_norm": 0.37620386481285095, | |
| "learning_rate": 5.723765129912433e-06, | |
| "loss": 1.5567536354064941, | |
| "step": 1002 | |
| }, | |
| { | |
| "epoch": 1.5351681957186545, | |
| "grad_norm": 0.9559251070022583, | |
| "learning_rate": 5.708605884849402e-06, | |
| "loss": 1.444126844406128, | |
| "step": 1004 | |
| }, | |
| { | |
| "epoch": 1.5382262996941896, | |
| "grad_norm": 0.4608314335346222, | |
| "learning_rate": 5.6934442669916315e-06, | |
| "loss": 1.7045128345489502, | |
| "step": 1006 | |
| }, | |
| { | |
| "epoch": 1.5412844036697249, | |
| "grad_norm": 0.5580506920814514, | |
| "learning_rate": 5.678280448795457e-06, | |
| "loss": 1.576319932937622, | |
| "step": 1008 | |
| }, | |
| { | |
| "epoch": 1.54434250764526, | |
| "grad_norm": 0.414983332157135, | |
| "learning_rate": 5.663114602742247e-06, | |
| "loss": 1.1866123676300049, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 1.547400611620795, | |
| "grad_norm": 0.5494526624679565, | |
| "learning_rate": 5.647946901336433e-06, | |
| "loss": 1.7420477867126465, | |
| "step": 1012 | |
| }, | |
| { | |
| "epoch": 1.5504587155963303, | |
| "grad_norm": 0.6842697262763977, | |
| "learning_rate": 5.632777517103552e-06, | |
| "loss": 1.7904109954833984, | |
| "step": 1014 | |
| }, | |
| { | |
| "epoch": 1.5535168195718656, | |
| "grad_norm": 0.43980666995048523, | |
| "learning_rate": 5.617606622588282e-06, | |
| "loss": 1.862006425857544, | |
| "step": 1016 | |
| }, | |
| { | |
| "epoch": 1.5565749235474007, | |
| "grad_norm": 0.3990402817726135, | |
| "learning_rate": 5.602434390352476e-06, | |
| "loss": 1.7830100059509277, | |
| "step": 1018 | |
| }, | |
| { | |
| "epoch": 1.5596330275229358, | |
| "grad_norm": 0.4031524360179901, | |
| "learning_rate": 5.58726099297321e-06, | |
| "loss": 1.7594141960144043, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 1.5626911314984708, | |
| "grad_norm": 0.6580591797828674, | |
| "learning_rate": 5.572086603040809e-06, | |
| "loss": 1.6219829320907593, | |
| "step": 1022 | |
| }, | |
| { | |
| "epoch": 1.5657492354740061, | |
| "grad_norm": 0.36656439304351807, | |
| "learning_rate": 5.556911393156885e-06, | |
| "loss": 1.4893901348114014, | |
| "step": 1024 | |
| }, | |
| { | |
| "epoch": 1.5688073394495414, | |
| "grad_norm": 0.6261524558067322, | |
| "learning_rate": 5.541735535932383e-06, | |
| "loss": 1.058058261871338, | |
| "step": 1026 | |
| }, | |
| { | |
| "epoch": 1.5718654434250765, | |
| "grad_norm": 0.3441345691680908, | |
| "learning_rate": 5.526559203985605e-06, | |
| "loss": 1.0509142875671387, | |
| "step": 1028 | |
| }, | |
| { | |
| "epoch": 1.5749235474006116, | |
| "grad_norm": 0.2408900260925293, | |
| "learning_rate": 5.511382569940258e-06, | |
| "loss": 1.2871123552322388, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 1.5779816513761467, | |
| "grad_norm": 0.45723816752433777, | |
| "learning_rate": 5.496205806423481e-06, | |
| "loss": 1.2235673666000366, | |
| "step": 1032 | |
| }, | |
| { | |
| "epoch": 1.581039755351682, | |
| "grad_norm": 0.3109905421733856, | |
| "learning_rate": 5.481029086063887e-06, | |
| "loss": 1.177577018737793, | |
| "step": 1034 | |
| }, | |
| { | |
| "epoch": 1.5840978593272173, | |
| "grad_norm": 0.20282985270023346, | |
| "learning_rate": 5.4658525814896014e-06, | |
| "loss": 1.2040612697601318, | |
| "step": 1036 | |
| }, | |
| { | |
| "epoch": 1.5871559633027523, | |
| "grad_norm": 0.43076759576797485, | |
| "learning_rate": 5.45067646532629e-06, | |
| "loss": 1.4584531784057617, | |
| "step": 1038 | |
| }, | |
| { | |
| "epoch": 1.5902140672782874, | |
| "grad_norm": 0.472885400056839, | |
| "learning_rate": 5.435500910195203e-06, | |
| "loss": 1.387641429901123, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 1.5932721712538225, | |
| "grad_norm": 3.1532437801361084, | |
| "learning_rate": 5.420326088711209e-06, | |
| "loss": 1.221092700958252, | |
| "step": 1042 | |
| }, | |
| { | |
| "epoch": 1.5963302752293578, | |
| "grad_norm": 0.6743189692497253, | |
| "learning_rate": 5.405152173480833e-06, | |
| "loss": 1.4836219549179077, | |
| "step": 1044 | |
| }, | |
| { | |
| "epoch": 1.599388379204893, | |
| "grad_norm": 0.20277228951454163, | |
| "learning_rate": 5.389979337100289e-06, | |
| "loss": 1.5031371116638184, | |
| "step": 1046 | |
| }, | |
| { | |
| "epoch": 1.6024464831804281, | |
| "grad_norm": 0.5120447874069214, | |
| "learning_rate": 5.374807752153522e-06, | |
| "loss": 1.282975673675537, | |
| "step": 1048 | |
| }, | |
| { | |
| "epoch": 1.6055045871559632, | |
| "grad_norm": 0.35753709077835083, | |
| "learning_rate": 5.359637591210242e-06, | |
| "loss": 1.4665361642837524, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 1.6085626911314985, | |
| "grad_norm": 0.7353309988975525, | |
| "learning_rate": 5.344469026823959e-06, | |
| "loss": 1.6730611324310303, | |
| "step": 1052 | |
| }, | |
| { | |
| "epoch": 1.6116207951070336, | |
| "grad_norm": 0.4338257610797882, | |
| "learning_rate": 5.329302231530029e-06, | |
| "loss": 1.186348795890808, | |
| "step": 1054 | |
| }, | |
| { | |
| "epoch": 1.614678899082569, | |
| "grad_norm": 0.42416566610336304, | |
| "learning_rate": 5.31413737784368e-06, | |
| "loss": 1.4430310726165771, | |
| "step": 1056 | |
| }, | |
| { | |
| "epoch": 1.617737003058104, | |
| "grad_norm": 0.2432592213153839, | |
| "learning_rate": 5.298974638258055e-06, | |
| "loss": 1.518967866897583, | |
| "step": 1058 | |
| }, | |
| { | |
| "epoch": 1.620795107033639, | |
| "grad_norm": 0.408245712518692, | |
| "learning_rate": 5.283814185242252e-06, | |
| "loss": 1.426690697669983, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 1.6238532110091743, | |
| "grad_norm": 0.2117079198360443, | |
| "learning_rate": 5.2686561912393606e-06, | |
| "loss": 1.2693121433258057, | |
| "step": 1062 | |
| }, | |
| { | |
| "epoch": 1.6269113149847096, | |
| "grad_norm": 4.30716609954834, | |
| "learning_rate": 5.253500828664501e-06, | |
| "loss": 0.9013931155204773, | |
| "step": 1064 | |
| }, | |
| { | |
| "epoch": 1.6299694189602447, | |
| "grad_norm": 0.38770049810409546, | |
| "learning_rate": 5.23834826990286e-06, | |
| "loss": 1.5694489479064941, | |
| "step": 1066 | |
| }, | |
| { | |
| "epoch": 1.6330275229357798, | |
| "grad_norm": 0.6700468063354492, | |
| "learning_rate": 5.223198687307733e-06, | |
| "loss": 1.503030776977539, | |
| "step": 1068 | |
| }, | |
| { | |
| "epoch": 1.6360856269113149, | |
| "grad_norm": 0.2767106294631958, | |
| "learning_rate": 5.208052253198564e-06, | |
| "loss": 1.3917062282562256, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 1.6391437308868502, | |
| "grad_norm": 0.3463125228881836, | |
| "learning_rate": 5.192909139858981e-06, | |
| "loss": 1.5068938732147217, | |
| "step": 1072 | |
| }, | |
| { | |
| "epoch": 1.6422018348623855, | |
| "grad_norm": 0.3212260603904724, | |
| "learning_rate": 5.177769519534846e-06, | |
| "loss": 1.4421181678771973, | |
| "step": 1074 | |
| }, | |
| { | |
| "epoch": 1.6452599388379205, | |
| "grad_norm": 0.4484805762767792, | |
| "learning_rate": 5.162633564432285e-06, | |
| "loss": 1.408212661743164, | |
| "step": 1076 | |
| }, | |
| { | |
| "epoch": 1.6483180428134556, | |
| "grad_norm": 0.4805358350276947, | |
| "learning_rate": 5.1475014467157325e-06, | |
| "loss": 1.6133791208267212, | |
| "step": 1078 | |
| }, | |
| { | |
| "epoch": 1.6513761467889907, | |
| "grad_norm": 0.5775420665740967, | |
| "learning_rate": 5.132373338505978e-06, | |
| "loss": 1.2856450080871582, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 1.654434250764526, | |
| "grad_norm": 0.32906994223594666, | |
| "learning_rate": 5.117249411878204e-06, | |
| "loss": 1.04205322265625, | |
| "step": 1082 | |
| }, | |
| { | |
| "epoch": 1.6574923547400613, | |
| "grad_norm": 0.5074779987335205, | |
| "learning_rate": 5.10212983886003e-06, | |
| "loss": 1.6698901653289795, | |
| "step": 1084 | |
| }, | |
| { | |
| "epoch": 1.6605504587155964, | |
| "grad_norm": 0.36449626088142395, | |
| "learning_rate": 5.087014791429552e-06, | |
| "loss": 1.449878215789795, | |
| "step": 1086 | |
| }, | |
| { | |
| "epoch": 1.6636085626911314, | |
| "grad_norm": 1.0477646589279175, | |
| "learning_rate": 5.071904441513393e-06, | |
| "loss": 1.5865240097045898, | |
| "step": 1088 | |
| }, | |
| { | |
| "epoch": 1.6666666666666665, | |
| "grad_norm": 0.3797400891780853, | |
| "learning_rate": 5.056798960984741e-06, | |
| "loss": 1.4271771907806396, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 1.6697247706422018, | |
| "grad_norm": 0.3018883466720581, | |
| "learning_rate": 5.041698521661401e-06, | |
| "loss": 1.6418373584747314, | |
| "step": 1092 | |
| }, | |
| { | |
| "epoch": 1.6727828746177371, | |
| "grad_norm": 0.5908496379852295, | |
| "learning_rate": 5.026603295303833e-06, | |
| "loss": 1.5063586235046387, | |
| "step": 1094 | |
| }, | |
| { | |
| "epoch": 1.6758409785932722, | |
| "grad_norm": 0.5799764394760132, | |
| "learning_rate": 5.011513453613205e-06, | |
| "loss": 1.5312390327453613, | |
| "step": 1096 | |
| }, | |
| { | |
| "epoch": 1.6788990825688073, | |
| "grad_norm": 0.4648537337779999, | |
| "learning_rate": 4.996429168229432e-06, | |
| "loss": 1.4155495166778564, | |
| "step": 1098 | |
| }, | |
| { | |
| "epoch": 1.6819571865443423, | |
| "grad_norm": 0.3357274830341339, | |
| "learning_rate": 4.981350610729234e-06, | |
| "loss": 1.07462477684021, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 1.6850152905198776, | |
| "grad_norm": 0.8209952712059021, | |
| "learning_rate": 4.966277952624179e-06, | |
| "loss": 1.532288670539856, | |
| "step": 1102 | |
| }, | |
| { | |
| "epoch": 1.688073394495413, | |
| "grad_norm": 0.6916195750236511, | |
| "learning_rate": 4.951211365358723e-06, | |
| "loss": 1.5015881061553955, | |
| "step": 1104 | |
| }, | |
| { | |
| "epoch": 1.691131498470948, | |
| "grad_norm": 0.6677690148353577, | |
| "learning_rate": 4.936151020308282e-06, | |
| "loss": 1.5166327953338623, | |
| "step": 1106 | |
| }, | |
| { | |
| "epoch": 1.694189602446483, | |
| "grad_norm": 0.7889437675476074, | |
| "learning_rate": 4.921097088777261e-06, | |
| "loss": 1.5232961177825928, | |
| "step": 1108 | |
| }, | |
| { | |
| "epoch": 1.6972477064220184, | |
| "grad_norm": 0.5421835780143738, | |
| "learning_rate": 4.906049741997119e-06, | |
| "loss": 1.3370258808135986, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 1.7003058103975535, | |
| "grad_norm": 0.28672778606414795, | |
| "learning_rate": 4.8910091511244115e-06, | |
| "loss": 1.5552886724472046, | |
| "step": 1112 | |
| }, | |
| { | |
| "epoch": 1.7033639143730888, | |
| "grad_norm": 0.8609727025032043, | |
| "learning_rate": 4.875975487238853e-06, | |
| "loss": 1.6477062702178955, | |
| "step": 1114 | |
| }, | |
| { | |
| "epoch": 1.7064220183486238, | |
| "grad_norm": 0.46577727794647217, | |
| "learning_rate": 4.860948921341366e-06, | |
| "loss": 1.3554713726043701, | |
| "step": 1116 | |
| }, | |
| { | |
| "epoch": 1.709480122324159, | |
| "grad_norm": 0.4357546865940094, | |
| "learning_rate": 4.845929624352136e-06, | |
| "loss": 1.616469383239746, | |
| "step": 1118 | |
| }, | |
| { | |
| "epoch": 1.7125382262996942, | |
| "grad_norm": 0.8016573786735535, | |
| "learning_rate": 4.830917767108666e-06, | |
| "loss": 1.4049677848815918, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 1.7155963302752295, | |
| "grad_norm": 0.34570103883743286, | |
| "learning_rate": 4.8159135203638394e-06, | |
| "loss": 1.5350430011749268, | |
| "step": 1122 | |
| }, | |
| { | |
| "epoch": 1.7186544342507646, | |
| "grad_norm": 0.6164813041687012, | |
| "learning_rate": 4.800917054783971e-06, | |
| "loss": 1.4737257957458496, | |
| "step": 1124 | |
| }, | |
| { | |
| "epoch": 1.7217125382262997, | |
| "grad_norm": 0.30021098256111145, | |
| "learning_rate": 4.785928540946869e-06, | |
| "loss": 1.59697425365448, | |
| "step": 1126 | |
| }, | |
| { | |
| "epoch": 1.7247706422018347, | |
| "grad_norm": 0.3294142782688141, | |
| "learning_rate": 4.770948149339897e-06, | |
| "loss": 1.6918811798095703, | |
| "step": 1128 | |
| }, | |
| { | |
| "epoch": 1.72782874617737, | |
| "grad_norm": 0.33221927285194397, | |
| "learning_rate": 4.755976050358026e-06, | |
| "loss": 1.581977128982544, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 1.7308868501529053, | |
| "grad_norm": 0.27995747327804565, | |
| "learning_rate": 4.741012414301907e-06, | |
| "loss": 1.42479407787323, | |
| "step": 1132 | |
| }, | |
| { | |
| "epoch": 1.7339449541284404, | |
| "grad_norm": 0.4526294767856598, | |
| "learning_rate": 4.726057411375927e-06, | |
| "loss": 1.5270183086395264, | |
| "step": 1134 | |
| }, | |
| { | |
| "epoch": 1.7370030581039755, | |
| "grad_norm": 0.6458525657653809, | |
| "learning_rate": 4.711111211686279e-06, | |
| "loss": 1.5350821018218994, | |
| "step": 1136 | |
| }, | |
| { | |
| "epoch": 1.7400611620795106, | |
| "grad_norm": 0.40516841411590576, | |
| "learning_rate": 4.6961739852390175e-06, | |
| "loss": 1.5310497283935547, | |
| "step": 1138 | |
| }, | |
| { | |
| "epoch": 1.7431192660550459, | |
| "grad_norm": 1.3104746341705322, | |
| "learning_rate": 4.681245901938134e-06, | |
| "loss": 1.5385562181472778, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 1.7461773700305812, | |
| "grad_norm": 0.40381914377212524, | |
| "learning_rate": 4.666327131583621e-06, | |
| "loss": 1.5392662286758423, | |
| "step": 1142 | |
| }, | |
| { | |
| "epoch": 1.7492354740061162, | |
| "grad_norm": 0.8844152688980103, | |
| "learning_rate": 4.65141784386954e-06, | |
| "loss": 1.333682894706726, | |
| "step": 1144 | |
| }, | |
| { | |
| "epoch": 1.7522935779816513, | |
| "grad_norm": 0.423922061920166, | |
| "learning_rate": 4.636518208382091e-06, | |
| "loss": 1.4100391864776611, | |
| "step": 1146 | |
| }, | |
| { | |
| "epoch": 1.7553516819571864, | |
| "grad_norm": 0.3589678406715393, | |
| "learning_rate": 4.621628394597687e-06, | |
| "loss": 1.341862440109253, | |
| "step": 1148 | |
| }, | |
| { | |
| "epoch": 1.7584097859327217, | |
| "grad_norm": 0.6498292088508606, | |
| "learning_rate": 4.606748571881018e-06, | |
| "loss": 1.4297010898590088, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 1.761467889908257, | |
| "grad_norm": 0.5506405234336853, | |
| "learning_rate": 4.59187890948314e-06, | |
| "loss": 1.4309487342834473, | |
| "step": 1152 | |
| }, | |
| { | |
| "epoch": 1.764525993883792, | |
| "grad_norm": 0.45955854654312134, | |
| "learning_rate": 4.577019576539527e-06, | |
| "loss": 1.2851155996322632, | |
| "step": 1154 | |
| }, | |
| { | |
| "epoch": 1.7675840978593271, | |
| "grad_norm": 0.28625011444091797, | |
| "learning_rate": 4.562170742068175e-06, | |
| "loss": 0.9397743940353394, | |
| "step": 1156 | |
| }, | |
| { | |
| "epoch": 1.7706422018348624, | |
| "grad_norm": 0.22773736715316772, | |
| "learning_rate": 4.547332574967653e-06, | |
| "loss": 1.237460732460022, | |
| "step": 1158 | |
| }, | |
| { | |
| "epoch": 1.7737003058103975, | |
| "grad_norm": 0.25427719950675964, | |
| "learning_rate": 4.5325052440151985e-06, | |
| "loss": 1.3028910160064697, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 1.7767584097859328, | |
| "grad_norm": 0.2875189781188965, | |
| "learning_rate": 4.517688917864794e-06, | |
| "loss": 1.3547457456588745, | |
| "step": 1162 | |
| }, | |
| { | |
| "epoch": 1.7798165137614679, | |
| "grad_norm": 0.21899199485778809, | |
| "learning_rate": 4.502883765045244e-06, | |
| "loss": 1.36411714553833, | |
| "step": 1164 | |
| }, | |
| { | |
| "epoch": 1.782874617737003, | |
| "grad_norm": 0.21183030307292938, | |
| "learning_rate": 4.488089953958264e-06, | |
| "loss": 1.4323028326034546, | |
| "step": 1166 | |
| }, | |
| { | |
| "epoch": 1.7859327217125383, | |
| "grad_norm": 0.22526955604553223, | |
| "learning_rate": 4.473307652876563e-06, | |
| "loss": 1.3429040908813477, | |
| "step": 1168 | |
| }, | |
| { | |
| "epoch": 1.7889908256880735, | |
| "grad_norm": 0.266107439994812, | |
| "learning_rate": 4.458537029941926e-06, | |
| "loss": 1.3663442134857178, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 1.7920489296636086, | |
| "grad_norm": 0.490496963262558, | |
| "learning_rate": 4.4437782531633074e-06, | |
| "loss": 1.3354597091674805, | |
| "step": 1172 | |
| }, | |
| { | |
| "epoch": 1.7951070336391437, | |
| "grad_norm": 0.1854841560125351, | |
| "learning_rate": 4.429031490414919e-06, | |
| "loss": 1.3446393013000488, | |
| "step": 1174 | |
| }, | |
| { | |
| "epoch": 1.7981651376146788, | |
| "grad_norm": 0.1960364729166031, | |
| "learning_rate": 4.414296909434311e-06, | |
| "loss": 1.3029416799545288, | |
| "step": 1176 | |
| }, | |
| { | |
| "epoch": 1.801223241590214, | |
| "grad_norm": 0.35048866271972656, | |
| "learning_rate": 4.399574677820481e-06, | |
| "loss": 1.348449945449829, | |
| "step": 1178 | |
| }, | |
| { | |
| "epoch": 1.8042813455657494, | |
| "grad_norm": 0.3793323040008545, | |
| "learning_rate": 4.384864963031952e-06, | |
| "loss": 1.297593593597412, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 1.8073394495412844, | |
| "grad_norm": 0.14626124501228333, | |
| "learning_rate": 4.370167932384873e-06, | |
| "loss": 1.1695170402526855, | |
| "step": 1182 | |
| }, | |
| { | |
| "epoch": 1.8103975535168195, | |
| "grad_norm": 0.16865181922912598, | |
| "learning_rate": 4.355483753051125e-06, | |
| "loss": 1.2123092412948608, | |
| "step": 1184 | |
| }, | |
| { | |
| "epoch": 1.8134556574923546, | |
| "grad_norm": 0.1931789070367813, | |
| "learning_rate": 4.340812592056401e-06, | |
| "loss": 0.9932126998901367, | |
| "step": 1186 | |
| }, | |
| { | |
| "epoch": 1.81651376146789, | |
| "grad_norm": 0.2547837793827057, | |
| "learning_rate": 4.326154616278326e-06, | |
| "loss": 1.2431546449661255, | |
| "step": 1188 | |
| }, | |
| { | |
| "epoch": 1.8195718654434252, | |
| "grad_norm": 0.23825769126415253, | |
| "learning_rate": 4.311509992444539e-06, | |
| "loss": 1.286515712738037, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 1.8226299694189603, | |
| "grad_norm": 0.25244706869125366, | |
| "learning_rate": 4.296878887130819e-06, | |
| "loss": 1.3000450134277344, | |
| "step": 1192 | |
| }, | |
| { | |
| "epoch": 1.8256880733944953, | |
| "grad_norm": 0.23451480269432068, | |
| "learning_rate": 4.282261466759165e-06, | |
| "loss": 1.2664532661437988, | |
| "step": 1194 | |
| }, | |
| { | |
| "epoch": 1.8287461773700304, | |
| "grad_norm": 0.2735919952392578, | |
| "learning_rate": 4.267657897595929e-06, | |
| "loss": 1.288360834121704, | |
| "step": 1196 | |
| }, | |
| { | |
| "epoch": 1.8318042813455657, | |
| "grad_norm": 0.18107269704341888, | |
| "learning_rate": 4.253068345749903e-06, | |
| "loss": 1.2625651359558105, | |
| "step": 1198 | |
| }, | |
| { | |
| "epoch": 1.834862385321101, | |
| "grad_norm": 0.2293253242969513, | |
| "learning_rate": 4.238492977170439e-06, | |
| "loss": 1.234043836593628, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 1.837920489296636, | |
| "grad_norm": 0.27160146832466125, | |
| "learning_rate": 4.223931957645566e-06, | |
| "loss": 1.300539493560791, | |
| "step": 1202 | |
| }, | |
| { | |
| "epoch": 1.8409785932721712, | |
| "grad_norm": 0.25112462043762207, | |
| "learning_rate": 4.2093854528000955e-06, | |
| "loss": 1.2719401121139526, | |
| "step": 1204 | |
| }, | |
| { | |
| "epoch": 1.8440366972477065, | |
| "grad_norm": 0.33997592329978943, | |
| "learning_rate": 4.194853628093742e-06, | |
| "loss": 1.2453508377075195, | |
| "step": 1206 | |
| }, | |
| { | |
| "epoch": 1.8470948012232415, | |
| "grad_norm": 0.6576793789863586, | |
| "learning_rate": 4.180336648819242e-06, | |
| "loss": 1.233917236328125, | |
| "step": 1208 | |
| }, | |
| { | |
| "epoch": 1.8501529051987768, | |
| "grad_norm": 0.26551222801208496, | |
| "learning_rate": 4.165834680100469e-06, | |
| "loss": 1.2595276832580566, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 1.853211009174312, | |
| "grad_norm": 0.2170596420764923, | |
| "learning_rate": 4.151347886890562e-06, | |
| "loss": 1.2505378723144531, | |
| "step": 1212 | |
| }, | |
| { | |
| "epoch": 1.856269113149847, | |
| "grad_norm": 0.2974804937839508, | |
| "learning_rate": 4.1368764339700404e-06, | |
| "loss": 1.2092756032943726, | |
| "step": 1214 | |
| }, | |
| { | |
| "epoch": 1.8593272171253823, | |
| "grad_norm": 0.2567199468612671, | |
| "learning_rate": 4.1224204859449425e-06, | |
| "loss": 1.2698951959609985, | |
| "step": 1216 | |
| }, | |
| { | |
| "epoch": 1.8623853211009176, | |
| "grad_norm": 0.23152267932891846, | |
| "learning_rate": 4.107980207244937e-06, | |
| "loss": 1.3027379512786865, | |
| "step": 1218 | |
| }, | |
| { | |
| "epoch": 1.8654434250764527, | |
| "grad_norm": 0.26830926537513733, | |
| "learning_rate": 4.093555762121469e-06, | |
| "loss": 1.308929443359375, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 1.8685015290519877, | |
| "grad_norm": 0.2566030025482178, | |
| "learning_rate": 4.07914731464588e-06, | |
| "loss": 1.2964577674865723, | |
| "step": 1222 | |
| }, | |
| { | |
| "epoch": 1.8715596330275228, | |
| "grad_norm": 0.4025701582431793, | |
| "learning_rate": 4.064755028707546e-06, | |
| "loss": 1.31220543384552, | |
| "step": 1224 | |
| }, | |
| { | |
| "epoch": 1.8746177370030581, | |
| "grad_norm": 0.25386303663253784, | |
| "learning_rate": 4.0503790680120136e-06, | |
| "loss": 1.299830436706543, | |
| "step": 1226 | |
| }, | |
| { | |
| "epoch": 1.8776758409785934, | |
| "grad_norm": 0.39947405457496643, | |
| "learning_rate": 4.036019596079136e-06, | |
| "loss": 1.3202039003372192, | |
| "step": 1228 | |
| }, | |
| { | |
| "epoch": 1.8807339449541285, | |
| "grad_norm": 0.23179592192173004, | |
| "learning_rate": 4.021676776241218e-06, | |
| "loss": 1.2405881881713867, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 1.8837920489296636, | |
| "grad_norm": 0.48796483874320984, | |
| "learning_rate": 4.007350771641151e-06, | |
| "loss": 1.288329005241394, | |
| "step": 1232 | |
| }, | |
| { | |
| "epoch": 1.8868501529051986, | |
| "grad_norm": 0.26645490527153015, | |
| "learning_rate": 3.993041745230562e-06, | |
| "loss": 1.2443333864212036, | |
| "step": 1234 | |
| }, | |
| { | |
| "epoch": 1.889908256880734, | |
| "grad_norm": 0.19715459644794464, | |
| "learning_rate": 3.978749859767961e-06, | |
| "loss": 1.2754254341125488, | |
| "step": 1236 | |
| }, | |
| { | |
| "epoch": 1.8929663608562692, | |
| "grad_norm": 0.2424282431602478, | |
| "learning_rate": 3.9644752778168836e-06, | |
| "loss": 1.2853577136993408, | |
| "step": 1238 | |
| }, | |
| { | |
| "epoch": 1.8960244648318043, | |
| "grad_norm": 0.22451399266719818, | |
| "learning_rate": 3.950218161744049e-06, | |
| "loss": 1.308832049369812, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 1.8990825688073394, | |
| "grad_norm": 0.38970160484313965, | |
| "learning_rate": 3.935978673717512e-06, | |
| "loss": 1.2945680618286133, | |
| "step": 1242 | |
| }, | |
| { | |
| "epoch": 1.9021406727828745, | |
| "grad_norm": 0.22287186980247498, | |
| "learning_rate": 3.921756975704809e-06, | |
| "loss": 1.2276027202606201, | |
| "step": 1244 | |
| }, | |
| { | |
| "epoch": 1.9051987767584098, | |
| "grad_norm": 0.2538350820541382, | |
| "learning_rate": 3.9075532294711326e-06, | |
| "loss": 1.2546557188034058, | |
| "step": 1246 | |
| }, | |
| { | |
| "epoch": 1.908256880733945, | |
| "grad_norm": 0.19810384511947632, | |
| "learning_rate": 3.893367596577475e-06, | |
| "loss": 1.2940235137939453, | |
| "step": 1248 | |
| }, | |
| { | |
| "epoch": 1.9113149847094801, | |
| "grad_norm": 0.20586298406124115, | |
| "learning_rate": 3.8792002383788044e-06, | |
| "loss": 1.3136601448059082, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 1.9143730886850152, | |
| "grad_norm": 0.2770041227340698, | |
| "learning_rate": 3.865051316022215e-06, | |
| "loss": 1.2952957153320312, | |
| "step": 1252 | |
| }, | |
| { | |
| "epoch": 1.9174311926605505, | |
| "grad_norm": 0.22728121280670166, | |
| "learning_rate": 3.85092099044511e-06, | |
| "loss": 1.271630048751831, | |
| "step": 1254 | |
| }, | |
| { | |
| "epoch": 1.9204892966360856, | |
| "grad_norm": 0.1984010934829712, | |
| "learning_rate": 3.836809422373354e-06, | |
| "loss": 1.2360022068023682, | |
| "step": 1256 | |
| }, | |
| { | |
| "epoch": 1.9235474006116209, | |
| "grad_norm": 0.24555295705795288, | |
| "learning_rate": 3.822716772319463e-06, | |
| "loss": 1.271683692932129, | |
| "step": 1258 | |
| }, | |
| { | |
| "epoch": 1.926605504587156, | |
| "grad_norm": 0.20771312713623047, | |
| "learning_rate": 3.8086432005807616e-06, | |
| "loss": 1.2962419986724854, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 1.929663608562691, | |
| "grad_norm": 0.268265962600708, | |
| "learning_rate": 3.794588867237574e-06, | |
| "loss": 1.2458467483520508, | |
| "step": 1262 | |
| }, | |
| { | |
| "epoch": 1.9327217125382263, | |
| "grad_norm": 0.3802253007888794, | |
| "learning_rate": 3.780553932151392e-06, | |
| "loss": 1.2733559608459473, | |
| "step": 1264 | |
| }, | |
| { | |
| "epoch": 1.9357798165137616, | |
| "grad_norm": 0.6309070587158203, | |
| "learning_rate": 3.766538554963062e-06, | |
| "loss": 1.270596981048584, | |
| "step": 1266 | |
| }, | |
| { | |
| "epoch": 1.9388379204892967, | |
| "grad_norm": 0.3053569793701172, | |
| "learning_rate": 3.752542895090969e-06, | |
| "loss": 1.3194211721420288, | |
| "step": 1268 | |
| }, | |
| { | |
| "epoch": 1.9418960244648318, | |
| "grad_norm": 0.21923166513442993, | |
| "learning_rate": 3.7385671117292245e-06, | |
| "loss": 1.3323618173599243, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 1.9449541284403669, | |
| "grad_norm": 0.2166883647441864, | |
| "learning_rate": 3.72461136384585e-06, | |
| "loss": 1.2965784072875977, | |
| "step": 1272 | |
| }, | |
| { | |
| "epoch": 1.9480122324159022, | |
| "grad_norm": 0.2825508117675781, | |
| "learning_rate": 3.710675810180977e-06, | |
| "loss": 1.3159446716308594, | |
| "step": 1274 | |
| }, | |
| { | |
| "epoch": 1.9510703363914375, | |
| "grad_norm": 0.299638956785202, | |
| "learning_rate": 3.696760609245035e-06, | |
| "loss": 1.2833199501037598, | |
| "step": 1276 | |
| }, | |
| { | |
| "epoch": 1.9541284403669725, | |
| "grad_norm": 0.2223178744316101, | |
| "learning_rate": 3.68286591931695e-06, | |
| "loss": 1.22653329372406, | |
| "step": 1278 | |
| }, | |
| { | |
| "epoch": 1.9571865443425076, | |
| "grad_norm": 0.2592408359050751, | |
| "learning_rate": 3.668991898442347e-06, | |
| "loss": 1.2542335987091064, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 1.9602446483180427, | |
| "grad_norm": 0.2755810618400574, | |
| "learning_rate": 3.6551387044317464e-06, | |
| "loss": 1.2745262384414673, | |
| "step": 1282 | |
| }, | |
| { | |
| "epoch": 1.963302752293578, | |
| "grad_norm": 0.21057268977165222, | |
| "learning_rate": 3.6413064948587773e-06, | |
| "loss": 1.2521765232086182, | |
| "step": 1284 | |
| }, | |
| { | |
| "epoch": 1.9663608562691133, | |
| "grad_norm": 0.34427741169929504, | |
| "learning_rate": 3.6274954270583797e-06, | |
| "loss": 1.263521432876587, | |
| "step": 1286 | |
| }, | |
| { | |
| "epoch": 1.9694189602446484, | |
| "grad_norm": 0.2196524441242218, | |
| "learning_rate": 3.6137056581250142e-06, | |
| "loss": 1.3154864311218262, | |
| "step": 1288 | |
| }, | |
| { | |
| "epoch": 1.9724770642201834, | |
| "grad_norm": 0.3191309869289398, | |
| "learning_rate": 3.599937344910872e-06, | |
| "loss": 1.2999801635742188, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 1.9755351681957185, | |
| "grad_norm": 0.22587168216705322, | |
| "learning_rate": 3.5861906440241057e-06, | |
| "loss": 1.3176116943359375, | |
| "step": 1292 | |
| }, | |
| { | |
| "epoch": 1.9785932721712538, | |
| "grad_norm": 0.2769485414028168, | |
| "learning_rate": 3.5724657118270344e-06, | |
| "loss": 1.273116111755371, | |
| "step": 1294 | |
| }, | |
| { | |
| "epoch": 1.981651376146789, | |
| "grad_norm": 0.3299882411956787, | |
| "learning_rate": 3.558762704434361e-06, | |
| "loss": 1.268465280532837, | |
| "step": 1296 | |
| }, | |
| { | |
| "epoch": 1.9847094801223242, | |
| "grad_norm": 0.26859885454177856, | |
| "learning_rate": 3.545081777711412e-06, | |
| "loss": 1.2919847965240479, | |
| "step": 1298 | |
| }, | |
| { | |
| "epoch": 1.9877675840978593, | |
| "grad_norm": 0.9502137899398804, | |
| "learning_rate": 3.5314230872723564e-06, | |
| "loss": 1.342604160308838, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 1.9908256880733946, | |
| "grad_norm": 0.2677958011627197, | |
| "learning_rate": 3.5177867884784334e-06, | |
| "loss": 1.3786706924438477, | |
| "step": 1302 | |
| }, | |
| { | |
| "epoch": 1.9938837920489296, | |
| "grad_norm": 0.40644171833992004, | |
| "learning_rate": 3.504173036436186e-06, | |
| "loss": 1.7326993942260742, | |
| "step": 1304 | |
| }, | |
| { | |
| "epoch": 1.996941896024465, | |
| "grad_norm": 0.45419755578041077, | |
| "learning_rate": 3.4905819859957002e-06, | |
| "loss": 1.7214076519012451, | |
| "step": 1306 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 0.9430392980575562, | |
| "learning_rate": 3.4770137917488454e-06, | |
| "loss": 1.8467901945114136, | |
| "step": 1308 | |
| }, | |
| { | |
| "epoch": 2.003058103975535, | |
| "grad_norm": 0.26824504137039185, | |
| "learning_rate": 3.463468608027505e-06, | |
| "loss": 1.4361066818237305, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 2.00611620795107, | |
| "grad_norm": 0.22578075528144836, | |
| "learning_rate": 3.4499465889018337e-06, | |
| "loss": 1.394030213356018, | |
| "step": 1312 | |
| }, | |
| { | |
| "epoch": 2.0091743119266057, | |
| "grad_norm": 0.26776137948036194, | |
| "learning_rate": 3.4364478881785002e-06, | |
| "loss": 1.4127156734466553, | |
| "step": 1314 | |
| }, | |
| { | |
| "epoch": 2.0122324159021407, | |
| "grad_norm": 0.3707635998725891, | |
| "learning_rate": 3.4229726593989353e-06, | |
| "loss": 1.340601921081543, | |
| "step": 1316 | |
| }, | |
| { | |
| "epoch": 2.015290519877676, | |
| "grad_norm": 0.23890726268291473, | |
| "learning_rate": 3.409521055837586e-06, | |
| "loss": 1.5300512313842773, | |
| "step": 1318 | |
| }, | |
| { | |
| "epoch": 2.018348623853211, | |
| "grad_norm": 0.21163959801197052, | |
| "learning_rate": 3.396093230500176e-06, | |
| "loss": 1.4162603616714478, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 2.021406727828746, | |
| "grad_norm": 0.3320009112358093, | |
| "learning_rate": 3.3826893361219614e-06, | |
| "loss": 1.3640984296798706, | |
| "step": 1322 | |
| }, | |
| { | |
| "epoch": 2.0244648318042815, | |
| "grad_norm": 0.2645728886127472, | |
| "learning_rate": 3.3693095251659975e-06, | |
| "loss": 1.4446080923080444, | |
| "step": 1324 | |
| }, | |
| { | |
| "epoch": 2.0275229357798166, | |
| "grad_norm": 0.2824868857860565, | |
| "learning_rate": 3.3559539498213965e-06, | |
| "loss": 1.3105710744857788, | |
| "step": 1326 | |
| }, | |
| { | |
| "epoch": 2.0305810397553516, | |
| "grad_norm": 0.23126038908958435, | |
| "learning_rate": 3.342622762001606e-06, | |
| "loss": 1.3857829570770264, | |
| "step": 1328 | |
| }, | |
| { | |
| "epoch": 2.0336391437308867, | |
| "grad_norm": 0.3670974671840668, | |
| "learning_rate": 3.3293161133426777e-06, | |
| "loss": 1.496924638748169, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 2.036697247706422, | |
| "grad_norm": 0.3528394401073456, | |
| "learning_rate": 3.3160341552015375e-06, | |
| "loss": 1.4135003089904785, | |
| "step": 1332 | |
| }, | |
| { | |
| "epoch": 2.0397553516819573, | |
| "grad_norm": 0.20478151738643646, | |
| "learning_rate": 3.3027770386542706e-06, | |
| "loss": 1.2156240940093994, | |
| "step": 1334 | |
| }, | |
| { | |
| "epoch": 2.0428134556574924, | |
| "grad_norm": 0.46617865562438965, | |
| "learning_rate": 3.289544914494403e-06, | |
| "loss": 1.3763898611068726, | |
| "step": 1336 | |
| }, | |
| { | |
| "epoch": 2.0458715596330275, | |
| "grad_norm": 0.3884037733078003, | |
| "learning_rate": 3.276337933231179e-06, | |
| "loss": 1.622403860092163, | |
| "step": 1338 | |
| }, | |
| { | |
| "epoch": 2.0489296636085625, | |
| "grad_norm": 0.25180479884147644, | |
| "learning_rate": 3.2631562450878597e-06, | |
| "loss": 1.2860331535339355, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 2.051987767584098, | |
| "grad_norm": 0.3756599426269531, | |
| "learning_rate": 3.2500000000000015e-06, | |
| "loss": 1.4189289808273315, | |
| "step": 1342 | |
| }, | |
| { | |
| "epoch": 2.055045871559633, | |
| "grad_norm": 0.32630693912506104, | |
| "learning_rate": 3.236869347613764e-06, | |
| "loss": 1.308931827545166, | |
| "step": 1344 | |
| }, | |
| { | |
| "epoch": 2.058103975535168, | |
| "grad_norm": 0.28512176871299744, | |
| "learning_rate": 3.2237644372842016e-06, | |
| "loss": 1.2988288402557373, | |
| "step": 1346 | |
| }, | |
| { | |
| "epoch": 2.0611620795107033, | |
| "grad_norm": 0.19952069222927094, | |
| "learning_rate": 3.2106854180735625e-06, | |
| "loss": 1.3092859983444214, | |
| "step": 1348 | |
| }, | |
| { | |
| "epoch": 2.0642201834862384, | |
| "grad_norm": 0.24031268060207367, | |
| "learning_rate": 3.1976324387495948e-06, | |
| "loss": 1.3389842510223389, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 2.067278287461774, | |
| "grad_norm": 0.26569297909736633, | |
| "learning_rate": 3.1846056477838572e-06, | |
| "loss": 1.5241750478744507, | |
| "step": 1352 | |
| }, | |
| { | |
| "epoch": 2.070336391437309, | |
| "grad_norm": 0.5251048803329468, | |
| "learning_rate": 3.171605193350028e-06, | |
| "loss": 1.542860507965088, | |
| "step": 1354 | |
| }, | |
| { | |
| "epoch": 2.073394495412844, | |
| "grad_norm": 0.34643858671188354, | |
| "learning_rate": 3.158631223322216e-06, | |
| "loss": 1.3612843751907349, | |
| "step": 1356 | |
| }, | |
| { | |
| "epoch": 2.076452599388379, | |
| "grad_norm": 0.2934923470020294, | |
| "learning_rate": 3.145683885273288e-06, | |
| "loss": 1.355604648590088, | |
| "step": 1358 | |
| }, | |
| { | |
| "epoch": 2.079510703363914, | |
| "grad_norm": 0.743224024772644, | |
| "learning_rate": 3.1327633264731806e-06, | |
| "loss": 1.341210126876831, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 2.0825688073394497, | |
| "grad_norm": 0.32269051671028137, | |
| "learning_rate": 3.11986969388723e-06, | |
| "loss": 1.4118154048919678, | |
| "step": 1362 | |
| }, | |
| { | |
| "epoch": 2.085626911314985, | |
| "grad_norm": 0.29159843921661377, | |
| "learning_rate": 3.1070031341744983e-06, | |
| "loss": 1.389265775680542, | |
| "step": 1364 | |
| }, | |
| { | |
| "epoch": 2.08868501529052, | |
| "grad_norm": 0.24911250174045563, | |
| "learning_rate": 3.094163793686108e-06, | |
| "loss": 1.422662377357483, | |
| "step": 1366 | |
| }, | |
| { | |
| "epoch": 2.091743119266055, | |
| "grad_norm": 0.21826767921447754, | |
| "learning_rate": 3.0813518184635737e-06, | |
| "loss": 1.4053363800048828, | |
| "step": 1368 | |
| }, | |
| { | |
| "epoch": 2.09480122324159, | |
| "grad_norm": 0.3076784610748291, | |
| "learning_rate": 3.0685673542371465e-06, | |
| "loss": 1.283433198928833, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 2.0978593272171255, | |
| "grad_norm": 0.17591321468353271, | |
| "learning_rate": 3.0558105464241466e-06, | |
| "loss": 1.237450361251831, | |
| "step": 1372 | |
| }, | |
| { | |
| "epoch": 2.1009174311926606, | |
| "grad_norm": 0.2663421332836151, | |
| "learning_rate": 3.0430815401273206e-06, | |
| "loss": 1.3944424390792847, | |
| "step": 1374 | |
| }, | |
| { | |
| "epoch": 2.1039755351681957, | |
| "grad_norm": 0.26904943585395813, | |
| "learning_rate": 3.030380480133186e-06, | |
| "loss": 1.5187671184539795, | |
| "step": 1376 | |
| }, | |
| { | |
| "epoch": 2.1070336391437308, | |
| "grad_norm": 0.6649749279022217, | |
| "learning_rate": 3.017707510910378e-06, | |
| "loss": 1.3504502773284912, | |
| "step": 1378 | |
| }, | |
| { | |
| "epoch": 2.1100917431192663, | |
| "grad_norm": 0.37516942620277405, | |
| "learning_rate": 3.0050627766080188e-06, | |
| "loss": 1.5420799255371094, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 2.1131498470948014, | |
| "grad_norm": 0.342439204454422, | |
| "learning_rate": 2.9924464210540717e-06, | |
| "loss": 1.5547534227371216, | |
| "step": 1382 | |
| }, | |
| { | |
| "epoch": 2.1162079510703364, | |
| "grad_norm": 0.48497647047042847, | |
| "learning_rate": 2.979858587753698e-06, | |
| "loss": 1.3153679370880127, | |
| "step": 1384 | |
| }, | |
| { | |
| "epoch": 2.1192660550458715, | |
| "grad_norm": 0.39512813091278076, | |
| "learning_rate": 2.96729941988764e-06, | |
| "loss": 1.2663487195968628, | |
| "step": 1386 | |
| }, | |
| { | |
| "epoch": 2.1223241590214066, | |
| "grad_norm": 0.3283194899559021, | |
| "learning_rate": 2.9547690603105774e-06, | |
| "loss": 1.4247238636016846, | |
| "step": 1388 | |
| }, | |
| { | |
| "epoch": 2.1253822629969417, | |
| "grad_norm": 0.3506661355495453, | |
| "learning_rate": 2.942267651549513e-06, | |
| "loss": 1.2393386363983154, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 2.128440366972477, | |
| "grad_norm": 0.3594140112400055, | |
| "learning_rate": 2.9297953358021487e-06, | |
| "loss": 1.317380666732788, | |
| "step": 1392 | |
| }, | |
| { | |
| "epoch": 2.1314984709480123, | |
| "grad_norm": 0.5971735715866089, | |
| "learning_rate": 2.9173522549352608e-06, | |
| "loss": 1.2773442268371582, | |
| "step": 1394 | |
| }, | |
| { | |
| "epoch": 2.1345565749235473, | |
| "grad_norm": 0.3666265606880188, | |
| "learning_rate": 2.9049385504830987e-06, | |
| "loss": 1.34925377368927, | |
| "step": 1396 | |
| }, | |
| { | |
| "epoch": 2.1376146788990824, | |
| "grad_norm": 0.31561410427093506, | |
| "learning_rate": 2.892554363645766e-06, | |
| "loss": 1.2674505710601807, | |
| "step": 1398 | |
| }, | |
| { | |
| "epoch": 2.140672782874618, | |
| "grad_norm": 0.2038232684135437, | |
| "learning_rate": 2.880199835287618e-06, | |
| "loss": 1.3169916868209839, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 2.143730886850153, | |
| "grad_norm": 0.25303685665130615, | |
| "learning_rate": 2.867875105935658e-06, | |
| "loss": 1.4587633609771729, | |
| "step": 1402 | |
| }, | |
| { | |
| "epoch": 2.146788990825688, | |
| "grad_norm": 0.31143543124198914, | |
| "learning_rate": 2.8555803157779384e-06, | |
| "loss": 1.3396885395050049, | |
| "step": 1404 | |
| }, | |
| { | |
| "epoch": 2.149847094801223, | |
| "grad_norm": 0.2281101942062378, | |
| "learning_rate": 2.8433156046619705e-06, | |
| "loss": 1.2936108112335205, | |
| "step": 1406 | |
| }, | |
| { | |
| "epoch": 2.1529051987767582, | |
| "grad_norm": 0.3648523688316345, | |
| "learning_rate": 2.831081112093129e-06, | |
| "loss": 1.5100679397583008, | |
| "step": 1408 | |
| }, | |
| { | |
| "epoch": 2.1559633027522938, | |
| "grad_norm": 0.278677374124527, | |
| "learning_rate": 2.8188769772330637e-06, | |
| "loss": 1.3869754076004028, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 2.159021406727829, | |
| "grad_norm": 0.21437983214855194, | |
| "learning_rate": 2.806703338898123e-06, | |
| "loss": 1.3129749298095703, | |
| "step": 1412 | |
| }, | |
| { | |
| "epoch": 2.162079510703364, | |
| "grad_norm": 0.24729043245315552, | |
| "learning_rate": 2.794560335557771e-06, | |
| "loss": 1.4099204540252686, | |
| "step": 1414 | |
| }, | |
| { | |
| "epoch": 2.165137614678899, | |
| "grad_norm": 0.3120039701461792, | |
| "learning_rate": 2.7824481053330154e-06, | |
| "loss": 1.3897459506988525, | |
| "step": 1416 | |
| }, | |
| { | |
| "epoch": 2.168195718654434, | |
| "grad_norm": 0.4525415897369385, | |
| "learning_rate": 2.770366785994827e-06, | |
| "loss": 1.445647954940796, | |
| "step": 1418 | |
| }, | |
| { | |
| "epoch": 2.1712538226299696, | |
| "grad_norm": 0.4266716241836548, | |
| "learning_rate": 2.758316514962585e-06, | |
| "loss": 1.3233726024627686, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 2.1743119266055047, | |
| "grad_norm": 0.28266647458076477, | |
| "learning_rate": 2.7462974293025112e-06, | |
| "loss": 1.4238274097442627, | |
| "step": 1422 | |
| }, | |
| { | |
| "epoch": 2.1773700305810397, | |
| "grad_norm": 0.3248072564601898, | |
| "learning_rate": 2.7343096657261e-06, | |
| "loss": 1.3104677200317383, | |
| "step": 1424 | |
| }, | |
| { | |
| "epoch": 2.180428134556575, | |
| "grad_norm": 0.3584449887275696, | |
| "learning_rate": 2.7223533605885784e-06, | |
| "loss": 1.6277508735656738, | |
| "step": 1426 | |
| }, | |
| { | |
| "epoch": 2.18348623853211, | |
| "grad_norm": 0.35764527320861816, | |
| "learning_rate": 2.710428649887348e-06, | |
| "loss": 1.3882687091827393, | |
| "step": 1428 | |
| }, | |
| { | |
| "epoch": 2.1865443425076454, | |
| "grad_norm": 0.24804551899433136, | |
| "learning_rate": 2.6985356692604336e-06, | |
| "loss": 1.4513651132583618, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 2.1896024464831805, | |
| "grad_norm": 0.2202014923095703, | |
| "learning_rate": 2.686674553984951e-06, | |
| "loss": 1.4342420101165771, | |
| "step": 1432 | |
| }, | |
| { | |
| "epoch": 2.1926605504587156, | |
| "grad_norm": 0.36250677704811096, | |
| "learning_rate": 2.6748454389755576e-06, | |
| "loss": 1.394620656967163, | |
| "step": 1434 | |
| }, | |
| { | |
| "epoch": 2.1957186544342506, | |
| "grad_norm": 0.3232296109199524, | |
| "learning_rate": 2.6630484587829265e-06, | |
| "loss": 1.3978071212768555, | |
| "step": 1436 | |
| }, | |
| { | |
| "epoch": 2.198776758409786, | |
| "grad_norm": 0.4420628547668457, | |
| "learning_rate": 2.651283747592211e-06, | |
| "loss": 1.4031468629837036, | |
| "step": 1438 | |
| }, | |
| { | |
| "epoch": 2.2018348623853212, | |
| "grad_norm": 0.6229142546653748, | |
| "learning_rate": 2.639551439221516e-06, | |
| "loss": 1.3914484977722168, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 2.2048929663608563, | |
| "grad_norm": 0.3233772814273834, | |
| "learning_rate": 2.627851667120387e-06, | |
| "loss": 1.476043701171875, | |
| "step": 1442 | |
| }, | |
| { | |
| "epoch": 2.2079510703363914, | |
| "grad_norm": 0.35107681155204773, | |
| "learning_rate": 2.6161845643682763e-06, | |
| "loss": 1.407777190208435, | |
| "step": 1444 | |
| }, | |
| { | |
| "epoch": 2.2110091743119265, | |
| "grad_norm": 0.3123028874397278, | |
| "learning_rate": 2.6045502636730457e-06, | |
| "loss": 1.3102259635925293, | |
| "step": 1446 | |
| }, | |
| { | |
| "epoch": 2.214067278287462, | |
| "grad_norm": 0.2534146308898926, | |
| "learning_rate": 2.5929488973694406e-06, | |
| "loss": 1.2788276672363281, | |
| "step": 1448 | |
| }, | |
| { | |
| "epoch": 2.217125382262997, | |
| "grad_norm": 0.24462664127349854, | |
| "learning_rate": 2.581380597417599e-06, | |
| "loss": 1.3362743854522705, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 2.220183486238532, | |
| "grad_norm": 0.2978283166885376, | |
| "learning_rate": 2.569845495401542e-06, | |
| "loss": 1.2902576923370361, | |
| "step": 1452 | |
| }, | |
| { | |
| "epoch": 2.223241590214067, | |
| "grad_norm": 0.299277126789093, | |
| "learning_rate": 2.5583437225276818e-06, | |
| "loss": 1.3449206352233887, | |
| "step": 1454 | |
| }, | |
| { | |
| "epoch": 2.2262996941896023, | |
| "grad_norm": 0.36601486802101135, | |
| "learning_rate": 2.546875409623324e-06, | |
| "loss": 1.3038407564163208, | |
| "step": 1456 | |
| }, | |
| { | |
| "epoch": 2.229357798165138, | |
| "grad_norm": 0.42299339175224304, | |
| "learning_rate": 2.5354406871351833e-06, | |
| "loss": 1.5554304122924805, | |
| "step": 1458 | |
| }, | |
| { | |
| "epoch": 2.232415902140673, | |
| "grad_norm": 0.32388123869895935, | |
| "learning_rate": 2.5240396851279043e-06, | |
| "loss": 1.5746049880981445, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 2.235474006116208, | |
| "grad_norm": 0.39095836877822876, | |
| "learning_rate": 2.5126725332825675e-06, | |
| "loss": 1.6094728708267212, | |
| "step": 1462 | |
| }, | |
| { | |
| "epoch": 2.238532110091743, | |
| "grad_norm": 0.5842258930206299, | |
| "learning_rate": 2.501339360895231e-06, | |
| "loss": 1.5279463529586792, | |
| "step": 1464 | |
| }, | |
| { | |
| "epoch": 2.241590214067278, | |
| "grad_norm": 0.3429890275001526, | |
| "learning_rate": 2.4900402968754504e-06, | |
| "loss": 1.5856099128723145, | |
| "step": 1466 | |
| }, | |
| { | |
| "epoch": 2.2446483180428136, | |
| "grad_norm": 0.35519224405288696, | |
| "learning_rate": 2.4787754697448153e-06, | |
| "loss": 1.4757394790649414, | |
| "step": 1468 | |
| }, | |
| { | |
| "epoch": 2.2477064220183487, | |
| "grad_norm": 0.46203580498695374, | |
| "learning_rate": 2.4675450076354822e-06, | |
| "loss": 1.584846019744873, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 2.2507645259938838, | |
| "grad_norm": 0.8099899888038635, | |
| "learning_rate": 2.4563490382887267e-06, | |
| "loss": 1.367172360420227, | |
| "step": 1472 | |
| }, | |
| { | |
| "epoch": 2.253822629969419, | |
| "grad_norm": 0.7287035584449768, | |
| "learning_rate": 2.4451876890534847e-06, | |
| "loss": 1.492293357849121, | |
| "step": 1474 | |
| }, | |
| { | |
| "epoch": 2.2568807339449544, | |
| "grad_norm": 0.3203519284725189, | |
| "learning_rate": 2.4340610868849e-06, | |
| "loss": 1.2751667499542236, | |
| "step": 1476 | |
| }, | |
| { | |
| "epoch": 2.2599388379204894, | |
| "grad_norm": 0.6493098139762878, | |
| "learning_rate": 2.4229693583428916e-06, | |
| "loss": 1.4823472499847412, | |
| "step": 1478 | |
| }, | |
| { | |
| "epoch": 2.2629969418960245, | |
| "grad_norm": 0.4101910889148712, | |
| "learning_rate": 2.4119126295906997e-06, | |
| "loss": 1.09395170211792, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 2.2660550458715596, | |
| "grad_norm": 0.4682796597480774, | |
| "learning_rate": 2.400891026393464e-06, | |
| "loss": 1.0601507425308228, | |
| "step": 1482 | |
| }, | |
| { | |
| "epoch": 2.2691131498470947, | |
| "grad_norm": 0.5146844387054443, | |
| "learning_rate": 2.3899046741167868e-06, | |
| "loss": 1.2724342346191406, | |
| "step": 1484 | |
| }, | |
| { | |
| "epoch": 2.2721712538226297, | |
| "grad_norm": 0.8610156178474426, | |
| "learning_rate": 2.3789536977253034e-06, | |
| "loss": 1.3352521657943726, | |
| "step": 1486 | |
| }, | |
| { | |
| "epoch": 2.2752293577981653, | |
| "grad_norm": 1.053831696510315, | |
| "learning_rate": 2.3680382217812685e-06, | |
| "loss": 1.4391016960144043, | |
| "step": 1488 | |
| }, | |
| { | |
| "epoch": 2.2782874617737003, | |
| "grad_norm": 0.6413374543190002, | |
| "learning_rate": 2.3571583704431355e-06, | |
| "loss": 1.3907897472381592, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 2.2813455657492354, | |
| "grad_norm": 0.30044737458229065, | |
| "learning_rate": 2.346314267464145e-06, | |
| "loss": 1.1618599891662598, | |
| "step": 1492 | |
| }, | |
| { | |
| "epoch": 2.2844036697247705, | |
| "grad_norm": 0.3427642285823822, | |
| "learning_rate": 2.3355060361909134e-06, | |
| "loss": 1.134230375289917, | |
| "step": 1494 | |
| }, | |
| { | |
| "epoch": 2.287461773700306, | |
| "grad_norm": 0.28166523575782776, | |
| "learning_rate": 2.3247337995620363e-06, | |
| "loss": 1.357274055480957, | |
| "step": 1496 | |
| }, | |
| { | |
| "epoch": 2.290519877675841, | |
| "grad_norm": 0.7598418593406677, | |
| "learning_rate": 2.313997680106686e-06, | |
| "loss": 1.2663555145263672, | |
| "step": 1498 | |
| }, | |
| { | |
| "epoch": 2.293577981651376, | |
| "grad_norm": 1.0048569440841675, | |
| "learning_rate": 2.3032977999432205e-06, | |
| "loss": 1.2259790897369385, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 2.2966360856269112, | |
| "grad_norm": 0.3067741096019745, | |
| "learning_rate": 2.2926342807777886e-06, | |
| "loss": 1.435164213180542, | |
| "step": 1502 | |
| }, | |
| { | |
| "epoch": 2.2996941896024463, | |
| "grad_norm": 0.5623937249183655, | |
| "learning_rate": 2.2820072439029524e-06, | |
| "loss": 1.4023568630218506, | |
| "step": 1504 | |
| }, | |
| { | |
| "epoch": 2.302752293577982, | |
| "grad_norm": 0.3359718918800354, | |
| "learning_rate": 2.271416810196308e-06, | |
| "loss": 1.1277801990509033, | |
| "step": 1506 | |
| }, | |
| { | |
| "epoch": 2.305810397553517, | |
| "grad_norm": 0.3305533528327942, | |
| "learning_rate": 2.2608631001190994e-06, | |
| "loss": 1.3414134979248047, | |
| "step": 1508 | |
| }, | |
| { | |
| "epoch": 2.308868501529052, | |
| "grad_norm": 0.28481531143188477, | |
| "learning_rate": 2.2503462337148642e-06, | |
| "loss": 1.4879052639007568, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 2.311926605504587, | |
| "grad_norm": 0.28595951199531555, | |
| "learning_rate": 2.239866330608057e-06, | |
| "loss": 1.6209688186645508, | |
| "step": 1512 | |
| }, | |
| { | |
| "epoch": 2.314984709480122, | |
| "grad_norm": 0.29558923840522766, | |
| "learning_rate": 2.2294235100026933e-06, | |
| "loss": 1.6481235027313232, | |
| "step": 1514 | |
| }, | |
| { | |
| "epoch": 2.3180428134556577, | |
| "grad_norm": 0.5758782029151917, | |
| "learning_rate": 2.21901789068099e-06, | |
| "loss": 1.7679166793823242, | |
| "step": 1516 | |
| }, | |
| { | |
| "epoch": 2.3211009174311927, | |
| "grad_norm": 0.3111439347267151, | |
| "learning_rate": 2.2086495910020192e-06, | |
| "loss": 1.3151183128356934, | |
| "step": 1518 | |
| }, | |
| { | |
| "epoch": 2.324159021406728, | |
| "grad_norm": 0.44918501377105713, | |
| "learning_rate": 2.1983187289003587e-06, | |
| "loss": 1.3933916091918945, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 2.327217125382263, | |
| "grad_norm": 0.3173042833805084, | |
| "learning_rate": 2.188025421884754e-06, | |
| "loss": 1.240437388420105, | |
| "step": 1522 | |
| }, | |
| { | |
| "epoch": 2.330275229357798, | |
| "grad_norm": 0.2350539118051529, | |
| "learning_rate": 2.1777697870367713e-06, | |
| "loss": 1.1647779941558838, | |
| "step": 1524 | |
| }, | |
| { | |
| "epoch": 2.3333333333333335, | |
| "grad_norm": 0.3137843906879425, | |
| "learning_rate": 2.1675519410094803e-06, | |
| "loss": 1.5445265769958496, | |
| "step": 1526 | |
| }, | |
| { | |
| "epoch": 2.3363914373088686, | |
| "grad_norm": 0.5268841981887817, | |
| "learning_rate": 2.157372000026119e-06, | |
| "loss": 1.444595217704773, | |
| "step": 1528 | |
| }, | |
| { | |
| "epoch": 2.3394495412844036, | |
| "grad_norm": 0.3506692349910736, | |
| "learning_rate": 2.1472300798787746e-06, | |
| "loss": 1.6354224681854248, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 2.3425076452599387, | |
| "grad_norm": 0.3233583867549896, | |
| "learning_rate": 2.1371262959270594e-06, | |
| "loss": 1.1021732091903687, | |
| "step": 1532 | |
| }, | |
| { | |
| "epoch": 2.3455657492354742, | |
| "grad_norm": 0.29296091198921204, | |
| "learning_rate": 2.1270607630968104e-06, | |
| "loss": 1.3453254699707031, | |
| "step": 1534 | |
| }, | |
| { | |
| "epoch": 2.3486238532110093, | |
| "grad_norm": 0.3317727744579315, | |
| "learning_rate": 2.1170335958787736e-06, | |
| "loss": 1.607575535774231, | |
| "step": 1536 | |
| }, | |
| { | |
| "epoch": 2.3516819571865444, | |
| "grad_norm": 0.2295382171869278, | |
| "learning_rate": 2.1070449083273047e-06, | |
| "loss": 1.3497262001037598, | |
| "step": 1538 | |
| }, | |
| { | |
| "epoch": 2.3547400611620795, | |
| "grad_norm": 0.4568946957588196, | |
| "learning_rate": 2.0970948140590672e-06, | |
| "loss": 1.509822130203247, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 2.3577981651376145, | |
| "grad_norm": 0.34416595101356506, | |
| "learning_rate": 2.08718342625175e-06, | |
| "loss": 1.385573148727417, | |
| "step": 1542 | |
| }, | |
| { | |
| "epoch": 2.3608562691131496, | |
| "grad_norm": 0.33610644936561584, | |
| "learning_rate": 2.077310857642772e-06, | |
| "loss": 1.3133833408355713, | |
| "step": 1544 | |
| }, | |
| { | |
| "epoch": 2.363914373088685, | |
| "grad_norm": 0.332163006067276, | |
| "learning_rate": 2.067477220527998e-06, | |
| "loss": 1.3794035911560059, | |
| "step": 1546 | |
| }, | |
| { | |
| "epoch": 2.36697247706422, | |
| "grad_norm": 0.46091410517692566, | |
| "learning_rate": 2.05768262676047e-06, | |
| "loss": 1.4221172332763672, | |
| "step": 1548 | |
| }, | |
| { | |
| "epoch": 2.3700305810397553, | |
| "grad_norm": 0.2670794427394867, | |
| "learning_rate": 2.0479271877491278e-06, | |
| "loss": 1.2908828258514404, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 2.3730886850152904, | |
| "grad_norm": 0.31927385926246643, | |
| "learning_rate": 2.038211014457546e-06, | |
| "loss": 1.3988337516784668, | |
| "step": 1552 | |
| }, | |
| { | |
| "epoch": 2.376146788990826, | |
| "grad_norm": 0.4126211404800415, | |
| "learning_rate": 2.028534217402667e-06, | |
| "loss": 1.7016716003417969, | |
| "step": 1554 | |
| }, | |
| { | |
| "epoch": 2.379204892966361, | |
| "grad_norm": 0.6094360947608948, | |
| "learning_rate": 2.0188969066535484e-06, | |
| "loss": 2.0326876640319824, | |
| "step": 1556 | |
| }, | |
| { | |
| "epoch": 2.382262996941896, | |
| "grad_norm": 0.40967652201652527, | |
| "learning_rate": 2.0092991918301106e-06, | |
| "loss": 1.3301377296447754, | |
| "step": 1558 | |
| }, | |
| { | |
| "epoch": 2.385321100917431, | |
| "grad_norm": 0.6155174970626831, | |
| "learning_rate": 1.9997411821018885e-06, | |
| "loss": 1.319265604019165, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 2.388379204892966, | |
| "grad_norm": 0.4441206455230713, | |
| "learning_rate": 1.990222986186786e-06, | |
| "loss": 1.3922169208526611, | |
| "step": 1562 | |
| }, | |
| { | |
| "epoch": 2.3914373088685017, | |
| "grad_norm": 0.5924298167228699, | |
| "learning_rate": 1.980744712349849e-06, | |
| "loss": 1.4741730690002441, | |
| "step": 1564 | |
| }, | |
| { | |
| "epoch": 2.3944954128440368, | |
| "grad_norm": 0.42252296209335327, | |
| "learning_rate": 1.9713064684020262e-06, | |
| "loss": 1.4076108932495117, | |
| "step": 1566 | |
| }, | |
| { | |
| "epoch": 2.397553516819572, | |
| "grad_norm": 0.36031708121299744, | |
| "learning_rate": 1.9619083616989457e-06, | |
| "loss": 1.278861403465271, | |
| "step": 1568 | |
| }, | |
| { | |
| "epoch": 2.400611620795107, | |
| "grad_norm": 0.24064381420612335, | |
| "learning_rate": 1.952550499139689e-06, | |
| "loss": 1.19804048538208, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 2.4036697247706424, | |
| "grad_norm": 0.18197159469127655, | |
| "learning_rate": 1.9432329871655837e-06, | |
| "loss": 1.12447988986969, | |
| "step": 1572 | |
| }, | |
| { | |
| "epoch": 2.4067278287461775, | |
| "grad_norm": 0.30438297986984253, | |
| "learning_rate": 1.933955931758988e-06, | |
| "loss": 1.2643486261367798, | |
| "step": 1574 | |
| }, | |
| { | |
| "epoch": 2.4097859327217126, | |
| "grad_norm": 0.5426669120788574, | |
| "learning_rate": 1.9247194384420855e-06, | |
| "loss": 1.504340410232544, | |
| "step": 1576 | |
| }, | |
| { | |
| "epoch": 2.4128440366972477, | |
| "grad_norm": 0.6118716597557068, | |
| "learning_rate": 1.915523612275681e-06, | |
| "loss": 1.5359920263290405, | |
| "step": 1578 | |
| }, | |
| { | |
| "epoch": 2.4159021406727827, | |
| "grad_norm": 0.5290548801422119, | |
| "learning_rate": 1.9063685578580137e-06, | |
| "loss": 1.5219250917434692, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 2.418960244648318, | |
| "grad_norm": 0.348886638879776, | |
| "learning_rate": 1.8972543793235626e-06, | |
| "loss": 1.5620722770690918, | |
| "step": 1582 | |
| }, | |
| { | |
| "epoch": 2.4220183486238533, | |
| "grad_norm": 0.4480542838573456, | |
| "learning_rate": 1.8881811803418624e-06, | |
| "loss": 1.3870704174041748, | |
| "step": 1584 | |
| }, | |
| { | |
| "epoch": 2.4250764525993884, | |
| "grad_norm": 0.6594481468200684, | |
| "learning_rate": 1.8791490641163218e-06, | |
| "loss": 1.5246330499649048, | |
| "step": 1586 | |
| }, | |
| { | |
| "epoch": 2.4281345565749235, | |
| "grad_norm": 0.48964548110961914, | |
| "learning_rate": 1.870158133383055e-06, | |
| "loss": 1.4073295593261719, | |
| "step": 1588 | |
| }, | |
| { | |
| "epoch": 2.4311926605504586, | |
| "grad_norm": 0.40440455079078674, | |
| "learning_rate": 1.8612084904097117e-06, | |
| "loss": 1.329315423965454, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 2.434250764525994, | |
| "grad_norm": 0.3714819550514221, | |
| "learning_rate": 1.852300236994308e-06, | |
| "loss": 1.3444490432739258, | |
| "step": 1592 | |
| }, | |
| { | |
| "epoch": 2.437308868501529, | |
| "grad_norm": 0.5145377516746521, | |
| "learning_rate": 1.8434334744640763e-06, | |
| "loss": 1.5467479228973389, | |
| "step": 1594 | |
| }, | |
| { | |
| "epoch": 2.4403669724770642, | |
| "grad_norm": 0.46002912521362305, | |
| "learning_rate": 1.8346083036743104e-06, | |
| "loss": 1.289878249168396, | |
| "step": 1596 | |
| }, | |
| { | |
| "epoch": 2.4434250764525993, | |
| "grad_norm": 0.793483555316925, | |
| "learning_rate": 1.8258248250072158e-06, | |
| "loss": 1.4660496711730957, | |
| "step": 1598 | |
| }, | |
| { | |
| "epoch": 2.4464831804281344, | |
| "grad_norm": 0.44911351799964905, | |
| "learning_rate": 1.8170831383707683e-06, | |
| "loss": 1.3652875423431396, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 2.44954128440367, | |
| "grad_norm": 0.38207677006721497, | |
| "learning_rate": 1.8083833431975805e-06, | |
| "loss": 1.3762791156768799, | |
| "step": 1602 | |
| }, | |
| { | |
| "epoch": 2.452599388379205, | |
| "grad_norm": 0.4357513189315796, | |
| "learning_rate": 1.7997255384437695e-06, | |
| "loss": 1.5232503414154053, | |
| "step": 1604 | |
| }, | |
| { | |
| "epoch": 2.45565749235474, | |
| "grad_norm": 0.3423779308795929, | |
| "learning_rate": 1.7911098225878309e-06, | |
| "loss": 1.5271486043930054, | |
| "step": 1606 | |
| }, | |
| { | |
| "epoch": 2.458715596330275, | |
| "grad_norm": 5.960415363311768, | |
| "learning_rate": 1.7825362936295171e-06, | |
| "loss": 1.3485842943191528, | |
| "step": 1608 | |
| }, | |
| { | |
| "epoch": 2.46177370030581, | |
| "grad_norm": 0.36111417412757874, | |
| "learning_rate": 1.774005049088725e-06, | |
| "loss": 1.2900433540344238, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 2.4648318042813457, | |
| "grad_norm": 0.33147767186164856, | |
| "learning_rate": 1.7655161860043873e-06, | |
| "loss": 1.4210761785507202, | |
| "step": 1612 | |
| }, | |
| { | |
| "epoch": 2.467889908256881, | |
| "grad_norm": 0.3786766231060028, | |
| "learning_rate": 1.7570698009333664e-06, | |
| "loss": 1.370017409324646, | |
| "step": 1614 | |
| }, | |
| { | |
| "epoch": 2.470948012232416, | |
| "grad_norm": 1.8267617225646973, | |
| "learning_rate": 1.7486659899493537e-06, | |
| "loss": 1.5153461694717407, | |
| "step": 1616 | |
| }, | |
| { | |
| "epoch": 2.474006116207951, | |
| "grad_norm": 0.3199278712272644, | |
| "learning_rate": 1.740304848641787e-06, | |
| "loss": 1.3838684558868408, | |
| "step": 1618 | |
| }, | |
| { | |
| "epoch": 2.477064220183486, | |
| "grad_norm": 0.3670620322227478, | |
| "learning_rate": 1.731986472114751e-06, | |
| "loss": 1.33723783493042, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 2.4801223241590216, | |
| "grad_norm": 0.36861374974250793, | |
| "learning_rate": 1.7237109549859043e-06, | |
| "loss": 1.2932226657867432, | |
| "step": 1622 | |
| }, | |
| { | |
| "epoch": 2.4831804281345566, | |
| "grad_norm": 0.34438320994377136, | |
| "learning_rate": 1.7154783913853968e-06, | |
| "loss": 1.42689049243927, | |
| "step": 1624 | |
| }, | |
| { | |
| "epoch": 2.4862385321100917, | |
| "grad_norm": 0.23838122189044952, | |
| "learning_rate": 1.7072888749548033e-06, | |
| "loss": 1.4100431203842163, | |
| "step": 1626 | |
| }, | |
| { | |
| "epoch": 2.489296636085627, | |
| "grad_norm": 0.46484264731407166, | |
| "learning_rate": 1.6991424988460592e-06, | |
| "loss": 1.3829045295715332, | |
| "step": 1628 | |
| }, | |
| { | |
| "epoch": 2.4923547400611623, | |
| "grad_norm": 0.3008574843406677, | |
| "learning_rate": 1.6910393557203964e-06, | |
| "loss": 1.5693084001541138, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 2.4954128440366974, | |
| "grad_norm": 0.37115153670310974, | |
| "learning_rate": 1.6829795377472908e-06, | |
| "loss": 1.7590757608413696, | |
| "step": 1632 | |
| }, | |
| { | |
| "epoch": 2.4984709480122325, | |
| "grad_norm": 0.616698682308197, | |
| "learning_rate": 1.674963136603417e-06, | |
| "loss": 1.6397650241851807, | |
| "step": 1634 | |
| }, | |
| { | |
| "epoch": 2.5015290519877675, | |
| "grad_norm": 0.384959876537323, | |
| "learning_rate": 1.6669902434716046e-06, | |
| "loss": 1.6299896240234375, | |
| "step": 1636 | |
| }, | |
| { | |
| "epoch": 2.5045871559633026, | |
| "grad_norm": 0.8294275403022766, | |
| "learning_rate": 1.6590609490397958e-06, | |
| "loss": 1.5394856929779053, | |
| "step": 1638 | |
| }, | |
| { | |
| "epoch": 2.5076452599388377, | |
| "grad_norm": 0.40894415974617004, | |
| "learning_rate": 1.6511753435000205e-06, | |
| "loss": 1.2182371616363525, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 2.510703363914373, | |
| "grad_norm": 0.45905759930610657, | |
| "learning_rate": 1.6433335165473686e-06, | |
| "loss": 1.2023439407348633, | |
| "step": 1642 | |
| }, | |
| { | |
| "epoch": 2.5137614678899083, | |
| "grad_norm": 0.38532376289367676, | |
| "learning_rate": 1.635535557378968e-06, | |
| "loss": 1.6095008850097656, | |
| "step": 1644 | |
| }, | |
| { | |
| "epoch": 2.5168195718654434, | |
| "grad_norm": 1.44415283203125, | |
| "learning_rate": 1.6277815546929688e-06, | |
| "loss": 1.6082322597503662, | |
| "step": 1646 | |
| }, | |
| { | |
| "epoch": 2.5198776758409784, | |
| "grad_norm": 0.5093996524810791, | |
| "learning_rate": 1.6200715966875394e-06, | |
| "loss": 1.7141090631484985, | |
| "step": 1648 | |
| }, | |
| { | |
| "epoch": 2.522935779816514, | |
| "grad_norm": 0.5241023898124695, | |
| "learning_rate": 1.6124057710598603e-06, | |
| "loss": 1.6450610160827637, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 2.525993883792049, | |
| "grad_norm": 0.49204516410827637, | |
| "learning_rate": 1.6047841650051272e-06, | |
| "loss": 1.6974513530731201, | |
| "step": 1652 | |
| }, | |
| { | |
| "epoch": 2.529051987767584, | |
| "grad_norm": 0.8506813049316406, | |
| "learning_rate": 1.5972068652155554e-06, | |
| "loss": 1.5313912630081177, | |
| "step": 1654 | |
| }, | |
| { | |
| "epoch": 2.532110091743119, | |
| "grad_norm": 0.33754727244377136, | |
| "learning_rate": 1.5896739578794e-06, | |
| "loss": 1.5209699869155884, | |
| "step": 1656 | |
| }, | |
| { | |
| "epoch": 2.5351681957186543, | |
| "grad_norm": 0.7774704694747925, | |
| "learning_rate": 1.5821855286799742e-06, | |
| "loss": 1.4035563468933105, | |
| "step": 1658 | |
| }, | |
| { | |
| "epoch": 2.5382262996941893, | |
| "grad_norm": 0.6433319449424744, | |
| "learning_rate": 1.5747416627946673e-06, | |
| "loss": 1.665273666381836, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 2.541284403669725, | |
| "grad_norm": 0.6971220970153809, | |
| "learning_rate": 1.5673424448939887e-06, | |
| "loss": 1.5019344091415405, | |
| "step": 1662 | |
| }, | |
| { | |
| "epoch": 2.54434250764526, | |
| "grad_norm": 0.40314802527427673, | |
| "learning_rate": 1.5599879591405917e-06, | |
| "loss": 1.1620054244995117, | |
| "step": 1664 | |
| }, | |
| { | |
| "epoch": 2.547400611620795, | |
| "grad_norm": 0.48018017411231995, | |
| "learning_rate": 1.552678289188326e-06, | |
| "loss": 1.6923828125, | |
| "step": 1666 | |
| }, | |
| { | |
| "epoch": 2.5504587155963305, | |
| "grad_norm": 0.4809359312057495, | |
| "learning_rate": 1.545413518181283e-06, | |
| "loss": 1.7656713724136353, | |
| "step": 1668 | |
| }, | |
| { | |
| "epoch": 2.5535168195718656, | |
| "grad_norm": 0.40401753783226013, | |
| "learning_rate": 1.5381937287528449e-06, | |
| "loss": 1.8313161134719849, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 2.5565749235474007, | |
| "grad_norm": 0.4581202268600464, | |
| "learning_rate": 1.5310190030247546e-06, | |
| "loss": 1.7572789192199707, | |
| "step": 1672 | |
| }, | |
| { | |
| "epoch": 2.5596330275229358, | |
| "grad_norm": 0.9305920600891113, | |
| "learning_rate": 1.5238894226061737e-06, | |
| "loss": 1.7307026386260986, | |
| "step": 1674 | |
| }, | |
| { | |
| "epoch": 2.562691131498471, | |
| "grad_norm": 0.47380930185317993, | |
| "learning_rate": 1.5168050685927566e-06, | |
| "loss": 1.5947740077972412, | |
| "step": 1676 | |
| }, | |
| { | |
| "epoch": 2.565749235474006, | |
| "grad_norm": 1.2263463735580444, | |
| "learning_rate": 1.5097660215657306e-06, | |
| "loss": 1.4555588960647583, | |
| "step": 1678 | |
| }, | |
| { | |
| "epoch": 2.5688073394495414, | |
| "grad_norm": 0.43118909001350403, | |
| "learning_rate": 1.5027723615909745e-06, | |
| "loss": 1.0147868394851685, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 2.5718654434250765, | |
| "grad_norm": 0.5391921401023865, | |
| "learning_rate": 1.4958241682181137e-06, | |
| "loss": 1.0223249197006226, | |
| "step": 1682 | |
| }, | |
| { | |
| "epoch": 2.5749235474006116, | |
| "grad_norm": 0.2522028386592865, | |
| "learning_rate": 1.4889215204796082e-06, | |
| "loss": 1.250197172164917, | |
| "step": 1684 | |
| }, | |
| { | |
| "epoch": 2.5779816513761467, | |
| "grad_norm": 0.29159918427467346, | |
| "learning_rate": 1.4820644968898605e-06, | |
| "loss": 1.1835776567459106, | |
| "step": 1686 | |
| }, | |
| { | |
| "epoch": 2.581039755351682, | |
| "grad_norm": 0.2946909964084625, | |
| "learning_rate": 1.47525317544432e-06, | |
| "loss": 1.1374409198760986, | |
| "step": 1688 | |
| }, | |
| { | |
| "epoch": 2.5840978593272173, | |
| "grad_norm": 0.19036340713500977, | |
| "learning_rate": 1.468487633618594e-06, | |
| "loss": 1.1817882061004639, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 2.5871559633027523, | |
| "grad_norm": 1.4873279333114624, | |
| "learning_rate": 1.4617679483675673e-06, | |
| "loss": 1.4171775579452515, | |
| "step": 1692 | |
| }, | |
| { | |
| "epoch": 2.5902140672782874, | |
| "grad_norm": 0.32151684165000916, | |
| "learning_rate": 1.4550941961245288e-06, | |
| "loss": 1.3625459671020508, | |
| "step": 1694 | |
| }, | |
| { | |
| "epoch": 2.5932721712538225, | |
| "grad_norm": 0.26637983322143555, | |
| "learning_rate": 1.4484664528003026e-06, | |
| "loss": 1.2058180570602417, | |
| "step": 1696 | |
| }, | |
| { | |
| "epoch": 2.5963302752293576, | |
| "grad_norm": 0.5087877511978149, | |
| "learning_rate": 1.4418847937823784e-06, | |
| "loss": 1.425114631652832, | |
| "step": 1698 | |
| }, | |
| { | |
| "epoch": 2.599388379204893, | |
| "grad_norm": 0.9368872046470642, | |
| "learning_rate": 1.4353492939340618e-06, | |
| "loss": 1.4749643802642822, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 2.602446483180428, | |
| "grad_norm": 0.48912081122398376, | |
| "learning_rate": 1.4288600275936184e-06, | |
| "loss": 1.245436668395996, | |
| "step": 1702 | |
| }, | |
| { | |
| "epoch": 2.6055045871559632, | |
| "grad_norm": 0.4674423635005951, | |
| "learning_rate": 1.4224170685734303e-06, | |
| "loss": 1.4404422044754028, | |
| "step": 1704 | |
| }, | |
| { | |
| "epoch": 2.6085626911314987, | |
| "grad_norm": 0.7305318117141724, | |
| "learning_rate": 1.416020490159152e-06, | |
| "loss": 1.6482999324798584, | |
| "step": 1706 | |
| }, | |
| { | |
| "epoch": 2.611620795107034, | |
| "grad_norm": 0.5728065371513367, | |
| "learning_rate": 1.4096703651088848e-06, | |
| "loss": 1.1557910442352295, | |
| "step": 1708 | |
| }, | |
| { | |
| "epoch": 2.614678899082569, | |
| "grad_norm": 0.6479355096817017, | |
| "learning_rate": 1.4033667656523405e-06, | |
| "loss": 1.4093899726867676, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 2.617737003058104, | |
| "grad_norm": 1.1274484395980835, | |
| "learning_rate": 1.3971097634900262e-06, | |
| "loss": 1.4923943281173706, | |
| "step": 1712 | |
| }, | |
| { | |
| "epoch": 2.620795107033639, | |
| "grad_norm": 0.5374640822410583, | |
| "learning_rate": 1.3908994297924275e-06, | |
| "loss": 1.3800336122512817, | |
| "step": 1714 | |
| }, | |
| { | |
| "epoch": 2.623853211009174, | |
| "grad_norm": 0.6038364171981812, | |
| "learning_rate": 1.3847358351991945e-06, | |
| "loss": 1.2194199562072754, | |
| "step": 1716 | |
| }, | |
| { | |
| "epoch": 2.6269113149847096, | |
| "grad_norm": 0.7064008712768555, | |
| "learning_rate": 1.3786190498183446e-06, | |
| "loss": 0.8604775667190552, | |
| "step": 1718 | |
| }, | |
| { | |
| "epoch": 2.6299694189602447, | |
| "grad_norm": 0.3798482418060303, | |
| "learning_rate": 1.3725491432254627e-06, | |
| "loss": 1.5459158420562744, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 2.63302752293578, | |
| "grad_norm": 0.47553232312202454, | |
| "learning_rate": 1.3665261844629053e-06, | |
| "loss": 1.466538429260254, | |
| "step": 1722 | |
| }, | |
| { | |
| "epoch": 2.636085626911315, | |
| "grad_norm": 0.3397771716117859, | |
| "learning_rate": 1.360550242039024e-06, | |
| "loss": 1.3562582731246948, | |
| "step": 1724 | |
| }, | |
| { | |
| "epoch": 2.6391437308868504, | |
| "grad_norm": 0.282279908657074, | |
| "learning_rate": 1.354621383927379e-06, | |
| "loss": 1.4752657413482666, | |
| "step": 1726 | |
| }, | |
| { | |
| "epoch": 2.6422018348623855, | |
| "grad_norm": 0.3183048963546753, | |
| "learning_rate": 1.3487396775659691e-06, | |
| "loss": 1.4154858589172363, | |
| "step": 1728 | |
| }, | |
| { | |
| "epoch": 2.6452599388379205, | |
| "grad_norm": 0.4210142493247986, | |
| "learning_rate": 1.3429051898564623e-06, | |
| "loss": 1.3750901222229004, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 2.6483180428134556, | |
| "grad_norm": 0.6870266795158386, | |
| "learning_rate": 1.337117987163439e-06, | |
| "loss": 1.5814931392669678, | |
| "step": 1732 | |
| }, | |
| { | |
| "epoch": 2.6513761467889907, | |
| "grad_norm": 0.4824894964694977, | |
| "learning_rate": 1.3313781353136329e-06, | |
| "loss": 1.2281584739685059, | |
| "step": 1734 | |
| }, | |
| { | |
| "epoch": 2.6544342507645258, | |
| "grad_norm": 0.2543982267379761, | |
| "learning_rate": 1.3256856995951852e-06, | |
| "loss": 1.0042641162872314, | |
| "step": 1736 | |
| }, | |
| { | |
| "epoch": 2.6574923547400613, | |
| "grad_norm": 0.39150846004486084, | |
| "learning_rate": 1.3200407447568985e-06, | |
| "loss": 1.6282243728637695, | |
| "step": 1738 | |
| }, | |
| { | |
| "epoch": 2.6605504587155964, | |
| "grad_norm": 0.43744921684265137, | |
| "learning_rate": 1.3144433350075045e-06, | |
| "loss": 1.419670820236206, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 2.6636085626911314, | |
| "grad_norm": 0.5169599652290344, | |
| "learning_rate": 1.3088935340149312e-06, | |
| "loss": 1.5492973327636719, | |
| "step": 1742 | |
| }, | |
| { | |
| "epoch": 2.6666666666666665, | |
| "grad_norm": 0.3686998188495636, | |
| "learning_rate": 1.3033914049055776e-06, | |
| "loss": 1.390296459197998, | |
| "step": 1744 | |
| }, | |
| { | |
| "epoch": 2.669724770642202, | |
| "grad_norm": 0.3961811363697052, | |
| "learning_rate": 1.2979370102636001e-06, | |
| "loss": 1.6185352802276611, | |
| "step": 1746 | |
| }, | |
| { | |
| "epoch": 2.672782874617737, | |
| "grad_norm": 0.4181622266769409, | |
| "learning_rate": 1.2925304121301956e-06, | |
| "loss": 1.47446608543396, | |
| "step": 1748 | |
| }, | |
| { | |
| "epoch": 2.675840978593272, | |
| "grad_norm": 0.5175849199295044, | |
| "learning_rate": 1.2871716720029001e-06, | |
| "loss": 1.4941065311431885, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 2.6788990825688073, | |
| "grad_norm": 0.4671924412250519, | |
| "learning_rate": 1.2818608508348831e-06, | |
| "loss": 1.3738720417022705, | |
| "step": 1752 | |
| }, | |
| { | |
| "epoch": 2.6819571865443423, | |
| "grad_norm": 0.31229135394096375, | |
| "learning_rate": 1.2765980090342638e-06, | |
| "loss": 1.0343739986419678, | |
| "step": 1754 | |
| }, | |
| { | |
| "epoch": 2.6850152905198774, | |
| "grad_norm": 0.5780667662620544, | |
| "learning_rate": 1.2713832064634127e-06, | |
| "loss": 1.4987692832946777, | |
| "step": 1756 | |
| }, | |
| { | |
| "epoch": 2.688073394495413, | |
| "grad_norm": 0.29605942964553833, | |
| "learning_rate": 1.2662165024382813e-06, | |
| "loss": 1.4711230993270874, | |
| "step": 1758 | |
| }, | |
| { | |
| "epoch": 2.691131498470948, | |
| "grad_norm": 0.4572795629501343, | |
| "learning_rate": 1.2610979557277186e-06, | |
| "loss": 1.4898228645324707, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 2.694189602446483, | |
| "grad_norm": 0.5139583945274353, | |
| "learning_rate": 1.2560276245528099e-06, | |
| "loss": 1.4924449920654297, | |
| "step": 1762 | |
| }, | |
| { | |
| "epoch": 2.6972477064220186, | |
| "grad_norm": 0.3455151319503784, | |
| "learning_rate": 1.251005566586209e-06, | |
| "loss": 1.3008229732513428, | |
| "step": 1764 | |
| }, | |
| { | |
| "epoch": 2.7003058103975537, | |
| "grad_norm": 0.5034812092781067, | |
| "learning_rate": 1.2460318389514868e-06, | |
| "loss": 1.5259795188903809, | |
| "step": 1766 | |
| }, | |
| { | |
| "epoch": 2.7033639143730888, | |
| "grad_norm": 0.55739825963974, | |
| "learning_rate": 1.241106498222476e-06, | |
| "loss": 1.610971212387085, | |
| "step": 1768 | |
| }, | |
| { | |
| "epoch": 2.706422018348624, | |
| "grad_norm": 0.3922676146030426, | |
| "learning_rate": 1.2362296004226327e-06, | |
| "loss": 1.3188968896865845, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 2.709480122324159, | |
| "grad_norm": 0.4953126311302185, | |
| "learning_rate": 1.2314012010243973e-06, | |
| "loss": 1.5828558206558228, | |
| "step": 1772 | |
| }, | |
| { | |
| "epoch": 2.712538226299694, | |
| "grad_norm": 0.6791023015975952, | |
| "learning_rate": 1.2266213549485638e-06, | |
| "loss": 1.3703022003173828, | |
| "step": 1774 | |
| }, | |
| { | |
| "epoch": 2.7155963302752295, | |
| "grad_norm": 0.37211811542510986, | |
| "learning_rate": 1.2218901165636526e-06, | |
| "loss": 1.504420280456543, | |
| "step": 1776 | |
| }, | |
| { | |
| "epoch": 2.7186544342507646, | |
| "grad_norm": 0.2997111678123474, | |
| "learning_rate": 1.2172075396852972e-06, | |
| "loss": 1.442054271697998, | |
| "step": 1778 | |
| }, | |
| { | |
| "epoch": 2.7217125382262997, | |
| "grad_norm": 0.3290131390094757, | |
| "learning_rate": 1.212573677575627e-06, | |
| "loss": 1.5728079080581665, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 2.7247706422018347, | |
| "grad_norm": 0.3726375102996826, | |
| "learning_rate": 1.2079885829426653e-06, | |
| "loss": 1.6637623310089111, | |
| "step": 1782 | |
| }, | |
| { | |
| "epoch": 2.7278287461773703, | |
| "grad_norm": 0.7502315640449524, | |
| "learning_rate": 1.2034523079397264e-06, | |
| "loss": 1.550297737121582, | |
| "step": 1784 | |
| }, | |
| { | |
| "epoch": 2.7308868501529053, | |
| "grad_norm": 0.3677420914173126, | |
| "learning_rate": 1.1989649041648244e-06, | |
| "loss": 1.3913054466247559, | |
| "step": 1786 | |
| }, | |
| { | |
| "epoch": 2.7339449541284404, | |
| "grad_norm": 0.6194299459457397, | |
| "learning_rate": 1.1945264226600878e-06, | |
| "loss": 1.49534010887146, | |
| "step": 1788 | |
| }, | |
| { | |
| "epoch": 2.7370030581039755, | |
| "grad_norm": 0.42255425453186035, | |
| "learning_rate": 1.1901369139111737e-06, | |
| "loss": 1.5017262697219849, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 2.7400611620795106, | |
| "grad_norm": 0.39475998282432556, | |
| "learning_rate": 1.1857964278467003e-06, | |
| "loss": 1.4985376596450806, | |
| "step": 1792 | |
| }, | |
| { | |
| "epoch": 2.7431192660550456, | |
| "grad_norm": 0.4835125207901001, | |
| "learning_rate": 1.1815050138376731e-06, | |
| "loss": 1.513980746269226, | |
| "step": 1794 | |
| }, | |
| { | |
| "epoch": 2.746177370030581, | |
| "grad_norm": 0.27400922775268555, | |
| "learning_rate": 1.1772627206969286e-06, | |
| "loss": 1.5117716789245605, | |
| "step": 1796 | |
| }, | |
| { | |
| "epoch": 2.7492354740061162, | |
| "grad_norm": 0.35452115535736084, | |
| "learning_rate": 1.1730695966785726e-06, | |
| "loss": 1.3024158477783203, | |
| "step": 1798 | |
| }, | |
| { | |
| "epoch": 2.7522935779816513, | |
| "grad_norm": 0.45254552364349365, | |
| "learning_rate": 1.1689256894774384e-06, | |
| "loss": 1.3760697841644287, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 2.7553516819571864, | |
| "grad_norm": 0.6041072010993958, | |
| "learning_rate": 1.1648310462285386e-06, | |
| "loss": 1.298436164855957, | |
| "step": 1802 | |
| }, | |
| { | |
| "epoch": 2.758409785932722, | |
| "grad_norm": 0.555728554725647, | |
| "learning_rate": 1.1607857135065337e-06, | |
| "loss": 1.3885629177093506, | |
| "step": 1804 | |
| }, | |
| { | |
| "epoch": 2.761467889908257, | |
| "grad_norm": 0.5937597751617432, | |
| "learning_rate": 1.1567897373251967e-06, | |
| "loss": 1.3754394054412842, | |
| "step": 1806 | |
| }, | |
| { | |
| "epoch": 2.764525993883792, | |
| "grad_norm": 0.35898932814598083, | |
| "learning_rate": 1.1528431631368957e-06, | |
| "loss": 1.2469127178192139, | |
| "step": 1808 | |
| }, | |
| { | |
| "epoch": 2.767584097859327, | |
| "grad_norm": 0.24282048642635345, | |
| "learning_rate": 1.1489460358320728e-06, | |
| "loss": 0.9015558958053589, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 2.770642201834862, | |
| "grad_norm": 0.27484798431396484, | |
| "learning_rate": 1.1450983997387365e-06, | |
| "loss": 1.2076148986816406, | |
| "step": 1812 | |
| }, | |
| { | |
| "epoch": 2.7737003058103973, | |
| "grad_norm": 0.29970651865005493, | |
| "learning_rate": 1.1413002986219528e-06, | |
| "loss": 1.2744965553283691, | |
| "step": 1814 | |
| }, | |
| { | |
| "epoch": 2.776758409785933, | |
| "grad_norm": 0.26047366857528687, | |
| "learning_rate": 1.1375517756833534e-06, | |
| "loss": 1.3271204233169556, | |
| "step": 1816 | |
| }, | |
| { | |
| "epoch": 2.779816513761468, | |
| "grad_norm": 0.3544829785823822, | |
| "learning_rate": 1.1338528735606391e-06, | |
| "loss": 1.3407413959503174, | |
| "step": 1818 | |
| }, | |
| { | |
| "epoch": 2.782874617737003, | |
| "grad_norm": 0.24868814647197723, | |
| "learning_rate": 1.1302036343270996e-06, | |
| "loss": 1.4030461311340332, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 2.7859327217125385, | |
| "grad_norm": 0.30862292647361755, | |
| "learning_rate": 1.12660409949113e-06, | |
| "loss": 1.3144700527191162, | |
| "step": 1822 | |
| }, | |
| { | |
| "epoch": 2.7889908256880735, | |
| "grad_norm": 0.9225071668624878, | |
| "learning_rate": 1.1230543099957608e-06, | |
| "loss": 1.338538646697998, | |
| "step": 1824 | |
| }, | |
| { | |
| "epoch": 2.7920489296636086, | |
| "grad_norm": 0.32354745268821716, | |
| "learning_rate": 1.1195543062181954e-06, | |
| "loss": 1.310173749923706, | |
| "step": 1826 | |
| }, | |
| { | |
| "epoch": 2.7951070336391437, | |
| "grad_norm": 0.24064457416534424, | |
| "learning_rate": 1.1161041279693445e-06, | |
| "loss": 1.3204376697540283, | |
| "step": 1828 | |
| }, | |
| { | |
| "epoch": 2.7981651376146788, | |
| "grad_norm": 0.23651309311389923, | |
| "learning_rate": 1.1127038144933787e-06, | |
| "loss": 1.281717300415039, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 2.801223241590214, | |
| "grad_norm": 0.21533581614494324, | |
| "learning_rate": 1.1093534044672796e-06, | |
| "loss": 1.3252437114715576, | |
| "step": 1832 | |
| }, | |
| { | |
| "epoch": 2.8042813455657494, | |
| "grad_norm": 0.38182252645492554, | |
| "learning_rate": 1.1060529360004003e-06, | |
| "loss": 1.27931809425354, | |
| "step": 1834 | |
| }, | |
| { | |
| "epoch": 2.8073394495412844, | |
| "grad_norm": 0.12391169369220734, | |
| "learning_rate": 1.1028024466340305e-06, | |
| "loss": 1.1552488803863525, | |
| "step": 1836 | |
| }, | |
| { | |
| "epoch": 2.8103975535168195, | |
| "grad_norm": 0.17293956875801086, | |
| "learning_rate": 1.0996019733409732e-06, | |
| "loss": 1.2036254405975342, | |
| "step": 1838 | |
| }, | |
| { | |
| "epoch": 2.8134556574923546, | |
| "grad_norm": 0.21059419214725494, | |
| "learning_rate": 1.096451552525121e-06, | |
| "loss": 0.9850409030914307, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 2.81651376146789, | |
| "grad_norm": 0.2714180648326874, | |
| "learning_rate": 1.093351220021043e-06, | |
| "loss": 1.2215778827667236, | |
| "step": 1842 | |
| }, | |
| { | |
| "epoch": 2.819571865443425, | |
| "grad_norm": 0.22156941890716553, | |
| "learning_rate": 1.090301011093575e-06, | |
| "loss": 1.2629544734954834, | |
| "step": 1844 | |
| }, | |
| { | |
| "epoch": 2.8226299694189603, | |
| "grad_norm": 0.20625340938568115, | |
| "learning_rate": 1.0873009604374246e-06, | |
| "loss": 1.2778034210205078, | |
| "step": 1846 | |
| }, | |
| { | |
| "epoch": 2.8256880733944953, | |
| "grad_norm": 0.29442811012268066, | |
| "learning_rate": 1.084351102176769e-06, | |
| "loss": 1.2413357496261597, | |
| "step": 1848 | |
| }, | |
| { | |
| "epoch": 2.8287461773700304, | |
| "grad_norm": 0.18544712662696838, | |
| "learning_rate": 1.081451469864872e-06, | |
| "loss": 1.2637240886688232, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 2.8318042813455655, | |
| "grad_norm": 0.22874392569065094, | |
| "learning_rate": 1.0786020964836991e-06, | |
| "loss": 1.2410205602645874, | |
| "step": 1852 | |
| }, | |
| { | |
| "epoch": 2.834862385321101, | |
| "grad_norm": 0.2457342892885208, | |
| "learning_rate": 1.075803014443546e-06, | |
| "loss": 1.2094589471817017, | |
| "step": 1854 | |
| }, | |
| { | |
| "epoch": 2.837920489296636, | |
| "grad_norm": 0.22759026288986206, | |
| "learning_rate": 1.0730542555826654e-06, | |
| "loss": 1.274350643157959, | |
| "step": 1856 | |
| }, | |
| { | |
| "epoch": 2.840978593272171, | |
| "grad_norm": 0.206235870718956, | |
| "learning_rate": 1.07035585116691e-06, | |
| "loss": 1.245356559753418, | |
| "step": 1858 | |
| }, | |
| { | |
| "epoch": 2.8440366972477067, | |
| "grad_norm": 0.49194467067718506, | |
| "learning_rate": 1.0677078318893716e-06, | |
| "loss": 1.2151732444763184, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 2.8470948012232418, | |
| "grad_norm": 0.33920061588287354, | |
| "learning_rate": 1.0651102278700364e-06, | |
| "loss": 1.2073887586593628, | |
| "step": 1862 | |
| }, | |
| { | |
| "epoch": 2.850152905198777, | |
| "grad_norm": 0.25718092918395996, | |
| "learning_rate": 1.062563068655439e-06, | |
| "loss": 1.2325494289398193, | |
| "step": 1864 | |
| }, | |
| { | |
| "epoch": 2.853211009174312, | |
| "grad_norm": 0.24365228414535522, | |
| "learning_rate": 1.0600663832183293e-06, | |
| "loss": 1.2226455211639404, | |
| "step": 1866 | |
| }, | |
| { | |
| "epoch": 2.856269113149847, | |
| "grad_norm": 0.19332216680049896, | |
| "learning_rate": 1.0576201999573405e-06, | |
| "loss": 1.1831451654434204, | |
| "step": 1868 | |
| }, | |
| { | |
| "epoch": 2.859327217125382, | |
| "grad_norm": 0.25319862365722656, | |
| "learning_rate": 1.0552245466966678e-06, | |
| "loss": 1.2440452575683594, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 2.8623853211009176, | |
| "grad_norm": 0.27022072672843933, | |
| "learning_rate": 1.0528794506857508e-06, | |
| "loss": 1.2725245952606201, | |
| "step": 1872 | |
| }, | |
| { | |
| "epoch": 2.8654434250764527, | |
| "grad_norm": 0.3112826943397522, | |
| "learning_rate": 1.050584938598963e-06, | |
| "loss": 1.282654047012329, | |
| "step": 1874 | |
| }, | |
| { | |
| "epoch": 2.8685015290519877, | |
| "grad_norm": 0.2421792596578598, | |
| "learning_rate": 1.048341036535311e-06, | |
| "loss": 1.273242712020874, | |
| "step": 1876 | |
| }, | |
| { | |
| "epoch": 2.871559633027523, | |
| "grad_norm": 0.23541022837162018, | |
| "learning_rate": 1.0461477700181355e-06, | |
| "loss": 1.2899906635284424, | |
| "step": 1878 | |
| }, | |
| { | |
| "epoch": 2.8746177370030583, | |
| "grad_norm": 0.2772025167942047, | |
| "learning_rate": 1.044005163994821e-06, | |
| "loss": 1.2756202220916748, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 2.8776758409785934, | |
| "grad_norm": 0.47361937165260315, | |
| "learning_rate": 1.0419132428365116e-06, | |
| "loss": 1.2930552959442139, | |
| "step": 1882 | |
| }, | |
| { | |
| "epoch": 2.8807339449541285, | |
| "grad_norm": 0.18241485953330994, | |
| "learning_rate": 1.0398720303378374e-06, | |
| "loss": 1.223031997680664, | |
| "step": 1884 | |
| }, | |
| { | |
| "epoch": 2.8837920489296636, | |
| "grad_norm": 0.40437427163124084, | |
| "learning_rate": 1.0378815497166385e-06, | |
| "loss": 1.2670063972473145, | |
| "step": 1886 | |
| }, | |
| { | |
| "epoch": 2.8868501529051986, | |
| "grad_norm": 0.22389701008796692, | |
| "learning_rate": 1.0359418236137047e-06, | |
| "loss": 1.2270456552505493, | |
| "step": 1888 | |
| }, | |
| { | |
| "epoch": 2.8899082568807337, | |
| "grad_norm": 0.29309970140457153, | |
| "learning_rate": 1.0340528740925169e-06, | |
| "loss": 1.2563271522521973, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 2.8929663608562692, | |
| "grad_norm": 0.24637004733085632, | |
| "learning_rate": 1.0322147226389952e-06, | |
| "loss": 1.2668583393096924, | |
| "step": 1892 | |
| }, | |
| { | |
| "epoch": 2.8960244648318043, | |
| "grad_norm": 0.5765001177787781, | |
| "learning_rate": 1.0304273901612566e-06, | |
| "loss": 1.2873437404632568, | |
| "step": 1894 | |
| }, | |
| { | |
| "epoch": 2.8990825688073394, | |
| "grad_norm": 0.3287610411643982, | |
| "learning_rate": 1.028690896989375e-06, | |
| "loss": 1.274024248123169, | |
| "step": 1896 | |
| }, | |
| { | |
| "epoch": 2.9021406727828745, | |
| "grad_norm": 0.2688363492488861, | |
| "learning_rate": 1.027005262875151e-06, | |
| "loss": 1.20585036277771, | |
| "step": 1898 | |
| }, | |
| { | |
| "epoch": 2.90519877675841, | |
| "grad_norm": 0.3984238803386688, | |
| "learning_rate": 1.0253705069918865e-06, | |
| "loss": 1.2360919713974, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 2.908256880733945, | |
| "grad_norm": 0.27637046575546265, | |
| "learning_rate": 1.0237866479341687e-06, | |
| "loss": 1.2752952575683594, | |
| "step": 1902 | |
| }, | |
| { | |
| "epoch": 2.91131498470948, | |
| "grad_norm": 0.5071486234664917, | |
| "learning_rate": 1.0222537037176572e-06, | |
| "loss": 1.2954089641571045, | |
| "step": 1904 | |
| }, | |
| { | |
| "epoch": 2.914373088685015, | |
| "grad_norm": 0.22012606263160706, | |
| "learning_rate": 1.0207716917788768e-06, | |
| "loss": 1.2765629291534424, | |
| "step": 1906 | |
| }, | |
| { | |
| "epoch": 2.9174311926605503, | |
| "grad_norm": 0.20149464905261993, | |
| "learning_rate": 1.019340628975023e-06, | |
| "loss": 1.2535219192504883, | |
| "step": 1908 | |
| }, | |
| { | |
| "epoch": 2.9204892966360854, | |
| "grad_norm": 0.227265864610672, | |
| "learning_rate": 1.0179605315837695e-06, | |
| "loss": 1.2175259590148926, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 2.923547400611621, | |
| "grad_norm": 0.2566111087799072, | |
| "learning_rate": 1.0166314153030799e-06, | |
| "loss": 1.255599856376648, | |
| "step": 1912 | |
| }, | |
| { | |
| "epoch": 2.926605504587156, | |
| "grad_norm": 0.38341450691223145, | |
| "learning_rate": 1.0153532952510328e-06, | |
| "loss": 1.2794301509857178, | |
| "step": 1914 | |
| }, | |
| { | |
| "epoch": 2.929663608562691, | |
| "grad_norm": 0.28000977635383606, | |
| "learning_rate": 1.0141261859656484e-06, | |
| "loss": 1.2272768020629883, | |
| "step": 1916 | |
| }, | |
| { | |
| "epoch": 2.9327217125382266, | |
| "grad_norm": 0.2550158202648163, | |
| "learning_rate": 1.0129501014047236e-06, | |
| "loss": 1.2561171054840088, | |
| "step": 1918 | |
| }, | |
| { | |
| "epoch": 2.9357798165137616, | |
| "grad_norm": 0.21566316485404968, | |
| "learning_rate": 1.0118250549456717e-06, | |
| "loss": 1.2545552253723145, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 2.9388379204892967, | |
| "grad_norm": 0.36798691749572754, | |
| "learning_rate": 1.0107510593853716e-06, | |
| "loss": 1.3016841411590576, | |
| "step": 1922 | |
| }, | |
| { | |
| "epoch": 2.941896024464832, | |
| "grad_norm": 0.29115161299705505, | |
| "learning_rate": 1.0097281269400234e-06, | |
| "loss": 1.3122904300689697, | |
| "step": 1924 | |
| }, | |
| { | |
| "epoch": 2.944954128440367, | |
| "grad_norm": 0.42286819219589233, | |
| "learning_rate": 1.0087562692450062e-06, | |
| "loss": 1.2751294374465942, | |
| "step": 1926 | |
| }, | |
| { | |
| "epoch": 2.948012232415902, | |
| "grad_norm": 0.29917454719543457, | |
| "learning_rate": 1.0078354973547484e-06, | |
| "loss": 1.2971951961517334, | |
| "step": 1928 | |
| }, | |
| { | |
| "epoch": 2.9510703363914375, | |
| "grad_norm": 0.28312069177627563, | |
| "learning_rate": 1.0069658217426017e-06, | |
| "loss": 1.2662827968597412, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 2.9541284403669725, | |
| "grad_norm": 0.2748239040374756, | |
| "learning_rate": 1.0061472523007213e-06, | |
| "loss": 1.209917664527893, | |
| "step": 1932 | |
| }, | |
| { | |
| "epoch": 2.9571865443425076, | |
| "grad_norm": 0.36147835850715637, | |
| "learning_rate": 1.0053797983399524e-06, | |
| "loss": 1.2387361526489258, | |
| "step": 1934 | |
| }, | |
| { | |
| "epoch": 2.9602446483180427, | |
| "grad_norm": 0.34865546226501465, | |
| "learning_rate": 1.004663468589726e-06, | |
| "loss": 1.2596259117126465, | |
| "step": 1936 | |
| }, | |
| { | |
| "epoch": 2.963302752293578, | |
| "grad_norm": 0.23798368871212006, | |
| "learning_rate": 1.0039982711979603e-06, | |
| "loss": 1.239612340927124, | |
| "step": 1938 | |
| }, | |
| { | |
| "epoch": 2.9663608562691133, | |
| "grad_norm": 0.31115320324897766, | |
| "learning_rate": 1.0033842137309649e-06, | |
| "loss": 1.2498747110366821, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 2.9694189602446484, | |
| "grad_norm": 0.37815067172050476, | |
| "learning_rate": 1.0028213031733578e-06, | |
| "loss": 1.3014090061187744, | |
| "step": 1942 | |
| }, | |
| { | |
| "epoch": 2.9724770642201834, | |
| "grad_norm": 0.26476937532424927, | |
| "learning_rate": 1.0023095459279838e-06, | |
| "loss": 1.2854735851287842, | |
| "step": 1944 | |
| }, | |
| { | |
| "epoch": 2.9755351681957185, | |
| "grad_norm": 0.3802984952926636, | |
| "learning_rate": 1.0018489478158434e-06, | |
| "loss": 1.3032188415527344, | |
| "step": 1946 | |
| }, | |
| { | |
| "epoch": 2.9785932721712536, | |
| "grad_norm": 0.3544924855232239, | |
| "learning_rate": 1.0014395140760255e-06, | |
| "loss": 1.2610487937927246, | |
| "step": 1948 | |
| }, | |
| { | |
| "epoch": 2.981651376146789, | |
| "grad_norm": 0.30221831798553467, | |
| "learning_rate": 1.0010812493656488e-06, | |
| "loss": 1.2582671642303467, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 2.984709480122324, | |
| "grad_norm": 0.2731051743030548, | |
| "learning_rate": 1.000774157759806e-06, | |
| "loss": 1.2794151306152344, | |
| "step": 1952 | |
| }, | |
| { | |
| "epoch": 2.9877675840978593, | |
| "grad_norm": 0.3089560270309448, | |
| "learning_rate": 1.0005182427515222e-06, | |
| "loss": 1.334507703781128, | |
| "step": 1954 | |
| }, | |
| { | |
| "epoch": 2.9908256880733948, | |
| "grad_norm": 0.31155917048454285, | |
| "learning_rate": 1.0003135072517108e-06, | |
| "loss": 1.3732435703277588, | |
| "step": 1956 | |
| }, | |
| { | |
| "epoch": 2.99388379204893, | |
| "grad_norm": 0.3963629901409149, | |
| "learning_rate": 1.000159953589143e-06, | |
| "loss": 1.6014021635055542, | |
| "step": 1958 | |
| }, | |
| { | |
| "epoch": 2.996941896024465, | |
| "grad_norm": 0.8739917278289795, | |
| "learning_rate": 1.00005758351042e-06, | |
| "loss": 1.5767264366149902, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "grad_norm": 1.2575660943984985, | |
| "learning_rate": 1.0000063981799541e-06, | |
| "loss": 1.7074545621871948, | |
| "step": 1962 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "step": 1962, | |
| "total_flos": 2.4882019125669396e+18, | |
| "train_loss": 1.4736498374943825, | |
| "train_runtime": 8380.6004, | |
| "train_samples_per_second": 3.746, | |
| "train_steps_per_second": 0.234 | |
| } | |
| ], | |
| "logging_steps": 2, | |
| "max_steps": 1962, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 9999999, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 2.4882019125669396e+18, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |