Image-Text-to-Text
Transformers
Safetensors
qwen3_5
llama-factory
full
Generated from Trainer
conversational
Instructions to use furproxy/9b-114 with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use furproxy/9b-114 with Transformers:
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("image-text-to-text", model="furproxy/9b-114")
messages = [
    {
        "role": "user",
        "content": [
            {"type": "image", "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/p-blog/candy.JPG"},
            {"type": "text", "text": "What animal is on the candy?"}
        ]
    },
]
pipe(text=messages)

# Load model directly
from transformers import AutoProcessor, AutoModelForImageTextToText

processor = AutoProcessor.from_pretrained("furproxy/9b-114")
model = AutoModelForImageTextToText.from_pretrained("furproxy/9b-114")
messages = [
    {
        "role": "user",
        "content": [
            {"type": "image", "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/p-blog/candy.JPG"},
            {"type": "text", "text": "What animal is on the candy?"}
        ]
    },
]
inputs = processor.apply_chat_template(
    messages,
    add_generation_prompt=True,
    tokenize=True,
    return_dict=True,
    return_tensors="pt",
).to(model.device)

outputs = model.generate(**inputs, max_new_tokens=40)
print(processor.decode(outputs[0][inputs["input_ids"].shape[-1]:]))
- Notebooks
- Google Colab
- Kaggle
- Local Apps Settings
- vLLM
How to use furproxy/9b-114 with vLLM:
Install from pip and serve model
# Install vLLM from pip:
pip install vllm

# Start the vLLM server:
vllm serve "furproxy/9b-114"

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/chat/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "furproxy/9b-114",
        "messages": [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": "Describe this image in one sentence."
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
                        }
                    }
                ]
            }
        ]
    }'
Use Docker
docker model run hf.co/furproxy/9b-114
- SGLang
How to use furproxy/9b-114 with SGLang:
Install from pip and serve model
# Install SGLang from pip:
pip install sglang

# Start the SGLang server:
python3 -m sglang.launch_server \
    --model-path "furproxy/9b-114" \
    --host 0.0.0.0 \
    --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "furproxy/9b-114",
        "messages": [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": "Describe this image in one sentence."
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
                        }
                    }
                ]
            }
        ]
    }'
Use Docker images
docker run --gpus all \
    --shm-size 32g \
    -p 30000:30000 \
    -v ~/.cache/huggingface:/root/.cache/huggingface \
    --env "HF_TOKEN=<secret>" \
    --ipc=host \
    lmsysorg/sglang:latest \
    python3 -m sglang.launch_server \
        --model-path "furproxy/9b-114" \
        --host 0.0.0.0 \
        --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "furproxy/9b-114",
        "messages": [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": "Describe this image in one sentence."
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
                        }
                    }
                ]
            }
        ]
    }'
- Docker Model Runner
How to use furproxy/9b-114 with Docker Model Runner:
docker model run hf.co/furproxy/9b-114
| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 3.0, | |
| "eval_steps": 500, | |
| "global_step": 2457, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.002442002442002442, | |
| "grad_norm": 2.3893706798553467, | |
| "learning_rate": 8.130081300813009e-08, | |
| "loss": 1.9268020391464233, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.004884004884004884, | |
| "grad_norm": 2.7525486946105957, | |
| "learning_rate": 2.439024390243903e-07, | |
| "loss": 2.098186492919922, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.007326007326007326, | |
| "grad_norm": 11.947481155395508, | |
| "learning_rate": 4.0650406504065046e-07, | |
| "loss": 2.4138333797454834, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.009768009768009768, | |
| "grad_norm": 2.4891531467437744, | |
| "learning_rate": 5.691056910569106e-07, | |
| "loss": 1.953867793083191, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.01221001221001221, | |
| "grad_norm": 3.49385666847229, | |
| "learning_rate": 7.317073170731707e-07, | |
| "loss": 2.1249871253967285, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.014652014652014652, | |
| "grad_norm": 9.621060371398926, | |
| "learning_rate": 8.94308943089431e-07, | |
| "loss": 1.870603322982788, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.017094017094017096, | |
| "grad_norm": 1.2622815370559692, | |
| "learning_rate": 1.0569105691056912e-06, | |
| "loss": 1.646697998046875, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.019536019536019536, | |
| "grad_norm": 16.232858657836914, | |
| "learning_rate": 1.2195121951219514e-06, | |
| "loss": 1.6898235082626343, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.02197802197802198, | |
| "grad_norm": 10.059346199035645, | |
| "learning_rate": 1.3821138211382116e-06, | |
| "loss": 1.8439620733261108, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.02442002442002442, | |
| "grad_norm": 3.1513400077819824, | |
| "learning_rate": 1.5447154471544717e-06, | |
| "loss": 1.6882305145263672, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.026862026862026864, | |
| "grad_norm": 5.707210540771484, | |
| "learning_rate": 1.707317073170732e-06, | |
| "loss": 1.4086613655090332, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.029304029304029304, | |
| "grad_norm": 1.5729589462280273, | |
| "learning_rate": 1.8699186991869919e-06, | |
| "loss": 1.22359037399292, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.031746031746031744, | |
| "grad_norm": 1.6034835577011108, | |
| "learning_rate": 2.0325203252032523e-06, | |
| "loss": 1.6794222593307495, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.03418803418803419, | |
| "grad_norm": 4.907107353210449, | |
| "learning_rate": 2.1951219512195125e-06, | |
| "loss": 1.7425767183303833, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.03663003663003663, | |
| "grad_norm": 3.0787065029144287, | |
| "learning_rate": 2.3577235772357727e-06, | |
| "loss": 1.1433881521224976, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.03907203907203907, | |
| "grad_norm": 2.307734966278076, | |
| "learning_rate": 2.5203252032520324e-06, | |
| "loss": 1.0014692544937134, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.04151404151404151, | |
| "grad_norm": 2.102328062057495, | |
| "learning_rate": 2.682926829268293e-06, | |
| "loss": 1.558118224143982, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.04395604395604396, | |
| "grad_norm": 2.508723020553589, | |
| "learning_rate": 2.845528455284553e-06, | |
| "loss": 1.2752659320831299, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.0463980463980464, | |
| "grad_norm": 1.2697498798370361, | |
| "learning_rate": 3.0081300813008134e-06, | |
| "loss": 1.5238615274429321, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.04884004884004884, | |
| "grad_norm": 9.991412162780762, | |
| "learning_rate": 3.1707317073170736e-06, | |
| "loss": 1.3837251663208008, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.05128205128205128, | |
| "grad_norm": 1.6207857131958008, | |
| "learning_rate": 3.3333333333333333e-06, | |
| "loss": 1.4677042961120605, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.05372405372405373, | |
| "grad_norm": 1.4186246395111084, | |
| "learning_rate": 3.495934959349594e-06, | |
| "loss": 1.4204754829406738, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.05616605616605617, | |
| "grad_norm": 3.8226752281188965, | |
| "learning_rate": 3.6585365853658537e-06, | |
| "loss": 1.3837416172027588, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.05860805860805861, | |
| "grad_norm": 2.2121241092681885, | |
| "learning_rate": 3.821138211382115e-06, | |
| "loss": 1.0739210844039917, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.06105006105006105, | |
| "grad_norm": 2.6986029148101807, | |
| "learning_rate": 3.983739837398374e-06, | |
| "loss": 0.9827917814254761, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.06349206349206349, | |
| "grad_norm": 1.9821456670761108, | |
| "learning_rate": 4.146341463414634e-06, | |
| "loss": 1.3439877033233643, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.06593406593406594, | |
| "grad_norm": 20.799144744873047, | |
| "learning_rate": 4.308943089430894e-06, | |
| "loss": 1.0445363521575928, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.06837606837606838, | |
| "grad_norm": 2.018078088760376, | |
| "learning_rate": 4.471544715447155e-06, | |
| "loss": 1.3443750143051147, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.07081807081807082, | |
| "grad_norm": 1.7203161716461182, | |
| "learning_rate": 4.634146341463416e-06, | |
| "loss": 1.3475321531295776, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.07326007326007326, | |
| "grad_norm": 1.489026665687561, | |
| "learning_rate": 4.796747967479675e-06, | |
| "loss": 1.4037724733352661, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.0757020757020757, | |
| "grad_norm": 5.072752475738525, | |
| "learning_rate": 4.959349593495935e-06, | |
| "loss": 1.0292410850524902, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.07814407814407814, | |
| "grad_norm": 1.9865821599960327, | |
| "learning_rate": 5.121951219512195e-06, | |
| "loss": 1.6453887224197388, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.08058608058608059, | |
| "grad_norm": 8.150779724121094, | |
| "learning_rate": 5.2845528455284555e-06, | |
| "loss": 1.1829452514648438, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.08302808302808302, | |
| "grad_norm": 1.6878677606582642, | |
| "learning_rate": 5.447154471544716e-06, | |
| "loss": 1.1011936664581299, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.08547008547008547, | |
| "grad_norm": 0.6670809388160706, | |
| "learning_rate": 5.609756097560977e-06, | |
| "loss": 1.1616472005844116, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.08791208791208792, | |
| "grad_norm": 1.4617451429367065, | |
| "learning_rate": 5.772357723577237e-06, | |
| "loss": 1.3839240074157715, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.09035409035409035, | |
| "grad_norm": 1.9361579418182373, | |
| "learning_rate": 5.934959349593496e-06, | |
| "loss": 1.4121818542480469, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.0927960927960928, | |
| "grad_norm": 3.0737693309783936, | |
| "learning_rate": 6.0975609756097564e-06, | |
| "loss": 1.416529893875122, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.09523809523809523, | |
| "grad_norm": 1.4835634231567383, | |
| "learning_rate": 6.260162601626017e-06, | |
| "loss": 1.222318410873413, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.09768009768009768, | |
| "grad_norm": 3.82383394241333, | |
| "learning_rate": 6.422764227642278e-06, | |
| "loss": 1.1652414798736572, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.10012210012210013, | |
| "grad_norm": 13.640969276428223, | |
| "learning_rate": 6.585365853658538e-06, | |
| "loss": 1.0345308780670166, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.10256410256410256, | |
| "grad_norm": 9.584553718566895, | |
| "learning_rate": 6.747967479674797e-06, | |
| "loss": 1.0803658962249756, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.10500610500610501, | |
| "grad_norm": 1.267685055732727, | |
| "learning_rate": 6.910569105691057e-06, | |
| "loss": 1.249168038368225, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.10744810744810745, | |
| "grad_norm": 1.5764743089675903, | |
| "learning_rate": 7.0731707317073175e-06, | |
| "loss": 1.4062129259109497, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.10989010989010989, | |
| "grad_norm": 0.6901059746742249, | |
| "learning_rate": 7.2357723577235786e-06, | |
| "loss": 1.1516574621200562, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.11233211233211234, | |
| "grad_norm": 2.498300552368164, | |
| "learning_rate": 7.398373983739838e-06, | |
| "loss": 1.4968205690383911, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.11477411477411477, | |
| "grad_norm": 8.051702499389648, | |
| "learning_rate": 7.560975609756098e-06, | |
| "loss": 1.1234135627746582, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.11721611721611722, | |
| "grad_norm": 2.431464433670044, | |
| "learning_rate": 7.723577235772358e-06, | |
| "loss": 1.4027729034423828, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.11965811965811966, | |
| "grad_norm": 2.105727434158325, | |
| "learning_rate": 7.886178861788618e-06, | |
| "loss": 1.3487744331359863, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.1221001221001221, | |
| "grad_norm": 2.983010768890381, | |
| "learning_rate": 8.048780487804879e-06, | |
| "loss": 1.074942708015442, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.12454212454212454, | |
| "grad_norm": 3.559720039367676, | |
| "learning_rate": 8.21138211382114e-06, | |
| "loss": 1.0268707275390625, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.12698412698412698, | |
| "grad_norm": 1.4598705768585205, | |
| "learning_rate": 8.373983739837399e-06, | |
| "loss": 0.9993240833282471, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.12942612942612944, | |
| "grad_norm": 1.1988660097122192, | |
| "learning_rate": 8.536585365853658e-06, | |
| "loss": 0.9525761604309082, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.13186813186813187, | |
| "grad_norm": 27.485122680664062, | |
| "learning_rate": 8.69918699186992e-06, | |
| "loss": 1.3531205654144287, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.1343101343101343, | |
| "grad_norm": 2.1461856365203857, | |
| "learning_rate": 8.86178861788618e-06, | |
| "loss": 1.6010525226593018, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.13675213675213677, | |
| "grad_norm": 2.505549430847168, | |
| "learning_rate": 9.02439024390244e-06, | |
| "loss": 1.41323721408844, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.1391941391941392, | |
| "grad_norm": 1.478813648223877, | |
| "learning_rate": 9.1869918699187e-06, | |
| "loss": 1.1879425048828125, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.14163614163614163, | |
| "grad_norm": 1.3980270624160767, | |
| "learning_rate": 9.34959349593496e-06, | |
| "loss": 1.3165570497512817, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.14407814407814407, | |
| "grad_norm": 3.0620999336242676, | |
| "learning_rate": 9.51219512195122e-06, | |
| "loss": 1.2242571115493774, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 0.14652014652014653, | |
| "grad_norm": 1.2746002674102783, | |
| "learning_rate": 9.67479674796748e-06, | |
| "loss": 1.3259317874908447, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.14896214896214896, | |
| "grad_norm": 1.4318238496780396, | |
| "learning_rate": 9.837398373983741e-06, | |
| "loss": 1.3270224332809448, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 0.1514041514041514, | |
| "grad_norm": 2.3391611576080322, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3085572719573975, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.15384615384615385, | |
| "grad_norm": 1.5901521444320679, | |
| "learning_rate": 1.0162601626016262e-05, | |
| "loss": 1.1765004396438599, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 0.1562881562881563, | |
| "grad_norm": 1.361893892288208, | |
| "learning_rate": 1.0325203252032521e-05, | |
| "loss": 1.4069057703018188, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.15873015873015872, | |
| "grad_norm": 2.779815673828125, | |
| "learning_rate": 1.0487804878048782e-05, | |
| "loss": 1.2615665197372437, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.16117216117216118, | |
| "grad_norm": 1.3855739831924438, | |
| "learning_rate": 1.065040650406504e-05, | |
| "loss": 1.0511776208877563, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 0.16361416361416362, | |
| "grad_norm": 1.6709128618240356, | |
| "learning_rate": 1.0813008130081301e-05, | |
| "loss": 1.1562919616699219, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 0.16605616605616605, | |
| "grad_norm": 2.7451095581054688, | |
| "learning_rate": 1.0975609756097562e-05, | |
| "loss": 1.1669853925704956, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.1684981684981685, | |
| "grad_norm": 1.3229765892028809, | |
| "learning_rate": 1.1138211382113821e-05, | |
| "loss": 1.132803201675415, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 0.17094017094017094, | |
| "grad_norm": 1.1329905986785889, | |
| "learning_rate": 1.1300813008130082e-05, | |
| "loss": 1.078572392463684, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.17338217338217338, | |
| "grad_norm": 1.5809731483459473, | |
| "learning_rate": 1.1463414634146342e-05, | |
| "loss": 1.3616305589675903, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 0.17582417582417584, | |
| "grad_norm": 1.0317999124526978, | |
| "learning_rate": 1.1626016260162603e-05, | |
| "loss": 1.1173185110092163, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.17826617826617827, | |
| "grad_norm": 2.4350783824920654, | |
| "learning_rate": 1.1788617886178864e-05, | |
| "loss": 0.9900561571121216, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 0.1807081807081807, | |
| "grad_norm": 1.8645095825195312, | |
| "learning_rate": 1.1951219512195123e-05, | |
| "loss": 0.9898566007614136, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 0.18315018315018314, | |
| "grad_norm": 2.060671091079712, | |
| "learning_rate": 1.2113821138211384e-05, | |
| "loss": 1.3455116748809814, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.1855921855921856, | |
| "grad_norm": 4.001134395599365, | |
| "learning_rate": 1.2276422764227642e-05, | |
| "loss": 1.4696322679519653, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 0.18803418803418803, | |
| "grad_norm": 1.4498295783996582, | |
| "learning_rate": 1.2439024390243903e-05, | |
| "loss": 1.343530297279358, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 0.19047619047619047, | |
| "grad_norm": 6.848788261413574, | |
| "learning_rate": 1.2601626016260164e-05, | |
| "loss": 1.6466281414031982, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 0.19291819291819293, | |
| "grad_norm": 3.0440895557403564, | |
| "learning_rate": 1.2764227642276423e-05, | |
| "loss": 1.1969726085662842, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 0.19536019536019536, | |
| "grad_norm": 3.5766472816467285, | |
| "learning_rate": 1.2926829268292684e-05, | |
| "loss": 0.961052656173706, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.1978021978021978, | |
| "grad_norm": 4.323490619659424, | |
| "learning_rate": 1.3089430894308943e-05, | |
| "loss": 1.4117612838745117, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 0.20024420024420025, | |
| "grad_norm": 1.282266616821289, | |
| "learning_rate": 1.3252032520325204e-05, | |
| "loss": 1.319150447845459, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 0.2026862026862027, | |
| "grad_norm": 1.7024965286254883, | |
| "learning_rate": 1.3414634146341466e-05, | |
| "loss": 1.3318078517913818, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 0.20512820512820512, | |
| "grad_norm": 4.461455821990967, | |
| "learning_rate": 1.3577235772357725e-05, | |
| "loss": 1.1935322284698486, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 0.20757020757020758, | |
| "grad_norm": 4.874426364898682, | |
| "learning_rate": 1.3739837398373986e-05, | |
| "loss": 0.9753493666648865, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.21001221001221002, | |
| "grad_norm": 1.221576452255249, | |
| "learning_rate": 1.3902439024390244e-05, | |
| "loss": 1.0886809825897217, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 0.21245421245421245, | |
| "grad_norm": 1.046645998954773, | |
| "learning_rate": 1.4065040650406505e-05, | |
| "loss": 1.3587074279785156, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 0.2148962148962149, | |
| "grad_norm": 1.0372843742370605, | |
| "learning_rate": 1.4227642276422766e-05, | |
| "loss": 1.2677640914916992, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 0.21733821733821734, | |
| "grad_norm": 3.766371250152588, | |
| "learning_rate": 1.4390243902439025e-05, | |
| "loss": 1.3696472644805908, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 0.21978021978021978, | |
| "grad_norm": 0.8646840453147888, | |
| "learning_rate": 1.4552845528455286e-05, | |
| "loss": 1.0324885845184326, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.2222222222222222, | |
| "grad_norm": 2.4293770790100098, | |
| "learning_rate": 1.4715447154471545e-05, | |
| "loss": 0.9206986427307129, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 0.22466422466422467, | |
| "grad_norm": 1.333274006843567, | |
| "learning_rate": 1.4878048780487806e-05, | |
| "loss": 1.4569449424743652, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 0.2271062271062271, | |
| "grad_norm": 10.24266529083252, | |
| "learning_rate": 1.5040650406504067e-05, | |
| "loss": 1.3317943811416626, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 0.22954822954822954, | |
| "grad_norm": 1.4686931371688843, | |
| "learning_rate": 1.5203252032520327e-05, | |
| "loss": 1.4440950155258179, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 0.231990231990232, | |
| "grad_norm": 3.4735898971557617, | |
| "learning_rate": 1.5365853658536586e-05, | |
| "loss": 1.4389160871505737, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.23443223443223443, | |
| "grad_norm": 1.2915067672729492, | |
| "learning_rate": 1.5528455284552847e-05, | |
| "loss": 1.383222222328186, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 0.23687423687423687, | |
| "grad_norm": 1.2586745023727417, | |
| "learning_rate": 1.5691056910569108e-05, | |
| "loss": 1.3772218227386475, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 0.23931623931623933, | |
| "grad_norm": 5.940347194671631, | |
| "learning_rate": 1.585365853658537e-05, | |
| "loss": 1.152698040008545, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 0.24175824175824176, | |
| "grad_norm": 1.017399787902832, | |
| "learning_rate": 1.6016260162601627e-05, | |
| "loss": 1.3445426225662231, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 0.2442002442002442, | |
| "grad_norm": 2.1003332138061523, | |
| "learning_rate": 1.6178861788617888e-05, | |
| "loss": 1.4353071451187134, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.24664224664224665, | |
| "grad_norm": 1.2850189208984375, | |
| "learning_rate": 1.6341463414634145e-05, | |
| "loss": 1.3451241254806519, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 0.2490842490842491, | |
| "grad_norm": 1.576464295387268, | |
| "learning_rate": 1.6504065040650406e-05, | |
| "loss": 1.0413107872009277, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 0.2515262515262515, | |
| "grad_norm": 2.5853071212768555, | |
| "learning_rate": 1.6666666666666667e-05, | |
| "loss": 1.319067120552063, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 0.25396825396825395, | |
| "grad_norm": 1.365488052368164, | |
| "learning_rate": 1.682926829268293e-05, | |
| "loss": 1.1813554763793945, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 0.2564102564102564, | |
| "grad_norm": 2.6422295570373535, | |
| "learning_rate": 1.699186991869919e-05, | |
| "loss": 0.9516135454177856, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.2588522588522589, | |
| "grad_norm": 3.197498321533203, | |
| "learning_rate": 1.7154471544715447e-05, | |
| "loss": 0.7080205678939819, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 0.2612942612942613, | |
| "grad_norm": 0.6656016111373901, | |
| "learning_rate": 1.7317073170731708e-05, | |
| "loss": 1.2077349424362183, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 0.26373626373626374, | |
| "grad_norm": 1.1304625272750854, | |
| "learning_rate": 1.747967479674797e-05, | |
| "loss": 1.340700626373291, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 0.2661782661782662, | |
| "grad_norm": 1.5265967845916748, | |
| "learning_rate": 1.7642276422764227e-05, | |
| "loss": 1.2717061042785645, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 0.2686202686202686, | |
| "grad_norm": 1.4525116682052612, | |
| "learning_rate": 1.7804878048780488e-05, | |
| "loss": 1.3473409414291382, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.27106227106227104, | |
| "grad_norm": 1.32387113571167, | |
| "learning_rate": 1.796747967479675e-05, | |
| "loss": 1.0149593353271484, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 0.27350427350427353, | |
| "grad_norm": 3.1132187843322754, | |
| "learning_rate": 1.813008130081301e-05, | |
| "loss": 1.0153287649154663, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 0.27594627594627597, | |
| "grad_norm": 1.0930202007293701, | |
| "learning_rate": 1.829268292682927e-05, | |
| "loss": 1.3861629962921143, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 0.2783882783882784, | |
| "grad_norm": 1.287597417831421, | |
| "learning_rate": 1.845528455284553e-05, | |
| "loss": 1.4393969774246216, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 0.28083028083028083, | |
| "grad_norm": 2.620121717453003, | |
| "learning_rate": 1.861788617886179e-05, | |
| "loss": 1.3899247646331787, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.28327228327228327, | |
| "grad_norm": 8.519104957580566, | |
| "learning_rate": 1.878048780487805e-05, | |
| "loss": 1.5776143074035645, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 0.2857142857142857, | |
| "grad_norm": 1.6813069581985474, | |
| "learning_rate": 1.8943089430894312e-05, | |
| "loss": 1.1832704544067383, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 0.28815628815628813, | |
| "grad_norm": 0.8178291320800781, | |
| "learning_rate": 1.9105691056910573e-05, | |
| "loss": 1.4064499139785767, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 0.2905982905982906, | |
| "grad_norm": 1.324826717376709, | |
| "learning_rate": 1.926829268292683e-05, | |
| "loss": 1.3489106893539429, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 0.29304029304029305, | |
| "grad_norm": 1.8617048263549805, | |
| "learning_rate": 1.943089430894309e-05, | |
| "loss": 1.2338833808898926, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.2954822954822955, | |
| "grad_norm": 1.1062006950378418, | |
| "learning_rate": 1.959349593495935e-05, | |
| "loss": 1.1017166376113892, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 0.2979242979242979, | |
| "grad_norm": 1.151795744895935, | |
| "learning_rate": 1.975609756097561e-05, | |
| "loss": 1.4224822521209717, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 0.30036630036630035, | |
| "grad_norm": 1.1693967580795288, | |
| "learning_rate": 1.991869918699187e-05, | |
| "loss": 0.9105122089385986, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 0.3028083028083028, | |
| "grad_norm": 2.165432929992676, | |
| "learning_rate": 1.9999990914795638e-05, | |
| "loss": 1.3537715673446655, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 0.3052503052503053, | |
| "grad_norm": 2.523041009902954, | |
| "learning_rate": 1.9999918233270764e-05, | |
| "loss": 1.1235604286193848, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.3076923076923077, | |
| "grad_norm": 4.936850547790527, | |
| "learning_rate": 1.999977287080797e-05, | |
| "loss": 1.2547414302825928, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 0.31013431013431014, | |
| "grad_norm": 1.219511866569519, | |
| "learning_rate": 1.9999554828581173e-05, | |
| "loss": 1.4373202323913574, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 0.3125763125763126, | |
| "grad_norm": 1.137669324874878, | |
| "learning_rate": 1.9999264108351216e-05, | |
| "loss": 1.3956284523010254, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 0.315018315018315, | |
| "grad_norm": 1.6814566850662231, | |
| "learning_rate": 1.999890071246588e-05, | |
| "loss": 1.4139020442962646, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 0.31746031746031744, | |
| "grad_norm": 1.1596673727035522, | |
| "learning_rate": 1.9998464643859853e-05, | |
| "loss": 1.3567984104156494, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.3199023199023199, | |
| "grad_norm": 2.8471524715423584, | |
| "learning_rate": 1.999795590605471e-05, | |
| "loss": 1.6041795015335083, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 0.32234432234432236, | |
| "grad_norm": 3.1703484058380127, | |
| "learning_rate": 1.9997374503158877e-05, | |
| "loss": 0.9505234956741333, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 0.3247863247863248, | |
| "grad_norm": 1.568231463432312, | |
| "learning_rate": 1.9996720439867617e-05, | |
| "loss": 1.1375908851623535, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 0.32722832722832723, | |
| "grad_norm": 1.1084926128387451, | |
| "learning_rate": 1.9995993721462966e-05, | |
| "loss": 1.5744917392730713, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 0.32967032967032966, | |
| "grad_norm": 2.8259096145629883, | |
| "learning_rate": 1.9995194353813707e-05, | |
| "loss": 1.1887890100479126, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.3321123321123321, | |
| "grad_norm": 1.1199963092803955, | |
| "learning_rate": 1.999432234337532e-05, | |
| "loss": 1.4438523054122925, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 0.33455433455433453, | |
| "grad_norm": 3.761988878250122, | |
| "learning_rate": 1.999337769718993e-05, | |
| "loss": 1.2220399379730225, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 0.336996336996337, | |
| "grad_norm": 1.841293454170227, | |
| "learning_rate": 1.9992360422886246e-05, | |
| "loss": 1.1481637954711914, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 0.33943833943833945, | |
| "grad_norm": 1.215539813041687, | |
| "learning_rate": 1.9991270528679508e-05, | |
| "loss": 1.5834959745407104, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 0.3418803418803419, | |
| "grad_norm": 0.9015586972236633, | |
| "learning_rate": 1.9990108023371403e-05, | |
| "loss": 1.4441936016082764, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.3443223443223443, | |
| "grad_norm": 1.5563743114471436, | |
| "learning_rate": 1.9988872916350022e-05, | |
| "loss": 1.376705288887024, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 0.34676434676434675, | |
| "grad_norm": 7.034574031829834, | |
| "learning_rate": 1.9987565217589756e-05, | |
| "loss": 1.4534231424331665, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 0.3492063492063492, | |
| "grad_norm": 0.9118156433105469, | |
| "learning_rate": 1.9986184937651227e-05, | |
| "loss": 1.2641198635101318, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 0.3516483516483517, | |
| "grad_norm": 3.323513984680176, | |
| "learning_rate": 1.9984732087681215e-05, | |
| "loss": 1.834381341934204, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 0.3540903540903541, | |
| "grad_norm": 3.959578037261963, | |
| "learning_rate": 1.9983206679412542e-05, | |
| "loss": 1.1039708852767944, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.35653235653235654, | |
| "grad_norm": 0.8664885759353638, | |
| "learning_rate": 1.9981608725164002e-05, | |
| "loss": 1.4267356395721436, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 0.358974358974359, | |
| "grad_norm": 1.372922658920288, | |
| "learning_rate": 1.9979938237840247e-05, | |
| "loss": 1.198704481124878, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 0.3614163614163614, | |
| "grad_norm": 2.058027982711792, | |
| "learning_rate": 1.9978195230931686e-05, | |
| "loss": 1.1538225412368774, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 0.36385836385836384, | |
| "grad_norm": 4.946676254272461, | |
| "learning_rate": 1.997637971851438e-05, | |
| "loss": 1.5473830699920654, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 0.3663003663003663, | |
| "grad_norm": 2.0882294178009033, | |
| "learning_rate": 1.9974491715249917e-05, | |
| "loss": 1.357876181602478, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.36874236874236876, | |
| "grad_norm": 3.573915958404541, | |
| "learning_rate": 1.9972531236385314e-05, | |
| "loss": 1.0178381204605103, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 0.3711843711843712, | |
| "grad_norm": 1.7873722314834595, | |
| "learning_rate": 1.997049829775287e-05, | |
| "loss": 1.327938199043274, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 0.37362637362637363, | |
| "grad_norm": 1.3761481046676636, | |
| "learning_rate": 1.996839291577006e-05, | |
| "loss": 1.4819612503051758, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 0.37606837606837606, | |
| "grad_norm": 1.9104338884353638, | |
| "learning_rate": 1.996621510743938e-05, | |
| "loss": 1.418102741241455, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 0.3785103785103785, | |
| "grad_norm": 1.1609731912612915, | |
| "learning_rate": 1.9963964890348236e-05, | |
| "loss": 1.4227708578109741, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.38095238095238093, | |
| "grad_norm": 3.0023646354675293, | |
| "learning_rate": 1.9961642282668776e-05, | |
| "loss": 1.1034045219421387, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 0.3833943833943834, | |
| "grad_norm": 4.011119842529297, | |
| "learning_rate": 1.9959247303157763e-05, | |
| "loss": 1.4926037788391113, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 0.38583638583638585, | |
| "grad_norm": 4.095101356506348, | |
| "learning_rate": 1.995677997115641e-05, | |
| "loss": 0.8862283229827881, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 0.3882783882783883, | |
| "grad_norm": 1.9095430374145508, | |
| "learning_rate": 1.9954240306590235e-05, | |
| "loss": 1.15045166015625, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 0.3907203907203907, | |
| "grad_norm": 1.4787174463272095, | |
| "learning_rate": 1.9951628329968885e-05, | |
| "loss": 1.4402953386306763, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.39316239316239315, | |
| "grad_norm": 2.3421995639801025, | |
| "learning_rate": 1.9948944062385994e-05, | |
| "loss": 1.456636667251587, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 0.3956043956043956, | |
| "grad_norm": 1.1577990055084229, | |
| "learning_rate": 1.9946187525518986e-05, | |
| "loss": 1.4146589040756226, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 0.398046398046398, | |
| "grad_norm": 1.954940676689148, | |
| "learning_rate": 1.994335874162892e-05, | |
| "loss": 1.3673632144927979, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 0.4004884004884005, | |
| "grad_norm": 1.90268874168396, | |
| "learning_rate": 1.9940457733560293e-05, | |
| "loss": 1.3601889610290527, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 0.40293040293040294, | |
| "grad_norm": 4.163765907287598, | |
| "learning_rate": 1.993748452474088e-05, | |
| "loss": 0.9897390007972717, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.4053724053724054, | |
| "grad_norm": 3.35142183303833, | |
| "learning_rate": 1.9934439139181516e-05, | |
| "loss": 0.6906993389129639, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 0.4078144078144078, | |
| "grad_norm": 1.405617356300354, | |
| "learning_rate": 1.993132160147593e-05, | |
| "loss": 1.1328214406967163, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 0.41025641025641024, | |
| "grad_norm": 1.5693705081939697, | |
| "learning_rate": 1.9928131936800514e-05, | |
| "loss": 1.4789706468582153, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 0.4126984126984127, | |
| "grad_norm": 1.2349439859390259, | |
| "learning_rate": 1.9924870170914157e-05, | |
| "loss": 1.0828137397766113, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 0.41514041514041516, | |
| "grad_norm": 5.380833148956299, | |
| "learning_rate": 1.9921536330158007e-05, | |
| "loss": 1.1599012613296509, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.4175824175824176, | |
| "grad_norm": 2.7839314937591553, | |
| "learning_rate": 1.9918130441455273e-05, | |
| "loss": 1.6682945489883423, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 0.42002442002442003, | |
| "grad_norm": 1.93392813205719, | |
| "learning_rate": 1.9914652532311005e-05, | |
| "loss": 0.9947870969772339, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 0.42246642246642246, | |
| "grad_norm": 1.5755698680877686, | |
| "learning_rate": 1.991110263081186e-05, | |
| "loss": 1.315640926361084, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 0.4249084249084249, | |
| "grad_norm": 2.16658878326416, | |
| "learning_rate": 1.9907480765625906e-05, | |
| "loss": 1.39967679977417, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 0.42735042735042733, | |
| "grad_norm": 2.711895704269409, | |
| "learning_rate": 1.9903786966002352e-05, | |
| "loss": 0.9204920530319214, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.4297924297924298, | |
| "grad_norm": 2.3947465419769287, | |
| "learning_rate": 1.9900021261771348e-05, | |
| "loss": 1.1823644638061523, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 0.43223443223443225, | |
| "grad_norm": 1.8362082242965698, | |
| "learning_rate": 1.9896183683343706e-05, | |
| "loss": 1.3596951961517334, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 0.4346764346764347, | |
| "grad_norm": 2.1142735481262207, | |
| "learning_rate": 1.989227426171069e-05, | |
| "loss": 1.03623628616333, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 0.4371184371184371, | |
| "grad_norm": 4.102887153625488, | |
| "learning_rate": 1.9888293028443747e-05, | |
| "loss": 1.240249514579773, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 0.43956043956043955, | |
| "grad_norm": 1.5868562459945679, | |
| "learning_rate": 1.9884240015694248e-05, | |
| "loss": 1.281577467918396, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.442002442002442, | |
| "grad_norm": 1.967654824256897, | |
| "learning_rate": 1.988011525619325e-05, | |
| "loss": 1.1424391269683838, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 0.4444444444444444, | |
| "grad_norm": 1.7741585969924927, | |
| "learning_rate": 1.9875918783251207e-05, | |
| "loss": 1.2371528148651123, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 0.4468864468864469, | |
| "grad_norm": 2.0856261253356934, | |
| "learning_rate": 1.9871650630757716e-05, | |
| "loss": 1.4550820589065552, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 0.44932844932844934, | |
| "grad_norm": 1.4822794198989868, | |
| "learning_rate": 1.9867310833181234e-05, | |
| "loss": 1.1890130043029785, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 0.4517704517704518, | |
| "grad_norm": 0.6963343620300293, | |
| "learning_rate": 1.986289942556881e-05, | |
| "loss": 1.2029908895492554, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.4542124542124542, | |
| "grad_norm": 1.5182689428329468, | |
| "learning_rate": 1.9858416443545794e-05, | |
| "loss": 1.3851736783981323, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 0.45665445665445664, | |
| "grad_norm": 1.8810380697250366, | |
| "learning_rate": 1.9853861923315555e-05, | |
| "loss": 1.0434424877166748, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 0.4590964590964591, | |
| "grad_norm": 1.6905688047409058, | |
| "learning_rate": 1.984923590165918e-05, | |
| "loss": 1.301484227180481, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 0.46153846153846156, | |
| "grad_norm": 2.4930872917175293, | |
| "learning_rate": 1.9844538415935187e-05, | |
| "loss": 1.0400949716567993, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 0.463980463980464, | |
| "grad_norm": 4.205483913421631, | |
| "learning_rate": 1.983976950407922e-05, | |
| "loss": 0.9666699767112732, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.46642246642246643, | |
| "grad_norm": 1.9438555240631104, | |
| "learning_rate": 1.983492920460373e-05, | |
| "loss": 1.3446414470672607, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 0.46886446886446886, | |
| "grad_norm": 1.6146860122680664, | |
| "learning_rate": 1.983001755659769e-05, | |
| "loss": 1.2357232570648193, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 0.4713064713064713, | |
| "grad_norm": 2.4254696369171143, | |
| "learning_rate": 1.9825034599726263e-05, | |
| "loss": 1.2619645595550537, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 0.47374847374847373, | |
| "grad_norm": 2.857746124267578, | |
| "learning_rate": 1.9819980374230468e-05, | |
| "loss": 1.6904096603393555, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 0.47619047619047616, | |
| "grad_norm": 1.4608720541000366, | |
| "learning_rate": 1.981485492092689e-05, | |
| "loss": 0.9965710639953613, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.47863247863247865, | |
| "grad_norm": 61.0188102722168, | |
| "learning_rate": 1.9809658281207318e-05, | |
| "loss": 0.9120445251464844, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 0.4810744810744811, | |
| "grad_norm": 2.121208429336548, | |
| "learning_rate": 1.980439049703843e-05, | |
| "loss": 1.0203512907028198, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 0.4835164835164835, | |
| "grad_norm": 1.042589783668518, | |
| "learning_rate": 1.979905161096144e-05, | |
| "loss": 1.3058192729949951, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 0.48595848595848595, | |
| "grad_norm": 1.507728934288025, | |
| "learning_rate": 1.9793641666091773e-05, | |
| "loss": 1.3444452285766602, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 0.4884004884004884, | |
| "grad_norm": 4.658176422119141, | |
| "learning_rate": 1.9788160706118698e-05, | |
| "loss": 0.6673938035964966, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.4908424908424908, | |
| "grad_norm": 1.1496187448501587, | |
| "learning_rate": 1.978260877530499e-05, | |
| "loss": 1.3050227165222168, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 0.4932844932844933, | |
| "grad_norm": 0.8402596712112427, | |
| "learning_rate": 1.9776985918486552e-05, | |
| "loss": 1.4215201139450073, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 0.49572649572649574, | |
| "grad_norm": 8.28558349609375, | |
| "learning_rate": 1.9771292181072076e-05, | |
| "loss": 0.8944355845451355, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 0.4981684981684982, | |
| "grad_norm": 2.735724449157715, | |
| "learning_rate": 1.9765527609042676e-05, | |
| "loss": 1.0254771709442139, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 0.5006105006105006, | |
| "grad_norm": 1.9720531702041626, | |
| "learning_rate": 1.9759692248951482e-05, | |
| "loss": 1.3571816682815552, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.503052503052503, | |
| "grad_norm": 14.514373779296875, | |
| "learning_rate": 1.975378614792332e-05, | |
| "loss": 0.6523332595825195, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 0.5054945054945055, | |
| "grad_norm": 1.5351808071136475, | |
| "learning_rate": 1.9747809353654276e-05, | |
| "loss": 1.3964738845825195, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 0.5079365079365079, | |
| "grad_norm": 1.1067290306091309, | |
| "learning_rate": 1.974176191441135e-05, | |
| "loss": 1.3599458932876587, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 0.5103785103785103, | |
| "grad_norm": 0.968450665473938, | |
| "learning_rate": 1.973564387903204e-05, | |
| "loss": 1.1259132623672485, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 0.5128205128205128, | |
| "grad_norm": 0.7555665373802185, | |
| "learning_rate": 1.972945529692398e-05, | |
| "loss": 1.3250101804733276, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.5152625152625152, | |
| "grad_norm": 1.294765830039978, | |
| "learning_rate": 1.97231962180645e-05, | |
| "loss": 1.3246148824691772, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 0.5177045177045178, | |
| "grad_norm": 3.749925374984741, | |
| "learning_rate": 1.9716866693000248e-05, | |
| "loss": 1.3295143842697144, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 0.5201465201465202, | |
| "grad_norm": 1.7079066038131714, | |
| "learning_rate": 1.9710466772846784e-05, | |
| "loss": 1.1310526132583618, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 0.5225885225885226, | |
| "grad_norm": 1.0455013513565063, | |
| "learning_rate": 1.9703996509288153e-05, | |
| "loss": 1.341339111328125, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 0.525030525030525, | |
| "grad_norm": 2.6277689933776855, | |
| "learning_rate": 1.9697455954576478e-05, | |
| "loss": 0.984380841255188, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.5274725274725275, | |
| "grad_norm": 3.6414973735809326, | |
| "learning_rate": 1.9690845161531532e-05, | |
| "loss": 0.6374328136444092, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 0.5299145299145299, | |
| "grad_norm": 0.9854040741920471, | |
| "learning_rate": 1.968416418354032e-05, | |
| "loss": 1.363136887550354, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 0.5323565323565324, | |
| "grad_norm": 1.02694833278656, | |
| "learning_rate": 1.967741307455663e-05, | |
| "loss": 1.3728197813034058, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 0.5347985347985348, | |
| "grad_norm": 1.2664965391159058, | |
| "learning_rate": 1.967059188910062e-05, | |
| "loss": 1.3319021463394165, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 0.5372405372405372, | |
| "grad_norm": 0.8867588043212891, | |
| "learning_rate": 1.9663700682258367e-05, | |
| "loss": 1.299553394317627, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.5396825396825397, | |
| "grad_norm": 13.338286399841309, | |
| "learning_rate": 1.9656739509681413e-05, | |
| "loss": 1.1493945121765137, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 0.5421245421245421, | |
| "grad_norm": 2.412151575088501, | |
| "learning_rate": 1.9649708427586333e-05, | |
| "loss": 1.0136598348617554, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 0.5445665445665445, | |
| "grad_norm": 2.4818806648254395, | |
| "learning_rate": 1.964260749275427e-05, | |
| "loss": 1.1629705429077148, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 0.5470085470085471, | |
| "grad_norm": 1.1341965198516846, | |
| "learning_rate": 1.963543676253048e-05, | |
| "loss": 1.1858645677566528, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 0.5494505494505495, | |
| "grad_norm": 1.6893372535705566, | |
| "learning_rate": 1.962819629482386e-05, | |
| "loss": 1.1235462427139282, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.5518925518925519, | |
| "grad_norm": 1.6189004182815552, | |
| "learning_rate": 1.9620886148106498e-05, | |
| "loss": 0.9178623557090759, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 0.5543345543345544, | |
| "grad_norm": 1.3195807933807373, | |
| "learning_rate": 1.9613506381413194e-05, | |
| "loss": 1.377665400505066, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 0.5567765567765568, | |
| "grad_norm": 1.4087958335876465, | |
| "learning_rate": 1.960605705434097e-05, | |
| "loss": 1.3081351518630981, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 0.5592185592185592, | |
| "grad_norm": 0.652862012386322, | |
| "learning_rate": 1.95985382270486e-05, | |
| "loss": 0.8939856290817261, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 0.5616605616605617, | |
| "grad_norm": 3.38787579536438, | |
| "learning_rate": 1.9590949960256132e-05, | |
| "loss": 1.266584873199463, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.5641025641025641, | |
| "grad_norm": 1.0980466604232788, | |
| "learning_rate": 1.9583292315244383e-05, | |
| "loss": 1.2569012641906738, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 0.5665445665445665, | |
| "grad_norm": 4.170780181884766, | |
| "learning_rate": 1.9575565353854448e-05, | |
| "loss": 0.641703724861145, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 0.568986568986569, | |
| "grad_norm": 1.1431292295455933, | |
| "learning_rate": 1.9567769138487208e-05, | |
| "loss": 1.567794680595398, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 0.5714285714285714, | |
| "grad_norm": 1.7932583093643188, | |
| "learning_rate": 1.955990373210281e-05, | |
| "loss": 1.3980201482772827, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 0.5738705738705738, | |
| "grad_norm": 2.625420570373535, | |
| "learning_rate": 1.9551969198220188e-05, | |
| "loss": 1.1457037925720215, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.5763125763125763, | |
| "grad_norm": 4.886669635772705, | |
| "learning_rate": 1.954396560091652e-05, | |
| "loss": 1.344892144203186, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 0.5787545787545788, | |
| "grad_norm": 2.250831127166748, | |
| "learning_rate": 1.953589300482671e-05, | |
| "loss": 0.9534360766410828, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 0.5811965811965812, | |
| "grad_norm": 2.8664050102233887, | |
| "learning_rate": 1.9527751475142904e-05, | |
| "loss": 1.0838558673858643, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 0.5836385836385837, | |
| "grad_norm": 0.9391406774520874, | |
| "learning_rate": 1.951954107761391e-05, | |
| "loss": 1.2320207357406616, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 0.5860805860805861, | |
| "grad_norm": 1.4157171249389648, | |
| "learning_rate": 1.9511261878544715e-05, | |
| "loss": 1.3821120262145996, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.5885225885225885, | |
| "grad_norm": 4.214658737182617, | |
| "learning_rate": 1.950291394479592e-05, | |
| "loss": 0.5741876363754272, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 0.590964590964591, | |
| "grad_norm": 2.0429494380950928, | |
| "learning_rate": 1.9494497343783212e-05, | |
| "loss": 1.1259833574295044, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 0.5934065934065934, | |
| "grad_norm": 0.9556084275245667, | |
| "learning_rate": 1.9486012143476813e-05, | |
| "loss": 1.1523076295852661, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 0.5958485958485958, | |
| "grad_norm": 5.83870792388916, | |
| "learning_rate": 1.9477458412400934e-05, | |
| "loss": 1.0496693849563599, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 0.5982905982905983, | |
| "grad_norm": 1.3661986589431763, | |
| "learning_rate": 1.946883621963323e-05, | |
| "loss": 1.1105148792266846, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.6007326007326007, | |
| "grad_norm": 1.4116313457489014, | |
| "learning_rate": 1.946014563480422e-05, | |
| "loss": 0.9300603866577148, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 0.6031746031746031, | |
| "grad_norm": 1.300858974456787, | |
| "learning_rate": 1.9451386728096758e-05, | |
| "loss": 1.0661330223083496, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 0.6056166056166056, | |
| "grad_norm": 1.8178846836090088, | |
| "learning_rate": 1.9442559570245433e-05, | |
| "loss": 1.304194450378418, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 0.608058608058608, | |
| "grad_norm": 1.6697763204574585, | |
| "learning_rate": 1.9433664232536014e-05, | |
| "loss": 0.6469916105270386, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 0.6105006105006106, | |
| "grad_norm": 1.206526279449463, | |
| "learning_rate": 1.9424700786804877e-05, | |
| "loss": 0.9863432049751282, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.612942612942613, | |
| "grad_norm": 1.7002737522125244, | |
| "learning_rate": 1.9415669305438413e-05, | |
| "loss": 1.2856956720352173, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 0.6153846153846154, | |
| "grad_norm": 1.4255826473236084, | |
| "learning_rate": 1.9406569861372466e-05, | |
| "loss": 1.3286441564559937, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 0.6178266178266179, | |
| "grad_norm": 1.0831611156463623, | |
| "learning_rate": 1.9397402528091707e-05, | |
| "loss": 1.3130193948745728, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 0.6202686202686203, | |
| "grad_norm": 1.9533292055130005, | |
| "learning_rate": 1.9388167379629076e-05, | |
| "loss": 1.380988597869873, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 0.6227106227106227, | |
| "grad_norm": 3.5476789474487305, | |
| "learning_rate": 1.9378864490565172e-05, | |
| "loss": 1.3338630199432373, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.6251526251526252, | |
| "grad_norm": 3.0227179527282715, | |
| "learning_rate": 1.9369493936027642e-05, | |
| "loss": 1.2690256834030151, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 0.6275946275946276, | |
| "grad_norm": 0.7818955779075623, | |
| "learning_rate": 1.9360055791690584e-05, | |
| "loss": 1.1770192384719849, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 0.63003663003663, | |
| "grad_norm": 0.7348341941833496, | |
| "learning_rate": 1.935055013377393e-05, | |
| "loss": 1.119304895401001, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 0.6324786324786325, | |
| "grad_norm": 1.465811848640442, | |
| "learning_rate": 1.934097703904284e-05, | |
| "loss": 1.34721040725708, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 0.6349206349206349, | |
| "grad_norm": 1.2145129442214966, | |
| "learning_rate": 1.933133658480707e-05, | |
| "loss": 0.9806722402572632, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.6373626373626373, | |
| "grad_norm": 2.869335174560547, | |
| "learning_rate": 1.9321628848920358e-05, | |
| "loss": 1.0333569049835205, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 0.6398046398046398, | |
| "grad_norm": 2.509185552597046, | |
| "learning_rate": 1.9311853909779785e-05, | |
| "loss": 1.087817907333374, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 0.6422466422466423, | |
| "grad_norm": 1.7746318578720093, | |
| "learning_rate": 1.9302011846325156e-05, | |
| "loss": 1.3438972234725952, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 0.6446886446886447, | |
| "grad_norm": 0.9185584783554077, | |
| "learning_rate": 1.9292102738038347e-05, | |
| "loss": 1.38664972782135, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 0.6471306471306472, | |
| "grad_norm": 1.1560609340667725, | |
| "learning_rate": 1.9282126664942667e-05, | |
| "loss": 1.1136956214904785, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.6495726495726496, | |
| "grad_norm": 1.5920125246047974, | |
| "learning_rate": 1.927208370760223e-05, | |
| "loss": 1.0266146659851074, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 0.652014652014652, | |
| "grad_norm": 2.174090623855591, | |
| "learning_rate": 1.9261973947121273e-05, | |
| "loss": 1.6666396856307983, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 0.6544566544566545, | |
| "grad_norm": 1.7790899276733398, | |
| "learning_rate": 1.925179746514352e-05, | |
| "loss": 0.9882057309150696, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 0.6568986568986569, | |
| "grad_norm": 1.3070317506790161, | |
| "learning_rate": 1.9241554343851537e-05, | |
| "loss": 1.368809461593628, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 0.6593406593406593, | |
| "grad_norm": 1.5976839065551758, | |
| "learning_rate": 1.923124466596602e-05, | |
| "loss": 1.3585935831069946, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.6617826617826618, | |
| "grad_norm": 1.836732268333435, | |
| "learning_rate": 1.922086851474519e-05, | |
| "loss": 1.0160579681396484, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 0.6642246642246642, | |
| "grad_norm": 4.108547687530518, | |
| "learning_rate": 1.9210425973984074e-05, | |
| "loss": 1.3244247436523438, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 0.6666666666666666, | |
| "grad_norm": 1.7101798057556152, | |
| "learning_rate": 1.9199917128013836e-05, | |
| "loss": 1.2471184730529785, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 0.6691086691086691, | |
| "grad_norm": 1.3308701515197754, | |
| "learning_rate": 1.918934206170112e-05, | |
| "loss": 1.3621915578842163, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 0.6715506715506715, | |
| "grad_norm": 1.1020407676696777, | |
| "learning_rate": 1.917870086044734e-05, | |
| "loss": 1.230018973350525, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.673992673992674, | |
| "grad_norm": 19.01947021484375, | |
| "learning_rate": 1.9167993610187988e-05, | |
| "loss": 1.0613629817962646, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 0.6764346764346765, | |
| "grad_norm": 1.0684137344360352, | |
| "learning_rate": 1.915722039739197e-05, | |
| "loss": 1.1644939184188843, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 0.6788766788766789, | |
| "grad_norm": 1.4123005867004395, | |
| "learning_rate": 1.9146381309060874e-05, | |
| "loss": 0.9099707007408142, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 0.6813186813186813, | |
| "grad_norm": 3.2105636596679688, | |
| "learning_rate": 1.913547643272828e-05, | |
| "loss": 1.228736400604248, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 0.6837606837606838, | |
| "grad_norm": 0.4815189242362976, | |
| "learning_rate": 1.912450585645907e-05, | |
| "loss": 1.3034601211547852, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.6862026862026862, | |
| "grad_norm": 2.001192569732666, | |
| "learning_rate": 1.9113469668848675e-05, | |
| "loss": 1.072668433189392, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 0.6886446886446886, | |
| "grad_norm": 1.3243483304977417, | |
| "learning_rate": 1.9102367959022417e-05, | |
| "loss": 1.3628251552581787, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 0.6910866910866911, | |
| "grad_norm": 1.6034096479415894, | |
| "learning_rate": 1.909120081663473e-05, | |
| "loss": 1.1910985708236694, | |
| "step": 566 | |
| }, | |
| { | |
| "epoch": 0.6935286935286935, | |
| "grad_norm": 1.6782633066177368, | |
| "learning_rate": 1.9079968331868487e-05, | |
| "loss": 1.4165751934051514, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 0.6959706959706959, | |
| "grad_norm": 0.8705784678459167, | |
| "learning_rate": 1.9068670595434228e-05, | |
| "loss": 1.1330338716506958, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.6984126984126984, | |
| "grad_norm": 3.466735601425171, | |
| "learning_rate": 1.9057307698569458e-05, | |
| "loss": 1.0612688064575195, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 0.7008547008547008, | |
| "grad_norm": 2.736870765686035, | |
| "learning_rate": 1.9045879733037907e-05, | |
| "loss": 1.4824306964874268, | |
| "step": 574 | |
| }, | |
| { | |
| "epoch": 0.7032967032967034, | |
| "grad_norm": 1.9692933559417725, | |
| "learning_rate": 1.9034386791128766e-05, | |
| "loss": 1.28273606300354, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 0.7057387057387058, | |
| "grad_norm": 0.8525418043136597, | |
| "learning_rate": 1.9022828965655975e-05, | |
| "loss": 1.2495508193969727, | |
| "step": 578 | |
| }, | |
| { | |
| "epoch": 0.7081807081807082, | |
| "grad_norm": 0.8721325993537903, | |
| "learning_rate": 1.9011206349957444e-05, | |
| "loss": 1.2048630714416504, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.7106227106227107, | |
| "grad_norm": 1.3199268579483032, | |
| "learning_rate": 1.899951903789431e-05, | |
| "loss": 1.2845754623413086, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 0.7130647130647131, | |
| "grad_norm": 1.1963062286376953, | |
| "learning_rate": 1.8987767123850197e-05, | |
| "loss": 1.2032135725021362, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 0.7155067155067155, | |
| "grad_norm": 1.1792757511138916, | |
| "learning_rate": 1.8975950702730425e-05, | |
| "loss": 1.375983715057373, | |
| "step": 586 | |
| }, | |
| { | |
| "epoch": 0.717948717948718, | |
| "grad_norm": 1.8274788856506348, | |
| "learning_rate": 1.8964069869961254e-05, | |
| "loss": 1.1112651824951172, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 0.7203907203907204, | |
| "grad_norm": 1.0463271141052246, | |
| "learning_rate": 1.8952124721489115e-05, | |
| "loss": 1.0283359289169312, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.7228327228327228, | |
| "grad_norm": 1.1223207712173462, | |
| "learning_rate": 1.8940115353779847e-05, | |
| "loss": 0.9025493860244751, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 0.7252747252747253, | |
| "grad_norm": 1.450899600982666, | |
| "learning_rate": 1.8928041863817896e-05, | |
| "loss": 1.2699706554412842, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 0.7277167277167277, | |
| "grad_norm": 2.5641753673553467, | |
| "learning_rate": 1.891590434910554e-05, | |
| "loss": 1.0194693803787231, | |
| "step": 596 | |
| }, | |
| { | |
| "epoch": 0.7301587301587301, | |
| "grad_norm": 0.7553045153617859, | |
| "learning_rate": 1.890370290766212e-05, | |
| "loss": 1.160589337348938, | |
| "step": 598 | |
| }, | |
| { | |
| "epoch": 0.7326007326007326, | |
| "grad_norm": 1.1860005855560303, | |
| "learning_rate": 1.8891437638023212e-05, | |
| "loss": 1.2648638486862183, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.7350427350427351, | |
| "grad_norm": 1.1435580253601074, | |
| "learning_rate": 1.8879108639239864e-05, | |
| "loss": 1.3810834884643555, | |
| "step": 602 | |
| }, | |
| { | |
| "epoch": 0.7374847374847375, | |
| "grad_norm": 0.9142278432846069, | |
| "learning_rate": 1.8866716010877774e-05, | |
| "loss": 1.2209972143173218, | |
| "step": 604 | |
| }, | |
| { | |
| "epoch": 0.73992673992674, | |
| "grad_norm": 3.2111129760742188, | |
| "learning_rate": 1.885425985301651e-05, | |
| "loss": 1.510741949081421, | |
| "step": 606 | |
| }, | |
| { | |
| "epoch": 0.7423687423687424, | |
| "grad_norm": 1.5610990524291992, | |
| "learning_rate": 1.884174026624868e-05, | |
| "loss": 1.3180582523345947, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 0.7448107448107448, | |
| "grad_norm": 0.7449647188186646, | |
| "learning_rate": 1.8829157351679116e-05, | |
| "loss": 0.9663639664649963, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.7472527472527473, | |
| "grad_norm": 1.3256258964538574, | |
| "learning_rate": 1.881651121092408e-05, | |
| "loss": 1.2966718673706055, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 0.7496947496947497, | |
| "grad_norm": 1.0234135389328003, | |
| "learning_rate": 1.880380194611044e-05, | |
| "loss": 1.2717726230621338, | |
| "step": 614 | |
| }, | |
| { | |
| "epoch": 0.7521367521367521, | |
| "grad_norm": 2.811690092086792, | |
| "learning_rate": 1.8791029659874817e-05, | |
| "loss": 1.0650262832641602, | |
| "step": 616 | |
| }, | |
| { | |
| "epoch": 0.7545787545787546, | |
| "grad_norm": 1.469228744506836, | |
| "learning_rate": 1.877819445536279e-05, | |
| "loss": 1.6179522275924683, | |
| "step": 618 | |
| }, | |
| { | |
| "epoch": 0.757020757020757, | |
| "grad_norm": 2.5131025314331055, | |
| "learning_rate": 1.8765296436228043e-05, | |
| "loss": 1.1963871717453003, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.7594627594627594, | |
| "grad_norm": 1.2842845916748047, | |
| "learning_rate": 1.875233570663154e-05, | |
| "loss": 0.9286983013153076, | |
| "step": 622 | |
| }, | |
| { | |
| "epoch": 0.7619047619047619, | |
| "grad_norm": 1.0976072549819946, | |
| "learning_rate": 1.8739312371240678e-05, | |
| "loss": 1.2990517616271973, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 0.7643467643467643, | |
| "grad_norm": 1.3670490980148315, | |
| "learning_rate": 1.8726226535228425e-05, | |
| "loss": 1.352059006690979, | |
| "step": 626 | |
| }, | |
| { | |
| "epoch": 0.7667887667887668, | |
| "grad_norm": 2.016474485397339, | |
| "learning_rate": 1.871307830427251e-05, | |
| "loss": 1.1491894721984863, | |
| "step": 628 | |
| }, | |
| { | |
| "epoch": 0.7692307692307693, | |
| "grad_norm": 1.5183488130569458, | |
| "learning_rate": 1.8699867784554537e-05, | |
| "loss": 1.3350757360458374, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.7716727716727717, | |
| "grad_norm": 0.8359405398368835, | |
| "learning_rate": 1.868659508275914e-05, | |
| "loss": 1.0210474729537964, | |
| "step": 632 | |
| }, | |
| { | |
| "epoch": 0.7741147741147741, | |
| "grad_norm": 1.0358965396881104, | |
| "learning_rate": 1.867326030607311e-05, | |
| "loss": 1.0034987926483154, | |
| "step": 634 | |
| }, | |
| { | |
| "epoch": 0.7765567765567766, | |
| "grad_norm": 2.3178768157958984, | |
| "learning_rate": 1.8659863562184552e-05, | |
| "loss": 1.3230623006820679, | |
| "step": 636 | |
| }, | |
| { | |
| "epoch": 0.778998778998779, | |
| "grad_norm": 1.5217390060424805, | |
| "learning_rate": 1.8646404959281986e-05, | |
| "loss": 1.3143547773361206, | |
| "step": 638 | |
| }, | |
| { | |
| "epoch": 0.7814407814407814, | |
| "grad_norm": 1.7523036003112793, | |
| "learning_rate": 1.8632884606053506e-05, | |
| "loss": 0.9751634001731873, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.7838827838827839, | |
| "grad_norm": 2.0202057361602783, | |
| "learning_rate": 1.861930261168587e-05, | |
| "loss": 1.1349761486053467, | |
| "step": 642 | |
| }, | |
| { | |
| "epoch": 0.7863247863247863, | |
| "grad_norm": 0.9345976710319519, | |
| "learning_rate": 1.860565908586365e-05, | |
| "loss": 1.2226810455322266, | |
| "step": 644 | |
| }, | |
| { | |
| "epoch": 0.7887667887667887, | |
| "grad_norm": 1.210115909576416, | |
| "learning_rate": 1.859195413876831e-05, | |
| "loss": 1.0119144916534424, | |
| "step": 646 | |
| }, | |
| { | |
| "epoch": 0.7912087912087912, | |
| "grad_norm": 1.0988825559616089, | |
| "learning_rate": 1.857818788107734e-05, | |
| "loss": 1.26012122631073, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 0.7936507936507936, | |
| "grad_norm": 4.83104944229126, | |
| "learning_rate": 1.856436042396338e-05, | |
| "loss": 0.5898873209953308, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.796092796092796, | |
| "grad_norm": 1.161339282989502, | |
| "learning_rate": 1.8550471879093275e-05, | |
| "loss": 0.8887655138969421, | |
| "step": 652 | |
| }, | |
| { | |
| "epoch": 0.7985347985347986, | |
| "grad_norm": 1.4048727750778198, | |
| "learning_rate": 1.8536522358627205e-05, | |
| "loss": 1.2602205276489258, | |
| "step": 654 | |
| }, | |
| { | |
| "epoch": 0.800976800976801, | |
| "grad_norm": 2.1626598834991455, | |
| "learning_rate": 1.852251197521778e-05, | |
| "loss": 1.2750191688537598, | |
| "step": 656 | |
| }, | |
| { | |
| "epoch": 0.8034188034188035, | |
| "grad_norm": 2.365673065185547, | |
| "learning_rate": 1.8508440842009113e-05, | |
| "loss": 0.5839018225669861, | |
| "step": 658 | |
| }, | |
| { | |
| "epoch": 0.8058608058608059, | |
| "grad_norm": 1.4860225915908813, | |
| "learning_rate": 1.849430907263592e-05, | |
| "loss": 1.297167181968689, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.8083028083028083, | |
| "grad_norm": 1.04447603225708, | |
| "learning_rate": 1.8480116781222604e-05, | |
| "loss": 1.2555423974990845, | |
| "step": 662 | |
| }, | |
| { | |
| "epoch": 0.8107448107448108, | |
| "grad_norm": 0.8101674318313599, | |
| "learning_rate": 1.846586408238232e-05, | |
| "loss": 1.3545968532562256, | |
| "step": 664 | |
| }, | |
| { | |
| "epoch": 0.8131868131868132, | |
| "grad_norm": 1.1193162202835083, | |
| "learning_rate": 1.8451551091216064e-05, | |
| "loss": 0.9384480118751526, | |
| "step": 666 | |
| }, | |
| { | |
| "epoch": 0.8156288156288156, | |
| "grad_norm": 1.269223928451538, | |
| "learning_rate": 1.8437177923311728e-05, | |
| "loss": 1.0872721672058105, | |
| "step": 668 | |
| }, | |
| { | |
| "epoch": 0.818070818070818, | |
| "grad_norm": 1.7073310613632202, | |
| "learning_rate": 1.842274469474318e-05, | |
| "loss": 1.4501525163650513, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.8205128205128205, | |
| "grad_norm": 1.2747077941894531, | |
| "learning_rate": 1.8408251522069323e-05, | |
| "loss": 1.296190857887268, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 0.8229548229548229, | |
| "grad_norm": 1.145330786705017, | |
| "learning_rate": 1.8393698522333158e-05, | |
| "loss": 1.076781153678894, | |
| "step": 674 | |
| }, | |
| { | |
| "epoch": 0.8253968253968254, | |
| "grad_norm": 1.0505316257476807, | |
| "learning_rate": 1.837908581306082e-05, | |
| "loss": 0.963850200176239, | |
| "step": 676 | |
| }, | |
| { | |
| "epoch": 0.8278388278388278, | |
| "grad_norm": 4.262927055358887, | |
| "learning_rate": 1.8364413512260656e-05, | |
| "loss": 1.2688353061676025, | |
| "step": 678 | |
| }, | |
| { | |
| "epoch": 0.8302808302808303, | |
| "grad_norm": 2.2526209354400635, | |
| "learning_rate": 1.8349681738422245e-05, | |
| "loss": 1.3245513439178467, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.8327228327228328, | |
| "grad_norm": 1.7615208625793457, | |
| "learning_rate": 1.8334890610515465e-05, | |
| "loss": 1.2618424892425537, | |
| "step": 682 | |
| }, | |
| { | |
| "epoch": 0.8351648351648352, | |
| "grad_norm": 2.4765729904174805, | |
| "learning_rate": 1.8320040247989516e-05, | |
| "loss": 0.9116923213005066, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 0.8376068376068376, | |
| "grad_norm": 2.0831899642944336, | |
| "learning_rate": 1.8305130770771966e-05, | |
| "loss": 1.4006067514419556, | |
| "step": 686 | |
| }, | |
| { | |
| "epoch": 0.8400488400488401, | |
| "grad_norm": 3.837216854095459, | |
| "learning_rate": 1.829016229926777e-05, | |
| "loss": 1.3707760572433472, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 0.8424908424908425, | |
| "grad_norm": 12.806596755981445, | |
| "learning_rate": 1.827513495435831e-05, | |
| "loss": 1.0350643396377563, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.8449328449328449, | |
| "grad_norm": 1.426324486732483, | |
| "learning_rate": 1.826004885740042e-05, | |
| "loss": 1.3101565837860107, | |
| "step": 692 | |
| }, | |
| { | |
| "epoch": 0.8473748473748474, | |
| "grad_norm": 0.7182126045227051, | |
| "learning_rate": 1.8244904130225383e-05, | |
| "loss": 1.1183477640151978, | |
| "step": 694 | |
| }, | |
| { | |
| "epoch": 0.8498168498168498, | |
| "grad_norm": 1.0692784786224365, | |
| "learning_rate": 1.8229700895137977e-05, | |
| "loss": 1.2185040712356567, | |
| "step": 696 | |
| }, | |
| { | |
| "epoch": 0.8522588522588522, | |
| "grad_norm": 1.405985951423645, | |
| "learning_rate": 1.821443927491548e-05, | |
| "loss": 1.0439921617507935, | |
| "step": 698 | |
| }, | |
| { | |
| "epoch": 0.8547008547008547, | |
| "grad_norm": 0.9861589074134827, | |
| "learning_rate": 1.819911939280665e-05, | |
| "loss": 1.179707646369934, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.8571428571428571, | |
| "grad_norm": 1.3593485355377197, | |
| "learning_rate": 1.8183741372530778e-05, | |
| "loss": 1.1061705350875854, | |
| "step": 702 | |
| }, | |
| { | |
| "epoch": 0.8595848595848596, | |
| "grad_norm": 2.342923402786255, | |
| "learning_rate": 1.816830533827665e-05, | |
| "loss": 1.0052831172943115, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 0.8620268620268621, | |
| "grad_norm": 1.4813743829727173, | |
| "learning_rate": 1.815281141470155e-05, | |
| "loss": 0.5395532250404358, | |
| "step": 706 | |
| }, | |
| { | |
| "epoch": 0.8644688644688645, | |
| "grad_norm": 1.3919825553894043, | |
| "learning_rate": 1.8137259726930283e-05, | |
| "loss": 1.2419100999832153, | |
| "step": 708 | |
| }, | |
| { | |
| "epoch": 0.8669108669108669, | |
| "grad_norm": 3.034050464630127, | |
| "learning_rate": 1.8121650400554125e-05, | |
| "loss": 0.9318399429321289, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.8693528693528694, | |
| "grad_norm": 4.048087120056152, | |
| "learning_rate": 1.8105983561629827e-05, | |
| "loss": 1.4534571170806885, | |
| "step": 712 | |
| }, | |
| { | |
| "epoch": 0.8717948717948718, | |
| "grad_norm": 13.133171081542969, | |
| "learning_rate": 1.8090259336678598e-05, | |
| "loss": 1.6200733184814453, | |
| "step": 714 | |
| }, | |
| { | |
| "epoch": 0.8742368742368742, | |
| "grad_norm": 1.3102926015853882, | |
| "learning_rate": 1.8074477852685088e-05, | |
| "loss": 1.4871742725372314, | |
| "step": 716 | |
| }, | |
| { | |
| "epoch": 0.8766788766788767, | |
| "grad_norm": 0.9029149413108826, | |
| "learning_rate": 1.805863923709635e-05, | |
| "loss": 1.0001909732818604, | |
| "step": 718 | |
| }, | |
| { | |
| "epoch": 0.8791208791208791, | |
| "grad_norm": 0.828899621963501, | |
| "learning_rate": 1.8042743617820814e-05, | |
| "loss": 1.2416490316390991, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.8815628815628815, | |
| "grad_norm": 2.1641383171081543, | |
| "learning_rate": 1.8026791123227255e-05, | |
| "loss": 0.8903718590736389, | |
| "step": 722 | |
| }, | |
| { | |
| "epoch": 0.884004884004884, | |
| "grad_norm": 1.445026159286499, | |
| "learning_rate": 1.8010781882143773e-05, | |
| "loss": 1.285760521888733, | |
| "step": 724 | |
| }, | |
| { | |
| "epoch": 0.8864468864468864, | |
| "grad_norm": 0.9921174645423889, | |
| "learning_rate": 1.799471602385672e-05, | |
| "loss": 1.2185858488082886, | |
| "step": 726 | |
| }, | |
| { | |
| "epoch": 0.8888888888888888, | |
| "grad_norm": 1.5229535102844238, | |
| "learning_rate": 1.797859367810968e-05, | |
| "loss": 1.2078474760055542, | |
| "step": 728 | |
| }, | |
| { | |
| "epoch": 0.8913308913308914, | |
| "grad_norm": 1.6648898124694824, | |
| "learning_rate": 1.7962414975102416e-05, | |
| "loss": 1.4831866025924683, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.8937728937728938, | |
| "grad_norm": 3.4526195526123047, | |
| "learning_rate": 1.794618004548982e-05, | |
| "loss": 1.2522797584533691, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 0.8962148962148963, | |
| "grad_norm": 1.0352317094802856, | |
| "learning_rate": 1.7929889020380842e-05, | |
| "loss": 1.0359210968017578, | |
| "step": 734 | |
| }, | |
| { | |
| "epoch": 0.8986568986568987, | |
| "grad_norm": 0.8629250526428223, | |
| "learning_rate": 1.791354203133746e-05, | |
| "loss": 0.8198949098587036, | |
| "step": 736 | |
| }, | |
| { | |
| "epoch": 0.9010989010989011, | |
| "grad_norm": 4.816531658172607, | |
| "learning_rate": 1.7897139210373594e-05, | |
| "loss": 0.9690486788749695, | |
| "step": 738 | |
| }, | |
| { | |
| "epoch": 0.9035409035409036, | |
| "grad_norm": 2.7800450325012207, | |
| "learning_rate": 1.7880680689954047e-05, | |
| "loss": 1.0706011056900024, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.905982905982906, | |
| "grad_norm": 1.3503133058547974, | |
| "learning_rate": 1.786416660299344e-05, | |
| "loss": 0.9173503518104553, | |
| "step": 742 | |
| }, | |
| { | |
| "epoch": 0.9084249084249084, | |
| "grad_norm": 0.9783918261528015, | |
| "learning_rate": 1.7847597082855133e-05, | |
| "loss": 0.9544399976730347, | |
| "step": 744 | |
| }, | |
| { | |
| "epoch": 0.9108669108669109, | |
| "grad_norm": 1.6359418630599976, | |
| "learning_rate": 1.7830972263350142e-05, | |
| "loss": 1.2056411504745483, | |
| "step": 746 | |
| }, | |
| { | |
| "epoch": 0.9133089133089133, | |
| "grad_norm": 1.5760291814804077, | |
| "learning_rate": 1.7814292278736084e-05, | |
| "loss": 0.9109166264533997, | |
| "step": 748 | |
| }, | |
| { | |
| "epoch": 0.9157509157509157, | |
| "grad_norm": 1.4765530824661255, | |
| "learning_rate": 1.7797557263716054e-05, | |
| "loss": 1.401995301246643, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.9181929181929182, | |
| "grad_norm": 0.7756912708282471, | |
| "learning_rate": 1.7780767353437573e-05, | |
| "loss": 1.2727299928665161, | |
| "step": 752 | |
| }, | |
| { | |
| "epoch": 0.9206349206349206, | |
| "grad_norm": 0.8636785745620728, | |
| "learning_rate": 1.7763922683491476e-05, | |
| "loss": 1.2869514226913452, | |
| "step": 754 | |
| }, | |
| { | |
| "epoch": 0.9230769230769231, | |
| "grad_norm": 0.8454907536506653, | |
| "learning_rate": 1.7747023389910815e-05, | |
| "loss": 1.2656826972961426, | |
| "step": 756 | |
| }, | |
| { | |
| "epoch": 0.9255189255189256, | |
| "grad_norm": 1.3287631273269653, | |
| "learning_rate": 1.773006960916978e-05, | |
| "loss": 1.3375307321548462, | |
| "step": 758 | |
| }, | |
| { | |
| "epoch": 0.927960927960928, | |
| "grad_norm": 1.5437045097351074, | |
| "learning_rate": 1.7713061478182582e-05, | |
| "loss": 0.8308702111244202, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.9304029304029304, | |
| "grad_norm": 3.6134531497955322, | |
| "learning_rate": 1.7695999134302348e-05, | |
| "loss": 1.2227895259857178, | |
| "step": 762 | |
| }, | |
| { | |
| "epoch": 0.9328449328449329, | |
| "grad_norm": 1.38361394405365, | |
| "learning_rate": 1.767888271532001e-05, | |
| "loss": 0.9452077150344849, | |
| "step": 764 | |
| }, | |
| { | |
| "epoch": 0.9352869352869353, | |
| "grad_norm": 1.8651083707809448, | |
| "learning_rate": 1.7661712359463202e-05, | |
| "loss": 0.6139346957206726, | |
| "step": 766 | |
| }, | |
| { | |
| "epoch": 0.9377289377289377, | |
| "grad_norm": 1.1716833114624023, | |
| "learning_rate": 1.7644488205395136e-05, | |
| "loss": 0.9175626039505005, | |
| "step": 768 | |
| }, | |
| { | |
| "epoch": 0.9401709401709402, | |
| "grad_norm": 4.235447406768799, | |
| "learning_rate": 1.7627210392213484e-05, | |
| "loss": 0.7235321402549744, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.9426129426129426, | |
| "grad_norm": 0.8599190711975098, | |
| "learning_rate": 1.7609879059449256e-05, | |
| "loss": 1.1240880489349365, | |
| "step": 772 | |
| }, | |
| { | |
| "epoch": 0.945054945054945, | |
| "grad_norm": 3.387906789779663, | |
| "learning_rate": 1.7592494347065667e-05, | |
| "loss": 1.3139581680297852, | |
| "step": 774 | |
| }, | |
| { | |
| "epoch": 0.9474969474969475, | |
| "grad_norm": 1.6255816221237183, | |
| "learning_rate": 1.7575056395457017e-05, | |
| "loss": 1.2285006046295166, | |
| "step": 776 | |
| }, | |
| { | |
| "epoch": 0.9499389499389499, | |
| "grad_norm": 2.0512325763702393, | |
| "learning_rate": 1.7557565345447548e-05, | |
| "loss": 0.9121115207672119, | |
| "step": 778 | |
| }, | |
| { | |
| "epoch": 0.9523809523809523, | |
| "grad_norm": 1.003928542137146, | |
| "learning_rate": 1.754002133829031e-05, | |
| "loss": 1.1289280652999878, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.9548229548229549, | |
| "grad_norm": 6.144791126251221, | |
| "learning_rate": 1.752242451566603e-05, | |
| "loss": 1.1398252248764038, | |
| "step": 782 | |
| }, | |
| { | |
| "epoch": 0.9572649572649573, | |
| "grad_norm": 0.8303928375244141, | |
| "learning_rate": 1.7504775019681946e-05, | |
| "loss": 1.263461709022522, | |
| "step": 784 | |
| }, | |
| { | |
| "epoch": 0.9597069597069597, | |
| "grad_norm": 1.0771842002868652, | |
| "learning_rate": 1.7487072992870683e-05, | |
| "loss": 1.2938859462738037, | |
| "step": 786 | |
| }, | |
| { | |
| "epoch": 0.9621489621489622, | |
| "grad_norm": 1.3151885271072388, | |
| "learning_rate": 1.746931857818908e-05, | |
| "loss": 1.3971589803695679, | |
| "step": 788 | |
| }, | |
| { | |
| "epoch": 0.9645909645909646, | |
| "grad_norm": 2.2546122074127197, | |
| "learning_rate": 1.7451511919017054e-05, | |
| "loss": 1.341101884841919, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.967032967032967, | |
| "grad_norm": 0.76347416639328, | |
| "learning_rate": 1.743365315915643e-05, | |
| "loss": 1.0966370105743408, | |
| "step": 792 | |
| }, | |
| { | |
| "epoch": 0.9694749694749695, | |
| "grad_norm": 1.2820730209350586, | |
| "learning_rate": 1.7415742442829792e-05, | |
| "loss": 1.3368990421295166, | |
| "step": 794 | |
| }, | |
| { | |
| "epoch": 0.9719169719169719, | |
| "grad_norm": 0.7520409226417542, | |
| "learning_rate": 1.7397779914679303e-05, | |
| "loss": 1.2155550718307495, | |
| "step": 796 | |
| }, | |
| { | |
| "epoch": 0.9743589743589743, | |
| "grad_norm": 0.652754545211792, | |
| "learning_rate": 1.7379765719765542e-05, | |
| "loss": 1.2150750160217285, | |
| "step": 798 | |
| }, | |
| { | |
| "epoch": 0.9768009768009768, | |
| "grad_norm": 0.8119310736656189, | |
| "learning_rate": 1.7361700003566348e-05, | |
| "loss": 1.2871735095977783, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.9792429792429792, | |
| "grad_norm": 2.2065281867980957, | |
| "learning_rate": 1.734358291197562e-05, | |
| "loss": 0.9395040273666382, | |
| "step": 802 | |
| }, | |
| { | |
| "epoch": 0.9816849816849816, | |
| "grad_norm": 0.936976432800293, | |
| "learning_rate": 1.732541459130215e-05, | |
| "loss": 1.1477895975112915, | |
| "step": 804 | |
| }, | |
| { | |
| "epoch": 0.9841269841269841, | |
| "grad_norm": 1.3355202674865723, | |
| "learning_rate": 1.730719518826846e-05, | |
| "loss": 1.573718547821045, | |
| "step": 806 | |
| }, | |
| { | |
| "epoch": 0.9865689865689866, | |
| "grad_norm": 4.425434112548828, | |
| "learning_rate": 1.7288924850009576e-05, | |
| "loss": 0.9391233325004578, | |
| "step": 808 | |
| }, | |
| { | |
| "epoch": 0.989010989010989, | |
| "grad_norm": 0.7173460721969604, | |
| "learning_rate": 1.7270603724071876e-05, | |
| "loss": 1.364790916442871, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.9914529914529915, | |
| "grad_norm": 0.7534496784210205, | |
| "learning_rate": 1.725223195841189e-05, | |
| "loss": 1.2704541683197021, | |
| "step": 812 | |
| }, | |
| { | |
| "epoch": 0.9938949938949939, | |
| "grad_norm": 1.4058549404144287, | |
| "learning_rate": 1.7233809701395087e-05, | |
| "loss": 1.35564386844635, | |
| "step": 814 | |
| }, | |
| { | |
| "epoch": 0.9963369963369964, | |
| "grad_norm": 0.8958796858787537, | |
| "learning_rate": 1.72153371017947e-05, | |
| "loss": 1.233031153678894, | |
| "step": 816 | |
| }, | |
| { | |
| "epoch": 0.9987789987789988, | |
| "grad_norm": 0.7508826851844788, | |
| "learning_rate": 1.7196814308790516e-05, | |
| "loss": 1.1463748216629028, | |
| "step": 818 | |
| }, | |
| { | |
| "epoch": 1.0012210012210012, | |
| "grad_norm": 0.6122261881828308, | |
| "learning_rate": 1.717824147196767e-05, | |
| "loss": 1.007127285003662, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 1.0036630036630036, | |
| "grad_norm": 1.004014015197754, | |
| "learning_rate": 1.7159618741315433e-05, | |
| "loss": 1.0883307456970215, | |
| "step": 822 | |
| }, | |
| { | |
| "epoch": 1.006105006105006, | |
| "grad_norm": 1.8373212814331055, | |
| "learning_rate": 1.7140946267226006e-05, | |
| "loss": 0.4619407653808594, | |
| "step": 824 | |
| }, | |
| { | |
| "epoch": 1.0085470085470085, | |
| "grad_norm": 7.073435306549072, | |
| "learning_rate": 1.712222420049331e-05, | |
| "loss": 0.8937675356864929, | |
| "step": 826 | |
| }, | |
| { | |
| "epoch": 1.010989010989011, | |
| "grad_norm": 3.16390061378479, | |
| "learning_rate": 1.7103452692311756e-05, | |
| "loss": 0.7834187150001526, | |
| "step": 828 | |
| }, | |
| { | |
| "epoch": 1.0134310134310134, | |
| "grad_norm": 1.286433458328247, | |
| "learning_rate": 1.708463189427504e-05, | |
| "loss": 0.7017002105712891, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 1.0158730158730158, | |
| "grad_norm": 2.5467231273651123, | |
| "learning_rate": 1.7065761958374905e-05, | |
| "loss": 0.9201502203941345, | |
| "step": 832 | |
| }, | |
| { | |
| "epoch": 1.0183150183150182, | |
| "grad_norm": 1.361122965812683, | |
| "learning_rate": 1.7046843036999912e-05, | |
| "loss": 0.9217178821563721, | |
| "step": 834 | |
| }, | |
| { | |
| "epoch": 1.0207570207570207, | |
| "grad_norm": 2.6307156085968018, | |
| "learning_rate": 1.7027875282934224e-05, | |
| "loss": 1.00894033908844, | |
| "step": 836 | |
| }, | |
| { | |
| "epoch": 1.0231990231990231, | |
| "grad_norm": 0.9444079995155334, | |
| "learning_rate": 1.7008858849356363e-05, | |
| "loss": 1.0666855573654175, | |
| "step": 838 | |
| }, | |
| { | |
| "epoch": 1.0256410256410255, | |
| "grad_norm": 1.807748556137085, | |
| "learning_rate": 1.6989793889837966e-05, | |
| "loss": 0.7795441746711731, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 1.028083028083028, | |
| "grad_norm": 4.041755199432373, | |
| "learning_rate": 1.6970680558342566e-05, | |
| "loss": 0.7524101734161377, | |
| "step": 842 | |
| }, | |
| { | |
| "epoch": 1.0305250305250304, | |
| "grad_norm": 0.885811448097229, | |
| "learning_rate": 1.695151900922432e-05, | |
| "loss": 0.9602640271186829, | |
| "step": 844 | |
| }, | |
| { | |
| "epoch": 1.032967032967033, | |
| "grad_norm": 0.9917791485786438, | |
| "learning_rate": 1.6932309397226792e-05, | |
| "loss": 0.8459327816963196, | |
| "step": 846 | |
| }, | |
| { | |
| "epoch": 1.0354090354090355, | |
| "grad_norm": 0.9382413029670715, | |
| "learning_rate": 1.6913051877481676e-05, | |
| "loss": 1.1561813354492188, | |
| "step": 848 | |
| }, | |
| { | |
| "epoch": 1.037851037851038, | |
| "grad_norm": 1.5294519662857056, | |
| "learning_rate": 1.6893746605507567e-05, | |
| "loss": 0.7689896821975708, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 1.0402930402930404, | |
| "grad_norm": 1.7145957946777344, | |
| "learning_rate": 1.6874393737208688e-05, | |
| "loss": 0.5241991281509399, | |
| "step": 852 | |
| }, | |
| { | |
| "epoch": 1.0427350427350428, | |
| "grad_norm": 0.781104326248169, | |
| "learning_rate": 1.685499342887364e-05, | |
| "loss": 1.0428876876831055, | |
| "step": 854 | |
| }, | |
| { | |
| "epoch": 1.0451770451770452, | |
| "grad_norm": 1.5123246908187866, | |
| "learning_rate": 1.6835545837174132e-05, | |
| "loss": 0.668832004070282, | |
| "step": 856 | |
| }, | |
| { | |
| "epoch": 1.0476190476190477, | |
| "grad_norm": 1.0035831928253174, | |
| "learning_rate": 1.681605111916373e-05, | |
| "loss": 1.2478870153427124, | |
| "step": 858 | |
| }, | |
| { | |
| "epoch": 1.05006105006105, | |
| "grad_norm": 0.9146220684051514, | |
| "learning_rate": 1.679650943227657e-05, | |
| "loss": 0.8985828161239624, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 1.0525030525030525, | |
| "grad_norm": 1.358199119567871, | |
| "learning_rate": 1.6776920934326103e-05, | |
| "loss": 1.0257023572921753, | |
| "step": 862 | |
| }, | |
| { | |
| "epoch": 1.054945054945055, | |
| "grad_norm": 1.0113524198532104, | |
| "learning_rate": 1.675728578350381e-05, | |
| "loss": 1.0212005376815796, | |
| "step": 864 | |
| }, | |
| { | |
| "epoch": 1.0573870573870574, | |
| "grad_norm": 2.236260175704956, | |
| "learning_rate": 1.673760413837793e-05, | |
| "loss": 1.4508510828018188, | |
| "step": 866 | |
| }, | |
| { | |
| "epoch": 1.0598290598290598, | |
| "grad_norm": 2.680145740509033, | |
| "learning_rate": 1.6717876157892175e-05, | |
| "loss": 0.5031489729881287, | |
| "step": 868 | |
| }, | |
| { | |
| "epoch": 1.0622710622710623, | |
| "grad_norm": 1.7734426259994507, | |
| "learning_rate": 1.6698102001364456e-05, | |
| "loss": 0.9893677234649658, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 1.0647130647130647, | |
| "grad_norm": 1.0509651899337769, | |
| "learning_rate": 1.6678281828485576e-05, | |
| "loss": 0.897520124912262, | |
| "step": 872 | |
| }, | |
| { | |
| "epoch": 1.0671550671550671, | |
| "grad_norm": 1.6916723251342773, | |
| "learning_rate": 1.6658415799317966e-05, | |
| "loss": 0.7381224036216736, | |
| "step": 874 | |
| }, | |
| { | |
| "epoch": 1.0695970695970696, | |
| "grad_norm": 1.0783177614212036, | |
| "learning_rate": 1.6638504074294375e-05, | |
| "loss": 0.9826089143753052, | |
| "step": 876 | |
| }, | |
| { | |
| "epoch": 1.072039072039072, | |
| "grad_norm": 0.9295514225959778, | |
| "learning_rate": 1.6618546814216586e-05, | |
| "loss": 1.0204219818115234, | |
| "step": 878 | |
| }, | |
| { | |
| "epoch": 1.0744810744810744, | |
| "grad_norm": 2.3482747077941895, | |
| "learning_rate": 1.65985441802541e-05, | |
| "loss": 0.6614128947257996, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 1.0769230769230769, | |
| "grad_norm": 0.9849045276641846, | |
| "learning_rate": 1.6578496333942848e-05, | |
| "loss": 0.9977365732192993, | |
| "step": 882 | |
| }, | |
| { | |
| "epoch": 1.0793650793650793, | |
| "grad_norm": 4.873172283172607, | |
| "learning_rate": 1.655840343718389e-05, | |
| "loss": 0.6593250036239624, | |
| "step": 884 | |
| }, | |
| { | |
| "epoch": 1.0818070818070817, | |
| "grad_norm": 60.08795928955078, | |
| "learning_rate": 1.6538265652242103e-05, | |
| "loss": 0.7343877553939819, | |
| "step": 886 | |
| }, | |
| { | |
| "epoch": 1.0842490842490842, | |
| "grad_norm": 1.1528880596160889, | |
| "learning_rate": 1.6518083141744862e-05, | |
| "loss": 1.0775821208953857, | |
| "step": 888 | |
| }, | |
| { | |
| "epoch": 1.0866910866910866, | |
| "grad_norm": 0.5812370181083679, | |
| "learning_rate": 1.649785606868073e-05, | |
| "loss": 0.7265040874481201, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 1.089133089133089, | |
| "grad_norm": 0.9050455093383789, | |
| "learning_rate": 1.647758459639816e-05, | |
| "loss": 0.94173663854599, | |
| "step": 892 | |
| }, | |
| { | |
| "epoch": 1.0915750915750915, | |
| "grad_norm": 1.2509444952011108, | |
| "learning_rate": 1.6457268888604143e-05, | |
| "loss": 1.1309514045715332, | |
| "step": 894 | |
| }, | |
| { | |
| "epoch": 1.0940170940170941, | |
| "grad_norm": 1.1489883661270142, | |
| "learning_rate": 1.643690910936292e-05, | |
| "loss": 1.1048157215118408, | |
| "step": 896 | |
| }, | |
| { | |
| "epoch": 1.0964590964590966, | |
| "grad_norm": 3.072650909423828, | |
| "learning_rate": 1.6416505423094636e-05, | |
| "loss": 0.8980664014816284, | |
| "step": 898 | |
| }, | |
| { | |
| "epoch": 1.098901098901099, | |
| "grad_norm": 1.031434416770935, | |
| "learning_rate": 1.639605799457401e-05, | |
| "loss": 0.6644148826599121, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 1.1013431013431014, | |
| "grad_norm": 3.342662811279297, | |
| "learning_rate": 1.6375566988929025e-05, | |
| "loss": 0.6176282167434692, | |
| "step": 902 | |
| }, | |
| { | |
| "epoch": 1.1037851037851039, | |
| "grad_norm": 0.8162381649017334, | |
| "learning_rate": 1.6355032571639574e-05, | |
| "loss": 0.5790269374847412, | |
| "step": 904 | |
| }, | |
| { | |
| "epoch": 1.1062271062271063, | |
| "grad_norm": 1.7559690475463867, | |
| "learning_rate": 1.6334454908536123e-05, | |
| "loss": 0.8540843725204468, | |
| "step": 906 | |
| }, | |
| { | |
| "epoch": 1.1086691086691087, | |
| "grad_norm": 2.1038284301757812, | |
| "learning_rate": 1.631383416579839e-05, | |
| "loss": 1.0307986736297607, | |
| "step": 908 | |
| }, | |
| { | |
| "epoch": 1.1111111111111112, | |
| "grad_norm": 0.8097777366638184, | |
| "learning_rate": 1.6293170509954e-05, | |
| "loss": 0.7846847176551819, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 1.1135531135531136, | |
| "grad_norm": 0.7294727563858032, | |
| "learning_rate": 1.6272464107877112e-05, | |
| "loss": 1.0868881940841675, | |
| "step": 912 | |
| }, | |
| { | |
| "epoch": 1.115995115995116, | |
| "grad_norm": 1.1073777675628662, | |
| "learning_rate": 1.6251715126787114e-05, | |
| "loss": 0.6077226400375366, | |
| "step": 914 | |
| }, | |
| { | |
| "epoch": 1.1184371184371185, | |
| "grad_norm": 1.3670622110366821, | |
| "learning_rate": 1.623092373424723e-05, | |
| "loss": 0.7134993076324463, | |
| "step": 916 | |
| }, | |
| { | |
| "epoch": 1.120879120879121, | |
| "grad_norm": 1.8728268146514893, | |
| "learning_rate": 1.6210090098163206e-05, | |
| "loss": 1.1230908632278442, | |
| "step": 918 | |
| }, | |
| { | |
| "epoch": 1.1233211233211233, | |
| "grad_norm": 0.782214343547821, | |
| "learning_rate": 1.618921438678192e-05, | |
| "loss": 0.9432562589645386, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 1.1257631257631258, | |
| "grad_norm": 1.2588818073272705, | |
| "learning_rate": 1.616829676869005e-05, | |
| "loss": 0.8601541519165039, | |
| "step": 922 | |
| }, | |
| { | |
| "epoch": 1.1282051282051282, | |
| "grad_norm": 1.1834020614624023, | |
| "learning_rate": 1.61473374128127e-05, | |
| "loss": 0.7565584778785706, | |
| "step": 924 | |
| }, | |
| { | |
| "epoch": 1.1306471306471306, | |
| "grad_norm": 1.476582646369934, | |
| "learning_rate": 1.612633648841203e-05, | |
| "loss": 0.6475503444671631, | |
| "step": 926 | |
| }, | |
| { | |
| "epoch": 1.133089133089133, | |
| "grad_norm": 1.7382149696350098, | |
| "learning_rate": 1.61052941650859e-05, | |
| "loss": 0.5194863677024841, | |
| "step": 928 | |
| }, | |
| { | |
| "epoch": 1.1355311355311355, | |
| "grad_norm": 1.6398006677627563, | |
| "learning_rate": 1.608421061276651e-05, | |
| "loss": 0.8809158205986023, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 1.137973137973138, | |
| "grad_norm": 1.1977638006210327, | |
| "learning_rate": 1.6063086001718986e-05, | |
| "loss": 1.0729451179504395, | |
| "step": 932 | |
| }, | |
| { | |
| "epoch": 1.1404151404151404, | |
| "grad_norm": 1.9817147254943848, | |
| "learning_rate": 1.6041920502540058e-05, | |
| "loss": 1.008049726486206, | |
| "step": 934 | |
| }, | |
| { | |
| "epoch": 1.1428571428571428, | |
| "grad_norm": 1.1614291667938232, | |
| "learning_rate": 1.6020714286156646e-05, | |
| "loss": 0.8578592538833618, | |
| "step": 936 | |
| }, | |
| { | |
| "epoch": 1.1452991452991452, | |
| "grad_norm": 0.9589775800704956, | |
| "learning_rate": 1.59994675238245e-05, | |
| "loss": 0.9546090960502625, | |
| "step": 938 | |
| }, | |
| { | |
| "epoch": 1.1477411477411477, | |
| "grad_norm": 0.889543354511261, | |
| "learning_rate": 1.5978180387126797e-05, | |
| "loss": 1.0442495346069336, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 1.15018315018315, | |
| "grad_norm": 1.1197261810302734, | |
| "learning_rate": 1.5956853047972776e-05, | |
| "loss": 0.8928858637809753, | |
| "step": 942 | |
| }, | |
| { | |
| "epoch": 1.1526251526251525, | |
| "grad_norm": 0.7546731233596802, | |
| "learning_rate": 1.5935485678596328e-05, | |
| "loss": 0.8579668998718262, | |
| "step": 944 | |
| }, | |
| { | |
| "epoch": 1.155067155067155, | |
| "grad_norm": 1.2320284843444824, | |
| "learning_rate": 1.5914078451554637e-05, | |
| "loss": 0.683056652545929, | |
| "step": 946 | |
| }, | |
| { | |
| "epoch": 1.1575091575091574, | |
| "grad_norm": 1.5659480094909668, | |
| "learning_rate": 1.5892631539726754e-05, | |
| "loss": 0.6238126754760742, | |
| "step": 948 | |
| }, | |
| { | |
| "epoch": 1.1599511599511598, | |
| "grad_norm": 1.120065450668335, | |
| "learning_rate": 1.5871145116312207e-05, | |
| "loss": 0.9421287178993225, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 1.1623931623931625, | |
| "grad_norm": 0.9711224436759949, | |
| "learning_rate": 1.5849619354829627e-05, | |
| "loss": 0.9722180366516113, | |
| "step": 952 | |
| }, | |
| { | |
| "epoch": 1.164835164835165, | |
| "grad_norm": 1.5788224935531616, | |
| "learning_rate": 1.5828054429115317e-05, | |
| "loss": 0.9436995983123779, | |
| "step": 954 | |
| }, | |
| { | |
| "epoch": 1.1672771672771673, | |
| "grad_norm": 0.5967025756835938, | |
| "learning_rate": 1.580645051332186e-05, | |
| "loss": 0.8100671768188477, | |
| "step": 956 | |
| }, | |
| { | |
| "epoch": 1.1697191697191698, | |
| "grad_norm": 0.7621123790740967, | |
| "learning_rate": 1.5784807781916714e-05, | |
| "loss": 0.7545087337493896, | |
| "step": 958 | |
| }, | |
| { | |
| "epoch": 1.1721611721611722, | |
| "grad_norm": 1.0470103025436401, | |
| "learning_rate": 1.5763126409680803e-05, | |
| "loss": 1.0842094421386719, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 1.1746031746031746, | |
| "grad_norm": 1.1259769201278687, | |
| "learning_rate": 1.5741406571707108e-05, | |
| "loss": 0.7638933062553406, | |
| "step": 962 | |
| }, | |
| { | |
| "epoch": 1.177045177045177, | |
| "grad_norm": 0.513518750667572, | |
| "learning_rate": 1.571964844339924e-05, | |
| "loss": 0.6498727798461914, | |
| "step": 964 | |
| }, | |
| { | |
| "epoch": 1.1794871794871795, | |
| "grad_norm": 0.5694072246551514, | |
| "learning_rate": 1.569785220047003e-05, | |
| "loss": 0.983795702457428, | |
| "step": 966 | |
| }, | |
| { | |
| "epoch": 1.181929181929182, | |
| "grad_norm": 0.9271643161773682, | |
| "learning_rate": 1.5676018018940134e-05, | |
| "loss": 1.1204752922058105, | |
| "step": 968 | |
| }, | |
| { | |
| "epoch": 1.1843711843711844, | |
| "grad_norm": 1.4760109186172485, | |
| "learning_rate": 1.5654146075136565e-05, | |
| "loss": 0.7088498473167419, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 1.1868131868131868, | |
| "grad_norm": 1.260972023010254, | |
| "learning_rate": 1.5632236545691308e-05, | |
| "loss": 0.9644913077354431, | |
| "step": 972 | |
| }, | |
| { | |
| "epoch": 1.1892551892551892, | |
| "grad_norm": 0.883178174495697, | |
| "learning_rate": 1.561028960753988e-05, | |
| "loss": 0.7552489638328552, | |
| "step": 974 | |
| }, | |
| { | |
| "epoch": 1.1916971916971917, | |
| "grad_norm": 4.277756214141846, | |
| "learning_rate": 1.5588305437919884e-05, | |
| "loss": 0.6645691990852356, | |
| "step": 976 | |
| }, | |
| { | |
| "epoch": 1.1941391941391941, | |
| "grad_norm": 1.147638201713562, | |
| "learning_rate": 1.556628421436962e-05, | |
| "loss": 0.8974350094795227, | |
| "step": 978 | |
| }, | |
| { | |
| "epoch": 1.1965811965811965, | |
| "grad_norm": 2.6772568225860596, | |
| "learning_rate": 1.554422611472661e-05, | |
| "loss": 1.0676953792572021, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 1.199023199023199, | |
| "grad_norm": 1.5516761541366577, | |
| "learning_rate": 1.552213131712617e-05, | |
| "loss": 1.0465797185897827, | |
| "step": 982 | |
| }, | |
| { | |
| "epoch": 1.2014652014652014, | |
| "grad_norm": 0.5330000519752502, | |
| "learning_rate": 1.55e-05, | |
| "loss": 1.1170203685760498, | |
| "step": 984 | |
| }, | |
| { | |
| "epoch": 1.2039072039072038, | |
| "grad_norm": 1.7233712673187256, | |
| "learning_rate": 1.5477832342074713e-05, | |
| "loss": 0.7278258800506592, | |
| "step": 986 | |
| }, | |
| { | |
| "epoch": 1.2063492063492063, | |
| "grad_norm": 4.363593101501465, | |
| "learning_rate": 1.545562852237039e-05, | |
| "loss": 0.7073162794113159, | |
| "step": 988 | |
| }, | |
| { | |
| "epoch": 1.2087912087912087, | |
| "grad_norm": 1.1713706254959106, | |
| "learning_rate": 1.5433388720199156e-05, | |
| "loss": 0.891094982624054, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 1.2112332112332111, | |
| "grad_norm": 0.9442173838615417, | |
| "learning_rate": 1.5411113115163722e-05, | |
| "loss": 0.9304923415184021, | |
| "step": 992 | |
| }, | |
| { | |
| "epoch": 1.2136752136752136, | |
| "grad_norm": 2.135201930999756, | |
| "learning_rate": 1.538880188715593e-05, | |
| "loss": 0.9996479749679565, | |
| "step": 994 | |
| }, | |
| { | |
| "epoch": 1.2161172161172162, | |
| "grad_norm": 1.68083918094635, | |
| "learning_rate": 1.5366455216355298e-05, | |
| "loss": 0.8368605971336365, | |
| "step": 996 | |
| }, | |
| { | |
| "epoch": 1.2185592185592187, | |
| "grad_norm": 0.7228335738182068, | |
| "learning_rate": 1.534407328322758e-05, | |
| "loss": 0.9793355464935303, | |
| "step": 998 | |
| }, | |
| { | |
| "epoch": 1.221001221001221, | |
| "grad_norm": 3.5241169929504395, | |
| "learning_rate": 1.5321656268523294e-05, | |
| "loss": 0.6125832796096802, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 1.2234432234432235, | |
| "grad_norm": 0.628485381603241, | |
| "learning_rate": 1.5299204353276268e-05, | |
| "loss": 0.7384300827980042, | |
| "step": 1002 | |
| }, | |
| { | |
| "epoch": 1.225885225885226, | |
| "grad_norm": 0.8416216373443604, | |
| "learning_rate": 1.5276717718802183e-05, | |
| "loss": 0.9433239698410034, | |
| "step": 1004 | |
| }, | |
| { | |
| "epoch": 1.2283272283272284, | |
| "grad_norm": 1.3178609609603882, | |
| "learning_rate": 1.5254196546697088e-05, | |
| "loss": 0.9707098603248596, | |
| "step": 1006 | |
| }, | |
| { | |
| "epoch": 1.2307692307692308, | |
| "grad_norm": 1.0210210084915161, | |
| "learning_rate": 1.523164101883597e-05, | |
| "loss": 0.5824246406555176, | |
| "step": 1008 | |
| }, | |
| { | |
| "epoch": 1.2332112332112333, | |
| "grad_norm": 0.7243679165840149, | |
| "learning_rate": 1.5209051317371242e-05, | |
| "loss": 1.0274351835250854, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 1.2356532356532357, | |
| "grad_norm": 0.7745081782341003, | |
| "learning_rate": 1.5186427624731313e-05, | |
| "loss": 0.6757472157478333, | |
| "step": 1012 | |
| }, | |
| { | |
| "epoch": 1.2380952380952381, | |
| "grad_norm": 0.5712753534317017, | |
| "learning_rate": 1.5163770123619083e-05, | |
| "loss": 1.041149616241455, | |
| "step": 1014 | |
| }, | |
| { | |
| "epoch": 1.2405372405372406, | |
| "grad_norm": 1.5870078802108765, | |
| "learning_rate": 1.5141078997010486e-05, | |
| "loss": 0.886056125164032, | |
| "step": 1016 | |
| }, | |
| { | |
| "epoch": 1.242979242979243, | |
| "grad_norm": 0.9383798837661743, | |
| "learning_rate": 1.5118354428153008e-05, | |
| "loss": 0.9722467660903931, | |
| "step": 1018 | |
| }, | |
| { | |
| "epoch": 1.2454212454212454, | |
| "grad_norm": 3.283290147781372, | |
| "learning_rate": 1.5095596600564197e-05, | |
| "loss": 0.6366119980812073, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 1.2478632478632479, | |
| "grad_norm": 3.411051034927368, | |
| "learning_rate": 1.5072805698030197e-05, | |
| "loss": 0.7901923656463623, | |
| "step": 1022 | |
| }, | |
| { | |
| "epoch": 1.2503052503052503, | |
| "grad_norm": 1.0399166345596313, | |
| "learning_rate": 1.504998190460426e-05, | |
| "loss": 0.9346777200698853, | |
| "step": 1024 | |
| }, | |
| { | |
| "epoch": 1.2527472527472527, | |
| "grad_norm": 0.6323780417442322, | |
| "learning_rate": 1.5027125404605246e-05, | |
| "loss": 0.8927645087242126, | |
| "step": 1026 | |
| }, | |
| { | |
| "epoch": 1.2551892551892552, | |
| "grad_norm": 0.7854591608047485, | |
| "learning_rate": 1.500423638261615e-05, | |
| "loss": 0.8685034513473511, | |
| "step": 1028 | |
| }, | |
| { | |
| "epoch": 1.2576312576312576, | |
| "grad_norm": 0.7747111320495605, | |
| "learning_rate": 1.4981315023482605e-05, | |
| "loss": 0.8063104152679443, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 1.26007326007326, | |
| "grad_norm": 0.7940489649772644, | |
| "learning_rate": 1.4958361512311394e-05, | |
| "loss": 1.0881439447402954, | |
| "step": 1032 | |
| }, | |
| { | |
| "epoch": 1.2625152625152625, | |
| "grad_norm": 3.6989586353302, | |
| "learning_rate": 1.4935376034468944e-05, | |
| "loss": 1.1380131244659424, | |
| "step": 1034 | |
| }, | |
| { | |
| "epoch": 1.264957264957265, | |
| "grad_norm": 0.5151039361953735, | |
| "learning_rate": 1.4912358775579841e-05, | |
| "loss": 0.6871868968009949, | |
| "step": 1036 | |
| }, | |
| { | |
| "epoch": 1.2673992673992673, | |
| "grad_norm": 1.3680596351623535, | |
| "learning_rate": 1.4889309921525325e-05, | |
| "loss": 0.6862649321556091, | |
| "step": 1038 | |
| }, | |
| { | |
| "epoch": 1.2698412698412698, | |
| "grad_norm": 0.6552305221557617, | |
| "learning_rate": 1.4866229658441793e-05, | |
| "loss": 0.7429234385490417, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 1.2722832722832722, | |
| "grad_norm": 0.5459038019180298, | |
| "learning_rate": 1.4843118172719289e-05, | |
| "loss": 0.9307520389556885, | |
| "step": 1042 | |
| }, | |
| { | |
| "epoch": 1.2747252747252746, | |
| "grad_norm": 0.5527384281158447, | |
| "learning_rate": 1.4819975650999998e-05, | |
| "loss": 0.7104328274726868, | |
| "step": 1044 | |
| }, | |
| { | |
| "epoch": 1.277167277167277, | |
| "grad_norm": 1.2261544466018677, | |
| "learning_rate": 1.4796802280176762e-05, | |
| "loss": 1.0070260763168335, | |
| "step": 1046 | |
| }, | |
| { | |
| "epoch": 1.2796092796092795, | |
| "grad_norm": 1.9242292642593384, | |
| "learning_rate": 1.4773598247391527e-05, | |
| "loss": 0.690989077091217, | |
| "step": 1048 | |
| }, | |
| { | |
| "epoch": 1.282051282051282, | |
| "grad_norm": 1.825949788093567, | |
| "learning_rate": 1.4750363740033881e-05, | |
| "loss": 0.42399048805236816, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 1.2844932844932844, | |
| "grad_norm": 1.841841459274292, | |
| "learning_rate": 1.4727098945739497e-05, | |
| "loss": 1.0426183938980103, | |
| "step": 1052 | |
| }, | |
| { | |
| "epoch": 1.2869352869352868, | |
| "grad_norm": 0.51153963804245, | |
| "learning_rate": 1.470380405238865e-05, | |
| "loss": 0.8385255336761475, | |
| "step": 1054 | |
| }, | |
| { | |
| "epoch": 1.2893772893772895, | |
| "grad_norm": 2.656769275665283, | |
| "learning_rate": 1.4680479248104678e-05, | |
| "loss": 0.6596496105194092, | |
| "step": 1056 | |
| }, | |
| { | |
| "epoch": 1.291819291819292, | |
| "grad_norm": 1.2762665748596191, | |
| "learning_rate": 1.4657124721252476e-05, | |
| "loss": 1.232382893562317, | |
| "step": 1058 | |
| }, | |
| { | |
| "epoch": 1.2942612942612943, | |
| "grad_norm": 1.1065174341201782, | |
| "learning_rate": 1.4633740660436974e-05, | |
| "loss": 1.0262730121612549, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 1.2967032967032968, | |
| "grad_norm": 3.235954999923706, | |
| "learning_rate": 1.4610327254501607e-05, | |
| "loss": 0.6136125326156616, | |
| "step": 1062 | |
| }, | |
| { | |
| "epoch": 1.2991452991452992, | |
| "grad_norm": 0.5966620445251465, | |
| "learning_rate": 1.4586884692526791e-05, | |
| "loss": 0.8876266479492188, | |
| "step": 1064 | |
| }, | |
| { | |
| "epoch": 1.3015873015873016, | |
| "grad_norm": 2.7788665294647217, | |
| "learning_rate": 1.4563413163828397e-05, | |
| "loss": 0.7026379108428955, | |
| "step": 1066 | |
| }, | |
| { | |
| "epoch": 1.304029304029304, | |
| "grad_norm": 0.7998191714286804, | |
| "learning_rate": 1.4539912857956234e-05, | |
| "loss": 0.9727767705917358, | |
| "step": 1068 | |
| }, | |
| { | |
| "epoch": 1.3064713064713065, | |
| "grad_norm": 1.385021686553955, | |
| "learning_rate": 1.4516383964692495e-05, | |
| "loss": 0.7625731825828552, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 1.308913308913309, | |
| "grad_norm": 1.5408962965011597, | |
| "learning_rate": 1.4492826674050248e-05, | |
| "loss": 0.9061781167984009, | |
| "step": 1072 | |
| }, | |
| { | |
| "epoch": 1.3113553113553114, | |
| "grad_norm": 6.768632888793945, | |
| "learning_rate": 1.4469241176271884e-05, | |
| "loss": 0.7514428496360779, | |
| "step": 1074 | |
| }, | |
| { | |
| "epoch": 1.3137973137973138, | |
| "grad_norm": 0.7883042097091675, | |
| "learning_rate": 1.4445627661827589e-05, | |
| "loss": 0.6796785593032837, | |
| "step": 1076 | |
| }, | |
| { | |
| "epoch": 1.3162393162393162, | |
| "grad_norm": 1.3659143447875977, | |
| "learning_rate": 1.4421986321413801e-05, | |
| "loss": 0.9605479836463928, | |
| "step": 1078 | |
| }, | |
| { | |
| "epoch": 1.3186813186813187, | |
| "grad_norm": 1.356332778930664, | |
| "learning_rate": 1.439831734595168e-05, | |
| "loss": 0.8200567364692688, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 1.321123321123321, | |
| "grad_norm": 1.2193089723587036, | |
| "learning_rate": 1.4374620926585556e-05, | |
| "loss": 0.881037175655365, | |
| "step": 1082 | |
| }, | |
| { | |
| "epoch": 1.3235653235653235, | |
| "grad_norm": 0.5569941401481628, | |
| "learning_rate": 1.4350897254681386e-05, | |
| "loss": 0.8864683508872986, | |
| "step": 1084 | |
| }, | |
| { | |
| "epoch": 1.326007326007326, | |
| "grad_norm": 1.2279424667358398, | |
| "learning_rate": 1.4327146521825213e-05, | |
| "loss": 1.0031923055648804, | |
| "step": 1086 | |
| }, | |
| { | |
| "epoch": 1.3284493284493284, | |
| "grad_norm": 7.039901256561279, | |
| "learning_rate": 1.4303368919821619e-05, | |
| "loss": 1.0991631746292114, | |
| "step": 1088 | |
| }, | |
| { | |
| "epoch": 1.3308913308913308, | |
| "grad_norm": 0.7994674444198608, | |
| "learning_rate": 1.4279564640692172e-05, | |
| "loss": 0.6553327441215515, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 1.3333333333333333, | |
| "grad_norm": 5.8774871826171875, | |
| "learning_rate": 1.4255733876673874e-05, | |
| "loss": 0.7461038827896118, | |
| "step": 1092 | |
| }, | |
| { | |
| "epoch": 1.3357753357753357, | |
| "grad_norm": 0.7029107213020325, | |
| "learning_rate": 1.4231876820217623e-05, | |
| "loss": 0.9785415530204773, | |
| "step": 1094 | |
| }, | |
| { | |
| "epoch": 1.3382173382173383, | |
| "grad_norm": 3.4110426902770996, | |
| "learning_rate": 1.4207993663986636e-05, | |
| "loss": 0.47891128063201904, | |
| "step": 1096 | |
| }, | |
| { | |
| "epoch": 1.3406593406593408, | |
| "grad_norm": 1.4747514724731445, | |
| "learning_rate": 1.4184084600854906e-05, | |
| "loss": 1.1681262254714966, | |
| "step": 1098 | |
| }, | |
| { | |
| "epoch": 1.3431013431013432, | |
| "grad_norm": 1.336816668510437, | |
| "learning_rate": 1.4160149823905654e-05, | |
| "loss": 1.0751440525054932, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 1.3455433455433456, | |
| "grad_norm": 0.80948805809021, | |
| "learning_rate": 1.4136189526429749e-05, | |
| "loss": 1.000352144241333, | |
| "step": 1102 | |
| }, | |
| { | |
| "epoch": 1.347985347985348, | |
| "grad_norm": 2.687490701675415, | |
| "learning_rate": 1.4112203901924153e-05, | |
| "loss": 0.8417548537254333, | |
| "step": 1104 | |
| }, | |
| { | |
| "epoch": 1.3504273504273505, | |
| "grad_norm": 0.8591554760932922, | |
| "learning_rate": 1.4088193144090376e-05, | |
| "loss": 0.9740299582481384, | |
| "step": 1106 | |
| }, | |
| { | |
| "epoch": 1.352869352869353, | |
| "grad_norm": 3.9168152809143066, | |
| "learning_rate": 1.406415744683289e-05, | |
| "loss": 0.7925201058387756, | |
| "step": 1108 | |
| }, | |
| { | |
| "epoch": 1.3553113553113554, | |
| "grad_norm": 0.8020510673522949, | |
| "learning_rate": 1.4040097004257567e-05, | |
| "loss": 1.042458415031433, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 1.3577533577533578, | |
| "grad_norm": 1.342916488647461, | |
| "learning_rate": 1.4016012010670125e-05, | |
| "loss": 0.9074981808662415, | |
| "step": 1112 | |
| }, | |
| { | |
| "epoch": 1.3601953601953602, | |
| "grad_norm": 1.7544145584106445, | |
| "learning_rate": 1.3991902660574544e-05, | |
| "loss": 0.8596875667572021, | |
| "step": 1114 | |
| }, | |
| { | |
| "epoch": 1.3626373626373627, | |
| "grad_norm": 2.7417960166931152, | |
| "learning_rate": 1.39677691486715e-05, | |
| "loss": 0.5096735954284668, | |
| "step": 1116 | |
| }, | |
| { | |
| "epoch": 1.3650793650793651, | |
| "grad_norm": 6.50905704498291, | |
| "learning_rate": 1.3943611669856797e-05, | |
| "loss": 0.8825461268424988, | |
| "step": 1118 | |
| }, | |
| { | |
| "epoch": 1.3675213675213675, | |
| "grad_norm": 1.5938875675201416, | |
| "learning_rate": 1.3919430419219787e-05, | |
| "loss": 0.9512450695037842, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 1.36996336996337, | |
| "grad_norm": 2.952125072479248, | |
| "learning_rate": 1.389522559204179e-05, | |
| "loss": 0.9308354258537292, | |
| "step": 1122 | |
| }, | |
| { | |
| "epoch": 1.3724053724053724, | |
| "grad_norm": 0.7429002523422241, | |
| "learning_rate": 1.387099738379454e-05, | |
| "loss": 0.8262976408004761, | |
| "step": 1124 | |
| }, | |
| { | |
| "epoch": 1.3748473748473748, | |
| "grad_norm": 2.061551809310913, | |
| "learning_rate": 1.3846745990138581e-05, | |
| "loss": 1.28501558303833, | |
| "step": 1126 | |
| }, | |
| { | |
| "epoch": 1.3772893772893773, | |
| "grad_norm": 0.9269969463348389, | |
| "learning_rate": 1.382247160692169e-05, | |
| "loss": 0.9468799829483032, | |
| "step": 1128 | |
| }, | |
| { | |
| "epoch": 1.3797313797313797, | |
| "grad_norm": 0.8824846744537354, | |
| "learning_rate": 1.3798174430177314e-05, | |
| "loss": 0.6640329360961914, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 1.3821733821733821, | |
| "grad_norm": 0.633753776550293, | |
| "learning_rate": 1.3773854656122962e-05, | |
| "loss": 0.7266710996627808, | |
| "step": 1132 | |
| }, | |
| { | |
| "epoch": 1.3846153846153846, | |
| "grad_norm": 5.053553581237793, | |
| "learning_rate": 1.3749512481158649e-05, | |
| "loss": 0.5124362707138062, | |
| "step": 1134 | |
| }, | |
| { | |
| "epoch": 1.387057387057387, | |
| "grad_norm": 1.3869932889938354, | |
| "learning_rate": 1.3725148101865275e-05, | |
| "loss": 0.6932591199874878, | |
| "step": 1136 | |
| }, | |
| { | |
| "epoch": 1.3894993894993894, | |
| "grad_norm": 0.8337790369987488, | |
| "learning_rate": 1.3700761715003068e-05, | |
| "loss": 1.0207314491271973, | |
| "step": 1138 | |
| }, | |
| { | |
| "epoch": 1.3919413919413919, | |
| "grad_norm": 2.2834839820861816, | |
| "learning_rate": 1.3676353517509981e-05, | |
| "loss": 0.8703376650810242, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 1.3943833943833943, | |
| "grad_norm": 1.934580683708191, | |
| "learning_rate": 1.3651923706500105e-05, | |
| "loss": 0.9365097284317017, | |
| "step": 1142 | |
| }, | |
| { | |
| "epoch": 1.3968253968253967, | |
| "grad_norm": 2.526843786239624, | |
| "learning_rate": 1.362747247926207e-05, | |
| "loss": 0.7051898837089539, | |
| "step": 1144 | |
| }, | |
| { | |
| "epoch": 1.3992673992673992, | |
| "grad_norm": 0.8698064684867859, | |
| "learning_rate": 1.3603000033257465e-05, | |
| "loss": 1.0435025691986084, | |
| "step": 1146 | |
| }, | |
| { | |
| "epoch": 1.4017094017094016, | |
| "grad_norm": 2.076078176498413, | |
| "learning_rate": 1.3578506566119236e-05, | |
| "loss": 0.8728469610214233, | |
| "step": 1148 | |
| }, | |
| { | |
| "epoch": 1.404151404151404, | |
| "grad_norm": 0.8785778880119324, | |
| "learning_rate": 1.355399227565008e-05, | |
| "loss": 0.7566535472869873, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 1.4065934065934065, | |
| "grad_norm": 1.0821596384048462, | |
| "learning_rate": 1.352945735982087e-05, | |
| "loss": 0.7982299327850342, | |
| "step": 1152 | |
| }, | |
| { | |
| "epoch": 1.409035409035409, | |
| "grad_norm": 1.226269006729126, | |
| "learning_rate": 1.3504902016769039e-05, | |
| "loss": 0.7825957536697388, | |
| "step": 1154 | |
| }, | |
| { | |
| "epoch": 1.4114774114774113, | |
| "grad_norm": 1.9049503803253174, | |
| "learning_rate": 1.348032644479698e-05, | |
| "loss": 0.6891085505485535, | |
| "step": 1156 | |
| }, | |
| { | |
| "epoch": 1.4139194139194138, | |
| "grad_norm": 1.1582715511322021, | |
| "learning_rate": 1.3455730842370462e-05, | |
| "loss": 0.8980281352996826, | |
| "step": 1158 | |
| }, | |
| { | |
| "epoch": 1.4163614163614164, | |
| "grad_norm": 0.8849154114723206, | |
| "learning_rate": 1.3431115408117002e-05, | |
| "loss": 0.8913061618804932, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 1.4188034188034189, | |
| "grad_norm": 1.0964971780776978, | |
| "learning_rate": 1.3406480340824272e-05, | |
| "loss": 0.7366968393325806, | |
| "step": 1162 | |
| }, | |
| { | |
| "epoch": 1.4212454212454213, | |
| "grad_norm": 13.473047256469727, | |
| "learning_rate": 1.3381825839438514e-05, | |
| "loss": 0.6932869553565979, | |
| "step": 1164 | |
| }, | |
| { | |
| "epoch": 1.4236874236874237, | |
| "grad_norm": 1.122653603553772, | |
| "learning_rate": 1.3357152103062892e-05, | |
| "loss": 1.1828283071517944, | |
| "step": 1166 | |
| }, | |
| { | |
| "epoch": 1.4261294261294262, | |
| "grad_norm": 0.561507523059845, | |
| "learning_rate": 1.3332459330955921e-05, | |
| "loss": 0.966327428817749, | |
| "step": 1168 | |
| }, | |
| { | |
| "epoch": 1.4285714285714286, | |
| "grad_norm": 2.9495770931243896, | |
| "learning_rate": 1.3307747722529838e-05, | |
| "loss": 0.8709004521369934, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 1.431013431013431, | |
| "grad_norm": 0.6762902140617371, | |
| "learning_rate": 1.3283017477348993e-05, | |
| "loss": 0.9068043231964111, | |
| "step": 1172 | |
| }, | |
| { | |
| "epoch": 1.4334554334554335, | |
| "grad_norm": 0.7292370796203613, | |
| "learning_rate": 1.3258268795128258e-05, | |
| "loss": 0.9378133416175842, | |
| "step": 1174 | |
| }, | |
| { | |
| "epoch": 1.435897435897436, | |
| "grad_norm": 0.974267303943634, | |
| "learning_rate": 1.3233501875731376e-05, | |
| "loss": 1.0176819562911987, | |
| "step": 1176 | |
| }, | |
| { | |
| "epoch": 1.4383394383394383, | |
| "grad_norm": 5.0265116691589355, | |
| "learning_rate": 1.320871691916938e-05, | |
| "loss": 0.7393254041671753, | |
| "step": 1178 | |
| }, | |
| { | |
| "epoch": 1.4407814407814408, | |
| "grad_norm": 3.240424394607544, | |
| "learning_rate": 1.3183914125598966e-05, | |
| "loss": 0.8406731486320496, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 1.4432234432234432, | |
| "grad_norm": 0.9493277668952942, | |
| "learning_rate": 1.3159093695320881e-05, | |
| "loss": 0.756401002407074, | |
| "step": 1182 | |
| }, | |
| { | |
| "epoch": 1.4456654456654456, | |
| "grad_norm": 0.9762367010116577, | |
| "learning_rate": 1.313425582877829e-05, | |
| "loss": 1.055999755859375, | |
| "step": 1184 | |
| }, | |
| { | |
| "epoch": 1.448107448107448, | |
| "grad_norm": 0.6565649509429932, | |
| "learning_rate": 1.3109400726555179e-05, | |
| "loss": 0.8509088754653931, | |
| "step": 1186 | |
| }, | |
| { | |
| "epoch": 1.4505494505494505, | |
| "grad_norm": 2.6168346405029297, | |
| "learning_rate": 1.3084528589374718e-05, | |
| "loss": 0.7348777651786804, | |
| "step": 1188 | |
| }, | |
| { | |
| "epoch": 1.452991452991453, | |
| "grad_norm": 1.5224627256393433, | |
| "learning_rate": 1.305963961809765e-05, | |
| "loss": 0.9267134666442871, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 1.4554334554334554, | |
| "grad_norm": 0.7623134255409241, | |
| "learning_rate": 1.3034734013720669e-05, | |
| "loss": 0.8056920170783997, | |
| "step": 1192 | |
| }, | |
| { | |
| "epoch": 1.4578754578754578, | |
| "grad_norm": 1.4244619607925415, | |
| "learning_rate": 1.3009811977374784e-05, | |
| "loss": 0.6724956631660461, | |
| "step": 1194 | |
| }, | |
| { | |
| "epoch": 1.4603174603174602, | |
| "grad_norm": 0.7519621253013611, | |
| "learning_rate": 1.2984873710323711e-05, | |
| "loss": 0.6628673076629639, | |
| "step": 1196 | |
| }, | |
| { | |
| "epoch": 1.462759462759463, | |
| "grad_norm": 0.7634888887405396, | |
| "learning_rate": 1.2959919413962242e-05, | |
| "loss": 0.8408687710762024, | |
| "step": 1198 | |
| }, | |
| { | |
| "epoch": 1.4652014652014653, | |
| "grad_norm": 1.9624353647232056, | |
| "learning_rate": 1.2934949289814611e-05, | |
| "loss": 1.1985151767730713, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 1.4676434676434678, | |
| "grad_norm": 1.5909016132354736, | |
| "learning_rate": 1.290996353953288e-05, | |
| "loss": 0.9667496681213379, | |
| "step": 1202 | |
| }, | |
| { | |
| "epoch": 1.4700854700854702, | |
| "grad_norm": 0.8254397511482239, | |
| "learning_rate": 1.2884962364895304e-05, | |
| "loss": 0.9893684983253479, | |
| "step": 1204 | |
| }, | |
| { | |
| "epoch": 1.4725274725274726, | |
| "grad_norm": 0.9778246879577637, | |
| "learning_rate": 1.2859945967804687e-05, | |
| "loss": 0.8230042457580566, | |
| "step": 1206 | |
| }, | |
| { | |
| "epoch": 1.474969474969475, | |
| "grad_norm": 2.8977315425872803, | |
| "learning_rate": 1.2834914550286789e-05, | |
| "loss": 0.7464233040809631, | |
| "step": 1208 | |
| }, | |
| { | |
| "epoch": 1.4774114774114775, | |
| "grad_norm": 16.703990936279297, | |
| "learning_rate": 1.2809868314488647e-05, | |
| "loss": 0.8318718671798706, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 1.47985347985348, | |
| "grad_norm": 1.9694427251815796, | |
| "learning_rate": 1.2784807462676983e-05, | |
| "loss": 0.8906052708625793, | |
| "step": 1212 | |
| }, | |
| { | |
| "epoch": 1.4822954822954824, | |
| "grad_norm": 0.8902061581611633, | |
| "learning_rate": 1.2759732197236548e-05, | |
| "loss": 0.9788769483566284, | |
| "step": 1214 | |
| }, | |
| { | |
| "epoch": 1.4847374847374848, | |
| "grad_norm": 0.8015345335006714, | |
| "learning_rate": 1.2734642720668494e-05, | |
| "loss": 0.9402112364768982, | |
| "step": 1216 | |
| }, | |
| { | |
| "epoch": 1.4871794871794872, | |
| "grad_norm": 2.7102816104888916, | |
| "learning_rate": 1.2709539235588739e-05, | |
| "loss": 0.27936387062072754, | |
| "step": 1218 | |
| }, | |
| { | |
| "epoch": 1.4896214896214897, | |
| "grad_norm": 0.5606179237365723, | |
| "learning_rate": 1.2684421944726323e-05, | |
| "loss": 0.7066472768783569, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 1.492063492063492, | |
| "grad_norm": 1.7472079992294312, | |
| "learning_rate": 1.2659291050921798e-05, | |
| "loss": 0.8000496029853821, | |
| "step": 1222 | |
| }, | |
| { | |
| "epoch": 1.4945054945054945, | |
| "grad_norm": 3.1667306423187256, | |
| "learning_rate": 1.263414675712554e-05, | |
| "loss": 0.733214259147644, | |
| "step": 1224 | |
| }, | |
| { | |
| "epoch": 1.496947496947497, | |
| "grad_norm": 1.6288788318634033, | |
| "learning_rate": 1.2608989266396165e-05, | |
| "loss": 0.8229939341545105, | |
| "step": 1226 | |
| }, | |
| { | |
| "epoch": 1.4993894993894994, | |
| "grad_norm": 3.6219799518585205, | |
| "learning_rate": 1.2583818781898855e-05, | |
| "loss": 0.4456430971622467, | |
| "step": 1228 | |
| }, | |
| { | |
| "epoch": 1.5018315018315018, | |
| "grad_norm": 1.921484351158142, | |
| "learning_rate": 1.2558635506903717e-05, | |
| "loss": 0.6831130981445312, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 1.5042735042735043, | |
| "grad_norm": 0.4906938970088959, | |
| "learning_rate": 1.253343964478417e-05, | |
| "loss": 0.6764166951179504, | |
| "step": 1232 | |
| }, | |
| { | |
| "epoch": 1.5067155067155067, | |
| "grad_norm": 1.23770272731781, | |
| "learning_rate": 1.250823139901527e-05, | |
| "loss": 0.9079239368438721, | |
| "step": 1234 | |
| }, | |
| { | |
| "epoch": 1.5091575091575091, | |
| "grad_norm": 0.9974614977836609, | |
| "learning_rate": 1.2483010973172077e-05, | |
| "loss": 0.9452921748161316, | |
| "step": 1236 | |
| }, | |
| { | |
| "epoch": 1.5115995115995116, | |
| "grad_norm": 0.9079129099845886, | |
| "learning_rate": 1.2457778570928026e-05, | |
| "loss": 0.8234338760375977, | |
| "step": 1238 | |
| }, | |
| { | |
| "epoch": 1.514041514041514, | |
| "grad_norm": 0.9488117098808289, | |
| "learning_rate": 1.2432534396053261e-05, | |
| "loss": 0.8415461778640747, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 1.5164835164835164, | |
| "grad_norm": 0.7722516059875488, | |
| "learning_rate": 1.2407278652413001e-05, | |
| "loss": 1.0288302898406982, | |
| "step": 1242 | |
| }, | |
| { | |
| "epoch": 1.5189255189255189, | |
| "grad_norm": 3.5721123218536377, | |
| "learning_rate": 1.2382011543965896e-05, | |
| "loss": 0.7554802298545837, | |
| "step": 1244 | |
| }, | |
| { | |
| "epoch": 1.5213675213675213, | |
| "grad_norm": 0.6691564917564392, | |
| "learning_rate": 1.2356733274762367e-05, | |
| "loss": 0.7608579397201538, | |
| "step": 1246 | |
| }, | |
| { | |
| "epoch": 1.5238095238095237, | |
| "grad_norm": 0.9692053198814392, | |
| "learning_rate": 1.2331444048942969e-05, | |
| "loss": 0.8119852542877197, | |
| "step": 1248 | |
| }, | |
| { | |
| "epoch": 1.5262515262515262, | |
| "grad_norm": 1.7576018571853638, | |
| "learning_rate": 1.2306144070736747e-05, | |
| "loss": 1.1432095766067505, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 1.5286935286935286, | |
| "grad_norm": 2.8032066822052, | |
| "learning_rate": 1.228083354445957e-05, | |
| "loss": 0.7118352055549622, | |
| "step": 1252 | |
| }, | |
| { | |
| "epoch": 1.531135531135531, | |
| "grad_norm": 1.4581559896469116, | |
| "learning_rate": 1.2255512674512491e-05, | |
| "loss": 0.9391320943832397, | |
| "step": 1254 | |
| }, | |
| { | |
| "epoch": 1.5335775335775335, | |
| "grad_norm": 0.9859986305236816, | |
| "learning_rate": 1.2230181665380101e-05, | |
| "loss": 1.0426268577575684, | |
| "step": 1256 | |
| }, | |
| { | |
| "epoch": 1.536019536019536, | |
| "grad_norm": 0.6827996373176575, | |
| "learning_rate": 1.220484072162887e-05, | |
| "loss": 0.35382741689682007, | |
| "step": 1258 | |
| }, | |
| { | |
| "epoch": 1.5384615384615383, | |
| "grad_norm": 4.84762716293335, | |
| "learning_rate": 1.2179490047905495e-05, | |
| "loss": 0.6097034215927124, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 1.5409035409035408, | |
| "grad_norm": 1.7744395732879639, | |
| "learning_rate": 1.2154129848935258e-05, | |
| "loss": 0.6083784103393555, | |
| "step": 1262 | |
| }, | |
| { | |
| "epoch": 1.5433455433455432, | |
| "grad_norm": 2.7440474033355713, | |
| "learning_rate": 1.2128760329520355e-05, | |
| "loss": 0.7916078567504883, | |
| "step": 1264 | |
| }, | |
| { | |
| "epoch": 1.5457875457875456, | |
| "grad_norm": 1.4891324043273926, | |
| "learning_rate": 1.210338169453825e-05, | |
| "loss": 0.8106079697608948, | |
| "step": 1266 | |
| }, | |
| { | |
| "epoch": 1.5482295482295483, | |
| "grad_norm": 0.9212846159934998, | |
| "learning_rate": 1.2077994148940033e-05, | |
| "loss": 0.8362663984298706, | |
| "step": 1268 | |
| }, | |
| { | |
| "epoch": 1.5506715506715507, | |
| "grad_norm": 1.9237959384918213, | |
| "learning_rate": 1.2052597897748746e-05, | |
| "loss": 0.4818616807460785, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 1.5531135531135531, | |
| "grad_norm": 3.0629465579986572, | |
| "learning_rate": 1.202719314605773e-05, | |
| "loss": 1.0731854438781738, | |
| "step": 1272 | |
| }, | |
| { | |
| "epoch": 1.5555555555555556, | |
| "grad_norm": 1.05351984500885, | |
| "learning_rate": 1.2001780099028988e-05, | |
| "loss": 0.943490207195282, | |
| "step": 1274 | |
| }, | |
| { | |
| "epoch": 1.557997557997558, | |
| "grad_norm": 4.432197570800781, | |
| "learning_rate": 1.1976358961891504e-05, | |
| "loss": 1.3021904230117798, | |
| "step": 1276 | |
| }, | |
| { | |
| "epoch": 1.5604395604395604, | |
| "grad_norm": 1.0480554103851318, | |
| "learning_rate": 1.1950929939939596e-05, | |
| "loss": 0.7510530948638916, | |
| "step": 1278 | |
| }, | |
| { | |
| "epoch": 1.5628815628815629, | |
| "grad_norm": 1.6610548496246338, | |
| "learning_rate": 1.192549323853126e-05, | |
| "loss": 0.9113296270370483, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 1.5653235653235653, | |
| "grad_norm": 4.827365875244141, | |
| "learning_rate": 1.1900049063086508e-05, | |
| "loss": 0.6182503700256348, | |
| "step": 1282 | |
| }, | |
| { | |
| "epoch": 1.5677655677655677, | |
| "grad_norm": 0.8534301519393921, | |
| "learning_rate": 1.1874597619085712e-05, | |
| "loss": 0.9308310151100159, | |
| "step": 1284 | |
| }, | |
| { | |
| "epoch": 1.5702075702075702, | |
| "grad_norm": 0.9158720970153809, | |
| "learning_rate": 1.1849139112067937e-05, | |
| "loss": 0.9331011772155762, | |
| "step": 1286 | |
| }, | |
| { | |
| "epoch": 1.5726495726495726, | |
| "grad_norm": 12.01048755645752, | |
| "learning_rate": 1.18236737476293e-05, | |
| "loss": 0.490848183631897, | |
| "step": 1288 | |
| }, | |
| { | |
| "epoch": 1.575091575091575, | |
| "grad_norm": 0.9270315170288086, | |
| "learning_rate": 1.1798201731421286e-05, | |
| "loss": 0.7262513637542725, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 1.5775335775335775, | |
| "grad_norm": 1.8197249174118042, | |
| "learning_rate": 1.1772723269149096e-05, | |
| "loss": 0.43270692229270935, | |
| "step": 1292 | |
| }, | |
| { | |
| "epoch": 1.5799755799755801, | |
| "grad_norm": 1.0564115047454834, | |
| "learning_rate": 1.1747238566569993e-05, | |
| "loss": 0.6380181908607483, | |
| "step": 1294 | |
| }, | |
| { | |
| "epoch": 1.5824175824175826, | |
| "grad_norm": 0.937374472618103, | |
| "learning_rate": 1.1721747829491639e-05, | |
| "loss": 0.9579664468765259, | |
| "step": 1296 | |
| }, | |
| { | |
| "epoch": 1.584859584859585, | |
| "grad_norm": 0.9189720749855042, | |
| "learning_rate": 1.169625126377042e-05, | |
| "loss": 1.1132162809371948, | |
| "step": 1298 | |
| }, | |
| { | |
| "epoch": 1.5873015873015874, | |
| "grad_norm": 1.5094869136810303, | |
| "learning_rate": 1.1670749075309798e-05, | |
| "loss": 0.9595221877098083, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 1.5897435897435899, | |
| "grad_norm": 3.5550084114074707, | |
| "learning_rate": 1.164524147005864e-05, | |
| "loss": 1.0293970108032227, | |
| "step": 1302 | |
| }, | |
| { | |
| "epoch": 1.5921855921855923, | |
| "grad_norm": 8.070341110229492, | |
| "learning_rate": 1.1619728654009561e-05, | |
| "loss": 0.9469819664955139, | |
| "step": 1304 | |
| }, | |
| { | |
| "epoch": 1.5946275946275947, | |
| "grad_norm": 2.206435203552246, | |
| "learning_rate": 1.1594210833197252e-05, | |
| "loss": 0.6112901568412781, | |
| "step": 1306 | |
| }, | |
| { | |
| "epoch": 1.5970695970695972, | |
| "grad_norm": 0.7995406985282898, | |
| "learning_rate": 1.156868821369683e-05, | |
| "loss": 0.9325740337371826, | |
| "step": 1308 | |
| }, | |
| { | |
| "epoch": 1.5995115995115996, | |
| "grad_norm": 1.177374243736267, | |
| "learning_rate": 1.1543161001622154e-05, | |
| "loss": 0.821311891078949, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 1.601953601953602, | |
| "grad_norm": 1.0490672588348389, | |
| "learning_rate": 1.1517629403124175e-05, | |
| "loss": 0.8008186221122742, | |
| "step": 1312 | |
| }, | |
| { | |
| "epoch": 1.6043956043956045, | |
| "grad_norm": 1.078908085823059, | |
| "learning_rate": 1.1492093624389274e-05, | |
| "loss": 0.9607588648796082, | |
| "step": 1314 | |
| }, | |
| { | |
| "epoch": 1.606837606837607, | |
| "grad_norm": 0.9914792776107788, | |
| "learning_rate": 1.1466553871637585e-05, | |
| "loss": 1.0678871870040894, | |
| "step": 1316 | |
| }, | |
| { | |
| "epoch": 1.6092796092796093, | |
| "grad_norm": 0.9516023993492126, | |
| "learning_rate": 1.1441010351121332e-05, | |
| "loss": 0.927726686000824, | |
| "step": 1318 | |
| }, | |
| { | |
| "epoch": 1.6117216117216118, | |
| "grad_norm": 1.6526710987091064, | |
| "learning_rate": 1.1415463269123172e-05, | |
| "loss": 1.1496163606643677, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 1.6141636141636142, | |
| "grad_norm": 0.8162203431129456, | |
| "learning_rate": 1.1389912831954524e-05, | |
| "loss": 0.849646270275116, | |
| "step": 1322 | |
| }, | |
| { | |
| "epoch": 1.6166056166056166, | |
| "grad_norm": 0.7434989809989929, | |
| "learning_rate": 1.1364359245953897e-05, | |
| "loss": 1.0158569812774658, | |
| "step": 1324 | |
| }, | |
| { | |
| "epoch": 1.619047619047619, | |
| "grad_norm": 2.0639302730560303, | |
| "learning_rate": 1.1338802717485234e-05, | |
| "loss": 0.6589023470878601, | |
| "step": 1326 | |
| }, | |
| { | |
| "epoch": 1.6214896214896215, | |
| "grad_norm": 1.0379024744033813, | |
| "learning_rate": 1.1313243452936235e-05, | |
| "loss": 0.9295322895050049, | |
| "step": 1328 | |
| }, | |
| { | |
| "epoch": 1.623931623931624, | |
| "grad_norm": 1.181497573852539, | |
| "learning_rate": 1.1287681658716706e-05, | |
| "loss": 1.0116742849349976, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 1.6263736263736264, | |
| "grad_norm": 4.863892078399658, | |
| "learning_rate": 1.1262117541256872e-05, | |
| "loss": 0.8862733244895935, | |
| "step": 1332 | |
| }, | |
| { | |
| "epoch": 1.6288156288156288, | |
| "grad_norm": 0.7002055644989014, | |
| "learning_rate": 1.1236551307005722e-05, | |
| "loss": 0.9096848368644714, | |
| "step": 1334 | |
| }, | |
| { | |
| "epoch": 1.6312576312576312, | |
| "grad_norm": 0.6345333456993103, | |
| "learning_rate": 1.1210983162429347e-05, | |
| "loss": 0.5657076835632324, | |
| "step": 1336 | |
| }, | |
| { | |
| "epoch": 1.6336996336996337, | |
| "grad_norm": 2.7891440391540527, | |
| "learning_rate": 1.1185413314009254e-05, | |
| "loss": 0.9815369248390198, | |
| "step": 1338 | |
| }, | |
| { | |
| "epoch": 1.636141636141636, | |
| "grad_norm": 6.338902473449707, | |
| "learning_rate": 1.1159841968240714e-05, | |
| "loss": 0.5724242925643921, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 1.6385836385836385, | |
| "grad_norm": 2.7349283695220947, | |
| "learning_rate": 1.1134269331631096e-05, | |
| "loss": 0.4281773269176483, | |
| "step": 1342 | |
| }, | |
| { | |
| "epoch": 1.641025641025641, | |
| "grad_norm": 1.9846585988998413, | |
| "learning_rate": 1.1108695610698187e-05, | |
| "loss": 1.0027917623519897, | |
| "step": 1344 | |
| }, | |
| { | |
| "epoch": 1.6434676434676434, | |
| "grad_norm": 0.6990553140640259, | |
| "learning_rate": 1.1083121011968531e-05, | |
| "loss": 0.9550279378890991, | |
| "step": 1346 | |
| }, | |
| { | |
| "epoch": 1.6459096459096458, | |
| "grad_norm": 2.958153486251831, | |
| "learning_rate": 1.1057545741975768e-05, | |
| "loss": 0.6426241993904114, | |
| "step": 1348 | |
| }, | |
| { | |
| "epoch": 1.6483516483516483, | |
| "grad_norm": 0.8284672498703003, | |
| "learning_rate": 1.1031970007258947e-05, | |
| "loss": 0.8278497457504272, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 1.6507936507936507, | |
| "grad_norm": 0.7631545066833496, | |
| "learning_rate": 1.1006394014360882e-05, | |
| "loss": 0.9407053589820862, | |
| "step": 1352 | |
| }, | |
| { | |
| "epoch": 1.6532356532356531, | |
| "grad_norm": 4.05110502243042, | |
| "learning_rate": 1.0980817969826458e-05, | |
| "loss": 0.9099552035331726, | |
| "step": 1354 | |
| }, | |
| { | |
| "epoch": 1.6556776556776556, | |
| "grad_norm": 1.0000635385513306, | |
| "learning_rate": 1.0955242080200994e-05, | |
| "loss": 0.9383828639984131, | |
| "step": 1356 | |
| }, | |
| { | |
| "epoch": 1.658119658119658, | |
| "grad_norm": 1.1321988105773926, | |
| "learning_rate": 1.0929666552028545e-05, | |
| "loss": 0.52699214220047, | |
| "step": 1358 | |
| }, | |
| { | |
| "epoch": 1.6605616605616604, | |
| "grad_norm": 1.246857762336731, | |
| "learning_rate": 1.0904091591850255e-05, | |
| "loss": 0.6198506355285645, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 1.6630036630036629, | |
| "grad_norm": 1.0080903768539429, | |
| "learning_rate": 1.0878517406202674e-05, | |
| "loss": 0.9911934733390808, | |
| "step": 1362 | |
| }, | |
| { | |
| "epoch": 1.6654456654456653, | |
| "grad_norm": 0.8918383121490479, | |
| "learning_rate": 1.0852944201616097e-05, | |
| "loss": 1.0504215955734253, | |
| "step": 1364 | |
| }, | |
| { | |
| "epoch": 1.6678876678876677, | |
| "grad_norm": 1.0392669439315796, | |
| "learning_rate": 1.082737218461291e-05, | |
| "loss": 1.0229471921920776, | |
| "step": 1366 | |
| }, | |
| { | |
| "epoch": 1.6703296703296702, | |
| "grad_norm": 0.8570772409439087, | |
| "learning_rate": 1.080180156170589e-05, | |
| "loss": 1.049717903137207, | |
| "step": 1368 | |
| }, | |
| { | |
| "epoch": 1.6727716727716728, | |
| "grad_norm": 0.9958022236824036, | |
| "learning_rate": 1.0776232539396567e-05, | |
| "loss": 1.006693720817566, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 1.6752136752136753, | |
| "grad_norm": 0.882525622844696, | |
| "learning_rate": 1.0750665324173542e-05, | |
| "loss": 0.615381121635437, | |
| "step": 1372 | |
| }, | |
| { | |
| "epoch": 1.6776556776556777, | |
| "grad_norm": 0.9473522305488586, | |
| "learning_rate": 1.0725100122510819e-05, | |
| "loss": 0.36105355620384216, | |
| "step": 1374 | |
| }, | |
| { | |
| "epoch": 1.6800976800976801, | |
| "grad_norm": 3.743011236190796, | |
| "learning_rate": 1.0699537140866146e-05, | |
| "loss": 1.1695616245269775, | |
| "step": 1376 | |
| }, | |
| { | |
| "epoch": 1.6825396825396826, | |
| "grad_norm": 0.823453962802887, | |
| "learning_rate": 1.0673976585679341e-05, | |
| "loss": 0.9196591377258301, | |
| "step": 1378 | |
| }, | |
| { | |
| "epoch": 1.684981684981685, | |
| "grad_norm": 0.5954387187957764, | |
| "learning_rate": 1.0648418663370628e-05, | |
| "loss": 0.7695765495300293, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 1.6874236874236874, | |
| "grad_norm": 2.546109437942505, | |
| "learning_rate": 1.0622863580338967e-05, | |
| "loss": 1.0195831060409546, | |
| "step": 1382 | |
| }, | |
| { | |
| "epoch": 1.6898656898656899, | |
| "grad_norm": 0.7414639592170715, | |
| "learning_rate": 1.0597311542960385e-05, | |
| "loss": 0.8976457715034485, | |
| "step": 1384 | |
| }, | |
| { | |
| "epoch": 1.6923076923076923, | |
| "grad_norm": 0.6246572732925415, | |
| "learning_rate": 1.0571762757586321e-05, | |
| "loss": 0.9752371907234192, | |
| "step": 1386 | |
| }, | |
| { | |
| "epoch": 1.6947496947496947, | |
| "grad_norm": 0.8245002627372742, | |
| "learning_rate": 1.0546217430541947e-05, | |
| "loss": 0.9225857257843018, | |
| "step": 1388 | |
| }, | |
| { | |
| "epoch": 1.6971916971916972, | |
| "grad_norm": 0.7589647769927979, | |
| "learning_rate": 1.0520675768124507e-05, | |
| "loss": 0.47266364097595215, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 1.6996336996336996, | |
| "grad_norm": 0.8037369847297668, | |
| "learning_rate": 1.0495137976601648e-05, | |
| "loss": 0.8273367881774902, | |
| "step": 1392 | |
| }, | |
| { | |
| "epoch": 1.702075702075702, | |
| "grad_norm": 0.9903712868690491, | |
| "learning_rate": 1.0469604262209765e-05, | |
| "loss": 0.7290286421775818, | |
| "step": 1394 | |
| }, | |
| { | |
| "epoch": 1.7045177045177047, | |
| "grad_norm": 2.0067808628082275, | |
| "learning_rate": 1.0444074831152317e-05, | |
| "loss": 0.9373266100883484, | |
| "step": 1396 | |
| }, | |
| { | |
| "epoch": 1.7069597069597071, | |
| "grad_norm": 20.187288284301758, | |
| "learning_rate": 1.0418549889598175e-05, | |
| "loss": 0.8240612149238586, | |
| "step": 1398 | |
| }, | |
| { | |
| "epoch": 1.7094017094017095, | |
| "grad_norm": 4.022505283355713, | |
| "learning_rate": 1.0393029643679962e-05, | |
| "loss": 0.44202497601509094, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 1.711843711843712, | |
| "grad_norm": 5.573869705200195, | |
| "learning_rate": 1.0367514299492366e-05, | |
| "loss": 0.9583691954612732, | |
| "step": 1402 | |
| }, | |
| { | |
| "epoch": 1.7142857142857144, | |
| "grad_norm": 1.5996133089065552, | |
| "learning_rate": 1.0342004063090503e-05, | |
| "loss": 1.0398838520050049, | |
| "step": 1404 | |
| }, | |
| { | |
| "epoch": 1.7167277167277168, | |
| "grad_norm": 2.385746717453003, | |
| "learning_rate": 1.0316499140488232e-05, | |
| "loss": 0.4760570824146271, | |
| "step": 1406 | |
| }, | |
| { | |
| "epoch": 1.7191697191697193, | |
| "grad_norm": 0.8254954218864441, | |
| "learning_rate": 1.0290999737656497e-05, | |
| "loss": 0.907942533493042, | |
| "step": 1408 | |
| }, | |
| { | |
| "epoch": 1.7216117216117217, | |
| "grad_norm": 8.329554557800293, | |
| "learning_rate": 1.026550606052168e-05, | |
| "loss": 0.6862547397613525, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 1.7240537240537241, | |
| "grad_norm": 2.332361936569214, | |
| "learning_rate": 1.0240018314963909e-05, | |
| "loss": 0.8768781423568726, | |
| "step": 1412 | |
| }, | |
| { | |
| "epoch": 1.7264957264957266, | |
| "grad_norm": 2.285680055618286, | |
| "learning_rate": 1.0214536706815418e-05, | |
| "loss": 0.986327588558197, | |
| "step": 1414 | |
| }, | |
| { | |
| "epoch": 1.728937728937729, | |
| "grad_norm": 3.5364201068878174, | |
| "learning_rate": 1.0189061441858873e-05, | |
| "loss": 0.8355549573898315, | |
| "step": 1416 | |
| }, | |
| { | |
| "epoch": 1.7313797313797314, | |
| "grad_norm": 0.8595628142356873, | |
| "learning_rate": 1.0163592725825712e-05, | |
| "loss": 0.8929445743560791, | |
| "step": 1418 | |
| }, | |
| { | |
| "epoch": 1.7338217338217339, | |
| "grad_norm": 15.206433296203613, | |
| "learning_rate": 1.0138130764394496e-05, | |
| "loss": 0.7870601415634155, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 1.7362637362637363, | |
| "grad_norm": 2.8101370334625244, | |
| "learning_rate": 1.0112675763189224e-05, | |
| "loss": 0.7534129023551941, | |
| "step": 1422 | |
| }, | |
| { | |
| "epoch": 1.7387057387057387, | |
| "grad_norm": 1.858702540397644, | |
| "learning_rate": 1.0087227927777696e-05, | |
| "loss": 0.8370426893234253, | |
| "step": 1424 | |
| }, | |
| { | |
| "epoch": 1.7411477411477412, | |
| "grad_norm": 2.0665295124053955, | |
| "learning_rate": 1.006178746366984e-05, | |
| "loss": 0.6909109354019165, | |
| "step": 1426 | |
| }, | |
| { | |
| "epoch": 1.7435897435897436, | |
| "grad_norm": 0.9323246479034424, | |
| "learning_rate": 1.0036354576316052e-05, | |
| "loss": 1.014011263847351, | |
| "step": 1428 | |
| }, | |
| { | |
| "epoch": 1.746031746031746, | |
| "grad_norm": 1.75360107421875, | |
| "learning_rate": 1.0010929471105548e-05, | |
| "loss": 1.2392351627349854, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 1.7484737484737485, | |
| "grad_norm": 1.979491949081421, | |
| "learning_rate": 9.98551235336469e-06, | |
| "loss": 0.6340602040290833, | |
| "step": 1432 | |
| }, | |
| { | |
| "epoch": 1.750915750915751, | |
| "grad_norm": 2.876166343688965, | |
| "learning_rate": 9.960103428355337e-06, | |
| "loss": 0.7525686621665955, | |
| "step": 1434 | |
| }, | |
| { | |
| "epoch": 1.7533577533577533, | |
| "grad_norm": 1.366552710533142, | |
| "learning_rate": 9.934702901273187e-06, | |
| "loss": 0.6044411063194275, | |
| "step": 1436 | |
| }, | |
| { | |
| "epoch": 1.7557997557997558, | |
| "grad_norm": 0.689400315284729, | |
| "learning_rate": 9.90931097724612e-06, | |
| "loss": 0.4377739727497101, | |
| "step": 1438 | |
| }, | |
| { | |
| "epoch": 1.7582417582417582, | |
| "grad_norm": 0.8386373519897461, | |
| "learning_rate": 9.883927861332538e-06, | |
| "loss": 0.909875214099884, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 1.7606837606837606, | |
| "grad_norm": 7.745026111602783, | |
| "learning_rate": 9.85855375851971e-06, | |
| "loss": 0.7949923872947693, | |
| "step": 1442 | |
| }, | |
| { | |
| "epoch": 1.763125763125763, | |
| "grad_norm": 2.948460340499878, | |
| "learning_rate": 9.833188873722122e-06, | |
| "loss": 0.6595785021781921, | |
| "step": 1444 | |
| }, | |
| { | |
| "epoch": 1.7655677655677655, | |
| "grad_norm": 0.7448163032531738, | |
| "learning_rate": 9.80783341177981e-06, | |
| "loss": 1.0280483961105347, | |
| "step": 1446 | |
| }, | |
| { | |
| "epoch": 1.768009768009768, | |
| "grad_norm": 0.7969598770141602, | |
| "learning_rate": 9.782487577456724e-06, | |
| "loss": 1.0123943090438843, | |
| "step": 1448 | |
| }, | |
| { | |
| "epoch": 1.7704517704517704, | |
| "grad_norm": 0.9583572149276733, | |
| "learning_rate": 9.75715157543905e-06, | |
| "loss": 0.8486643433570862, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 1.7728937728937728, | |
| "grad_norm": 2.09142804145813, | |
| "learning_rate": 9.731825610333587e-06, | |
| "loss": 0.3455406129360199, | |
| "step": 1452 | |
| }, | |
| { | |
| "epoch": 1.7753357753357752, | |
| "grad_norm": 0.9442964196205139, | |
| "learning_rate": 9.706509886666067e-06, | |
| "loss": 0.8303570747375488, | |
| "step": 1454 | |
| }, | |
| { | |
| "epoch": 1.7777777777777777, | |
| "grad_norm": 1.240134358406067, | |
| "learning_rate": 9.681204608879518e-06, | |
| "loss": 0.5113586187362671, | |
| "step": 1456 | |
| }, | |
| { | |
| "epoch": 1.7802197802197801, | |
| "grad_norm": 1.1532829999923706, | |
| "learning_rate": 9.655909981332614e-06, | |
| "loss": 0.8892757892608643, | |
| "step": 1458 | |
| }, | |
| { | |
| "epoch": 1.7826617826617825, | |
| "grad_norm": 1.5256012678146362, | |
| "learning_rate": 9.63062620829801e-06, | |
| "loss": 0.8083629608154297, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 1.785103785103785, | |
| "grad_norm": 1.8043534755706787, | |
| "learning_rate": 9.605353493960717e-06, | |
| "loss": 0.9189132452011108, | |
| "step": 1462 | |
| }, | |
| { | |
| "epoch": 1.7875457875457874, | |
| "grad_norm": 0.841884434223175, | |
| "learning_rate": 9.580092042416427e-06, | |
| "loss": 0.6249831318855286, | |
| "step": 1464 | |
| }, | |
| { | |
| "epoch": 1.7899877899877898, | |
| "grad_norm": 2.1716599464416504, | |
| "learning_rate": 9.554842057669886e-06, | |
| "loss": 0.6827890872955322, | |
| "step": 1466 | |
| }, | |
| { | |
| "epoch": 1.7924297924297923, | |
| "grad_norm": 3.5236616134643555, | |
| "learning_rate": 9.529603743633229e-06, | |
| "loss": 0.7608170509338379, | |
| "step": 1468 | |
| }, | |
| { | |
| "epoch": 1.7948717948717947, | |
| "grad_norm": 1.99154531955719, | |
| "learning_rate": 9.504377304124346e-06, | |
| "loss": 0.9152241945266724, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 1.7973137973137974, | |
| "grad_norm": 0.8060831427574158, | |
| "learning_rate": 9.47916294286523e-06, | |
| "loss": 0.8515353202819824, | |
| "step": 1472 | |
| }, | |
| { | |
| "epoch": 1.7997557997557998, | |
| "grad_norm": 5.8603363037109375, | |
| "learning_rate": 9.453960863480333e-06, | |
| "loss": 0.5703706741333008, | |
| "step": 1474 | |
| }, | |
| { | |
| "epoch": 1.8021978021978022, | |
| "grad_norm": 7.417604446411133, | |
| "learning_rate": 9.428771269494926e-06, | |
| "loss": 0.7551999092102051, | |
| "step": 1476 | |
| }, | |
| { | |
| "epoch": 1.8046398046398047, | |
| "grad_norm": 1.034999966621399, | |
| "learning_rate": 9.403594364333444e-06, | |
| "loss": 0.6955189108848572, | |
| "step": 1478 | |
| }, | |
| { | |
| "epoch": 1.807081807081807, | |
| "grad_norm": 0.9549148678779602, | |
| "learning_rate": 9.378430351317854e-06, | |
| "loss": 0.42793938517570496, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 1.8095238095238095, | |
| "grad_norm": 1.3916822671890259, | |
| "learning_rate": 9.353279433666014e-06, | |
| "loss": 0.6840672492980957, | |
| "step": 1482 | |
| }, | |
| { | |
| "epoch": 1.811965811965812, | |
| "grad_norm": 0.854276716709137, | |
| "learning_rate": 9.328141814490021e-06, | |
| "loss": 0.893316924571991, | |
| "step": 1484 | |
| }, | |
| { | |
| "epoch": 1.8144078144078144, | |
| "grad_norm": 1.491588830947876, | |
| "learning_rate": 9.303017696794578e-06, | |
| "loss": 0.872158944606781, | |
| "step": 1486 | |
| }, | |
| { | |
| "epoch": 1.8168498168498168, | |
| "grad_norm": 1.8033097982406616, | |
| "learning_rate": 9.277907283475358e-06, | |
| "loss": 0.6238676905632019, | |
| "step": 1488 | |
| }, | |
| { | |
| "epoch": 1.8192918192918193, | |
| "grad_norm": 0.8885567784309387, | |
| "learning_rate": 9.252810777317351e-06, | |
| "loss": 0.6716984510421753, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 1.8217338217338217, | |
| "grad_norm": 1.0771310329437256, | |
| "learning_rate": 9.227728380993253e-06, | |
| "loss": 0.8512567281723022, | |
| "step": 1492 | |
| }, | |
| { | |
| "epoch": 1.8241758241758241, | |
| "grad_norm": 1.4891635179519653, | |
| "learning_rate": 9.202660297061798e-06, | |
| "loss": 0.5891348123550415, | |
| "step": 1494 | |
| }, | |
| { | |
| "epoch": 1.8266178266178266, | |
| "grad_norm": 1.5767910480499268, | |
| "learning_rate": 9.177606727966142e-06, | |
| "loss": 0.8717406392097473, | |
| "step": 1496 | |
| }, | |
| { | |
| "epoch": 1.8290598290598292, | |
| "grad_norm": 0.8637403845787048, | |
| "learning_rate": 9.15256787603222e-06, | |
| "loss": 1.3341138362884521, | |
| "step": 1498 | |
| }, | |
| { | |
| "epoch": 1.8315018315018317, | |
| "grad_norm": 1.3066986799240112, | |
| "learning_rate": 9.127543943467128e-06, | |
| "loss": 1.2278974056243896, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 1.833943833943834, | |
| "grad_norm": 1.3648895025253296, | |
| "learning_rate": 9.102535132357457e-06, | |
| "loss": 0.6873140335083008, | |
| "step": 1502 | |
| }, | |
| { | |
| "epoch": 1.8363858363858365, | |
| "grad_norm": 0.45770537853240967, | |
| "learning_rate": 9.077541644667697e-06, | |
| "loss": 0.7067763209342957, | |
| "step": 1504 | |
| }, | |
| { | |
| "epoch": 1.838827838827839, | |
| "grad_norm": 2.4009127616882324, | |
| "learning_rate": 9.052563682238587e-06, | |
| "loss": 0.6803405284881592, | |
| "step": 1506 | |
| }, | |
| { | |
| "epoch": 1.8412698412698414, | |
| "grad_norm": 1.205779790878296, | |
| "learning_rate": 9.02760144678548e-06, | |
| "loss": 0.6593731641769409, | |
| "step": 1508 | |
| }, | |
| { | |
| "epoch": 1.8437118437118438, | |
| "grad_norm": 0.640776515007019, | |
| "learning_rate": 9.00265513989673e-06, | |
| "loss": 0.8603323101997375, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 1.8461538461538463, | |
| "grad_norm": 1.0433986186981201, | |
| "learning_rate": 8.977724963032056e-06, | |
| "loss": 0.8412877917289734, | |
| "step": 1512 | |
| }, | |
| { | |
| "epoch": 1.8485958485958487, | |
| "grad_norm": 1.245303750038147, | |
| "learning_rate": 8.952811117520914e-06, | |
| "loss": 1.0396430492401123, | |
| "step": 1514 | |
| }, | |
| { | |
| "epoch": 1.8510378510378511, | |
| "grad_norm": 1.5737297534942627, | |
| "learning_rate": 8.927913804560864e-06, | |
| "loss": 0.6088389754295349, | |
| "step": 1516 | |
| }, | |
| { | |
| "epoch": 1.8534798534798536, | |
| "grad_norm": 0.9162042140960693, | |
| "learning_rate": 8.903033225215975e-06, | |
| "loss": 1.1635559797286987, | |
| "step": 1518 | |
| }, | |
| { | |
| "epoch": 1.855921855921856, | |
| "grad_norm": 1.7877050638198853, | |
| "learning_rate": 8.878169580415154e-06, | |
| "loss": 0.631327748298645, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 1.8583638583638584, | |
| "grad_norm": 3.03653883934021, | |
| "learning_rate": 8.85332307095057e-06, | |
| "loss": 0.902554452419281, | |
| "step": 1522 | |
| }, | |
| { | |
| "epoch": 1.8608058608058609, | |
| "grad_norm": 1.9247746467590332, | |
| "learning_rate": 8.828493897475998e-06, | |
| "loss": 0.8101663589477539, | |
| "step": 1524 | |
| }, | |
| { | |
| "epoch": 1.8632478632478633, | |
| "grad_norm": 1.386506199836731, | |
| "learning_rate": 8.803682260505216e-06, | |
| "loss": 0.7383776903152466, | |
| "step": 1526 | |
| }, | |
| { | |
| "epoch": 1.8656898656898657, | |
| "grad_norm": 1.1092829704284668, | |
| "learning_rate": 8.778888360410385e-06, | |
| "loss": 0.7297862768173218, | |
| "step": 1528 | |
| }, | |
| { | |
| "epoch": 1.8681318681318682, | |
| "grad_norm": 0.7110038995742798, | |
| "learning_rate": 8.754112397420426e-06, | |
| "loss": 0.8971010446548462, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 1.8705738705738706, | |
| "grad_norm": 1.9106638431549072, | |
| "learning_rate": 8.729354571619404e-06, | |
| "loss": 0.7592481374740601, | |
| "step": 1532 | |
| }, | |
| { | |
| "epoch": 1.873015873015873, | |
| "grad_norm": 0.805887758731842, | |
| "learning_rate": 8.704615082944914e-06, | |
| "loss": 0.8079948425292969, | |
| "step": 1534 | |
| }, | |
| { | |
| "epoch": 1.8754578754578755, | |
| "grad_norm": 0.6133478283882141, | |
| "learning_rate": 8.679894131186462e-06, | |
| "loss": 1.000016450881958, | |
| "step": 1536 | |
| }, | |
| { | |
| "epoch": 1.877899877899878, | |
| "grad_norm": 0.6692440509796143, | |
| "learning_rate": 8.655191915983859e-06, | |
| "loss": 0.8313310742378235, | |
| "step": 1538 | |
| }, | |
| { | |
| "epoch": 1.8803418803418803, | |
| "grad_norm": 0.9560274481773376, | |
| "learning_rate": 8.630508636825602e-06, | |
| "loss": 0.9431169033050537, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 1.8827838827838828, | |
| "grad_norm": 1.700568675994873, | |
| "learning_rate": 8.605844493047269e-06, | |
| "loss": 0.9815627336502075, | |
| "step": 1542 | |
| }, | |
| { | |
| "epoch": 1.8852258852258852, | |
| "grad_norm": 1.308621883392334, | |
| "learning_rate": 8.581199683829899e-06, | |
| "loss": 0.7461444735527039, | |
| "step": 1544 | |
| }, | |
| { | |
| "epoch": 1.8876678876678876, | |
| "grad_norm": 1.2452470064163208, | |
| "learning_rate": 8.556574408198399e-06, | |
| "loss": 0.9441168904304504, | |
| "step": 1546 | |
| }, | |
| { | |
| "epoch": 1.89010989010989, | |
| "grad_norm": 3.298710823059082, | |
| "learning_rate": 8.531968865019919e-06, | |
| "loss": 0.8527262210845947, | |
| "step": 1548 | |
| }, | |
| { | |
| "epoch": 1.8925518925518925, | |
| "grad_norm": 0.8520393967628479, | |
| "learning_rate": 8.507383253002264e-06, | |
| "loss": 0.47991418838500977, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 1.894993894993895, | |
| "grad_norm": 1.5283163785934448, | |
| "learning_rate": 8.482817770692276e-06, | |
| "loss": 0.8953297138214111, | |
| "step": 1552 | |
| }, | |
| { | |
| "epoch": 1.8974358974358974, | |
| "grad_norm": 2.6013505458831787, | |
| "learning_rate": 8.458272616474226e-06, | |
| "loss": 0.598823070526123, | |
| "step": 1554 | |
| }, | |
| { | |
| "epoch": 1.8998778998778998, | |
| "grad_norm": 6.25869083404541, | |
| "learning_rate": 8.43374798856824e-06, | |
| "loss": 1.0903539657592773, | |
| "step": 1556 | |
| }, | |
| { | |
| "epoch": 1.9023199023199022, | |
| "grad_norm": 0.7708169221878052, | |
| "learning_rate": 8.40924408502866e-06, | |
| "loss": 0.6560428738594055, | |
| "step": 1558 | |
| }, | |
| { | |
| "epoch": 1.9047619047619047, | |
| "grad_norm": 1.3442054986953735, | |
| "learning_rate": 8.384761103742476e-06, | |
| "loss": 0.553628146648407, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 1.907203907203907, | |
| "grad_norm": 0.8295760750770569, | |
| "learning_rate": 8.360299242427713e-06, | |
| "loss": 0.8809893727302551, | |
| "step": 1562 | |
| }, | |
| { | |
| "epoch": 1.9096459096459095, | |
| "grad_norm": 1.2123860120773315, | |
| "learning_rate": 8.335858698631829e-06, | |
| "loss": 0.7752953171730042, | |
| "step": 1564 | |
| }, | |
| { | |
| "epoch": 1.912087912087912, | |
| "grad_norm": 1.137731909751892, | |
| "learning_rate": 8.311439669730139e-06, | |
| "loss": 0.937446653842926, | |
| "step": 1566 | |
| }, | |
| { | |
| "epoch": 1.9145299145299144, | |
| "grad_norm": 1.4613070487976074, | |
| "learning_rate": 8.287042352924206e-06, | |
| "loss": 0.9597198963165283, | |
| "step": 1568 | |
| }, | |
| { | |
| "epoch": 1.9169719169719168, | |
| "grad_norm": 7.560548305511475, | |
| "learning_rate": 8.26266694524024e-06, | |
| "loss": 0.6756553053855896, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 1.9194139194139193, | |
| "grad_norm": 0.7736316919326782, | |
| "learning_rate": 8.238313643527533e-06, | |
| "loss": 0.8379277586936951, | |
| "step": 1572 | |
| }, | |
| { | |
| "epoch": 1.9218559218559217, | |
| "grad_norm": 2.3948774337768555, | |
| "learning_rate": 8.213982644456856e-06, | |
| "loss": 0.7130874991416931, | |
| "step": 1574 | |
| }, | |
| { | |
| "epoch": 1.9242979242979243, | |
| "grad_norm": 2.804558753967285, | |
| "learning_rate": 8.189674144518864e-06, | |
| "loss": 0.7871428728103638, | |
| "step": 1576 | |
| }, | |
| { | |
| "epoch": 1.9267399267399268, | |
| "grad_norm": 3.343308925628662, | |
| "learning_rate": 8.165388340022507e-06, | |
| "loss": 0.7644234895706177, | |
| "step": 1578 | |
| }, | |
| { | |
| "epoch": 1.9291819291819292, | |
| "grad_norm": 0.9689104557037354, | |
| "learning_rate": 8.14112542709347e-06, | |
| "loss": 0.9481227397918701, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 1.9316239316239316, | |
| "grad_norm": 0.9340876936912537, | |
| "learning_rate": 8.116885601672557e-06, | |
| "loss": 0.2258923351764679, | |
| "step": 1582 | |
| }, | |
| { | |
| "epoch": 1.934065934065934, | |
| "grad_norm": 1.9040846824645996, | |
| "learning_rate": 8.09266905951413e-06, | |
| "loss": 0.5065496563911438, | |
| "step": 1584 | |
| }, | |
| { | |
| "epoch": 1.9365079365079365, | |
| "grad_norm": 2.174138069152832, | |
| "learning_rate": 8.068475996184527e-06, | |
| "loss": 0.5920478701591492, | |
| "step": 1586 | |
| }, | |
| { | |
| "epoch": 1.938949938949939, | |
| "grad_norm": 0.8130704760551453, | |
| "learning_rate": 8.044306607060466e-06, | |
| "loss": 0.9720399379730225, | |
| "step": 1588 | |
| }, | |
| { | |
| "epoch": 1.9413919413919414, | |
| "grad_norm": 0.833109974861145, | |
| "learning_rate": 8.02016108732748e-06, | |
| "loss": 1.0517313480377197, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 1.9438339438339438, | |
| "grad_norm": 2.0496108531951904, | |
| "learning_rate": 7.996039631978352e-06, | |
| "loss": 1.0347234010696411, | |
| "step": 1592 | |
| }, | |
| { | |
| "epoch": 1.9462759462759462, | |
| "grad_norm": 1.0047261714935303, | |
| "learning_rate": 7.97194243581151e-06, | |
| "loss": 0.6489905118942261, | |
| "step": 1594 | |
| }, | |
| { | |
| "epoch": 1.9487179487179487, | |
| "grad_norm": 1.0025273561477661, | |
| "learning_rate": 7.947869693429486e-06, | |
| "loss": 0.568684458732605, | |
| "step": 1596 | |
| }, | |
| { | |
| "epoch": 1.9511599511599511, | |
| "grad_norm": 1.1909536123275757, | |
| "learning_rate": 7.923821599237322e-06, | |
| "loss": 0.6664155125617981, | |
| "step": 1598 | |
| }, | |
| { | |
| "epoch": 1.9536019536019538, | |
| "grad_norm": 1.6859694719314575, | |
| "learning_rate": 7.899798347441005e-06, | |
| "loss": 0.7015742063522339, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 1.9560439560439562, | |
| "grad_norm": 0.6844836473464966, | |
| "learning_rate": 7.87580013204591e-06, | |
| "loss": 0.9169449210166931, | |
| "step": 1602 | |
| }, | |
| { | |
| "epoch": 1.9584859584859586, | |
| "grad_norm": 2.2930445671081543, | |
| "learning_rate": 7.85182714685522e-06, | |
| "loss": 0.8345751762390137, | |
| "step": 1604 | |
| }, | |
| { | |
| "epoch": 1.960927960927961, | |
| "grad_norm": 2.5689308643341064, | |
| "learning_rate": 7.827879585468363e-06, | |
| "loss": 1.1974244117736816, | |
| "step": 1606 | |
| }, | |
| { | |
| "epoch": 1.9633699633699635, | |
| "grad_norm": 1.2992660999298096, | |
| "learning_rate": 7.803957641279457e-06, | |
| "loss": 1.1730899810791016, | |
| "step": 1608 | |
| }, | |
| { | |
| "epoch": 1.965811965811966, | |
| "grad_norm": 1.0391148328781128, | |
| "learning_rate": 7.780061507475738e-06, | |
| "loss": 0.9335651397705078, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 1.9682539682539684, | |
| "grad_norm": 3.6143672466278076, | |
| "learning_rate": 7.756191377036004e-06, | |
| "loss": 0.8546837568283081, | |
| "step": 1612 | |
| }, | |
| { | |
| "epoch": 1.9706959706959708, | |
| "grad_norm": 0.9346309304237366, | |
| "learning_rate": 7.732347442729062e-06, | |
| "loss": 1.0305918455123901, | |
| "step": 1614 | |
| }, | |
| { | |
| "epoch": 1.9731379731379732, | |
| "grad_norm": 0.9905077815055847, | |
| "learning_rate": 7.708529897112158e-06, | |
| "loss": 0.8775286674499512, | |
| "step": 1616 | |
| }, | |
| { | |
| "epoch": 1.9755799755799757, | |
| "grad_norm": 0.6666707396507263, | |
| "learning_rate": 7.684738932529441e-06, | |
| "loss": 0.8464508056640625, | |
| "step": 1618 | |
| }, | |
| { | |
| "epoch": 1.978021978021978, | |
| "grad_norm": 1.0916727781295776, | |
| "learning_rate": 7.660974741110387e-06, | |
| "loss": 1.035678505897522, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 1.9804639804639805, | |
| "grad_norm": 0.7847446203231812, | |
| "learning_rate": 7.637237514768265e-06, | |
| "loss": 0.6054593324661255, | |
| "step": 1622 | |
| }, | |
| { | |
| "epoch": 1.982905982905983, | |
| "grad_norm": 2.2946202754974365, | |
| "learning_rate": 7.613527445198576e-06, | |
| "loss": 0.45836907625198364, | |
| "step": 1624 | |
| }, | |
| { | |
| "epoch": 1.9853479853479854, | |
| "grad_norm": 9.175978660583496, | |
| "learning_rate": 7.5898447238775264e-06, | |
| "loss": 0.7117047905921936, | |
| "step": 1626 | |
| }, | |
| { | |
| "epoch": 1.9877899877899878, | |
| "grad_norm": 3.764439105987549, | |
| "learning_rate": 7.566189542060445e-06, | |
| "loss": 1.0821315050125122, | |
| "step": 1628 | |
| }, | |
| { | |
| "epoch": 1.9902319902319903, | |
| "grad_norm": 0.9272487163543701, | |
| "learning_rate": 7.5425620907802655e-06, | |
| "loss": 1.1502904891967773, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 1.9926739926739927, | |
| "grad_norm": 1.1519207954406738, | |
| "learning_rate": 7.518962560845986e-06, | |
| "loss": 0.8673257231712341, | |
| "step": 1632 | |
| }, | |
| { | |
| "epoch": 1.9951159951159951, | |
| "grad_norm": 0.6419383883476257, | |
| "learning_rate": 7.4953911428411085e-06, | |
| "loss": 0.75059574842453, | |
| "step": 1634 | |
| }, | |
| { | |
| "epoch": 1.9975579975579976, | |
| "grad_norm": 1.7326091527938843, | |
| "learning_rate": 7.4718480271221125e-06, | |
| "loss": 1.0258231163024902, | |
| "step": 1636 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 0.8297693133354187, | |
| "learning_rate": 7.448333403816926e-06, | |
| "loss": 0.9197133779525757, | |
| "step": 1638 | |
| }, | |
| { | |
| "epoch": 2.0024420024420024, | |
| "grad_norm": 0.842572808265686, | |
| "learning_rate": 7.424847462823361e-06, | |
| "loss": 0.6060487627983093, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 2.004884004884005, | |
| "grad_norm": 1.4340323209762573, | |
| "learning_rate": 7.401390393807615e-06, | |
| "loss": 0.47724178433418274, | |
| "step": 1642 | |
| }, | |
| { | |
| "epoch": 2.0073260073260073, | |
| "grad_norm": 0.6351611018180847, | |
| "learning_rate": 7.37796238620272e-06, | |
| "loss": 0.5051848292350769, | |
| "step": 1644 | |
| }, | |
| { | |
| "epoch": 2.0097680097680097, | |
| "grad_norm": 3.20005202293396, | |
| "learning_rate": 7.3545636292070055e-06, | |
| "loss": 0.438951700925827, | |
| "step": 1646 | |
| }, | |
| { | |
| "epoch": 2.012210012210012, | |
| "grad_norm": 1.5867102146148682, | |
| "learning_rate": 7.331194311782597e-06, | |
| "loss": 0.528706431388855, | |
| "step": 1648 | |
| }, | |
| { | |
| "epoch": 2.0146520146520146, | |
| "grad_norm": 2.449397325515747, | |
| "learning_rate": 7.307854622653863e-06, | |
| "loss": 0.3387841284275055, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 2.017094017094017, | |
| "grad_norm": 5.5735626220703125, | |
| "learning_rate": 7.284544750305902e-06, | |
| "loss": 0.6135000586509705, | |
| "step": 1652 | |
| }, | |
| { | |
| "epoch": 2.0195360195360195, | |
| "grad_norm": 2.001272439956665, | |
| "learning_rate": 7.261264882983024e-06, | |
| "loss": 0.4525635838508606, | |
| "step": 1654 | |
| }, | |
| { | |
| "epoch": 2.021978021978022, | |
| "grad_norm": 1.0277931690216064, | |
| "learning_rate": 7.238015208687226e-06, | |
| "loss": 0.4565449655056, | |
| "step": 1656 | |
| }, | |
| { | |
| "epoch": 2.0244200244200243, | |
| "grad_norm": 1.670928716659546, | |
| "learning_rate": 7.214795915176671e-06, | |
| "loss": 0.4369199872016907, | |
| "step": 1658 | |
| }, | |
| { | |
| "epoch": 2.0268620268620268, | |
| "grad_norm": 1.4175351858139038, | |
| "learning_rate": 7.191607189964181e-06, | |
| "loss": 0.6220426559448242, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 2.029304029304029, | |
| "grad_norm": 1.3668700456619263, | |
| "learning_rate": 7.16844922031571e-06, | |
| "loss": 0.557952880859375, | |
| "step": 1662 | |
| }, | |
| { | |
| "epoch": 2.0317460317460316, | |
| "grad_norm": 0.9909934401512146, | |
| "learning_rate": 7.145322193248838e-06, | |
| "loss": 0.2245861142873764, | |
| "step": 1664 | |
| }, | |
| { | |
| "epoch": 2.034188034188034, | |
| "grad_norm": 6.492028713226318, | |
| "learning_rate": 7.122226295531267e-06, | |
| "loss": 0.40176424384117126, | |
| "step": 1666 | |
| }, | |
| { | |
| "epoch": 2.0366300366300365, | |
| "grad_norm": 0.9408150911331177, | |
| "learning_rate": 7.099161713679308e-06, | |
| "loss": 0.4665899872779846, | |
| "step": 1668 | |
| }, | |
| { | |
| "epoch": 2.039072039072039, | |
| "grad_norm": 1.566773533821106, | |
| "learning_rate": 7.07612863395636e-06, | |
| "loss": 0.6036043763160706, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 2.0415140415140414, | |
| "grad_norm": 1.2262314558029175, | |
| "learning_rate": 7.053127242371434e-06, | |
| "loss": 0.5682324171066284, | |
| "step": 1672 | |
| }, | |
| { | |
| "epoch": 2.043956043956044, | |
| "grad_norm": 0.9549220204353333, | |
| "learning_rate": 7.030157724677631e-06, | |
| "loss": 0.5213257074356079, | |
| "step": 1674 | |
| }, | |
| { | |
| "epoch": 2.0463980463980462, | |
| "grad_norm": 1.66300368309021, | |
| "learning_rate": 7.0072202663706405e-06, | |
| "loss": 0.3227638006210327, | |
| "step": 1676 | |
| }, | |
| { | |
| "epoch": 2.0488400488400487, | |
| "grad_norm": 1.2017823457717896, | |
| "learning_rate": 6.984315052687258e-06, | |
| "loss": 0.5378082990646362, | |
| "step": 1678 | |
| }, | |
| { | |
| "epoch": 2.051282051282051, | |
| "grad_norm": 0.8874703645706177, | |
| "learning_rate": 6.96144226860388e-06, | |
| "loss": 0.49545711278915405, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 2.0537240537240535, | |
| "grad_norm": 1.3648614883422852, | |
| "learning_rate": 6.938602098835e-06, | |
| "loss": 0.3199822008609772, | |
| "step": 1682 | |
| }, | |
| { | |
| "epoch": 2.056166056166056, | |
| "grad_norm": 2.5054514408111572, | |
| "learning_rate": 6.915794727831743e-06, | |
| "loss": 0.3839988112449646, | |
| "step": 1684 | |
| }, | |
| { | |
| "epoch": 2.0586080586080584, | |
| "grad_norm": 2.381861925125122, | |
| "learning_rate": 6.893020339780341e-06, | |
| "loss": 0.3781861662864685, | |
| "step": 1686 | |
| }, | |
| { | |
| "epoch": 2.061050061050061, | |
| "grad_norm": 2.2430403232574463, | |
| "learning_rate": 6.870279118600679e-06, | |
| "loss": 0.6202837824821472, | |
| "step": 1688 | |
| }, | |
| { | |
| "epoch": 2.0634920634920633, | |
| "grad_norm": 2.3006107807159424, | |
| "learning_rate": 6.847571247944791e-06, | |
| "loss": 0.46027785539627075, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 2.065934065934066, | |
| "grad_norm": 1.330511450767517, | |
| "learning_rate": 6.8248969111953825e-06, | |
| "loss": 0.31774628162384033, | |
| "step": 1692 | |
| }, | |
| { | |
| "epoch": 2.0683760683760686, | |
| "grad_norm": 1.060591459274292, | |
| "learning_rate": 6.80225629146434e-06, | |
| "loss": 0.47486642003059387, | |
| "step": 1694 | |
| }, | |
| { | |
| "epoch": 2.070818070818071, | |
| "grad_norm": 1.2816616296768188, | |
| "learning_rate": 6.7796495715912694e-06, | |
| "loss": 0.4364372789859772, | |
| "step": 1696 | |
| }, | |
| { | |
| "epoch": 2.0732600732600734, | |
| "grad_norm": 1.004572868347168, | |
| "learning_rate": 6.757076934142013e-06, | |
| "loss": 0.4288478493690491, | |
| "step": 1698 | |
| }, | |
| { | |
| "epoch": 2.075702075702076, | |
| "grad_norm": 1.2579833269119263, | |
| "learning_rate": 6.734538561407158e-06, | |
| "loss": 0.4020456075668335, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 2.0781440781440783, | |
| "grad_norm": 1.9755547046661377, | |
| "learning_rate": 6.712034635400593e-06, | |
| "loss": 0.26895561814308167, | |
| "step": 1702 | |
| }, | |
| { | |
| "epoch": 2.0805860805860807, | |
| "grad_norm": 2.1291699409484863, | |
| "learning_rate": 6.689565337858019e-06, | |
| "loss": 0.2938929796218872, | |
| "step": 1704 | |
| }, | |
| { | |
| "epoch": 2.083028083028083, | |
| "grad_norm": 1.6085429191589355, | |
| "learning_rate": 6.6671308502354844e-06, | |
| "loss": 0.19200079143047333, | |
| "step": 1706 | |
| }, | |
| { | |
| "epoch": 2.0854700854700856, | |
| "grad_norm": 3.190870761871338, | |
| "learning_rate": 6.644731353707927e-06, | |
| "loss": 0.5591083765029907, | |
| "step": 1708 | |
| }, | |
| { | |
| "epoch": 2.087912087912088, | |
| "grad_norm": 1.8141244649887085, | |
| "learning_rate": 6.622367029167702e-06, | |
| "loss": 0.2770901918411255, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 2.0903540903540905, | |
| "grad_norm": 4.159117221832275, | |
| "learning_rate": 6.600038057223126e-06, | |
| "loss": 0.394546240568161, | |
| "step": 1712 | |
| }, | |
| { | |
| "epoch": 2.092796092796093, | |
| "grad_norm": 1.3365147113800049, | |
| "learning_rate": 6.577744618197017e-06, | |
| "loss": 0.4641517996788025, | |
| "step": 1714 | |
| }, | |
| { | |
| "epoch": 2.0952380952380953, | |
| "grad_norm": 0.9762091636657715, | |
| "learning_rate": 6.555486892125243e-06, | |
| "loss": 0.32657861709594727, | |
| "step": 1716 | |
| }, | |
| { | |
| "epoch": 2.0976800976800978, | |
| "grad_norm": 1.1228184700012207, | |
| "learning_rate": 6.533265058755256e-06, | |
| "loss": 0.6660332083702087, | |
| "step": 1718 | |
| }, | |
| { | |
| "epoch": 2.1001221001221, | |
| "grad_norm": 0.9115656614303589, | |
| "learning_rate": 6.5110792975446515e-06, | |
| "loss": 0.48777180910110474, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 2.1025641025641026, | |
| "grad_norm": 1.8341835737228394, | |
| "learning_rate": 6.488929787659721e-06, | |
| "loss": 0.6992468237876892, | |
| "step": 1722 | |
| }, | |
| { | |
| "epoch": 2.105006105006105, | |
| "grad_norm": 1.1542752981185913, | |
| "learning_rate": 6.466816707973991e-06, | |
| "loss": 0.3529256284236908, | |
| "step": 1724 | |
| }, | |
| { | |
| "epoch": 2.1074481074481075, | |
| "grad_norm": 19.553573608398438, | |
| "learning_rate": 6.444740237066791e-06, | |
| "loss": 0.45478177070617676, | |
| "step": 1726 | |
| }, | |
| { | |
| "epoch": 2.10989010989011, | |
| "grad_norm": 0.6075100898742676, | |
| "learning_rate": 6.422700553221817e-06, | |
| "loss": 0.3780288100242615, | |
| "step": 1728 | |
| }, | |
| { | |
| "epoch": 2.1123321123321124, | |
| "grad_norm": 0.8796222805976868, | |
| "learning_rate": 6.400697834425662e-06, | |
| "loss": 0.42669016122817993, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 2.114774114774115, | |
| "grad_norm": 0.9508007764816284, | |
| "learning_rate": 6.378732258366421e-06, | |
| "loss": 0.34392303228378296, | |
| "step": 1732 | |
| }, | |
| { | |
| "epoch": 2.1172161172161172, | |
| "grad_norm": 0.28383857011795044, | |
| "learning_rate": 6.356804002432225e-06, | |
| "loss": 0.1719311773777008, | |
| "step": 1734 | |
| }, | |
| { | |
| "epoch": 2.1196581196581197, | |
| "grad_norm": 1.0620123147964478, | |
| "learning_rate": 6.334913243709809e-06, | |
| "loss": 0.5892414450645447, | |
| "step": 1736 | |
| }, | |
| { | |
| "epoch": 2.122100122100122, | |
| "grad_norm": 1.1223015785217285, | |
| "learning_rate": 6.313060158983104e-06, | |
| "loss": 0.3725854456424713, | |
| "step": 1738 | |
| }, | |
| { | |
| "epoch": 2.1245421245421245, | |
| "grad_norm": 0.83611661195755, | |
| "learning_rate": 6.291244924731794e-06, | |
| "loss": 0.4878256618976593, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 2.126984126984127, | |
| "grad_norm": 1.6328321695327759, | |
| "learning_rate": 6.26946771712988e-06, | |
| "loss": 0.43116888403892517, | |
| "step": 1742 | |
| }, | |
| { | |
| "epoch": 2.1294261294261294, | |
| "grad_norm": 1.3364393711090088, | |
| "learning_rate": 6.247728712044283e-06, | |
| "loss": 0.37520939111709595, | |
| "step": 1744 | |
| }, | |
| { | |
| "epoch": 2.131868131868132, | |
| "grad_norm": 1.3389878273010254, | |
| "learning_rate": 6.226028085033413e-06, | |
| "loss": 0.5751076936721802, | |
| "step": 1746 | |
| }, | |
| { | |
| "epoch": 2.1343101343101343, | |
| "grad_norm": 1.8287776708602905, | |
| "learning_rate": 6.2043660113457325e-06, | |
| "loss": 0.20154741406440735, | |
| "step": 1748 | |
| }, | |
| { | |
| "epoch": 2.1367521367521367, | |
| "grad_norm": 1.4840490818023682, | |
| "learning_rate": 6.182742665918373e-06, | |
| "loss": 0.6898431777954102, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 2.139194139194139, | |
| "grad_norm": 0.9770026803016663, | |
| "learning_rate": 6.161158223375705e-06, | |
| "loss": 0.3924607038497925, | |
| "step": 1752 | |
| }, | |
| { | |
| "epoch": 2.1416361416361416, | |
| "grad_norm": 0.7722997069358826, | |
| "learning_rate": 6.13961285802792e-06, | |
| "loss": 0.43264567852020264, | |
| "step": 1754 | |
| }, | |
| { | |
| "epoch": 2.144078144078144, | |
| "grad_norm": 0.9995938539505005, | |
| "learning_rate": 6.118106743869641e-06, | |
| "loss": 0.5022901296615601, | |
| "step": 1756 | |
| }, | |
| { | |
| "epoch": 2.1465201465201464, | |
| "grad_norm": 0.5033841133117676, | |
| "learning_rate": 6.096640054578511e-06, | |
| "loss": 0.21431341767311096, | |
| "step": 1758 | |
| }, | |
| { | |
| "epoch": 2.148962148962149, | |
| "grad_norm": 1.137976050376892, | |
| "learning_rate": 6.075212963513776e-06, | |
| "loss": 0.4715498685836792, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 2.1514041514041513, | |
| "grad_norm": 0.9455146193504333, | |
| "learning_rate": 6.053825643714912e-06, | |
| "loss": 0.4320064187049866, | |
| "step": 1762 | |
| }, | |
| { | |
| "epoch": 2.1538461538461537, | |
| "grad_norm": 2.8845789432525635, | |
| "learning_rate": 6.032478267900206e-06, | |
| "loss": 0.3226162791252136, | |
| "step": 1764 | |
| }, | |
| { | |
| "epoch": 2.156288156288156, | |
| "grad_norm": 0.9458103179931641, | |
| "learning_rate": 6.011171008465363e-06, | |
| "loss": 0.2729605436325073, | |
| "step": 1766 | |
| }, | |
| { | |
| "epoch": 2.1587301587301586, | |
| "grad_norm": 1.9725005626678467, | |
| "learning_rate": 5.989904037482128e-06, | |
| "loss": 0.3462582230567932, | |
| "step": 1768 | |
| }, | |
| { | |
| "epoch": 2.161172161172161, | |
| "grad_norm": 2.0717337131500244, | |
| "learning_rate": 5.968677526696882e-06, | |
| "loss": 0.38312727212905884, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 2.1636141636141635, | |
| "grad_norm": 0.8864312767982483, | |
| "learning_rate": 5.947491647529267e-06, | |
| "loss": 0.353424072265625, | |
| "step": 1772 | |
| }, | |
| { | |
| "epoch": 2.166056166056166, | |
| "grad_norm": 1.0762509107589722, | |
| "learning_rate": 5.9263465710707814e-06, | |
| "loss": 0.5065031051635742, | |
| "step": 1774 | |
| }, | |
| { | |
| "epoch": 2.1684981684981683, | |
| "grad_norm": 0.7869840264320374, | |
| "learning_rate": 5.905242468083423e-06, | |
| "loss": 0.5348921418190002, | |
| "step": 1776 | |
| }, | |
| { | |
| "epoch": 2.1709401709401708, | |
| "grad_norm": 2.1878821849823, | |
| "learning_rate": 5.884179508998299e-06, | |
| "loss": 0.27236610651016235, | |
| "step": 1778 | |
| }, | |
| { | |
| "epoch": 2.173382173382173, | |
| "grad_norm": 0.9579680562019348, | |
| "learning_rate": 5.863157863914239e-06, | |
| "loss": 0.43548962473869324, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 2.1758241758241756, | |
| "grad_norm": 1.8547625541687012, | |
| "learning_rate": 5.8421777025964446e-06, | |
| "loss": 0.5892971754074097, | |
| "step": 1782 | |
| }, | |
| { | |
| "epoch": 2.178266178266178, | |
| "grad_norm": 0.9620394706726074, | |
| "learning_rate": 5.8212391944750965e-06, | |
| "loss": 0.4943884313106537, | |
| "step": 1784 | |
| }, | |
| { | |
| "epoch": 2.1807081807081805, | |
| "grad_norm": 2.7082159519195557, | |
| "learning_rate": 5.8003425086440015e-06, | |
| "loss": 0.5425156354904175, | |
| "step": 1786 | |
| }, | |
| { | |
| "epoch": 2.183150183150183, | |
| "grad_norm": 4.512080669403076, | |
| "learning_rate": 5.779487813859218e-06, | |
| "loss": 0.3213900625705719, | |
| "step": 1788 | |
| }, | |
| { | |
| "epoch": 2.185592185592186, | |
| "grad_norm": 0.9232001900672913, | |
| "learning_rate": 5.758675278537692e-06, | |
| "loss": 0.46233004331588745, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 2.1880341880341883, | |
| "grad_norm": 3.6497743129730225, | |
| "learning_rate": 5.737905070755907e-06, | |
| "loss": 0.480983167886734, | |
| "step": 1792 | |
| }, | |
| { | |
| "epoch": 2.1904761904761907, | |
| "grad_norm": 1.0851823091506958, | |
| "learning_rate": 5.717177358248522e-06, | |
| "loss": 0.2742152810096741, | |
| "step": 1794 | |
| }, | |
| { | |
| "epoch": 2.192918192918193, | |
| "grad_norm": 2.418455123901367, | |
| "learning_rate": 5.696492308407002e-06, | |
| "loss": 0.3769078254699707, | |
| "step": 1796 | |
| }, | |
| { | |
| "epoch": 2.1953601953601956, | |
| "grad_norm": 0.7429922223091125, | |
| "learning_rate": 5.675850088278298e-06, | |
| "loss": 0.40196555852890015, | |
| "step": 1798 | |
| }, | |
| { | |
| "epoch": 2.197802197802198, | |
| "grad_norm": 1.3570210933685303, | |
| "learning_rate": 5.655250864563469e-06, | |
| "loss": 0.3571450412273407, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 2.2002442002442004, | |
| "grad_norm": 1.8261560201644897, | |
| "learning_rate": 5.63469480361635e-06, | |
| "loss": 0.4585352838039398, | |
| "step": 1802 | |
| }, | |
| { | |
| "epoch": 2.202686202686203, | |
| "grad_norm": 2.33353328704834, | |
| "learning_rate": 5.614182071442201e-06, | |
| "loss": 0.4414786100387573, | |
| "step": 1804 | |
| }, | |
| { | |
| "epoch": 2.2051282051282053, | |
| "grad_norm": 1.7394614219665527, | |
| "learning_rate": 5.59371283369637e-06, | |
| "loss": 0.5657206177711487, | |
| "step": 1806 | |
| }, | |
| { | |
| "epoch": 2.2075702075702077, | |
| "grad_norm": 1.2605091333389282, | |
| "learning_rate": 5.573287255682967e-06, | |
| "loss": 0.5330032706260681, | |
| "step": 1808 | |
| }, | |
| { | |
| "epoch": 2.21001221001221, | |
| "grad_norm": 0.2691946029663086, | |
| "learning_rate": 5.552905502353502e-06, | |
| "loss": 0.2634370028972626, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 2.2124542124542126, | |
| "grad_norm": 0.983033299446106, | |
| "learning_rate": 5.532567738305576e-06, | |
| "loss": 0.4326469302177429, | |
| "step": 1812 | |
| }, | |
| { | |
| "epoch": 2.214896214896215, | |
| "grad_norm": 0.23342449963092804, | |
| "learning_rate": 5.512274127781552e-06, | |
| "loss": 0.1571735441684723, | |
| "step": 1814 | |
| }, | |
| { | |
| "epoch": 2.2173382173382175, | |
| "grad_norm": 1.2843339443206787, | |
| "learning_rate": 5.492024834667205e-06, | |
| "loss": 0.5355442762374878, | |
| "step": 1816 | |
| }, | |
| { | |
| "epoch": 2.21978021978022, | |
| "grad_norm": 0.949738621711731, | |
| "learning_rate": 5.471820022490422e-06, | |
| "loss": 0.38218754529953003, | |
| "step": 1818 | |
| }, | |
| { | |
| "epoch": 2.2222222222222223, | |
| "grad_norm": 0.8940930962562561, | |
| "learning_rate": 5.451659854419882e-06, | |
| "loss": 0.49747079610824585, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 2.2246642246642248, | |
| "grad_norm": 0.6108909249305725, | |
| "learning_rate": 5.431544493263714e-06, | |
| "loss": 0.2641042172908783, | |
| "step": 1822 | |
| }, | |
| { | |
| "epoch": 2.227106227106227, | |
| "grad_norm": 0.776020884513855, | |
| "learning_rate": 5.411474101468208e-06, | |
| "loss": 0.39929312467575073, | |
| "step": 1824 | |
| }, | |
| { | |
| "epoch": 2.2295482295482296, | |
| "grad_norm": 1.3689377307891846, | |
| "learning_rate": 5.3914488411165e-06, | |
| "loss": 0.2978437840938568, | |
| "step": 1826 | |
| }, | |
| { | |
| "epoch": 2.231990231990232, | |
| "grad_norm": 2.88201904296875, | |
| "learning_rate": 5.3714688739272396e-06, | |
| "loss": 0.3673563599586487, | |
| "step": 1828 | |
| }, | |
| { | |
| "epoch": 2.2344322344322345, | |
| "grad_norm": 2.748995065689087, | |
| "learning_rate": 5.351534361253312e-06, | |
| "loss": 0.29434409737586975, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 2.236874236874237, | |
| "grad_norm": 1.0924896001815796, | |
| "learning_rate": 5.331645464080526e-06, | |
| "loss": 0.46827900409698486, | |
| "step": 1832 | |
| }, | |
| { | |
| "epoch": 2.2393162393162394, | |
| "grad_norm": 0.8829333186149597, | |
| "learning_rate": 5.311802343026302e-06, | |
| "loss": 0.5047073364257812, | |
| "step": 1834 | |
| }, | |
| { | |
| "epoch": 2.241758241758242, | |
| "grad_norm": 1.315529227256775, | |
| "learning_rate": 5.292005158338394e-06, | |
| "loss": 0.40334218740463257, | |
| "step": 1836 | |
| }, | |
| { | |
| "epoch": 2.244200244200244, | |
| "grad_norm": 2.0851378440856934, | |
| "learning_rate": 5.272254069893579e-06, | |
| "loss": 0.5924956798553467, | |
| "step": 1838 | |
| }, | |
| { | |
| "epoch": 2.2466422466422467, | |
| "grad_norm": 3.3136000633239746, | |
| "learning_rate": 5.2525492371963785e-06, | |
| "loss": 0.31219542026519775, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 2.249084249084249, | |
| "grad_norm": 0.728590190410614, | |
| "learning_rate": 5.232890819377765e-06, | |
| "loss": 0.46928393840789795, | |
| "step": 1842 | |
| }, | |
| { | |
| "epoch": 2.2515262515262515, | |
| "grad_norm": 0.7545236945152283, | |
| "learning_rate": 5.213278975193874e-06, | |
| "loss": 0.4485982060432434, | |
| "step": 1844 | |
| }, | |
| { | |
| "epoch": 2.253968253968254, | |
| "grad_norm": 2.7309439182281494, | |
| "learning_rate": 5.193713863024722e-06, | |
| "loss": 0.3948480784893036, | |
| "step": 1846 | |
| }, | |
| { | |
| "epoch": 2.2564102564102564, | |
| "grad_norm": 0.3682626187801361, | |
| "learning_rate": 5.174195640872937e-06, | |
| "loss": 0.3254821300506592, | |
| "step": 1848 | |
| }, | |
| { | |
| "epoch": 2.258852258852259, | |
| "grad_norm": 1.932949423789978, | |
| "learning_rate": 5.154724466362473e-06, | |
| "loss": 0.43265148997306824, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 2.2612942612942613, | |
| "grad_norm": 1.3246240615844727, | |
| "learning_rate": 5.135300496737335e-06, | |
| "loss": 0.5352158546447754, | |
| "step": 1852 | |
| }, | |
| { | |
| "epoch": 2.2637362637362637, | |
| "grad_norm": 0.8921855688095093, | |
| "learning_rate": 5.115923888860321e-06, | |
| "loss": 0.6833795309066772, | |
| "step": 1854 | |
| }, | |
| { | |
| "epoch": 2.266178266178266, | |
| "grad_norm": 1.2048108577728271, | |
| "learning_rate": 5.096594799211748e-06, | |
| "loss": 0.6043341755867004, | |
| "step": 1856 | |
| }, | |
| { | |
| "epoch": 2.2686202686202686, | |
| "grad_norm": 1.5590717792510986, | |
| "learning_rate": 5.0773133838881806e-06, | |
| "loss": 0.6158211827278137, | |
| "step": 1858 | |
| }, | |
| { | |
| "epoch": 2.271062271062271, | |
| "grad_norm": 0.9362733364105225, | |
| "learning_rate": 5.058079798601184e-06, | |
| "loss": 0.7204128503799438, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 2.2735042735042734, | |
| "grad_norm": 1.0600636005401611, | |
| "learning_rate": 5.0388941986760675e-06, | |
| "loss": 0.32139068841934204, | |
| "step": 1862 | |
| }, | |
| { | |
| "epoch": 2.275946275946276, | |
| "grad_norm": 0.8406434059143066, | |
| "learning_rate": 5.019756739050606e-06, | |
| "loss": 0.29253455996513367, | |
| "step": 1864 | |
| }, | |
| { | |
| "epoch": 2.2783882783882783, | |
| "grad_norm": 2.3749077320098877, | |
| "learning_rate": 5.000667574273821e-06, | |
| "loss": 0.39995700120925903, | |
| "step": 1866 | |
| }, | |
| { | |
| "epoch": 2.2808302808302807, | |
| "grad_norm": 0.8223360180854797, | |
| "learning_rate": 4.981626858504718e-06, | |
| "loss": 0.45448631048202515, | |
| "step": 1868 | |
| }, | |
| { | |
| "epoch": 2.283272283272283, | |
| "grad_norm": 0.7664647698402405, | |
| "learning_rate": 4.962634745511027e-06, | |
| "loss": 0.42726626992225647, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 2.2857142857142856, | |
| "grad_norm": 1.1275815963745117, | |
| "learning_rate": 4.943691388667989e-06, | |
| "loss": 0.4752141237258911, | |
| "step": 1872 | |
| }, | |
| { | |
| "epoch": 2.288156288156288, | |
| "grad_norm": 2.4123940467834473, | |
| "learning_rate": 4.924796940957099e-06, | |
| "loss": 0.13898348808288574, | |
| "step": 1874 | |
| }, | |
| { | |
| "epoch": 2.2905982905982905, | |
| "grad_norm": 1.461748480796814, | |
| "learning_rate": 4.905951554964876e-06, | |
| "loss": 0.6339101791381836, | |
| "step": 1876 | |
| }, | |
| { | |
| "epoch": 2.293040293040293, | |
| "grad_norm": 2.0306098461151123, | |
| "learning_rate": 4.887155382881625e-06, | |
| "loss": 0.347889244556427, | |
| "step": 1878 | |
| }, | |
| { | |
| "epoch": 2.2954822954822953, | |
| "grad_norm": 1.3482933044433594, | |
| "learning_rate": 4.868408576500216e-06, | |
| "loss": 0.340035080909729, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 2.2979242979242978, | |
| "grad_norm": 4.910120010375977, | |
| "learning_rate": 4.849711287214856e-06, | |
| "loss": 0.5293861031532288, | |
| "step": 1882 | |
| }, | |
| { | |
| "epoch": 2.3003663003663, | |
| "grad_norm": 1.0976754426956177, | |
| "learning_rate": 4.8310636660198616e-06, | |
| "loss": 0.31249868869781494, | |
| "step": 1884 | |
| }, | |
| { | |
| "epoch": 2.3028083028083026, | |
| "grad_norm": 1.3118927478790283, | |
| "learning_rate": 4.812465863508448e-06, | |
| "loss": 0.5040943026542664, | |
| "step": 1886 | |
| }, | |
| { | |
| "epoch": 2.305250305250305, | |
| "grad_norm": 0.9740425944328308, | |
| "learning_rate": 4.7939180298715055e-06, | |
| "loss": 0.42627787590026855, | |
| "step": 1888 | |
| }, | |
| { | |
| "epoch": 2.3076923076923075, | |
| "grad_norm": 1.1387205123901367, | |
| "learning_rate": 4.775420314896384e-06, | |
| "loss": 0.44656771421432495, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 2.31013431013431, | |
| "grad_norm": 2.269031047821045, | |
| "learning_rate": 4.756972867965698e-06, | |
| "loss": 0.5736830830574036, | |
| "step": 1892 | |
| }, | |
| { | |
| "epoch": 2.3125763125763124, | |
| "grad_norm": 0.9688907265663147, | |
| "learning_rate": 4.738575838056104e-06, | |
| "loss": 0.4964962601661682, | |
| "step": 1894 | |
| }, | |
| { | |
| "epoch": 2.315018315018315, | |
| "grad_norm": 1.7838249206542969, | |
| "learning_rate": 4.7202293737371066e-06, | |
| "loss": 0.4222361445426941, | |
| "step": 1896 | |
| }, | |
| { | |
| "epoch": 2.317460317460317, | |
| "grad_norm": 1.0578351020812988, | |
| "learning_rate": 4.7019336231698576e-06, | |
| "loss": 0.5211227536201477, | |
| "step": 1898 | |
| }, | |
| { | |
| "epoch": 2.3199023199023197, | |
| "grad_norm": 1.8706358671188354, | |
| "learning_rate": 4.6836887341059525e-06, | |
| "loss": 0.8980540633201599, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 2.3223443223443225, | |
| "grad_norm": 1.151202917098999, | |
| "learning_rate": 4.6654948538862475e-06, | |
| "loss": 0.4475945234298706, | |
| "step": 1902 | |
| }, | |
| { | |
| "epoch": 2.324786324786325, | |
| "grad_norm": 4.294190406799316, | |
| "learning_rate": 4.647352129439665e-06, | |
| "loss": 0.251365065574646, | |
| "step": 1904 | |
| }, | |
| { | |
| "epoch": 2.3272283272283274, | |
| "grad_norm": 1.604580044746399, | |
| "learning_rate": 4.629260707282009e-06, | |
| "loss": 0.190834641456604, | |
| "step": 1906 | |
| }, | |
| { | |
| "epoch": 2.32967032967033, | |
| "grad_norm": 1.1880110502243042, | |
| "learning_rate": 4.6112207335147704e-06, | |
| "loss": 0.2842097878456116, | |
| "step": 1908 | |
| }, | |
| { | |
| "epoch": 2.3321123321123323, | |
| "grad_norm": 2.0477302074432373, | |
| "learning_rate": 4.593232353823968e-06, | |
| "loss": 0.23184801638126373, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 2.3345543345543347, | |
| "grad_norm": 1.7173128128051758, | |
| "learning_rate": 4.575295713478956e-06, | |
| "loss": 0.40144017338752747, | |
| "step": 1912 | |
| }, | |
| { | |
| "epoch": 2.336996336996337, | |
| "grad_norm": 0.9430311322212219, | |
| "learning_rate": 4.557410957331249e-06, | |
| "loss": 0.5639522075653076, | |
| "step": 1914 | |
| }, | |
| { | |
| "epoch": 2.3394383394383396, | |
| "grad_norm": 3.2917191982269287, | |
| "learning_rate": 4.539578229813372e-06, | |
| "loss": 0.636457622051239, | |
| "step": 1916 | |
| }, | |
| { | |
| "epoch": 2.341880341880342, | |
| "grad_norm": 1.405510663986206, | |
| "learning_rate": 4.521797674937672e-06, | |
| "loss": 0.26978304982185364, | |
| "step": 1918 | |
| }, | |
| { | |
| "epoch": 2.3443223443223444, | |
| "grad_norm": 2.574928045272827, | |
| "learning_rate": 4.5040694362951625e-06, | |
| "loss": 0.3309711515903473, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 2.346764346764347, | |
| "grad_norm": 1.7721152305603027, | |
| "learning_rate": 4.486393657054369e-06, | |
| "loss": 0.3379634618759155, | |
| "step": 1922 | |
| }, | |
| { | |
| "epoch": 2.3492063492063493, | |
| "grad_norm": 0.34488657116889954, | |
| "learning_rate": 4.468770479960171e-06, | |
| "loss": 0.2894682288169861, | |
| "step": 1924 | |
| }, | |
| { | |
| "epoch": 2.3516483516483517, | |
| "grad_norm": 1.060381531715393, | |
| "learning_rate": 4.451200047332638e-06, | |
| "loss": 0.44025763869285583, | |
| "step": 1926 | |
| }, | |
| { | |
| "epoch": 2.354090354090354, | |
| "grad_norm": 1.5222772359848022, | |
| "learning_rate": 4.433682501065897e-06, | |
| "loss": 0.3474840223789215, | |
| "step": 1928 | |
| }, | |
| { | |
| "epoch": 2.3565323565323566, | |
| "grad_norm": 2.951404094696045, | |
| "learning_rate": 4.416217982626981e-06, | |
| "loss": 0.3358984589576721, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 2.358974358974359, | |
| "grad_norm": 1.0801118612289429, | |
| "learning_rate": 4.398806633054675e-06, | |
| "loss": 0.3395053446292877, | |
| "step": 1932 | |
| }, | |
| { | |
| "epoch": 2.3614163614163615, | |
| "grad_norm": 2.127126693725586, | |
| "learning_rate": 4.381448592958394e-06, | |
| "loss": 0.5439938902854919, | |
| "step": 1934 | |
| }, | |
| { | |
| "epoch": 2.363858363858364, | |
| "grad_norm": 1.0053937435150146, | |
| "learning_rate": 4.36414400251704e-06, | |
| "loss": 0.2674437463283539, | |
| "step": 1936 | |
| }, | |
| { | |
| "epoch": 2.3663003663003663, | |
| "grad_norm": 0.9853598475456238, | |
| "learning_rate": 4.346893001477861e-06, | |
| "loss": 0.4141199290752411, | |
| "step": 1938 | |
| }, | |
| { | |
| "epoch": 2.3687423687423688, | |
| "grad_norm": 8.180671691894531, | |
| "learning_rate": 4.329695729155342e-06, | |
| "loss": 0.5360310673713684, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 2.371184371184371, | |
| "grad_norm": 0.22848689556121826, | |
| "learning_rate": 4.3125523244300686e-06, | |
| "loss": 0.25111788511276245, | |
| "step": 1942 | |
| }, | |
| { | |
| "epoch": 2.3736263736263736, | |
| "grad_norm": 1.5355631113052368, | |
| "learning_rate": 4.295462925747594e-06, | |
| "loss": 0.3430798351764679, | |
| "step": 1944 | |
| }, | |
| { | |
| "epoch": 2.376068376068376, | |
| "grad_norm": 1.6975699663162231, | |
| "learning_rate": 4.278427671117344e-06, | |
| "loss": 0.08609216660261154, | |
| "step": 1946 | |
| }, | |
| { | |
| "epoch": 2.3785103785103785, | |
| "grad_norm": 1.575578212738037, | |
| "learning_rate": 4.261446698111496e-06, | |
| "loss": 0.194163978099823, | |
| "step": 1948 | |
| }, | |
| { | |
| "epoch": 2.380952380952381, | |
| "grad_norm": 4.127973556518555, | |
| "learning_rate": 4.24452014386385e-06, | |
| "loss": 0.20009776949882507, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 2.3833943833943834, | |
| "grad_norm": 0.7139300107955933, | |
| "learning_rate": 4.22764814506874e-06, | |
| "loss": 0.12069036066532135, | |
| "step": 1952 | |
| }, | |
| { | |
| "epoch": 2.385836385836386, | |
| "grad_norm": 3.075773000717163, | |
| "learning_rate": 4.210830837979932e-06, | |
| "loss": 0.35760805010795593, | |
| "step": 1954 | |
| }, | |
| { | |
| "epoch": 2.3882783882783882, | |
| "grad_norm": 1.492324948310852, | |
| "learning_rate": 4.194068358409503e-06, | |
| "loss": 0.48620444536209106, | |
| "step": 1956 | |
| }, | |
| { | |
| "epoch": 2.3907203907203907, | |
| "grad_norm": 1.7053909301757812, | |
| "learning_rate": 4.17736084172677e-06, | |
| "loss": 0.20889446139335632, | |
| "step": 1958 | |
| }, | |
| { | |
| "epoch": 2.393162393162393, | |
| "grad_norm": 1.3225889205932617, | |
| "learning_rate": 4.160708422857178e-06, | |
| "loss": 0.5993058085441589, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 2.3956043956043955, | |
| "grad_norm": 1.3367353677749634, | |
| "learning_rate": 4.144111236281214e-06, | |
| "loss": 0.1960648149251938, | |
| "step": 1962 | |
| }, | |
| { | |
| "epoch": 2.398046398046398, | |
| "grad_norm": 2.359844446182251, | |
| "learning_rate": 4.127569416033332e-06, | |
| "loss": 0.5698574185371399, | |
| "step": 1964 | |
| }, | |
| { | |
| "epoch": 2.4004884004884004, | |
| "grad_norm": 1.1340882778167725, | |
| "learning_rate": 4.111083095700858e-06, | |
| "loss": 0.18890273571014404, | |
| "step": 1966 | |
| }, | |
| { | |
| "epoch": 2.402930402930403, | |
| "grad_norm": 2.4454874992370605, | |
| "learning_rate": 4.094652408422913e-06, | |
| "loss": 0.3097396492958069, | |
| "step": 1968 | |
| }, | |
| { | |
| "epoch": 2.4053724053724053, | |
| "grad_norm": 4.218069553375244, | |
| "learning_rate": 4.078277486889341e-06, | |
| "loss": 0.23327361047267914, | |
| "step": 1970 | |
| }, | |
| { | |
| "epoch": 2.4078144078144077, | |
| "grad_norm": 3.866490364074707, | |
| "learning_rate": 4.061958463339646e-06, | |
| "loss": 0.06529633700847626, | |
| "step": 1972 | |
| }, | |
| { | |
| "epoch": 2.41025641025641, | |
| "grad_norm": 0.4942020773887634, | |
| "learning_rate": 4.045695469561899e-06, | |
| "loss": 0.08752602338790894, | |
| "step": 1974 | |
| }, | |
| { | |
| "epoch": 2.4126984126984126, | |
| "grad_norm": 3.321356773376465, | |
| "learning_rate": 4.029488636891702e-06, | |
| "loss": 0.3558381199836731, | |
| "step": 1976 | |
| }, | |
| { | |
| "epoch": 2.415140415140415, | |
| "grad_norm": 3.152714729309082, | |
| "learning_rate": 4.013338096211109e-06, | |
| "loss": 0.3303931653499603, | |
| "step": 1978 | |
| }, | |
| { | |
| "epoch": 2.4175824175824174, | |
| "grad_norm": 0.6018658876419067, | |
| "learning_rate": 3.99724397794758e-06, | |
| "loss": 0.22131627798080444, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 2.42002442002442, | |
| "grad_norm": 1.3327726125717163, | |
| "learning_rate": 3.981206412072914e-06, | |
| "loss": 0.39478451013565063, | |
| "step": 1982 | |
| }, | |
| { | |
| "epoch": 2.4224664224664223, | |
| "grad_norm": 1.705815076828003, | |
| "learning_rate": 3.965225528102217e-06, | |
| "loss": 0.3109724521636963, | |
| "step": 1984 | |
| }, | |
| { | |
| "epoch": 2.4249084249084247, | |
| "grad_norm": 0.7618647217750549, | |
| "learning_rate": 3.949301455092845e-06, | |
| "loss": 0.5224888920783997, | |
| "step": 1986 | |
| }, | |
| { | |
| "epoch": 2.427350427350427, | |
| "grad_norm": 1.2163892984390259, | |
| "learning_rate": 3.933434321643356e-06, | |
| "loss": 0.4845066964626312, | |
| "step": 1988 | |
| }, | |
| { | |
| "epoch": 2.42979242979243, | |
| "grad_norm": 0.8843790292739868, | |
| "learning_rate": 3.917624255892489e-06, | |
| "loss": 0.5302805304527283, | |
| "step": 1990 | |
| }, | |
| { | |
| "epoch": 2.4322344322344325, | |
| "grad_norm": 1.2315729856491089, | |
| "learning_rate": 3.901871385518117e-06, | |
| "loss": 0.42821258306503296, | |
| "step": 1992 | |
| }, | |
| { | |
| "epoch": 2.434676434676435, | |
| "grad_norm": 0.9088804125785828, | |
| "learning_rate": 3.886175837736214e-06, | |
| "loss": 0.4940814673900604, | |
| "step": 1994 | |
| }, | |
| { | |
| "epoch": 2.4371184371184373, | |
| "grad_norm": 1.1520100831985474, | |
| "learning_rate": 3.870537739299836e-06, | |
| "loss": 0.3047824501991272, | |
| "step": 1996 | |
| }, | |
| { | |
| "epoch": 2.4395604395604398, | |
| "grad_norm": 0.7935906648635864, | |
| "learning_rate": 3.854957216498099e-06, | |
| "loss": 0.5371643900871277, | |
| "step": 1998 | |
| }, | |
| { | |
| "epoch": 2.442002442002442, | |
| "grad_norm": 1.0501606464385986, | |
| "learning_rate": 3.839434395155135e-06, | |
| "loss": 0.24889859557151794, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 2.4444444444444446, | |
| "grad_norm": 1.7994686365127563, | |
| "learning_rate": 3.8239694006291194e-06, | |
| "loss": 0.45958831906318665, | |
| "step": 2002 | |
| }, | |
| { | |
| "epoch": 2.446886446886447, | |
| "grad_norm": 0.9377945065498352, | |
| "learning_rate": 3.8085623578112136e-06, | |
| "loss": 0.22220918536186218, | |
| "step": 2004 | |
| }, | |
| { | |
| "epoch": 2.4493284493284495, | |
| "grad_norm": 1.056534767150879, | |
| "learning_rate": 3.793213391124586e-06, | |
| "loss": 0.29667913913726807, | |
| "step": 2006 | |
| }, | |
| { | |
| "epoch": 2.451770451770452, | |
| "grad_norm": 1.055069088935852, | |
| "learning_rate": 3.7779226245233937e-06, | |
| "loss": 0.7430405616760254, | |
| "step": 2008 | |
| }, | |
| { | |
| "epoch": 2.4542124542124544, | |
| "grad_norm": 1.062638282775879, | |
| "learning_rate": 3.7626901814917927e-06, | |
| "loss": 0.3536508083343506, | |
| "step": 2010 | |
| }, | |
| { | |
| "epoch": 2.456654456654457, | |
| "grad_norm": 2.2568395137786865, | |
| "learning_rate": 3.747516185042922e-06, | |
| "loss": 0.2591190040111542, | |
| "step": 2012 | |
| }, | |
| { | |
| "epoch": 2.4590964590964592, | |
| "grad_norm": 1.5303833484649658, | |
| "learning_rate": 3.7324007577179283e-06, | |
| "loss": 0.5008297562599182, | |
| "step": 2014 | |
| }, | |
| { | |
| "epoch": 2.4615384615384617, | |
| "grad_norm": 0.9226781725883484, | |
| "learning_rate": 3.7173440215849744e-06, | |
| "loss": 0.4963090121746063, | |
| "step": 2016 | |
| }, | |
| { | |
| "epoch": 2.463980463980464, | |
| "grad_norm": 0.9127579927444458, | |
| "learning_rate": 3.7023460982382355e-06, | |
| "loss": 0.5157759189605713, | |
| "step": 2018 | |
| }, | |
| { | |
| "epoch": 2.4664224664224665, | |
| "grad_norm": 7.223013401031494, | |
| "learning_rate": 3.687407108796942e-06, | |
| "loss": 0.4686001241207123, | |
| "step": 2020 | |
| }, | |
| { | |
| "epoch": 2.468864468864469, | |
| "grad_norm": 1.2899993658065796, | |
| "learning_rate": 3.672527173904388e-06, | |
| "loss": 0.25978168845176697, | |
| "step": 2022 | |
| }, | |
| { | |
| "epoch": 2.4713064713064714, | |
| "grad_norm": 5.451155662536621, | |
| "learning_rate": 3.6577064137269525e-06, | |
| "loss": 0.3640308380126953, | |
| "step": 2024 | |
| }, | |
| { | |
| "epoch": 2.473748473748474, | |
| "grad_norm": 10.173837661743164, | |
| "learning_rate": 3.6429449479531416e-06, | |
| "loss": 0.3720964193344116, | |
| "step": 2026 | |
| }, | |
| { | |
| "epoch": 2.4761904761904763, | |
| "grad_norm": 0.20691752433776855, | |
| "learning_rate": 3.6282428957926154e-06, | |
| "loss": 0.2083432972431183, | |
| "step": 2028 | |
| }, | |
| { | |
| "epoch": 2.4786324786324787, | |
| "grad_norm": 2.024094581604004, | |
| "learning_rate": 3.613600375975221e-06, | |
| "loss": 0.5114956498146057, | |
| "step": 2030 | |
| }, | |
| { | |
| "epoch": 2.481074481074481, | |
| "grad_norm": 1.2281562089920044, | |
| "learning_rate": 3.599017506750042e-06, | |
| "loss": 0.47537893056869507, | |
| "step": 2032 | |
| }, | |
| { | |
| "epoch": 2.4835164835164836, | |
| "grad_norm": 2.2216989994049072, | |
| "learning_rate": 3.5844944058844393e-06, | |
| "loss": 0.25453007221221924, | |
| "step": 2034 | |
| }, | |
| { | |
| "epoch": 2.485958485958486, | |
| "grad_norm": 2.591078281402588, | |
| "learning_rate": 3.570031190663098e-06, | |
| "loss": 0.5005137920379639, | |
| "step": 2036 | |
| }, | |
| { | |
| "epoch": 2.4884004884004884, | |
| "grad_norm": 0.48911339044570923, | |
| "learning_rate": 3.5556279778870862e-06, | |
| "loss": 0.5193389058113098, | |
| "step": 2038 | |
| }, | |
| { | |
| "epoch": 2.490842490842491, | |
| "grad_norm": 27.082082748413086, | |
| "learning_rate": 3.5412848838729075e-06, | |
| "loss": 0.5654491782188416, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 2.4932844932844933, | |
| "grad_norm": 1.6297937631607056, | |
| "learning_rate": 3.5270020244515583e-06, | |
| "loss": 0.5325220227241516, | |
| "step": 2042 | |
| }, | |
| { | |
| "epoch": 2.4957264957264957, | |
| "grad_norm": 0.9335009455680847, | |
| "learning_rate": 3.5127795149676014e-06, | |
| "loss": 0.38437139987945557, | |
| "step": 2044 | |
| }, | |
| { | |
| "epoch": 2.498168498168498, | |
| "grad_norm": 141.24978637695312, | |
| "learning_rate": 3.49861747027823e-06, | |
| "loss": 0.2638123035430908, | |
| "step": 2046 | |
| }, | |
| { | |
| "epoch": 2.5006105006105006, | |
| "grad_norm": 1.3640321493148804, | |
| "learning_rate": 3.484516004752334e-06, | |
| "loss": 0.4149170219898224, | |
| "step": 2048 | |
| }, | |
| { | |
| "epoch": 2.503052503052503, | |
| "grad_norm": 1.0066052675247192, | |
| "learning_rate": 3.4704752322695877e-06, | |
| "loss": 0.4781511425971985, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 2.5054945054945055, | |
| "grad_norm": 1.2308069467544556, | |
| "learning_rate": 3.456495266219525e-06, | |
| "loss": 0.7653157711029053, | |
| "step": 2052 | |
| }, | |
| { | |
| "epoch": 2.507936507936508, | |
| "grad_norm": 1.3373329639434814, | |
| "learning_rate": 3.442576219500614e-06, | |
| "loss": 0.36611488461494446, | |
| "step": 2054 | |
| }, | |
| { | |
| "epoch": 2.5103785103785103, | |
| "grad_norm": 1.555979609489441, | |
| "learning_rate": 3.428718204519369e-06, | |
| "loss": 0.531693696975708, | |
| "step": 2056 | |
| }, | |
| { | |
| "epoch": 2.5128205128205128, | |
| "grad_norm": 8.703025817871094, | |
| "learning_rate": 3.4149213331894193e-06, | |
| "loss": 0.18801343441009521, | |
| "step": 2058 | |
| }, | |
| { | |
| "epoch": 2.515262515262515, | |
| "grad_norm": 1.2803109884262085, | |
| "learning_rate": 3.4011857169306127e-06, | |
| "loss": 0.16657070815563202, | |
| "step": 2060 | |
| }, | |
| { | |
| "epoch": 2.5177045177045176, | |
| "grad_norm": 0.712373673915863, | |
| "learning_rate": 3.3875114666681235e-06, | |
| "loss": 0.2420540601015091, | |
| "step": 2062 | |
| }, | |
| { | |
| "epoch": 2.52014652014652, | |
| "grad_norm": 1.780391812324524, | |
| "learning_rate": 3.3738986928315474e-06, | |
| "loss": 0.4269709587097168, | |
| "step": 2064 | |
| }, | |
| { | |
| "epoch": 2.5225885225885225, | |
| "grad_norm": 1.2723828554153442, | |
| "learning_rate": 3.360347505354011e-06, | |
| "loss": 0.3732086420059204, | |
| "step": 2066 | |
| }, | |
| { | |
| "epoch": 2.525030525030525, | |
| "grad_norm": 2.761953353881836, | |
| "learning_rate": 3.3468580136712903e-06, | |
| "loss": 0.5551900863647461, | |
| "step": 2068 | |
| }, | |
| { | |
| "epoch": 2.5274725274725274, | |
| "grad_norm": 0.8927345275878906, | |
| "learning_rate": 3.333430326720921e-06, | |
| "loss": 0.5004504919052124, | |
| "step": 2070 | |
| }, | |
| { | |
| "epoch": 2.52991452991453, | |
| "grad_norm": 0.67017662525177, | |
| "learning_rate": 3.3200645529413165e-06, | |
| "loss": 0.31844204664230347, | |
| "step": 2072 | |
| }, | |
| { | |
| "epoch": 2.5323565323565322, | |
| "grad_norm": 1.6567728519439697, | |
| "learning_rate": 3.3067608002709006e-06, | |
| "loss": 0.592690646648407, | |
| "step": 2074 | |
| }, | |
| { | |
| "epoch": 2.5347985347985347, | |
| "grad_norm": 1.0990091562271118, | |
| "learning_rate": 3.2935191761472313e-06, | |
| "loss": 0.509267270565033, | |
| "step": 2076 | |
| }, | |
| { | |
| "epoch": 2.537240537240537, | |
| "grad_norm": 2.832087516784668, | |
| "learning_rate": 3.280339787506127e-06, | |
| "loss": 0.4890163540840149, | |
| "step": 2078 | |
| }, | |
| { | |
| "epoch": 2.5396825396825395, | |
| "grad_norm": 3.6818792819976807, | |
| "learning_rate": 3.2672227407808184e-06, | |
| "loss": 0.35127052664756775, | |
| "step": 2080 | |
| }, | |
| { | |
| "epoch": 2.542124542124542, | |
| "grad_norm": 0.9744904041290283, | |
| "learning_rate": 3.2541681419010716e-06, | |
| "loss": 0.4693216383457184, | |
| "step": 2082 | |
| }, | |
| { | |
| "epoch": 2.5445665445665444, | |
| "grad_norm": 0.9872434735298157, | |
| "learning_rate": 3.2411760962923434e-06, | |
| "loss": 0.47572940587997437, | |
| "step": 2084 | |
| }, | |
| { | |
| "epoch": 2.547008547008547, | |
| "grad_norm": 1.288815975189209, | |
| "learning_rate": 3.228246708874926e-06, | |
| "loss": 0.45491641759872437, | |
| "step": 2086 | |
| }, | |
| { | |
| "epoch": 2.5494505494505493, | |
| "grad_norm": 1.0426764488220215, | |
| "learning_rate": 3.2153800840631043e-06, | |
| "loss": 0.6177046298980713, | |
| "step": 2088 | |
| }, | |
| { | |
| "epoch": 2.5518925518925517, | |
| "grad_norm": 1.2259653806686401, | |
| "learning_rate": 3.202576325764307e-06, | |
| "loss": 0.45679447054862976, | |
| "step": 2090 | |
| }, | |
| { | |
| "epoch": 2.554334554334554, | |
| "grad_norm": 2.0075936317443848, | |
| "learning_rate": 3.1898355373782663e-06, | |
| "loss": 0.3028113842010498, | |
| "step": 2092 | |
| }, | |
| { | |
| "epoch": 2.5567765567765566, | |
| "grad_norm": 0.8422965407371521, | |
| "learning_rate": 3.177157821796191e-06, | |
| "loss": 0.2570323646068573, | |
| "step": 2094 | |
| }, | |
| { | |
| "epoch": 2.559218559218559, | |
| "grad_norm": 0.8695139288902283, | |
| "learning_rate": 3.1645432813999306e-06, | |
| "loss": 0.3652976155281067, | |
| "step": 2096 | |
| }, | |
| { | |
| "epoch": 2.5616605616605614, | |
| "grad_norm": 2.6163241863250732, | |
| "learning_rate": 3.1519920180611436e-06, | |
| "loss": 0.08200995624065399, | |
| "step": 2098 | |
| }, | |
| { | |
| "epoch": 2.564102564102564, | |
| "grad_norm": 0.7538577914237976, | |
| "learning_rate": 3.139504133140484e-06, | |
| "loss": 0.26613810658454895, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 2.5665445665445663, | |
| "grad_norm": 0.9928892254829407, | |
| "learning_rate": 3.127079727486781e-06, | |
| "loss": 0.39854198694229126, | |
| "step": 2102 | |
| }, | |
| { | |
| "epoch": 2.5689865689865687, | |
| "grad_norm": 2.9046833515167236, | |
| "learning_rate": 3.114718901436215e-06, | |
| "loss": 0.35459813475608826, | |
| "step": 2104 | |
| }, | |
| { | |
| "epoch": 2.571428571428571, | |
| "grad_norm": 0.8664820194244385, | |
| "learning_rate": 3.1024217548115195e-06, | |
| "loss": 0.3210771977901459, | |
| "step": 2106 | |
| }, | |
| { | |
| "epoch": 2.5738705738705736, | |
| "grad_norm": 1.5520901679992676, | |
| "learning_rate": 3.090188386921171e-06, | |
| "loss": 0.24245740473270416, | |
| "step": 2108 | |
| }, | |
| { | |
| "epoch": 2.576312576312576, | |
| "grad_norm": 1.7673155069351196, | |
| "learning_rate": 3.078018896558582e-06, | |
| "loss": 0.21324002742767334, | |
| "step": 2110 | |
| }, | |
| { | |
| "epoch": 2.578754578754579, | |
| "grad_norm": 0.730332612991333, | |
| "learning_rate": 3.0659133820013123e-06, | |
| "loss": 0.469443142414093, | |
| "step": 2112 | |
| }, | |
| { | |
| "epoch": 2.5811965811965814, | |
| "grad_norm": 1.5071324110031128, | |
| "learning_rate": 3.0538719410102612e-06, | |
| "loss": 0.16458410024642944, | |
| "step": 2114 | |
| }, | |
| { | |
| "epoch": 2.583638583638584, | |
| "grad_norm": 1.1855233907699585, | |
| "learning_rate": 3.0418946708288984e-06, | |
| "loss": 0.3730916976928711, | |
| "step": 2116 | |
| }, | |
| { | |
| "epoch": 2.586080586080586, | |
| "grad_norm": 1.2179559469223022, | |
| "learning_rate": 3.029981668182458e-06, | |
| "loss": 0.5398478507995605, | |
| "step": 2118 | |
| }, | |
| { | |
| "epoch": 2.5885225885225887, | |
| "grad_norm": 1.1000230312347412, | |
| "learning_rate": 3.0181330292771727e-06, | |
| "loss": 0.25115227699279785, | |
| "step": 2120 | |
| }, | |
| { | |
| "epoch": 2.590964590964591, | |
| "grad_norm": 1.437605857849121, | |
| "learning_rate": 3.0063488497994864e-06, | |
| "loss": 0.6454752087593079, | |
| "step": 2122 | |
| }, | |
| { | |
| "epoch": 2.5934065934065935, | |
| "grad_norm": 0.7121138572692871, | |
| "learning_rate": 2.994629224915288e-06, | |
| "loss": 0.30809617042541504, | |
| "step": 2124 | |
| }, | |
| { | |
| "epoch": 2.595848595848596, | |
| "grad_norm": 1.196258783340454, | |
| "learning_rate": 2.9829742492691436e-06, | |
| "loss": 0.1984136551618576, | |
| "step": 2126 | |
| }, | |
| { | |
| "epoch": 2.5982905982905984, | |
| "grad_norm": 3.140024423599243, | |
| "learning_rate": 2.971384016983522e-06, | |
| "loss": 0.4299178123474121, | |
| "step": 2128 | |
| }, | |
| { | |
| "epoch": 2.600732600732601, | |
| "grad_norm": 2.820770502090454, | |
| "learning_rate": 2.959858621658047e-06, | |
| "loss": 0.2969256043434143, | |
| "step": 2130 | |
| }, | |
| { | |
| "epoch": 2.6031746031746033, | |
| "grad_norm": 3.3160879611968994, | |
| "learning_rate": 2.94839815636874e-06, | |
| "loss": 0.2652299702167511, | |
| "step": 2132 | |
| }, | |
| { | |
| "epoch": 2.6056166056166057, | |
| "grad_norm": 0.7100194096565247, | |
| "learning_rate": 2.9370027136672536e-06, | |
| "loss": 0.34369128942489624, | |
| "step": 2134 | |
| }, | |
| { | |
| "epoch": 2.608058608058608, | |
| "grad_norm": 3.5660557746887207, | |
| "learning_rate": 2.925672385580145e-06, | |
| "loss": 0.30307111144065857, | |
| "step": 2136 | |
| }, | |
| { | |
| "epoch": 2.6105006105006106, | |
| "grad_norm": 0.9895382523536682, | |
| "learning_rate": 2.9144072636081233e-06, | |
| "loss": 0.2503519058227539, | |
| "step": 2138 | |
| }, | |
| { | |
| "epoch": 2.612942612942613, | |
| "grad_norm": 0.7191367745399475, | |
| "learning_rate": 2.9032074387253017e-06, | |
| "loss": 0.25583434104919434, | |
| "step": 2140 | |
| }, | |
| { | |
| "epoch": 2.6153846153846154, | |
| "grad_norm": 1.4276951551437378, | |
| "learning_rate": 2.892073001378481e-06, | |
| "loss": 0.3618330955505371, | |
| "step": 2142 | |
| }, | |
| { | |
| "epoch": 2.617826617826618, | |
| "grad_norm": 2.0080482959747314, | |
| "learning_rate": 2.881004041486406e-06, | |
| "loss": 0.4887958765029907, | |
| "step": 2144 | |
| }, | |
| { | |
| "epoch": 2.6202686202686203, | |
| "grad_norm": 0.8838030099868774, | |
| "learning_rate": 2.8700006484390395e-06, | |
| "loss": 0.46932682394981384, | |
| "step": 2146 | |
| }, | |
| { | |
| "epoch": 2.6227106227106227, | |
| "grad_norm": 1.1301337480545044, | |
| "learning_rate": 2.8590629110968503e-06, | |
| "loss": 0.3209373652935028, | |
| "step": 2148 | |
| }, | |
| { | |
| "epoch": 2.625152625152625, | |
| "grad_norm": 1.117184042930603, | |
| "learning_rate": 2.8481909177900874e-06, | |
| "loss": 0.468944787979126, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 2.6275946275946276, | |
| "grad_norm": 1.6847853660583496, | |
| "learning_rate": 2.837384756318063e-06, | |
| "loss": 0.439802885055542, | |
| "step": 2152 | |
| }, | |
| { | |
| "epoch": 2.63003663003663, | |
| "grad_norm": 1.6028481721878052, | |
| "learning_rate": 2.826644513948456e-06, | |
| "loss": 0.48533153533935547, | |
| "step": 2154 | |
| }, | |
| { | |
| "epoch": 2.6324786324786325, | |
| "grad_norm": 2.249617576599121, | |
| "learning_rate": 2.8159702774166e-06, | |
| "loss": 0.5256586670875549, | |
| "step": 2156 | |
| }, | |
| { | |
| "epoch": 2.634920634920635, | |
| "grad_norm": 1.6403663158416748, | |
| "learning_rate": 2.8053621329247767e-06, | |
| "loss": 0.5299547910690308, | |
| "step": 2158 | |
| }, | |
| { | |
| "epoch": 2.6373626373626373, | |
| "grad_norm": 1.569277048110962, | |
| "learning_rate": 2.7948201661415307e-06, | |
| "loss": 0.2885707914829254, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 2.6398046398046398, | |
| "grad_norm": 1.2910041809082031, | |
| "learning_rate": 2.7843444622009746e-06, | |
| "loss": 0.34332627058029175, | |
| "step": 2162 | |
| }, | |
| { | |
| "epoch": 2.642246642246642, | |
| "grad_norm": 1.1258636713027954, | |
| "learning_rate": 2.773935105702096e-06, | |
| "loss": 0.3300524652004242, | |
| "step": 2164 | |
| }, | |
| { | |
| "epoch": 2.6446886446886446, | |
| "grad_norm": 1.1584712266921997, | |
| "learning_rate": 2.763592180708081e-06, | |
| "loss": 0.4990626871585846, | |
| "step": 2166 | |
| }, | |
| { | |
| "epoch": 2.647130647130647, | |
| "grad_norm": 0.8549714684486389, | |
| "learning_rate": 2.7533157707456336e-06, | |
| "loss": 0.42835402488708496, | |
| "step": 2168 | |
| }, | |
| { | |
| "epoch": 2.6495726495726495, | |
| "grad_norm": 0.7408347129821777, | |
| "learning_rate": 2.7431059588042945e-06, | |
| "loss": 0.504192590713501, | |
| "step": 2170 | |
| }, | |
| { | |
| "epoch": 2.652014652014652, | |
| "grad_norm": 1.2692267894744873, | |
| "learning_rate": 2.7329628273357815e-06, | |
| "loss": 0.5846405029296875, | |
| "step": 2172 | |
| }, | |
| { | |
| "epoch": 2.6544566544566544, | |
| "grad_norm": 1.1758378744125366, | |
| "learning_rate": 2.72288645825332e-06, | |
| "loss": 0.4775027632713318, | |
| "step": 2174 | |
| }, | |
| { | |
| "epoch": 2.656898656898657, | |
| "grad_norm": 1.020842432975769, | |
| "learning_rate": 2.7128769329309744e-06, | |
| "loss": 0.2678804397583008, | |
| "step": 2176 | |
| }, | |
| { | |
| "epoch": 2.659340659340659, | |
| "grad_norm": 0.7583962082862854, | |
| "learning_rate": 2.702934332203002e-06, | |
| "loss": 0.4422096908092499, | |
| "step": 2178 | |
| }, | |
| { | |
| "epoch": 2.6617826617826617, | |
| "grad_norm": 2.3237428665161133, | |
| "learning_rate": 2.6930587363631932e-06, | |
| "loss": 0.4233754575252533, | |
| "step": 2180 | |
| }, | |
| { | |
| "epoch": 2.664224664224664, | |
| "grad_norm": 0.6809400916099548, | |
| "learning_rate": 2.6832502251642223e-06, | |
| "loss": 0.40418240427970886, | |
| "step": 2182 | |
| }, | |
| { | |
| "epoch": 2.6666666666666665, | |
| "grad_norm": 0.7269507050514221, | |
| "learning_rate": 2.6735088778170105e-06, | |
| "loss": 0.2588379979133606, | |
| "step": 2184 | |
| }, | |
| { | |
| "epoch": 2.669108669108669, | |
| "grad_norm": 1.2103101015090942, | |
| "learning_rate": 2.66383477299008e-06, | |
| "loss": 0.39823517203330994, | |
| "step": 2186 | |
| }, | |
| { | |
| "epoch": 2.6715506715506714, | |
| "grad_norm": 0.9755131006240845, | |
| "learning_rate": 2.6542279888089163e-06, | |
| "loss": 0.3795110881328583, | |
| "step": 2188 | |
| }, | |
| { | |
| "epoch": 2.6739926739926743, | |
| "grad_norm": 0.9968971610069275, | |
| "learning_rate": 2.6446886028553476e-06, | |
| "loss": 0.5400364995002747, | |
| "step": 2190 | |
| }, | |
| { | |
| "epoch": 2.6764346764346767, | |
| "grad_norm": 2.260093927383423, | |
| "learning_rate": 2.6352166921669076e-06, | |
| "loss": 0.5039065480232239, | |
| "step": 2192 | |
| }, | |
| { | |
| "epoch": 2.678876678876679, | |
| "grad_norm": 2.027021646499634, | |
| "learning_rate": 2.625812333236222e-06, | |
| "loss": 0.13939893245697021, | |
| "step": 2194 | |
| }, | |
| { | |
| "epoch": 2.6813186813186816, | |
| "grad_norm": 0.9278559684753418, | |
| "learning_rate": 2.61647560201038e-06, | |
| "loss": 0.33114153146743774, | |
| "step": 2196 | |
| }, | |
| { | |
| "epoch": 2.683760683760684, | |
| "grad_norm": 0.7097818851470947, | |
| "learning_rate": 2.6072065738903335e-06, | |
| "loss": 0.521342396736145, | |
| "step": 2198 | |
| }, | |
| { | |
| "epoch": 2.6862026862026864, | |
| "grad_norm": 1.291375756263733, | |
| "learning_rate": 2.5980053237302816e-06, | |
| "loss": 0.4681139588356018, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 2.688644688644689, | |
| "grad_norm": 4.184018611907959, | |
| "learning_rate": 2.588871925837062e-06, | |
| "loss": 0.28020548820495605, | |
| "step": 2202 | |
| }, | |
| { | |
| "epoch": 2.6910866910866913, | |
| "grad_norm": 2.3299784660339355, | |
| "learning_rate": 2.5798064539695604e-06, | |
| "loss": 0.5311964750289917, | |
| "step": 2204 | |
| }, | |
| { | |
| "epoch": 2.6935286935286937, | |
| "grad_norm": 2.1903867721557617, | |
| "learning_rate": 2.5708089813381088e-06, | |
| "loss": 0.12289441376924515, | |
| "step": 2206 | |
| }, | |
| { | |
| "epoch": 2.695970695970696, | |
| "grad_norm": 2.261828899383545, | |
| "learning_rate": 2.561879580603893e-06, | |
| "loss": 0.47109082341194153, | |
| "step": 2208 | |
| }, | |
| { | |
| "epoch": 2.6984126984126986, | |
| "grad_norm": 1.110669493675232, | |
| "learning_rate": 2.5530183238783728e-06, | |
| "loss": 0.3485221564769745, | |
| "step": 2210 | |
| }, | |
| { | |
| "epoch": 2.700854700854701, | |
| "grad_norm": 1.4882543087005615, | |
| "learning_rate": 2.5442252827226925e-06, | |
| "loss": 0.5045080184936523, | |
| "step": 2212 | |
| }, | |
| { | |
| "epoch": 2.7032967032967035, | |
| "grad_norm": 1.0571633577346802, | |
| "learning_rate": 2.5355005281471046e-06, | |
| "loss": 0.2372823804616928, | |
| "step": 2214 | |
| }, | |
| { | |
| "epoch": 2.705738705738706, | |
| "grad_norm": 0.6153679490089417, | |
| "learning_rate": 2.526844130610399e-06, | |
| "loss": 0.2721218168735504, | |
| "step": 2216 | |
| }, | |
| { | |
| "epoch": 2.7081807081807083, | |
| "grad_norm": 1.4203280210494995, | |
| "learning_rate": 2.5182561600193317e-06, | |
| "loss": 0.311516672372818, | |
| "step": 2218 | |
| }, | |
| { | |
| "epoch": 2.7106227106227108, | |
| "grad_norm": 2.5656776428222656, | |
| "learning_rate": 2.5097366857280636e-06, | |
| "loss": 0.1073763519525528, | |
| "step": 2220 | |
| }, | |
| { | |
| "epoch": 2.713064713064713, | |
| "grad_norm": 1.4745090007781982, | |
| "learning_rate": 2.501285776537593e-06, | |
| "loss": 0.358319491147995, | |
| "step": 2222 | |
| }, | |
| { | |
| "epoch": 2.7155067155067156, | |
| "grad_norm": 1.3464287519454956, | |
| "learning_rate": 2.4929035006952106e-06, | |
| "loss": 0.21015426516532898, | |
| "step": 2224 | |
| }, | |
| { | |
| "epoch": 2.717948717948718, | |
| "grad_norm": 1.07408607006073, | |
| "learning_rate": 2.4845899258939362e-06, | |
| "loss": 0.25736236572265625, | |
| "step": 2226 | |
| }, | |
| { | |
| "epoch": 2.7203907203907205, | |
| "grad_norm": 2.0369420051574707, | |
| "learning_rate": 2.4763451192719816e-06, | |
| "loss": 0.2484760284423828, | |
| "step": 2228 | |
| }, | |
| { | |
| "epoch": 2.722832722832723, | |
| "grad_norm": 1.062886357307434, | |
| "learning_rate": 2.4681691474122064e-06, | |
| "loss": 0.4695739150047302, | |
| "step": 2230 | |
| }, | |
| { | |
| "epoch": 2.7252747252747254, | |
| "grad_norm": 3.0891904830932617, | |
| "learning_rate": 2.4600620763415754e-06, | |
| "loss": 0.2893969714641571, | |
| "step": 2232 | |
| }, | |
| { | |
| "epoch": 2.727716727716728, | |
| "grad_norm": 0.8144769072532654, | |
| "learning_rate": 2.4520239715306325e-06, | |
| "loss": 0.5152880549430847, | |
| "step": 2234 | |
| }, | |
| { | |
| "epoch": 2.7301587301587302, | |
| "grad_norm": 1.6376501321792603, | |
| "learning_rate": 2.4440548978929678e-06, | |
| "loss": 0.7832448482513428, | |
| "step": 2236 | |
| }, | |
| { | |
| "epoch": 2.7326007326007327, | |
| "grad_norm": 0.9825433492660522, | |
| "learning_rate": 2.4361549197846914e-06, | |
| "loss": 0.376642107963562, | |
| "step": 2238 | |
| }, | |
| { | |
| "epoch": 2.735042735042735, | |
| "grad_norm": 1.621090054512024, | |
| "learning_rate": 2.42832410100392e-06, | |
| "loss": 0.26889967918395996, | |
| "step": 2240 | |
| }, | |
| { | |
| "epoch": 2.7374847374847375, | |
| "grad_norm": 0.8367129564285278, | |
| "learning_rate": 2.420562504790256e-06, | |
| "loss": 0.5269310474395752, | |
| "step": 2242 | |
| }, | |
| { | |
| "epoch": 2.73992673992674, | |
| "grad_norm": 2.0027148723602295, | |
| "learning_rate": 2.412870193824278e-06, | |
| "loss": 0.2715807557106018, | |
| "step": 2244 | |
| }, | |
| { | |
| "epoch": 2.7423687423687424, | |
| "grad_norm": 1.490946650505066, | |
| "learning_rate": 2.4052472302270365e-06, | |
| "loss": 0.2188037633895874, | |
| "step": 2246 | |
| }, | |
| { | |
| "epoch": 2.744810744810745, | |
| "grad_norm": 1.6017478704452515, | |
| "learning_rate": 2.3976936755595533e-06, | |
| "loss": 0.4869040846824646, | |
| "step": 2248 | |
| }, | |
| { | |
| "epoch": 2.7472527472527473, | |
| "grad_norm": 1.3607432842254639, | |
| "learning_rate": 2.390209590822319e-06, | |
| "loss": 0.40255841612815857, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 2.7496947496947497, | |
| "grad_norm": 1.5456528663635254, | |
| "learning_rate": 2.3827950364548034e-06, | |
| "loss": 0.6289904117584229, | |
| "step": 2252 | |
| }, | |
| { | |
| "epoch": 2.752136752136752, | |
| "grad_norm": 1.5753426551818848, | |
| "learning_rate": 2.375450072334972e-06, | |
| "loss": 0.5615298748016357, | |
| "step": 2254 | |
| }, | |
| { | |
| "epoch": 2.7545787545787546, | |
| "grad_norm": 1.4261977672576904, | |
| "learning_rate": 2.3681747577787924e-06, | |
| "loss": 0.2363334745168686, | |
| "step": 2256 | |
| }, | |
| { | |
| "epoch": 2.757020757020757, | |
| "grad_norm": 1.1819992065429688, | |
| "learning_rate": 2.3609691515397628e-06, | |
| "loss": 0.4858379364013672, | |
| "step": 2258 | |
| }, | |
| { | |
| "epoch": 2.7594627594627594, | |
| "grad_norm": 1.9267686605453491, | |
| "learning_rate": 2.3538333118084396e-06, | |
| "loss": 0.5177884697914124, | |
| "step": 2260 | |
| }, | |
| { | |
| "epoch": 2.761904761904762, | |
| "grad_norm": 1.1344858407974243, | |
| "learning_rate": 2.3467672962119565e-06, | |
| "loss": 0.5373342037200928, | |
| "step": 2262 | |
| }, | |
| { | |
| "epoch": 2.7643467643467643, | |
| "grad_norm": 0.8637273907661438, | |
| "learning_rate": 2.3397711618135725e-06, | |
| "loss": 0.43640759587287903, | |
| "step": 2264 | |
| }, | |
| { | |
| "epoch": 2.7667887667887667, | |
| "grad_norm": 1.145462155342102, | |
| "learning_rate": 2.332844965112201e-06, | |
| "loss": 0.3964022099971771, | |
| "step": 2266 | |
| }, | |
| { | |
| "epoch": 2.769230769230769, | |
| "grad_norm": 0.5111590623855591, | |
| "learning_rate": 2.3259887620419573e-06, | |
| "loss": 0.3127731680870056, | |
| "step": 2268 | |
| }, | |
| { | |
| "epoch": 2.7716727716727716, | |
| "grad_norm": 0.791425347328186, | |
| "learning_rate": 2.3192026079717086e-06, | |
| "loss": 0.2613333463668823, | |
| "step": 2270 | |
| }, | |
| { | |
| "epoch": 2.774114774114774, | |
| "grad_norm": 0.23441043496131897, | |
| "learning_rate": 2.3124865577046252e-06, | |
| "loss": 0.07839272171258926, | |
| "step": 2272 | |
| }, | |
| { | |
| "epoch": 2.7765567765567765, | |
| "grad_norm": 1.0026205778121948, | |
| "learning_rate": 2.3058406654777355e-06, | |
| "loss": 0.502284824848175, | |
| "step": 2274 | |
| }, | |
| { | |
| "epoch": 2.778998778998779, | |
| "grad_norm": 0.9165741801261902, | |
| "learning_rate": 2.299264984961492e-06, | |
| "loss": 0.6292468905448914, | |
| "step": 2276 | |
| }, | |
| { | |
| "epoch": 2.7814407814407813, | |
| "grad_norm": 1.3016325235366821, | |
| "learning_rate": 2.2927595692593366e-06, | |
| "loss": 0.3484017252922058, | |
| "step": 2278 | |
| }, | |
| { | |
| "epoch": 2.7838827838827838, | |
| "grad_norm": 1.573944091796875, | |
| "learning_rate": 2.286324470907269e-06, | |
| "loss": 0.18759427964687347, | |
| "step": 2280 | |
| }, | |
| { | |
| "epoch": 2.786324786324786, | |
| "grad_norm": 2.0719950199127197, | |
| "learning_rate": 2.279959741873426e-06, | |
| "loss": 0.419060617685318, | |
| "step": 2282 | |
| }, | |
| { | |
| "epoch": 2.7887667887667886, | |
| "grad_norm": 1.6407965421676636, | |
| "learning_rate": 2.2736654335576634e-06, | |
| "loss": 0.4783077836036682, | |
| "step": 2284 | |
| }, | |
| { | |
| "epoch": 2.791208791208791, | |
| "grad_norm": 1.0861320495605469, | |
| "learning_rate": 2.267441596791132e-06, | |
| "loss": 0.4703105390071869, | |
| "step": 2286 | |
| }, | |
| { | |
| "epoch": 2.7936507936507935, | |
| "grad_norm": 0.9553175568580627, | |
| "learning_rate": 2.2612882818358784e-06, | |
| "loss": 0.41585975885391235, | |
| "step": 2288 | |
| }, | |
| { | |
| "epoch": 2.796092796092796, | |
| "grad_norm": 9.468893051147461, | |
| "learning_rate": 2.2552055383844327e-06, | |
| "loss": 0.08420296758413315, | |
| "step": 2290 | |
| }, | |
| { | |
| "epoch": 2.7985347985347984, | |
| "grad_norm": 2.4556336402893066, | |
| "learning_rate": 2.2491934155594063e-06, | |
| "loss": 0.35032370686531067, | |
| "step": 2292 | |
| }, | |
| { | |
| "epoch": 2.800976800976801, | |
| "grad_norm": 1.1944650411605835, | |
| "learning_rate": 2.243251961913099e-06, | |
| "loss": 0.36088746786117554, | |
| "step": 2294 | |
| }, | |
| { | |
| "epoch": 2.8034188034188032, | |
| "grad_norm": 0.9773551821708679, | |
| "learning_rate": 2.2373812254271074e-06, | |
| "loss": 0.42339953780174255, | |
| "step": 2296 | |
| }, | |
| { | |
| "epoch": 2.8058608058608057, | |
| "grad_norm": 1.2944077253341675, | |
| "learning_rate": 2.231581253511929e-06, | |
| "loss": 0.1882065087556839, | |
| "step": 2298 | |
| }, | |
| { | |
| "epoch": 2.808302808302808, | |
| "grad_norm": 1.328771948814392, | |
| "learning_rate": 2.2258520930065902e-06, | |
| "loss": 0.33834829926490784, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 2.8107448107448105, | |
| "grad_norm": 1.5796797275543213, | |
| "learning_rate": 2.2201937901782632e-06, | |
| "loss": 0.5746235847473145, | |
| "step": 2302 | |
| }, | |
| { | |
| "epoch": 2.813186813186813, | |
| "grad_norm": 0.1405964195728302, | |
| "learning_rate": 2.2146063907218928e-06, | |
| "loss": 0.2884528338909149, | |
| "step": 2304 | |
| }, | |
| { | |
| "epoch": 2.8156288156288154, | |
| "grad_norm": 0.7891967296600342, | |
| "learning_rate": 2.2090899397598235e-06, | |
| "loss": 0.34547799825668335, | |
| "step": 2306 | |
| }, | |
| { | |
| "epoch": 2.818070818070818, | |
| "grad_norm": 1.0902297496795654, | |
| "learning_rate": 2.2036444818414424e-06, | |
| "loss": 0.4068155288696289, | |
| "step": 2308 | |
| }, | |
| { | |
| "epoch": 2.8205128205128203, | |
| "grad_norm": 1.061621904373169, | |
| "learning_rate": 2.198270060942815e-06, | |
| "loss": 0.4539620876312256, | |
| "step": 2310 | |
| }, | |
| { | |
| "epoch": 2.8229548229548227, | |
| "grad_norm": 0.9649152755737305, | |
| "learning_rate": 2.192966720466328e-06, | |
| "loss": 0.22723491489887238, | |
| "step": 2312 | |
| }, | |
| { | |
| "epoch": 2.825396825396825, | |
| "grad_norm": 10.881244659423828, | |
| "learning_rate": 2.1877345032403458e-06, | |
| "loss": 0.287578284740448, | |
| "step": 2314 | |
| }, | |
| { | |
| "epoch": 2.8278388278388276, | |
| "grad_norm": 2.314340829849243, | |
| "learning_rate": 2.182573451518859e-06, | |
| "loss": 0.4537888169288635, | |
| "step": 2316 | |
| }, | |
| { | |
| "epoch": 2.8302808302808304, | |
| "grad_norm": 1.7877088785171509, | |
| "learning_rate": 2.1774836069811415e-06, | |
| "loss": 0.3850943446159363, | |
| "step": 2318 | |
| }, | |
| { | |
| "epoch": 2.832722832722833, | |
| "grad_norm": 0.8207268714904785, | |
| "learning_rate": 2.1724650107314217e-06, | |
| "loss": 0.22680553793907166, | |
| "step": 2320 | |
| }, | |
| { | |
| "epoch": 2.8351648351648353, | |
| "grad_norm": 1.7450029850006104, | |
| "learning_rate": 2.1675177032985435e-06, | |
| "loss": 0.34959569573402405, | |
| "step": 2322 | |
| }, | |
| { | |
| "epoch": 2.8376068376068377, | |
| "grad_norm": 0.28571420907974243, | |
| "learning_rate": 2.1626417246356398e-06, | |
| "loss": 0.08046525716781616, | |
| "step": 2324 | |
| }, | |
| { | |
| "epoch": 2.84004884004884, | |
| "grad_norm": 1.3986101150512695, | |
| "learning_rate": 2.1578371141198154e-06, | |
| "loss": 0.3989933431148529, | |
| "step": 2326 | |
| }, | |
| { | |
| "epoch": 2.8424908424908426, | |
| "grad_norm": 1.5185210704803467, | |
| "learning_rate": 2.15310391055182e-06, | |
| "loss": 0.27708202600479126, | |
| "step": 2328 | |
| }, | |
| { | |
| "epoch": 2.844932844932845, | |
| "grad_norm": 2.8958606719970703, | |
| "learning_rate": 2.1484421521557453e-06, | |
| "loss": 0.24901802837848663, | |
| "step": 2330 | |
| }, | |
| { | |
| "epoch": 2.8473748473748475, | |
| "grad_norm": 1.108059048652649, | |
| "learning_rate": 2.143851876578706e-06, | |
| "loss": 0.45619091391563416, | |
| "step": 2332 | |
| }, | |
| { | |
| "epoch": 2.84981684981685, | |
| "grad_norm": 1.0437735319137573, | |
| "learning_rate": 2.1393331208905436e-06, | |
| "loss": 0.07932747900485992, | |
| "step": 2334 | |
| }, | |
| { | |
| "epoch": 2.8522588522588523, | |
| "grad_norm": 1.237439513206482, | |
| "learning_rate": 2.134885921583522e-06, | |
| "loss": 0.5910269021987915, | |
| "step": 2336 | |
| }, | |
| { | |
| "epoch": 2.8547008547008548, | |
| "grad_norm": 1.1078741550445557, | |
| "learning_rate": 2.1305103145720383e-06, | |
| "loss": 0.3153696656227112, | |
| "step": 2338 | |
| }, | |
| { | |
| "epoch": 2.857142857142857, | |
| "grad_norm": 1.034421682357788, | |
| "learning_rate": 2.1262063351923255e-06, | |
| "loss": 0.47363409399986267, | |
| "step": 2340 | |
| }, | |
| { | |
| "epoch": 2.8595848595848596, | |
| "grad_norm": 1.1710708141326904, | |
| "learning_rate": 2.121974018202172e-06, | |
| "loss": 0.48734188079833984, | |
| "step": 2342 | |
| }, | |
| { | |
| "epoch": 2.862026862026862, | |
| "grad_norm": 2.568005084991455, | |
| "learning_rate": 2.1178133977806413e-06, | |
| "loss": 0.19048890471458435, | |
| "step": 2344 | |
| }, | |
| { | |
| "epoch": 2.8644688644688645, | |
| "grad_norm": 1.4728940725326538, | |
| "learning_rate": 2.113724507527794e-06, | |
| "loss": 0.6129634976387024, | |
| "step": 2346 | |
| }, | |
| { | |
| "epoch": 2.866910866910867, | |
| "grad_norm": 0.22239279747009277, | |
| "learning_rate": 2.1097073804644163e-06, | |
| "loss": 0.2763885259628296, | |
| "step": 2348 | |
| }, | |
| { | |
| "epoch": 2.8693528693528694, | |
| "grad_norm": 0.6631549000740051, | |
| "learning_rate": 2.105762049031753e-06, | |
| "loss": 0.2500677704811096, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 2.871794871794872, | |
| "grad_norm": 1.0234497785568237, | |
| "learning_rate": 2.1018885450912487e-06, | |
| "loss": 0.45614075660705566, | |
| "step": 2352 | |
| }, | |
| { | |
| "epoch": 2.8742368742368742, | |
| "grad_norm": 1.8352830410003662, | |
| "learning_rate": 2.098086899924288e-06, | |
| "loss": 0.3945198953151703, | |
| "step": 2354 | |
| }, | |
| { | |
| "epoch": 2.8766788766788767, | |
| "grad_norm": 0.8980585932731628, | |
| "learning_rate": 2.0943571442319437e-06, | |
| "loss": 0.49924108386039734, | |
| "step": 2356 | |
| }, | |
| { | |
| "epoch": 2.879120879120879, | |
| "grad_norm": 25.131999969482422, | |
| "learning_rate": 2.090699308134726e-06, | |
| "loss": 0.4753328263759613, | |
| "step": 2358 | |
| }, | |
| { | |
| "epoch": 2.8815628815628815, | |
| "grad_norm": 1.648654818534851, | |
| "learning_rate": 2.0871134211723417e-06, | |
| "loss": 0.23788021504878998, | |
| "step": 2360 | |
| }, | |
| { | |
| "epoch": 2.884004884004884, | |
| "grad_norm": 1.9093987941741943, | |
| "learning_rate": 2.0835995123034603e-06, | |
| "loss": 0.32568857073783875, | |
| "step": 2362 | |
| }, | |
| { | |
| "epoch": 2.8864468864468864, | |
| "grad_norm": 1.0956945419311523, | |
| "learning_rate": 2.0801576099054696e-06, | |
| "loss": 0.6228987574577332, | |
| "step": 2364 | |
| }, | |
| { | |
| "epoch": 2.888888888888889, | |
| "grad_norm": 0.992882490158081, | |
| "learning_rate": 2.0767877417742564e-06, | |
| "loss": 0.39544668793678284, | |
| "step": 2366 | |
| }, | |
| { | |
| "epoch": 2.8913308913308913, | |
| "grad_norm": 4.350165367126465, | |
| "learning_rate": 2.0734899351239744e-06, | |
| "loss": 0.3747745156288147, | |
| "step": 2368 | |
| }, | |
| { | |
| "epoch": 2.8937728937728937, | |
| "grad_norm": 1.0189871788024902, | |
| "learning_rate": 2.0702642165868326e-06, | |
| "loss": 0.3083977997303009, | |
| "step": 2370 | |
| }, | |
| { | |
| "epoch": 2.896214896214896, | |
| "grad_norm": 1.012895107269287, | |
| "learning_rate": 2.0671106122128717e-06, | |
| "loss": 0.388817697763443, | |
| "step": 2372 | |
| }, | |
| { | |
| "epoch": 2.8986568986568986, | |
| "grad_norm": 0.2986360788345337, | |
| "learning_rate": 2.064029147469759e-06, | |
| "loss": 0.3050660490989685, | |
| "step": 2374 | |
| }, | |
| { | |
| "epoch": 2.901098901098901, | |
| "grad_norm": 3.99959397315979, | |
| "learning_rate": 2.0610198472425817e-06, | |
| "loss": 0.42830216884613037, | |
| "step": 2376 | |
| }, | |
| { | |
| "epoch": 2.9035409035409034, | |
| "grad_norm": 0.9284391403198242, | |
| "learning_rate": 2.0580827358336447e-06, | |
| "loss": 0.4124550223350525, | |
| "step": 2378 | |
| }, | |
| { | |
| "epoch": 2.905982905982906, | |
| "grad_norm": 1.0101871490478516, | |
| "learning_rate": 2.055217836962276e-06, | |
| "loss": 0.34032320976257324, | |
| "step": 2380 | |
| }, | |
| { | |
| "epoch": 2.9084249084249083, | |
| "grad_norm": 2.9604995250701904, | |
| "learning_rate": 2.0524251737646367e-06, | |
| "loss": 0.5842119455337524, | |
| "step": 2382 | |
| }, | |
| { | |
| "epoch": 2.9108669108669107, | |
| "grad_norm": 1.806335687637329, | |
| "learning_rate": 2.049704768793527e-06, | |
| "loss": 0.308889776468277, | |
| "step": 2384 | |
| }, | |
| { | |
| "epoch": 2.913308913308913, | |
| "grad_norm": 1.2805176973342896, | |
| "learning_rate": 2.0470566440182126e-06, | |
| "loss": 0.736882746219635, | |
| "step": 2386 | |
| }, | |
| { | |
| "epoch": 2.9157509157509156, | |
| "grad_norm": 1.484055995941162, | |
| "learning_rate": 2.0444808208242414e-06, | |
| "loss": 0.3669341504573822, | |
| "step": 2388 | |
| }, | |
| { | |
| "epoch": 2.918192918192918, | |
| "grad_norm": 2.3404009342193604, | |
| "learning_rate": 2.041977320013275e-06, | |
| "loss": 0.303989052772522, | |
| "step": 2390 | |
| }, | |
| { | |
| "epoch": 2.9206349206349205, | |
| "grad_norm": 4.0918097496032715, | |
| "learning_rate": 2.0395461618029175e-06, | |
| "loss": 0.4449572265148163, | |
| "step": 2392 | |
| }, | |
| { | |
| "epoch": 2.9230769230769234, | |
| "grad_norm": 1.0222722291946411, | |
| "learning_rate": 2.0371873658265546e-06, | |
| "loss": 0.31565719842910767, | |
| "step": 2394 | |
| }, | |
| { | |
| "epoch": 2.925518925518926, | |
| "grad_norm": 1.7059550285339355, | |
| "learning_rate": 2.0349009511331912e-06, | |
| "loss": 0.24595557153224945, | |
| "step": 2396 | |
| }, | |
| { | |
| "epoch": 2.927960927960928, | |
| "grad_norm": 3.1395483016967773, | |
| "learning_rate": 2.032686936187305e-06, | |
| "loss": 0.30839934945106506, | |
| "step": 2398 | |
| }, | |
| { | |
| "epoch": 2.9304029304029307, | |
| "grad_norm": 2.6876676082611084, | |
| "learning_rate": 2.0305453388686876e-06, | |
| "loss": 0.32078707218170166, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 2.932844932844933, | |
| "grad_norm": 1.2002466917037964, | |
| "learning_rate": 2.0284761764723087e-06, | |
| "loss": 0.27718839049339294, | |
| "step": 2402 | |
| }, | |
| { | |
| "epoch": 2.9352869352869355, | |
| "grad_norm": 1.524316668510437, | |
| "learning_rate": 2.026479465708171e-06, | |
| "loss": 0.18042829632759094, | |
| "step": 2404 | |
| }, | |
| { | |
| "epoch": 2.937728937728938, | |
| "grad_norm": 2.9133212566375732, | |
| "learning_rate": 2.0245552227011777e-06, | |
| "loss": 0.5652621984481812, | |
| "step": 2406 | |
| }, | |
| { | |
| "epoch": 2.9401709401709404, | |
| "grad_norm": 1.6217875480651855, | |
| "learning_rate": 2.022703462991003e-06, | |
| "loss": 0.28077784180641174, | |
| "step": 2408 | |
| }, | |
| { | |
| "epoch": 2.942612942612943, | |
| "grad_norm": 0.957901656627655, | |
| "learning_rate": 2.0209242015319625e-06, | |
| "loss": 0.312043696641922, | |
| "step": 2410 | |
| }, | |
| { | |
| "epoch": 2.9450549450549453, | |
| "grad_norm": 0.7723997235298157, | |
| "learning_rate": 2.0192174526928982e-06, | |
| "loss": 0.42037639021873474, | |
| "step": 2412 | |
| }, | |
| { | |
| "epoch": 2.9474969474969477, | |
| "grad_norm": 1.0776695013046265, | |
| "learning_rate": 2.0175832302570575e-06, | |
| "loss": 0.5173778533935547, | |
| "step": 2414 | |
| }, | |
| { | |
| "epoch": 2.94993894993895, | |
| "grad_norm": 0.926655650138855, | |
| "learning_rate": 2.016021547421984e-06, | |
| "loss": 0.46436506509780884, | |
| "step": 2416 | |
| }, | |
| { | |
| "epoch": 2.9523809523809526, | |
| "grad_norm": 1.5396034717559814, | |
| "learning_rate": 2.0145324167994134e-06, | |
| "loss": 0.24875374138355255, | |
| "step": 2418 | |
| }, | |
| { | |
| "epoch": 2.954822954822955, | |
| "grad_norm": 1.1180499792099, | |
| "learning_rate": 2.0131158504151655e-06, | |
| "loss": 0.35978463292121887, | |
| "step": 2420 | |
| }, | |
| { | |
| "epoch": 2.9572649572649574, | |
| "grad_norm": 0.9617190957069397, | |
| "learning_rate": 2.0117718597090543e-06, | |
| "loss": 0.3947286605834961, | |
| "step": 2422 | |
| }, | |
| { | |
| "epoch": 2.95970695970696, | |
| "grad_norm": 1.0433496236801147, | |
| "learning_rate": 2.010500455534788e-06, | |
| "loss": 0.28263401985168457, | |
| "step": 2424 | |
| }, | |
| { | |
| "epoch": 2.9621489621489623, | |
| "grad_norm": 1.070198893547058, | |
| "learning_rate": 2.0093016481598885e-06, | |
| "loss": 0.5800071954727173, | |
| "step": 2426 | |
| }, | |
| { | |
| "epoch": 2.9645909645909647, | |
| "grad_norm": 3.0985279083251953, | |
| "learning_rate": 2.0081754472656034e-06, | |
| "loss": 0.1977805346250534, | |
| "step": 2428 | |
| }, | |
| { | |
| "epoch": 2.967032967032967, | |
| "grad_norm": 1.0906306505203247, | |
| "learning_rate": 2.0071218619468327e-06, | |
| "loss": 0.3762721121311188, | |
| "step": 2430 | |
| }, | |
| { | |
| "epoch": 2.9694749694749696, | |
| "grad_norm": 0.7913962602615356, | |
| "learning_rate": 2.0061409007120475e-06, | |
| "loss": 0.3768196403980255, | |
| "step": 2432 | |
| }, | |
| { | |
| "epoch": 2.971916971916972, | |
| "grad_norm": 1.3056226968765259, | |
| "learning_rate": 2.005232571483231e-06, | |
| "loss": 0.46781641244888306, | |
| "step": 2434 | |
| }, | |
| { | |
| "epoch": 2.9743589743589745, | |
| "grad_norm": 1.005242109298706, | |
| "learning_rate": 2.0043968815958075e-06, | |
| "loss": 0.25440388917922974, | |
| "step": 2436 | |
| }, | |
| { | |
| "epoch": 2.976800976800977, | |
| "grad_norm": 3.3108999729156494, | |
| "learning_rate": 2.003633837798584e-06, | |
| "loss": 0.12983591854572296, | |
| "step": 2438 | |
| }, | |
| { | |
| "epoch": 2.9792429792429793, | |
| "grad_norm": 1.743328332901001, | |
| "learning_rate": 2.0029434462537e-06, | |
| "loss": 0.43715769052505493, | |
| "step": 2440 | |
| }, | |
| { | |
| "epoch": 2.9816849816849818, | |
| "grad_norm": 1.05440092086792, | |
| "learning_rate": 2.002325712536572e-06, | |
| "loss": 0.4317605495452881, | |
| "step": 2442 | |
| }, | |
| { | |
| "epoch": 2.984126984126984, | |
| "grad_norm": 2.774752616882324, | |
| "learning_rate": 2.001780641635854e-06, | |
| "loss": 0.39571458101272583, | |
| "step": 2444 | |
| }, | |
| { | |
| "epoch": 2.9865689865689866, | |
| "grad_norm": 1.0296354293823242, | |
| "learning_rate": 2.001308237953393e-06, | |
| "loss": 0.4417667090892792, | |
| "step": 2446 | |
| }, | |
| { | |
| "epoch": 2.989010989010989, | |
| "grad_norm": 1.3123754262924194, | |
| "learning_rate": 2.000908505304195e-06, | |
| "loss": 0.5195387601852417, | |
| "step": 2448 | |
| }, | |
| { | |
| "epoch": 2.9914529914529915, | |
| "grad_norm": 2.177339553833008, | |
| "learning_rate": 2.0005814469163937e-06, | |
| "loss": 0.19710102677345276, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 2.993894993894994, | |
| "grad_norm": 1.543820858001709, | |
| "learning_rate": 2.0003270654312266e-06, | |
| "loss": 0.4630212187767029, | |
| "step": 2452 | |
| }, | |
| { | |
| "epoch": 2.9963369963369964, | |
| "grad_norm": 1.5547709465026855, | |
| "learning_rate": 2.000145362903009e-06, | |
| "loss": 0.6292054057121277, | |
| "step": 2454 | |
| }, | |
| { | |
| "epoch": 2.998778998778999, | |
| "grad_norm": 0.33734217286109924, | |
| "learning_rate": 2.0000363407991222e-06, | |
| "loss": 0.16045792400836945, | |
| "step": 2456 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "step": 2457, | |
| "total_flos": 2.578606960937009e+18, | |
| "train_loss": 0.8228938954362648, | |
| "train_runtime": 8271.5959, | |
| "train_samples_per_second": 4.753, | |
| "train_steps_per_second": 0.297 | |
| } | |
| ], | |
| "logging_steps": 2, | |
| "max_steps": 2457, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 99999, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": false, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 2.578606960937009e+18, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |