Image-Text-to-Text
Transformers
Safetensors
qwen3_5
llama-factory
full
Generated from Trainer
conversational
Instructions to use furproxy/9b-135 with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use furproxy/9b-135 with Transformers:
# Use a pipeline as a high-level helper from transformers import pipeline pipe = pipeline("image-text-to-text", model="furproxy/9b-135") messages = [ { "role": "user", "content": [ {"type": "image", "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/p-blog/candy.JPG"}, {"type": "text", "text": "What animal is on the candy?"} ] }, ] pipe(text=messages)# Load model directly from transformers import AutoProcessor, AutoModelForImageTextToText processor = AutoProcessor.from_pretrained("furproxy/9b-135") model = AutoModelForImageTextToText.from_pretrained("furproxy/9b-135") messages = [ { "role": "user", "content": [ {"type": "image", "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/p-blog/candy.JPG"}, {"type": "text", "text": "What animal is on the candy?"} ] }, ] inputs = processor.apply_chat_template( messages, add_generation_prompt=True, tokenize=True, return_dict=True, return_tensors="pt", ).to(model.device) outputs = model.generate(**inputs, max_new_tokens=40) print(processor.decode(outputs[0][inputs["input_ids"].shape[-1]:])) - Notebooks
- Google Colab
- Kaggle
- Local Apps Settings
- vLLM
How to use furproxy/9b-135 with vLLM:
Install from pip and serve model
# Install vLLM from pip: pip install vllm # Start the vLLM server: vllm serve "furproxy/9b-135" # Call the server using curl (OpenAI-compatible API): curl -X POST "http://localhost:8000/v1/chat/completions" \ -H "Content-Type: application/json" \ --data '{ "model": "furproxy/9b-135", "messages": [ { "role": "user", "content": [ { "type": "text", "text": "Describe this image in one sentence." }, { "type": "image_url", "image_url": { "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg" } } ] } ] }'Use Docker
docker model run hf.co/furproxy/9b-135
- SGLang
How to use furproxy/9b-135 with SGLang:
Install from pip and serve model
# Install SGLang from pip: pip install sglang # Start the SGLang server: python3 -m sglang.launch_server \ --model-path "furproxy/9b-135" \ --host 0.0.0.0 \ --port 30000 # Call the server using curl (OpenAI-compatible API): curl -X POST "http://localhost:30000/v1/chat/completions" \ -H "Content-Type: application/json" \ --data '{ "model": "furproxy/9b-135", "messages": [ { "role": "user", "content": [ { "type": "text", "text": "Describe this image in one sentence." }, { "type": "image_url", "image_url": { "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg" } } ] } ] }'Use Docker images
docker run --gpus all \ --shm-size 32g \ -p 30000:30000 \ -v ~/.cache/huggingface:/root/.cache/huggingface \ --env "HF_TOKEN=<secret>" \ --ipc=host \ lmsysorg/sglang:latest \ python3 -m sglang.launch_server \ --model-path "furproxy/9b-135" \ --host 0.0.0.0 \ --port 30000 # Call the server using curl (OpenAI-compatible API): curl -X POST "http://localhost:30000/v1/chat/completions" \ -H "Content-Type: application/json" \ --data '{ "model": "furproxy/9b-135", "messages": [ { "role": "user", "content": [ { "type": "text", "text": "Describe this image in one sentence." }, { "type": "image_url", "image_url": { "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg" } } ] } ] }' - Docker Model Runner
How to use furproxy/9b-135 with Docker Model Runner:
docker model run hf.co/furproxy/9b-135
| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 3.0, | |
| "eval_steps": 500, | |
| "global_step": 3564, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0016835016835016834, | |
| "grad_norm": 9.827384948730469, | |
| "learning_rate": 1.1173184357541899e-08, | |
| "loss": 1.7055253982543945, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.003367003367003367, | |
| "grad_norm": 9.42782211303711, | |
| "learning_rate": 3.3519553072625695e-08, | |
| "loss": 1.2431578636169434, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.005050505050505051, | |
| "grad_norm": 9.667145729064941, | |
| "learning_rate": 5.586592178770949e-08, | |
| "loss": 1.6887383460998535, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.006734006734006734, | |
| "grad_norm": 11.942709922790527, | |
| "learning_rate": 7.82122905027933e-08, | |
| "loss": 1.6064767837524414, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.008417508417508417, | |
| "grad_norm": 131.6094207763672, | |
| "learning_rate": 1.005586592178771e-07, | |
| "loss": 4.499759674072266, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.010101010101010102, | |
| "grad_norm": 6.955765724182129, | |
| "learning_rate": 1.2290502793296089e-07, | |
| "loss": 1.9788310527801514, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.011784511784511785, | |
| "grad_norm": 4.201331615447998, | |
| "learning_rate": 1.452513966480447e-07, | |
| "loss": 1.6753560304641724, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.013468013468013467, | |
| "grad_norm": 17.091062545776367, | |
| "learning_rate": 1.6759776536312846e-07, | |
| "loss": 1.6581202745437622, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.015151515151515152, | |
| "grad_norm": 22.55893325805664, | |
| "learning_rate": 1.8994413407821228e-07, | |
| "loss": 2.7158942222595215, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.016835016835016835, | |
| "grad_norm": 6.976036548614502, | |
| "learning_rate": 2.122905027932961e-07, | |
| "loss": 1.9487460851669312, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.018518518518518517, | |
| "grad_norm": 4.87603759765625, | |
| "learning_rate": 2.3463687150837988e-07, | |
| "loss": 1.845729947090149, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.020202020202020204, | |
| "grad_norm": 13.902255058288574, | |
| "learning_rate": 2.5698324022346367e-07, | |
| "loss": 3.498323917388916, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.021885521885521887, | |
| "grad_norm": 14.09145450592041, | |
| "learning_rate": 2.7932960893854745e-07, | |
| "loss": 2.7927517890930176, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.02356902356902357, | |
| "grad_norm": 12.507741928100586, | |
| "learning_rate": 3.016759776536313e-07, | |
| "loss": 2.1394832134246826, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.025252525252525252, | |
| "grad_norm": 50.04438018798828, | |
| "learning_rate": 3.240223463687151e-07, | |
| "loss": 3.230577230453491, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.026936026936026935, | |
| "grad_norm": 22.915058135986328, | |
| "learning_rate": 3.4636871508379887e-07, | |
| "loss": 1.7826504707336426, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.02861952861952862, | |
| "grad_norm": 34.94866943359375, | |
| "learning_rate": 3.6871508379888266e-07, | |
| "loss": 3.590939998626709, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.030303030303030304, | |
| "grad_norm": 9.724323272705078, | |
| "learning_rate": 3.9106145251396645e-07, | |
| "loss": 1.9341622591018677, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.03198653198653199, | |
| "grad_norm": 16.15651512145996, | |
| "learning_rate": 4.134078212290503e-07, | |
| "loss": 1.4625201225280762, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.03367003367003367, | |
| "grad_norm": 7.4519453048706055, | |
| "learning_rate": 4.35754189944134e-07, | |
| "loss": 2.242250919342041, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.03535353535353535, | |
| "grad_norm": 6.571437835693359, | |
| "learning_rate": 4.5810055865921786e-07, | |
| "loss": 2.679516315460205, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.037037037037037035, | |
| "grad_norm": 19.185373306274414, | |
| "learning_rate": 4.804469273743016e-07, | |
| "loss": 2.1858067512512207, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.03872053872053872, | |
| "grad_norm": 18.07056999206543, | |
| "learning_rate": 5.027932960893855e-07, | |
| "loss": 1.433751106262207, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.04040404040404041, | |
| "grad_norm": 24.015710830688477, | |
| "learning_rate": 5.251396648044693e-07, | |
| "loss": 2.102412700653076, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.04208754208754209, | |
| "grad_norm": 22.281003952026367, | |
| "learning_rate": 5.474860335195531e-07, | |
| "loss": 1.8496794700622559, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.04377104377104377, | |
| "grad_norm": 16.242393493652344, | |
| "learning_rate": 5.698324022346367e-07, | |
| "loss": 1.9199731349945068, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.045454545454545456, | |
| "grad_norm": 11.205278396606445, | |
| "learning_rate": 5.921787709497206e-07, | |
| "loss": 1.8013508319854736, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.04713804713804714, | |
| "grad_norm": 4.4281840324401855, | |
| "learning_rate": 6.145251396648044e-07, | |
| "loss": 1.5387322902679443, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.04882154882154882, | |
| "grad_norm": 27.68507194519043, | |
| "learning_rate": 6.368715083798882e-07, | |
| "loss": 1.7617017030715942, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.050505050505050504, | |
| "grad_norm": 13.444940567016602, | |
| "learning_rate": 6.59217877094972e-07, | |
| "loss": 1.5345146656036377, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.05218855218855219, | |
| "grad_norm": 12.37048625946045, | |
| "learning_rate": 6.815642458100558e-07, | |
| "loss": 1.5472785234451294, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.05387205387205387, | |
| "grad_norm": 5.660282135009766, | |
| "learning_rate": 7.039106145251397e-07, | |
| "loss": 1.3724396228790283, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.05555555555555555, | |
| "grad_norm": 32.10633087158203, | |
| "learning_rate": 7.262569832402235e-07, | |
| "loss": 1.7364461421966553, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.05723905723905724, | |
| "grad_norm": 15.033787727355957, | |
| "learning_rate": 7.486033519553073e-07, | |
| "loss": 1.5618245601654053, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.058922558922558925, | |
| "grad_norm": 5.500316143035889, | |
| "learning_rate": 7.709497206703909e-07, | |
| "loss": 1.4692459106445312, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.06060606060606061, | |
| "grad_norm": 7.862852096557617, | |
| "learning_rate": 7.932960893854748e-07, | |
| "loss": 1.767068862915039, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.06228956228956229, | |
| "grad_norm": 3.3375768661499023, | |
| "learning_rate": 8.156424581005586e-07, | |
| "loss": 1.5882585048675537, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.06397306397306397, | |
| "grad_norm": 4.3638529777526855, | |
| "learning_rate": 8.379888268156424e-07, | |
| "loss": 1.0791618824005127, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.06565656565656566, | |
| "grad_norm": 3.2826614379882812, | |
| "learning_rate": 8.603351955307262e-07, | |
| "loss": 1.623827338218689, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.06734006734006734, | |
| "grad_norm": 13.223998069763184, | |
| "learning_rate": 8.8268156424581e-07, | |
| "loss": 1.4189568758010864, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.06902356902356903, | |
| "grad_norm": 8.176948547363281, | |
| "learning_rate": 9.050279329608939e-07, | |
| "loss": 1.5663306713104248, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.0707070707070707, | |
| "grad_norm": 8.477921485900879, | |
| "learning_rate": 9.273743016759777e-07, | |
| "loss": 1.3473039865493774, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.0723905723905724, | |
| "grad_norm": 5.039812088012695, | |
| "learning_rate": 9.497206703910615e-07, | |
| "loss": 1.4909709692001343, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.07407407407407407, | |
| "grad_norm": 4.436509132385254, | |
| "learning_rate": 9.720670391061452e-07, | |
| "loss": 1.3051445484161377, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.07575757575757576, | |
| "grad_norm": 10.7329740524292, | |
| "learning_rate": 9.94413407821229e-07, | |
| "loss": 1.4471063613891602, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.07744107744107744, | |
| "grad_norm": 48.17202377319336, | |
| "learning_rate": 1.0167597765363128e-06, | |
| "loss": 1.1504158973693848, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.07912457912457913, | |
| "grad_norm": 9.630391120910645, | |
| "learning_rate": 1.0391061452513965e-06, | |
| "loss": 1.238828182220459, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.08080808080808081, | |
| "grad_norm": 3.6707308292388916, | |
| "learning_rate": 1.0614525139664804e-06, | |
| "loss": 1.29024076461792, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.08249158249158249, | |
| "grad_norm": 20.06619644165039, | |
| "learning_rate": 1.0837988826815643e-06, | |
| "loss": 1.2375919818878174, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.08417508417508418, | |
| "grad_norm": 6.117098331451416, | |
| "learning_rate": 1.106145251396648e-06, | |
| "loss": 1.2162528038024902, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.08585858585858586, | |
| "grad_norm": 7.965595245361328, | |
| "learning_rate": 1.1284916201117319e-06, | |
| "loss": 1.0878969430923462, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.08754208754208755, | |
| "grad_norm": 3.471269369125366, | |
| "learning_rate": 1.1508379888268155e-06, | |
| "loss": 0.8488566875457764, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.08922558922558922, | |
| "grad_norm": 19.03371238708496, | |
| "learning_rate": 1.1731843575418994e-06, | |
| "loss": 0.9605998992919922, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.09090909090909091, | |
| "grad_norm": 4.8145551681518555, | |
| "learning_rate": 1.1955307262569831e-06, | |
| "loss": 1.2580342292785645, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.09259259259259259, | |
| "grad_norm": 12.215010643005371, | |
| "learning_rate": 1.217877094972067e-06, | |
| "loss": 0.8208008408546448, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.09427609427609428, | |
| "grad_norm": 5.212827682495117, | |
| "learning_rate": 1.2402234636871507e-06, | |
| "loss": 1.2487308979034424, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.09595959595959595, | |
| "grad_norm": 110.1784439086914, | |
| "learning_rate": 1.2625698324022344e-06, | |
| "loss": 1.0615664720535278, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.09764309764309764, | |
| "grad_norm": 8.633198738098145, | |
| "learning_rate": 1.2849162011173185e-06, | |
| "loss": 0.7479297518730164, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.09932659932659933, | |
| "grad_norm": 3.4412970542907715, | |
| "learning_rate": 1.3072625698324022e-06, | |
| "loss": 1.1516764163970947, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 0.10101010101010101, | |
| "grad_norm": 2.8980441093444824, | |
| "learning_rate": 1.329608938547486e-06, | |
| "loss": 1.0023488998413086, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.1026936026936027, | |
| "grad_norm": 4.491576671600342, | |
| "learning_rate": 1.3519553072625697e-06, | |
| "loss": 1.207779884338379, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 0.10437710437710437, | |
| "grad_norm": 5.334079742431641, | |
| "learning_rate": 1.3743016759776536e-06, | |
| "loss": 0.8073678612709045, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.10606060606060606, | |
| "grad_norm": 5.402129650115967, | |
| "learning_rate": 1.3966480446927373e-06, | |
| "loss": 0.7180484533309937, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 0.10774410774410774, | |
| "grad_norm": 33.15776824951172, | |
| "learning_rate": 1.4189944134078212e-06, | |
| "loss": 1.076992392539978, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.10942760942760943, | |
| "grad_norm": 12.190916061401367, | |
| "learning_rate": 1.441340782122905e-06, | |
| "loss": 0.9793660640716553, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.1111111111111111, | |
| "grad_norm": 5.5417070388793945, | |
| "learning_rate": 1.4636871508379886e-06, | |
| "loss": 0.9299952387809753, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 0.1127946127946128, | |
| "grad_norm": 3.002917766571045, | |
| "learning_rate": 1.4860335195530727e-06, | |
| "loss": 1.1973538398742676, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 0.11447811447811448, | |
| "grad_norm": 13.795450210571289, | |
| "learning_rate": 1.5083798882681564e-06, | |
| "loss": 1.1933711767196655, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.11616161616161616, | |
| "grad_norm": 3.4793336391448975, | |
| "learning_rate": 1.5307262569832403e-06, | |
| "loss": 1.5386559963226318, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 0.11784511784511785, | |
| "grad_norm": 9.980926513671875, | |
| "learning_rate": 1.553072625698324e-06, | |
| "loss": 1.125044584274292, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.11952861952861953, | |
| "grad_norm": 4.957187175750732, | |
| "learning_rate": 1.5754189944134078e-06, | |
| "loss": 1.0593317747116089, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 0.12121212121212122, | |
| "grad_norm": 14.749825477600098, | |
| "learning_rate": 1.5977653631284915e-06, | |
| "loss": 0.9547094702720642, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.12289562289562289, | |
| "grad_norm": 3.5250778198242188, | |
| "learning_rate": 1.6201117318435752e-06, | |
| "loss": 1.1345624923706055, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 0.12457912457912458, | |
| "grad_norm": 3.4003188610076904, | |
| "learning_rate": 1.642458100558659e-06, | |
| "loss": 0.9924101829528809, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 0.12626262626262627, | |
| "grad_norm": 18.434391021728516, | |
| "learning_rate": 1.6648044692737428e-06, | |
| "loss": 1.2128210067749023, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.12794612794612795, | |
| "grad_norm": 6.9610066413879395, | |
| "learning_rate": 1.6871508379888269e-06, | |
| "loss": 0.9494305849075317, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 0.12962962962962962, | |
| "grad_norm": 42.241188049316406, | |
| "learning_rate": 1.7094972067039106e-06, | |
| "loss": 1.1769180297851562, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 0.13131313131313133, | |
| "grad_norm": 19.53082275390625, | |
| "learning_rate": 1.7318435754189945e-06, | |
| "loss": 1.0955569744110107, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 0.132996632996633, | |
| "grad_norm": 4.005194187164307, | |
| "learning_rate": 1.7541899441340781e-06, | |
| "loss": 1.0531185865402222, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 0.13468013468013468, | |
| "grad_norm": 5.709774494171143, | |
| "learning_rate": 1.776536312849162e-06, | |
| "loss": 1.1533485651016235, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.13636363636363635, | |
| "grad_norm": 2.3597922325134277, | |
| "learning_rate": 1.7988826815642457e-06, | |
| "loss": 1.0321946144104004, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 0.13804713804713806, | |
| "grad_norm": 16.570262908935547, | |
| "learning_rate": 1.8212290502793294e-06, | |
| "loss": 0.9637615084648132, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 0.13973063973063973, | |
| "grad_norm": 8.452648162841797, | |
| "learning_rate": 1.8435754189944133e-06, | |
| "loss": 0.9408825039863586, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 0.1414141414141414, | |
| "grad_norm": 2.8005619049072266, | |
| "learning_rate": 1.865921787709497e-06, | |
| "loss": 1.127833366394043, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 0.14309764309764308, | |
| "grad_norm": 6.316201686859131, | |
| "learning_rate": 1.8882681564245809e-06, | |
| "loss": 1.0138617753982544, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.1447811447811448, | |
| "grad_norm": 14.958882331848145, | |
| "learning_rate": 1.9106145251396648e-06, | |
| "loss": 1.0158287286758423, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 0.14646464646464646, | |
| "grad_norm": 4.5443267822265625, | |
| "learning_rate": 1.9329608938547484e-06, | |
| "loss": 0.7117235064506531, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 0.14814814814814814, | |
| "grad_norm": 4.039905548095703, | |
| "learning_rate": 1.9553072625698325e-06, | |
| "loss": 1.0871771574020386, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 0.14983164983164984, | |
| "grad_norm": 3.271326780319214, | |
| "learning_rate": 1.9776536312849162e-06, | |
| "loss": 1.267643690109253, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 0.15151515151515152, | |
| "grad_norm": 5.037292957305908, | |
| "learning_rate": 2e-06, | |
| "loss": 1.0257434844970703, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.1531986531986532, | |
| "grad_norm": 4.92929220199585, | |
| "learning_rate": 1.9999984495606584e-06, | |
| "loss": 1.4013102054595947, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 0.15488215488215487, | |
| "grad_norm": 23.51206398010254, | |
| "learning_rate": 1.999993798247977e-06, | |
| "loss": 1.0038059949874878, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 0.15656565656565657, | |
| "grad_norm": 14.101850509643555, | |
| "learning_rate": 1.99998604607798e-06, | |
| "loss": 1.1263923645019531, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 0.15824915824915825, | |
| "grad_norm": 11.950604438781738, | |
| "learning_rate": 1.9999751930773778e-06, | |
| "loss": 0.9272401332855225, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 0.15993265993265993, | |
| "grad_norm": 21.03433609008789, | |
| "learning_rate": 1.999961239283563e-06, | |
| "loss": 0.7770416140556335, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.16161616161616163, | |
| "grad_norm": 3.4966766834259033, | |
| "learning_rate": 1.999944184744613e-06, | |
| "loss": 1.348158597946167, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 0.1632996632996633, | |
| "grad_norm": 3.6538894176483154, | |
| "learning_rate": 1.999924029519287e-06, | |
| "loss": 1.2516090869903564, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 0.16498316498316498, | |
| "grad_norm": 4.83535623550415, | |
| "learning_rate": 1.9999007736770295e-06, | |
| "loss": 1.072089672088623, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 0.16666666666666666, | |
| "grad_norm": 3.3021559715270996, | |
| "learning_rate": 1.9998744172979654e-06, | |
| "loss": 1.1623098850250244, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 0.16835016835016836, | |
| "grad_norm": 14.60655689239502, | |
| "learning_rate": 1.9998449604729044e-06, | |
| "loss": 0.8636209964752197, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.17003367003367004, | |
| "grad_norm": 12.559534072875977, | |
| "learning_rate": 1.9998124033033366e-06, | |
| "loss": 0.895442008972168, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 0.1717171717171717, | |
| "grad_norm": 4.964874744415283, | |
| "learning_rate": 1.9997767459014363e-06, | |
| "loss": 1.0330384969711304, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 0.1734006734006734, | |
| "grad_norm": 3.3170907497406006, | |
| "learning_rate": 1.9997379883900572e-06, | |
| "loss": 0.9942055940628052, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 0.1750841750841751, | |
| "grad_norm": 4.744529724121094, | |
| "learning_rate": 1.999696130902736e-06, | |
| "loss": 1.2099803686141968, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 0.17676767676767677, | |
| "grad_norm": 11.850593566894531, | |
| "learning_rate": 1.9996511735836895e-06, | |
| "loss": 0.7535406351089478, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.17845117845117844, | |
| "grad_norm": 16.69972038269043, | |
| "learning_rate": 1.999603116587814e-06, | |
| "loss": 0.9160436987876892, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 0.18013468013468015, | |
| "grad_norm": 2.5802817344665527, | |
| "learning_rate": 1.9995519600806863e-06, | |
| "loss": 1.3276009559631348, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 0.18181818181818182, | |
| "grad_norm": 9.903021812438965, | |
| "learning_rate": 1.999497704238562e-06, | |
| "loss": 0.8258368372917175, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 0.1835016835016835, | |
| "grad_norm": 10.159919738769531, | |
| "learning_rate": 1.9994403492483755e-06, | |
| "loss": 0.6640470027923584, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 0.18518518518518517, | |
| "grad_norm": 3.8735828399658203, | |
| "learning_rate": 1.999379895307739e-06, | |
| "loss": 1.3416516780853271, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.18686868686868688, | |
| "grad_norm": 3.4755043983459473, | |
| "learning_rate": 1.999316342624941e-06, | |
| "loss": 0.9075236320495605, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 0.18855218855218855, | |
| "grad_norm": 5.18587064743042, | |
| "learning_rate": 1.999249691418948e-06, | |
| "loss": 1.193176507949829, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 0.19023569023569023, | |
| "grad_norm": 6.766015529632568, | |
| "learning_rate": 1.999179941919401e-06, | |
| "loss": 0.9458363056182861, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 0.1919191919191919, | |
| "grad_norm": 12.469842910766602, | |
| "learning_rate": 1.999107094366617e-06, | |
| "loss": 1.1906776428222656, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 0.1936026936026936, | |
| "grad_norm": 15.036520004272461, | |
| "learning_rate": 1.9990311490115858e-06, | |
| "loss": 1.3650178909301758, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.19528619528619529, | |
| "grad_norm": 5.799370288848877, | |
| "learning_rate": 1.9989521061159715e-06, | |
| "loss": 1.0698531866073608, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 0.19696969696969696, | |
| "grad_norm": 5.714483737945557, | |
| "learning_rate": 1.9988699659521098e-06, | |
| "loss": 1.1641753911972046, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 0.19865319865319866, | |
| "grad_norm": 10.119220733642578, | |
| "learning_rate": 1.9987847288030083e-06, | |
| "loss": 0.9833089113235474, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 0.20033670033670034, | |
| "grad_norm": 3.4788730144500732, | |
| "learning_rate": 1.998696394962345e-06, | |
| "loss": 1.1086716651916504, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 0.20202020202020202, | |
| "grad_norm": 3.9894561767578125, | |
| "learning_rate": 1.998604964734467e-06, | |
| "loss": 0.9258865118026733, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.2037037037037037, | |
| "grad_norm": 4.706192970275879, | |
| "learning_rate": 1.99851043843439e-06, | |
| "loss": 1.1667051315307617, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 0.2053872053872054, | |
| "grad_norm": 15.748969078063965, | |
| "learning_rate": 1.9984128163877964e-06, | |
| "loss": 0.9964404106140137, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 0.20707070707070707, | |
| "grad_norm": 9.65405559539795, | |
| "learning_rate": 1.998312098931036e-06, | |
| "loss": 0.6644821166992188, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 0.20875420875420875, | |
| "grad_norm": 13.462628364562988, | |
| "learning_rate": 1.998208286411122e-06, | |
| "loss": 1.2101833820343018, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 0.21043771043771045, | |
| "grad_norm": 2.0463879108428955, | |
| "learning_rate": 1.9981013791857327e-06, | |
| "loss": 0.9958995580673218, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.21212121212121213, | |
| "grad_norm": 3.3968567848205566, | |
| "learning_rate": 1.997991377623209e-06, | |
| "loss": 0.8969879150390625, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 0.2138047138047138, | |
| "grad_norm": 17.595094680786133, | |
| "learning_rate": 1.9978782821025513e-06, | |
| "loss": 1.0462696552276611, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 0.21548821548821548, | |
| "grad_norm": 13.578154563903809, | |
| "learning_rate": 1.9977620930134223e-06, | |
| "loss": 1.1988019943237305, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 0.21717171717171718, | |
| "grad_norm": 4.280734062194824, | |
| "learning_rate": 1.9976428107561415e-06, | |
| "loss": 0.8459457755088806, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 0.21885521885521886, | |
| "grad_norm": 2.570441246032715, | |
| "learning_rate": 1.997520435741687e-06, | |
| "loss": 1.0279544591903687, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.22053872053872053, | |
| "grad_norm": 6.806192398071289, | |
| "learning_rate": 1.9973949683916927e-06, | |
| "loss": 1.0510814189910889, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 0.2222222222222222, | |
| "grad_norm": 4.318380832672119, | |
| "learning_rate": 1.9972664091384454e-06, | |
| "loss": 1.1062796115875244, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 0.2239057239057239, | |
| "grad_norm": 3.807039976119995, | |
| "learning_rate": 1.997134758424886e-06, | |
| "loss": 1.1960452795028687, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 0.2255892255892256, | |
| "grad_norm": 6.313713550567627, | |
| "learning_rate": 1.9970000167046075e-06, | |
| "loss": 0.6546218991279602, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 0.22727272727272727, | |
| "grad_norm": 3.2756094932556152, | |
| "learning_rate": 1.996862184441851e-06, | |
| "loss": 0.9819681644439697, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.22895622895622897, | |
| "grad_norm": 13.153508186340332, | |
| "learning_rate": 1.9967212621115065e-06, | |
| "loss": 1.3135335445404053, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 0.23063973063973064, | |
| "grad_norm": 14.49177074432373, | |
| "learning_rate": 1.996577250199111e-06, | |
| "loss": 1.1486749649047852, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 0.23232323232323232, | |
| "grad_norm": 26.132858276367188, | |
| "learning_rate": 1.9964301492008464e-06, | |
| "loss": 0.9009004831314087, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 0.234006734006734, | |
| "grad_norm": 3.963716506958008, | |
| "learning_rate": 1.996279959623537e-06, | |
| "loss": 1.1650899648666382, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 0.2356902356902357, | |
| "grad_norm": 13.785598754882812, | |
| "learning_rate": 1.9961266819846495e-06, | |
| "loss": 0.9621269702911377, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.23737373737373738, | |
| "grad_norm": 6.935214042663574, | |
| "learning_rate": 1.9959703168122897e-06, | |
| "loss": 0.9427906274795532, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 0.23905723905723905, | |
| "grad_norm": 3.0722286701202393, | |
| "learning_rate": 1.995810864645202e-06, | |
| "loss": 1.2749511003494263, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 0.24074074074074073, | |
| "grad_norm": 4.774331092834473, | |
| "learning_rate": 1.995648326032765e-06, | |
| "loss": 0.9315462112426758, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 0.24242424242424243, | |
| "grad_norm": 4.373500823974609, | |
| "learning_rate": 1.9954827015349937e-06, | |
| "loss": 0.8452310562133789, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 0.2441077441077441, | |
| "grad_norm": 9.997944831848145, | |
| "learning_rate": 1.9953139917225333e-06, | |
| "loss": 1.1583993434906006, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.24579124579124578, | |
| "grad_norm": 9.785924911499023, | |
| "learning_rate": 1.995142197176661e-06, | |
| "loss": 0.6743492484092712, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 0.2474747474747475, | |
| "grad_norm": 9.52839183807373, | |
| "learning_rate": 1.9949673184892803e-06, | |
| "loss": 1.274944543838501, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 0.24915824915824916, | |
| "grad_norm": 13.619229316711426, | |
| "learning_rate": 1.9947893562629227e-06, | |
| "loss": 1.085368037223816, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 0.25084175084175087, | |
| "grad_norm": 6.220252513885498, | |
| "learning_rate": 1.9946083111107425e-06, | |
| "loss": 0.6333813667297363, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 0.25252525252525254, | |
| "grad_norm": 12.346251487731934, | |
| "learning_rate": 1.9944241836565167e-06, | |
| "loss": 0.7786128520965576, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.2542087542087542, | |
| "grad_norm": 10.734468460083008, | |
| "learning_rate": 1.9942369745346417e-06, | |
| "loss": 1.0820167064666748, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 0.2558922558922559, | |
| "grad_norm": 25.510744094848633, | |
| "learning_rate": 1.9940466843901318e-06, | |
| "loss": 0.9161986112594604, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 0.25757575757575757, | |
| "grad_norm": 5.673551559448242, | |
| "learning_rate": 1.9938533138786163e-06, | |
| "loss": 1.3526289463043213, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 0.25925925925925924, | |
| "grad_norm": 11.891182899475098, | |
| "learning_rate": 1.9936568636663383e-06, | |
| "loss": 1.1077102422714233, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 0.2609427609427609, | |
| "grad_norm": 7.852316856384277, | |
| "learning_rate": 1.9934573344301514e-06, | |
| "loss": 1.0809465646743774, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.26262626262626265, | |
| "grad_norm": 20.96988296508789, | |
| "learning_rate": 1.993254726857518e-06, | |
| "loss": 1.225387454032898, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 0.26430976430976433, | |
| "grad_norm": 5.888166427612305, | |
| "learning_rate": 1.9930490416465057e-06, | |
| "loss": 1.086962103843689, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 0.265993265993266, | |
| "grad_norm": 2.8382439613342285, | |
| "learning_rate": 1.992840279505787e-06, | |
| "loss": 1.225638508796692, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 0.2676767676767677, | |
| "grad_norm": 4.078027725219727, | |
| "learning_rate": 1.9926284411546355e-06, | |
| "loss": 0.99470055103302, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 0.26936026936026936, | |
| "grad_norm": 14.269658088684082, | |
| "learning_rate": 1.9924135273229235e-06, | |
| "loss": 0.727924108505249, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.27104377104377103, | |
| "grad_norm": 3.730602502822876, | |
| "learning_rate": 1.9921955387511195e-06, | |
| "loss": 0.9582691192626953, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 0.2727272727272727, | |
| "grad_norm": 3.153249979019165, | |
| "learning_rate": 1.991974476190285e-06, | |
| "loss": 1.263975977897644, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 0.27441077441077444, | |
| "grad_norm": 2.4196362495422363, | |
| "learning_rate": 1.9917503404020747e-06, | |
| "loss": 1.0396244525909424, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 0.2760942760942761, | |
| "grad_norm": 12.836146354675293, | |
| "learning_rate": 1.9915231321587305e-06, | |
| "loss": 0.8178722262382507, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 0.2777777777777778, | |
| "grad_norm": 5.543509483337402, | |
| "learning_rate": 1.99129285224308e-06, | |
| "loss": 0.9038114547729492, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.27946127946127947, | |
| "grad_norm": 5.564317226409912, | |
| "learning_rate": 1.9910595014485347e-06, | |
| "loss": 1.0971403121948242, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 0.28114478114478114, | |
| "grad_norm": 5.212599754333496, | |
| "learning_rate": 1.990823080579086e-06, | |
| "loss": 1.0671043395996094, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 0.2828282828282828, | |
| "grad_norm": 5.401691436767578, | |
| "learning_rate": 1.990583590449303e-06, | |
| "loss": 1.0057094097137451, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 0.2845117845117845, | |
| "grad_norm": 3.39033579826355, | |
| "learning_rate": 1.990341031884331e-06, | |
| "loss": 1.1939620971679688, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 0.28619528619528617, | |
| "grad_norm": 12.433296203613281, | |
| "learning_rate": 1.9900954057198856e-06, | |
| "loss": 0.9549685120582581, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.2878787878787879, | |
| "grad_norm": 23.119340896606445, | |
| "learning_rate": 1.989846712802252e-06, | |
| "loss": 1.1277296543121338, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 0.2895622895622896, | |
| "grad_norm": 42.77076721191406, | |
| "learning_rate": 1.9895949539882827e-06, | |
| "loss": 0.8779406547546387, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 0.29124579124579125, | |
| "grad_norm": 2.3723807334899902, | |
| "learning_rate": 1.9893401301453926e-06, | |
| "loss": 1.1096537113189697, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 0.29292929292929293, | |
| "grad_norm": 7.652088165283203, | |
| "learning_rate": 1.989082242151556e-06, | |
| "loss": 1.053053379058838, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 0.2946127946127946, | |
| "grad_norm": 8.224458694458008, | |
| "learning_rate": 1.988821290895307e-06, | |
| "loss": 0.7571377754211426, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.2962962962962963, | |
| "grad_norm": 3.486557722091675, | |
| "learning_rate": 1.988557277275732e-06, | |
| "loss": 0.5875279903411865, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 0.29797979797979796, | |
| "grad_norm": 3.368520498275757, | |
| "learning_rate": 1.9882902022024683e-06, | |
| "loss": 1.0230705738067627, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 0.2996632996632997, | |
| "grad_norm": 7.633305549621582, | |
| "learning_rate": 1.9880200665957026e-06, | |
| "loss": 1.0808613300323486, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 0.30134680134680136, | |
| "grad_norm": 15.621920585632324, | |
| "learning_rate": 1.9877468713861656e-06, | |
| "loss": 0.9313445687294006, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 0.30303030303030304, | |
| "grad_norm": 3.499727249145508, | |
| "learning_rate": 1.98747061751513e-06, | |
| "loss": 0.8186299800872803, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.3047138047138047, | |
| "grad_norm": 3.564624547958374, | |
| "learning_rate": 1.987191305934406e-06, | |
| "loss": 0.9808353185653687, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 0.3063973063973064, | |
| "grad_norm": 2.6821398735046387, | |
| "learning_rate": 1.98690893760634e-06, | |
| "loss": 1.2293064594268799, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 0.30808080808080807, | |
| "grad_norm": 8.49547004699707, | |
| "learning_rate": 1.9866235135038095e-06, | |
| "loss": 0.8337675333023071, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 0.30976430976430974, | |
| "grad_norm": 3.7763280868530273, | |
| "learning_rate": 1.986335034610221e-06, | |
| "loss": 0.8535688519477844, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 0.3114478114478115, | |
| "grad_norm": 6.456183910369873, | |
| "learning_rate": 1.9860435019195054e-06, | |
| "loss": 1.0865236520767212, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.31313131313131315, | |
| "grad_norm": 6.974287509918213, | |
| "learning_rate": 1.9857489164361147e-06, | |
| "loss": 1.2327494621276855, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 0.3148148148148148, | |
| "grad_norm": 12.779848098754883, | |
| "learning_rate": 1.9854512791750214e-06, | |
| "loss": 0.6957528591156006, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 0.3164983164983165, | |
| "grad_norm": 3.759835720062256, | |
| "learning_rate": 1.9851505911617097e-06, | |
| "loss": 0.9909141659736633, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 0.3181818181818182, | |
| "grad_norm": 7.0778608322143555, | |
| "learning_rate": 1.984846853432177e-06, | |
| "loss": 1.3244696855545044, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 0.31986531986531985, | |
| "grad_norm": 24.917316436767578, | |
| "learning_rate": 1.9845400670329275e-06, | |
| "loss": 0.7233332991600037, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.32154882154882153, | |
| "grad_norm": 10.7407865524292, | |
| "learning_rate": 1.98423023302097e-06, | |
| "loss": 0.9228682518005371, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 0.32323232323232326, | |
| "grad_norm": 4.701694011688232, | |
| "learning_rate": 1.9839173524638115e-06, | |
| "loss": 1.1106748580932617, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 0.32491582491582494, | |
| "grad_norm": 5.9592976570129395, | |
| "learning_rate": 1.9836014264394587e-06, | |
| "loss": 0.7204115390777588, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 0.3265993265993266, | |
| "grad_norm": 24.467937469482422, | |
| "learning_rate": 1.9832824560364093e-06, | |
| "loss": 0.9101235866546631, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 0.3282828282828283, | |
| "grad_norm": 55.19502258300781, | |
| "learning_rate": 1.98296044235365e-06, | |
| "loss": 1.0853596925735474, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.32996632996632996, | |
| "grad_norm": 110.57111358642578, | |
| "learning_rate": 1.9826353865006538e-06, | |
| "loss": 0.7398289442062378, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 0.33164983164983164, | |
| "grad_norm": 6.112462520599365, | |
| "learning_rate": 1.9823072895973748e-06, | |
| "loss": 1.3101907968521118, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 0.3333333333333333, | |
| "grad_norm": 18.562759399414062, | |
| "learning_rate": 1.981976152774245e-06, | |
| "loss": 1.1832518577575684, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 0.335016835016835, | |
| "grad_norm": 10.324470520019531, | |
| "learning_rate": 1.98164197717217e-06, | |
| "loss": 0.7631848454475403, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 0.3367003367003367, | |
| "grad_norm": 5.662529468536377, | |
| "learning_rate": 1.9813047639425253e-06, | |
| "loss": 0.9376566410064697, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.3383838383838384, | |
| "grad_norm": 17.23822784423828, | |
| "learning_rate": 1.9809645142471528e-06, | |
| "loss": 0.9629780650138855, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 0.3400673400673401, | |
| "grad_norm": 8.00967025756836, | |
| "learning_rate": 1.980621229258355e-06, | |
| "loss": 1.0150327682495117, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 0.34175084175084175, | |
| "grad_norm": 4.659936904907227, | |
| "learning_rate": 1.9802749101588942e-06, | |
| "loss": 1.0681769847869873, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 0.3434343434343434, | |
| "grad_norm": 5.872868061065674, | |
| "learning_rate": 1.9799255581419844e-06, | |
| "loss": 0.9499913454055786, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 0.3451178451178451, | |
| "grad_norm": 4.6081109046936035, | |
| "learning_rate": 1.9795731744112908e-06, | |
| "loss": 0.5379456877708435, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.3468013468013468, | |
| "grad_norm": 4.34984016418457, | |
| "learning_rate": 1.9792177601809234e-06, | |
| "loss": 0.8700510263442993, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 0.3484848484848485, | |
| "grad_norm": 12.086810111999512, | |
| "learning_rate": 1.9788593166754343e-06, | |
| "loss": 0.8910826444625854, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 0.3501683501683502, | |
| "grad_norm": 15.385903358459473, | |
| "learning_rate": 1.9784978451298115e-06, | |
| "loss": 1.1716386079788208, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 0.35185185185185186, | |
| "grad_norm": 7.822863578796387, | |
| "learning_rate": 1.9781333467894773e-06, | |
| "loss": 0.687047004699707, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 0.35353535353535354, | |
| "grad_norm": 10.231508255004883, | |
| "learning_rate": 1.9777658229102807e-06, | |
| "loss": 0.8759807348251343, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.3552188552188552, | |
| "grad_norm": 10.260309219360352, | |
| "learning_rate": 1.9773952747584976e-06, | |
| "loss": 1.1332191228866577, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 0.3569023569023569, | |
| "grad_norm": 8.660632133483887, | |
| "learning_rate": 1.9770217036108212e-06, | |
| "loss": 0.5898092985153198, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 0.35858585858585856, | |
| "grad_norm": 24.724945068359375, | |
| "learning_rate": 1.9766451107543614e-06, | |
| "loss": 0.9762297868728638, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 0.3602693602693603, | |
| "grad_norm": 10.698787689208984, | |
| "learning_rate": 1.9762654974866396e-06, | |
| "loss": 0.7858309149742126, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 0.36195286195286197, | |
| "grad_norm": 9.971443176269531, | |
| "learning_rate": 1.975882865115583e-06, | |
| "loss": 1.2292566299438477, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.36363636363636365, | |
| "grad_norm": 7.011922359466553, | |
| "learning_rate": 1.9754972149595204e-06, | |
| "loss": 0.9748165607452393, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 0.3653198653198653, | |
| "grad_norm": 12.33168888092041, | |
| "learning_rate": 1.97510854834718e-06, | |
| "loss": 0.8448182940483093, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 0.367003367003367, | |
| "grad_norm": 2.4483745098114014, | |
| "learning_rate": 1.9747168666176813e-06, | |
| "loss": 1.008624792098999, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 0.3686868686868687, | |
| "grad_norm": 10.966385841369629, | |
| "learning_rate": 1.9743221711205323e-06, | |
| "loss": 1.0692952871322632, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 0.37037037037037035, | |
| "grad_norm": 2.965273141860962, | |
| "learning_rate": 1.9739244632156256e-06, | |
| "loss": 0.9337837100028992, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.3720538720538721, | |
| "grad_norm": 12.18703556060791, | |
| "learning_rate": 1.973523744273232e-06, | |
| "loss": 0.9473227262496948, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 0.37373737373737376, | |
| "grad_norm": 8.538522720336914, | |
| "learning_rate": 1.973120015673997e-06, | |
| "loss": 0.7716883420944214, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 0.37542087542087543, | |
| "grad_norm": 11.410622596740723, | |
| "learning_rate": 1.9727132788089354e-06, | |
| "loss": 0.6292431354522705, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 0.3771043771043771, | |
| "grad_norm": 3.9945926666259766, | |
| "learning_rate": 1.972303535079427e-06, | |
| "loss": 1.1218082904815674, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 0.3787878787878788, | |
| "grad_norm": 19.375045776367188, | |
| "learning_rate": 1.971890785897211e-06, | |
| "loss": 1.007505178451538, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.38047138047138046, | |
| "grad_norm": 3.713459014892578, | |
| "learning_rate": 1.9714750326843825e-06, | |
| "loss": 0.7216253280639648, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 0.38215488215488214, | |
| "grad_norm": 6.826941013336182, | |
| "learning_rate": 1.9710562768733857e-06, | |
| "loss": 0.9892054796218872, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 0.3838383838383838, | |
| "grad_norm": 7.63702392578125, | |
| "learning_rate": 1.9706345199070107e-06, | |
| "loss": 0.7905744314193726, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 0.38552188552188554, | |
| "grad_norm": 11.529894828796387, | |
| "learning_rate": 1.970209763238388e-06, | |
| "loss": 0.9695171117782593, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 0.3872053872053872, | |
| "grad_norm": 2.9292163848876953, | |
| "learning_rate": 1.969782008330983e-06, | |
| "loss": 1.1221948862075806, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.3888888888888889, | |
| "grad_norm": 4.672982215881348, | |
| "learning_rate": 1.969351256658591e-06, | |
| "loss": 0.8763028979301453, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 0.39057239057239057, | |
| "grad_norm": 4.81404972076416, | |
| "learning_rate": 1.968917509705333e-06, | |
| "loss": 0.8340336680412292, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 0.39225589225589225, | |
| "grad_norm": 19.125089645385742, | |
| "learning_rate": 1.9684807689656497e-06, | |
| "loss": 0.9119417071342468, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 0.3939393939393939, | |
| "grad_norm": 2.594858407974243, | |
| "learning_rate": 1.9680410359442972e-06, | |
| "loss": 0.9458074569702148, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 0.3956228956228956, | |
| "grad_norm": 3.8974621295928955, | |
| "learning_rate": 1.9675983121563397e-06, | |
| "loss": 0.9553569555282593, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.39730639730639733, | |
| "grad_norm": 6.4163641929626465, | |
| "learning_rate": 1.9671525991271478e-06, | |
| "loss": 0.7942986488342285, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 0.398989898989899, | |
| "grad_norm": 3.718247890472412, | |
| "learning_rate": 1.9667038983923902e-06, | |
| "loss": 0.9940693378448486, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 0.4006734006734007, | |
| "grad_norm": 25.65456199645996, | |
| "learning_rate": 1.9662522114980296e-06, | |
| "loss": 0.7515483498573303, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 0.40235690235690236, | |
| "grad_norm": 3.7314107418060303, | |
| "learning_rate": 1.965797540000318e-06, | |
| "loss": 0.9622472524642944, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 0.40404040404040403, | |
| "grad_norm": 16.488338470458984, | |
| "learning_rate": 1.9653398854657887e-06, | |
| "loss": 1.041235089302063, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.4057239057239057, | |
| "grad_norm": 8.276439666748047, | |
| "learning_rate": 1.9648792494712553e-06, | |
| "loss": 1.0389721393585205, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 0.4074074074074074, | |
| "grad_norm": 10.357524871826172, | |
| "learning_rate": 1.9644156336038024e-06, | |
| "loss": 0.8473480343818665, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 0.4090909090909091, | |
| "grad_norm": 12.934167861938477, | |
| "learning_rate": 1.9639490394607813e-06, | |
| "loss": 0.8664846420288086, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 0.4107744107744108, | |
| "grad_norm": 2.63865327835083, | |
| "learning_rate": 1.9634794686498055e-06, | |
| "loss": 1.0735490322113037, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 0.41245791245791247, | |
| "grad_norm": 9.611379623413086, | |
| "learning_rate": 1.9630069227887444e-06, | |
| "loss": 1.097601294517517, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.41414141414141414, | |
| "grad_norm": 37.54718780517578, | |
| "learning_rate": 1.9625314035057167e-06, | |
| "loss": 1.0461905002593994, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 0.4158249158249158, | |
| "grad_norm": 5.95384407043457, | |
| "learning_rate": 1.9620529124390863e-06, | |
| "loss": 0.9309274554252625, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 0.4175084175084175, | |
| "grad_norm": 5.1661763191223145, | |
| "learning_rate": 1.9615714512374567e-06, | |
| "loss": 1.0628364086151123, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 0.41919191919191917, | |
| "grad_norm": 4.157014846801758, | |
| "learning_rate": 1.9610870215596643e-06, | |
| "loss": 1.0677950382232666, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 0.4208754208754209, | |
| "grad_norm": 6.916998863220215, | |
| "learning_rate": 1.960599625074773e-06, | |
| "loss": 0.8103325366973877, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.4225589225589226, | |
| "grad_norm": 6.891815185546875, | |
| "learning_rate": 1.9601092634620687e-06, | |
| "loss": 0.6272333264350891, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 0.42424242424242425, | |
| "grad_norm": 9.089258193969727, | |
| "learning_rate": 1.9596159384110535e-06, | |
| "loss": 0.8941874504089355, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 0.42592592592592593, | |
| "grad_norm": 16.94425392150879, | |
| "learning_rate": 1.95911965162144e-06, | |
| "loss": 0.938546359539032, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 0.4276094276094276, | |
| "grad_norm": 15.095925331115723, | |
| "learning_rate": 1.958620404803145e-06, | |
| "loss": 1.293353796005249, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 0.4292929292929293, | |
| "grad_norm": 3.3025577068328857, | |
| "learning_rate": 1.9581181996762834e-06, | |
| "loss": 1.0367740392684937, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.43097643097643096, | |
| "grad_norm": 3.0691745281219482, | |
| "learning_rate": 1.9576130379711634e-06, | |
| "loss": 1.178546667098999, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 0.43265993265993263, | |
| "grad_norm": 3.2468979358673096, | |
| "learning_rate": 1.95710492142828e-06, | |
| "loss": 1.115210771560669, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 0.43434343434343436, | |
| "grad_norm": 12.401965141296387, | |
| "learning_rate": 1.956593851798308e-06, | |
| "loss": 1.0290696620941162, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 0.43602693602693604, | |
| "grad_norm": 8.208135604858398, | |
| "learning_rate": 1.9560798308420974e-06, | |
| "loss": 1.0394536256790161, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 0.4377104377104377, | |
| "grad_norm": 15.533670425415039, | |
| "learning_rate": 1.955562860330667e-06, | |
| "loss": 0.9136192798614502, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.4393939393939394, | |
| "grad_norm": 3.0875625610351562, | |
| "learning_rate": 1.9550429420451973e-06, | |
| "loss": 0.7975887060165405, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 0.44107744107744107, | |
| "grad_norm": 8.5232572555542, | |
| "learning_rate": 1.954520077777026e-06, | |
| "loss": 1.1077611446380615, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 0.44276094276094274, | |
| "grad_norm": 11.362956047058105, | |
| "learning_rate": 1.9539942693276405e-06, | |
| "loss": 0.7790743112564087, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 0.4444444444444444, | |
| "grad_norm": 2.6764779090881348, | |
| "learning_rate": 1.9534655185086717e-06, | |
| "loss": 1.1893084049224854, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 0.44612794612794615, | |
| "grad_norm": 11.054378509521484, | |
| "learning_rate": 1.9529338271418886e-06, | |
| "loss": 0.8206809759140015, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.4478114478114478, | |
| "grad_norm": 15.93736743927002, | |
| "learning_rate": 1.952399197059192e-06, | |
| "loss": 0.8338401317596436, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 0.4494949494949495, | |
| "grad_norm": 5.404129505157471, | |
| "learning_rate": 1.9518616301026077e-06, | |
| "loss": 0.9456153512001038, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 0.4511784511784512, | |
| "grad_norm": 4.291036128997803, | |
| "learning_rate": 1.9513211281242795e-06, | |
| "loss": 1.2254921197891235, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 0.45286195286195285, | |
| "grad_norm": 7.2202582359313965, | |
| "learning_rate": 1.9507776929864643e-06, | |
| "loss": 1.092686653137207, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 0.45454545454545453, | |
| "grad_norm": 8.635713577270508, | |
| "learning_rate": 1.950231326561525e-06, | |
| "loss": 0.8675233125686646, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.4562289562289562, | |
| "grad_norm": 8.679670333862305, | |
| "learning_rate": 1.9496820307319237e-06, | |
| "loss": 1.0159896612167358, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 0.45791245791245794, | |
| "grad_norm": 3.453657865524292, | |
| "learning_rate": 1.9491298073902157e-06, | |
| "loss": 1.118143081665039, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 0.4595959595959596, | |
| "grad_norm": 7.604466438293457, | |
| "learning_rate": 1.9485746584390426e-06, | |
| "loss": 1.1383062601089478, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 0.4612794612794613, | |
| "grad_norm": 10.454069137573242, | |
| "learning_rate": 1.948016585791127e-06, | |
| "loss": 1.3462685346603394, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 0.46296296296296297, | |
| "grad_norm": 7.511162757873535, | |
| "learning_rate": 1.9474555913692627e-06, | |
| "loss": 0.8798332214355469, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.46464646464646464, | |
| "grad_norm": 22.986238479614258, | |
| "learning_rate": 1.946891677106312e-06, | |
| "loss": 0.8471826314926147, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 0.4663299663299663, | |
| "grad_norm": 4.494133949279785, | |
| "learning_rate": 1.946324844945197e-06, | |
| "loss": 1.0384173393249512, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 0.468013468013468, | |
| "grad_norm": 9.850350379943848, | |
| "learning_rate": 1.9457550968388928e-06, | |
| "loss": 0.7141643166542053, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 0.4696969696969697, | |
| "grad_norm": 6.887972831726074, | |
| "learning_rate": 1.9451824347504213e-06, | |
| "loss": 1.190050721168518, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 0.4713804713804714, | |
| "grad_norm": 5.237252235412598, | |
| "learning_rate": 1.944606860652845e-06, | |
| "loss": 0.41058096289634705, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.4730639730639731, | |
| "grad_norm": 15.578932762145996, | |
| "learning_rate": 1.944028376529258e-06, | |
| "loss": 0.598914384841919, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 0.47474747474747475, | |
| "grad_norm": 3.727078437805176, | |
| "learning_rate": 1.943446984372782e-06, | |
| "loss": 1.2833001613616943, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 0.4764309764309764, | |
| "grad_norm": 8.145559310913086, | |
| "learning_rate": 1.942862686186557e-06, | |
| "loss": 1.1502578258514404, | |
| "step": 566 | |
| }, | |
| { | |
| "epoch": 0.4781144781144781, | |
| "grad_norm": 8.36186408996582, | |
| "learning_rate": 1.9422754839837366e-06, | |
| "loss": 0.45712798833847046, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 0.4797979797979798, | |
| "grad_norm": 32.920475006103516, | |
| "learning_rate": 1.9416853797874797e-06, | |
| "loss": 1.1332796812057495, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.48148148148148145, | |
| "grad_norm": 17.55156135559082, | |
| "learning_rate": 1.941092375630943e-06, | |
| "loss": 0.6961038112640381, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 0.4831649831649832, | |
| "grad_norm": 4.492574214935303, | |
| "learning_rate": 1.9404964735572754e-06, | |
| "loss": 0.9653905630111694, | |
| "step": 574 | |
| }, | |
| { | |
| "epoch": 0.48484848484848486, | |
| "grad_norm": 6.348426818847656, | |
| "learning_rate": 1.939897675619611e-06, | |
| "loss": 0.871944785118103, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 0.48653198653198654, | |
| "grad_norm": 25.369014739990234, | |
| "learning_rate": 1.9392959838810597e-06, | |
| "loss": 1.0709469318389893, | |
| "step": 578 | |
| }, | |
| { | |
| "epoch": 0.4882154882154882, | |
| "grad_norm": 10.82548999786377, | |
| "learning_rate": 1.9386914004147034e-06, | |
| "loss": 0.7998636960983276, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.4898989898989899, | |
| "grad_norm": 10.758012771606445, | |
| "learning_rate": 1.938083927303586e-06, | |
| "loss": 1.3598113059997559, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 0.49158249158249157, | |
| "grad_norm": 6.756187915802002, | |
| "learning_rate": 1.937473566640708e-06, | |
| "loss": 0.9948703050613403, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 0.49326599326599324, | |
| "grad_norm": 2.756861686706543, | |
| "learning_rate": 1.9368603205290196e-06, | |
| "loss": 0.8475466966629028, | |
| "step": 586 | |
| }, | |
| { | |
| "epoch": 0.494949494949495, | |
| "grad_norm": 5.148032188415527, | |
| "learning_rate": 1.9362441910814105e-06, | |
| "loss": 0.6347664594650269, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 0.49663299663299665, | |
| "grad_norm": 2.980475425720215, | |
| "learning_rate": 1.935625180420706e-06, | |
| "loss": 1.1008853912353516, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.4983164983164983, | |
| "grad_norm": 3.5861027240753174, | |
| "learning_rate": 1.935003290679659e-06, | |
| "loss": 1.1105575561523438, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "grad_norm": 37.69801712036133, | |
| "learning_rate": 1.934378524000941e-06, | |
| "loss": 0.7997324466705322, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 0.5016835016835017, | |
| "grad_norm": 10.022683143615723, | |
| "learning_rate": 1.933750882537136e-06, | |
| "loss": 0.9395183324813843, | |
| "step": 596 | |
| }, | |
| { | |
| "epoch": 0.5033670033670034, | |
| "grad_norm": 3.6454007625579834, | |
| "learning_rate": 1.9331203684507333e-06, | |
| "loss": 1.2922556400299072, | |
| "step": 598 | |
| }, | |
| { | |
| "epoch": 0.5050505050505051, | |
| "grad_norm": 11.494460105895996, | |
| "learning_rate": 1.9324869839141184e-06, | |
| "loss": 0.7769290804862976, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.5067340067340067, | |
| "grad_norm": 101.31135559082031, | |
| "learning_rate": 1.9318507311095686e-06, | |
| "loss": 1.0425605773925781, | |
| "step": 602 | |
| }, | |
| { | |
| "epoch": 0.5084175084175084, | |
| "grad_norm": 29.326383590698242, | |
| "learning_rate": 1.9312116122292414e-06, | |
| "loss": 1.0084577798843384, | |
| "step": 604 | |
| }, | |
| { | |
| "epoch": 0.51010101010101, | |
| "grad_norm": 4.6560163497924805, | |
| "learning_rate": 1.9305696294751707e-06, | |
| "loss": 1.0687224864959717, | |
| "step": 606 | |
| }, | |
| { | |
| "epoch": 0.5117845117845118, | |
| "grad_norm": 31.829082489013672, | |
| "learning_rate": 1.9299247850592575e-06, | |
| "loss": 0.5714974999427795, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 0.5134680134680135, | |
| "grad_norm": 3.3935041427612305, | |
| "learning_rate": 1.9292770812032626e-06, | |
| "loss": 0.9293146133422852, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.5151515151515151, | |
| "grad_norm": 35.04014587402344, | |
| "learning_rate": 1.9286265201387966e-06, | |
| "loss": 0.8598051071166992, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 0.5168350168350169, | |
| "grad_norm": 5.506503105163574, | |
| "learning_rate": 1.9279731041073177e-06, | |
| "loss": 0.7148240804672241, | |
| "step": 614 | |
| }, | |
| { | |
| "epoch": 0.5185185185185185, | |
| "grad_norm": 7.014071941375732, | |
| "learning_rate": 1.9273168353601185e-06, | |
| "loss": 1.0927050113677979, | |
| "step": 616 | |
| }, | |
| { | |
| "epoch": 0.5202020202020202, | |
| "grad_norm": 11.175944328308105, | |
| "learning_rate": 1.9266577161583207e-06, | |
| "loss": 1.0155811309814453, | |
| "step": 618 | |
| }, | |
| { | |
| "epoch": 0.5218855218855218, | |
| "grad_norm": 4.795597076416016, | |
| "learning_rate": 1.925995748772868e-06, | |
| "loss": 0.9794735312461853, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.5235690235690236, | |
| "grad_norm": 24.483413696289062, | |
| "learning_rate": 1.925330935484516e-06, | |
| "loss": 1.045680284500122, | |
| "step": 622 | |
| }, | |
| { | |
| "epoch": 0.5252525252525253, | |
| "grad_norm": 2.9763712882995605, | |
| "learning_rate": 1.9246632785838263e-06, | |
| "loss": 0.7627449631690979, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 0.5269360269360269, | |
| "grad_norm": 19.479745864868164, | |
| "learning_rate": 1.9239927803711578e-06, | |
| "loss": 0.945065975189209, | |
| "step": 626 | |
| }, | |
| { | |
| "epoch": 0.5286195286195287, | |
| "grad_norm": 2.6288349628448486, | |
| "learning_rate": 1.923319443156659e-06, | |
| "loss": 0.839026153087616, | |
| "step": 628 | |
| }, | |
| { | |
| "epoch": 0.5303030303030303, | |
| "grad_norm": 14.550789833068848, | |
| "learning_rate": 1.92264326926026e-06, | |
| "loss": 0.7562347054481506, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.531986531986532, | |
| "grad_norm": 7.969823360443115, | |
| "learning_rate": 1.9219642610116647e-06, | |
| "loss": 1.1040418148040771, | |
| "step": 632 | |
| }, | |
| { | |
| "epoch": 0.5336700336700336, | |
| "grad_norm": 9.72048568725586, | |
| "learning_rate": 1.9212824207503415e-06, | |
| "loss": 0.9238873720169067, | |
| "step": 634 | |
| }, | |
| { | |
| "epoch": 0.5353535353535354, | |
| "grad_norm": 4.213377475738525, | |
| "learning_rate": 1.920597750825517e-06, | |
| "loss": 0.8101857900619507, | |
| "step": 636 | |
| }, | |
| { | |
| "epoch": 0.5370370370370371, | |
| "grad_norm": 13.104752540588379, | |
| "learning_rate": 1.919910253596168e-06, | |
| "loss": 0.9694643020629883, | |
| "step": 638 | |
| }, | |
| { | |
| "epoch": 0.5387205387205387, | |
| "grad_norm": 10.729632377624512, | |
| "learning_rate": 1.919219931431011e-06, | |
| "loss": 0.8188080191612244, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.5404040404040404, | |
| "grad_norm": 4.642938613891602, | |
| "learning_rate": 1.918526786708497e-06, | |
| "loss": 0.944012463092804, | |
| "step": 642 | |
| }, | |
| { | |
| "epoch": 0.5420875420875421, | |
| "grad_norm": 4.087347984313965, | |
| "learning_rate": 1.9178308218168e-06, | |
| "loss": 0.8914910554885864, | |
| "step": 644 | |
| }, | |
| { | |
| "epoch": 0.5437710437710438, | |
| "grad_norm": 3.8000528812408447, | |
| "learning_rate": 1.9171320391538132e-06, | |
| "loss": 0.893518328666687, | |
| "step": 646 | |
| }, | |
| { | |
| "epoch": 0.5454545454545454, | |
| "grad_norm": 9.262425422668457, | |
| "learning_rate": 1.9164304411271364e-06, | |
| "loss": 0.984040379524231, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 0.5471380471380471, | |
| "grad_norm": 10.015108108520508, | |
| "learning_rate": 1.9157260301540697e-06, | |
| "loss": 1.140836477279663, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.5488215488215489, | |
| "grad_norm": 160.21282958984375, | |
| "learning_rate": 1.9150188086616055e-06, | |
| "loss": 1.0449649095535278, | |
| "step": 652 | |
| }, | |
| { | |
| "epoch": 0.5505050505050505, | |
| "grad_norm": 4.650694847106934, | |
| "learning_rate": 1.91430877908642e-06, | |
| "loss": 1.0726298093795776, | |
| "step": 654 | |
| }, | |
| { | |
| "epoch": 0.5521885521885522, | |
| "grad_norm": 11.116467475891113, | |
| "learning_rate": 1.9135959438748626e-06, | |
| "loss": 0.9272226095199585, | |
| "step": 656 | |
| }, | |
| { | |
| "epoch": 0.5538720538720538, | |
| "grad_norm": 7.265547752380371, | |
| "learning_rate": 1.9128803054829515e-06, | |
| "loss": 0.7893900871276855, | |
| "step": 658 | |
| }, | |
| { | |
| "epoch": 0.5555555555555556, | |
| "grad_norm": 35.09156799316406, | |
| "learning_rate": 1.912161866376362e-06, | |
| "loss": 0.7798557281494141, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.5572390572390572, | |
| "grad_norm": 3.4387574195861816, | |
| "learning_rate": 1.9114406290304186e-06, | |
| "loss": 1.0308525562286377, | |
| "step": 662 | |
| }, | |
| { | |
| "epoch": 0.5589225589225589, | |
| "grad_norm": 3.3560092449188232, | |
| "learning_rate": 1.910716595930088e-06, | |
| "loss": 1.0922589302062988, | |
| "step": 664 | |
| }, | |
| { | |
| "epoch": 0.5606060606060606, | |
| "grad_norm": 12.50266170501709, | |
| "learning_rate": 1.9099897695699684e-06, | |
| "loss": 0.4920412600040436, | |
| "step": 666 | |
| }, | |
| { | |
| "epoch": 0.5622895622895623, | |
| "grad_norm": 5.19976282119751, | |
| "learning_rate": 1.9092601524542828e-06, | |
| "loss": 0.6655771136283875, | |
| "step": 668 | |
| }, | |
| { | |
| "epoch": 0.563973063973064, | |
| "grad_norm": 17.65725326538086, | |
| "learning_rate": 1.9085277470968692e-06, | |
| "loss": 1.0704545974731445, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.5656565656565656, | |
| "grad_norm": 13.295573234558105, | |
| "learning_rate": 1.907792556021171e-06, | |
| "loss": 0.5930483341217041, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 0.5673400673400674, | |
| "grad_norm": 5.582085609436035, | |
| "learning_rate": 1.9070545817602328e-06, | |
| "loss": 0.5818225145339966, | |
| "step": 674 | |
| }, | |
| { | |
| "epoch": 0.569023569023569, | |
| "grad_norm": 7.926098823547363, | |
| "learning_rate": 1.9063138268566851e-06, | |
| "loss": 0.6757692098617554, | |
| "step": 676 | |
| }, | |
| { | |
| "epoch": 0.5707070707070707, | |
| "grad_norm": 9.610929489135742, | |
| "learning_rate": 1.9055702938627407e-06, | |
| "loss": 1.3059725761413574, | |
| "step": 678 | |
| }, | |
| { | |
| "epoch": 0.5723905723905723, | |
| "grad_norm": 14.765951156616211, | |
| "learning_rate": 1.9048239853401833e-06, | |
| "loss": 0.42610985040664673, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.5740740740740741, | |
| "grad_norm": 6.197120189666748, | |
| "learning_rate": 1.9040749038603602e-06, | |
| "loss": 1.0255128145217896, | |
| "step": 682 | |
| }, | |
| { | |
| "epoch": 0.5757575757575758, | |
| "grad_norm": 6.4059038162231445, | |
| "learning_rate": 1.9033230520041719e-06, | |
| "loss": 1.1382319927215576, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 0.5774410774410774, | |
| "grad_norm": 6.532130241394043, | |
| "learning_rate": 1.9025684323620645e-06, | |
| "loss": 1.1159263849258423, | |
| "step": 686 | |
| }, | |
| { | |
| "epoch": 0.5791245791245792, | |
| "grad_norm": 2.4945201873779297, | |
| "learning_rate": 1.9018110475340203e-06, | |
| "loss": 0.8307312726974487, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 0.5808080808080808, | |
| "grad_norm": 20.23617935180664, | |
| "learning_rate": 1.9010509001295485e-06, | |
| "loss": 0.7440475821495056, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.5824915824915825, | |
| "grad_norm": 4.1981072425842285, | |
| "learning_rate": 1.9002879927676767e-06, | |
| "loss": 0.8382600545883179, | |
| "step": 692 | |
| }, | |
| { | |
| "epoch": 0.5841750841750841, | |
| "grad_norm": 2.907876491546631, | |
| "learning_rate": 1.8995223280769424e-06, | |
| "loss": 0.9814774990081787, | |
| "step": 694 | |
| }, | |
| { | |
| "epoch": 0.5858585858585859, | |
| "grad_norm": 5.83011531829834, | |
| "learning_rate": 1.8987539086953819e-06, | |
| "loss": 0.8996963500976562, | |
| "step": 696 | |
| }, | |
| { | |
| "epoch": 0.5875420875420876, | |
| "grad_norm": 8.185150146484375, | |
| "learning_rate": 1.8979827372705233e-06, | |
| "loss": 0.8781136274337769, | |
| "step": 698 | |
| }, | |
| { | |
| "epoch": 0.5892255892255892, | |
| "grad_norm": 9.394926071166992, | |
| "learning_rate": 1.8972088164593771e-06, | |
| "loss": 0.8234498500823975, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.5909090909090909, | |
| "grad_norm": 15.942888259887695, | |
| "learning_rate": 1.896432148928426e-06, | |
| "loss": 0.9446474313735962, | |
| "step": 702 | |
| }, | |
| { | |
| "epoch": 0.5925925925925926, | |
| "grad_norm": 4.5268330574035645, | |
| "learning_rate": 1.895652737353616e-06, | |
| "loss": 1.0645607709884644, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 0.5942760942760943, | |
| "grad_norm": 6.5960612297058105, | |
| "learning_rate": 1.8948705844203482e-06, | |
| "loss": 0.9992242455482483, | |
| "step": 706 | |
| }, | |
| { | |
| "epoch": 0.5959595959595959, | |
| "grad_norm": 25.13721466064453, | |
| "learning_rate": 1.8940856928234689e-06, | |
| "loss": 0.746535062789917, | |
| "step": 708 | |
| }, | |
| { | |
| "epoch": 0.5976430976430976, | |
| "grad_norm": 6.828306674957275, | |
| "learning_rate": 1.8932980652672597e-06, | |
| "loss": 0.8305199146270752, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.5993265993265994, | |
| "grad_norm": 5.863089561462402, | |
| "learning_rate": 1.8925077044654288e-06, | |
| "loss": 1.1452956199645996, | |
| "step": 712 | |
| }, | |
| { | |
| "epoch": 0.601010101010101, | |
| "grad_norm": 3.158170700073242, | |
| "learning_rate": 1.8917146131411015e-06, | |
| "loss": 1.0598926544189453, | |
| "step": 714 | |
| }, | |
| { | |
| "epoch": 0.6026936026936027, | |
| "grad_norm": 6.218857288360596, | |
| "learning_rate": 1.8909187940268115e-06, | |
| "loss": 0.7409163117408752, | |
| "step": 716 | |
| }, | |
| { | |
| "epoch": 0.6043771043771043, | |
| "grad_norm": 6.748631000518799, | |
| "learning_rate": 1.89012024986449e-06, | |
| "loss": 0.9013140201568604, | |
| "step": 718 | |
| }, | |
| { | |
| "epoch": 0.6060606060606061, | |
| "grad_norm": 4.563135623931885, | |
| "learning_rate": 1.8893189834054586e-06, | |
| "loss": 0.9499297738075256, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.6077441077441077, | |
| "grad_norm": 12.914100646972656, | |
| "learning_rate": 1.8885149974104164e-06, | |
| "loss": 0.9684711694717407, | |
| "step": 722 | |
| }, | |
| { | |
| "epoch": 0.6094276094276094, | |
| "grad_norm": 16.68248748779297, | |
| "learning_rate": 1.8877082946494339e-06, | |
| "loss": 0.8916200995445251, | |
| "step": 724 | |
| }, | |
| { | |
| "epoch": 0.6111111111111112, | |
| "grad_norm": 31.8973388671875, | |
| "learning_rate": 1.8868988779019414e-06, | |
| "loss": 0.9836832284927368, | |
| "step": 726 | |
| }, | |
| { | |
| "epoch": 0.6127946127946128, | |
| "grad_norm": 38.546356201171875, | |
| "learning_rate": 1.8860867499567203e-06, | |
| "loss": 0.8979325294494629, | |
| "step": 728 | |
| }, | |
| { | |
| "epoch": 0.6144781144781145, | |
| "grad_norm": 3.1298513412475586, | |
| "learning_rate": 1.885271913611893e-06, | |
| "loss": 1.1511611938476562, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.6161616161616161, | |
| "grad_norm": 4.0303263664245605, | |
| "learning_rate": 1.8844543716749134e-06, | |
| "loss": 1.0997979640960693, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 0.6178451178451179, | |
| "grad_norm": 4.650604724884033, | |
| "learning_rate": 1.8836341269625578e-06, | |
| "loss": 0.7802401781082153, | |
| "step": 734 | |
| }, | |
| { | |
| "epoch": 0.6195286195286195, | |
| "grad_norm": 8.960386276245117, | |
| "learning_rate": 1.882811182300914e-06, | |
| "loss": 0.8063424229621887, | |
| "step": 736 | |
| }, | |
| { | |
| "epoch": 0.6212121212121212, | |
| "grad_norm": 20.323410034179688, | |
| "learning_rate": 1.881985540525373e-06, | |
| "loss": 0.689705491065979, | |
| "step": 738 | |
| }, | |
| { | |
| "epoch": 0.622895622895623, | |
| "grad_norm": 4.956573963165283, | |
| "learning_rate": 1.8811572044806178e-06, | |
| "loss": 1.2354564666748047, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.6245791245791246, | |
| "grad_norm": 4.285037040710449, | |
| "learning_rate": 1.8803261770206149e-06, | |
| "loss": 1.0013043880462646, | |
| "step": 742 | |
| }, | |
| { | |
| "epoch": 0.6262626262626263, | |
| "grad_norm": 2.563471794128418, | |
| "learning_rate": 1.8794924610086031e-06, | |
| "loss": 1.2029197216033936, | |
| "step": 744 | |
| }, | |
| { | |
| "epoch": 0.6279461279461279, | |
| "grad_norm": 2.987870216369629, | |
| "learning_rate": 1.8786560593170854e-06, | |
| "loss": 0.9561195969581604, | |
| "step": 746 | |
| }, | |
| { | |
| "epoch": 0.6296296296296297, | |
| "grad_norm": 3.021315336227417, | |
| "learning_rate": 1.877816974827817e-06, | |
| "loss": 1.202516794204712, | |
| "step": 748 | |
| }, | |
| { | |
| "epoch": 0.6313131313131313, | |
| "grad_norm": 3.505037307739258, | |
| "learning_rate": 1.8769752104317973e-06, | |
| "loss": 1.2894848585128784, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.632996632996633, | |
| "grad_norm": 8.464410781860352, | |
| "learning_rate": 1.8761307690292589e-06, | |
| "loss": 0.7271798849105835, | |
| "step": 752 | |
| }, | |
| { | |
| "epoch": 0.6346801346801347, | |
| "grad_norm": 26.4637508392334, | |
| "learning_rate": 1.875283653529658e-06, | |
| "loss": 0.9941682815551758, | |
| "step": 754 | |
| }, | |
| { | |
| "epoch": 0.6363636363636364, | |
| "grad_norm": 2.6587889194488525, | |
| "learning_rate": 1.874433866851663e-06, | |
| "loss": 0.7514116168022156, | |
| "step": 756 | |
| }, | |
| { | |
| "epoch": 0.6380471380471381, | |
| "grad_norm": 10.891627311706543, | |
| "learning_rate": 1.8735814119231475e-06, | |
| "loss": 0.8671576976776123, | |
| "step": 758 | |
| }, | |
| { | |
| "epoch": 0.6397306397306397, | |
| "grad_norm": 25.072734832763672, | |
| "learning_rate": 1.872726291681177e-06, | |
| "loss": 0.6143717169761658, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.6414141414141414, | |
| "grad_norm": 4.057854175567627, | |
| "learning_rate": 1.8718685090720004e-06, | |
| "loss": 0.46186384558677673, | |
| "step": 762 | |
| }, | |
| { | |
| "epoch": 0.6430976430976431, | |
| "grad_norm": 10.258670806884766, | |
| "learning_rate": 1.8710080670510402e-06, | |
| "loss": 1.0092180967330933, | |
| "step": 764 | |
| }, | |
| { | |
| "epoch": 0.6447811447811448, | |
| "grad_norm": 4.200110912322998, | |
| "learning_rate": 1.8701449685828806e-06, | |
| "loss": 1.0899416208267212, | |
| "step": 766 | |
| }, | |
| { | |
| "epoch": 0.6464646464646465, | |
| "grad_norm": 10.581267356872559, | |
| "learning_rate": 1.8692792166412595e-06, | |
| "loss": 0.7667125463485718, | |
| "step": 768 | |
| }, | |
| { | |
| "epoch": 0.6481481481481481, | |
| "grad_norm": 5.673297882080078, | |
| "learning_rate": 1.8684108142090562e-06, | |
| "loss": 0.7934967279434204, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.6498316498316499, | |
| "grad_norm": 3.9210774898529053, | |
| "learning_rate": 1.8675397642782827e-06, | |
| "loss": 0.7912408113479614, | |
| "step": 772 | |
| }, | |
| { | |
| "epoch": 0.6515151515151515, | |
| "grad_norm": 12.99809455871582, | |
| "learning_rate": 1.8666660698500726e-06, | |
| "loss": 0.6966930627822876, | |
| "step": 774 | |
| }, | |
| { | |
| "epoch": 0.6531986531986532, | |
| "grad_norm": 2.608152389526367, | |
| "learning_rate": 1.8657897339346707e-06, | |
| "loss": 0.9161090850830078, | |
| "step": 776 | |
| }, | |
| { | |
| "epoch": 0.6548821548821548, | |
| "grad_norm": 4.8470282554626465, | |
| "learning_rate": 1.8649107595514226e-06, | |
| "loss": 1.050070881843567, | |
| "step": 778 | |
| }, | |
| { | |
| "epoch": 0.6565656565656566, | |
| "grad_norm": 38.622154235839844, | |
| "learning_rate": 1.8640291497287654e-06, | |
| "loss": 0.948337197303772, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.6582491582491582, | |
| "grad_norm": 19.695106506347656, | |
| "learning_rate": 1.8631449075042156e-06, | |
| "loss": 1.065544605255127, | |
| "step": 782 | |
| }, | |
| { | |
| "epoch": 0.6599326599326599, | |
| "grad_norm": 6.196758270263672, | |
| "learning_rate": 1.8622580359243601e-06, | |
| "loss": 0.9903167486190796, | |
| "step": 784 | |
| }, | |
| { | |
| "epoch": 0.6616161616161617, | |
| "grad_norm": 11.652655601501465, | |
| "learning_rate": 1.8613685380448441e-06, | |
| "loss": 1.0705502033233643, | |
| "step": 786 | |
| }, | |
| { | |
| "epoch": 0.6632996632996633, | |
| "grad_norm": 21.967121124267578, | |
| "learning_rate": 1.8604764169303626e-06, | |
| "loss": 0.8703781366348267, | |
| "step": 788 | |
| }, | |
| { | |
| "epoch": 0.664983164983165, | |
| "grad_norm": 2.8076608180999756, | |
| "learning_rate": 1.8595816756546477e-06, | |
| "loss": 0.9413682222366333, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.6666666666666666, | |
| "grad_norm": 12.699344635009766, | |
| "learning_rate": 1.8586843173004598e-06, | |
| "loss": 0.9941300749778748, | |
| "step": 792 | |
| }, | |
| { | |
| "epoch": 0.6683501683501684, | |
| "grad_norm": 2.5356881618499756, | |
| "learning_rate": 1.8577843449595763e-06, | |
| "loss": 0.6315573453903198, | |
| "step": 794 | |
| }, | |
| { | |
| "epoch": 0.67003367003367, | |
| "grad_norm": 3.684738874435425, | |
| "learning_rate": 1.85688176173278e-06, | |
| "loss": 0.9797836542129517, | |
| "step": 796 | |
| }, | |
| { | |
| "epoch": 0.6717171717171717, | |
| "grad_norm": 4.553958415985107, | |
| "learning_rate": 1.8559765707298502e-06, | |
| "loss": 1.0133525133132935, | |
| "step": 798 | |
| }, | |
| { | |
| "epoch": 0.6734006734006734, | |
| "grad_norm": 5.8083367347717285, | |
| "learning_rate": 1.8550687750695509e-06, | |
| "loss": 0.635034441947937, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.6750841750841751, | |
| "grad_norm": 2.6168251037597656, | |
| "learning_rate": 1.8541583778796196e-06, | |
| "loss": 0.9916131496429443, | |
| "step": 802 | |
| }, | |
| { | |
| "epoch": 0.6767676767676768, | |
| "grad_norm": 10.899927139282227, | |
| "learning_rate": 1.8532453822967584e-06, | |
| "loss": 0.7682900428771973, | |
| "step": 804 | |
| }, | |
| { | |
| "epoch": 0.6784511784511784, | |
| "grad_norm": 11.195059776306152, | |
| "learning_rate": 1.8523297914666207e-06, | |
| "loss": 0.6411112546920776, | |
| "step": 806 | |
| }, | |
| { | |
| "epoch": 0.6801346801346801, | |
| "grad_norm": 8.76089859008789, | |
| "learning_rate": 1.8514116085438027e-06, | |
| "loss": 1.0669599771499634, | |
| "step": 808 | |
| }, | |
| { | |
| "epoch": 0.6818181818181818, | |
| "grad_norm": 2.9080264568328857, | |
| "learning_rate": 1.8504908366918302e-06, | |
| "loss": 0.9828901886940002, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.6835016835016835, | |
| "grad_norm": 4.848678112030029, | |
| "learning_rate": 1.84956747908315e-06, | |
| "loss": 1.1542444229125977, | |
| "step": 812 | |
| }, | |
| { | |
| "epoch": 0.6851851851851852, | |
| "grad_norm": 6.960413932800293, | |
| "learning_rate": 1.8486415388991173e-06, | |
| "loss": 0.5982141494750977, | |
| "step": 814 | |
| }, | |
| { | |
| "epoch": 0.6868686868686869, | |
| "grad_norm": 2.6384944915771484, | |
| "learning_rate": 1.8477130193299863e-06, | |
| "loss": 1.1131889820098877, | |
| "step": 816 | |
| }, | |
| { | |
| "epoch": 0.6885521885521886, | |
| "grad_norm": 9.800881385803223, | |
| "learning_rate": 1.846781923574897e-06, | |
| "loss": 0.7944687604904175, | |
| "step": 818 | |
| }, | |
| { | |
| "epoch": 0.6902356902356902, | |
| "grad_norm": 40.63787078857422, | |
| "learning_rate": 1.8458482548418661e-06, | |
| "loss": 0.7440886497497559, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.6919191919191919, | |
| "grad_norm": 3.366387367248535, | |
| "learning_rate": 1.8449120163477753e-06, | |
| "loss": 0.7828149199485779, | |
| "step": 822 | |
| }, | |
| { | |
| "epoch": 0.6936026936026936, | |
| "grad_norm": 4.786665916442871, | |
| "learning_rate": 1.8439732113183607e-06, | |
| "loss": 0.8565751314163208, | |
| "step": 824 | |
| }, | |
| { | |
| "epoch": 0.6952861952861953, | |
| "grad_norm": 9.01762866973877, | |
| "learning_rate": 1.8430318429881997e-06, | |
| "loss": 0.8942912817001343, | |
| "step": 826 | |
| }, | |
| { | |
| "epoch": 0.696969696969697, | |
| "grad_norm": 2.231179714202881, | |
| "learning_rate": 1.8420879146007025e-06, | |
| "loss": 0.8027513027191162, | |
| "step": 828 | |
| }, | |
| { | |
| "epoch": 0.6986531986531986, | |
| "grad_norm": 3.190427541732788, | |
| "learning_rate": 1.8411414294081003e-06, | |
| "loss": 1.2244315147399902, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.7003367003367004, | |
| "grad_norm": 8.976424217224121, | |
| "learning_rate": 1.8401923906714321e-06, | |
| "loss": 0.8990939855575562, | |
| "step": 832 | |
| }, | |
| { | |
| "epoch": 0.702020202020202, | |
| "grad_norm": 11.49886703491211, | |
| "learning_rate": 1.8392408016605358e-06, | |
| "loss": 0.6986100673675537, | |
| "step": 834 | |
| }, | |
| { | |
| "epoch": 0.7037037037037037, | |
| "grad_norm": 10.203569412231445, | |
| "learning_rate": 1.8382866656540361e-06, | |
| "loss": 0.8804981708526611, | |
| "step": 836 | |
| }, | |
| { | |
| "epoch": 0.7053872053872053, | |
| "grad_norm": 6.145118713378906, | |
| "learning_rate": 1.8373299859393326e-06, | |
| "loss": 0.5913242697715759, | |
| "step": 838 | |
| }, | |
| { | |
| "epoch": 0.7070707070707071, | |
| "grad_norm": 4.84503698348999, | |
| "learning_rate": 1.8363707658125905e-06, | |
| "loss": 1.2492575645446777, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.7087542087542088, | |
| "grad_norm": 6.014354228973389, | |
| "learning_rate": 1.8354090085787252e-06, | |
| "loss": 1.122812271118164, | |
| "step": 842 | |
| }, | |
| { | |
| "epoch": 0.7104377104377104, | |
| "grad_norm": 10.91385269165039, | |
| "learning_rate": 1.8344447175513965e-06, | |
| "loss": 1.0250314474105835, | |
| "step": 844 | |
| }, | |
| { | |
| "epoch": 0.7121212121212122, | |
| "grad_norm": 5.709978103637695, | |
| "learning_rate": 1.8334778960529916e-06, | |
| "loss": 0.8772053718566895, | |
| "step": 846 | |
| }, | |
| { | |
| "epoch": 0.7138047138047138, | |
| "grad_norm": 25.334754943847656, | |
| "learning_rate": 1.8325085474146178e-06, | |
| "loss": 0.7974849939346313, | |
| "step": 848 | |
| }, | |
| { | |
| "epoch": 0.7154882154882155, | |
| "grad_norm": 30.209260940551758, | |
| "learning_rate": 1.8315366749760892e-06, | |
| "loss": 0.9543988704681396, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.7171717171717171, | |
| "grad_norm": 3.697704315185547, | |
| "learning_rate": 1.8305622820859153e-06, | |
| "loss": 0.7927026748657227, | |
| "step": 852 | |
| }, | |
| { | |
| "epoch": 0.7188552188552189, | |
| "grad_norm": 10.00793743133545, | |
| "learning_rate": 1.829585372101289e-06, | |
| "loss": 0.78277987241745, | |
| "step": 854 | |
| }, | |
| { | |
| "epoch": 0.7205387205387206, | |
| "grad_norm": 7.505032539367676, | |
| "learning_rate": 1.828605948388077e-06, | |
| "loss": 1.1311378479003906, | |
| "step": 856 | |
| }, | |
| { | |
| "epoch": 0.7222222222222222, | |
| "grad_norm": 4.7181572914123535, | |
| "learning_rate": 1.8276240143208054e-06, | |
| "loss": 0.7503079175949097, | |
| "step": 858 | |
| }, | |
| { | |
| "epoch": 0.7239057239057239, | |
| "grad_norm": 3.523047924041748, | |
| "learning_rate": 1.8266395732826508e-06, | |
| "loss": 0.9047625064849854, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.7255892255892256, | |
| "grad_norm": 2.024121046066284, | |
| "learning_rate": 1.8256526286654264e-06, | |
| "loss": 1.1868062019348145, | |
| "step": 862 | |
| }, | |
| { | |
| "epoch": 0.7272727272727273, | |
| "grad_norm": 14.294280052185059, | |
| "learning_rate": 1.824663183869572e-06, | |
| "loss": 1.0042986869812012, | |
| "step": 864 | |
| }, | |
| { | |
| "epoch": 0.7289562289562289, | |
| "grad_norm": 17.085304260253906, | |
| "learning_rate": 1.8236712423041408e-06, | |
| "loss": 0.9877347946166992, | |
| "step": 866 | |
| }, | |
| { | |
| "epoch": 0.7306397306397306, | |
| "grad_norm": 6.132120609283447, | |
| "learning_rate": 1.822676807386789e-06, | |
| "loss": 1.2511956691741943, | |
| "step": 868 | |
| }, | |
| { | |
| "epoch": 0.7323232323232324, | |
| "grad_norm": 5.884708881378174, | |
| "learning_rate": 1.8216798825437635e-06, | |
| "loss": 1.1776090860366821, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.734006734006734, | |
| "grad_norm": 5.7460737228393555, | |
| "learning_rate": 1.8206804712098903e-06, | |
| "loss": 1.0924787521362305, | |
| "step": 872 | |
| }, | |
| { | |
| "epoch": 0.7356902356902357, | |
| "grad_norm": 2.724154233932495, | |
| "learning_rate": 1.819678576828561e-06, | |
| "loss": 1.0940457582473755, | |
| "step": 874 | |
| }, | |
| { | |
| "epoch": 0.7373737373737373, | |
| "grad_norm": 21.470823287963867, | |
| "learning_rate": 1.8186742028517237e-06, | |
| "loss": 0.8332981467247009, | |
| "step": 876 | |
| }, | |
| { | |
| "epoch": 0.7390572390572391, | |
| "grad_norm": 7.482705116271973, | |
| "learning_rate": 1.8176673527398694e-06, | |
| "loss": 0.6369479894638062, | |
| "step": 878 | |
| }, | |
| { | |
| "epoch": 0.7407407407407407, | |
| "grad_norm": 15.344402313232422, | |
| "learning_rate": 1.8166580299620202e-06, | |
| "loss": 0.612411618232727, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.7424242424242424, | |
| "grad_norm": 2.4508793354034424, | |
| "learning_rate": 1.815646237995718e-06, | |
| "loss": 1.1662663221359253, | |
| "step": 882 | |
| }, | |
| { | |
| "epoch": 0.7441077441077442, | |
| "grad_norm": 3.4642128944396973, | |
| "learning_rate": 1.814631980327012e-06, | |
| "loss": 1.1108534336090088, | |
| "step": 884 | |
| }, | |
| { | |
| "epoch": 0.7457912457912458, | |
| "grad_norm": 2.681384801864624, | |
| "learning_rate": 1.813615260450446e-06, | |
| "loss": 0.6596791744232178, | |
| "step": 886 | |
| }, | |
| { | |
| "epoch": 0.7474747474747475, | |
| "grad_norm": 1.7828519344329834, | |
| "learning_rate": 1.8125960818690485e-06, | |
| "loss": 1.0084741115570068, | |
| "step": 888 | |
| }, | |
| { | |
| "epoch": 0.7491582491582491, | |
| "grad_norm": 34.723270416259766, | |
| "learning_rate": 1.811574448094318e-06, | |
| "loss": 0.9112769961357117, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.7508417508417509, | |
| "grad_norm": 10.580464363098145, | |
| "learning_rate": 1.8105503626462129e-06, | |
| "loss": 0.9600024819374084, | |
| "step": 892 | |
| }, | |
| { | |
| "epoch": 0.7525252525252525, | |
| "grad_norm": 17.393407821655273, | |
| "learning_rate": 1.8095238290531385e-06, | |
| "loss": 0.7573001384735107, | |
| "step": 894 | |
| }, | |
| { | |
| "epoch": 0.7542087542087542, | |
| "grad_norm": 8.820290565490723, | |
| "learning_rate": 1.8084948508519346e-06, | |
| "loss": 0.8571316003799438, | |
| "step": 896 | |
| }, | |
| { | |
| "epoch": 0.7558922558922558, | |
| "grad_norm": 15.848811149597168, | |
| "learning_rate": 1.8074634315878644e-06, | |
| "loss": 0.6229598522186279, | |
| "step": 898 | |
| }, | |
| { | |
| "epoch": 0.7575757575757576, | |
| "grad_norm": 5.893372058868408, | |
| "learning_rate": 1.8064295748146014e-06, | |
| "loss": 0.8924508094787598, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.7592592592592593, | |
| "grad_norm": 21.465091705322266, | |
| "learning_rate": 1.8053932840942175e-06, | |
| "loss": 0.6515762209892273, | |
| "step": 902 | |
| }, | |
| { | |
| "epoch": 0.7609427609427609, | |
| "grad_norm": 3.3033552169799805, | |
| "learning_rate": 1.8043545629971689e-06, | |
| "loss": 1.2100439071655273, | |
| "step": 904 | |
| }, | |
| { | |
| "epoch": 0.7626262626262627, | |
| "grad_norm": 3.6212236881256104, | |
| "learning_rate": 1.8033134151022881e-06, | |
| "loss": 0.9367895126342773, | |
| "step": 906 | |
| }, | |
| { | |
| "epoch": 0.7643097643097643, | |
| "grad_norm": 11.270123481750488, | |
| "learning_rate": 1.8022698439967673e-06, | |
| "loss": 0.9181069731712341, | |
| "step": 908 | |
| }, | |
| { | |
| "epoch": 0.765993265993266, | |
| "grad_norm": 4.863030433654785, | |
| "learning_rate": 1.8012238532761476e-06, | |
| "loss": 0.8502522110939026, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.7676767676767676, | |
| "grad_norm": 7.718131065368652, | |
| "learning_rate": 1.8001754465443078e-06, | |
| "loss": 0.9918288588523865, | |
| "step": 912 | |
| }, | |
| { | |
| "epoch": 0.7693602693602694, | |
| "grad_norm": 10.74516773223877, | |
| "learning_rate": 1.79912462741345e-06, | |
| "loss": 0.8540866374969482, | |
| "step": 914 | |
| }, | |
| { | |
| "epoch": 0.7710437710437711, | |
| "grad_norm": 6.144227027893066, | |
| "learning_rate": 1.798071399504088e-06, | |
| "loss": 0.9551119804382324, | |
| "step": 916 | |
| }, | |
| { | |
| "epoch": 0.7727272727272727, | |
| "grad_norm": 3.8601930141448975, | |
| "learning_rate": 1.7970157664450357e-06, | |
| "loss": 0.6338967084884644, | |
| "step": 918 | |
| }, | |
| { | |
| "epoch": 0.7744107744107744, | |
| "grad_norm": 11.050410270690918, | |
| "learning_rate": 1.7959577318733925e-06, | |
| "loss": 0.5116314888000488, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.7760942760942761, | |
| "grad_norm": 4.513789176940918, | |
| "learning_rate": 1.7948972994345328e-06, | |
| "loss": 0.6171036958694458, | |
| "step": 922 | |
| }, | |
| { | |
| "epoch": 0.7777777777777778, | |
| "grad_norm": 8.82806396484375, | |
| "learning_rate": 1.7938344727820928e-06, | |
| "loss": 0.9206382632255554, | |
| "step": 924 | |
| }, | |
| { | |
| "epoch": 0.7794612794612794, | |
| "grad_norm": 4.373292446136475, | |
| "learning_rate": 1.7927692555779577e-06, | |
| "loss": 1.1664514541625977, | |
| "step": 926 | |
| }, | |
| { | |
| "epoch": 0.7811447811447811, | |
| "grad_norm": 3.1802244186401367, | |
| "learning_rate": 1.791701651492248e-06, | |
| "loss": 0.48759081959724426, | |
| "step": 928 | |
| }, | |
| { | |
| "epoch": 0.7828282828282829, | |
| "grad_norm": 6.313639163970947, | |
| "learning_rate": 1.7906316642033099e-06, | |
| "loss": 1.3327703475952148, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.7845117845117845, | |
| "grad_norm": 22.747098922729492, | |
| "learning_rate": 1.7895592973976998e-06, | |
| "loss": 0.8829092383384705, | |
| "step": 932 | |
| }, | |
| { | |
| "epoch": 0.7861952861952862, | |
| "grad_norm": 3.2088170051574707, | |
| "learning_rate": 1.7884845547701721e-06, | |
| "loss": 1.0014090538024902, | |
| "step": 934 | |
| }, | |
| { | |
| "epoch": 0.7878787878787878, | |
| "grad_norm": 12.781431198120117, | |
| "learning_rate": 1.7874074400236677e-06, | |
| "loss": 0.8620262145996094, | |
| "step": 936 | |
| }, | |
| { | |
| "epoch": 0.7895622895622896, | |
| "grad_norm": 2.6499383449554443, | |
| "learning_rate": 1.7863279568692999e-06, | |
| "loss": 0.8909909725189209, | |
| "step": 938 | |
| }, | |
| { | |
| "epoch": 0.7912457912457912, | |
| "grad_norm": 2.3473894596099854, | |
| "learning_rate": 1.7852461090263422e-06, | |
| "loss": 1.0048516988754272, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.7929292929292929, | |
| "grad_norm": 16.40445327758789, | |
| "learning_rate": 1.7841619002222164e-06, | |
| "loss": 0.3737819790840149, | |
| "step": 942 | |
| }, | |
| { | |
| "epoch": 0.7946127946127947, | |
| "grad_norm": 3.327476978302002, | |
| "learning_rate": 1.7830753341924768e-06, | |
| "loss": 0.9010682106018066, | |
| "step": 944 | |
| }, | |
| { | |
| "epoch": 0.7962962962962963, | |
| "grad_norm": 2.6396255493164062, | |
| "learning_rate": 1.781986414680802e-06, | |
| "loss": 0.925070583820343, | |
| "step": 946 | |
| }, | |
| { | |
| "epoch": 0.797979797979798, | |
| "grad_norm": 3.3719475269317627, | |
| "learning_rate": 1.7808951454389761e-06, | |
| "loss": 1.036871075630188, | |
| "step": 948 | |
| }, | |
| { | |
| "epoch": 0.7996632996632996, | |
| "grad_norm": 5.47444486618042, | |
| "learning_rate": 1.7798015302268826e-06, | |
| "loss": 0.8623565435409546, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.8013468013468014, | |
| "grad_norm": 11.89119815826416, | |
| "learning_rate": 1.7787055728124853e-06, | |
| "loss": 0.4426053762435913, | |
| "step": 952 | |
| }, | |
| { | |
| "epoch": 0.803030303030303, | |
| "grad_norm": 3.086700916290283, | |
| "learning_rate": 1.777607276971818e-06, | |
| "loss": 0.9516481161117554, | |
| "step": 954 | |
| }, | |
| { | |
| "epoch": 0.8047138047138047, | |
| "grad_norm": 11.045938491821289, | |
| "learning_rate": 1.7765066464889729e-06, | |
| "loss": 0.9658932685852051, | |
| "step": 956 | |
| }, | |
| { | |
| "epoch": 0.8063973063973064, | |
| "grad_norm": 10.93420696258545, | |
| "learning_rate": 1.775403685156085e-06, | |
| "loss": 1.1045958995819092, | |
| "step": 958 | |
| }, | |
| { | |
| "epoch": 0.8080808080808081, | |
| "grad_norm": 2.5317461490631104, | |
| "learning_rate": 1.77429839677332e-06, | |
| "loss": 0.673387348651886, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.8097643097643098, | |
| "grad_norm": 4.62790584564209, | |
| "learning_rate": 1.773190785148861e-06, | |
| "loss": 0.771082878112793, | |
| "step": 962 | |
| }, | |
| { | |
| "epoch": 0.8114478114478114, | |
| "grad_norm": 6.418295860290527, | |
| "learning_rate": 1.7720808540988965e-06, | |
| "loss": 0.6905859112739563, | |
| "step": 964 | |
| }, | |
| { | |
| "epoch": 0.8131313131313131, | |
| "grad_norm": 2.9778709411621094, | |
| "learning_rate": 1.770968607447606e-06, | |
| "loss": 0.9952410459518433, | |
| "step": 966 | |
| }, | |
| { | |
| "epoch": 0.8148148148148148, | |
| "grad_norm": 17.664697647094727, | |
| "learning_rate": 1.7698540490271475e-06, | |
| "loss": 1.1883214712142944, | |
| "step": 968 | |
| }, | |
| { | |
| "epoch": 0.8164983164983165, | |
| "grad_norm": 3.8164806365966797, | |
| "learning_rate": 1.7687371826776432e-06, | |
| "loss": 0.9806801080703735, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 0.8181818181818182, | |
| "grad_norm": 10.780609130859375, | |
| "learning_rate": 1.7676180122471677e-06, | |
| "loss": 0.9630722403526306, | |
| "step": 972 | |
| }, | |
| { | |
| "epoch": 0.8198653198653199, | |
| "grad_norm": 6.188197612762451, | |
| "learning_rate": 1.7664965415917342e-06, | |
| "loss": 0.7298092842102051, | |
| "step": 974 | |
| }, | |
| { | |
| "epoch": 0.8215488215488216, | |
| "grad_norm": 4.687350749969482, | |
| "learning_rate": 1.765372774575281e-06, | |
| "loss": 0.9373712539672852, | |
| "step": 976 | |
| }, | |
| { | |
| "epoch": 0.8232323232323232, | |
| "grad_norm": 5.430413722991943, | |
| "learning_rate": 1.764246715069658e-06, | |
| "loss": 1.1954350471496582, | |
| "step": 978 | |
| }, | |
| { | |
| "epoch": 0.8249158249158249, | |
| "grad_norm": 3.7986605167388916, | |
| "learning_rate": 1.7631183669546146e-06, | |
| "loss": 1.161393404006958, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 0.8265993265993266, | |
| "grad_norm": 4.60081672668457, | |
| "learning_rate": 1.761987734117784e-06, | |
| "loss": 1.046337366104126, | |
| "step": 982 | |
| }, | |
| { | |
| "epoch": 0.8282828282828283, | |
| "grad_norm": 3.7046844959259033, | |
| "learning_rate": 1.7608548204546724e-06, | |
| "loss": 1.0424065589904785, | |
| "step": 984 | |
| }, | |
| { | |
| "epoch": 0.82996632996633, | |
| "grad_norm": 19.03668212890625, | |
| "learning_rate": 1.7597196298686446e-06, | |
| "loss": 0.9536873698234558, | |
| "step": 986 | |
| }, | |
| { | |
| "epoch": 0.8316498316498316, | |
| "grad_norm": 32.48857498168945, | |
| "learning_rate": 1.7585821662709088e-06, | |
| "loss": 0.8443811535835266, | |
| "step": 988 | |
| }, | |
| { | |
| "epoch": 0.8333333333333334, | |
| "grad_norm": 11.665223121643066, | |
| "learning_rate": 1.7574424335805066e-06, | |
| "loss": 0.8324294686317444, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 0.835016835016835, | |
| "grad_norm": 21.848285675048828, | |
| "learning_rate": 1.7563004357242962e-06, | |
| "loss": 0.6908457279205322, | |
| "step": 992 | |
| }, | |
| { | |
| "epoch": 0.8367003367003367, | |
| "grad_norm": 2.1612720489501953, | |
| "learning_rate": 1.755156176636941e-06, | |
| "loss": 0.9239605069160461, | |
| "step": 994 | |
| }, | |
| { | |
| "epoch": 0.8383838383838383, | |
| "grad_norm": 4.865361213684082, | |
| "learning_rate": 1.7540096602608946e-06, | |
| "loss": 0.6591212153434753, | |
| "step": 996 | |
| }, | |
| { | |
| "epoch": 0.8400673400673401, | |
| "grad_norm": 3.861494779586792, | |
| "learning_rate": 1.7528608905463881e-06, | |
| "loss": 0.9056419134140015, | |
| "step": 998 | |
| }, | |
| { | |
| "epoch": 0.8417508417508418, | |
| "grad_norm": 2.9562947750091553, | |
| "learning_rate": 1.7517098714514175e-06, | |
| "loss": 1.0812749862670898, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.8434343434343434, | |
| "grad_norm": 3.0346264839172363, | |
| "learning_rate": 1.7505566069417272e-06, | |
| "loss": 0.7617006301879883, | |
| "step": 1002 | |
| }, | |
| { | |
| "epoch": 0.8451178451178452, | |
| "grad_norm": 3.785036325454712, | |
| "learning_rate": 1.749401100990799e-06, | |
| "loss": 0.6745568513870239, | |
| "step": 1004 | |
| }, | |
| { | |
| "epoch": 0.8468013468013468, | |
| "grad_norm": 5.557058334350586, | |
| "learning_rate": 1.748243357579837e-06, | |
| "loss": 1.0811188220977783, | |
| "step": 1006 | |
| }, | |
| { | |
| "epoch": 0.8484848484848485, | |
| "grad_norm": 1.9689534902572632, | |
| "learning_rate": 1.747083380697754e-06, | |
| "loss": 0.5900795459747314, | |
| "step": 1008 | |
| }, | |
| { | |
| "epoch": 0.8501683501683501, | |
| "grad_norm": 14.491848945617676, | |
| "learning_rate": 1.7459211743411589e-06, | |
| "loss": 0.9504165649414062, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 0.8518518518518519, | |
| "grad_norm": 21.8311767578125, | |
| "learning_rate": 1.7447567425143413e-06, | |
| "loss": 0.8922120928764343, | |
| "step": 1012 | |
| }, | |
| { | |
| "epoch": 0.8535353535353535, | |
| "grad_norm": 13.790666580200195, | |
| "learning_rate": 1.7435900892292593e-06, | |
| "loss": 0.7710224390029907, | |
| "step": 1014 | |
| }, | |
| { | |
| "epoch": 0.8552188552188552, | |
| "grad_norm": 20.326784133911133, | |
| "learning_rate": 1.7424212185055236e-06, | |
| "loss": 0.6666241884231567, | |
| "step": 1016 | |
| }, | |
| { | |
| "epoch": 0.8569023569023569, | |
| "grad_norm": 18.170595169067383, | |
| "learning_rate": 1.7412501343703858e-06, | |
| "loss": 0.967223048210144, | |
| "step": 1018 | |
| }, | |
| { | |
| "epoch": 0.8585858585858586, | |
| "grad_norm": 3.054368257522583, | |
| "learning_rate": 1.740076840858724e-06, | |
| "loss": 1.2456423044204712, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 0.8602693602693603, | |
| "grad_norm": 26.2432861328125, | |
| "learning_rate": 1.7389013420130278e-06, | |
| "loss": 0.9183678030967712, | |
| "step": 1022 | |
| }, | |
| { | |
| "epoch": 0.8619528619528619, | |
| "grad_norm": 4.530948162078857, | |
| "learning_rate": 1.7377236418833855e-06, | |
| "loss": 0.953632652759552, | |
| "step": 1024 | |
| }, | |
| { | |
| "epoch": 0.8636363636363636, | |
| "grad_norm": 4.451155185699463, | |
| "learning_rate": 1.736543744527469e-06, | |
| "loss": 0.8909140825271606, | |
| "step": 1026 | |
| }, | |
| { | |
| "epoch": 0.8653198653198653, | |
| "grad_norm": 3.3854105472564697, | |
| "learning_rate": 1.7353616540105214e-06, | |
| "loss": 0.9759948253631592, | |
| "step": 1028 | |
| }, | |
| { | |
| "epoch": 0.867003367003367, | |
| "grad_norm": 7.278261184692383, | |
| "learning_rate": 1.7341773744053423e-06, | |
| "loss": 0.643425703048706, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 0.8686868686868687, | |
| "grad_norm": 3.562976360321045, | |
| "learning_rate": 1.7329909097922726e-06, | |
| "loss": 0.8528425693511963, | |
| "step": 1032 | |
| }, | |
| { | |
| "epoch": 0.8703703703703703, | |
| "grad_norm": 4.631925106048584, | |
| "learning_rate": 1.7318022642591826e-06, | |
| "loss": 0.9317729473114014, | |
| "step": 1034 | |
| }, | |
| { | |
| "epoch": 0.8720538720538721, | |
| "grad_norm": 2.9623520374298096, | |
| "learning_rate": 1.730611441901456e-06, | |
| "loss": 0.9544110298156738, | |
| "step": 1036 | |
| }, | |
| { | |
| "epoch": 0.8737373737373737, | |
| "grad_norm": 7.970090389251709, | |
| "learning_rate": 1.7294184468219768e-06, | |
| "loss": 1.1069408655166626, | |
| "step": 1038 | |
| }, | |
| { | |
| "epoch": 0.8754208754208754, | |
| "grad_norm": 5.28152322769165, | |
| "learning_rate": 1.728223283131116e-06, | |
| "loss": 1.0873464345932007, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 0.877104377104377, | |
| "grad_norm": 5.224731922149658, | |
| "learning_rate": 1.727025954946714e-06, | |
| "loss": 0.9729514718055725, | |
| "step": 1042 | |
| }, | |
| { | |
| "epoch": 0.8787878787878788, | |
| "grad_norm": 13.218440055847168, | |
| "learning_rate": 1.7258264663940706e-06, | |
| "loss": 1.0898833274841309, | |
| "step": 1044 | |
| }, | |
| { | |
| "epoch": 0.8804713804713805, | |
| "grad_norm": 2.7989261150360107, | |
| "learning_rate": 1.724624821605929e-06, | |
| "loss": 1.0561833381652832, | |
| "step": 1046 | |
| }, | |
| { | |
| "epoch": 0.8821548821548821, | |
| "grad_norm": 13.938822746276855, | |
| "learning_rate": 1.7234210247224608e-06, | |
| "loss": 0.9620407223701477, | |
| "step": 1048 | |
| }, | |
| { | |
| "epoch": 0.8838383838383839, | |
| "grad_norm": 14.411212921142578, | |
| "learning_rate": 1.7222150798912527e-06, | |
| "loss": 0.7809741497039795, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.8855218855218855, | |
| "grad_norm": 6.374806880950928, | |
| "learning_rate": 1.7210069912672924e-06, | |
| "loss": 1.0467114448547363, | |
| "step": 1052 | |
| }, | |
| { | |
| "epoch": 0.8872053872053872, | |
| "grad_norm": 9.24715805053711, | |
| "learning_rate": 1.7197967630129533e-06, | |
| "loss": 0.5621042251586914, | |
| "step": 1054 | |
| }, | |
| { | |
| "epoch": 0.8888888888888888, | |
| "grad_norm": 3.0764286518096924, | |
| "learning_rate": 1.7185843992979805e-06, | |
| "loss": 0.9588031768798828, | |
| "step": 1056 | |
| }, | |
| { | |
| "epoch": 0.8905723905723906, | |
| "grad_norm": 3.0444071292877197, | |
| "learning_rate": 1.7173699042994778e-06, | |
| "loss": 0.9131466150283813, | |
| "step": 1058 | |
| }, | |
| { | |
| "epoch": 0.8922558922558923, | |
| "grad_norm": 7.547487735748291, | |
| "learning_rate": 1.716153282201891e-06, | |
| "loss": 0.9909827709197998, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 0.8939393939393939, | |
| "grad_norm": 3.859555959701538, | |
| "learning_rate": 1.7149345371969958e-06, | |
| "loss": 0.8949623107910156, | |
| "step": 1062 | |
| }, | |
| { | |
| "epoch": 0.8956228956228957, | |
| "grad_norm": 10.671557426452637, | |
| "learning_rate": 1.7137136734838809e-06, | |
| "loss": 0.8130732774734497, | |
| "step": 1064 | |
| }, | |
| { | |
| "epoch": 0.8973063973063973, | |
| "grad_norm": 2.6384527683258057, | |
| "learning_rate": 1.7124906952689354e-06, | |
| "loss": 1.0677348375320435, | |
| "step": 1066 | |
| }, | |
| { | |
| "epoch": 0.898989898989899, | |
| "grad_norm": 2.3599157333374023, | |
| "learning_rate": 1.7112656067658345e-06, | |
| "loss": 0.8169218301773071, | |
| "step": 1068 | |
| }, | |
| { | |
| "epoch": 0.9006734006734006, | |
| "grad_norm": 6.580990314483643, | |
| "learning_rate": 1.7100384121955229e-06, | |
| "loss": 0.9567373991012573, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 0.9023569023569024, | |
| "grad_norm": 2.7122886180877686, | |
| "learning_rate": 1.7088091157862026e-06, | |
| "loss": 1.2019579410552979, | |
| "step": 1072 | |
| }, | |
| { | |
| "epoch": 0.9040404040404041, | |
| "grad_norm": 2.5349674224853516, | |
| "learning_rate": 1.7075777217733169e-06, | |
| "loss": 0.8406597971916199, | |
| "step": 1074 | |
| }, | |
| { | |
| "epoch": 0.9057239057239057, | |
| "grad_norm": 6.190466403961182, | |
| "learning_rate": 1.7063442343995361e-06, | |
| "loss": 0.4906361401081085, | |
| "step": 1076 | |
| }, | |
| { | |
| "epoch": 0.9074074074074074, | |
| "grad_norm": 26.555025100708008, | |
| "learning_rate": 1.7051086579147436e-06, | |
| "loss": 1.0886037349700928, | |
| "step": 1078 | |
| }, | |
| { | |
| "epoch": 0.9090909090909091, | |
| "grad_norm": 3.0735490322113037, | |
| "learning_rate": 1.7038709965760198e-06, | |
| "loss": 0.9269078969955444, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 0.9107744107744108, | |
| "grad_norm": 2.295616865158081, | |
| "learning_rate": 1.7026312546476292e-06, | |
| "loss": 0.9460815191268921, | |
| "step": 1082 | |
| }, | |
| { | |
| "epoch": 0.9124579124579124, | |
| "grad_norm": 14.62086009979248, | |
| "learning_rate": 1.701389436401004e-06, | |
| "loss": 0.7059042453765869, | |
| "step": 1084 | |
| }, | |
| { | |
| "epoch": 0.9141414141414141, | |
| "grad_norm": 4.020232200622559, | |
| "learning_rate": 1.700145546114731e-06, | |
| "loss": 1.15854811668396, | |
| "step": 1086 | |
| }, | |
| { | |
| "epoch": 0.9158249158249159, | |
| "grad_norm": 4.303004264831543, | |
| "learning_rate": 1.698899588074535e-06, | |
| "loss": 0.9253766536712646, | |
| "step": 1088 | |
| }, | |
| { | |
| "epoch": 0.9175084175084175, | |
| "grad_norm": 2.722356081008911, | |
| "learning_rate": 1.6976515665732663e-06, | |
| "loss": 0.9150590896606445, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 0.9191919191919192, | |
| "grad_norm": 8.33704948425293, | |
| "learning_rate": 1.6964014859108837e-06, | |
| "loss": 1.0268497467041016, | |
| "step": 1092 | |
| }, | |
| { | |
| "epoch": 0.9208754208754208, | |
| "grad_norm": 4.683021068572998, | |
| "learning_rate": 1.6951493503944414e-06, | |
| "loss": 0.9068109393119812, | |
| "step": 1094 | |
| }, | |
| { | |
| "epoch": 0.9225589225589226, | |
| "grad_norm": 10.631436347961426, | |
| "learning_rate": 1.693895164338073e-06, | |
| "loss": 0.7467716932296753, | |
| "step": 1096 | |
| }, | |
| { | |
| "epoch": 0.9242424242424242, | |
| "grad_norm": 8.113303184509277, | |
| "learning_rate": 1.6926389320629768e-06, | |
| "loss": 0.384426474571228, | |
| "step": 1098 | |
| }, | |
| { | |
| "epoch": 0.9259259259259259, | |
| "grad_norm": 5.846349239349365, | |
| "learning_rate": 1.6913806578974016e-06, | |
| "loss": 0.9705697298049927, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.9276094276094277, | |
| "grad_norm": 23.626840591430664, | |
| "learning_rate": 1.690120346176632e-06, | |
| "loss": 0.5436959266662598, | |
| "step": 1102 | |
| }, | |
| { | |
| "epoch": 0.9292929292929293, | |
| "grad_norm": 4.793126106262207, | |
| "learning_rate": 1.6888580012429717e-06, | |
| "loss": 1.117484450340271, | |
| "step": 1104 | |
| }, | |
| { | |
| "epoch": 0.930976430976431, | |
| "grad_norm": 10.387064933776855, | |
| "learning_rate": 1.68759362744573e-06, | |
| "loss": 1.031156301498413, | |
| "step": 1106 | |
| }, | |
| { | |
| "epoch": 0.9326599326599326, | |
| "grad_norm": 14.877448081970215, | |
| "learning_rate": 1.686327229141207e-06, | |
| "loss": 0.8722270131111145, | |
| "step": 1108 | |
| }, | |
| { | |
| "epoch": 0.9343434343434344, | |
| "grad_norm": 3.464400053024292, | |
| "learning_rate": 1.6850588106926773e-06, | |
| "loss": 1.2158129215240479, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 0.936026936026936, | |
| "grad_norm": 4.9829421043396, | |
| "learning_rate": 1.6837883764703765e-06, | |
| "loss": 1.1986503601074219, | |
| "step": 1112 | |
| }, | |
| { | |
| "epoch": 0.9377104377104377, | |
| "grad_norm": 3.5053603649139404, | |
| "learning_rate": 1.6825159308514847e-06, | |
| "loss": 1.0430546998977661, | |
| "step": 1114 | |
| }, | |
| { | |
| "epoch": 0.9393939393939394, | |
| "grad_norm": 6.993835926055908, | |
| "learning_rate": 1.6812414782201127e-06, | |
| "loss": 1.1407470703125, | |
| "step": 1116 | |
| }, | |
| { | |
| "epoch": 0.9410774410774411, | |
| "grad_norm": 6.774454116821289, | |
| "learning_rate": 1.6799650229672862e-06, | |
| "loss": 1.0087709426879883, | |
| "step": 1118 | |
| }, | |
| { | |
| "epoch": 0.9427609427609428, | |
| "grad_norm": 3.8694427013397217, | |
| "learning_rate": 1.6786865694909301e-06, | |
| "loss": 1.2728749513626099, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 0.9444444444444444, | |
| "grad_norm": 8.199234962463379, | |
| "learning_rate": 1.6774061221958552e-06, | |
| "loss": 0.7386917471885681, | |
| "step": 1122 | |
| }, | |
| { | |
| "epoch": 0.9461279461279462, | |
| "grad_norm": 3.474858283996582, | |
| "learning_rate": 1.6761236854937406e-06, | |
| "loss": 0.8540256023406982, | |
| "step": 1124 | |
| }, | |
| { | |
| "epoch": 0.9478114478114478, | |
| "grad_norm": 5.611124038696289, | |
| "learning_rate": 1.674839263803121e-06, | |
| "loss": 0.849441409111023, | |
| "step": 1126 | |
| }, | |
| { | |
| "epoch": 0.9494949494949495, | |
| "grad_norm": 3.0861027240753174, | |
| "learning_rate": 1.6735528615493686e-06, | |
| "loss": 0.9585309028625488, | |
| "step": 1128 | |
| }, | |
| { | |
| "epoch": 0.9511784511784511, | |
| "grad_norm": 20.665544509887695, | |
| "learning_rate": 1.6722644831646815e-06, | |
| "loss": 0.9195750951766968, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 0.9528619528619529, | |
| "grad_norm": 2.3980801105499268, | |
| "learning_rate": 1.6709741330880644e-06, | |
| "loss": 0.9300163984298706, | |
| "step": 1132 | |
| }, | |
| { | |
| "epoch": 0.9545454545454546, | |
| "grad_norm": 11.30346393585205, | |
| "learning_rate": 1.6696818157653172e-06, | |
| "loss": 0.9436147212982178, | |
| "step": 1134 | |
| }, | |
| { | |
| "epoch": 0.9562289562289562, | |
| "grad_norm": 15.200255393981934, | |
| "learning_rate": 1.6683875356490157e-06, | |
| "loss": 0.83840012550354, | |
| "step": 1136 | |
| }, | |
| { | |
| "epoch": 0.9579124579124579, | |
| "grad_norm": 11.014248847961426, | |
| "learning_rate": 1.6670912971985002e-06, | |
| "loss": 0.7340762615203857, | |
| "step": 1138 | |
| }, | |
| { | |
| "epoch": 0.9595959595959596, | |
| "grad_norm": 3.3604698181152344, | |
| "learning_rate": 1.6657931048798576e-06, | |
| "loss": 0.5434874296188354, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 0.9612794612794613, | |
| "grad_norm": 8.75454330444336, | |
| "learning_rate": 1.6644929631659061e-06, | |
| "loss": 0.8939019441604614, | |
| "step": 1142 | |
| }, | |
| { | |
| "epoch": 0.9629629629629629, | |
| "grad_norm": 14.948843955993652, | |
| "learning_rate": 1.6631908765361818e-06, | |
| "loss": 0.6150766611099243, | |
| "step": 1144 | |
| }, | |
| { | |
| "epoch": 0.9646464646464646, | |
| "grad_norm": 2.9250028133392334, | |
| "learning_rate": 1.6618868494769202e-06, | |
| "loss": 0.8925027847290039, | |
| "step": 1146 | |
| }, | |
| { | |
| "epoch": 0.9663299663299664, | |
| "grad_norm": 10.11111831665039, | |
| "learning_rate": 1.6605808864810437e-06, | |
| "loss": 0.7491191029548645, | |
| "step": 1148 | |
| }, | |
| { | |
| "epoch": 0.968013468013468, | |
| "grad_norm": 8.039884567260742, | |
| "learning_rate": 1.6592729920481443e-06, | |
| "loss": 0.9510982036590576, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.9696969696969697, | |
| "grad_norm": 11.84205150604248, | |
| "learning_rate": 1.6579631706844683e-06, | |
| "loss": 0.6039742231369019, | |
| "step": 1152 | |
| }, | |
| { | |
| "epoch": 0.9713804713804713, | |
| "grad_norm": 26.592609405517578, | |
| "learning_rate": 1.6566514269029015e-06, | |
| "loss": 0.9072830677032471, | |
| "step": 1154 | |
| }, | |
| { | |
| "epoch": 0.9730639730639731, | |
| "grad_norm": 4.943899154663086, | |
| "learning_rate": 1.6553377652229536e-06, | |
| "loss": 0.5825839042663574, | |
| "step": 1156 | |
| }, | |
| { | |
| "epoch": 0.9747474747474747, | |
| "grad_norm": 5.413260459899902, | |
| "learning_rate": 1.6540221901707413e-06, | |
| "loss": 0.9307392835617065, | |
| "step": 1158 | |
| }, | |
| { | |
| "epoch": 0.9764309764309764, | |
| "grad_norm": 6.360762119293213, | |
| "learning_rate": 1.6527047062789743e-06, | |
| "loss": 0.4215626120567322, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 0.9781144781144782, | |
| "grad_norm": 9.286370277404785, | |
| "learning_rate": 1.6513853180869391e-06, | |
| "loss": 1.088386058807373, | |
| "step": 1162 | |
| }, | |
| { | |
| "epoch": 0.9797979797979798, | |
| "grad_norm": 6.5988993644714355, | |
| "learning_rate": 1.6500640301404832e-06, | |
| "loss": 0.6811473965644836, | |
| "step": 1164 | |
| }, | |
| { | |
| "epoch": 0.9814814814814815, | |
| "grad_norm": 9.595373153686523, | |
| "learning_rate": 1.6487408469919992e-06, | |
| "loss": 0.7789331674575806, | |
| "step": 1166 | |
| }, | |
| { | |
| "epoch": 0.9831649831649831, | |
| "grad_norm": 5.964288234710693, | |
| "learning_rate": 1.6474157732004101e-06, | |
| "loss": 0.8091530203819275, | |
| "step": 1168 | |
| }, | |
| { | |
| "epoch": 0.9848484848484849, | |
| "grad_norm": 11.993547439575195, | |
| "learning_rate": 1.6460888133311526e-06, | |
| "loss": 0.832628607749939, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 0.9865319865319865, | |
| "grad_norm": 3.2034716606140137, | |
| "learning_rate": 1.6447599719561616e-06, | |
| "loss": 0.612036406993866, | |
| "step": 1172 | |
| }, | |
| { | |
| "epoch": 0.9882154882154882, | |
| "grad_norm": 5.53648567199707, | |
| "learning_rate": 1.6434292536538547e-06, | |
| "loss": 0.9042845964431763, | |
| "step": 1174 | |
| }, | |
| { | |
| "epoch": 0.98989898989899, | |
| "grad_norm": 5.690428733825684, | |
| "learning_rate": 1.6420966630091168e-06, | |
| "loss": 0.44773343205451965, | |
| "step": 1176 | |
| }, | |
| { | |
| "epoch": 0.9915824915824916, | |
| "grad_norm": 11.099560737609863, | |
| "learning_rate": 1.6407622046132831e-06, | |
| "loss": 1.0306243896484375, | |
| "step": 1178 | |
| }, | |
| { | |
| "epoch": 0.9932659932659933, | |
| "grad_norm": 11.031452178955078, | |
| "learning_rate": 1.6394258830641243e-06, | |
| "loss": 0.42686060070991516, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 0.9949494949494949, | |
| "grad_norm": 2.295154094696045, | |
| "learning_rate": 1.6380877029658303e-06, | |
| "loss": 0.8935648202896118, | |
| "step": 1182 | |
| }, | |
| { | |
| "epoch": 0.9966329966329966, | |
| "grad_norm": 5.188049793243408, | |
| "learning_rate": 1.6367476689289947e-06, | |
| "loss": 1.000899076461792, | |
| "step": 1184 | |
| }, | |
| { | |
| "epoch": 0.9983164983164983, | |
| "grad_norm": 5.049581527709961, | |
| "learning_rate": 1.6354057855705984e-06, | |
| "loss": 0.6279634833335876, | |
| "step": 1186 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 15.246573448181152, | |
| "learning_rate": 1.6340620575139947e-06, | |
| "loss": 0.6900116205215454, | |
| "step": 1188 | |
| }, | |
| { | |
| "epoch": 1.0016835016835017, | |
| "grad_norm": 5.413362503051758, | |
| "learning_rate": 1.6327164893888913e-06, | |
| "loss": 0.39591357111930847, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 1.0033670033670035, | |
| "grad_norm": 7.250094890594482, | |
| "learning_rate": 1.6313690858313374e-06, | |
| "loss": 0.41023939847946167, | |
| "step": 1192 | |
| }, | |
| { | |
| "epoch": 1.005050505050505, | |
| "grad_norm": 4.482004642486572, | |
| "learning_rate": 1.6300198514837045e-06, | |
| "loss": 1.090850591659546, | |
| "step": 1194 | |
| }, | |
| { | |
| "epoch": 1.0067340067340067, | |
| "grad_norm": 15.401289939880371, | |
| "learning_rate": 1.6286687909946732e-06, | |
| "loss": 0.8496726751327515, | |
| "step": 1196 | |
| }, | |
| { | |
| "epoch": 1.0084175084175084, | |
| "grad_norm": 2.563889741897583, | |
| "learning_rate": 1.6273159090192152e-06, | |
| "loss": 0.9915731549263, | |
| "step": 1198 | |
| }, | |
| { | |
| "epoch": 1.0101010101010102, | |
| "grad_norm": 8.505236625671387, | |
| "learning_rate": 1.6259612102185778e-06, | |
| "loss": 1.0761607885360718, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 1.0117845117845117, | |
| "grad_norm": 2.467069625854492, | |
| "learning_rate": 1.6246046992602685e-06, | |
| "loss": 0.9234099984169006, | |
| "step": 1202 | |
| }, | |
| { | |
| "epoch": 1.0134680134680134, | |
| "grad_norm": 2.2489092350006104, | |
| "learning_rate": 1.6232463808180385e-06, | |
| "loss": 0.9091596007347107, | |
| "step": 1204 | |
| }, | |
| { | |
| "epoch": 1.0151515151515151, | |
| "grad_norm": 24.074737548828125, | |
| "learning_rate": 1.6218862595718664e-06, | |
| "loss": 1.0585005283355713, | |
| "step": 1206 | |
| }, | |
| { | |
| "epoch": 1.0168350168350169, | |
| "grad_norm": 11.167364120483398, | |
| "learning_rate": 1.620524340207942e-06, | |
| "loss": 0.6014789938926697, | |
| "step": 1208 | |
| }, | |
| { | |
| "epoch": 1.0185185185185186, | |
| "grad_norm": 9.423373222351074, | |
| "learning_rate": 1.6191606274186504e-06, | |
| "loss": 0.5883907079696655, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 1.02020202020202, | |
| "grad_norm": 4.673365592956543, | |
| "learning_rate": 1.6177951259025562e-06, | |
| "loss": 0.5414766669273376, | |
| "step": 1212 | |
| }, | |
| { | |
| "epoch": 1.0218855218855218, | |
| "grad_norm": 8.354643821716309, | |
| "learning_rate": 1.6164278403643867e-06, | |
| "loss": 0.7363089919090271, | |
| "step": 1214 | |
| }, | |
| { | |
| "epoch": 1.0235690235690236, | |
| "grad_norm": 6.500521183013916, | |
| "learning_rate": 1.6150587755150158e-06, | |
| "loss": 0.38967499136924744, | |
| "step": 1216 | |
| }, | |
| { | |
| "epoch": 1.0252525252525253, | |
| "grad_norm": 24.9106388092041, | |
| "learning_rate": 1.6136879360714478e-06, | |
| "loss": 0.9002467393875122, | |
| "step": 1218 | |
| }, | |
| { | |
| "epoch": 1.026936026936027, | |
| "grad_norm": 3.819883346557617, | |
| "learning_rate": 1.612315326756802e-06, | |
| "loss": 0.7683883905410767, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 1.0286195286195285, | |
| "grad_norm": 34.932952880859375, | |
| "learning_rate": 1.6109409523002942e-06, | |
| "loss": 0.9174226522445679, | |
| "step": 1222 | |
| }, | |
| { | |
| "epoch": 1.0303030303030303, | |
| "grad_norm": 2.4514238834381104, | |
| "learning_rate": 1.6095648174372231e-06, | |
| "loss": 1.0709283351898193, | |
| "step": 1224 | |
| }, | |
| { | |
| "epoch": 1.031986531986532, | |
| "grad_norm": 4.087513446807861, | |
| "learning_rate": 1.6081869269089522e-06, | |
| "loss": 0.6256165504455566, | |
| "step": 1226 | |
| }, | |
| { | |
| "epoch": 1.0336700336700337, | |
| "grad_norm": 3.7036447525024414, | |
| "learning_rate": 1.606807285462894e-06, | |
| "loss": 0.8476806282997131, | |
| "step": 1228 | |
| }, | |
| { | |
| "epoch": 1.0353535353535352, | |
| "grad_norm": 2.504366397857666, | |
| "learning_rate": 1.6054258978524943e-06, | |
| "loss": 0.8022794127464294, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 1.037037037037037, | |
| "grad_norm": 11.632919311523438, | |
| "learning_rate": 1.6040427688372143e-06, | |
| "loss": 0.4790239632129669, | |
| "step": 1232 | |
| }, | |
| { | |
| "epoch": 1.0387205387205387, | |
| "grad_norm": 1.2272193431854248, | |
| "learning_rate": 1.602657903182515e-06, | |
| "loss": 0.7812309265136719, | |
| "step": 1234 | |
| }, | |
| { | |
| "epoch": 1.0404040404040404, | |
| "grad_norm": 1.8513426780700684, | |
| "learning_rate": 1.6012713056598423e-06, | |
| "loss": 0.7921426892280579, | |
| "step": 1236 | |
| }, | |
| { | |
| "epoch": 1.0420875420875422, | |
| "grad_norm": 4.828263282775879, | |
| "learning_rate": 1.599882981046607e-06, | |
| "loss": 0.5412895679473877, | |
| "step": 1238 | |
| }, | |
| { | |
| "epoch": 1.0437710437710437, | |
| "grad_norm": 2.7645084857940674, | |
| "learning_rate": 1.5984929341261724e-06, | |
| "loss": 0.9840224981307983, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 1.0454545454545454, | |
| "grad_norm": 3.864872455596924, | |
| "learning_rate": 1.5971011696878342e-06, | |
| "loss": 0.9463930130004883, | |
| "step": 1242 | |
| }, | |
| { | |
| "epoch": 1.0471380471380471, | |
| "grad_norm": 4.084227561950684, | |
| "learning_rate": 1.5957076925268072e-06, | |
| "loss": 0.639992356300354, | |
| "step": 1244 | |
| }, | |
| { | |
| "epoch": 1.0488215488215489, | |
| "grad_norm": 3.3840675354003906, | |
| "learning_rate": 1.5943125074442064e-06, | |
| "loss": 0.6726884841918945, | |
| "step": 1246 | |
| }, | |
| { | |
| "epoch": 1.0505050505050506, | |
| "grad_norm": 2.852729558944702, | |
| "learning_rate": 1.5929156192470313e-06, | |
| "loss": 0.9147169589996338, | |
| "step": 1248 | |
| }, | |
| { | |
| "epoch": 1.0521885521885521, | |
| "grad_norm": 4.347400665283203, | |
| "learning_rate": 1.5915170327481491e-06, | |
| "loss": 0.7575803995132446, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 1.0538720538720538, | |
| "grad_norm": 12.422771453857422, | |
| "learning_rate": 1.5901167527662796e-06, | |
| "loss": 0.6838544607162476, | |
| "step": 1252 | |
| }, | |
| { | |
| "epoch": 1.0555555555555556, | |
| "grad_norm": 11.088696479797363, | |
| "learning_rate": 1.5887147841259758e-06, | |
| "loss": 0.9683138728141785, | |
| "step": 1254 | |
| }, | |
| { | |
| "epoch": 1.0572390572390573, | |
| "grad_norm": 5.527649879455566, | |
| "learning_rate": 1.5873111316576102e-06, | |
| "loss": 0.7508020401000977, | |
| "step": 1256 | |
| }, | |
| { | |
| "epoch": 1.0589225589225588, | |
| "grad_norm": 4.718619346618652, | |
| "learning_rate": 1.5859058001973555e-06, | |
| "loss": 0.5126559734344482, | |
| "step": 1258 | |
| }, | |
| { | |
| "epoch": 1.0606060606060606, | |
| "grad_norm": 5.101532459259033, | |
| "learning_rate": 1.5844987945871701e-06, | |
| "loss": 0.77130526304245, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 1.0622895622895623, | |
| "grad_norm": 5.325422763824463, | |
| "learning_rate": 1.5830901196747805e-06, | |
| "loss": 0.6283507347106934, | |
| "step": 1262 | |
| }, | |
| { | |
| "epoch": 1.063973063973064, | |
| "grad_norm": 15.08485221862793, | |
| "learning_rate": 1.5816797803136647e-06, | |
| "loss": 0.7283768653869629, | |
| "step": 1264 | |
| }, | |
| { | |
| "epoch": 1.0656565656565657, | |
| "grad_norm": 3.9415273666381836, | |
| "learning_rate": 1.5802677813630348e-06, | |
| "loss": 0.6957473754882812, | |
| "step": 1266 | |
| }, | |
| { | |
| "epoch": 1.0673400673400673, | |
| "grad_norm": 10.470375061035156, | |
| "learning_rate": 1.5788541276878212e-06, | |
| "loss": 0.6225847005844116, | |
| "step": 1268 | |
| }, | |
| { | |
| "epoch": 1.069023569023569, | |
| "grad_norm": 13.44847583770752, | |
| "learning_rate": 1.577438824158656e-06, | |
| "loss": 0.6269044280052185, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 1.0707070707070707, | |
| "grad_norm": 1.2674486637115479, | |
| "learning_rate": 1.5760218756518548e-06, | |
| "loss": 0.6266012191772461, | |
| "step": 1272 | |
| }, | |
| { | |
| "epoch": 1.0723905723905724, | |
| "grad_norm": 25.154924392700195, | |
| "learning_rate": 1.5746032870494022e-06, | |
| "loss": 0.4940655827522278, | |
| "step": 1274 | |
| }, | |
| { | |
| "epoch": 1.074074074074074, | |
| "grad_norm": 5.607649326324463, | |
| "learning_rate": 1.5731830632389322e-06, | |
| "loss": 0.6989841461181641, | |
| "step": 1276 | |
| }, | |
| { | |
| "epoch": 1.0757575757575757, | |
| "grad_norm": 111.35026550292969, | |
| "learning_rate": 1.5717612091137137e-06, | |
| "loss": 0.9674046039581299, | |
| "step": 1278 | |
| }, | |
| { | |
| "epoch": 1.0774410774410774, | |
| "grad_norm": 36.46900939941406, | |
| "learning_rate": 1.570337729572632e-06, | |
| "loss": 0.5374500751495361, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 1.0791245791245792, | |
| "grad_norm": 7.345931529998779, | |
| "learning_rate": 1.5689126295201738e-06, | |
| "loss": 0.3302645683288574, | |
| "step": 1282 | |
| }, | |
| { | |
| "epoch": 1.0808080808080809, | |
| "grad_norm": 4.141447067260742, | |
| "learning_rate": 1.5674859138664076e-06, | |
| "loss": 1.053006887435913, | |
| "step": 1284 | |
| }, | |
| { | |
| "epoch": 1.0824915824915824, | |
| "grad_norm": 18.335811614990234, | |
| "learning_rate": 1.5660575875269696e-06, | |
| "loss": 0.9029141664505005, | |
| "step": 1286 | |
| }, | |
| { | |
| "epoch": 1.0841750841750841, | |
| "grad_norm": 4.0398850440979, | |
| "learning_rate": 1.5646276554230454e-06, | |
| "loss": 0.5438280701637268, | |
| "step": 1288 | |
| }, | |
| { | |
| "epoch": 1.0858585858585859, | |
| "grad_norm": 20.008378982543945, | |
| "learning_rate": 1.563196122481352e-06, | |
| "loss": 0.6676660776138306, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 1.0875420875420876, | |
| "grad_norm": 3.3898210525512695, | |
| "learning_rate": 1.5617629936341225e-06, | |
| "loss": 1.1070988178253174, | |
| "step": 1292 | |
| }, | |
| { | |
| "epoch": 1.0892255892255893, | |
| "grad_norm": 5.172207355499268, | |
| "learning_rate": 1.5603282738190898e-06, | |
| "loss": 0.7852774858474731, | |
| "step": 1294 | |
| }, | |
| { | |
| "epoch": 1.0909090909090908, | |
| "grad_norm": 14.538901329040527, | |
| "learning_rate": 1.5588919679794668e-06, | |
| "loss": 0.583429753780365, | |
| "step": 1296 | |
| }, | |
| { | |
| "epoch": 1.0925925925925926, | |
| "grad_norm": 6.987974166870117, | |
| "learning_rate": 1.5574540810639312e-06, | |
| "loss": 0.6342300176620483, | |
| "step": 1298 | |
| }, | |
| { | |
| "epoch": 1.0942760942760943, | |
| "grad_norm": 13.806412696838379, | |
| "learning_rate": 1.556014618026609e-06, | |
| "loss": 0.6277361512184143, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 1.095959595959596, | |
| "grad_norm": 11.233121871948242, | |
| "learning_rate": 1.5545735838270556e-06, | |
| "loss": 0.6347372531890869, | |
| "step": 1302 | |
| }, | |
| { | |
| "epoch": 1.0976430976430978, | |
| "grad_norm": 4.906972885131836, | |
| "learning_rate": 1.5531309834302403e-06, | |
| "loss": 0.5694692134857178, | |
| "step": 1304 | |
| }, | |
| { | |
| "epoch": 1.0993265993265993, | |
| "grad_norm": 13.255314826965332, | |
| "learning_rate": 1.5516868218065283e-06, | |
| "loss": 0.5988457798957825, | |
| "step": 1306 | |
| }, | |
| { | |
| "epoch": 1.101010101010101, | |
| "grad_norm": 18.89320182800293, | |
| "learning_rate": 1.5502411039316642e-06, | |
| "loss": 0.5894651412963867, | |
| "step": 1308 | |
| }, | |
| { | |
| "epoch": 1.1026936026936027, | |
| "grad_norm": 2.3720078468322754, | |
| "learning_rate": 1.5487938347867542e-06, | |
| "loss": 0.39072656631469727, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 1.1043771043771045, | |
| "grad_norm": 3.8021674156188965, | |
| "learning_rate": 1.5473450193582498e-06, | |
| "loss": 1.1303743124008179, | |
| "step": 1312 | |
| }, | |
| { | |
| "epoch": 1.106060606060606, | |
| "grad_norm": 12.77686882019043, | |
| "learning_rate": 1.5458946626379293e-06, | |
| "loss": 0.9466381072998047, | |
| "step": 1314 | |
| }, | |
| { | |
| "epoch": 1.1077441077441077, | |
| "grad_norm": 16.367809295654297, | |
| "learning_rate": 1.5444427696228822e-06, | |
| "loss": 0.896185576915741, | |
| "step": 1316 | |
| }, | |
| { | |
| "epoch": 1.1094276094276094, | |
| "grad_norm": 4.367947578430176, | |
| "learning_rate": 1.5429893453154906e-06, | |
| "loss": 0.9018317461013794, | |
| "step": 1318 | |
| }, | |
| { | |
| "epoch": 1.1111111111111112, | |
| "grad_norm": 11.2949857711792, | |
| "learning_rate": 1.5415343947234132e-06, | |
| "loss": 0.5716771483421326, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 1.112794612794613, | |
| "grad_norm": 3.638136386871338, | |
| "learning_rate": 1.5400779228595663e-06, | |
| "loss": 0.8265483379364014, | |
| "step": 1322 | |
| }, | |
| { | |
| "epoch": 1.1144781144781144, | |
| "grad_norm": 23.661731719970703, | |
| "learning_rate": 1.538619934742109e-06, | |
| "loss": 0.5200953483581543, | |
| "step": 1324 | |
| }, | |
| { | |
| "epoch": 1.1161616161616161, | |
| "grad_norm": 5.394420146942139, | |
| "learning_rate": 1.5371604353944235e-06, | |
| "loss": 0.8769002556800842, | |
| "step": 1326 | |
| }, | |
| { | |
| "epoch": 1.1178451178451179, | |
| "grad_norm": 3.2108795642852783, | |
| "learning_rate": 1.5356994298450989e-06, | |
| "loss": 0.6526933312416077, | |
| "step": 1328 | |
| }, | |
| { | |
| "epoch": 1.1195286195286196, | |
| "grad_norm": 6.397909164428711, | |
| "learning_rate": 1.5342369231279145e-06, | |
| "loss": 0.994263768196106, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 1.121212121212121, | |
| "grad_norm": 5.88171911239624, | |
| "learning_rate": 1.5327729202818212e-06, | |
| "loss": 0.7015285491943359, | |
| "step": 1332 | |
| }, | |
| { | |
| "epoch": 1.1228956228956228, | |
| "grad_norm": 2.6668052673339844, | |
| "learning_rate": 1.5313074263509242e-06, | |
| "loss": 1.0788037776947021, | |
| "step": 1334 | |
| }, | |
| { | |
| "epoch": 1.1245791245791246, | |
| "grad_norm": 5.609066009521484, | |
| "learning_rate": 1.5298404463844675e-06, | |
| "loss": 0.5919516086578369, | |
| "step": 1336 | |
| }, | |
| { | |
| "epoch": 1.1262626262626263, | |
| "grad_norm": 3.103581428527832, | |
| "learning_rate": 1.5283719854368142e-06, | |
| "loss": 0.6757215857505798, | |
| "step": 1338 | |
| }, | |
| { | |
| "epoch": 1.127946127946128, | |
| "grad_norm": 2.8614747524261475, | |
| "learning_rate": 1.5269020485674299e-06, | |
| "loss": 0.4805062413215637, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 1.1296296296296295, | |
| "grad_norm": 4.264964580535889, | |
| "learning_rate": 1.5254306408408657e-06, | |
| "loss": 0.8218073844909668, | |
| "step": 1342 | |
| }, | |
| { | |
| "epoch": 1.1313131313131313, | |
| "grad_norm": 3.358206272125244, | |
| "learning_rate": 1.5239577673267401e-06, | |
| "loss": 1.1272187232971191, | |
| "step": 1344 | |
| }, | |
| { | |
| "epoch": 1.132996632996633, | |
| "grad_norm": 5.68251371383667, | |
| "learning_rate": 1.5224834330997222e-06, | |
| "loss": 1.0079560279846191, | |
| "step": 1346 | |
| }, | |
| { | |
| "epoch": 1.1346801346801347, | |
| "grad_norm": 5.610229969024658, | |
| "learning_rate": 1.5210076432395138e-06, | |
| "loss": 0.6960790157318115, | |
| "step": 1348 | |
| }, | |
| { | |
| "epoch": 1.1363636363636362, | |
| "grad_norm": 6.409191608428955, | |
| "learning_rate": 1.5195304028308324e-06, | |
| "loss": 0.48329275846481323, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 1.138047138047138, | |
| "grad_norm": 17.214502334594727, | |
| "learning_rate": 1.5180517169633914e-06, | |
| "loss": 0.2905687391757965, | |
| "step": 1352 | |
| }, | |
| { | |
| "epoch": 1.1397306397306397, | |
| "grad_norm": 4.7634406089782715, | |
| "learning_rate": 1.5165715907318874e-06, | |
| "loss": 0.9956916570663452, | |
| "step": 1354 | |
| }, | |
| { | |
| "epoch": 1.1414141414141414, | |
| "grad_norm": 3.8894872665405273, | |
| "learning_rate": 1.5150900292359775e-06, | |
| "loss": 1.0472840070724487, | |
| "step": 1356 | |
| }, | |
| { | |
| "epoch": 1.1430976430976432, | |
| "grad_norm": 28.076671600341797, | |
| "learning_rate": 1.513607037580264e-06, | |
| "loss": 0.7530080676078796, | |
| "step": 1358 | |
| }, | |
| { | |
| "epoch": 1.144781144781145, | |
| "grad_norm": 5.491020679473877, | |
| "learning_rate": 1.5121226208742771e-06, | |
| "loss": 0.6445476412773132, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 1.1464646464646464, | |
| "grad_norm": 2.686913251876831, | |
| "learning_rate": 1.5106367842324578e-06, | |
| "loss": 0.8437654376029968, | |
| "step": 1362 | |
| }, | |
| { | |
| "epoch": 1.1481481481481481, | |
| "grad_norm": 35.050662994384766, | |
| "learning_rate": 1.5091495327741375e-06, | |
| "loss": 0.8638776540756226, | |
| "step": 1364 | |
| }, | |
| { | |
| "epoch": 1.1498316498316499, | |
| "grad_norm": 3.9783761501312256, | |
| "learning_rate": 1.507660871623524e-06, | |
| "loss": 0.7111606597900391, | |
| "step": 1366 | |
| }, | |
| { | |
| "epoch": 1.1515151515151516, | |
| "grad_norm": 14.50291633605957, | |
| "learning_rate": 1.5061708059096807e-06, | |
| "loss": 0.764883279800415, | |
| "step": 1368 | |
| }, | |
| { | |
| "epoch": 1.1531986531986531, | |
| "grad_norm": 2.154838800430298, | |
| "learning_rate": 1.5046793407665114e-06, | |
| "loss": 1.0397025346755981, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 1.1548821548821548, | |
| "grad_norm": 2.365380048751831, | |
| "learning_rate": 1.503186481332741e-06, | |
| "loss": 1.0539653301239014, | |
| "step": 1372 | |
| }, | |
| { | |
| "epoch": 1.1565656565656566, | |
| "grad_norm": 8.504420280456543, | |
| "learning_rate": 1.5016922327518986e-06, | |
| "loss": 0.4366611838340759, | |
| "step": 1374 | |
| }, | |
| { | |
| "epoch": 1.1582491582491583, | |
| "grad_norm": 2.675044298171997, | |
| "learning_rate": 1.5001966001722986e-06, | |
| "loss": 0.398744136095047, | |
| "step": 1376 | |
| }, | |
| { | |
| "epoch": 1.15993265993266, | |
| "grad_norm": 8.629570960998535, | |
| "learning_rate": 1.4986995887470248e-06, | |
| "loss": 0.8844636678695679, | |
| "step": 1378 | |
| }, | |
| { | |
| "epoch": 1.1616161616161615, | |
| "grad_norm": 2.5665788650512695, | |
| "learning_rate": 1.497201203633912e-06, | |
| "loss": 0.6772328019142151, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 1.1632996632996633, | |
| "grad_norm": 9.2289457321167, | |
| "learning_rate": 1.4957014499955265e-06, | |
| "loss": 0.5273948907852173, | |
| "step": 1382 | |
| }, | |
| { | |
| "epoch": 1.164983164983165, | |
| "grad_norm": 4.406887054443359, | |
| "learning_rate": 1.4942003329991513e-06, | |
| "loss": 0.36302030086517334, | |
| "step": 1384 | |
| }, | |
| { | |
| "epoch": 1.1666666666666667, | |
| "grad_norm": 14.721182823181152, | |
| "learning_rate": 1.492697857816766e-06, | |
| "loss": 0.5152138471603394, | |
| "step": 1386 | |
| }, | |
| { | |
| "epoch": 1.1683501683501682, | |
| "grad_norm": 2.9244027137756348, | |
| "learning_rate": 1.491194029625029e-06, | |
| "loss": 0.6069843173027039, | |
| "step": 1388 | |
| }, | |
| { | |
| "epoch": 1.17003367003367, | |
| "grad_norm": 5.622206687927246, | |
| "learning_rate": 1.489688853605262e-06, | |
| "loss": 0.8698340654373169, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 1.1717171717171717, | |
| "grad_norm": 3.113487482070923, | |
| "learning_rate": 1.4881823349434296e-06, | |
| "loss": 0.8122848272323608, | |
| "step": 1392 | |
| }, | |
| { | |
| "epoch": 1.1734006734006734, | |
| "grad_norm": 8.594972610473633, | |
| "learning_rate": 1.4866744788301226e-06, | |
| "loss": 0.681936502456665, | |
| "step": 1394 | |
| }, | |
| { | |
| "epoch": 1.1750841750841752, | |
| "grad_norm": 2.1322364807128906, | |
| "learning_rate": 1.485165290460539e-06, | |
| "loss": 0.571365237236023, | |
| "step": 1396 | |
| }, | |
| { | |
| "epoch": 1.1767676767676767, | |
| "grad_norm": 3.1892471313476562, | |
| "learning_rate": 1.4836547750344688e-06, | |
| "loss": 0.7035370469093323, | |
| "step": 1398 | |
| }, | |
| { | |
| "epoch": 1.1784511784511784, | |
| "grad_norm": 15.387435913085938, | |
| "learning_rate": 1.4821429377562725e-06, | |
| "loss": 0.49107053875923157, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 1.1801346801346801, | |
| "grad_norm": 2.782883644104004, | |
| "learning_rate": 1.4806297838348653e-06, | |
| "loss": 0.9246771931648254, | |
| "step": 1402 | |
| }, | |
| { | |
| "epoch": 1.1818181818181819, | |
| "grad_norm": 5.081911563873291, | |
| "learning_rate": 1.4791153184837e-06, | |
| "loss": 0.7164801955223083, | |
| "step": 1404 | |
| }, | |
| { | |
| "epoch": 1.1835016835016834, | |
| "grad_norm": 11.42972469329834, | |
| "learning_rate": 1.4775995469207467e-06, | |
| "loss": 0.6407367587089539, | |
| "step": 1406 | |
| }, | |
| { | |
| "epoch": 1.1851851851851851, | |
| "grad_norm": 5.799728870391846, | |
| "learning_rate": 1.476082474368476e-06, | |
| "loss": 0.9986523389816284, | |
| "step": 1408 | |
| }, | |
| { | |
| "epoch": 1.1868686868686869, | |
| "grad_norm": 4.796317100524902, | |
| "learning_rate": 1.4745641060538407e-06, | |
| "loss": 0.700546145439148, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 1.1885521885521886, | |
| "grad_norm": 21.660324096679688, | |
| "learning_rate": 1.4730444472082597e-06, | |
| "loss": 0.741939902305603, | |
| "step": 1412 | |
| }, | |
| { | |
| "epoch": 1.1902356902356903, | |
| "grad_norm": 3.5754830837249756, | |
| "learning_rate": 1.471523503067596e-06, | |
| "loss": 0.7933897972106934, | |
| "step": 1414 | |
| }, | |
| { | |
| "epoch": 1.1919191919191918, | |
| "grad_norm": 6.275886535644531, | |
| "learning_rate": 1.4700012788721431e-06, | |
| "loss": 0.7294763326644897, | |
| "step": 1416 | |
| }, | |
| { | |
| "epoch": 1.1936026936026936, | |
| "grad_norm": 11.374263763427734, | |
| "learning_rate": 1.4684777798666028e-06, | |
| "loss": 1.066422939300537, | |
| "step": 1418 | |
| }, | |
| { | |
| "epoch": 1.1952861952861953, | |
| "grad_norm": 8.107324600219727, | |
| "learning_rate": 1.4669530113000712e-06, | |
| "loss": 0.8409990072250366, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 1.196969696969697, | |
| "grad_norm": 5.618307590484619, | |
| "learning_rate": 1.465426978426017e-06, | |
| "loss": 0.750501275062561, | |
| "step": 1422 | |
| }, | |
| { | |
| "epoch": 1.1986531986531987, | |
| "grad_norm": 3.1983511447906494, | |
| "learning_rate": 1.4638996865022658e-06, | |
| "loss": 0.611116886138916, | |
| "step": 1424 | |
| }, | |
| { | |
| "epoch": 1.2003367003367003, | |
| "grad_norm": 7.185869216918945, | |
| "learning_rate": 1.4623711407909802e-06, | |
| "loss": 0.8342564105987549, | |
| "step": 1426 | |
| }, | |
| { | |
| "epoch": 1.202020202020202, | |
| "grad_norm": 5.156131267547607, | |
| "learning_rate": 1.4608413465586444e-06, | |
| "loss": 0.528020441532135, | |
| "step": 1428 | |
| }, | |
| { | |
| "epoch": 1.2037037037037037, | |
| "grad_norm": 4.284945964813232, | |
| "learning_rate": 1.4593103090760426e-06, | |
| "loss": 0.867672324180603, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 1.2053872053872055, | |
| "grad_norm": 4.11072301864624, | |
| "learning_rate": 1.4577780336182429e-06, | |
| "loss": 0.6711719036102295, | |
| "step": 1432 | |
| }, | |
| { | |
| "epoch": 1.2070707070707072, | |
| "grad_norm": 2.3299851417541504, | |
| "learning_rate": 1.4562445254645793e-06, | |
| "loss": 1.1435985565185547, | |
| "step": 1434 | |
| }, | |
| { | |
| "epoch": 1.2087542087542087, | |
| "grad_norm": 7.548894882202148, | |
| "learning_rate": 1.4547097898986332e-06, | |
| "loss": 0.5709949731826782, | |
| "step": 1436 | |
| }, | |
| { | |
| "epoch": 1.2104377104377104, | |
| "grad_norm": 12.143434524536133, | |
| "learning_rate": 1.453173832208213e-06, | |
| "loss": 0.40696626901626587, | |
| "step": 1438 | |
| }, | |
| { | |
| "epoch": 1.2121212121212122, | |
| "grad_norm": 3.1169068813323975, | |
| "learning_rate": 1.4516366576853406e-06, | |
| "loss": 0.4268173575401306, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 1.2138047138047139, | |
| "grad_norm": 4.227779388427734, | |
| "learning_rate": 1.450098271626228e-06, | |
| "loss": 0.7122896313667297, | |
| "step": 1442 | |
| }, | |
| { | |
| "epoch": 1.2154882154882154, | |
| "grad_norm": 7.247793674468994, | |
| "learning_rate": 1.448558679331263e-06, | |
| "loss": 0.8614311814308167, | |
| "step": 1444 | |
| }, | |
| { | |
| "epoch": 1.2171717171717171, | |
| "grad_norm": 6.6793212890625, | |
| "learning_rate": 1.4470178861049886e-06, | |
| "loss": 0.8972820043563843, | |
| "step": 1446 | |
| }, | |
| { | |
| "epoch": 1.2188552188552189, | |
| "grad_norm": 4.615921974182129, | |
| "learning_rate": 1.4454758972560863e-06, | |
| "loss": 0.6717212200164795, | |
| "step": 1448 | |
| }, | |
| { | |
| "epoch": 1.2205387205387206, | |
| "grad_norm": 4.018466949462891, | |
| "learning_rate": 1.4439327180973556e-06, | |
| "loss": 0.8775206208229065, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 1.2222222222222223, | |
| "grad_norm": 4.282815456390381, | |
| "learning_rate": 1.4423883539456987e-06, | |
| "loss": 0.867609977722168, | |
| "step": 1452 | |
| }, | |
| { | |
| "epoch": 1.2239057239057238, | |
| "grad_norm": 5.375484466552734, | |
| "learning_rate": 1.4408428101220997e-06, | |
| "loss": 0.6089876294136047, | |
| "step": 1454 | |
| }, | |
| { | |
| "epoch": 1.2255892255892256, | |
| "grad_norm": 4.924765110015869, | |
| "learning_rate": 1.439296091951607e-06, | |
| "loss": 0.852953314781189, | |
| "step": 1456 | |
| }, | |
| { | |
| "epoch": 1.2272727272727273, | |
| "grad_norm": 6.108055591583252, | |
| "learning_rate": 1.4377482047633162e-06, | |
| "loss": 0.8556865453720093, | |
| "step": 1458 | |
| }, | |
| { | |
| "epoch": 1.228956228956229, | |
| "grad_norm": 7.242824077606201, | |
| "learning_rate": 1.4361991538903495e-06, | |
| "loss": 0.9425716400146484, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 1.2306397306397305, | |
| "grad_norm": 8.90245532989502, | |
| "learning_rate": 1.4346489446698388e-06, | |
| "loss": 0.6341677904129028, | |
| "step": 1462 | |
| }, | |
| { | |
| "epoch": 1.2323232323232323, | |
| "grad_norm": 4.452878475189209, | |
| "learning_rate": 1.4330975824429076e-06, | |
| "loss": 0.6499779224395752, | |
| "step": 1464 | |
| }, | |
| { | |
| "epoch": 1.234006734006734, | |
| "grad_norm": 2.3086910247802734, | |
| "learning_rate": 1.4315450725546516e-06, | |
| "loss": 0.8102267384529114, | |
| "step": 1466 | |
| }, | |
| { | |
| "epoch": 1.2356902356902357, | |
| "grad_norm": 4.407566070556641, | |
| "learning_rate": 1.42999142035412e-06, | |
| "loss": 0.9032129049301147, | |
| "step": 1468 | |
| }, | |
| { | |
| "epoch": 1.2373737373737375, | |
| "grad_norm": 3.0299272537231445, | |
| "learning_rate": 1.4284366311942985e-06, | |
| "loss": 1.0671682357788086, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 1.239057239057239, | |
| "grad_norm": 5.777866840362549, | |
| "learning_rate": 1.42688071043209e-06, | |
| "loss": 0.5841819047927856, | |
| "step": 1472 | |
| }, | |
| { | |
| "epoch": 1.2407407407407407, | |
| "grad_norm": 11.622872352600098, | |
| "learning_rate": 1.4253236634282964e-06, | |
| "loss": 0.6392555236816406, | |
| "step": 1474 | |
| }, | |
| { | |
| "epoch": 1.2424242424242424, | |
| "grad_norm": 25.52138328552246, | |
| "learning_rate": 1.4237654955475997e-06, | |
| "loss": 0.45820027589797974, | |
| "step": 1476 | |
| }, | |
| { | |
| "epoch": 1.2441077441077442, | |
| "grad_norm": 7.492943286895752, | |
| "learning_rate": 1.4222062121585438e-06, | |
| "loss": 0.6932016611099243, | |
| "step": 1478 | |
| }, | |
| { | |
| "epoch": 1.2457912457912457, | |
| "grad_norm": 4.440412998199463, | |
| "learning_rate": 1.4206458186335158e-06, | |
| "loss": 0.7317427396774292, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 1.2474747474747474, | |
| "grad_norm": 3.7973439693450928, | |
| "learning_rate": 1.4190843203487285e-06, | |
| "loss": 0.7156742811203003, | |
| "step": 1482 | |
| }, | |
| { | |
| "epoch": 1.2491582491582491, | |
| "grad_norm": 5.348301410675049, | |
| "learning_rate": 1.4175217226842e-06, | |
| "loss": 0.4319908320903778, | |
| "step": 1484 | |
| }, | |
| { | |
| "epoch": 1.2508417508417509, | |
| "grad_norm": 3.68155574798584, | |
| "learning_rate": 1.4159580310237368e-06, | |
| "loss": 0.5716394186019897, | |
| "step": 1486 | |
| }, | |
| { | |
| "epoch": 1.2525252525252526, | |
| "grad_norm": 12.937089920043945, | |
| "learning_rate": 1.414393250754915e-06, | |
| "loss": 0.7173076272010803, | |
| "step": 1488 | |
| }, | |
| { | |
| "epoch": 1.2542087542087543, | |
| "grad_norm": 4.815293312072754, | |
| "learning_rate": 1.4128273872690608e-06, | |
| "loss": 0.6426496505737305, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 1.2558922558922558, | |
| "grad_norm": 6.455201148986816, | |
| "learning_rate": 1.4112604459612326e-06, | |
| "loss": 0.7094147801399231, | |
| "step": 1492 | |
| }, | |
| { | |
| "epoch": 1.2575757575757576, | |
| "grad_norm": 2.647298812866211, | |
| "learning_rate": 1.4096924322302025e-06, | |
| "loss": 0.7964801788330078, | |
| "step": 1494 | |
| }, | |
| { | |
| "epoch": 1.2592592592592593, | |
| "grad_norm": 10.454304695129395, | |
| "learning_rate": 1.4081233514784377e-06, | |
| "loss": 0.6100042462348938, | |
| "step": 1496 | |
| }, | |
| { | |
| "epoch": 1.2609427609427608, | |
| "grad_norm": 3.6101741790771484, | |
| "learning_rate": 1.4065532091120815e-06, | |
| "loss": 0.9467732906341553, | |
| "step": 1498 | |
| }, | |
| { | |
| "epoch": 1.2626262626262625, | |
| "grad_norm": 4.737046718597412, | |
| "learning_rate": 1.4049820105409354e-06, | |
| "loss": 0.9984631538391113, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 1.2643097643097643, | |
| "grad_norm": 7.123760223388672, | |
| "learning_rate": 1.4034097611784388e-06, | |
| "loss": 0.5069697499275208, | |
| "step": 1502 | |
| }, | |
| { | |
| "epoch": 1.265993265993266, | |
| "grad_norm": 6.340135097503662, | |
| "learning_rate": 1.4018364664416531e-06, | |
| "loss": 0.7557004690170288, | |
| "step": 1504 | |
| }, | |
| { | |
| "epoch": 1.2676767676767677, | |
| "grad_norm": 2.5414600372314453, | |
| "learning_rate": 1.4002621317512402e-06, | |
| "loss": 1.086498498916626, | |
| "step": 1506 | |
| }, | |
| { | |
| "epoch": 1.2693602693602695, | |
| "grad_norm": 6.803100109100342, | |
| "learning_rate": 1.3986867625314453e-06, | |
| "loss": 1.1087901592254639, | |
| "step": 1508 | |
| }, | |
| { | |
| "epoch": 1.271043771043771, | |
| "grad_norm": 17.501358032226562, | |
| "learning_rate": 1.397110364210079e-06, | |
| "loss": 0.5395207405090332, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 1.2727272727272727, | |
| "grad_norm": 17.035667419433594, | |
| "learning_rate": 1.395532942218496e-06, | |
| "loss": 0.5006218552589417, | |
| "step": 1512 | |
| }, | |
| { | |
| "epoch": 1.2744107744107744, | |
| "grad_norm": 13.554049491882324, | |
| "learning_rate": 1.393954501991579e-06, | |
| "loss": 0.597407341003418, | |
| "step": 1514 | |
| }, | |
| { | |
| "epoch": 1.2760942760942762, | |
| "grad_norm": 5.359893321990967, | |
| "learning_rate": 1.3923750489677192e-06, | |
| "loss": 0.7979379892349243, | |
| "step": 1516 | |
| }, | |
| { | |
| "epoch": 1.2777777777777777, | |
| "grad_norm": 3.440288782119751, | |
| "learning_rate": 1.3907945885887963e-06, | |
| "loss": 0.7031858563423157, | |
| "step": 1518 | |
| }, | |
| { | |
| "epoch": 1.2794612794612794, | |
| "grad_norm": 2.3797640800476074, | |
| "learning_rate": 1.389213126300161e-06, | |
| "loss": 0.8979378342628479, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 1.2811447811447811, | |
| "grad_norm": 14.381575584411621, | |
| "learning_rate": 1.3876306675506176e-06, | |
| "loss": 0.6173551082611084, | |
| "step": 1522 | |
| }, | |
| { | |
| "epoch": 1.2828282828282829, | |
| "grad_norm": 22.606948852539062, | |
| "learning_rate": 1.3860472177924008e-06, | |
| "loss": 0.5981260538101196, | |
| "step": 1524 | |
| }, | |
| { | |
| "epoch": 1.2845117845117846, | |
| "grad_norm": 9.574856758117676, | |
| "learning_rate": 1.3844627824811623e-06, | |
| "loss": 0.8161386847496033, | |
| "step": 1526 | |
| }, | |
| { | |
| "epoch": 1.2861952861952861, | |
| "grad_norm": 23.1750431060791, | |
| "learning_rate": 1.3828773670759476e-06, | |
| "loss": 0.7269278764724731, | |
| "step": 1528 | |
| }, | |
| { | |
| "epoch": 1.2878787878787878, | |
| "grad_norm": 4.434001922607422, | |
| "learning_rate": 1.3812909770391808e-06, | |
| "loss": 0.3289014399051666, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 1.2895622895622896, | |
| "grad_norm": 4.015097141265869, | |
| "learning_rate": 1.3797036178366422e-06, | |
| "loss": 0.7394604086875916, | |
| "step": 1532 | |
| }, | |
| { | |
| "epoch": 1.2912457912457913, | |
| "grad_norm": 2.247042179107666, | |
| "learning_rate": 1.3781152949374526e-06, | |
| "loss": 1.0114760398864746, | |
| "step": 1534 | |
| }, | |
| { | |
| "epoch": 1.2929292929292928, | |
| "grad_norm": 10.264386177062988, | |
| "learning_rate": 1.3765260138140523e-06, | |
| "loss": 0.9329554438591003, | |
| "step": 1536 | |
| }, | |
| { | |
| "epoch": 1.2946127946127945, | |
| "grad_norm": 7.6681647300720215, | |
| "learning_rate": 1.3749357799421846e-06, | |
| "loss": 0.5743855237960815, | |
| "step": 1538 | |
| }, | |
| { | |
| "epoch": 1.2962962962962963, | |
| "grad_norm": 51.10832977294922, | |
| "learning_rate": 1.3733445988008729e-06, | |
| "loss": 0.6765563488006592, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 1.297979797979798, | |
| "grad_norm": 7.140315055847168, | |
| "learning_rate": 1.3717524758724065e-06, | |
| "loss": 0.5998942255973816, | |
| "step": 1542 | |
| }, | |
| { | |
| "epoch": 1.2996632996632997, | |
| "grad_norm": 5.197514533996582, | |
| "learning_rate": 1.3701594166423182e-06, | |
| "loss": 0.8821581602096558, | |
| "step": 1544 | |
| }, | |
| { | |
| "epoch": 1.3013468013468015, | |
| "grad_norm": 6.277469158172607, | |
| "learning_rate": 1.3685654265993682e-06, | |
| "loss": 0.767001211643219, | |
| "step": 1546 | |
| }, | |
| { | |
| "epoch": 1.303030303030303, | |
| "grad_norm": 7.22768497467041, | |
| "learning_rate": 1.366970511235522e-06, | |
| "loss": 0.7709823250770569, | |
| "step": 1548 | |
| }, | |
| { | |
| "epoch": 1.3047138047138047, | |
| "grad_norm": 4.289220333099365, | |
| "learning_rate": 1.3653746760459345e-06, | |
| "loss": 0.5894149541854858, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 1.3063973063973064, | |
| "grad_norm": 7.390477657318115, | |
| "learning_rate": 1.3637779265289299e-06, | |
| "loss": 0.8726404905319214, | |
| "step": 1552 | |
| }, | |
| { | |
| "epoch": 1.308080808080808, | |
| "grad_norm": 10.008243560791016, | |
| "learning_rate": 1.3621802681859812e-06, | |
| "loss": 0.947807788848877, | |
| "step": 1554 | |
| }, | |
| { | |
| "epoch": 1.3097643097643097, | |
| "grad_norm": 2.8453805446624756, | |
| "learning_rate": 1.3605817065216944e-06, | |
| "loss": 0.8847697973251343, | |
| "step": 1556 | |
| }, | |
| { | |
| "epoch": 1.3114478114478114, | |
| "grad_norm": 7.134622573852539, | |
| "learning_rate": 1.3589822470437864e-06, | |
| "loss": 0.8395899534225464, | |
| "step": 1558 | |
| }, | |
| { | |
| "epoch": 1.3131313131313131, | |
| "grad_norm": 22.481409072875977, | |
| "learning_rate": 1.3573818952630683e-06, | |
| "loss": 0.42701858282089233, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 1.3148148148148149, | |
| "grad_norm": 8.535077095031738, | |
| "learning_rate": 1.3557806566934256e-06, | |
| "loss": 0.5510627627372742, | |
| "step": 1562 | |
| }, | |
| { | |
| "epoch": 1.3164983164983166, | |
| "grad_norm": 14.953362464904785, | |
| "learning_rate": 1.354178536851799e-06, | |
| "loss": 0.5616642236709595, | |
| "step": 1564 | |
| }, | |
| { | |
| "epoch": 1.3181818181818181, | |
| "grad_norm": 3.324460983276367, | |
| "learning_rate": 1.3525755412581645e-06, | |
| "loss": 1.04994535446167, | |
| "step": 1566 | |
| }, | |
| { | |
| "epoch": 1.3198653198653199, | |
| "grad_norm": 11.0078706741333, | |
| "learning_rate": 1.3509716754355174e-06, | |
| "loss": 0.5438690185546875, | |
| "step": 1568 | |
| }, | |
| { | |
| "epoch": 1.3215488215488216, | |
| "grad_norm": 9.554030418395996, | |
| "learning_rate": 1.34936694490985e-06, | |
| "loss": 0.901394248008728, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 1.3232323232323233, | |
| "grad_norm": 9.29176139831543, | |
| "learning_rate": 1.3477613552101344e-06, | |
| "loss": 0.7927477359771729, | |
| "step": 1572 | |
| }, | |
| { | |
| "epoch": 1.3249158249158248, | |
| "grad_norm": 3.3643555641174316, | |
| "learning_rate": 1.3461549118683023e-06, | |
| "loss": 0.6502416133880615, | |
| "step": 1574 | |
| }, | |
| { | |
| "epoch": 1.3265993265993266, | |
| "grad_norm": 3.0709450244903564, | |
| "learning_rate": 1.344547620419227e-06, | |
| "loss": 0.9406764507293701, | |
| "step": 1576 | |
| }, | |
| { | |
| "epoch": 1.3282828282828283, | |
| "grad_norm": 74.16036224365234, | |
| "learning_rate": 1.3429394864007037e-06, | |
| "loss": 0.6865894794464111, | |
| "step": 1578 | |
| }, | |
| { | |
| "epoch": 1.32996632996633, | |
| "grad_norm": 14.486356735229492, | |
| "learning_rate": 1.3413305153534313e-06, | |
| "loss": 0.49478814005851746, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 1.3316498316498318, | |
| "grad_norm": 64.50064849853516, | |
| "learning_rate": 1.3397207128209916e-06, | |
| "loss": 0.6601588726043701, | |
| "step": 1582 | |
| }, | |
| { | |
| "epoch": 1.3333333333333333, | |
| "grad_norm": 2.4977774620056152, | |
| "learning_rate": 1.3381100843498315e-06, | |
| "loss": 0.9941089153289795, | |
| "step": 1584 | |
| }, | |
| { | |
| "epoch": 1.335016835016835, | |
| "grad_norm": 5.635324478149414, | |
| "learning_rate": 1.3364986354892442e-06, | |
| "loss": 0.8192329406738281, | |
| "step": 1586 | |
| }, | |
| { | |
| "epoch": 1.3367003367003367, | |
| "grad_norm": 3.7212777137756348, | |
| "learning_rate": 1.3348863717913485e-06, | |
| "loss": 0.4632367491722107, | |
| "step": 1588 | |
| }, | |
| { | |
| "epoch": 1.3383838383838385, | |
| "grad_norm": 2.295429229736328, | |
| "learning_rate": 1.3332732988110717e-06, | |
| "loss": 0.6560972332954407, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 1.34006734006734, | |
| "grad_norm": 14.497373580932617, | |
| "learning_rate": 1.3316594221061293e-06, | |
| "loss": 0.553842306137085, | |
| "step": 1592 | |
| }, | |
| { | |
| "epoch": 1.3417508417508417, | |
| "grad_norm": 2.9581053256988525, | |
| "learning_rate": 1.3300447472370047e-06, | |
| "loss": 0.9532322883605957, | |
| "step": 1594 | |
| }, | |
| { | |
| "epoch": 1.3434343434343434, | |
| "grad_norm": 19.73745346069336, | |
| "learning_rate": 1.3284292797669325e-06, | |
| "loss": 0.3680313229560852, | |
| "step": 1596 | |
| }, | |
| { | |
| "epoch": 1.3451178451178452, | |
| "grad_norm": 3.8030846118927, | |
| "learning_rate": 1.326813025261878e-06, | |
| "loss": 0.8829873204231262, | |
| "step": 1598 | |
| }, | |
| { | |
| "epoch": 1.3468013468013469, | |
| "grad_norm": 9.470124244689941, | |
| "learning_rate": 1.3251959892905183e-06, | |
| "loss": 0.7422173023223877, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 1.3484848484848486, | |
| "grad_norm": 4.198265075683594, | |
| "learning_rate": 1.3235781774242221e-06, | |
| "loss": 0.6670169830322266, | |
| "step": 1602 | |
| }, | |
| { | |
| "epoch": 1.3501683501683501, | |
| "grad_norm": 11.831036567687988, | |
| "learning_rate": 1.321959595237032e-06, | |
| "loss": 0.8272008895874023, | |
| "step": 1604 | |
| }, | |
| { | |
| "epoch": 1.3518518518518519, | |
| "grad_norm": 4.924741744995117, | |
| "learning_rate": 1.3203402483056457e-06, | |
| "loss": 1.091449499130249, | |
| "step": 1606 | |
| }, | |
| { | |
| "epoch": 1.3535353535353536, | |
| "grad_norm": 4.869316101074219, | |
| "learning_rate": 1.3187201422093937e-06, | |
| "loss": 0.8597755432128906, | |
| "step": 1608 | |
| }, | |
| { | |
| "epoch": 1.355218855218855, | |
| "grad_norm": 9.370150566101074, | |
| "learning_rate": 1.3170992825302231e-06, | |
| "loss": 0.38254064321517944, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 1.3569023569023568, | |
| "grad_norm": 5.126072883605957, | |
| "learning_rate": 1.315477674852678e-06, | |
| "loss": 0.9957524538040161, | |
| "step": 1612 | |
| }, | |
| { | |
| "epoch": 1.3585858585858586, | |
| "grad_norm": 4.2908172607421875, | |
| "learning_rate": 1.3138553247638793e-06, | |
| "loss": 0.6559964418411255, | |
| "step": 1614 | |
| }, | |
| { | |
| "epoch": 1.3602693602693603, | |
| "grad_norm": 9.646893501281738, | |
| "learning_rate": 1.3122322378535052e-06, | |
| "loss": 0.6425015330314636, | |
| "step": 1616 | |
| }, | |
| { | |
| "epoch": 1.361952861952862, | |
| "grad_norm": 2.957890510559082, | |
| "learning_rate": 1.310608419713773e-06, | |
| "loss": 0.8944872617721558, | |
| "step": 1618 | |
| }, | |
| { | |
| "epoch": 1.3636363636363638, | |
| "grad_norm": 3.4394900798797607, | |
| "learning_rate": 1.3089838759394198e-06, | |
| "loss": 0.6483921408653259, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 1.3653198653198653, | |
| "grad_norm": 2.6076972484588623, | |
| "learning_rate": 1.3073586121276824e-06, | |
| "loss": 0.9400961995124817, | |
| "step": 1622 | |
| }, | |
| { | |
| "epoch": 1.367003367003367, | |
| "grad_norm": 2.1458706855773926, | |
| "learning_rate": 1.3057326338782782e-06, | |
| "loss": 0.8825739622116089, | |
| "step": 1624 | |
| }, | |
| { | |
| "epoch": 1.3686868686868687, | |
| "grad_norm": 21.359161376953125, | |
| "learning_rate": 1.3041059467933864e-06, | |
| "loss": 0.6030191779136658, | |
| "step": 1626 | |
| }, | |
| { | |
| "epoch": 1.3703703703703702, | |
| "grad_norm": 19.883914947509766, | |
| "learning_rate": 1.3024785564776287e-06, | |
| "loss": 0.8803253173828125, | |
| "step": 1628 | |
| }, | |
| { | |
| "epoch": 1.372053872053872, | |
| "grad_norm": 5.972216606140137, | |
| "learning_rate": 1.3008504685380493e-06, | |
| "loss": 0.8786773085594177, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 1.3737373737373737, | |
| "grad_norm": 4.644904613494873, | |
| "learning_rate": 1.2992216885840964e-06, | |
| "loss": 1.0024290084838867, | |
| "step": 1632 | |
| }, | |
| { | |
| "epoch": 1.3754208754208754, | |
| "grad_norm": 6.252418041229248, | |
| "learning_rate": 1.297592222227602e-06, | |
| "loss": 0.6154271364212036, | |
| "step": 1634 | |
| }, | |
| { | |
| "epoch": 1.3771043771043772, | |
| "grad_norm": 5.154648780822754, | |
| "learning_rate": 1.2959620750827637e-06, | |
| "loss": 0.3709207773208618, | |
| "step": 1636 | |
| }, | |
| { | |
| "epoch": 1.378787878787879, | |
| "grad_norm": 4.736825466156006, | |
| "learning_rate": 1.2943312527661236e-06, | |
| "loss": 0.5821201801300049, | |
| "step": 1638 | |
| }, | |
| { | |
| "epoch": 1.3804713804713804, | |
| "grad_norm": 2.9232895374298096, | |
| "learning_rate": 1.2926997608965515e-06, | |
| "loss": 0.6593613624572754, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 1.3821548821548821, | |
| "grad_norm": 3.258718729019165, | |
| "learning_rate": 1.2910676050952232e-06, | |
| "loss": 0.9339215755462646, | |
| "step": 1642 | |
| }, | |
| { | |
| "epoch": 1.3838383838383839, | |
| "grad_norm": 2.4435172080993652, | |
| "learning_rate": 1.2894347909856021e-06, | |
| "loss": 1.130608081817627, | |
| "step": 1644 | |
| }, | |
| { | |
| "epoch": 1.3855218855218856, | |
| "grad_norm": 5.7142791748046875, | |
| "learning_rate": 1.2878013241934195e-06, | |
| "loss": 0.7692638635635376, | |
| "step": 1646 | |
| }, | |
| { | |
| "epoch": 1.387205387205387, | |
| "grad_norm": 2.420278310775757, | |
| "learning_rate": 1.2861672103466564e-06, | |
| "loss": 0.93665611743927, | |
| "step": 1648 | |
| }, | |
| { | |
| "epoch": 1.3888888888888888, | |
| "grad_norm": 3.4516067504882812, | |
| "learning_rate": 1.284532455075522e-06, | |
| "loss": 0.8558226823806763, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 1.3905723905723906, | |
| "grad_norm": 4.455197811126709, | |
| "learning_rate": 1.2828970640124361e-06, | |
| "loss": 1.1693918704986572, | |
| "step": 1652 | |
| }, | |
| { | |
| "epoch": 1.3922558922558923, | |
| "grad_norm": 4.881862640380859, | |
| "learning_rate": 1.281261042792009e-06, | |
| "loss": 0.9461103677749634, | |
| "step": 1654 | |
| }, | |
| { | |
| "epoch": 1.393939393939394, | |
| "grad_norm": 10.862548828125, | |
| "learning_rate": 1.2796243970510232e-06, | |
| "loss": 0.5996136665344238, | |
| "step": 1656 | |
| }, | |
| { | |
| "epoch": 1.3956228956228955, | |
| "grad_norm": 3.589484930038452, | |
| "learning_rate": 1.2779871324284106e-06, | |
| "loss": 0.6074084043502808, | |
| "step": 1658 | |
| }, | |
| { | |
| "epoch": 1.3973063973063973, | |
| "grad_norm": 11.17980670928955, | |
| "learning_rate": 1.2763492545652373e-06, | |
| "loss": 0.9331209659576416, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 1.398989898989899, | |
| "grad_norm": 19.434432983398438, | |
| "learning_rate": 1.2747107691046815e-06, | |
| "loss": 0.7953930497169495, | |
| "step": 1662 | |
| }, | |
| { | |
| "epoch": 1.4006734006734007, | |
| "grad_norm": 42.425941467285156, | |
| "learning_rate": 1.2730716816920151e-06, | |
| "loss": 0.7052454352378845, | |
| "step": 1664 | |
| }, | |
| { | |
| "epoch": 1.4023569023569022, | |
| "grad_norm": 5.138425827026367, | |
| "learning_rate": 1.271431997974584e-06, | |
| "loss": 0.424437016248703, | |
| "step": 1666 | |
| }, | |
| { | |
| "epoch": 1.404040404040404, | |
| "grad_norm": 9.087939262390137, | |
| "learning_rate": 1.2697917236017886e-06, | |
| "loss": 0.814346194267273, | |
| "step": 1668 | |
| }, | |
| { | |
| "epoch": 1.4057239057239057, | |
| "grad_norm": 3.4287939071655273, | |
| "learning_rate": 1.2681508642250637e-06, | |
| "loss": 0.7924845218658447, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 1.4074074074074074, | |
| "grad_norm": 2.349846601486206, | |
| "learning_rate": 1.266509425497861e-06, | |
| "loss": 0.7972933650016785, | |
| "step": 1672 | |
| }, | |
| { | |
| "epoch": 1.4090909090909092, | |
| "grad_norm": 3.433432102203369, | |
| "learning_rate": 1.2648674130756271e-06, | |
| "loss": 1.136865258216858, | |
| "step": 1674 | |
| }, | |
| { | |
| "epoch": 1.410774410774411, | |
| "grad_norm": 18.93527603149414, | |
| "learning_rate": 1.2632248326157854e-06, | |
| "loss": 0.4568125009536743, | |
| "step": 1676 | |
| }, | |
| { | |
| "epoch": 1.4124579124579124, | |
| "grad_norm": 21.089004516601562, | |
| "learning_rate": 1.2615816897777176e-06, | |
| "loss": 0.9250065088272095, | |
| "step": 1678 | |
| }, | |
| { | |
| "epoch": 1.4141414141414141, | |
| "grad_norm": 3.9571752548217773, | |
| "learning_rate": 1.2599379902227419e-06, | |
| "loss": 1.0160582065582275, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 1.4158249158249159, | |
| "grad_norm": 2.4356608390808105, | |
| "learning_rate": 1.258293739614094e-06, | |
| "loss": 0.5913569927215576, | |
| "step": 1682 | |
| }, | |
| { | |
| "epoch": 1.4175084175084174, | |
| "grad_norm": 14.787010192871094, | |
| "learning_rate": 1.2566489436169101e-06, | |
| "loss": 0.46613961458206177, | |
| "step": 1684 | |
| }, | |
| { | |
| "epoch": 1.4191919191919191, | |
| "grad_norm": 11.936421394348145, | |
| "learning_rate": 1.255003607898204e-06, | |
| "loss": 0.6293203830718994, | |
| "step": 1686 | |
| }, | |
| { | |
| "epoch": 1.4208754208754208, | |
| "grad_norm": 3.085696220397949, | |
| "learning_rate": 1.2533577381268495e-06, | |
| "loss": 1.1134471893310547, | |
| "step": 1688 | |
| }, | |
| { | |
| "epoch": 1.4225589225589226, | |
| "grad_norm": 8.348203659057617, | |
| "learning_rate": 1.2517113399735608e-06, | |
| "loss": 0.5143088698387146, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 1.4242424242424243, | |
| "grad_norm": 21.37081527709961, | |
| "learning_rate": 1.250064419110872e-06, | |
| "loss": 0.6192675828933716, | |
| "step": 1692 | |
| }, | |
| { | |
| "epoch": 1.425925925925926, | |
| "grad_norm": 3.3926167488098145, | |
| "learning_rate": 1.2484169812131184e-06, | |
| "loss": 0.563998818397522, | |
| "step": 1694 | |
| }, | |
| { | |
| "epoch": 1.4276094276094276, | |
| "grad_norm": 2.4411673545837402, | |
| "learning_rate": 1.246769031956417e-06, | |
| "loss": 1.2114120721817017, | |
| "step": 1696 | |
| }, | |
| { | |
| "epoch": 1.4292929292929293, | |
| "grad_norm": 4.939236640930176, | |
| "learning_rate": 1.245120577018646e-06, | |
| "loss": 1.056166410446167, | |
| "step": 1698 | |
| }, | |
| { | |
| "epoch": 1.430976430976431, | |
| "grad_norm": 3.1179447174072266, | |
| "learning_rate": 1.2434716220794265e-06, | |
| "loss": 0.8100858926773071, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 1.4326599326599325, | |
| "grad_norm": 2.682645320892334, | |
| "learning_rate": 1.2418221728201023e-06, | |
| "loss": 0.8299959897994995, | |
| "step": 1702 | |
| }, | |
| { | |
| "epoch": 1.4343434343434343, | |
| "grad_norm": 3.0754740238189697, | |
| "learning_rate": 1.2401722349237198e-06, | |
| "loss": 0.33164000511169434, | |
| "step": 1704 | |
| }, | |
| { | |
| "epoch": 1.436026936026936, | |
| "grad_norm": 4.3346381187438965, | |
| "learning_rate": 1.238521814075009e-06, | |
| "loss": 0.4199884235858917, | |
| "step": 1706 | |
| }, | |
| { | |
| "epoch": 1.4377104377104377, | |
| "grad_norm": 12.329163551330566, | |
| "learning_rate": 1.236870915960365e-06, | |
| "loss": 0.9520546197891235, | |
| "step": 1708 | |
| }, | |
| { | |
| "epoch": 1.4393939393939394, | |
| "grad_norm": 2.5863959789276123, | |
| "learning_rate": 1.2352195462678257e-06, | |
| "loss": 1.0822396278381348, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 1.4410774410774412, | |
| "grad_norm": 5.638743877410889, | |
| "learning_rate": 1.2335677106870546e-06, | |
| "loss": 0.9755090475082397, | |
| "step": 1712 | |
| }, | |
| { | |
| "epoch": 1.4427609427609427, | |
| "grad_norm": 2.6220881938934326, | |
| "learning_rate": 1.2319154149093202e-06, | |
| "loss": 0.8935360312461853, | |
| "step": 1714 | |
| }, | |
| { | |
| "epoch": 1.4444444444444444, | |
| "grad_norm": 10.807649612426758, | |
| "learning_rate": 1.2302626646274773e-06, | |
| "loss": 0.8985303044319153, | |
| "step": 1716 | |
| }, | |
| { | |
| "epoch": 1.4461279461279462, | |
| "grad_norm": 3.802117109298706, | |
| "learning_rate": 1.228609465535946e-06, | |
| "loss": 0.6814161539077759, | |
| "step": 1718 | |
| }, | |
| { | |
| "epoch": 1.4478114478114479, | |
| "grad_norm": 8.011700630187988, | |
| "learning_rate": 1.2269558233306918e-06, | |
| "loss": 0.7456521391868591, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 1.4494949494949494, | |
| "grad_norm": 6.23107385635376, | |
| "learning_rate": 1.2253017437092088e-06, | |
| "loss": 0.589634358882904, | |
| "step": 1722 | |
| }, | |
| { | |
| "epoch": 1.4511784511784511, | |
| "grad_norm": 3.2185349464416504, | |
| "learning_rate": 1.2236472323704971e-06, | |
| "loss": 0.7695318460464478, | |
| "step": 1724 | |
| }, | |
| { | |
| "epoch": 1.4528619528619529, | |
| "grad_norm": 5.373349189758301, | |
| "learning_rate": 1.221992295015044e-06, | |
| "loss": 0.8508809208869934, | |
| "step": 1726 | |
| }, | |
| { | |
| "epoch": 1.4545454545454546, | |
| "grad_norm": 6.226076602935791, | |
| "learning_rate": 1.2203369373448053e-06, | |
| "loss": 0.664426863193512, | |
| "step": 1728 | |
| }, | |
| { | |
| "epoch": 1.4562289562289563, | |
| "grad_norm": 3.2036166191101074, | |
| "learning_rate": 1.2186811650631847e-06, | |
| "loss": 0.9715543389320374, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 1.457912457912458, | |
| "grad_norm": 2.1510095596313477, | |
| "learning_rate": 1.217024983875014e-06, | |
| "loss": 1.2159640789031982, | |
| "step": 1732 | |
| }, | |
| { | |
| "epoch": 1.4595959595959596, | |
| "grad_norm": 2.128190040588379, | |
| "learning_rate": 1.2153683994865354e-06, | |
| "loss": 0.8712791800498962, | |
| "step": 1734 | |
| }, | |
| { | |
| "epoch": 1.4612794612794613, | |
| "grad_norm": 15.694469451904297, | |
| "learning_rate": 1.213711417605378e-06, | |
| "loss": 0.6612798571586609, | |
| "step": 1736 | |
| }, | |
| { | |
| "epoch": 1.462962962962963, | |
| "grad_norm": 3.5540852546691895, | |
| "learning_rate": 1.2120540439405418e-06, | |
| "loss": 0.6000321507453918, | |
| "step": 1738 | |
| }, | |
| { | |
| "epoch": 1.4646464646464645, | |
| "grad_norm": 5.9053730964660645, | |
| "learning_rate": 1.2103962842023765e-06, | |
| "loss": 1.0903751850128174, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 1.4663299663299663, | |
| "grad_norm": 3.0747792720794678, | |
| "learning_rate": 1.2087381441025624e-06, | |
| "loss": 0.6912112236022949, | |
| "step": 1742 | |
| }, | |
| { | |
| "epoch": 1.468013468013468, | |
| "grad_norm": 4.498322010040283, | |
| "learning_rate": 1.2070796293540887e-06, | |
| "loss": 0.5265808701515198, | |
| "step": 1744 | |
| }, | |
| { | |
| "epoch": 1.4696969696969697, | |
| "grad_norm": 3.914283275604248, | |
| "learning_rate": 1.2054207456712377e-06, | |
| "loss": 0.9266606569290161, | |
| "step": 1746 | |
| }, | |
| { | |
| "epoch": 1.4713804713804715, | |
| "grad_norm": 3.2208728790283203, | |
| "learning_rate": 1.2037614987695609e-06, | |
| "loss": 0.9809207916259766, | |
| "step": 1748 | |
| }, | |
| { | |
| "epoch": 1.4730639730639732, | |
| "grad_norm": 4.662408828735352, | |
| "learning_rate": 1.2021018943658623e-06, | |
| "loss": 0.7404388189315796, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 1.4747474747474747, | |
| "grad_norm": 2.950866460800171, | |
| "learning_rate": 1.2004419381781779e-06, | |
| "loss": 0.6600291728973389, | |
| "step": 1752 | |
| }, | |
| { | |
| "epoch": 1.4764309764309764, | |
| "grad_norm": 7.190127372741699, | |
| "learning_rate": 1.1987816359257543e-06, | |
| "loss": 0.6781315803527832, | |
| "step": 1754 | |
| }, | |
| { | |
| "epoch": 1.4781144781144782, | |
| "grad_norm": 9.120945930480957, | |
| "learning_rate": 1.1971209933290318e-06, | |
| "loss": 0.8286664485931396, | |
| "step": 1756 | |
| }, | |
| { | |
| "epoch": 1.4797979797979797, | |
| "grad_norm": 46.43217468261719, | |
| "learning_rate": 1.1954600161096226e-06, | |
| "loss": 0.6408827900886536, | |
| "step": 1758 | |
| }, | |
| { | |
| "epoch": 1.4814814814814814, | |
| "grad_norm": 3.931215286254883, | |
| "learning_rate": 1.1937987099902927e-06, | |
| "loss": 0.7160297632217407, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 1.4831649831649831, | |
| "grad_norm": 2.768970251083374, | |
| "learning_rate": 1.19213708069494e-06, | |
| "loss": 0.9132235050201416, | |
| "step": 1762 | |
| }, | |
| { | |
| "epoch": 1.4848484848484849, | |
| "grad_norm": 3.2081525325775146, | |
| "learning_rate": 1.190475133948577e-06, | |
| "loss": 0.8853850364685059, | |
| "step": 1764 | |
| }, | |
| { | |
| "epoch": 1.4865319865319866, | |
| "grad_norm": 7.524960041046143, | |
| "learning_rate": 1.1888128754773092e-06, | |
| "loss": 0.6852905750274658, | |
| "step": 1766 | |
| }, | |
| { | |
| "epoch": 1.4882154882154883, | |
| "grad_norm": 4.307741165161133, | |
| "learning_rate": 1.1871503110083167e-06, | |
| "loss": 0.7655327320098877, | |
| "step": 1768 | |
| }, | |
| { | |
| "epoch": 1.4898989898989898, | |
| "grad_norm": 3.650569200515747, | |
| "learning_rate": 1.1854874462698337e-06, | |
| "loss": 0.9417293071746826, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 1.4915824915824916, | |
| "grad_norm": 5.581574440002441, | |
| "learning_rate": 1.1838242869911285e-06, | |
| "loss": 0.3258330821990967, | |
| "step": 1772 | |
| }, | |
| { | |
| "epoch": 1.4932659932659933, | |
| "grad_norm": 2.098912000656128, | |
| "learning_rate": 1.182160838902485e-06, | |
| "loss": 0.826897144317627, | |
| "step": 1774 | |
| }, | |
| { | |
| "epoch": 1.494949494949495, | |
| "grad_norm": 7.627374172210693, | |
| "learning_rate": 1.1804971077351818e-06, | |
| "loss": 0.7514946460723877, | |
| "step": 1776 | |
| }, | |
| { | |
| "epoch": 1.4966329966329965, | |
| "grad_norm": 3.7137930393218994, | |
| "learning_rate": 1.1788330992214724e-06, | |
| "loss": 0.8887453079223633, | |
| "step": 1778 | |
| }, | |
| { | |
| "epoch": 1.4983164983164983, | |
| "grad_norm": 8.848133087158203, | |
| "learning_rate": 1.1771688190945664e-06, | |
| "loss": 0.9019075036048889, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "grad_norm": 8.9419584274292, | |
| "learning_rate": 1.1755042730886093e-06, | |
| "loss": 0.5869305729866028, | |
| "step": 1782 | |
| }, | |
| { | |
| "epoch": 1.5016835016835017, | |
| "grad_norm": 2.39841365814209, | |
| "learning_rate": 1.1738394669386621e-06, | |
| "loss": 1.1196240186691284, | |
| "step": 1784 | |
| }, | |
| { | |
| "epoch": 1.5033670033670035, | |
| "grad_norm": 6.431698322296143, | |
| "learning_rate": 1.172174406380683e-06, | |
| "loss": 0.807545006275177, | |
| "step": 1786 | |
| }, | |
| { | |
| "epoch": 1.5050505050505052, | |
| "grad_norm": 3.8912956714630127, | |
| "learning_rate": 1.170509097151506e-06, | |
| "loss": 0.9450180530548096, | |
| "step": 1788 | |
| }, | |
| { | |
| "epoch": 1.5067340067340067, | |
| "grad_norm": 22.158241271972656, | |
| "learning_rate": 1.168843544988822e-06, | |
| "loss": 0.6185091733932495, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 1.5084175084175084, | |
| "grad_norm": 7.974305629730225, | |
| "learning_rate": 1.1671777556311587e-06, | |
| "loss": 0.6012750267982483, | |
| "step": 1792 | |
| }, | |
| { | |
| "epoch": 1.51010101010101, | |
| "grad_norm": 2.431042432785034, | |
| "learning_rate": 1.1655117348178619e-06, | |
| "loss": 0.8983908891677856, | |
| "step": 1794 | |
| }, | |
| { | |
| "epoch": 1.5117845117845117, | |
| "grad_norm": 10.86044692993164, | |
| "learning_rate": 1.163845488289074e-06, | |
| "loss": 0.8865917921066284, | |
| "step": 1796 | |
| }, | |
| { | |
| "epoch": 1.5134680134680134, | |
| "grad_norm": 12.615477561950684, | |
| "learning_rate": 1.1621790217857153e-06, | |
| "loss": 0.9755824208259583, | |
| "step": 1798 | |
| }, | |
| { | |
| "epoch": 1.5151515151515151, | |
| "grad_norm": 4.471153736114502, | |
| "learning_rate": 1.1605123410494643e-06, | |
| "loss": 0.678105890750885, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 1.5168350168350169, | |
| "grad_norm": 2.3955981731414795, | |
| "learning_rate": 1.1588454518227375e-06, | |
| "loss": 1.0274368524551392, | |
| "step": 1802 | |
| }, | |
| { | |
| "epoch": 1.5185185185185186, | |
| "grad_norm": 3.6730523109436035, | |
| "learning_rate": 1.157178359848669e-06, | |
| "loss": 0.9852594137191772, | |
| "step": 1804 | |
| }, | |
| { | |
| "epoch": 1.5202020202020203, | |
| "grad_norm": 4.832586288452148, | |
| "learning_rate": 1.155511070871093e-06, | |
| "loss": 0.7990705966949463, | |
| "step": 1806 | |
| }, | |
| { | |
| "epoch": 1.5218855218855218, | |
| "grad_norm": 7.295440196990967, | |
| "learning_rate": 1.1538435906345213e-06, | |
| "loss": 0.7585336565971375, | |
| "step": 1808 | |
| }, | |
| { | |
| "epoch": 1.5235690235690236, | |
| "grad_norm": 5.79640531539917, | |
| "learning_rate": 1.1521759248841237e-06, | |
| "loss": 0.6978878974914551, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 1.5252525252525253, | |
| "grad_norm": 3.875293016433716, | |
| "learning_rate": 1.1505080793657124e-06, | |
| "loss": 0.22595882415771484, | |
| "step": 1812 | |
| }, | |
| { | |
| "epoch": 1.5269360269360268, | |
| "grad_norm": 3.867565870285034, | |
| "learning_rate": 1.1488400598257157e-06, | |
| "loss": 1.1055881977081299, | |
| "step": 1814 | |
| }, | |
| { | |
| "epoch": 1.5286195286195285, | |
| "grad_norm": 50.10768127441406, | |
| "learning_rate": 1.1471718720111629e-06, | |
| "loss": 0.7640130519866943, | |
| "step": 1816 | |
| }, | |
| { | |
| "epoch": 1.5303030303030303, | |
| "grad_norm": 20.99407196044922, | |
| "learning_rate": 1.1455035216696634e-06, | |
| "loss": 0.8898581266403198, | |
| "step": 1818 | |
| }, | |
| { | |
| "epoch": 1.531986531986532, | |
| "grad_norm": 3.8618974685668945, | |
| "learning_rate": 1.1438350145493853e-06, | |
| "loss": 0.7621004581451416, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 1.5336700336700337, | |
| "grad_norm": 5.8136162757873535, | |
| "learning_rate": 1.1421663563990383e-06, | |
| "loss": 0.7234241962432861, | |
| "step": 1822 | |
| }, | |
| { | |
| "epoch": 1.5353535353535355, | |
| "grad_norm": 2.8319544792175293, | |
| "learning_rate": 1.1404975529678515e-06, | |
| "loss": 0.9921367168426514, | |
| "step": 1824 | |
| }, | |
| { | |
| "epoch": 1.5370370370370372, | |
| "grad_norm": 2.6894915103912354, | |
| "learning_rate": 1.1388286100055555e-06, | |
| "loss": 0.841090738773346, | |
| "step": 1826 | |
| }, | |
| { | |
| "epoch": 1.5387205387205387, | |
| "grad_norm": 2.3952138423919678, | |
| "learning_rate": 1.1371595332623601e-06, | |
| "loss": 0.8845152258872986, | |
| "step": 1828 | |
| }, | |
| { | |
| "epoch": 1.5404040404040404, | |
| "grad_norm": 7.501322269439697, | |
| "learning_rate": 1.1354903284889377e-06, | |
| "loss": 0.7155517935752869, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 1.542087542087542, | |
| "grad_norm": 7.9082136154174805, | |
| "learning_rate": 1.133821001436401e-06, | |
| "loss": 0.7049411535263062, | |
| "step": 1832 | |
| }, | |
| { | |
| "epoch": 1.5437710437710437, | |
| "grad_norm": 2.185568332672119, | |
| "learning_rate": 1.1321515578562835e-06, | |
| "loss": 1.0648796558380127, | |
| "step": 1834 | |
| }, | |
| { | |
| "epoch": 1.5454545454545454, | |
| "grad_norm": 17.329938888549805, | |
| "learning_rate": 1.1304820035005211e-06, | |
| "loss": 0.8813831806182861, | |
| "step": 1836 | |
| }, | |
| { | |
| "epoch": 1.5471380471380471, | |
| "grad_norm": 1.5673277378082275, | |
| "learning_rate": 1.1288123441214315e-06, | |
| "loss": 0.45255547761917114, | |
| "step": 1838 | |
| }, | |
| { | |
| "epoch": 1.5488215488215489, | |
| "grad_norm": 3.232985258102417, | |
| "learning_rate": 1.1271425854716931e-06, | |
| "loss": 0.6964028477668762, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 1.5505050505050506, | |
| "grad_norm": 4.322386741638184, | |
| "learning_rate": 1.125472733304327e-06, | |
| "loss": 0.6157456636428833, | |
| "step": 1842 | |
| }, | |
| { | |
| "epoch": 1.5521885521885523, | |
| "grad_norm": 4.216830730438232, | |
| "learning_rate": 1.1238027933726776e-06, | |
| "loss": 0.4383459687232971, | |
| "step": 1844 | |
| }, | |
| { | |
| "epoch": 1.5538720538720538, | |
| "grad_norm": 3.0813772678375244, | |
| "learning_rate": 1.122132771430389e-06, | |
| "loss": 0.9130579233169556, | |
| "step": 1846 | |
| }, | |
| { | |
| "epoch": 1.5555555555555556, | |
| "grad_norm": 4.2144975662231445, | |
| "learning_rate": 1.1204626732313907e-06, | |
| "loss": 0.9694530963897705, | |
| "step": 1848 | |
| }, | |
| { | |
| "epoch": 1.557239057239057, | |
| "grad_norm": 3.75293231010437, | |
| "learning_rate": 1.1187925045298732e-06, | |
| "loss": 0.7483557462692261, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 1.5589225589225588, | |
| "grad_norm": 7.035089015960693, | |
| "learning_rate": 1.1171222710802704e-06, | |
| "loss": 0.9532842040061951, | |
| "step": 1852 | |
| }, | |
| { | |
| "epoch": 1.5606060606060606, | |
| "grad_norm": 4.142365455627441, | |
| "learning_rate": 1.1154519786372392e-06, | |
| "loss": 0.5940355658531189, | |
| "step": 1854 | |
| }, | |
| { | |
| "epoch": 1.5622895622895623, | |
| "grad_norm": 1.9475144147872925, | |
| "learning_rate": 1.1137816329556403e-06, | |
| "loss": 0.6380103826522827, | |
| "step": 1856 | |
| }, | |
| { | |
| "epoch": 1.563973063973064, | |
| "grad_norm": 2.4910194873809814, | |
| "learning_rate": 1.112111239790517e-06, | |
| "loss": 0.9142417907714844, | |
| "step": 1858 | |
| }, | |
| { | |
| "epoch": 1.5656565656565657, | |
| "grad_norm": 5.697439193725586, | |
| "learning_rate": 1.1104408048970765e-06, | |
| "loss": 0.4324049949645996, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 1.5673400673400675, | |
| "grad_norm": 7.662766456604004, | |
| "learning_rate": 1.1087703340306707e-06, | |
| "loss": 0.9493654370307922, | |
| "step": 1862 | |
| }, | |
| { | |
| "epoch": 1.569023569023569, | |
| "grad_norm": 2.1827774047851562, | |
| "learning_rate": 1.1070998329467738e-06, | |
| "loss": 0.355845183134079, | |
| "step": 1864 | |
| }, | |
| { | |
| "epoch": 1.5707070707070707, | |
| "grad_norm": 7.288192272186279, | |
| "learning_rate": 1.1054293074009646e-06, | |
| "loss": 1.0024428367614746, | |
| "step": 1866 | |
| }, | |
| { | |
| "epoch": 1.5723905723905722, | |
| "grad_norm": 7.846567630767822, | |
| "learning_rate": 1.1037587631489077e-06, | |
| "loss": 0.600260853767395, | |
| "step": 1868 | |
| }, | |
| { | |
| "epoch": 1.574074074074074, | |
| "grad_norm": 3.9028728008270264, | |
| "learning_rate": 1.1020882059463297e-06, | |
| "loss": 0.8100777268409729, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 1.5757575757575757, | |
| "grad_norm": 4.646785736083984, | |
| "learning_rate": 1.1004176415490036e-06, | |
| "loss": 0.7916046380996704, | |
| "step": 1872 | |
| }, | |
| { | |
| "epoch": 1.5774410774410774, | |
| "grad_norm": 2.543654680252075, | |
| "learning_rate": 1.0987470757127267e-06, | |
| "loss": 0.9251663684844971, | |
| "step": 1874 | |
| }, | |
| { | |
| "epoch": 1.5791245791245792, | |
| "grad_norm": 21.24106788635254, | |
| "learning_rate": 1.0970765141933012e-06, | |
| "loss": 0.5762704610824585, | |
| "step": 1876 | |
| }, | |
| { | |
| "epoch": 1.5808080808080809, | |
| "grad_norm": 2.501488447189331, | |
| "learning_rate": 1.0954059627465144e-06, | |
| "loss": 1.1238887310028076, | |
| "step": 1878 | |
| }, | |
| { | |
| "epoch": 1.5824915824915826, | |
| "grad_norm": 5.235997200012207, | |
| "learning_rate": 1.093735427128119e-06, | |
| "loss": 0.7707400321960449, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 1.5841750841750841, | |
| "grad_norm": 5.779091835021973, | |
| "learning_rate": 1.092064913093813e-06, | |
| "loss": 0.4793959856033325, | |
| "step": 1882 | |
| }, | |
| { | |
| "epoch": 1.5858585858585859, | |
| "grad_norm": 7.471992015838623, | |
| "learning_rate": 1.09039442639922e-06, | |
| "loss": 0.5366681814193726, | |
| "step": 1884 | |
| }, | |
| { | |
| "epoch": 1.5875420875420876, | |
| "grad_norm": 3.628077983856201, | |
| "learning_rate": 1.0887239727998697e-06, | |
| "loss": 0.6487268209457397, | |
| "step": 1886 | |
| }, | |
| { | |
| "epoch": 1.589225589225589, | |
| "grad_norm": 3.7435550689697266, | |
| "learning_rate": 1.0870535580511778e-06, | |
| "loss": 0.996959388256073, | |
| "step": 1888 | |
| }, | |
| { | |
| "epoch": 1.5909090909090908, | |
| "grad_norm": 4.557770252227783, | |
| "learning_rate": 1.0853831879084254e-06, | |
| "loss": 0.2108735740184784, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 1.5925925925925926, | |
| "grad_norm": 4.259451389312744, | |
| "learning_rate": 1.0837128681267409e-06, | |
| "loss": 1.057731032371521, | |
| "step": 1892 | |
| }, | |
| { | |
| "epoch": 1.5942760942760943, | |
| "grad_norm": 3.0099260807037354, | |
| "learning_rate": 1.082042604461079e-06, | |
| "loss": 0.8130640983581543, | |
| "step": 1894 | |
| }, | |
| { | |
| "epoch": 1.595959595959596, | |
| "grad_norm": 7.435500144958496, | |
| "learning_rate": 1.0803724026662e-06, | |
| "loss": 0.9344555139541626, | |
| "step": 1896 | |
| }, | |
| { | |
| "epoch": 1.5976430976430978, | |
| "grad_norm": 4.205924034118652, | |
| "learning_rate": 1.0787022684966524e-06, | |
| "loss": 0.8660852313041687, | |
| "step": 1898 | |
| }, | |
| { | |
| "epoch": 1.5993265993265995, | |
| "grad_norm": 14.64234447479248, | |
| "learning_rate": 1.0770322077067512e-06, | |
| "loss": 0.7825689315795898, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 1.601010101010101, | |
| "grad_norm": 2.525815725326538, | |
| "learning_rate": 1.0753622260505582e-06, | |
| "loss": 0.8996245265007019, | |
| "step": 1902 | |
| }, | |
| { | |
| "epoch": 1.6026936026936027, | |
| "grad_norm": 5.750382423400879, | |
| "learning_rate": 1.0736923292818631e-06, | |
| "loss": 0.7357829213142395, | |
| "step": 1904 | |
| }, | |
| { | |
| "epoch": 1.6043771043771042, | |
| "grad_norm": 2.830305814743042, | |
| "learning_rate": 1.0720225231541629e-06, | |
| "loss": 1.1233978271484375, | |
| "step": 1906 | |
| }, | |
| { | |
| "epoch": 1.606060606060606, | |
| "grad_norm": 6.201582908630371, | |
| "learning_rate": 1.0703528134206418e-06, | |
| "loss": 0.9390593767166138, | |
| "step": 1908 | |
| }, | |
| { | |
| "epoch": 1.6077441077441077, | |
| "grad_norm": 2.511575698852539, | |
| "learning_rate": 1.0686832058341534e-06, | |
| "loss": 0.5838450789451599, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 1.6094276094276094, | |
| "grad_norm": 9.2995023727417, | |
| "learning_rate": 1.0670137061471972e-06, | |
| "loss": 0.5779824256896973, | |
| "step": 1912 | |
| }, | |
| { | |
| "epoch": 1.6111111111111112, | |
| "grad_norm": 10.087990760803223, | |
| "learning_rate": 1.0653443201119026e-06, | |
| "loss": 0.7840274572372437, | |
| "step": 1914 | |
| }, | |
| { | |
| "epoch": 1.612794612794613, | |
| "grad_norm": 3.4181957244873047, | |
| "learning_rate": 1.063675053480007e-06, | |
| "loss": 0.6986541152000427, | |
| "step": 1916 | |
| }, | |
| { | |
| "epoch": 1.6144781144781146, | |
| "grad_norm": 29.79077911376953, | |
| "learning_rate": 1.0620059120028363e-06, | |
| "loss": 0.6631942987442017, | |
| "step": 1918 | |
| }, | |
| { | |
| "epoch": 1.6161616161616161, | |
| "grad_norm": 7.215582370758057, | |
| "learning_rate": 1.0603369014312848e-06, | |
| "loss": 0.6879869699478149, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 1.6178451178451179, | |
| "grad_norm": 2.632085084915161, | |
| "learning_rate": 1.0586680275157966e-06, | |
| "loss": 0.8899586200714111, | |
| "step": 1922 | |
| }, | |
| { | |
| "epoch": 1.6195286195286194, | |
| "grad_norm": 2.167722225189209, | |
| "learning_rate": 1.0569992960063445e-06, | |
| "loss": 0.5768526792526245, | |
| "step": 1924 | |
| }, | |
| { | |
| "epoch": 1.621212121212121, | |
| "grad_norm": 4.157503604888916, | |
| "learning_rate": 1.0553307126524105e-06, | |
| "loss": 0.6109682321548462, | |
| "step": 1926 | |
| }, | |
| { | |
| "epoch": 1.6228956228956228, | |
| "grad_norm": 2.805830478668213, | |
| "learning_rate": 1.0536622832029663e-06, | |
| "loss": 0.741910457611084, | |
| "step": 1928 | |
| }, | |
| { | |
| "epoch": 1.6245791245791246, | |
| "grad_norm": 8.529329299926758, | |
| "learning_rate": 1.0519940134064535e-06, | |
| "loss": 0.8265746831893921, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 1.6262626262626263, | |
| "grad_norm": 3.2494988441467285, | |
| "learning_rate": 1.0503259090107635e-06, | |
| "loss": 0.664577841758728, | |
| "step": 1932 | |
| }, | |
| { | |
| "epoch": 1.627946127946128, | |
| "grad_norm": 5.897353172302246, | |
| "learning_rate": 1.0486579757632177e-06, | |
| "loss": 0.9694902896881104, | |
| "step": 1934 | |
| }, | |
| { | |
| "epoch": 1.6296296296296298, | |
| "grad_norm": 5.868167400360107, | |
| "learning_rate": 1.046990219410548e-06, | |
| "loss": 0.9580270648002625, | |
| "step": 1936 | |
| }, | |
| { | |
| "epoch": 1.6313131313131313, | |
| "grad_norm": 5.813265323638916, | |
| "learning_rate": 1.0453226456988766e-06, | |
| "loss": 1.0353319644927979, | |
| "step": 1938 | |
| }, | |
| { | |
| "epoch": 1.632996632996633, | |
| "grad_norm": 8.491958618164062, | |
| "learning_rate": 1.0436552603736967e-06, | |
| "loss": 0.8483461141586304, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 1.6346801346801347, | |
| "grad_norm": 2.543708086013794, | |
| "learning_rate": 1.0419880691798526e-06, | |
| "loss": 1.0242235660552979, | |
| "step": 1942 | |
| }, | |
| { | |
| "epoch": 1.6363636363636362, | |
| "grad_norm": 2.253805160522461, | |
| "learning_rate": 1.040321077861519e-06, | |
| "loss": 0.7730292677879333, | |
| "step": 1944 | |
| }, | |
| { | |
| "epoch": 1.638047138047138, | |
| "grad_norm": 2.954116106033325, | |
| "learning_rate": 1.0386542921621824e-06, | |
| "loss": 0.4111822545528412, | |
| "step": 1946 | |
| }, | |
| { | |
| "epoch": 1.6397306397306397, | |
| "grad_norm": 6.733564853668213, | |
| "learning_rate": 1.036987717824621e-06, | |
| "loss": 0.9653711318969727, | |
| "step": 1948 | |
| }, | |
| { | |
| "epoch": 1.6414141414141414, | |
| "grad_norm": 4.305788993835449, | |
| "learning_rate": 1.0353213605908854e-06, | |
| "loss": 0.9876930713653564, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 1.6430976430976432, | |
| "grad_norm": 5.421419143676758, | |
| "learning_rate": 1.0336552262022756e-06, | |
| "loss": 0.49330899119377136, | |
| "step": 1952 | |
| }, | |
| { | |
| "epoch": 1.644781144781145, | |
| "grad_norm": 6.326197624206543, | |
| "learning_rate": 1.0319893203993276e-06, | |
| "loss": 0.42090070247650146, | |
| "step": 1954 | |
| }, | |
| { | |
| "epoch": 1.6464646464646466, | |
| "grad_norm": 16.150659561157227, | |
| "learning_rate": 1.0303236489217863e-06, | |
| "loss": 0.22029098868370056, | |
| "step": 1956 | |
| }, | |
| { | |
| "epoch": 1.6481481481481481, | |
| "grad_norm": 5.668072700500488, | |
| "learning_rate": 1.0286582175085913e-06, | |
| "loss": 0.6529502868652344, | |
| "step": 1958 | |
| }, | |
| { | |
| "epoch": 1.6498316498316499, | |
| "grad_norm": 2.8413267135620117, | |
| "learning_rate": 1.0269930318978552e-06, | |
| "loss": 0.7630746960639954, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 1.6515151515151514, | |
| "grad_norm": 10.319131851196289, | |
| "learning_rate": 1.0253280978268421e-06, | |
| "loss": 0.6666793823242188, | |
| "step": 1962 | |
| }, | |
| { | |
| "epoch": 1.6531986531986531, | |
| "grad_norm": 9.414068222045898, | |
| "learning_rate": 1.0236634210319507e-06, | |
| "loss": 0.5435478687286377, | |
| "step": 1964 | |
| }, | |
| { | |
| "epoch": 1.6548821548821548, | |
| "grad_norm": 12.622198104858398, | |
| "learning_rate": 1.0219990072486938e-06, | |
| "loss": 0.6335460543632507, | |
| "step": 1966 | |
| }, | |
| { | |
| "epoch": 1.6565656565656566, | |
| "grad_norm": 1.7483079433441162, | |
| "learning_rate": 1.020334862211676e-06, | |
| "loss": 0.8370047211647034, | |
| "step": 1968 | |
| }, | |
| { | |
| "epoch": 1.6582491582491583, | |
| "grad_norm": 12.047608375549316, | |
| "learning_rate": 1.0186709916545775e-06, | |
| "loss": 0.7684140205383301, | |
| "step": 1970 | |
| }, | |
| { | |
| "epoch": 1.65993265993266, | |
| "grad_norm": 10.904447555541992, | |
| "learning_rate": 1.0170074013101329e-06, | |
| "loss": 0.9606258869171143, | |
| "step": 1972 | |
| }, | |
| { | |
| "epoch": 1.6616161616161618, | |
| "grad_norm": 2.283515453338623, | |
| "learning_rate": 1.0153440969101103e-06, | |
| "loss": 0.7740556001663208, | |
| "step": 1974 | |
| }, | |
| { | |
| "epoch": 1.6632996632996633, | |
| "grad_norm": 3.3896608352661133, | |
| "learning_rate": 1.0136810841852937e-06, | |
| "loss": 0.7479045391082764, | |
| "step": 1976 | |
| }, | |
| { | |
| "epoch": 1.664983164983165, | |
| "grad_norm": 11.400617599487305, | |
| "learning_rate": 1.0120183688654616e-06, | |
| "loss": 0.743224024772644, | |
| "step": 1978 | |
| }, | |
| { | |
| "epoch": 1.6666666666666665, | |
| "grad_norm": 1.4617348909378052, | |
| "learning_rate": 1.0103559566793679e-06, | |
| "loss": 0.7983130216598511, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 1.6683501683501682, | |
| "grad_norm": 7.328155994415283, | |
| "learning_rate": 1.0086938533547213e-06, | |
| "loss": 0.5365386009216309, | |
| "step": 1982 | |
| }, | |
| { | |
| "epoch": 1.67003367003367, | |
| "grad_norm": 12.693415641784668, | |
| "learning_rate": 1.0070320646181684e-06, | |
| "loss": 0.46709537506103516, | |
| "step": 1984 | |
| }, | |
| { | |
| "epoch": 1.6717171717171717, | |
| "grad_norm": 3.179992437362671, | |
| "learning_rate": 1.0053705961952697e-06, | |
| "loss": 1.0043718814849854, | |
| "step": 1986 | |
| }, | |
| { | |
| "epoch": 1.6734006734006734, | |
| "grad_norm": 2.304699182510376, | |
| "learning_rate": 1.0037094538104832e-06, | |
| "loss": 0.8764192461967468, | |
| "step": 1988 | |
| }, | |
| { | |
| "epoch": 1.6750841750841752, | |
| "grad_norm": 2.90543270111084, | |
| "learning_rate": 1.002048643187143e-06, | |
| "loss": 0.6470460891723633, | |
| "step": 1990 | |
| }, | |
| { | |
| "epoch": 1.676767676767677, | |
| "grad_norm": 3.9131369590759277, | |
| "learning_rate": 1.0003881700474415e-06, | |
| "loss": 1.0713807344436646, | |
| "step": 1992 | |
| }, | |
| { | |
| "epoch": 1.6784511784511784, | |
| "grad_norm": 12.474353790283203, | |
| "learning_rate": 9.987280401124063e-07, | |
| "loss": 0.6647155284881592, | |
| "step": 1994 | |
| }, | |
| { | |
| "epoch": 1.6801346801346801, | |
| "grad_norm": 2.7717926502227783, | |
| "learning_rate": 9.970682591018842e-07, | |
| "loss": 0.6175976991653442, | |
| "step": 1996 | |
| }, | |
| { | |
| "epoch": 1.6818181818181817, | |
| "grad_norm": 1.6829837560653687, | |
| "learning_rate": 9.95408832734519e-07, | |
| "loss": 0.9222723245620728, | |
| "step": 1998 | |
| }, | |
| { | |
| "epoch": 1.6835016835016834, | |
| "grad_norm": 61.439422607421875, | |
| "learning_rate": 9.937497667277322e-07, | |
| "loss": 0.7147092819213867, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 1.6851851851851851, | |
| "grad_norm": 4.989965438842773, | |
| "learning_rate": 9.92091066797705e-07, | |
| "loss": 0.6293914914131165, | |
| "step": 2002 | |
| }, | |
| { | |
| "epoch": 1.6868686868686869, | |
| "grad_norm": 11.067621231079102, | |
| "learning_rate": 9.904327386593563e-07, | |
| "loss": 0.652735710144043, | |
| "step": 2004 | |
| }, | |
| { | |
| "epoch": 1.6885521885521886, | |
| "grad_norm": 7.8212666511535645, | |
| "learning_rate": 9.887747880263236e-07, | |
| "loss": 0.6376103162765503, | |
| "step": 2006 | |
| }, | |
| { | |
| "epoch": 1.6902356902356903, | |
| "grad_norm": 3.7688381671905518, | |
| "learning_rate": 9.871172206109458e-07, | |
| "loss": 0.9424273371696472, | |
| "step": 2008 | |
| }, | |
| { | |
| "epoch": 1.691919191919192, | |
| "grad_norm": 5.420353889465332, | |
| "learning_rate": 9.854600421242396e-07, | |
| "loss": 0.5027921199798584, | |
| "step": 2010 | |
| }, | |
| { | |
| "epoch": 1.6936026936026936, | |
| "grad_norm": 4.543862819671631, | |
| "learning_rate": 9.838032582758814e-07, | |
| "loss": 0.82335364818573, | |
| "step": 2012 | |
| }, | |
| { | |
| "epoch": 1.6952861952861953, | |
| "grad_norm": 3.9203450679779053, | |
| "learning_rate": 9.821468747741893e-07, | |
| "loss": 0.5697500705718994, | |
| "step": 2014 | |
| }, | |
| { | |
| "epoch": 1.696969696969697, | |
| "grad_norm": 4.254537582397461, | |
| "learning_rate": 9.804908973261012e-07, | |
| "loss": 0.7458208799362183, | |
| "step": 2016 | |
| }, | |
| { | |
| "epoch": 1.6986531986531985, | |
| "grad_norm": 35.745418548583984, | |
| "learning_rate": 9.788353316371562e-07, | |
| "loss": 0.7252602577209473, | |
| "step": 2018 | |
| }, | |
| { | |
| "epoch": 1.7003367003367003, | |
| "grad_norm": 5.118950366973877, | |
| "learning_rate": 9.771801834114748e-07, | |
| "loss": 0.721235454082489, | |
| "step": 2020 | |
| }, | |
| { | |
| "epoch": 1.702020202020202, | |
| "grad_norm": 8.20414924621582, | |
| "learning_rate": 9.755254583517394e-07, | |
| "loss": 1.0950629711151123, | |
| "step": 2022 | |
| }, | |
| { | |
| "epoch": 1.7037037037037037, | |
| "grad_norm": 3.2535030841827393, | |
| "learning_rate": 9.738711621591733e-07, | |
| "loss": 0.7883695363998413, | |
| "step": 2024 | |
| }, | |
| { | |
| "epoch": 1.7053872053872055, | |
| "grad_norm": 2.4924561977386475, | |
| "learning_rate": 9.722173005335235e-07, | |
| "loss": 0.8893304467201233, | |
| "step": 2026 | |
| }, | |
| { | |
| "epoch": 1.7070707070707072, | |
| "grad_norm": 3.33543062210083, | |
| "learning_rate": 9.705638791730391e-07, | |
| "loss": 0.9973706007003784, | |
| "step": 2028 | |
| }, | |
| { | |
| "epoch": 1.708754208754209, | |
| "grad_norm": 12.050497055053711, | |
| "learning_rate": 9.689109037744522e-07, | |
| "loss": 0.6256110668182373, | |
| "step": 2030 | |
| }, | |
| { | |
| "epoch": 1.7104377104377104, | |
| "grad_norm": 7.641107082366943, | |
| "learning_rate": 9.672583800329585e-07, | |
| "loss": 0.4611208438873291, | |
| "step": 2032 | |
| }, | |
| { | |
| "epoch": 1.7121212121212122, | |
| "grad_norm": 4.1710405349731445, | |
| "learning_rate": 9.65606313642198e-07, | |
| "loss": 0.8477398157119751, | |
| "step": 2034 | |
| }, | |
| { | |
| "epoch": 1.7138047138047137, | |
| "grad_norm": 12.162333488464355, | |
| "learning_rate": 9.63954710294234e-07, | |
| "loss": 0.7969092130661011, | |
| "step": 2036 | |
| }, | |
| { | |
| "epoch": 1.7154882154882154, | |
| "grad_norm": 6.495959281921387, | |
| "learning_rate": 9.623035756795352e-07, | |
| "loss": 0.41181480884552, | |
| "step": 2038 | |
| }, | |
| { | |
| "epoch": 1.7171717171717171, | |
| "grad_norm": 5.608903408050537, | |
| "learning_rate": 9.606529154869556e-07, | |
| "loss": 0.45445549488067627, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 1.7188552188552189, | |
| "grad_norm": 3.937591552734375, | |
| "learning_rate": 9.590027354037134e-07, | |
| "loss": 0.8946130275726318, | |
| "step": 2042 | |
| }, | |
| { | |
| "epoch": 1.7205387205387206, | |
| "grad_norm": 3.99568247795105, | |
| "learning_rate": 9.573530411153732e-07, | |
| "loss": 0.8655031323432922, | |
| "step": 2044 | |
| }, | |
| { | |
| "epoch": 1.7222222222222223, | |
| "grad_norm": 7.455286502838135, | |
| "learning_rate": 9.557038383058265e-07, | |
| "loss": 1.0632479190826416, | |
| "step": 2046 | |
| }, | |
| { | |
| "epoch": 1.723905723905724, | |
| "grad_norm": 2.330151081085205, | |
| "learning_rate": 9.540551326572709e-07, | |
| "loss": 1.0349470376968384, | |
| "step": 2048 | |
| }, | |
| { | |
| "epoch": 1.7255892255892256, | |
| "grad_norm": 6.064199924468994, | |
| "learning_rate": 9.524069298501902e-07, | |
| "loss": 0.41284000873565674, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 1.7272727272727273, | |
| "grad_norm": 1.6751161813735962, | |
| "learning_rate": 9.507592355633376e-07, | |
| "loss": 1.0285980701446533, | |
| "step": 2052 | |
| }, | |
| { | |
| "epoch": 1.7289562289562288, | |
| "grad_norm": 26.606491088867188, | |
| "learning_rate": 9.491120554737126e-07, | |
| "loss": 0.9353586435317993, | |
| "step": 2054 | |
| }, | |
| { | |
| "epoch": 1.7306397306397305, | |
| "grad_norm": 4.331685543060303, | |
| "learning_rate": 9.474653952565439e-07, | |
| "loss": 0.7286108732223511, | |
| "step": 2056 | |
| }, | |
| { | |
| "epoch": 1.7323232323232323, | |
| "grad_norm": 2.1677701473236084, | |
| "learning_rate": 9.458192605852691e-07, | |
| "loss": 1.0569818019866943, | |
| "step": 2058 | |
| }, | |
| { | |
| "epoch": 1.734006734006734, | |
| "grad_norm": 2.619204521179199, | |
| "learning_rate": 9.441736571315142e-07, | |
| "loss": 0.620589554309845, | |
| "step": 2060 | |
| }, | |
| { | |
| "epoch": 1.7356902356902357, | |
| "grad_norm": 5.867666721343994, | |
| "learning_rate": 9.425285905650755e-07, | |
| "loss": 0.9633854627609253, | |
| "step": 2062 | |
| }, | |
| { | |
| "epoch": 1.7373737373737375, | |
| "grad_norm": 16.939653396606445, | |
| "learning_rate": 9.408840665538999e-07, | |
| "loss": 0.6605305671691895, | |
| "step": 2064 | |
| }, | |
| { | |
| "epoch": 1.7390572390572392, | |
| "grad_norm": 2.5597705841064453, | |
| "learning_rate": 9.392400907640645e-07, | |
| "loss": 0.6780143976211548, | |
| "step": 2066 | |
| }, | |
| { | |
| "epoch": 1.7407407407407407, | |
| "grad_norm": 14.445930480957031, | |
| "learning_rate": 9.375966688597572e-07, | |
| "loss": 0.8258605003356934, | |
| "step": 2068 | |
| }, | |
| { | |
| "epoch": 1.7424242424242424, | |
| "grad_norm": 5.176375389099121, | |
| "learning_rate": 9.359538065032586e-07, | |
| "loss": 0.7047204971313477, | |
| "step": 2070 | |
| }, | |
| { | |
| "epoch": 1.7441077441077442, | |
| "grad_norm": 9.773624420166016, | |
| "learning_rate": 9.343115093549203e-07, | |
| "loss": 0.6722849011421204, | |
| "step": 2072 | |
| }, | |
| { | |
| "epoch": 1.7457912457912457, | |
| "grad_norm": 3.369567394256592, | |
| "learning_rate": 9.32669783073147e-07, | |
| "loss": 0.49055272340774536, | |
| "step": 2074 | |
| }, | |
| { | |
| "epoch": 1.7474747474747474, | |
| "grad_norm": 16.458398818969727, | |
| "learning_rate": 9.310286333143767e-07, | |
| "loss": 1.0591087341308594, | |
| "step": 2076 | |
| }, | |
| { | |
| "epoch": 1.7491582491582491, | |
| "grad_norm": 3.6667587757110596, | |
| "learning_rate": 9.293880657330604e-07, | |
| "loss": 0.8024224042892456, | |
| "step": 2078 | |
| }, | |
| { | |
| "epoch": 1.7508417508417509, | |
| "grad_norm": 3.5527923107147217, | |
| "learning_rate": 9.277480859816444e-07, | |
| "loss": 0.9343531131744385, | |
| "step": 2080 | |
| }, | |
| { | |
| "epoch": 1.7525252525252526, | |
| "grad_norm": 4.238471984863281, | |
| "learning_rate": 9.261086997105487e-07, | |
| "loss": 0.6490952968597412, | |
| "step": 2082 | |
| }, | |
| { | |
| "epoch": 1.7542087542087543, | |
| "grad_norm": 2.784026861190796, | |
| "learning_rate": 9.244699125681485e-07, | |
| "loss": 1.1208921670913696, | |
| "step": 2084 | |
| }, | |
| { | |
| "epoch": 1.7558922558922558, | |
| "grad_norm": 3.683945655822754, | |
| "learning_rate": 9.228317302007556e-07, | |
| "loss": 0.788274884223938, | |
| "step": 2086 | |
| }, | |
| { | |
| "epoch": 1.7575757575757576, | |
| "grad_norm": 8.775335311889648, | |
| "learning_rate": 9.211941582525968e-07, | |
| "loss": 0.4447941184043884, | |
| "step": 2088 | |
| }, | |
| { | |
| "epoch": 1.7592592592592593, | |
| "grad_norm": 35.036190032958984, | |
| "learning_rate": 9.195572023657969e-07, | |
| "loss": 0.5342724323272705, | |
| "step": 2090 | |
| }, | |
| { | |
| "epoch": 1.7609427609427608, | |
| "grad_norm": 11.131832122802734, | |
| "learning_rate": 9.179208681803579e-07, | |
| "loss": 0.535330057144165, | |
| "step": 2092 | |
| }, | |
| { | |
| "epoch": 1.7626262626262625, | |
| "grad_norm": 4.160572052001953, | |
| "learning_rate": 9.162851613341389e-07, | |
| "loss": 0.3984565734863281, | |
| "step": 2094 | |
| }, | |
| { | |
| "epoch": 1.7643097643097643, | |
| "grad_norm": 3.6985437870025635, | |
| "learning_rate": 9.146500874628391e-07, | |
| "loss": 0.6421704292297363, | |
| "step": 2096 | |
| }, | |
| { | |
| "epoch": 1.765993265993266, | |
| "grad_norm": 2.077662467956543, | |
| "learning_rate": 9.130156521999757e-07, | |
| "loss": 1.0149686336517334, | |
| "step": 2098 | |
| }, | |
| { | |
| "epoch": 1.7676767676767677, | |
| "grad_norm": 2.065174102783203, | |
| "learning_rate": 9.113818611768654e-07, | |
| "loss": 0.8843855857849121, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 1.7693602693602695, | |
| "grad_norm": 2.7010414600372314, | |
| "learning_rate": 9.097487200226059e-07, | |
| "loss": 0.8571631908416748, | |
| "step": 2102 | |
| }, | |
| { | |
| "epoch": 1.7710437710437712, | |
| "grad_norm": 9.685044288635254, | |
| "learning_rate": 9.081162343640561e-07, | |
| "loss": 0.5381686687469482, | |
| "step": 2104 | |
| }, | |
| { | |
| "epoch": 1.7727272727272727, | |
| "grad_norm": 3.8229737281799316, | |
| "learning_rate": 9.064844098258153e-07, | |
| "loss": 0.6796019077301025, | |
| "step": 2106 | |
| }, | |
| { | |
| "epoch": 1.7744107744107744, | |
| "grad_norm": 6.055543899536133, | |
| "learning_rate": 9.048532520302061e-07, | |
| "loss": 0.8706216812133789, | |
| "step": 2108 | |
| }, | |
| { | |
| "epoch": 1.776094276094276, | |
| "grad_norm": 7.083333969116211, | |
| "learning_rate": 9.032227665972534e-07, | |
| "loss": 0.5699350237846375, | |
| "step": 2110 | |
| }, | |
| { | |
| "epoch": 1.7777777777777777, | |
| "grad_norm": 2.0101730823516846, | |
| "learning_rate": 9.015929591446651e-07, | |
| "loss": 0.8485995531082153, | |
| "step": 2112 | |
| }, | |
| { | |
| "epoch": 1.7794612794612794, | |
| "grad_norm": 2.6497552394866943, | |
| "learning_rate": 8.999638352878142e-07, | |
| "loss": 0.8866308927536011, | |
| "step": 2114 | |
| }, | |
| { | |
| "epoch": 1.7811447811447811, | |
| "grad_norm": 2.9094290733337402, | |
| "learning_rate": 8.983354006397177e-07, | |
| "loss": 0.9138184785842896, | |
| "step": 2116 | |
| }, | |
| { | |
| "epoch": 1.7828282828282829, | |
| "grad_norm": 2.6958985328674316, | |
| "learning_rate": 8.96707660811018e-07, | |
| "loss": 0.9850746989250183, | |
| "step": 2118 | |
| }, | |
| { | |
| "epoch": 1.7845117845117846, | |
| "grad_norm": 21.947837829589844, | |
| "learning_rate": 8.950806214099638e-07, | |
| "loss": 0.6375728249549866, | |
| "step": 2120 | |
| }, | |
| { | |
| "epoch": 1.7861952861952863, | |
| "grad_norm": 4.920895099639893, | |
| "learning_rate": 8.934542880423903e-07, | |
| "loss": 0.5961431860923767, | |
| "step": 2122 | |
| }, | |
| { | |
| "epoch": 1.7878787878787878, | |
| "grad_norm": 2.1890132427215576, | |
| "learning_rate": 8.918286663117005e-07, | |
| "loss": 0.659866452217102, | |
| "step": 2124 | |
| }, | |
| { | |
| "epoch": 1.7895622895622896, | |
| "grad_norm": 10.51028823852539, | |
| "learning_rate": 8.902037618188449e-07, | |
| "loss": 0.6706059575080872, | |
| "step": 2126 | |
| }, | |
| { | |
| "epoch": 1.791245791245791, | |
| "grad_norm": 6.073541164398193, | |
| "learning_rate": 8.885795801623035e-07, | |
| "loss": 0.6864989995956421, | |
| "step": 2128 | |
| }, | |
| { | |
| "epoch": 1.7929292929292928, | |
| "grad_norm": 19.274333953857422, | |
| "learning_rate": 8.869561269380652e-07, | |
| "loss": 0.674058198928833, | |
| "step": 2130 | |
| }, | |
| { | |
| "epoch": 1.7946127946127945, | |
| "grad_norm": 3.4625072479248047, | |
| "learning_rate": 8.853334077396098e-07, | |
| "loss": 0.5736150741577148, | |
| "step": 2132 | |
| }, | |
| { | |
| "epoch": 1.7962962962962963, | |
| "grad_norm": 1.9551900625228882, | |
| "learning_rate": 8.837114281578872e-07, | |
| "loss": 0.6773728728294373, | |
| "step": 2134 | |
| }, | |
| { | |
| "epoch": 1.797979797979798, | |
| "grad_norm": 6.7064208984375, | |
| "learning_rate": 8.820901937813003e-07, | |
| "loss": 0.347098171710968, | |
| "step": 2136 | |
| }, | |
| { | |
| "epoch": 1.7996632996632997, | |
| "grad_norm": 1.6629834175109863, | |
| "learning_rate": 8.804697101956828e-07, | |
| "loss": 0.9595216512680054, | |
| "step": 2138 | |
| }, | |
| { | |
| "epoch": 1.8013468013468015, | |
| "grad_norm": 3.6944870948791504, | |
| "learning_rate": 8.78849982984283e-07, | |
| "loss": 0.7999200820922852, | |
| "step": 2140 | |
| }, | |
| { | |
| "epoch": 1.803030303030303, | |
| "grad_norm": 3.7662339210510254, | |
| "learning_rate": 8.772310177277427e-07, | |
| "loss": 0.7555183172225952, | |
| "step": 2142 | |
| }, | |
| { | |
| "epoch": 1.8047138047138047, | |
| "grad_norm": 2.7332985401153564, | |
| "learning_rate": 8.756128200040782e-07, | |
| "loss": 0.7414171099662781, | |
| "step": 2144 | |
| }, | |
| { | |
| "epoch": 1.8063973063973064, | |
| "grad_norm": 5.167442798614502, | |
| "learning_rate": 8.739953953886614e-07, | |
| "loss": 0.904849112033844, | |
| "step": 2146 | |
| }, | |
| { | |
| "epoch": 1.808080808080808, | |
| "grad_norm": 7.448000907897949, | |
| "learning_rate": 8.72378749454201e-07, | |
| "loss": 0.8806520104408264, | |
| "step": 2148 | |
| }, | |
| { | |
| "epoch": 1.8097643097643097, | |
| "grad_norm": 2.8185012340545654, | |
| "learning_rate": 8.707628877707221e-07, | |
| "loss": 0.9877094030380249, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 1.8114478114478114, | |
| "grad_norm": 3.56538987159729, | |
| "learning_rate": 8.691478159055483e-07, | |
| "loss": 0.9566267728805542, | |
| "step": 2152 | |
| }, | |
| { | |
| "epoch": 1.8131313131313131, | |
| "grad_norm": 6.516078472137451, | |
| "learning_rate": 8.675335394232819e-07, | |
| "loss": 0.8102941513061523, | |
| "step": 2154 | |
| }, | |
| { | |
| "epoch": 1.8148148148148149, | |
| "grad_norm": 5.387680530548096, | |
| "learning_rate": 8.659200638857845e-07, | |
| "loss": 0.655036449432373, | |
| "step": 2156 | |
| }, | |
| { | |
| "epoch": 1.8164983164983166, | |
| "grad_norm": 4.142063140869141, | |
| "learning_rate": 8.643073948521576e-07, | |
| "loss": 0.44311749935150146, | |
| "step": 2158 | |
| }, | |
| { | |
| "epoch": 1.8181818181818183, | |
| "grad_norm": 1.9489187002182007, | |
| "learning_rate": 8.626955378787256e-07, | |
| "loss": 0.8758860230445862, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 1.8198653198653199, | |
| "grad_norm": 8.752238273620605, | |
| "learning_rate": 8.610844985190127e-07, | |
| "loss": 0.7219128608703613, | |
| "step": 2162 | |
| }, | |
| { | |
| "epoch": 1.8215488215488216, | |
| "grad_norm": 8.243671417236328, | |
| "learning_rate": 8.594742823237287e-07, | |
| "loss": 0.8195970058441162, | |
| "step": 2164 | |
| }, | |
| { | |
| "epoch": 1.823232323232323, | |
| "grad_norm": 8.444494247436523, | |
| "learning_rate": 8.578648948407452e-07, | |
| "loss": 0.9344632625579834, | |
| "step": 2166 | |
| }, | |
| { | |
| "epoch": 1.8249158249158248, | |
| "grad_norm": 1.946562647819519, | |
| "learning_rate": 8.562563416150794e-07, | |
| "loss": 0.8328951597213745, | |
| "step": 2168 | |
| }, | |
| { | |
| "epoch": 1.8265993265993266, | |
| "grad_norm": 4.5011749267578125, | |
| "learning_rate": 8.546486281888739e-07, | |
| "loss": 0.5535922050476074, | |
| "step": 2170 | |
| }, | |
| { | |
| "epoch": 1.8282828282828283, | |
| "grad_norm": 10.435762405395508, | |
| "learning_rate": 8.53041760101378e-07, | |
| "loss": 0.733657956123352, | |
| "step": 2172 | |
| }, | |
| { | |
| "epoch": 1.82996632996633, | |
| "grad_norm": 6.214064121246338, | |
| "learning_rate": 8.51435742888928e-07, | |
| "loss": 0.40798521041870117, | |
| "step": 2174 | |
| }, | |
| { | |
| "epoch": 1.8316498316498318, | |
| "grad_norm": 4.490242958068848, | |
| "learning_rate": 8.498305820849296e-07, | |
| "loss": 0.45203477144241333, | |
| "step": 2176 | |
| }, | |
| { | |
| "epoch": 1.8333333333333335, | |
| "grad_norm": 6.816056251525879, | |
| "learning_rate": 8.482262832198365e-07, | |
| "loss": 0.6513058543205261, | |
| "step": 2178 | |
| }, | |
| { | |
| "epoch": 1.835016835016835, | |
| "grad_norm": 2.1644816398620605, | |
| "learning_rate": 8.46622851821134e-07, | |
| "loss": 0.7746816277503967, | |
| "step": 2180 | |
| }, | |
| { | |
| "epoch": 1.8367003367003367, | |
| "grad_norm": 11.113990783691406, | |
| "learning_rate": 8.450202934133174e-07, | |
| "loss": 0.4632836580276489, | |
| "step": 2182 | |
| }, | |
| { | |
| "epoch": 1.8383838383838382, | |
| "grad_norm": 4.4734086990356445, | |
| "learning_rate": 8.434186135178749e-07, | |
| "loss": 0.899796724319458, | |
| "step": 2184 | |
| }, | |
| { | |
| "epoch": 1.84006734006734, | |
| "grad_norm": 2.3766531944274902, | |
| "learning_rate": 8.418178176532674e-07, | |
| "loss": 0.90257328748703, | |
| "step": 2186 | |
| }, | |
| { | |
| "epoch": 1.8417508417508417, | |
| "grad_norm": 13.302746772766113, | |
| "learning_rate": 8.402179113349106e-07, | |
| "loss": 0.8778829574584961, | |
| "step": 2188 | |
| }, | |
| { | |
| "epoch": 1.8434343434343434, | |
| "grad_norm": 10.324798583984375, | |
| "learning_rate": 8.386189000751544e-07, | |
| "loss": 0.5610869526863098, | |
| "step": 2190 | |
| }, | |
| { | |
| "epoch": 1.8451178451178452, | |
| "grad_norm": 3.937783718109131, | |
| "learning_rate": 8.370207893832661e-07, | |
| "loss": 0.7988660335540771, | |
| "step": 2192 | |
| }, | |
| { | |
| "epoch": 1.8468013468013469, | |
| "grad_norm": 7.830168724060059, | |
| "learning_rate": 8.354235847654092e-07, | |
| "loss": 0.6106054782867432, | |
| "step": 2194 | |
| }, | |
| { | |
| "epoch": 1.8484848484848486, | |
| "grad_norm": 7.153279781341553, | |
| "learning_rate": 8.338272917246252e-07, | |
| "loss": 0.7764344215393066, | |
| "step": 2196 | |
| }, | |
| { | |
| "epoch": 1.8501683501683501, | |
| "grad_norm": 6.39476203918457, | |
| "learning_rate": 8.322319157608158e-07, | |
| "loss": 0.48035871982574463, | |
| "step": 2198 | |
| }, | |
| { | |
| "epoch": 1.8518518518518519, | |
| "grad_norm": 7.486396312713623, | |
| "learning_rate": 8.306374623707222e-07, | |
| "loss": 0.9800804853439331, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 1.8535353535353534, | |
| "grad_norm": 3.6824681758880615, | |
| "learning_rate": 8.29043937047907e-07, | |
| "loss": 0.7192468643188477, | |
| "step": 2202 | |
| }, | |
| { | |
| "epoch": 1.855218855218855, | |
| "grad_norm": 6.612771987915039, | |
| "learning_rate": 8.274513452827361e-07, | |
| "loss": 0.5936028957366943, | |
| "step": 2204 | |
| }, | |
| { | |
| "epoch": 1.8569023569023568, | |
| "grad_norm": 3.079265832901001, | |
| "learning_rate": 8.258596925623578e-07, | |
| "loss": 0.9140318632125854, | |
| "step": 2206 | |
| }, | |
| { | |
| "epoch": 1.8585858585858586, | |
| "grad_norm": 10.242953300476074, | |
| "learning_rate": 8.242689843706852e-07, | |
| "loss": 0.713873028755188, | |
| "step": 2208 | |
| }, | |
| { | |
| "epoch": 1.8602693602693603, | |
| "grad_norm": 26.58353042602539, | |
| "learning_rate": 8.226792261883777e-07, | |
| "loss": 0.29191094636917114, | |
| "step": 2210 | |
| }, | |
| { | |
| "epoch": 1.861952861952862, | |
| "grad_norm": 6.435546398162842, | |
| "learning_rate": 8.210904234928213e-07, | |
| "loss": 0.8298804759979248, | |
| "step": 2212 | |
| }, | |
| { | |
| "epoch": 1.8636363636363638, | |
| "grad_norm": 2.913339853286743, | |
| "learning_rate": 8.195025817581092e-07, | |
| "loss": 1.0796676874160767, | |
| "step": 2214 | |
| }, | |
| { | |
| "epoch": 1.8653198653198653, | |
| "grad_norm": 42.50606155395508, | |
| "learning_rate": 8.179157064550246e-07, | |
| "loss": 0.3906444311141968, | |
| "step": 2216 | |
| }, | |
| { | |
| "epoch": 1.867003367003367, | |
| "grad_norm": 13.17294692993164, | |
| "learning_rate": 8.163298030510208e-07, | |
| "loss": 0.5464171171188354, | |
| "step": 2218 | |
| }, | |
| { | |
| "epoch": 1.8686868686868687, | |
| "grad_norm": 17.247772216796875, | |
| "learning_rate": 8.147448770102019e-07, | |
| "loss": 0.48076120018959045, | |
| "step": 2220 | |
| }, | |
| { | |
| "epoch": 1.8703703703703702, | |
| "grad_norm": 5.142391681671143, | |
| "learning_rate": 8.131609337933054e-07, | |
| "loss": 0.6968168616294861, | |
| "step": 2222 | |
| }, | |
| { | |
| "epoch": 1.872053872053872, | |
| "grad_norm": 4.890412330627441, | |
| "learning_rate": 8.115779788576818e-07, | |
| "loss": 0.9484931230545044, | |
| "step": 2224 | |
| }, | |
| { | |
| "epoch": 1.8737373737373737, | |
| "grad_norm": 4.0591044425964355, | |
| "learning_rate": 8.099960176572768e-07, | |
| "loss": 0.5798113346099854, | |
| "step": 2226 | |
| }, | |
| { | |
| "epoch": 1.8754208754208754, | |
| "grad_norm": 16.09890365600586, | |
| "learning_rate": 8.08415055642613e-07, | |
| "loss": 0.35563382506370544, | |
| "step": 2228 | |
| }, | |
| { | |
| "epoch": 1.8771043771043772, | |
| "grad_norm": 6.097412109375, | |
| "learning_rate": 8.068350982607693e-07, | |
| "loss": 1.0293006896972656, | |
| "step": 2230 | |
| }, | |
| { | |
| "epoch": 1.878787878787879, | |
| "grad_norm": 3.246103525161743, | |
| "learning_rate": 8.052561509553633e-07, | |
| "loss": 0.9102228879928589, | |
| "step": 2232 | |
| }, | |
| { | |
| "epoch": 1.8804713804713806, | |
| "grad_norm": 6.635921001434326, | |
| "learning_rate": 8.03678219166533e-07, | |
| "loss": 0.515903115272522, | |
| "step": 2234 | |
| }, | |
| { | |
| "epoch": 1.8821548821548821, | |
| "grad_norm": 5.258808135986328, | |
| "learning_rate": 8.021013083309181e-07, | |
| "loss": 0.7250782251358032, | |
| "step": 2236 | |
| }, | |
| { | |
| "epoch": 1.8838383838383839, | |
| "grad_norm": 27.69781494140625, | |
| "learning_rate": 8.005254238816392e-07, | |
| "loss": 0.9729253053665161, | |
| "step": 2238 | |
| }, | |
| { | |
| "epoch": 1.8855218855218854, | |
| "grad_norm": 2.754936933517456, | |
| "learning_rate": 7.989505712482814e-07, | |
| "loss": 1.1490654945373535, | |
| "step": 2240 | |
| }, | |
| { | |
| "epoch": 1.887205387205387, | |
| "grad_norm": 1.9234169721603394, | |
| "learning_rate": 7.973767558568749e-07, | |
| "loss": 0.9823436737060547, | |
| "step": 2242 | |
| }, | |
| { | |
| "epoch": 1.8888888888888888, | |
| "grad_norm": 2.8880441188812256, | |
| "learning_rate": 7.95803983129876e-07, | |
| "loss": 0.8976832032203674, | |
| "step": 2244 | |
| }, | |
| { | |
| "epoch": 1.8905723905723906, | |
| "grad_norm": 4.514529228210449, | |
| "learning_rate": 7.942322584861476e-07, | |
| "loss": 0.9340039491653442, | |
| "step": 2246 | |
| }, | |
| { | |
| "epoch": 1.8922558922558923, | |
| "grad_norm": 7.478911876678467, | |
| "learning_rate": 7.926615873409435e-07, | |
| "loss": 0.8636904954910278, | |
| "step": 2248 | |
| }, | |
| { | |
| "epoch": 1.893939393939394, | |
| "grad_norm": 2.7240192890167236, | |
| "learning_rate": 7.910919751058863e-07, | |
| "loss": 0.9821701049804688, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 1.8956228956228958, | |
| "grad_norm": 2.6539080142974854, | |
| "learning_rate": 7.895234271889502e-07, | |
| "loss": 1.1389049291610718, | |
| "step": 2252 | |
| }, | |
| { | |
| "epoch": 1.8973063973063973, | |
| "grad_norm": 2.555716037750244, | |
| "learning_rate": 7.879559489944431e-07, | |
| "loss": 0.8757186532020569, | |
| "step": 2254 | |
| }, | |
| { | |
| "epoch": 1.898989898989899, | |
| "grad_norm": 3.2359490394592285, | |
| "learning_rate": 7.86389545922987e-07, | |
| "loss": 0.7967367172241211, | |
| "step": 2256 | |
| }, | |
| { | |
| "epoch": 1.9006734006734005, | |
| "grad_norm": 2.5815160274505615, | |
| "learning_rate": 7.848242233714992e-07, | |
| "loss": 0.9813891649246216, | |
| "step": 2258 | |
| }, | |
| { | |
| "epoch": 1.9023569023569022, | |
| "grad_norm": 5.316218852996826, | |
| "learning_rate": 7.832599867331751e-07, | |
| "loss": 0.6991989612579346, | |
| "step": 2260 | |
| }, | |
| { | |
| "epoch": 1.904040404040404, | |
| "grad_norm": 3.514714241027832, | |
| "learning_rate": 7.816968413974676e-07, | |
| "loss": 0.7938976883888245, | |
| "step": 2262 | |
| }, | |
| { | |
| "epoch": 1.9057239057239057, | |
| "grad_norm": 6.5592474937438965, | |
| "learning_rate": 7.801347927500701e-07, | |
| "loss": 0.46941909193992615, | |
| "step": 2264 | |
| }, | |
| { | |
| "epoch": 1.9074074074074074, | |
| "grad_norm": 11.761022567749023, | |
| "learning_rate": 7.785738461728975e-07, | |
| "loss": 0.7285200953483582, | |
| "step": 2266 | |
| }, | |
| { | |
| "epoch": 1.9090909090909092, | |
| "grad_norm": 7.991189002990723, | |
| "learning_rate": 7.770140070440679e-07, | |
| "loss": 0.6555970907211304, | |
| "step": 2268 | |
| }, | |
| { | |
| "epoch": 1.910774410774411, | |
| "grad_norm": 4.922752857208252, | |
| "learning_rate": 7.754552807378827e-07, | |
| "loss": 0.7720062136650085, | |
| "step": 2270 | |
| }, | |
| { | |
| "epoch": 1.9124579124579124, | |
| "grad_norm": 2.78389573097229, | |
| "learning_rate": 7.738976726248105e-07, | |
| "loss": 0.8745548725128174, | |
| "step": 2272 | |
| }, | |
| { | |
| "epoch": 1.9141414141414141, | |
| "grad_norm": 10.283120155334473, | |
| "learning_rate": 7.723411880714663e-07, | |
| "loss": 0.7076643705368042, | |
| "step": 2274 | |
| }, | |
| { | |
| "epoch": 1.9158249158249159, | |
| "grad_norm": 13.527719497680664, | |
| "learning_rate": 7.707858324405945e-07, | |
| "loss": 0.8855887651443481, | |
| "step": 2276 | |
| }, | |
| { | |
| "epoch": 1.9175084175084174, | |
| "grad_norm": 13.780444145202637, | |
| "learning_rate": 7.692316110910495e-07, | |
| "loss": 0.5699777603149414, | |
| "step": 2278 | |
| }, | |
| { | |
| "epoch": 1.9191919191919191, | |
| "grad_norm": 7.046093940734863, | |
| "learning_rate": 7.676785293777779e-07, | |
| "loss": 0.20726297795772552, | |
| "step": 2280 | |
| }, | |
| { | |
| "epoch": 1.9208754208754208, | |
| "grad_norm": 5.450412750244141, | |
| "learning_rate": 7.661265926517997e-07, | |
| "loss": 0.960862398147583, | |
| "step": 2282 | |
| }, | |
| { | |
| "epoch": 1.9225589225589226, | |
| "grad_norm": 13.540059089660645, | |
| "learning_rate": 7.6457580626019e-07, | |
| "loss": 0.44127357006073, | |
| "step": 2284 | |
| }, | |
| { | |
| "epoch": 1.9242424242424243, | |
| "grad_norm": 5.831504821777344, | |
| "learning_rate": 7.630261755460598e-07, | |
| "loss": 0.5103174448013306, | |
| "step": 2286 | |
| }, | |
| { | |
| "epoch": 1.925925925925926, | |
| "grad_norm": 7.158233165740967, | |
| "learning_rate": 7.614777058485398e-07, | |
| "loss": 0.9973621368408203, | |
| "step": 2288 | |
| }, | |
| { | |
| "epoch": 1.9276094276094278, | |
| "grad_norm": 3.2046473026275635, | |
| "learning_rate": 7.59930402502759e-07, | |
| "loss": 0.6976436972618103, | |
| "step": 2290 | |
| }, | |
| { | |
| "epoch": 1.9292929292929293, | |
| "grad_norm": 9.439109802246094, | |
| "learning_rate": 7.58384270839829e-07, | |
| "loss": 0.4523466229438782, | |
| "step": 2292 | |
| }, | |
| { | |
| "epoch": 1.930976430976431, | |
| "grad_norm": 6.197632789611816, | |
| "learning_rate": 7.568393161868234e-07, | |
| "loss": 0.9106472134590149, | |
| "step": 2294 | |
| }, | |
| { | |
| "epoch": 1.9326599326599325, | |
| "grad_norm": 8.470841407775879, | |
| "learning_rate": 7.552955438667612e-07, | |
| "loss": 0.7909121513366699, | |
| "step": 2296 | |
| }, | |
| { | |
| "epoch": 1.9343434343434343, | |
| "grad_norm": 3.3162317276000977, | |
| "learning_rate": 7.537529591985879e-07, | |
| "loss": 0.7960456609725952, | |
| "step": 2298 | |
| }, | |
| { | |
| "epoch": 1.936026936026936, | |
| "grad_norm": 7.409903526306152, | |
| "learning_rate": 7.522115674971564e-07, | |
| "loss": 0.6709874868392944, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 1.9377104377104377, | |
| "grad_norm": 8.22396183013916, | |
| "learning_rate": 7.506713740732098e-07, | |
| "loss": 1.1500425338745117, | |
| "step": 2302 | |
| }, | |
| { | |
| "epoch": 1.9393939393939394, | |
| "grad_norm": 3.9755733013153076, | |
| "learning_rate": 7.491323842333626e-07, | |
| "loss": 0.9240370988845825, | |
| "step": 2304 | |
| }, | |
| { | |
| "epoch": 1.9410774410774412, | |
| "grad_norm": 7.245258331298828, | |
| "learning_rate": 7.47594603280082e-07, | |
| "loss": 0.30636048316955566, | |
| "step": 2306 | |
| }, | |
| { | |
| "epoch": 1.942760942760943, | |
| "grad_norm": 4.102907180786133, | |
| "learning_rate": 7.460580365116704e-07, | |
| "loss": 0.8063202500343323, | |
| "step": 2308 | |
| }, | |
| { | |
| "epoch": 1.9444444444444444, | |
| "grad_norm": 2.798117160797119, | |
| "learning_rate": 7.445226892222476e-07, | |
| "loss": 1.042150855064392, | |
| "step": 2310 | |
| }, | |
| { | |
| "epoch": 1.9461279461279462, | |
| "grad_norm": 11.515227317810059, | |
| "learning_rate": 7.429885667017301e-07, | |
| "loss": 0.9472934603691101, | |
| "step": 2312 | |
| }, | |
| { | |
| "epoch": 1.9478114478114477, | |
| "grad_norm": 5.401071548461914, | |
| "learning_rate": 7.41455674235816e-07, | |
| "loss": 0.9147957563400269, | |
| "step": 2314 | |
| }, | |
| { | |
| "epoch": 1.9494949494949494, | |
| "grad_norm": 3.730478048324585, | |
| "learning_rate": 7.399240171059649e-07, | |
| "loss": 0.7157914638519287, | |
| "step": 2316 | |
| }, | |
| { | |
| "epoch": 1.9511784511784511, | |
| "grad_norm": 4.426076889038086, | |
| "learning_rate": 7.383936005893798e-07, | |
| "loss": 0.8011871576309204, | |
| "step": 2318 | |
| }, | |
| { | |
| "epoch": 1.9528619528619529, | |
| "grad_norm": 6.439156532287598, | |
| "learning_rate": 7.368644299589894e-07, | |
| "loss": 0.8518431186676025, | |
| "step": 2320 | |
| }, | |
| { | |
| "epoch": 1.9545454545454546, | |
| "grad_norm": 2.613004446029663, | |
| "learning_rate": 7.353365104834304e-07, | |
| "loss": 0.936795711517334, | |
| "step": 2322 | |
| }, | |
| { | |
| "epoch": 1.9562289562289563, | |
| "grad_norm": 6.956838130950928, | |
| "learning_rate": 7.338098474270277e-07, | |
| "loss": 0.7357702851295471, | |
| "step": 2324 | |
| }, | |
| { | |
| "epoch": 1.957912457912458, | |
| "grad_norm": 13.74077320098877, | |
| "learning_rate": 7.322844460497783e-07, | |
| "loss": 0.5305231809616089, | |
| "step": 2326 | |
| }, | |
| { | |
| "epoch": 1.9595959595959596, | |
| "grad_norm": 2.220991373062134, | |
| "learning_rate": 7.307603116073317e-07, | |
| "loss": 0.9905499219894409, | |
| "step": 2328 | |
| }, | |
| { | |
| "epoch": 1.9612794612794613, | |
| "grad_norm": 1.9964042901992798, | |
| "learning_rate": 7.292374493509725e-07, | |
| "loss": 1.0259349346160889, | |
| "step": 2330 | |
| }, | |
| { | |
| "epoch": 1.9629629629629628, | |
| "grad_norm": 3.4638054370880127, | |
| "learning_rate": 7.277158645276014e-07, | |
| "loss": 0.9553219079971313, | |
| "step": 2332 | |
| }, | |
| { | |
| "epoch": 1.9646464646464645, | |
| "grad_norm": 2.130671977996826, | |
| "learning_rate": 7.261955623797189e-07, | |
| "loss": 0.9786357283592224, | |
| "step": 2334 | |
| }, | |
| { | |
| "epoch": 1.9663299663299663, | |
| "grad_norm": 2.262347459793091, | |
| "learning_rate": 7.246765481454056e-07, | |
| "loss": 0.8999519348144531, | |
| "step": 2336 | |
| }, | |
| { | |
| "epoch": 1.968013468013468, | |
| "grad_norm": 8.985565185546875, | |
| "learning_rate": 7.23158827058304e-07, | |
| "loss": 1.0301485061645508, | |
| "step": 2338 | |
| }, | |
| { | |
| "epoch": 1.9696969696969697, | |
| "grad_norm": 15.289015769958496, | |
| "learning_rate": 7.216424043476022e-07, | |
| "loss": 0.4213113784790039, | |
| "step": 2340 | |
| }, | |
| { | |
| "epoch": 1.9713804713804715, | |
| "grad_norm": 5.483232498168945, | |
| "learning_rate": 7.20127285238015e-07, | |
| "loss": 0.6755249500274658, | |
| "step": 2342 | |
| }, | |
| { | |
| "epoch": 1.9730639730639732, | |
| "grad_norm": 5.321086883544922, | |
| "learning_rate": 7.186134749497645e-07, | |
| "loss": 0.5112136602401733, | |
| "step": 2344 | |
| }, | |
| { | |
| "epoch": 1.9747474747474747, | |
| "grad_norm": 6.330574989318848, | |
| "learning_rate": 7.171009786985642e-07, | |
| "loss": 0.7962218523025513, | |
| "step": 2346 | |
| }, | |
| { | |
| "epoch": 1.9764309764309764, | |
| "grad_norm": 7.868488788604736, | |
| "learning_rate": 7.155898016956008e-07, | |
| "loss": 0.6971943378448486, | |
| "step": 2348 | |
| }, | |
| { | |
| "epoch": 1.9781144781144782, | |
| "grad_norm": 51.063167572021484, | |
| "learning_rate": 7.14079949147514e-07, | |
| "loss": 0.6931584477424622, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 1.9797979797979797, | |
| "grad_norm": 5.527878761291504, | |
| "learning_rate": 7.125714262563814e-07, | |
| "loss": 0.6461153030395508, | |
| "step": 2352 | |
| }, | |
| { | |
| "epoch": 1.9814814814814814, | |
| "grad_norm": 3.8143720626831055, | |
| "learning_rate": 7.110642382196996e-07, | |
| "loss": 0.4134939908981323, | |
| "step": 2354 | |
| }, | |
| { | |
| "epoch": 1.9831649831649831, | |
| "grad_norm": 2.772143840789795, | |
| "learning_rate": 7.095583902303648e-07, | |
| "loss": 1.014623999595642, | |
| "step": 2356 | |
| }, | |
| { | |
| "epoch": 1.9848484848484849, | |
| "grad_norm": 2.1666996479034424, | |
| "learning_rate": 7.080538874766573e-07, | |
| "loss": 0.8629425764083862, | |
| "step": 2358 | |
| }, | |
| { | |
| "epoch": 1.9865319865319866, | |
| "grad_norm": 1.9438031911849976, | |
| "learning_rate": 7.06550735142222e-07, | |
| "loss": 0.8896007537841797, | |
| "step": 2360 | |
| }, | |
| { | |
| "epoch": 1.9882154882154883, | |
| "grad_norm": 6.1856369972229, | |
| "learning_rate": 7.050489384060512e-07, | |
| "loss": 0.6207383275032043, | |
| "step": 2362 | |
| }, | |
| { | |
| "epoch": 1.98989898989899, | |
| "grad_norm": 2.3403923511505127, | |
| "learning_rate": 7.035485024424666e-07, | |
| "loss": 0.912721574306488, | |
| "step": 2364 | |
| }, | |
| { | |
| "epoch": 1.9915824915824916, | |
| "grad_norm": 11.149336814880371, | |
| "learning_rate": 7.020494324211017e-07, | |
| "loss": 0.8143168687820435, | |
| "step": 2366 | |
| }, | |
| { | |
| "epoch": 1.9932659932659933, | |
| "grad_norm": 2.9151461124420166, | |
| "learning_rate": 7.005517335068827e-07, | |
| "loss": 0.9495657682418823, | |
| "step": 2368 | |
| }, | |
| { | |
| "epoch": 1.9949494949494948, | |
| "grad_norm": 1.9637680053710938, | |
| "learning_rate": 6.99055410860013e-07, | |
| "loss": 0.26862990856170654, | |
| "step": 2370 | |
| }, | |
| { | |
| "epoch": 1.9966329966329965, | |
| "grad_norm": 17.319799423217773, | |
| "learning_rate": 6.975604696359542e-07, | |
| "loss": 0.5134755969047546, | |
| "step": 2372 | |
| }, | |
| { | |
| "epoch": 1.9983164983164983, | |
| "grad_norm": 5.046746730804443, | |
| "learning_rate": 6.960669149854068e-07, | |
| "loss": 0.8662137985229492, | |
| "step": 2374 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 3.1785898208618164, | |
| "learning_rate": 6.945747520542955e-07, | |
| "loss": 0.8281479477882385, | |
| "step": 2376 | |
| }, | |
| { | |
| "epoch": 2.0016835016835017, | |
| "grad_norm": 15.919424057006836, | |
| "learning_rate": 6.930839859837496e-07, | |
| "loss": 0.5921661853790283, | |
| "step": 2378 | |
| }, | |
| { | |
| "epoch": 2.0033670033670035, | |
| "grad_norm": 13.652657508850098, | |
| "learning_rate": 6.915946219100852e-07, | |
| "loss": 1.0555100440979004, | |
| "step": 2380 | |
| }, | |
| { | |
| "epoch": 2.005050505050505, | |
| "grad_norm": 5.170054912567139, | |
| "learning_rate": 6.901066649647887e-07, | |
| "loss": 0.6134198904037476, | |
| "step": 2382 | |
| }, | |
| { | |
| "epoch": 2.006734006734007, | |
| "grad_norm": 3.480863094329834, | |
| "learning_rate": 6.886201202744972e-07, | |
| "loss": 0.48556286096572876, | |
| "step": 2384 | |
| }, | |
| { | |
| "epoch": 2.008417508417508, | |
| "grad_norm": 1.9658989906311035, | |
| "learning_rate": 6.871349929609826e-07, | |
| "loss": 0.6283817291259766, | |
| "step": 2386 | |
| }, | |
| { | |
| "epoch": 2.01010101010101, | |
| "grad_norm": 3.805121421813965, | |
| "learning_rate": 6.856512881411343e-07, | |
| "loss": 0.7825635671615601, | |
| "step": 2388 | |
| }, | |
| { | |
| "epoch": 2.0117845117845117, | |
| "grad_norm": 3.4738574028015137, | |
| "learning_rate": 6.841690109269386e-07, | |
| "loss": 0.9271956086158752, | |
| "step": 2390 | |
| }, | |
| { | |
| "epoch": 2.0134680134680134, | |
| "grad_norm": 6.440873622894287, | |
| "learning_rate": 6.826881664254646e-07, | |
| "loss": 0.6064585447311401, | |
| "step": 2392 | |
| }, | |
| { | |
| "epoch": 2.015151515151515, | |
| "grad_norm": 5.510295391082764, | |
| "learning_rate": 6.812087597388452e-07, | |
| "loss": 0.610366940498352, | |
| "step": 2394 | |
| }, | |
| { | |
| "epoch": 2.016835016835017, | |
| "grad_norm": 2.200218439102173, | |
| "learning_rate": 6.79730795964258e-07, | |
| "loss": 0.7530055046081543, | |
| "step": 2396 | |
| }, | |
| { | |
| "epoch": 2.0185185185185186, | |
| "grad_norm": 9.030868530273438, | |
| "learning_rate": 6.782542801939105e-07, | |
| "loss": 0.7531571388244629, | |
| "step": 2398 | |
| }, | |
| { | |
| "epoch": 2.0202020202020203, | |
| "grad_norm": 3.04939866065979, | |
| "learning_rate": 6.767792175150211e-07, | |
| "loss": 0.4959731698036194, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 2.021885521885522, | |
| "grad_norm": 10.346657752990723, | |
| "learning_rate": 6.753056130098009e-07, | |
| "loss": 0.31336265802383423, | |
| "step": 2402 | |
| }, | |
| { | |
| "epoch": 2.0235690235690234, | |
| "grad_norm": 2.892493486404419, | |
| "learning_rate": 6.738334717554373e-07, | |
| "loss": 0.7610318660736084, | |
| "step": 2404 | |
| }, | |
| { | |
| "epoch": 2.025252525252525, | |
| "grad_norm": 13.560941696166992, | |
| "learning_rate": 6.723627988240772e-07, | |
| "loss": 0.6177215576171875, | |
| "step": 2406 | |
| }, | |
| { | |
| "epoch": 2.026936026936027, | |
| "grad_norm": 2.220264196395874, | |
| "learning_rate": 6.708935992828068e-07, | |
| "loss": 0.6627448797225952, | |
| "step": 2408 | |
| }, | |
| { | |
| "epoch": 2.0286195286195285, | |
| "grad_norm": 4.267128944396973, | |
| "learning_rate": 6.694258781936369e-07, | |
| "loss": 0.664837121963501, | |
| "step": 2410 | |
| }, | |
| { | |
| "epoch": 2.0303030303030303, | |
| "grad_norm": 3.3940136432647705, | |
| "learning_rate": 6.679596406134844e-07, | |
| "loss": 0.8382737636566162, | |
| "step": 2412 | |
| }, | |
| { | |
| "epoch": 2.031986531986532, | |
| "grad_norm": 2.147282838821411, | |
| "learning_rate": 6.664948915941546e-07, | |
| "loss": 0.5983447432518005, | |
| "step": 2414 | |
| }, | |
| { | |
| "epoch": 2.0336700336700337, | |
| "grad_norm": 2.9526758193969727, | |
| "learning_rate": 6.65031636182324e-07, | |
| "loss": 0.8206237554550171, | |
| "step": 2416 | |
| }, | |
| { | |
| "epoch": 2.0353535353535355, | |
| "grad_norm": 15.74380874633789, | |
| "learning_rate": 6.635698794195237e-07, | |
| "loss": 0.5850080847740173, | |
| "step": 2418 | |
| }, | |
| { | |
| "epoch": 2.037037037037037, | |
| "grad_norm": 63.14246368408203, | |
| "learning_rate": 6.621096263421202e-07, | |
| "loss": 0.4533715844154358, | |
| "step": 2420 | |
| }, | |
| { | |
| "epoch": 2.038720538720539, | |
| "grad_norm": 3.8994693756103516, | |
| "learning_rate": 6.606508819813001e-07, | |
| "loss": 0.7626893520355225, | |
| "step": 2422 | |
| }, | |
| { | |
| "epoch": 2.04040404040404, | |
| "grad_norm": 3.742114543914795, | |
| "learning_rate": 6.591936513630514e-07, | |
| "loss": 0.17822477221488953, | |
| "step": 2424 | |
| }, | |
| { | |
| "epoch": 2.042087542087542, | |
| "grad_norm": 12.309547424316406, | |
| "learning_rate": 6.577379395081466e-07, | |
| "loss": 0.38434261083602905, | |
| "step": 2426 | |
| }, | |
| { | |
| "epoch": 2.0437710437710437, | |
| "grad_norm": 3.1989083290100098, | |
| "learning_rate": 6.562837514321258e-07, | |
| "loss": 0.5980604290962219, | |
| "step": 2428 | |
| }, | |
| { | |
| "epoch": 2.0454545454545454, | |
| "grad_norm": 32.80799865722656, | |
| "learning_rate": 6.548310921452784e-07, | |
| "loss": 0.716747522354126, | |
| "step": 2430 | |
| }, | |
| { | |
| "epoch": 2.047138047138047, | |
| "grad_norm": 4.070531368255615, | |
| "learning_rate": 6.533799666526275e-07, | |
| "loss": 0.6677117347717285, | |
| "step": 2432 | |
| }, | |
| { | |
| "epoch": 2.048821548821549, | |
| "grad_norm": 4.801085472106934, | |
| "learning_rate": 6.519303799539104e-07, | |
| "loss": 0.7861591577529907, | |
| "step": 2434 | |
| }, | |
| { | |
| "epoch": 2.0505050505050506, | |
| "grad_norm": 3.876065731048584, | |
| "learning_rate": 6.504823370435633e-07, | |
| "loss": 1.105973720550537, | |
| "step": 2436 | |
| }, | |
| { | |
| "epoch": 2.0521885521885523, | |
| "grad_norm": 2.630798578262329, | |
| "learning_rate": 6.490358429107038e-07, | |
| "loss": 0.6676466464996338, | |
| "step": 2438 | |
| }, | |
| { | |
| "epoch": 2.053872053872054, | |
| "grad_norm": 3.058680534362793, | |
| "learning_rate": 6.47590902539112e-07, | |
| "loss": 0.824833869934082, | |
| "step": 2440 | |
| }, | |
| { | |
| "epoch": 2.0555555555555554, | |
| "grad_norm": 5.962945461273193, | |
| "learning_rate": 6.461475209072161e-07, | |
| "loss": 0.7032083868980408, | |
| "step": 2442 | |
| }, | |
| { | |
| "epoch": 2.057239057239057, | |
| "grad_norm": 2.236006021499634, | |
| "learning_rate": 6.44705702988073e-07, | |
| "loss": 0.7378408908843994, | |
| "step": 2444 | |
| }, | |
| { | |
| "epoch": 2.058922558922559, | |
| "grad_norm": 5.968637943267822, | |
| "learning_rate": 6.432654537493518e-07, | |
| "loss": 0.9346398115158081, | |
| "step": 2446 | |
| }, | |
| { | |
| "epoch": 2.0606060606060606, | |
| "grad_norm": 2.306854009628296, | |
| "learning_rate": 6.418267781533173e-07, | |
| "loss": 0.7191810607910156, | |
| "step": 2448 | |
| }, | |
| { | |
| "epoch": 2.0622895622895623, | |
| "grad_norm": 9.214242935180664, | |
| "learning_rate": 6.403896811568124e-07, | |
| "loss": 0.760452389717102, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 2.063973063973064, | |
| "grad_norm": 5.0180792808532715, | |
| "learning_rate": 6.389541677112407e-07, | |
| "loss": 0.8763862252235413, | |
| "step": 2452 | |
| }, | |
| { | |
| "epoch": 2.0656565656565657, | |
| "grad_norm": 4.374032974243164, | |
| "learning_rate": 6.375202427625505e-07, | |
| "loss": 0.6157338619232178, | |
| "step": 2454 | |
| }, | |
| { | |
| "epoch": 2.0673400673400675, | |
| "grad_norm": 5.687982082366943, | |
| "learning_rate": 6.360879112512159e-07, | |
| "loss": 0.7349066138267517, | |
| "step": 2456 | |
| }, | |
| { | |
| "epoch": 2.069023569023569, | |
| "grad_norm": 2.7759313583374023, | |
| "learning_rate": 6.346571781122218e-07, | |
| "loss": 0.6915404796600342, | |
| "step": 2458 | |
| }, | |
| { | |
| "epoch": 2.0707070707070705, | |
| "grad_norm": 7.065815448760986, | |
| "learning_rate": 6.332280482750466e-07, | |
| "loss": 0.561396062374115, | |
| "step": 2460 | |
| }, | |
| { | |
| "epoch": 2.0723905723905722, | |
| "grad_norm": 16.879159927368164, | |
| "learning_rate": 6.318005266636428e-07, | |
| "loss": 0.5830413103103638, | |
| "step": 2462 | |
| }, | |
| { | |
| "epoch": 2.074074074074074, | |
| "grad_norm": 9.45602798461914, | |
| "learning_rate": 6.303746181964234e-07, | |
| "loss": 0.6078395843505859, | |
| "step": 2464 | |
| }, | |
| { | |
| "epoch": 2.0757575757575757, | |
| "grad_norm": 6.788721561431885, | |
| "learning_rate": 6.289503277862438e-07, | |
| "loss": 0.7341784238815308, | |
| "step": 2466 | |
| }, | |
| { | |
| "epoch": 2.0774410774410774, | |
| "grad_norm": 2.810659408569336, | |
| "learning_rate": 6.275276603403824e-07, | |
| "loss": 0.5312877893447876, | |
| "step": 2468 | |
| }, | |
| { | |
| "epoch": 2.079124579124579, | |
| "grad_norm": 5.600820541381836, | |
| "learning_rate": 6.26106620760528e-07, | |
| "loss": 0.961767315864563, | |
| "step": 2470 | |
| }, | |
| { | |
| "epoch": 2.080808080808081, | |
| "grad_norm": 2.6611502170562744, | |
| "learning_rate": 6.246872139427602e-07, | |
| "loss": 0.9193134307861328, | |
| "step": 2472 | |
| }, | |
| { | |
| "epoch": 2.0824915824915826, | |
| "grad_norm": 5.997580528259277, | |
| "learning_rate": 6.232694447775316e-07, | |
| "loss": 0.4731786549091339, | |
| "step": 2474 | |
| }, | |
| { | |
| "epoch": 2.0841750841750843, | |
| "grad_norm": 6.405127048492432, | |
| "learning_rate": 6.218533181496541e-07, | |
| "loss": 0.57915198802948, | |
| "step": 2476 | |
| }, | |
| { | |
| "epoch": 2.0858585858585856, | |
| "grad_norm": 2.907135486602783, | |
| "learning_rate": 6.204388389382804e-07, | |
| "loss": 0.8042079210281372, | |
| "step": 2478 | |
| }, | |
| { | |
| "epoch": 2.0875420875420874, | |
| "grad_norm": 1.9114086627960205, | |
| "learning_rate": 6.190260120168855e-07, | |
| "loss": 0.6323788166046143, | |
| "step": 2480 | |
| }, | |
| { | |
| "epoch": 2.089225589225589, | |
| "grad_norm": 1.966032862663269, | |
| "learning_rate": 6.17614842253253e-07, | |
| "loss": 0.594678521156311, | |
| "step": 2482 | |
| }, | |
| { | |
| "epoch": 2.090909090909091, | |
| "grad_norm": 2.6483774185180664, | |
| "learning_rate": 6.162053345094569e-07, | |
| "loss": 0.9705860018730164, | |
| "step": 2484 | |
| }, | |
| { | |
| "epoch": 2.0925925925925926, | |
| "grad_norm": 5.268326282501221, | |
| "learning_rate": 6.147974936418436e-07, | |
| "loss": 0.6276801228523254, | |
| "step": 2486 | |
| }, | |
| { | |
| "epoch": 2.0942760942760943, | |
| "grad_norm": 2.3642375469207764, | |
| "learning_rate": 6.133913245010181e-07, | |
| "loss": 0.6014080047607422, | |
| "step": 2488 | |
| }, | |
| { | |
| "epoch": 2.095959595959596, | |
| "grad_norm": 5.21682071685791, | |
| "learning_rate": 6.119868319318244e-07, | |
| "loss": 0.7621322870254517, | |
| "step": 2490 | |
| }, | |
| { | |
| "epoch": 2.0976430976430978, | |
| "grad_norm": 3.3880903720855713, | |
| "learning_rate": 6.105840207733302e-07, | |
| "loss": 0.8144615888595581, | |
| "step": 2492 | |
| }, | |
| { | |
| "epoch": 2.0993265993265995, | |
| "grad_norm": 2.8465569019317627, | |
| "learning_rate": 6.091828958588101e-07, | |
| "loss": 0.499761700630188, | |
| "step": 2494 | |
| }, | |
| { | |
| "epoch": 2.101010101010101, | |
| "grad_norm": 4.338362216949463, | |
| "learning_rate": 6.077834620157296e-07, | |
| "loss": 0.9024825096130371, | |
| "step": 2496 | |
| }, | |
| { | |
| "epoch": 2.1026936026936025, | |
| "grad_norm": 2.8251841068267822, | |
| "learning_rate": 6.063857240657264e-07, | |
| "loss": 0.4348450303077698, | |
| "step": 2498 | |
| }, | |
| { | |
| "epoch": 2.1043771043771042, | |
| "grad_norm": 4.72477388381958, | |
| "learning_rate": 6.049896868245962e-07, | |
| "loss": 0.613303005695343, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 2.106060606060606, | |
| "grad_norm": 2.4342687129974365, | |
| "learning_rate": 6.035953551022748e-07, | |
| "loss": 0.9862151145935059, | |
| "step": 2502 | |
| }, | |
| { | |
| "epoch": 2.1077441077441077, | |
| "grad_norm": 11.250151634216309, | |
| "learning_rate": 6.022027337028212e-07, | |
| "loss": 0.7949624061584473, | |
| "step": 2504 | |
| }, | |
| { | |
| "epoch": 2.1094276094276094, | |
| "grad_norm": 8.595945358276367, | |
| "learning_rate": 6.008118274244025e-07, | |
| "loss": 0.844199538230896, | |
| "step": 2506 | |
| }, | |
| { | |
| "epoch": 2.111111111111111, | |
| "grad_norm": 5.953275680541992, | |
| "learning_rate": 5.994226410592762e-07, | |
| "loss": 0.47989651560783386, | |
| "step": 2508 | |
| }, | |
| { | |
| "epoch": 2.112794612794613, | |
| "grad_norm": 5.53914213180542, | |
| "learning_rate": 5.980351793937734e-07, | |
| "loss": 0.5320888757705688, | |
| "step": 2510 | |
| }, | |
| { | |
| "epoch": 2.1144781144781146, | |
| "grad_norm": 19.394433975219727, | |
| "learning_rate": 5.966494472082832e-07, | |
| "loss": 0.7170990705490112, | |
| "step": 2512 | |
| }, | |
| { | |
| "epoch": 2.1161616161616164, | |
| "grad_norm": 7.686086654663086, | |
| "learning_rate": 5.952654492772369e-07, | |
| "loss": 0.431751549243927, | |
| "step": 2514 | |
| }, | |
| { | |
| "epoch": 2.1178451178451176, | |
| "grad_norm": 3.4454784393310547, | |
| "learning_rate": 5.938831903690887e-07, | |
| "loss": 0.840388834476471, | |
| "step": 2516 | |
| }, | |
| { | |
| "epoch": 2.1195286195286194, | |
| "grad_norm": 4.63939094543457, | |
| "learning_rate": 5.925026752463027e-07, | |
| "loss": 0.17465031147003174, | |
| "step": 2518 | |
| }, | |
| { | |
| "epoch": 2.121212121212121, | |
| "grad_norm": 23.637449264526367, | |
| "learning_rate": 5.911239086653345e-07, | |
| "loss": 0.3789297044277191, | |
| "step": 2520 | |
| }, | |
| { | |
| "epoch": 2.122895622895623, | |
| "grad_norm": 4.005544185638428, | |
| "learning_rate": 5.89746895376614e-07, | |
| "loss": 0.20194318890571594, | |
| "step": 2522 | |
| }, | |
| { | |
| "epoch": 2.1245791245791246, | |
| "grad_norm": 3.130404233932495, | |
| "learning_rate": 5.883716401245329e-07, | |
| "loss": 0.40525293350219727, | |
| "step": 2524 | |
| }, | |
| { | |
| "epoch": 2.1262626262626263, | |
| "grad_norm": 14.998170852661133, | |
| "learning_rate": 5.869981476474235e-07, | |
| "loss": 0.2688121795654297, | |
| "step": 2526 | |
| }, | |
| { | |
| "epoch": 2.127946127946128, | |
| "grad_norm": 18.963912963867188, | |
| "learning_rate": 5.856264226775451e-07, | |
| "loss": 0.3136770725250244, | |
| "step": 2528 | |
| }, | |
| { | |
| "epoch": 2.1296296296296298, | |
| "grad_norm": 3.191150188446045, | |
| "learning_rate": 5.842564699410676e-07, | |
| "loss": 0.5626152753829956, | |
| "step": 2530 | |
| }, | |
| { | |
| "epoch": 2.1313131313131315, | |
| "grad_norm": 3.6382803916931152, | |
| "learning_rate": 5.828882941580548e-07, | |
| "loss": 0.7779805660247803, | |
| "step": 2532 | |
| }, | |
| { | |
| "epoch": 2.1329966329966332, | |
| "grad_norm": 3.3205113410949707, | |
| "learning_rate": 5.815219000424475e-07, | |
| "loss": 0.40261930227279663, | |
| "step": 2534 | |
| }, | |
| { | |
| "epoch": 2.1346801346801345, | |
| "grad_norm": 5.565113544464111, | |
| "learning_rate": 5.801572923020486e-07, | |
| "loss": 0.6595053672790527, | |
| "step": 2536 | |
| }, | |
| { | |
| "epoch": 2.1363636363636362, | |
| "grad_norm": 9.94298267364502, | |
| "learning_rate": 5.787944756385061e-07, | |
| "loss": 0.32748013734817505, | |
| "step": 2538 | |
| }, | |
| { | |
| "epoch": 2.138047138047138, | |
| "grad_norm": 0.7888699173927307, | |
| "learning_rate": 5.774334547472963e-07, | |
| "loss": 0.34032267332077026, | |
| "step": 2540 | |
| }, | |
| { | |
| "epoch": 2.1397306397306397, | |
| "grad_norm": 8.096704483032227, | |
| "learning_rate": 5.760742343177091e-07, | |
| "loss": 0.7002683281898499, | |
| "step": 2542 | |
| }, | |
| { | |
| "epoch": 2.1414141414141414, | |
| "grad_norm": 3.1933655738830566, | |
| "learning_rate": 5.747168190328313e-07, | |
| "loss": 0.10309363156557083, | |
| "step": 2544 | |
| }, | |
| { | |
| "epoch": 2.143097643097643, | |
| "grad_norm": 2.4028244018554688, | |
| "learning_rate": 5.73361213569529e-07, | |
| "loss": 0.323750376701355, | |
| "step": 2546 | |
| }, | |
| { | |
| "epoch": 2.144781144781145, | |
| "grad_norm": 1.90052330493927, | |
| "learning_rate": 5.720074225984335e-07, | |
| "loss": 0.6766308546066284, | |
| "step": 2548 | |
| }, | |
| { | |
| "epoch": 2.1464646464646466, | |
| "grad_norm": 2.1108572483062744, | |
| "learning_rate": 5.706554507839247e-07, | |
| "loss": 0.8565983772277832, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 2.148148148148148, | |
| "grad_norm": 2.810182571411133, | |
| "learning_rate": 5.693053027841139e-07, | |
| "loss": 0.4966258108615875, | |
| "step": 2552 | |
| }, | |
| { | |
| "epoch": 2.1498316498316496, | |
| "grad_norm": 75.41299438476562, | |
| "learning_rate": 5.679569832508294e-07, | |
| "loss": 0.2292374223470688, | |
| "step": 2554 | |
| }, | |
| { | |
| "epoch": 2.1515151515151514, | |
| "grad_norm": 3.583876132965088, | |
| "learning_rate": 5.666104968295993e-07, | |
| "loss": 0.4831843674182892, | |
| "step": 2556 | |
| }, | |
| { | |
| "epoch": 2.153198653198653, | |
| "grad_norm": 3.617044448852539, | |
| "learning_rate": 5.652658481596355e-07, | |
| "loss": 0.5890083312988281, | |
| "step": 2558 | |
| }, | |
| { | |
| "epoch": 2.154882154882155, | |
| "grad_norm": 4.1594061851501465, | |
| "learning_rate": 5.639230418738186e-07, | |
| "loss": 0.416708379983902, | |
| "step": 2560 | |
| }, | |
| { | |
| "epoch": 2.1565656565656566, | |
| "grad_norm": 4.285228252410889, | |
| "learning_rate": 5.625820825986818e-07, | |
| "loss": 0.477688729763031, | |
| "step": 2562 | |
| }, | |
| { | |
| "epoch": 2.1582491582491583, | |
| "grad_norm": 3.6317057609558105, | |
| "learning_rate": 5.61242974954393e-07, | |
| "loss": 0.6931259632110596, | |
| "step": 2564 | |
| }, | |
| { | |
| "epoch": 2.15993265993266, | |
| "grad_norm": 7.4866943359375, | |
| "learning_rate": 5.599057235547422e-07, | |
| "loss": 0.4877997040748596, | |
| "step": 2566 | |
| }, | |
| { | |
| "epoch": 2.1616161616161618, | |
| "grad_norm": 5.388299465179443, | |
| "learning_rate": 5.585703330071232e-07, | |
| "loss": 0.391178697347641, | |
| "step": 2568 | |
| }, | |
| { | |
| "epoch": 2.1632996632996635, | |
| "grad_norm": 2.264526605606079, | |
| "learning_rate": 5.572368079125177e-07, | |
| "loss": 0.9337778687477112, | |
| "step": 2570 | |
| }, | |
| { | |
| "epoch": 2.164983164983165, | |
| "grad_norm": 3.827529191970825, | |
| "learning_rate": 5.559051528654812e-07, | |
| "loss": 1.0406713485717773, | |
| "step": 2572 | |
| }, | |
| { | |
| "epoch": 2.1666666666666665, | |
| "grad_norm": 6.1171650886535645, | |
| "learning_rate": 5.545753724541259e-07, | |
| "loss": 0.7416504621505737, | |
| "step": 2574 | |
| }, | |
| { | |
| "epoch": 2.1683501683501682, | |
| "grad_norm": 9.12820053100586, | |
| "learning_rate": 5.532474712601041e-07, | |
| "loss": 0.1839454025030136, | |
| "step": 2576 | |
| }, | |
| { | |
| "epoch": 2.17003367003367, | |
| "grad_norm": 13.084949493408203, | |
| "learning_rate": 5.519214538585945e-07, | |
| "loss": 0.6754062175750732, | |
| "step": 2578 | |
| }, | |
| { | |
| "epoch": 2.1717171717171717, | |
| "grad_norm": 8.969803810119629, | |
| "learning_rate": 5.505973248182854e-07, | |
| "loss": 0.22235676646232605, | |
| "step": 2580 | |
| }, | |
| { | |
| "epoch": 2.1734006734006734, | |
| "grad_norm": 6.776020526885986, | |
| "learning_rate": 5.492750887013576e-07, | |
| "loss": 0.41986188292503357, | |
| "step": 2582 | |
| }, | |
| { | |
| "epoch": 2.175084175084175, | |
| "grad_norm": 15.121447563171387, | |
| "learning_rate": 5.479547500634716e-07, | |
| "loss": 0.31534767150878906, | |
| "step": 2584 | |
| }, | |
| { | |
| "epoch": 2.176767676767677, | |
| "grad_norm": 4.160110950469971, | |
| "learning_rate": 5.466363134537495e-07, | |
| "loss": 0.6025125980377197, | |
| "step": 2586 | |
| }, | |
| { | |
| "epoch": 2.1784511784511786, | |
| "grad_norm": 12.059831619262695, | |
| "learning_rate": 5.453197834147596e-07, | |
| "loss": 0.5609304904937744, | |
| "step": 2588 | |
| }, | |
| { | |
| "epoch": 2.18013468013468, | |
| "grad_norm": 8.022695541381836, | |
| "learning_rate": 5.440051644825024e-07, | |
| "loss": 0.6940740346908569, | |
| "step": 2590 | |
| }, | |
| { | |
| "epoch": 2.1818181818181817, | |
| "grad_norm": 11.945213317871094, | |
| "learning_rate": 5.426924611863932e-07, | |
| "loss": 0.523178219795227, | |
| "step": 2592 | |
| }, | |
| { | |
| "epoch": 2.1835016835016834, | |
| "grad_norm": 12.750484466552734, | |
| "learning_rate": 5.413816780492464e-07, | |
| "loss": 0.3450314402580261, | |
| "step": 2594 | |
| }, | |
| { | |
| "epoch": 2.185185185185185, | |
| "grad_norm": 5.865060329437256, | |
| "learning_rate": 5.400728195872627e-07, | |
| "loss": 0.6967110633850098, | |
| "step": 2596 | |
| }, | |
| { | |
| "epoch": 2.186868686868687, | |
| "grad_norm": 2.9188671112060547, | |
| "learning_rate": 5.387658903100093e-07, | |
| "loss": 0.8298006057739258, | |
| "step": 2598 | |
| }, | |
| { | |
| "epoch": 2.1885521885521886, | |
| "grad_norm": 8.126681327819824, | |
| "learning_rate": 5.374608947204078e-07, | |
| "loss": 0.5891833901405334, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 2.1902356902356903, | |
| "grad_norm": 1.921739101409912, | |
| "learning_rate": 5.361578373147173e-07, | |
| "loss": 0.7303223609924316, | |
| "step": 2602 | |
| }, | |
| { | |
| "epoch": 2.191919191919192, | |
| "grad_norm": 10.952816009521484, | |
| "learning_rate": 5.348567225825182e-07, | |
| "loss": 0.785490870475769, | |
| "step": 2604 | |
| }, | |
| { | |
| "epoch": 2.1936026936026938, | |
| "grad_norm": 9.251832008361816, | |
| "learning_rate": 5.335575550066987e-07, | |
| "loss": 0.46439725160598755, | |
| "step": 2606 | |
| }, | |
| { | |
| "epoch": 2.1952861952861955, | |
| "grad_norm": 5.436981201171875, | |
| "learning_rate": 5.322603390634379e-07, | |
| "loss": 0.895796000957489, | |
| "step": 2608 | |
| }, | |
| { | |
| "epoch": 2.196969696969697, | |
| "grad_norm": 3.214667320251465, | |
| "learning_rate": 5.3096507922219e-07, | |
| "loss": 0.6566123962402344, | |
| "step": 2610 | |
| }, | |
| { | |
| "epoch": 2.1986531986531985, | |
| "grad_norm": 41.99171447753906, | |
| "learning_rate": 5.296717799456703e-07, | |
| "loss": 0.32645493745803833, | |
| "step": 2612 | |
| }, | |
| { | |
| "epoch": 2.2003367003367003, | |
| "grad_norm": 6.42157506942749, | |
| "learning_rate": 5.283804456898393e-07, | |
| "loss": 0.7071173191070557, | |
| "step": 2614 | |
| }, | |
| { | |
| "epoch": 2.202020202020202, | |
| "grad_norm": 5.982941627502441, | |
| "learning_rate": 5.270910809038866e-07, | |
| "loss": 0.5429423451423645, | |
| "step": 2616 | |
| }, | |
| { | |
| "epoch": 2.2037037037037037, | |
| "grad_norm": 23.397838592529297, | |
| "learning_rate": 5.258036900302162e-07, | |
| "loss": 0.4608469009399414, | |
| "step": 2618 | |
| }, | |
| { | |
| "epoch": 2.2053872053872055, | |
| "grad_norm": 1.3942065238952637, | |
| "learning_rate": 5.245182775044319e-07, | |
| "loss": 0.24561887979507446, | |
| "step": 2620 | |
| }, | |
| { | |
| "epoch": 2.207070707070707, | |
| "grad_norm": 3.1465113162994385, | |
| "learning_rate": 5.2323484775532e-07, | |
| "loss": 0.5467818975448608, | |
| "step": 2622 | |
| }, | |
| { | |
| "epoch": 2.208754208754209, | |
| "grad_norm": 12.308442115783691, | |
| "learning_rate": 5.219534052048364e-07, | |
| "loss": 0.48555779457092285, | |
| "step": 2624 | |
| }, | |
| { | |
| "epoch": 2.2104377104377106, | |
| "grad_norm": 6.089041709899902, | |
| "learning_rate": 5.206739542680903e-07, | |
| "loss": 0.4167608618736267, | |
| "step": 2626 | |
| }, | |
| { | |
| "epoch": 2.212121212121212, | |
| "grad_norm": 7.500848293304443, | |
| "learning_rate": 5.193964993533275e-07, | |
| "loss": 0.5702179074287415, | |
| "step": 2628 | |
| }, | |
| { | |
| "epoch": 2.2138047138047137, | |
| "grad_norm": 10.495234489440918, | |
| "learning_rate": 5.181210448619185e-07, | |
| "loss": 0.2557629644870758, | |
| "step": 2630 | |
| }, | |
| { | |
| "epoch": 2.2154882154882154, | |
| "grad_norm": 2.5270442962646484, | |
| "learning_rate": 5.168475951883405e-07, | |
| "loss": 0.39183729887008667, | |
| "step": 2632 | |
| }, | |
| { | |
| "epoch": 2.217171717171717, | |
| "grad_norm": 2.1306686401367188, | |
| "learning_rate": 5.155761547201631e-07, | |
| "loss": 0.06966563314199448, | |
| "step": 2634 | |
| }, | |
| { | |
| "epoch": 2.218855218855219, | |
| "grad_norm": 4.132006645202637, | |
| "learning_rate": 5.143067278380339e-07, | |
| "loss": 0.7425806522369385, | |
| "step": 2636 | |
| }, | |
| { | |
| "epoch": 2.2205387205387206, | |
| "grad_norm": 2.9199447631835938, | |
| "learning_rate": 5.13039318915663e-07, | |
| "loss": 1.07930326461792, | |
| "step": 2638 | |
| }, | |
| { | |
| "epoch": 2.2222222222222223, | |
| "grad_norm": 2.4841439723968506, | |
| "learning_rate": 5.117739323198067e-07, | |
| "loss": 0.982938289642334, | |
| "step": 2640 | |
| }, | |
| { | |
| "epoch": 2.223905723905724, | |
| "grad_norm": 4.3581013679504395, | |
| "learning_rate": 5.105105724102547e-07, | |
| "loss": 0.5647614002227783, | |
| "step": 2642 | |
| }, | |
| { | |
| "epoch": 2.225589225589226, | |
| "grad_norm": 6.911370754241943, | |
| "learning_rate": 5.092492435398137e-07, | |
| "loss": 0.5829119086265564, | |
| "step": 2644 | |
| }, | |
| { | |
| "epoch": 2.227272727272727, | |
| "grad_norm": 4.011280059814453, | |
| "learning_rate": 5.079899500542917e-07, | |
| "loss": 0.5897196531295776, | |
| "step": 2646 | |
| }, | |
| { | |
| "epoch": 2.228956228956229, | |
| "grad_norm": 4.96337890625, | |
| "learning_rate": 5.067326962924848e-07, | |
| "loss": 0.2728573977947235, | |
| "step": 2648 | |
| }, | |
| { | |
| "epoch": 2.2306397306397305, | |
| "grad_norm": 6.272621154785156, | |
| "learning_rate": 5.054774865861617e-07, | |
| "loss": 0.9227702617645264, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 2.2323232323232323, | |
| "grad_norm": 4.739163875579834, | |
| "learning_rate": 5.042243252600475e-07, | |
| "loss": 0.5031465888023376, | |
| "step": 2652 | |
| }, | |
| { | |
| "epoch": 2.234006734006734, | |
| "grad_norm": 13.35574722290039, | |
| "learning_rate": 5.029732166318106e-07, | |
| "loss": 0.49748843908309937, | |
| "step": 2654 | |
| }, | |
| { | |
| "epoch": 2.2356902356902357, | |
| "grad_norm": 4.151340484619141, | |
| "learning_rate": 5.017241650120462e-07, | |
| "loss": 0.585181713104248, | |
| "step": 2656 | |
| }, | |
| { | |
| "epoch": 2.2373737373737375, | |
| "grad_norm": 17.889524459838867, | |
| "learning_rate": 5.004771747042631e-07, | |
| "loss": 0.7983870506286621, | |
| "step": 2658 | |
| }, | |
| { | |
| "epoch": 2.239057239057239, | |
| "grad_norm": 6.143094539642334, | |
| "learning_rate": 4.992322500048673e-07, | |
| "loss": 0.6713172197341919, | |
| "step": 2660 | |
| }, | |
| { | |
| "epoch": 2.240740740740741, | |
| "grad_norm": 3.4442899227142334, | |
| "learning_rate": 4.979893952031483e-07, | |
| "loss": 0.7296475768089294, | |
| "step": 2662 | |
| }, | |
| { | |
| "epoch": 2.242424242424242, | |
| "grad_norm": 16.668384552001953, | |
| "learning_rate": 4.96748614581264e-07, | |
| "loss": 0.3102848529815674, | |
| "step": 2664 | |
| }, | |
| { | |
| "epoch": 2.244107744107744, | |
| "grad_norm": 2.3950233459472656, | |
| "learning_rate": 4.955099124142251e-07, | |
| "loss": 0.712740421295166, | |
| "step": 2666 | |
| }, | |
| { | |
| "epoch": 2.2457912457912457, | |
| "grad_norm": 4.428253650665283, | |
| "learning_rate": 4.942732929698827e-07, | |
| "loss": 0.5821852684020996, | |
| "step": 2668 | |
| }, | |
| { | |
| "epoch": 2.2474747474747474, | |
| "grad_norm": 8.776701927185059, | |
| "learning_rate": 4.930387605089104e-07, | |
| "loss": 0.4474225640296936, | |
| "step": 2670 | |
| }, | |
| { | |
| "epoch": 2.249158249158249, | |
| "grad_norm": 3.6381278038024902, | |
| "learning_rate": 4.918063192847921e-07, | |
| "loss": 0.33651861548423767, | |
| "step": 2672 | |
| }, | |
| { | |
| "epoch": 2.250841750841751, | |
| "grad_norm": 4.837399482727051, | |
| "learning_rate": 4.905759735438068e-07, | |
| "loss": 0.5961496829986572, | |
| "step": 2674 | |
| }, | |
| { | |
| "epoch": 2.2525252525252526, | |
| "grad_norm": 2.985142946243286, | |
| "learning_rate": 4.893477275250127e-07, | |
| "loss": 0.6518359184265137, | |
| "step": 2676 | |
| }, | |
| { | |
| "epoch": 2.2542087542087543, | |
| "grad_norm": 7.1583943367004395, | |
| "learning_rate": 4.881215854602342e-07, | |
| "loss": 0.4896303117275238, | |
| "step": 2678 | |
| }, | |
| { | |
| "epoch": 2.255892255892256, | |
| "grad_norm": 1.9810396432876587, | |
| "learning_rate": 4.868975515740471e-07, | |
| "loss": 0.8590680956840515, | |
| "step": 2680 | |
| }, | |
| { | |
| "epoch": 2.257575757575758, | |
| "grad_norm": 7.562203884124756, | |
| "learning_rate": 4.856756300837625e-07, | |
| "loss": 0.18953704833984375, | |
| "step": 2682 | |
| }, | |
| { | |
| "epoch": 2.259259259259259, | |
| "grad_norm": 7.8364481925964355, | |
| "learning_rate": 4.844558251994146e-07, | |
| "loss": 0.12749773263931274, | |
| "step": 2684 | |
| }, | |
| { | |
| "epoch": 2.260942760942761, | |
| "grad_norm": 3.5520970821380615, | |
| "learning_rate": 4.832381411237444e-07, | |
| "loss": 0.6111665964126587, | |
| "step": 2686 | |
| }, | |
| { | |
| "epoch": 2.2626262626262625, | |
| "grad_norm": 4.207799911499023, | |
| "learning_rate": 4.820225820521855e-07, | |
| "loss": 0.36922651529312134, | |
| "step": 2688 | |
| }, | |
| { | |
| "epoch": 2.2643097643097643, | |
| "grad_norm": 1.94363534450531, | |
| "learning_rate": 4.808091521728506e-07, | |
| "loss": 0.9025669097900391, | |
| "step": 2690 | |
| }, | |
| { | |
| "epoch": 2.265993265993266, | |
| "grad_norm": 14.200057029724121, | |
| "learning_rate": 4.795978556665165e-07, | |
| "loss": 0.8429475426673889, | |
| "step": 2692 | |
| }, | |
| { | |
| "epoch": 2.2676767676767677, | |
| "grad_norm": 3.5672523975372314, | |
| "learning_rate": 4.783886967066088e-07, | |
| "loss": 0.6566574573516846, | |
| "step": 2694 | |
| }, | |
| { | |
| "epoch": 2.2693602693602695, | |
| "grad_norm": 4.338009357452393, | |
| "learning_rate": 4.77181679459189e-07, | |
| "loss": 0.5327779054641724, | |
| "step": 2696 | |
| }, | |
| { | |
| "epoch": 2.271043771043771, | |
| "grad_norm": 2.5908162593841553, | |
| "learning_rate": 4.759768080829399e-07, | |
| "loss": 0.624381959438324, | |
| "step": 2698 | |
| }, | |
| { | |
| "epoch": 2.2727272727272725, | |
| "grad_norm": 6.710553169250488, | |
| "learning_rate": 4.747740867291497e-07, | |
| "loss": 0.7681624889373779, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 2.274410774410774, | |
| "grad_norm": 2.840843915939331, | |
| "learning_rate": 4.7357351954169973e-07, | |
| "loss": 0.49092429876327515, | |
| "step": 2702 | |
| }, | |
| { | |
| "epoch": 2.276094276094276, | |
| "grad_norm": 2.1035234928131104, | |
| "learning_rate": 4.7237511065704933e-07, | |
| "loss": 0.8667645454406738, | |
| "step": 2704 | |
| }, | |
| { | |
| "epoch": 2.2777777777777777, | |
| "grad_norm": 3.245436429977417, | |
| "learning_rate": 4.7117886420422094e-07, | |
| "loss": 0.9094717502593994, | |
| "step": 2706 | |
| }, | |
| { | |
| "epoch": 2.2794612794612794, | |
| "grad_norm": 2.4817285537719727, | |
| "learning_rate": 4.6998478430478714e-07, | |
| "loss": 0.351574569940567, | |
| "step": 2708 | |
| }, | |
| { | |
| "epoch": 2.281144781144781, | |
| "grad_norm": 5.749747276306152, | |
| "learning_rate": 4.6879287507285596e-07, | |
| "loss": 0.5877597332000732, | |
| "step": 2710 | |
| }, | |
| { | |
| "epoch": 2.282828282828283, | |
| "grad_norm": 9.687824249267578, | |
| "learning_rate": 4.676031406150555e-07, | |
| "loss": 0.5526677370071411, | |
| "step": 2712 | |
| }, | |
| { | |
| "epoch": 2.2845117845117846, | |
| "grad_norm": 3.64471435546875, | |
| "learning_rate": 4.66415585030522e-07, | |
| "loss": 0.4332752227783203, | |
| "step": 2714 | |
| }, | |
| { | |
| "epoch": 2.2861952861952863, | |
| "grad_norm": 7.181333065032959, | |
| "learning_rate": 4.6523021241088416e-07, | |
| "loss": 0.7148293256759644, | |
| "step": 2716 | |
| }, | |
| { | |
| "epoch": 2.287878787878788, | |
| "grad_norm": 4.991126537322998, | |
| "learning_rate": 4.6404702684024905e-07, | |
| "loss": 0.5515605807304382, | |
| "step": 2718 | |
| }, | |
| { | |
| "epoch": 2.28956228956229, | |
| "grad_norm": 10.846860885620117, | |
| "learning_rate": 4.628660323951891e-07, | |
| "loss": 0.5390480160713196, | |
| "step": 2720 | |
| }, | |
| { | |
| "epoch": 2.291245791245791, | |
| "grad_norm": 3.9083449840545654, | |
| "learning_rate": 4.616872331447272e-07, | |
| "loss": 0.63498854637146, | |
| "step": 2722 | |
| }, | |
| { | |
| "epoch": 2.292929292929293, | |
| "grad_norm": 6.314955234527588, | |
| "learning_rate": 4.605106331503223e-07, | |
| "loss": 0.6880998611450195, | |
| "step": 2724 | |
| }, | |
| { | |
| "epoch": 2.2946127946127945, | |
| "grad_norm": 3.322652816772461, | |
| "learning_rate": 4.5933623646585683e-07, | |
| "loss": 0.6316101551055908, | |
| "step": 2726 | |
| }, | |
| { | |
| "epoch": 2.2962962962962963, | |
| "grad_norm": 5.35445499420166, | |
| "learning_rate": 4.581640471376215e-07, | |
| "loss": 0.5416774749755859, | |
| "step": 2728 | |
| }, | |
| { | |
| "epoch": 2.297979797979798, | |
| "grad_norm": 6.625260353088379, | |
| "learning_rate": 4.5699406920430155e-07, | |
| "loss": 0.972043514251709, | |
| "step": 2730 | |
| }, | |
| { | |
| "epoch": 2.2996632996632997, | |
| "grad_norm": 3.9685635566711426, | |
| "learning_rate": 4.5582630669696324e-07, | |
| "loss": 0.5268035531044006, | |
| "step": 2732 | |
| }, | |
| { | |
| "epoch": 2.3013468013468015, | |
| "grad_norm": 9.009088516235352, | |
| "learning_rate": 4.5466076363904e-07, | |
| "loss": 0.4689450263977051, | |
| "step": 2734 | |
| }, | |
| { | |
| "epoch": 2.303030303030303, | |
| "grad_norm": 6.697409629821777, | |
| "learning_rate": 4.5349744404631785e-07, | |
| "loss": 0.43555888533592224, | |
| "step": 2736 | |
| }, | |
| { | |
| "epoch": 2.3047138047138045, | |
| "grad_norm": 9.158797264099121, | |
| "learning_rate": 4.5233635192692206e-07, | |
| "loss": 0.5540938377380371, | |
| "step": 2738 | |
| }, | |
| { | |
| "epoch": 2.3063973063973062, | |
| "grad_norm": 18.85773468017578, | |
| "learning_rate": 4.511774912813043e-07, | |
| "loss": 0.4014560580253601, | |
| "step": 2740 | |
| }, | |
| { | |
| "epoch": 2.308080808080808, | |
| "grad_norm": 1.82210111618042, | |
| "learning_rate": 4.5002086610222626e-07, | |
| "loss": 0.7727656364440918, | |
| "step": 2742 | |
| }, | |
| { | |
| "epoch": 2.3097643097643097, | |
| "grad_norm": 3.7924273014068604, | |
| "learning_rate": 4.488664803747487e-07, | |
| "loss": 0.7189053297042847, | |
| "step": 2744 | |
| }, | |
| { | |
| "epoch": 2.3114478114478114, | |
| "grad_norm": 3.0608716011047363, | |
| "learning_rate": 4.4771433807621644e-07, | |
| "loss": 0.7668474912643433, | |
| "step": 2746 | |
| }, | |
| { | |
| "epoch": 2.313131313131313, | |
| "grad_norm": 5.792914867401123, | |
| "learning_rate": 4.4656444317624397e-07, | |
| "loss": 0.6078014373779297, | |
| "step": 2748 | |
| }, | |
| { | |
| "epoch": 2.314814814814815, | |
| "grad_norm": 1.747604250907898, | |
| "learning_rate": 4.454167996367032e-07, | |
| "loss": 0.10793264210224152, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 2.3164983164983166, | |
| "grad_norm": 4.28343391418457, | |
| "learning_rate": 4.442714114117092e-07, | |
| "loss": 0.33263859152793884, | |
| "step": 2752 | |
| }, | |
| { | |
| "epoch": 2.3181818181818183, | |
| "grad_norm": 2.2499372959136963, | |
| "learning_rate": 4.4312828244760613e-07, | |
| "loss": 0.39961159229278564, | |
| "step": 2754 | |
| }, | |
| { | |
| "epoch": 2.31986531986532, | |
| "grad_norm": 3.355552911758423, | |
| "learning_rate": 4.4198741668295425e-07, | |
| "loss": 0.8770014047622681, | |
| "step": 2756 | |
| }, | |
| { | |
| "epoch": 2.3215488215488214, | |
| "grad_norm": 2.2010586261749268, | |
| "learning_rate": 4.4084881804851644e-07, | |
| "loss": 0.5539072751998901, | |
| "step": 2758 | |
| }, | |
| { | |
| "epoch": 2.323232323232323, | |
| "grad_norm": 4.903811931610107, | |
| "learning_rate": 4.397124904672437e-07, | |
| "loss": 0.6975724697113037, | |
| "step": 2760 | |
| }, | |
| { | |
| "epoch": 2.324915824915825, | |
| "grad_norm": 5.035953044891357, | |
| "learning_rate": 4.3857843785426263e-07, | |
| "loss": 0.5050334334373474, | |
| "step": 2762 | |
| }, | |
| { | |
| "epoch": 2.3265993265993266, | |
| "grad_norm": 3.3227932453155518, | |
| "learning_rate": 4.374466641168622e-07, | |
| "loss": 0.8777497410774231, | |
| "step": 2764 | |
| }, | |
| { | |
| "epoch": 2.3282828282828283, | |
| "grad_norm": 4.905037879943848, | |
| "learning_rate": 4.363171731544786e-07, | |
| "loss": 0.7257252931594849, | |
| "step": 2766 | |
| }, | |
| { | |
| "epoch": 2.32996632996633, | |
| "grad_norm": 2.3318030834198, | |
| "learning_rate": 4.351899688586834e-07, | |
| "loss": 0.5315639972686768, | |
| "step": 2768 | |
| }, | |
| { | |
| "epoch": 2.3316498316498318, | |
| "grad_norm": 12.677505493164062, | |
| "learning_rate": 4.3406505511317025e-07, | |
| "loss": 0.6226543188095093, | |
| "step": 2770 | |
| }, | |
| { | |
| "epoch": 2.3333333333333335, | |
| "grad_norm": 3.6738951206207275, | |
| "learning_rate": 4.329424357937397e-07, | |
| "loss": 0.5986767411231995, | |
| "step": 2772 | |
| }, | |
| { | |
| "epoch": 2.3350168350168348, | |
| "grad_norm": 3.570671558380127, | |
| "learning_rate": 4.318221147682879e-07, | |
| "loss": 0.693830132484436, | |
| "step": 2774 | |
| }, | |
| { | |
| "epoch": 2.3367003367003365, | |
| "grad_norm": 3.0889062881469727, | |
| "learning_rate": 4.307040958967924e-07, | |
| "loss": 0.6411426663398743, | |
| "step": 2776 | |
| }, | |
| { | |
| "epoch": 2.3383838383838382, | |
| "grad_norm": 4.422166347503662, | |
| "learning_rate": 4.2958838303129817e-07, | |
| "loss": 0.45083481073379517, | |
| "step": 2778 | |
| }, | |
| { | |
| "epoch": 2.34006734006734, | |
| "grad_norm": 29.303316116333008, | |
| "learning_rate": 4.2847498001590573e-07, | |
| "loss": 0.6881177425384521, | |
| "step": 2780 | |
| }, | |
| { | |
| "epoch": 2.3417508417508417, | |
| "grad_norm": 2.217395544052124, | |
| "learning_rate": 4.273638906867573e-07, | |
| "loss": 0.5657017230987549, | |
| "step": 2782 | |
| }, | |
| { | |
| "epoch": 2.3434343434343434, | |
| "grad_norm": 10.195280075073242, | |
| "learning_rate": 4.2625511887202225e-07, | |
| "loss": 0.7839221954345703, | |
| "step": 2784 | |
| }, | |
| { | |
| "epoch": 2.345117845117845, | |
| "grad_norm": 2.6481029987335205, | |
| "learning_rate": 4.2514866839188657e-07, | |
| "loss": 0.5463940501213074, | |
| "step": 2786 | |
| }, | |
| { | |
| "epoch": 2.346801346801347, | |
| "grad_norm": 2.2342593669891357, | |
| "learning_rate": 4.2404454305853796e-07, | |
| "loss": 0.8763151168823242, | |
| "step": 2788 | |
| }, | |
| { | |
| "epoch": 2.3484848484848486, | |
| "grad_norm": 4.609320640563965, | |
| "learning_rate": 4.229427466761522e-07, | |
| "loss": 0.7232416868209839, | |
| "step": 2790 | |
| }, | |
| { | |
| "epoch": 2.3501683501683504, | |
| "grad_norm": 6.990656852722168, | |
| "learning_rate": 4.2184328304088164e-07, | |
| "loss": 0.5656273365020752, | |
| "step": 2792 | |
| }, | |
| { | |
| "epoch": 2.351851851851852, | |
| "grad_norm": 10.642841339111328, | |
| "learning_rate": 4.2074615594084146e-07, | |
| "loss": 0.6187400817871094, | |
| "step": 2794 | |
| }, | |
| { | |
| "epoch": 2.3535353535353534, | |
| "grad_norm": 3.1630921363830566, | |
| "learning_rate": 4.1965136915609543e-07, | |
| "loss": 0.9885926246643066, | |
| "step": 2796 | |
| }, | |
| { | |
| "epoch": 2.355218855218855, | |
| "grad_norm": 13.756888389587402, | |
| "learning_rate": 4.1855892645864513e-07, | |
| "loss": 0.45941799879074097, | |
| "step": 2798 | |
| }, | |
| { | |
| "epoch": 2.356902356902357, | |
| "grad_norm": 2.228693962097168, | |
| "learning_rate": 4.1746883161241555e-07, | |
| "loss": 0.9851700067520142, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 2.3585858585858586, | |
| "grad_norm": 2.863492965698242, | |
| "learning_rate": 4.1638108837324137e-07, | |
| "loss": 0.9169178009033203, | |
| "step": 2802 | |
| }, | |
| { | |
| "epoch": 2.3602693602693603, | |
| "grad_norm": 3.3131117820739746, | |
| "learning_rate": 4.152957004888563e-07, | |
| "loss": 0.7946122884750366, | |
| "step": 2804 | |
| }, | |
| { | |
| "epoch": 2.361952861952862, | |
| "grad_norm": 6.783644676208496, | |
| "learning_rate": 4.142126716988784e-07, | |
| "loss": 0.7735965847969055, | |
| "step": 2806 | |
| }, | |
| { | |
| "epoch": 2.3636363636363638, | |
| "grad_norm": 3.6407532691955566, | |
| "learning_rate": 4.131320057347969e-07, | |
| "loss": 0.802727460861206, | |
| "step": 2808 | |
| }, | |
| { | |
| "epoch": 2.3653198653198655, | |
| "grad_norm": 3.4392080307006836, | |
| "learning_rate": 4.120537063199612e-07, | |
| "loss": 1.0042896270751953, | |
| "step": 2810 | |
| }, | |
| { | |
| "epoch": 2.3670033670033668, | |
| "grad_norm": 15.25992202758789, | |
| "learning_rate": 4.109777771695663e-07, | |
| "loss": 0.7024844288825989, | |
| "step": 2812 | |
| }, | |
| { | |
| "epoch": 2.3686868686868685, | |
| "grad_norm": 2.76926589012146, | |
| "learning_rate": 4.0990422199064103e-07, | |
| "loss": 0.6036837100982666, | |
| "step": 2814 | |
| }, | |
| { | |
| "epoch": 2.3703703703703702, | |
| "grad_norm": 4.845790386199951, | |
| "learning_rate": 4.0883304448203477e-07, | |
| "loss": 0.484286904335022, | |
| "step": 2816 | |
| }, | |
| { | |
| "epoch": 2.372053872053872, | |
| "grad_norm": 3.267883777618408, | |
| "learning_rate": 4.077642483344044e-07, | |
| "loss": 0.5557587146759033, | |
| "step": 2818 | |
| }, | |
| { | |
| "epoch": 2.3737373737373737, | |
| "grad_norm": 5.12905216217041, | |
| "learning_rate": 4.066978372302025e-07, | |
| "loss": 0.6941782236099243, | |
| "step": 2820 | |
| }, | |
| { | |
| "epoch": 2.3754208754208754, | |
| "grad_norm": 3.630934953689575, | |
| "learning_rate": 4.056338148436643e-07, | |
| "loss": 0.4251060485839844, | |
| "step": 2822 | |
| }, | |
| { | |
| "epoch": 2.377104377104377, | |
| "grad_norm": 5.501477241516113, | |
| "learning_rate": 4.0457218484079414e-07, | |
| "loss": 0.9760651588439941, | |
| "step": 2824 | |
| }, | |
| { | |
| "epoch": 2.378787878787879, | |
| "grad_norm": 3.194762945175171, | |
| "learning_rate": 4.035129508793542e-07, | |
| "loss": 0.8394796848297119, | |
| "step": 2826 | |
| }, | |
| { | |
| "epoch": 2.3804713804713806, | |
| "grad_norm": 689.3011474609375, | |
| "learning_rate": 4.024561166088516e-07, | |
| "loss": 0.4385402798652649, | |
| "step": 2828 | |
| }, | |
| { | |
| "epoch": 2.3821548821548824, | |
| "grad_norm": 8.300933837890625, | |
| "learning_rate": 4.0140168567052447e-07, | |
| "loss": 0.932929277420044, | |
| "step": 2830 | |
| }, | |
| { | |
| "epoch": 2.3838383838383836, | |
| "grad_norm": 20.601125717163086, | |
| "learning_rate": 4.003496616973312e-07, | |
| "loss": 0.6770232915878296, | |
| "step": 2832 | |
| }, | |
| { | |
| "epoch": 2.3855218855218854, | |
| "grad_norm": 7.719077110290527, | |
| "learning_rate": 3.9930004831393757e-07, | |
| "loss": 0.5193581581115723, | |
| "step": 2834 | |
| }, | |
| { | |
| "epoch": 2.387205387205387, | |
| "grad_norm": 3.433854341506958, | |
| "learning_rate": 3.982528491367025e-07, | |
| "loss": 0.5733506679534912, | |
| "step": 2836 | |
| }, | |
| { | |
| "epoch": 2.388888888888889, | |
| "grad_norm": 5.136038780212402, | |
| "learning_rate": 3.9720806777366817e-07, | |
| "loss": 0.47218313813209534, | |
| "step": 2838 | |
| }, | |
| { | |
| "epoch": 2.3905723905723906, | |
| "grad_norm": 1.433040976524353, | |
| "learning_rate": 3.961657078245462e-07, | |
| "loss": 0.8041648864746094, | |
| "step": 2840 | |
| }, | |
| { | |
| "epoch": 2.3922558922558923, | |
| "grad_norm": 0.9403243660926819, | |
| "learning_rate": 3.9512577288070487e-07, | |
| "loss": 0.3452025055885315, | |
| "step": 2842 | |
| }, | |
| { | |
| "epoch": 2.393939393939394, | |
| "grad_norm": 2.0302951335906982, | |
| "learning_rate": 3.940882665251576e-07, | |
| "loss": 0.9638313055038452, | |
| "step": 2844 | |
| }, | |
| { | |
| "epoch": 2.3956228956228958, | |
| "grad_norm": 2.591130495071411, | |
| "learning_rate": 3.930531923325506e-07, | |
| "loss": 0.7442007064819336, | |
| "step": 2846 | |
| }, | |
| { | |
| "epoch": 2.3973063973063975, | |
| "grad_norm": 4.4280548095703125, | |
| "learning_rate": 3.920205538691497e-07, | |
| "loss": 0.953087329864502, | |
| "step": 2848 | |
| }, | |
| { | |
| "epoch": 2.398989898989899, | |
| "grad_norm": 2.4256279468536377, | |
| "learning_rate": 3.9099035469282906e-07, | |
| "loss": 0.7336077094078064, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 2.4006734006734005, | |
| "grad_norm": 8.586638450622559, | |
| "learning_rate": 3.8996259835305835e-07, | |
| "loss": 0.390910804271698, | |
| "step": 2852 | |
| }, | |
| { | |
| "epoch": 2.4023569023569022, | |
| "grad_norm": 32.83812713623047, | |
| "learning_rate": 3.8893728839089035e-07, | |
| "loss": 0.609326958656311, | |
| "step": 2854 | |
| }, | |
| { | |
| "epoch": 2.404040404040404, | |
| "grad_norm": 4.8817458152771, | |
| "learning_rate": 3.879144283389495e-07, | |
| "loss": 0.5054650902748108, | |
| "step": 2856 | |
| }, | |
| { | |
| "epoch": 2.4057239057239057, | |
| "grad_norm": 6.203306198120117, | |
| "learning_rate": 3.8689402172141915e-07, | |
| "loss": 0.6514500975608826, | |
| "step": 2858 | |
| }, | |
| { | |
| "epoch": 2.4074074074074074, | |
| "grad_norm": 5.882429122924805, | |
| "learning_rate": 3.8587607205402916e-07, | |
| "loss": 0.41622331738471985, | |
| "step": 2860 | |
| }, | |
| { | |
| "epoch": 2.409090909090909, | |
| "grad_norm": 2.390727996826172, | |
| "learning_rate": 3.848605828440444e-07, | |
| "loss": 0.7136590480804443, | |
| "step": 2862 | |
| }, | |
| { | |
| "epoch": 2.410774410774411, | |
| "grad_norm": 6.754751682281494, | |
| "learning_rate": 3.8384755759025313e-07, | |
| "loss": 0.4541894793510437, | |
| "step": 2864 | |
| }, | |
| { | |
| "epoch": 2.4124579124579126, | |
| "grad_norm": 3.0260815620422363, | |
| "learning_rate": 3.828369997829528e-07, | |
| "loss": 0.6994350552558899, | |
| "step": 2866 | |
| }, | |
| { | |
| "epoch": 2.4141414141414144, | |
| "grad_norm": 2.372957706451416, | |
| "learning_rate": 3.818289129039405e-07, | |
| "loss": 0.8106458187103271, | |
| "step": 2868 | |
| }, | |
| { | |
| "epoch": 2.4158249158249157, | |
| "grad_norm": 2.801581621170044, | |
| "learning_rate": 3.808233004264997e-07, | |
| "loss": 0.5665256977081299, | |
| "step": 2870 | |
| }, | |
| { | |
| "epoch": 2.4175084175084174, | |
| "grad_norm": 3.397507905960083, | |
| "learning_rate": 3.79820165815389e-07, | |
| "loss": 0.44936102628707886, | |
| "step": 2872 | |
| }, | |
| { | |
| "epoch": 2.419191919191919, | |
| "grad_norm": 2.3020706176757812, | |
| "learning_rate": 3.788195125268284e-07, | |
| "loss": 0.8391485214233398, | |
| "step": 2874 | |
| }, | |
| { | |
| "epoch": 2.420875420875421, | |
| "grad_norm": 3.2758114337921143, | |
| "learning_rate": 3.7782134400848995e-07, | |
| "loss": 0.7489950656890869, | |
| "step": 2876 | |
| }, | |
| { | |
| "epoch": 2.4225589225589226, | |
| "grad_norm": 5.947027206420898, | |
| "learning_rate": 3.768256636994843e-07, | |
| "loss": 0.4590849280357361, | |
| "step": 2878 | |
| }, | |
| { | |
| "epoch": 2.4242424242424243, | |
| "grad_norm": 2.203789234161377, | |
| "learning_rate": 3.7583247503034864e-07, | |
| "loss": 0.7745201587677002, | |
| "step": 2880 | |
| }, | |
| { | |
| "epoch": 2.425925925925926, | |
| "grad_norm": 3.3688504695892334, | |
| "learning_rate": 3.7484178142303625e-07, | |
| "loss": 0.5334046483039856, | |
| "step": 2882 | |
| }, | |
| { | |
| "epoch": 2.4276094276094278, | |
| "grad_norm": 6.785653114318848, | |
| "learning_rate": 3.738535862909031e-07, | |
| "loss": 0.5028021335601807, | |
| "step": 2884 | |
| }, | |
| { | |
| "epoch": 2.429292929292929, | |
| "grad_norm": 2.8243677616119385, | |
| "learning_rate": 3.7286789303869735e-07, | |
| "loss": 0.5118685960769653, | |
| "step": 2886 | |
| }, | |
| { | |
| "epoch": 2.430976430976431, | |
| "grad_norm": 9.112323760986328, | |
| "learning_rate": 3.7188470506254744e-07, | |
| "loss": 0.5720535516738892, | |
| "step": 2888 | |
| }, | |
| { | |
| "epoch": 2.4326599326599325, | |
| "grad_norm": 2.4455068111419678, | |
| "learning_rate": 3.7090402574994885e-07, | |
| "loss": 0.5391176342964172, | |
| "step": 2890 | |
| }, | |
| { | |
| "epoch": 2.4343434343434343, | |
| "grad_norm": 5.355926990509033, | |
| "learning_rate": 3.699258584797548e-07, | |
| "loss": 0.6294881105422974, | |
| "step": 2892 | |
| }, | |
| { | |
| "epoch": 2.436026936026936, | |
| "grad_norm": 2.457951545715332, | |
| "learning_rate": 3.6895020662216326e-07, | |
| "loss": 0.9022385478019714, | |
| "step": 2894 | |
| }, | |
| { | |
| "epoch": 2.4377104377104377, | |
| "grad_norm": 7.03529167175293, | |
| "learning_rate": 3.679770735387052e-07, | |
| "loss": 0.720146656036377, | |
| "step": 2896 | |
| }, | |
| { | |
| "epoch": 2.4393939393939394, | |
| "grad_norm": 10.114142417907715, | |
| "learning_rate": 3.6700646258223343e-07, | |
| "loss": 0.6195645332336426, | |
| "step": 2898 | |
| }, | |
| { | |
| "epoch": 2.441077441077441, | |
| "grad_norm": 5.667145729064941, | |
| "learning_rate": 3.6603837709691153e-07, | |
| "loss": 0.43182432651519775, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 2.442760942760943, | |
| "grad_norm": 13.144913673400879, | |
| "learning_rate": 3.6507282041820085e-07, | |
| "loss": 0.7789742350578308, | |
| "step": 2902 | |
| }, | |
| { | |
| "epoch": 2.4444444444444446, | |
| "grad_norm": 9.248213768005371, | |
| "learning_rate": 3.641097958728506e-07, | |
| "loss": 0.48242291808128357, | |
| "step": 2904 | |
| }, | |
| { | |
| "epoch": 2.4461279461279464, | |
| "grad_norm": 2.1247684955596924, | |
| "learning_rate": 3.631493067788858e-07, | |
| "loss": 0.3829724192619324, | |
| "step": 2906 | |
| }, | |
| { | |
| "epoch": 2.4478114478114477, | |
| "grad_norm": 5.711479663848877, | |
| "learning_rate": 3.6219135644559506e-07, | |
| "loss": 0.5261117815971375, | |
| "step": 2908 | |
| }, | |
| { | |
| "epoch": 2.4494949494949494, | |
| "grad_norm": 9.852108001708984, | |
| "learning_rate": 3.6123594817352046e-07, | |
| "loss": 0.6702965497970581, | |
| "step": 2910 | |
| }, | |
| { | |
| "epoch": 2.451178451178451, | |
| "grad_norm": 6.790271282196045, | |
| "learning_rate": 3.602830852544458e-07, | |
| "loss": 0.4730827212333679, | |
| "step": 2912 | |
| }, | |
| { | |
| "epoch": 2.452861952861953, | |
| "grad_norm": 8.912752151489258, | |
| "learning_rate": 3.593327709713844e-07, | |
| "loss": 0.7823283076286316, | |
| "step": 2914 | |
| }, | |
| { | |
| "epoch": 2.4545454545454546, | |
| "grad_norm": 4.171782970428467, | |
| "learning_rate": 3.5838500859856893e-07, | |
| "loss": 0.6686667203903198, | |
| "step": 2916 | |
| }, | |
| { | |
| "epoch": 2.4562289562289563, | |
| "grad_norm": 2.5204222202301025, | |
| "learning_rate": 3.5743980140143975e-07, | |
| "loss": 0.3113139867782593, | |
| "step": 2918 | |
| }, | |
| { | |
| "epoch": 2.457912457912458, | |
| "grad_norm": 3.9417402744293213, | |
| "learning_rate": 3.5649715263663297e-07, | |
| "loss": 0.7965060472488403, | |
| "step": 2920 | |
| }, | |
| { | |
| "epoch": 2.45959595959596, | |
| "grad_norm": 98.92294311523438, | |
| "learning_rate": 3.5555706555197043e-07, | |
| "loss": 0.43743637204170227, | |
| "step": 2922 | |
| }, | |
| { | |
| "epoch": 2.461279461279461, | |
| "grad_norm": 3.686532974243164, | |
| "learning_rate": 3.5461954338644795e-07, | |
| "loss": 0.30664563179016113, | |
| "step": 2924 | |
| }, | |
| { | |
| "epoch": 2.462962962962963, | |
| "grad_norm": 2.410140037536621, | |
| "learning_rate": 3.536845893702234e-07, | |
| "loss": 0.5530849695205688, | |
| "step": 2926 | |
| }, | |
| { | |
| "epoch": 2.4646464646464645, | |
| "grad_norm": 24.317949295043945, | |
| "learning_rate": 3.527522067246068e-07, | |
| "loss": 0.5903668403625488, | |
| "step": 2928 | |
| }, | |
| { | |
| "epoch": 2.4663299663299663, | |
| "grad_norm": 3.0360710620880127, | |
| "learning_rate": 3.518223986620491e-07, | |
| "loss": 0.24971121549606323, | |
| "step": 2930 | |
| }, | |
| { | |
| "epoch": 2.468013468013468, | |
| "grad_norm": 5.305819511413574, | |
| "learning_rate": 3.5089516838612986e-07, | |
| "loss": 0.654639482498169, | |
| "step": 2932 | |
| }, | |
| { | |
| "epoch": 2.4696969696969697, | |
| "grad_norm": 6.428488254547119, | |
| "learning_rate": 3.499705190915476e-07, | |
| "loss": 0.6544331312179565, | |
| "step": 2934 | |
| }, | |
| { | |
| "epoch": 2.4713804713804715, | |
| "grad_norm": 5.150181293487549, | |
| "learning_rate": 3.4904845396410854e-07, | |
| "loss": 0.4527553915977478, | |
| "step": 2936 | |
| }, | |
| { | |
| "epoch": 2.473063973063973, | |
| "grad_norm": 9.783395767211914, | |
| "learning_rate": 3.4812897618071445e-07, | |
| "loss": 0.5435815453529358, | |
| "step": 2938 | |
| }, | |
| { | |
| "epoch": 2.474747474747475, | |
| "grad_norm": 5.587001800537109, | |
| "learning_rate": 3.472120889093536e-07, | |
| "loss": 0.4773102402687073, | |
| "step": 2940 | |
| }, | |
| { | |
| "epoch": 2.4764309764309766, | |
| "grad_norm": 4.579451084136963, | |
| "learning_rate": 3.462977953090884e-07, | |
| "loss": 0.40418028831481934, | |
| "step": 2942 | |
| }, | |
| { | |
| "epoch": 2.478114478114478, | |
| "grad_norm": 8.405234336853027, | |
| "learning_rate": 3.453860985300446e-07, | |
| "loss": 0.43912988901138306, | |
| "step": 2944 | |
| }, | |
| { | |
| "epoch": 2.4797979797979797, | |
| "grad_norm": 2.54058837890625, | |
| "learning_rate": 3.4447700171340164e-07, | |
| "loss": 0.9208707213401794, | |
| "step": 2946 | |
| }, | |
| { | |
| "epoch": 2.4814814814814814, | |
| "grad_norm": 2.506683588027954, | |
| "learning_rate": 3.4357050799138053e-07, | |
| "loss": 0.9445154666900635, | |
| "step": 2948 | |
| }, | |
| { | |
| "epoch": 2.483164983164983, | |
| "grad_norm": 2.4092612266540527, | |
| "learning_rate": 3.4266662048723337e-07, | |
| "loss": 0.9850308895111084, | |
| "step": 2950 | |
| }, | |
| { | |
| "epoch": 2.484848484848485, | |
| "grad_norm": 10.964947700500488, | |
| "learning_rate": 3.417653423152329e-07, | |
| "loss": 0.8890873193740845, | |
| "step": 2952 | |
| }, | |
| { | |
| "epoch": 2.4865319865319866, | |
| "grad_norm": 3.6544744968414307, | |
| "learning_rate": 3.4086667658066186e-07, | |
| "loss": 0.5936705470085144, | |
| "step": 2954 | |
| }, | |
| { | |
| "epoch": 2.4882154882154883, | |
| "grad_norm": 6.769886016845703, | |
| "learning_rate": 3.3997062637980167e-07, | |
| "loss": 0.8404591083526611, | |
| "step": 2956 | |
| }, | |
| { | |
| "epoch": 2.48989898989899, | |
| "grad_norm": 6.549720764160156, | |
| "learning_rate": 3.390771947999224e-07, | |
| "loss": 0.5225011110305786, | |
| "step": 2958 | |
| }, | |
| { | |
| "epoch": 2.4915824915824913, | |
| "grad_norm": 3.255201816558838, | |
| "learning_rate": 3.381863849192718e-07, | |
| "loss": 0.8342874050140381, | |
| "step": 2960 | |
| }, | |
| { | |
| "epoch": 2.493265993265993, | |
| "grad_norm": 4.254117488861084, | |
| "learning_rate": 3.3729819980706444e-07, | |
| "loss": 0.5838370323181152, | |
| "step": 2962 | |
| }, | |
| { | |
| "epoch": 2.494949494949495, | |
| "grad_norm": 2.933912992477417, | |
| "learning_rate": 3.364126425234719e-07, | |
| "loss": 0.7112206220626831, | |
| "step": 2964 | |
| }, | |
| { | |
| "epoch": 2.4966329966329965, | |
| "grad_norm": 5.019345760345459, | |
| "learning_rate": 3.3552971611961187e-07, | |
| "loss": 0.5937138199806213, | |
| "step": 2966 | |
| }, | |
| { | |
| "epoch": 2.4983164983164983, | |
| "grad_norm": 3.7426111698150635, | |
| "learning_rate": 3.34649423637537e-07, | |
| "loss": 0.81259685754776, | |
| "step": 2968 | |
| }, | |
| { | |
| "epoch": 2.5, | |
| "grad_norm": 14.945383071899414, | |
| "learning_rate": 3.337717681102253e-07, | |
| "loss": 0.8419524431228638, | |
| "step": 2970 | |
| }, | |
| { | |
| "epoch": 2.5016835016835017, | |
| "grad_norm": 3.5432753562927246, | |
| "learning_rate": 3.328967525615697e-07, | |
| "loss": 0.36146029829978943, | |
| "step": 2972 | |
| }, | |
| { | |
| "epoch": 2.5033670033670035, | |
| "grad_norm": 31.251523971557617, | |
| "learning_rate": 3.3202438000636634e-07, | |
| "loss": 0.5271892547607422, | |
| "step": 2974 | |
| }, | |
| { | |
| "epoch": 2.505050505050505, | |
| "grad_norm": 4.31404447555542, | |
| "learning_rate": 3.311546534503061e-07, | |
| "loss": 0.6813575029373169, | |
| "step": 2976 | |
| }, | |
| { | |
| "epoch": 2.506734006734007, | |
| "grad_norm": 10.586312294006348, | |
| "learning_rate": 3.3028757588996303e-07, | |
| "loss": 0.3660055994987488, | |
| "step": 2978 | |
| }, | |
| { | |
| "epoch": 2.5084175084175087, | |
| "grad_norm": 3.4156813621520996, | |
| "learning_rate": 3.294231503127839e-07, | |
| "loss": 0.7575110197067261, | |
| "step": 2980 | |
| }, | |
| { | |
| "epoch": 2.51010101010101, | |
| "grad_norm": 8.647886276245117, | |
| "learning_rate": 3.2856137969707847e-07, | |
| "loss": 0.788750171661377, | |
| "step": 2982 | |
| }, | |
| { | |
| "epoch": 2.5117845117845117, | |
| "grad_norm": 3.4446113109588623, | |
| "learning_rate": 3.277022670120095e-07, | |
| "loss": 0.4518158435821533, | |
| "step": 2984 | |
| }, | |
| { | |
| "epoch": 2.5134680134680134, | |
| "grad_norm": 15.611486434936523, | |
| "learning_rate": 3.268458152175813e-07, | |
| "loss": 0.7932558059692383, | |
| "step": 2986 | |
| }, | |
| { | |
| "epoch": 2.515151515151515, | |
| "grad_norm": 27.114980697631836, | |
| "learning_rate": 3.2599202726463084e-07, | |
| "loss": 0.61873459815979, | |
| "step": 2988 | |
| }, | |
| { | |
| "epoch": 2.516835016835017, | |
| "grad_norm": 2.904008626937866, | |
| "learning_rate": 3.2514090609481683e-07, | |
| "loss": 0.10597741603851318, | |
| "step": 2990 | |
| }, | |
| { | |
| "epoch": 2.5185185185185186, | |
| "grad_norm": 4.048925399780273, | |
| "learning_rate": 3.2429245464060965e-07, | |
| "loss": 0.8708055019378662, | |
| "step": 2992 | |
| }, | |
| { | |
| "epoch": 2.5202020202020203, | |
| "grad_norm": 8.804458618164062, | |
| "learning_rate": 3.234466758252818e-07, | |
| "loss": 0.5630843043327332, | |
| "step": 2994 | |
| }, | |
| { | |
| "epoch": 2.5218855218855216, | |
| "grad_norm": 2.408494234085083, | |
| "learning_rate": 3.2260357256289715e-07, | |
| "loss": 0.6830452084541321, | |
| "step": 2996 | |
| }, | |
| { | |
| "epoch": 2.5235690235690234, | |
| "grad_norm": 4.321279525756836, | |
| "learning_rate": 3.217631477583009e-07, | |
| "loss": 0.5143815875053406, | |
| "step": 2998 | |
| }, | |
| { | |
| "epoch": 2.525252525252525, | |
| "grad_norm": 1.794520378112793, | |
| "learning_rate": 3.2092540430711044e-07, | |
| "loss": 0.5180540084838867, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 2.526936026936027, | |
| "grad_norm": 3.5048828125, | |
| "learning_rate": 3.200903450957044e-07, | |
| "loss": 0.49375149607658386, | |
| "step": 3002 | |
| }, | |
| { | |
| "epoch": 2.5286195286195285, | |
| "grad_norm": 3.251695156097412, | |
| "learning_rate": 3.192579730012129e-07, | |
| "loss": 0.9845426082611084, | |
| "step": 3004 | |
| }, | |
| { | |
| "epoch": 2.5303030303030303, | |
| "grad_norm": 6.4302263259887695, | |
| "learning_rate": 3.184282908915081e-07, | |
| "loss": 0.7751657962799072, | |
| "step": 3006 | |
| }, | |
| { | |
| "epoch": 2.531986531986532, | |
| "grad_norm": 2.9614450931549072, | |
| "learning_rate": 3.1760130162519427e-07, | |
| "loss": 0.6437252759933472, | |
| "step": 3008 | |
| }, | |
| { | |
| "epoch": 2.5336700336700337, | |
| "grad_norm": 3.641021728515625, | |
| "learning_rate": 3.16777008051597e-07, | |
| "loss": 0.33099907636642456, | |
| "step": 3010 | |
| }, | |
| { | |
| "epoch": 2.5353535353535355, | |
| "grad_norm": 6.20613431930542, | |
| "learning_rate": 3.159554130107546e-07, | |
| "loss": 0.7693390846252441, | |
| "step": 3012 | |
| }, | |
| { | |
| "epoch": 2.537037037037037, | |
| "grad_norm": 2.9264049530029297, | |
| "learning_rate": 3.1513651933340797e-07, | |
| "loss": 0.6058576107025146, | |
| "step": 3014 | |
| }, | |
| { | |
| "epoch": 2.538720538720539, | |
| "grad_norm": 4.105390548706055, | |
| "learning_rate": 3.143203298409899e-07, | |
| "loss": 0.5138027667999268, | |
| "step": 3016 | |
| }, | |
| { | |
| "epoch": 2.5404040404040407, | |
| "grad_norm": 13.755269050598145, | |
| "learning_rate": 3.1350684734561676e-07, | |
| "loss": 0.8655276298522949, | |
| "step": 3018 | |
| }, | |
| { | |
| "epoch": 2.542087542087542, | |
| "grad_norm": 2.1755192279815674, | |
| "learning_rate": 3.126960746500784e-07, | |
| "loss": 0.7289071083068848, | |
| "step": 3020 | |
| }, | |
| { | |
| "epoch": 2.5437710437710437, | |
| "grad_norm": 12.643874168395996, | |
| "learning_rate": 3.118880145478274e-07, | |
| "loss": 0.8041051030158997, | |
| "step": 3022 | |
| }, | |
| { | |
| "epoch": 2.5454545454545454, | |
| "grad_norm": 3.0522072315216064, | |
| "learning_rate": 3.110826698229711e-07, | |
| "loss": 0.978661835193634, | |
| "step": 3024 | |
| }, | |
| { | |
| "epoch": 2.547138047138047, | |
| "grad_norm": 10.360844612121582, | |
| "learning_rate": 3.102800432502607e-07, | |
| "loss": 0.2467118501663208, | |
| "step": 3026 | |
| }, | |
| { | |
| "epoch": 2.548821548821549, | |
| "grad_norm": 4.895616054534912, | |
| "learning_rate": 3.0948013759508274e-07, | |
| "loss": 0.522205114364624, | |
| "step": 3028 | |
| }, | |
| { | |
| "epoch": 2.5505050505050506, | |
| "grad_norm": 8.892946243286133, | |
| "learning_rate": 3.0868295561344874e-07, | |
| "loss": 0.4860239624977112, | |
| "step": 3030 | |
| }, | |
| { | |
| "epoch": 2.5521885521885523, | |
| "grad_norm": 2.0342283248901367, | |
| "learning_rate": 3.078885000519858e-07, | |
| "loss": 0.4318680763244629, | |
| "step": 3032 | |
| }, | |
| { | |
| "epoch": 2.5538720538720536, | |
| "grad_norm": 3.473409414291382, | |
| "learning_rate": 3.0709677364792767e-07, | |
| "loss": 0.8540394306182861, | |
| "step": 3034 | |
| }, | |
| { | |
| "epoch": 2.5555555555555554, | |
| "grad_norm": 10.30406665802002, | |
| "learning_rate": 3.0630777912910533e-07, | |
| "loss": 0.9184716939926147, | |
| "step": 3036 | |
| }, | |
| { | |
| "epoch": 2.557239057239057, | |
| "grad_norm": 6.738753795623779, | |
| "learning_rate": 3.0552151921393633e-07, | |
| "loss": 0.6098148822784424, | |
| "step": 3038 | |
| }, | |
| { | |
| "epoch": 2.558922558922559, | |
| "grad_norm": 2.9204185009002686, | |
| "learning_rate": 3.0473799661141707e-07, | |
| "loss": 0.9494307041168213, | |
| "step": 3040 | |
| }, | |
| { | |
| "epoch": 2.5606060606060606, | |
| "grad_norm": 5.460939407348633, | |
| "learning_rate": 3.0395721402111286e-07, | |
| "loss": 0.6524157524108887, | |
| "step": 3042 | |
| }, | |
| { | |
| "epoch": 2.5622895622895623, | |
| "grad_norm": 4.9505109786987305, | |
| "learning_rate": 3.031791741331478e-07, | |
| "loss": 0.8453473448753357, | |
| "step": 3044 | |
| }, | |
| { | |
| "epoch": 2.563973063973064, | |
| "grad_norm": 12.800024032592773, | |
| "learning_rate": 3.0240387962819695e-07, | |
| "loss": 0.6964143514633179, | |
| "step": 3046 | |
| }, | |
| { | |
| "epoch": 2.5656565656565657, | |
| "grad_norm": 2.980398654937744, | |
| "learning_rate": 3.016313331774762e-07, | |
| "loss": 0.8597656488418579, | |
| "step": 3048 | |
| }, | |
| { | |
| "epoch": 2.5673400673400675, | |
| "grad_norm": 5.009873867034912, | |
| "learning_rate": 3.008615374427329e-07, | |
| "loss": 0.3663683533668518, | |
| "step": 3050 | |
| }, | |
| { | |
| "epoch": 2.569023569023569, | |
| "grad_norm": 3.2331385612487793, | |
| "learning_rate": 3.000944950762373e-07, | |
| "loss": 0.9516968131065369, | |
| "step": 3052 | |
| }, | |
| { | |
| "epoch": 2.570707070707071, | |
| "grad_norm": 3.4293010234832764, | |
| "learning_rate": 2.993302087207732e-07, | |
| "loss": 0.07853099703788757, | |
| "step": 3054 | |
| }, | |
| { | |
| "epoch": 2.5723905723905722, | |
| "grad_norm": 7.385575771331787, | |
| "learning_rate": 2.985686810096285e-07, | |
| "loss": 0.5600473284721375, | |
| "step": 3056 | |
| }, | |
| { | |
| "epoch": 2.574074074074074, | |
| "grad_norm": 6.306962490081787, | |
| "learning_rate": 2.978099145665867e-07, | |
| "loss": 0.3351885974407196, | |
| "step": 3058 | |
| }, | |
| { | |
| "epoch": 2.5757575757575757, | |
| "grad_norm": 4.720430850982666, | |
| "learning_rate": 2.970539120059174e-07, | |
| "loss": 0.6371778249740601, | |
| "step": 3060 | |
| }, | |
| { | |
| "epoch": 2.5774410774410774, | |
| "grad_norm": 6.102284908294678, | |
| "learning_rate": 2.963006759323676e-07, | |
| "loss": 0.5941987037658691, | |
| "step": 3062 | |
| }, | |
| { | |
| "epoch": 2.579124579124579, | |
| "grad_norm": 5.050604820251465, | |
| "learning_rate": 2.955502089411523e-07, | |
| "loss": 0.424297571182251, | |
| "step": 3064 | |
| }, | |
| { | |
| "epoch": 2.580808080808081, | |
| "grad_norm": 5.28799295425415, | |
| "learning_rate": 2.9480251361794656e-07, | |
| "loss": 0.5996015667915344, | |
| "step": 3066 | |
| }, | |
| { | |
| "epoch": 2.5824915824915826, | |
| "grad_norm": 9.331116676330566, | |
| "learning_rate": 2.940575925388746e-07, | |
| "loss": 0.3746086657047272, | |
| "step": 3068 | |
| }, | |
| { | |
| "epoch": 2.584175084175084, | |
| "grad_norm": 13.008201599121094, | |
| "learning_rate": 2.933154482705035e-07, | |
| "loss": 0.17353637516498566, | |
| "step": 3070 | |
| }, | |
| { | |
| "epoch": 2.5858585858585856, | |
| "grad_norm": 5.598928928375244, | |
| "learning_rate": 2.925760833698327e-07, | |
| "loss": 0.43435174226760864, | |
| "step": 3072 | |
| }, | |
| { | |
| "epoch": 2.5875420875420874, | |
| "grad_norm": 4.106137752532959, | |
| "learning_rate": 2.9183950038428475e-07, | |
| "loss": 0.8951042890548706, | |
| "step": 3074 | |
| }, | |
| { | |
| "epoch": 2.589225589225589, | |
| "grad_norm": 7.533908843994141, | |
| "learning_rate": 2.9110570185169834e-07, | |
| "loss": 0.35531511902809143, | |
| "step": 3076 | |
| }, | |
| { | |
| "epoch": 2.590909090909091, | |
| "grad_norm": 2.466156482696533, | |
| "learning_rate": 2.903746903003184e-07, | |
| "loss": 0.8299113512039185, | |
| "step": 3078 | |
| }, | |
| { | |
| "epoch": 2.5925925925925926, | |
| "grad_norm": 4.047122478485107, | |
| "learning_rate": 2.896464682487866e-07, | |
| "loss": 0.6478674411773682, | |
| "step": 3080 | |
| }, | |
| { | |
| "epoch": 2.5942760942760943, | |
| "grad_norm": 2.4090776443481445, | |
| "learning_rate": 2.8892103820613487e-07, | |
| "loss": 0.9649114012718201, | |
| "step": 3082 | |
| }, | |
| { | |
| "epoch": 2.595959595959596, | |
| "grad_norm": 3.08392071723938, | |
| "learning_rate": 2.88198402671775e-07, | |
| "loss": 0.5619069337844849, | |
| "step": 3084 | |
| }, | |
| { | |
| "epoch": 2.5976430976430978, | |
| "grad_norm": 3.889181137084961, | |
| "learning_rate": 2.874785641354901e-07, | |
| "loss": 0.5941061973571777, | |
| "step": 3086 | |
| }, | |
| { | |
| "epoch": 2.5993265993265995, | |
| "grad_norm": 4.151243209838867, | |
| "learning_rate": 2.867615250774269e-07, | |
| "loss": 0.7975903153419495, | |
| "step": 3088 | |
| }, | |
| { | |
| "epoch": 2.601010101010101, | |
| "grad_norm": 6.307215690612793, | |
| "learning_rate": 2.860472879680869e-07, | |
| "loss": 0.8723431825637817, | |
| "step": 3090 | |
| }, | |
| { | |
| "epoch": 2.602693602693603, | |
| "grad_norm": 4.979188442230225, | |
| "learning_rate": 2.8533585526831726e-07, | |
| "loss": 0.6906735897064209, | |
| "step": 3092 | |
| }, | |
| { | |
| "epoch": 2.6043771043771042, | |
| "grad_norm": 5.310150623321533, | |
| "learning_rate": 2.8462722942930286e-07, | |
| "loss": 0.5048916339874268, | |
| "step": 3094 | |
| }, | |
| { | |
| "epoch": 2.606060606060606, | |
| "grad_norm": 5.775015830993652, | |
| "learning_rate": 2.8392141289255806e-07, | |
| "loss": 0.660202145576477, | |
| "step": 3096 | |
| }, | |
| { | |
| "epoch": 2.6077441077441077, | |
| "grad_norm": 12.841134071350098, | |
| "learning_rate": 2.8321840808991775e-07, | |
| "loss": 0.5634772777557373, | |
| "step": 3098 | |
| }, | |
| { | |
| "epoch": 2.6094276094276094, | |
| "grad_norm": 6.739739418029785, | |
| "learning_rate": 2.8251821744352933e-07, | |
| "loss": 0.5956814289093018, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 2.611111111111111, | |
| "grad_norm": 2.563978433609009, | |
| "learning_rate": 2.8182084336584423e-07, | |
| "loss": 0.5830974578857422, | |
| "step": 3102 | |
| }, | |
| { | |
| "epoch": 2.612794612794613, | |
| "grad_norm": 4.95272970199585, | |
| "learning_rate": 2.8112628825960926e-07, | |
| "loss": 0.8090439438819885, | |
| "step": 3104 | |
| }, | |
| { | |
| "epoch": 2.6144781144781146, | |
| "grad_norm": 3.6197354793548584, | |
| "learning_rate": 2.804345545178594e-07, | |
| "loss": 0.7719713449478149, | |
| "step": 3106 | |
| }, | |
| { | |
| "epoch": 2.616161616161616, | |
| "grad_norm": 12.875308990478516, | |
| "learning_rate": 2.7974564452390833e-07, | |
| "loss": 0.18324008584022522, | |
| "step": 3108 | |
| }, | |
| { | |
| "epoch": 2.6178451178451176, | |
| "grad_norm": 3.717010498046875, | |
| "learning_rate": 2.790595606513406e-07, | |
| "loss": 0.7723451852798462, | |
| "step": 3110 | |
| }, | |
| { | |
| "epoch": 2.6195286195286194, | |
| "grad_norm": 2.814573287963867, | |
| "learning_rate": 2.78376305264004e-07, | |
| "loss": 0.39754652976989746, | |
| "step": 3112 | |
| }, | |
| { | |
| "epoch": 2.621212121212121, | |
| "grad_norm": 3.2848994731903076, | |
| "learning_rate": 2.776958807160011e-07, | |
| "loss": 0.4727073609828949, | |
| "step": 3114 | |
| }, | |
| { | |
| "epoch": 2.622895622895623, | |
| "grad_norm": 3.7905068397521973, | |
| "learning_rate": 2.7701828935168026e-07, | |
| "loss": 0.8447589874267578, | |
| "step": 3116 | |
| }, | |
| { | |
| "epoch": 2.6245791245791246, | |
| "grad_norm": 2.8799266815185547, | |
| "learning_rate": 2.763435335056291e-07, | |
| "loss": 1.0325953960418701, | |
| "step": 3118 | |
| }, | |
| { | |
| "epoch": 2.6262626262626263, | |
| "grad_norm": 3.1782491207122803, | |
| "learning_rate": 2.756716155026656e-07, | |
| "loss": 0.5554063320159912, | |
| "step": 3120 | |
| }, | |
| { | |
| "epoch": 2.627946127946128, | |
| "grad_norm": 2.897000551223755, | |
| "learning_rate": 2.750025376578295e-07, | |
| "loss": 0.9207072854042053, | |
| "step": 3122 | |
| }, | |
| { | |
| "epoch": 2.6296296296296298, | |
| "grad_norm": 2.4364206790924072, | |
| "learning_rate": 2.743363022763758e-07, | |
| "loss": 0.8367090225219727, | |
| "step": 3124 | |
| }, | |
| { | |
| "epoch": 2.6313131313131315, | |
| "grad_norm": 4.580779075622559, | |
| "learning_rate": 2.7367291165376593e-07, | |
| "loss": 0.6048181056976318, | |
| "step": 3126 | |
| }, | |
| { | |
| "epoch": 2.6329966329966332, | |
| "grad_norm": 6.332035064697266, | |
| "learning_rate": 2.7301236807565925e-07, | |
| "loss": 0.808570384979248, | |
| "step": 3128 | |
| }, | |
| { | |
| "epoch": 2.634680134680135, | |
| "grad_norm": 7.100130081176758, | |
| "learning_rate": 2.7235467381790654e-07, | |
| "loss": 0.49354591965675354, | |
| "step": 3130 | |
| }, | |
| { | |
| "epoch": 2.6363636363636362, | |
| "grad_norm": 2.4457104206085205, | |
| "learning_rate": 2.716998311465415e-07, | |
| "loss": 0.2983268201351166, | |
| "step": 3132 | |
| }, | |
| { | |
| "epoch": 2.638047138047138, | |
| "grad_norm": 4.332514762878418, | |
| "learning_rate": 2.710478423177722e-07, | |
| "loss": 0.8370668888092041, | |
| "step": 3134 | |
| }, | |
| { | |
| "epoch": 2.6397306397306397, | |
| "grad_norm": 4.5044684410095215, | |
| "learning_rate": 2.7039870957797464e-07, | |
| "loss": 0.7652538418769836, | |
| "step": 3136 | |
| }, | |
| { | |
| "epoch": 2.6414141414141414, | |
| "grad_norm": 4.017055511474609, | |
| "learning_rate": 2.697524351636844e-07, | |
| "loss": 0.4114927649497986, | |
| "step": 3138 | |
| }, | |
| { | |
| "epoch": 2.643097643097643, | |
| "grad_norm": 3.3894689083099365, | |
| "learning_rate": 2.691090213015886e-07, | |
| "loss": 0.8686310052871704, | |
| "step": 3140 | |
| }, | |
| { | |
| "epoch": 2.644781144781145, | |
| "grad_norm": 2.7027831077575684, | |
| "learning_rate": 2.6846847020851884e-07, | |
| "loss": 0.5540004372596741, | |
| "step": 3142 | |
| }, | |
| { | |
| "epoch": 2.6464646464646466, | |
| "grad_norm": 3.608794927597046, | |
| "learning_rate": 2.678307840914431e-07, | |
| "loss": 0.8333272933959961, | |
| "step": 3144 | |
| }, | |
| { | |
| "epoch": 2.648148148148148, | |
| "grad_norm": 3.318763494491577, | |
| "learning_rate": 2.6719596514745826e-07, | |
| "loss": 0.9629621505737305, | |
| "step": 3146 | |
| }, | |
| { | |
| "epoch": 2.6498316498316496, | |
| "grad_norm": 3.6985297203063965, | |
| "learning_rate": 2.665640155637828e-07, | |
| "loss": 0.5129526853561401, | |
| "step": 3148 | |
| }, | |
| { | |
| "epoch": 2.6515151515151514, | |
| "grad_norm": 2.535443067550659, | |
| "learning_rate": 2.659349375177489e-07, | |
| "loss": 0.8636926412582397, | |
| "step": 3150 | |
| }, | |
| { | |
| "epoch": 2.653198653198653, | |
| "grad_norm": 2.768599510192871, | |
| "learning_rate": 2.6530873317679515e-07, | |
| "loss": 0.20498168468475342, | |
| "step": 3152 | |
| }, | |
| { | |
| "epoch": 2.654882154882155, | |
| "grad_norm": 34.86625671386719, | |
| "learning_rate": 2.6468540469845895e-07, | |
| "loss": 0.9441362619400024, | |
| "step": 3154 | |
| }, | |
| { | |
| "epoch": 2.6565656565656566, | |
| "grad_norm": 9.064558982849121, | |
| "learning_rate": 2.640649542303693e-07, | |
| "loss": 0.5518494844436646, | |
| "step": 3156 | |
| }, | |
| { | |
| "epoch": 2.6582491582491583, | |
| "grad_norm": 3.18203067779541, | |
| "learning_rate": 2.634473839102389e-07, | |
| "loss": 0.35931962728500366, | |
| "step": 3158 | |
| }, | |
| { | |
| "epoch": 2.65993265993266, | |
| "grad_norm": 7.7922282218933105, | |
| "learning_rate": 2.6283269586585737e-07, | |
| "loss": 0.44168537855148315, | |
| "step": 3160 | |
| }, | |
| { | |
| "epoch": 2.6616161616161618, | |
| "grad_norm": 4.682225227355957, | |
| "learning_rate": 2.6222089221508404e-07, | |
| "loss": 0.6104831695556641, | |
| "step": 3162 | |
| }, | |
| { | |
| "epoch": 2.6632996632996635, | |
| "grad_norm": 2.9735536575317383, | |
| "learning_rate": 2.6161197506583944e-07, | |
| "loss": 0.8378016352653503, | |
| "step": 3164 | |
| }, | |
| { | |
| "epoch": 2.6649831649831652, | |
| "grad_norm": 6.616426467895508, | |
| "learning_rate": 2.610059465160995e-07, | |
| "loss": 0.6439419984817505, | |
| "step": 3166 | |
| }, | |
| { | |
| "epoch": 2.6666666666666665, | |
| "grad_norm": 3.3657751083374023, | |
| "learning_rate": 2.6040280865388773e-07, | |
| "loss": 0.7727220058441162, | |
| "step": 3168 | |
| }, | |
| { | |
| "epoch": 2.6683501683501682, | |
| "grad_norm": 3.285837173461914, | |
| "learning_rate": 2.5980256355726744e-07, | |
| "loss": 0.6320611834526062, | |
| "step": 3170 | |
| }, | |
| { | |
| "epoch": 2.67003367003367, | |
| "grad_norm": 4.853776931762695, | |
| "learning_rate": 2.5920521329433606e-07, | |
| "loss": 1.043792963027954, | |
| "step": 3172 | |
| }, | |
| { | |
| "epoch": 2.6717171717171717, | |
| "grad_norm": 2.360769271850586, | |
| "learning_rate": 2.586107599232164e-07, | |
| "loss": 0.9384379386901855, | |
| "step": 3174 | |
| }, | |
| { | |
| "epoch": 2.6734006734006734, | |
| "grad_norm": 14.25788402557373, | |
| "learning_rate": 2.5801920549205023e-07, | |
| "loss": 0.4818713068962097, | |
| "step": 3176 | |
| }, | |
| { | |
| "epoch": 2.675084175084175, | |
| "grad_norm": 2.0616092681884766, | |
| "learning_rate": 2.5743055203899167e-07, | |
| "loss": 0.9861509799957275, | |
| "step": 3178 | |
| }, | |
| { | |
| "epoch": 2.676767676767677, | |
| "grad_norm": 4.687266826629639, | |
| "learning_rate": 2.568448015921996e-07, | |
| "loss": 0.6932214498519897, | |
| "step": 3180 | |
| }, | |
| { | |
| "epoch": 2.678451178451178, | |
| "grad_norm": 2.3194851875305176, | |
| "learning_rate": 2.562619561698306e-07, | |
| "loss": 0.7709292769432068, | |
| "step": 3182 | |
| }, | |
| { | |
| "epoch": 2.68013468013468, | |
| "grad_norm": 2.256274461746216, | |
| "learning_rate": 2.556820177800324e-07, | |
| "loss": 0.8786018490791321, | |
| "step": 3184 | |
| }, | |
| { | |
| "epoch": 2.6818181818181817, | |
| "grad_norm": 1.7933223247528076, | |
| "learning_rate": 2.551049884209371e-07, | |
| "loss": 0.7843552827835083, | |
| "step": 3186 | |
| }, | |
| { | |
| "epoch": 2.6835016835016834, | |
| "grad_norm": 3.6488430500030518, | |
| "learning_rate": 2.5453087008065307e-07, | |
| "loss": 0.7388215661048889, | |
| "step": 3188 | |
| }, | |
| { | |
| "epoch": 2.685185185185185, | |
| "grad_norm": 1.1536720991134644, | |
| "learning_rate": 2.5395966473725994e-07, | |
| "loss": 0.552982747554779, | |
| "step": 3190 | |
| }, | |
| { | |
| "epoch": 2.686868686868687, | |
| "grad_norm": 3.049055814743042, | |
| "learning_rate": 2.5339137435880043e-07, | |
| "loss": 0.617717981338501, | |
| "step": 3192 | |
| }, | |
| { | |
| "epoch": 2.6885521885521886, | |
| "grad_norm": 2.4993679523468018, | |
| "learning_rate": 2.5282600090327383e-07, | |
| "loss": 0.7265998125076294, | |
| "step": 3194 | |
| }, | |
| { | |
| "epoch": 2.6902356902356903, | |
| "grad_norm": 12.052529335021973, | |
| "learning_rate": 2.5226354631862966e-07, | |
| "loss": 0.6202006340026855, | |
| "step": 3196 | |
| }, | |
| { | |
| "epoch": 2.691919191919192, | |
| "grad_norm": 2.131632089614868, | |
| "learning_rate": 2.517040125427608e-07, | |
| "loss": 0.741972804069519, | |
| "step": 3198 | |
| }, | |
| { | |
| "epoch": 2.6936026936026938, | |
| "grad_norm": 2.2996838092803955, | |
| "learning_rate": 2.511474015034964e-07, | |
| "loss": 0.8759193420410156, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 2.6952861952861955, | |
| "grad_norm": 6.061952590942383, | |
| "learning_rate": 2.5059371511859557e-07, | |
| "loss": 0.6976549625396729, | |
| "step": 3202 | |
| }, | |
| { | |
| "epoch": 2.6969696969696972, | |
| "grad_norm": 3.891650915145874, | |
| "learning_rate": 2.50042955295741e-07, | |
| "loss": 0.6694223880767822, | |
| "step": 3204 | |
| }, | |
| { | |
| "epoch": 2.6986531986531985, | |
| "grad_norm": 5.893383026123047, | |
| "learning_rate": 2.494951239325321e-07, | |
| "loss": 0.7830284833908081, | |
| "step": 3206 | |
| }, | |
| { | |
| "epoch": 2.7003367003367003, | |
| "grad_norm": 4.715972423553467, | |
| "learning_rate": 2.489502229164781e-07, | |
| "loss": 0.5429476499557495, | |
| "step": 3208 | |
| }, | |
| { | |
| "epoch": 2.702020202020202, | |
| "grad_norm": 3.343920946121216, | |
| "learning_rate": 2.4840825412499274e-07, | |
| "loss": 0.8423386812210083, | |
| "step": 3210 | |
| }, | |
| { | |
| "epoch": 2.7037037037037037, | |
| "grad_norm": 2.458588123321533, | |
| "learning_rate": 2.478692194253861e-07, | |
| "loss": 0.4965520203113556, | |
| "step": 3212 | |
| }, | |
| { | |
| "epoch": 2.7053872053872055, | |
| "grad_norm": 2.6822140216827393, | |
| "learning_rate": 2.473331206748597e-07, | |
| "loss": 0.6127833127975464, | |
| "step": 3214 | |
| }, | |
| { | |
| "epoch": 2.707070707070707, | |
| "grad_norm": 3.830547571182251, | |
| "learning_rate": 2.467999597204996e-07, | |
| "loss": 0.2938854694366455, | |
| "step": 3216 | |
| }, | |
| { | |
| "epoch": 2.708754208754209, | |
| "grad_norm": 3.668973684310913, | |
| "learning_rate": 2.462697383992691e-07, | |
| "loss": 0.7545672655105591, | |
| "step": 3218 | |
| }, | |
| { | |
| "epoch": 2.71043771043771, | |
| "grad_norm": 4.789590358734131, | |
| "learning_rate": 2.457424585380041e-07, | |
| "loss": 0.3368055820465088, | |
| "step": 3220 | |
| }, | |
| { | |
| "epoch": 2.712121212121212, | |
| "grad_norm": 4.607179641723633, | |
| "learning_rate": 2.4521812195340544e-07, | |
| "loss": 0.7228003144264221, | |
| "step": 3222 | |
| }, | |
| { | |
| "epoch": 2.7138047138047137, | |
| "grad_norm": 3.7761380672454834, | |
| "learning_rate": 2.4469673045203333e-07, | |
| "loss": 0.39306753873825073, | |
| "step": 3224 | |
| }, | |
| { | |
| "epoch": 2.7154882154882154, | |
| "grad_norm": 3.8872487545013428, | |
| "learning_rate": 2.441782858303007e-07, | |
| "loss": 0.388794481754303, | |
| "step": 3226 | |
| }, | |
| { | |
| "epoch": 2.717171717171717, | |
| "grad_norm": 3.936227560043335, | |
| "learning_rate": 2.436627898744678e-07, | |
| "loss": 0.7990210056304932, | |
| "step": 3228 | |
| }, | |
| { | |
| "epoch": 2.718855218855219, | |
| "grad_norm": 10.530872344970703, | |
| "learning_rate": 2.4315024436063464e-07, | |
| "loss": 0.3864361643791199, | |
| "step": 3230 | |
| }, | |
| { | |
| "epoch": 2.7205387205387206, | |
| "grad_norm": 8.344436645507812, | |
| "learning_rate": 2.4264065105473637e-07, | |
| "loss": 0.8147022724151611, | |
| "step": 3232 | |
| }, | |
| { | |
| "epoch": 2.7222222222222223, | |
| "grad_norm": 1.8948400020599365, | |
| "learning_rate": 2.4213401171253656e-07, | |
| "loss": 0.6463346481323242, | |
| "step": 3234 | |
| }, | |
| { | |
| "epoch": 2.723905723905724, | |
| "grad_norm": 2.3045897483825684, | |
| "learning_rate": 2.416303280796206e-07, | |
| "loss": 0.7769128084182739, | |
| "step": 3236 | |
| }, | |
| { | |
| "epoch": 2.725589225589226, | |
| "grad_norm": 10.252862930297852, | |
| "learning_rate": 2.411296018913907e-07, | |
| "loss": 0.7157000303268433, | |
| "step": 3238 | |
| }, | |
| { | |
| "epoch": 2.7272727272727275, | |
| "grad_norm": 12.489968299865723, | |
| "learning_rate": 2.406318348730592e-07, | |
| "loss": 0.7306414842605591, | |
| "step": 3240 | |
| }, | |
| { | |
| "epoch": 2.728956228956229, | |
| "grad_norm": 3.00982666015625, | |
| "learning_rate": 2.401370287396428e-07, | |
| "loss": 0.8304033279418945, | |
| "step": 3242 | |
| }, | |
| { | |
| "epoch": 2.7306397306397305, | |
| "grad_norm": 4.058210849761963, | |
| "learning_rate": 2.396451851959571e-07, | |
| "loss": 0.5530973672866821, | |
| "step": 3244 | |
| }, | |
| { | |
| "epoch": 2.7323232323232323, | |
| "grad_norm": 4.974558353424072, | |
| "learning_rate": 2.391563059366099e-07, | |
| "loss": 0.7806906700134277, | |
| "step": 3246 | |
| }, | |
| { | |
| "epoch": 2.734006734006734, | |
| "grad_norm": 10.766674995422363, | |
| "learning_rate": 2.3867039264599587e-07, | |
| "loss": 0.805009126663208, | |
| "step": 3248 | |
| }, | |
| { | |
| "epoch": 2.7356902356902357, | |
| "grad_norm": 4.717216491699219, | |
| "learning_rate": 2.3818744699829105e-07, | |
| "loss": 0.6719311475753784, | |
| "step": 3250 | |
| }, | |
| { | |
| "epoch": 2.7373737373737375, | |
| "grad_norm": 4.689093112945557, | |
| "learning_rate": 2.3770747065744594e-07, | |
| "loss": 0.37460649013519287, | |
| "step": 3252 | |
| }, | |
| { | |
| "epoch": 2.739057239057239, | |
| "grad_norm": 3.905974864959717, | |
| "learning_rate": 2.3723046527718137e-07, | |
| "loss": 0.528462290763855, | |
| "step": 3254 | |
| }, | |
| { | |
| "epoch": 2.7407407407407405, | |
| "grad_norm": 3.8697361946105957, | |
| "learning_rate": 2.367564325009815e-07, | |
| "loss": 0.4876176714897156, | |
| "step": 3256 | |
| }, | |
| { | |
| "epoch": 2.742424242424242, | |
| "grad_norm": 2.9344778060913086, | |
| "learning_rate": 2.362853739620885e-07, | |
| "loss": 0.6226130723953247, | |
| "step": 3258 | |
| }, | |
| { | |
| "epoch": 2.744107744107744, | |
| "grad_norm": 4.8839497566223145, | |
| "learning_rate": 2.3581729128349745e-07, | |
| "loss": 0.4137502908706665, | |
| "step": 3260 | |
| }, | |
| { | |
| "epoch": 2.7457912457912457, | |
| "grad_norm": 2.9513931274414062, | |
| "learning_rate": 2.3535218607795013e-07, | |
| "loss": 0.6418605446815491, | |
| "step": 3262 | |
| }, | |
| { | |
| "epoch": 2.7474747474747474, | |
| "grad_norm": 3.3043465614318848, | |
| "learning_rate": 2.3489005994792948e-07, | |
| "loss": 0.857982337474823, | |
| "step": 3264 | |
| }, | |
| { | |
| "epoch": 2.749158249158249, | |
| "grad_norm": 5.111167907714844, | |
| "learning_rate": 2.3443091448565454e-07, | |
| "loss": 0.958759605884552, | |
| "step": 3266 | |
| }, | |
| { | |
| "epoch": 2.750841750841751, | |
| "grad_norm": 74.0482406616211, | |
| "learning_rate": 2.339747512730749e-07, | |
| "loss": 0.4375573396682739, | |
| "step": 3268 | |
| }, | |
| { | |
| "epoch": 2.7525252525252526, | |
| "grad_norm": 3.2530107498168945, | |
| "learning_rate": 2.3352157188186424e-07, | |
| "loss": 0.9555472135543823, | |
| "step": 3270 | |
| }, | |
| { | |
| "epoch": 2.7542087542087543, | |
| "grad_norm": 44.573936462402344, | |
| "learning_rate": 2.3307137787341667e-07, | |
| "loss": 0.5092712044715881, | |
| "step": 3272 | |
| }, | |
| { | |
| "epoch": 2.755892255892256, | |
| "grad_norm": 2.355350971221924, | |
| "learning_rate": 2.3262417079883986e-07, | |
| "loss": 0.7026905417442322, | |
| "step": 3274 | |
| }, | |
| { | |
| "epoch": 2.757575757575758, | |
| "grad_norm": 13.51882553100586, | |
| "learning_rate": 2.3217995219895016e-07, | |
| "loss": 0.3385421633720398, | |
| "step": 3276 | |
| }, | |
| { | |
| "epoch": 2.7592592592592595, | |
| "grad_norm": 1.3492799997329712, | |
| "learning_rate": 2.317387236042678e-07, | |
| "loss": 0.03149527311325073, | |
| "step": 3278 | |
| }, | |
| { | |
| "epoch": 2.760942760942761, | |
| "grad_norm": 24.129674911499023, | |
| "learning_rate": 2.313004865350109e-07, | |
| "loss": 1.0571789741516113, | |
| "step": 3280 | |
| }, | |
| { | |
| "epoch": 2.7626262626262625, | |
| "grad_norm": 2.9202077388763428, | |
| "learning_rate": 2.3086524250109045e-07, | |
| "loss": 1.0254530906677246, | |
| "step": 3282 | |
| }, | |
| { | |
| "epoch": 2.7643097643097643, | |
| "grad_norm": 10.319761276245117, | |
| "learning_rate": 2.3043299300210528e-07, | |
| "loss": 0.2718232274055481, | |
| "step": 3284 | |
| }, | |
| { | |
| "epoch": 2.765993265993266, | |
| "grad_norm": 4.364471435546875, | |
| "learning_rate": 2.30003739527337e-07, | |
| "loss": 0.7651864290237427, | |
| "step": 3286 | |
| }, | |
| { | |
| "epoch": 2.7676767676767677, | |
| "grad_norm": 5.035273551940918, | |
| "learning_rate": 2.2957748355574408e-07, | |
| "loss": 0.7020351886749268, | |
| "step": 3288 | |
| }, | |
| { | |
| "epoch": 2.7693602693602695, | |
| "grad_norm": 11.138975143432617, | |
| "learning_rate": 2.2915422655595795e-07, | |
| "loss": 0.20551855862140656, | |
| "step": 3290 | |
| }, | |
| { | |
| "epoch": 2.771043771043771, | |
| "grad_norm": 5.818138599395752, | |
| "learning_rate": 2.287339699862771e-07, | |
| "loss": 0.9749652147293091, | |
| "step": 3292 | |
| }, | |
| { | |
| "epoch": 2.7727272727272725, | |
| "grad_norm": 5.8484063148498535, | |
| "learning_rate": 2.2831671529466205e-07, | |
| "loss": 0.7997506260871887, | |
| "step": 3294 | |
| }, | |
| { | |
| "epoch": 2.774410774410774, | |
| "grad_norm": 3.476667642593384, | |
| "learning_rate": 2.2790246391873086e-07, | |
| "loss": 0.8032985925674438, | |
| "step": 3296 | |
| }, | |
| { | |
| "epoch": 2.776094276094276, | |
| "grad_norm": 4.120417594909668, | |
| "learning_rate": 2.2749121728575393e-07, | |
| "loss": 0.23050040006637573, | |
| "step": 3298 | |
| }, | |
| { | |
| "epoch": 2.7777777777777777, | |
| "grad_norm": 3.6002514362335205, | |
| "learning_rate": 2.2708297681264874e-07, | |
| "loss": 0.45907649397850037, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 2.7794612794612794, | |
| "grad_norm": 2.618075370788574, | |
| "learning_rate": 2.2667774390597562e-07, | |
| "loss": 0.4696184992790222, | |
| "step": 3302 | |
| }, | |
| { | |
| "epoch": 2.781144781144781, | |
| "grad_norm": 6.530674457550049, | |
| "learning_rate": 2.2627551996193247e-07, | |
| "loss": 0.47576916217803955, | |
| "step": 3304 | |
| }, | |
| { | |
| "epoch": 2.782828282828283, | |
| "grad_norm": 18.45606231689453, | |
| "learning_rate": 2.2587630636634985e-07, | |
| "loss": 0.6657184362411499, | |
| "step": 3306 | |
| }, | |
| { | |
| "epoch": 2.7845117845117846, | |
| "grad_norm": 11.66965389251709, | |
| "learning_rate": 2.2548010449468676e-07, | |
| "loss": 0.48266786336898804, | |
| "step": 3308 | |
| }, | |
| { | |
| "epoch": 2.7861952861952863, | |
| "grad_norm": 2.84804368019104, | |
| "learning_rate": 2.2508691571202528e-07, | |
| "loss": 0.6634323596954346, | |
| "step": 3310 | |
| }, | |
| { | |
| "epoch": 2.787878787878788, | |
| "grad_norm": 3.701871395111084, | |
| "learning_rate": 2.2469674137306627e-07, | |
| "loss": 0.4185872972011566, | |
| "step": 3312 | |
| }, | |
| { | |
| "epoch": 2.78956228956229, | |
| "grad_norm": 2.2695560455322266, | |
| "learning_rate": 2.2430958282212414e-07, | |
| "loss": 0.6932981014251709, | |
| "step": 3314 | |
| }, | |
| { | |
| "epoch": 2.791245791245791, | |
| "grad_norm": 3.9276177883148193, | |
| "learning_rate": 2.239254413931236e-07, | |
| "loss": 0.9720036387443542, | |
| "step": 3316 | |
| }, | |
| { | |
| "epoch": 2.792929292929293, | |
| "grad_norm": 3.183957099914551, | |
| "learning_rate": 2.2354431840959307e-07, | |
| "loss": 0.7453635334968567, | |
| "step": 3318 | |
| }, | |
| { | |
| "epoch": 2.7946127946127945, | |
| "grad_norm": 4.194116115570068, | |
| "learning_rate": 2.2316621518466167e-07, | |
| "loss": 0.3255777359008789, | |
| "step": 3320 | |
| }, | |
| { | |
| "epoch": 2.7962962962962963, | |
| "grad_norm": 5.5670366287231445, | |
| "learning_rate": 2.227911330210542e-07, | |
| "loss": 0.6090131998062134, | |
| "step": 3322 | |
| }, | |
| { | |
| "epoch": 2.797979797979798, | |
| "grad_norm": 2.372026205062866, | |
| "learning_rate": 2.2241907321108638e-07, | |
| "loss": 0.6710550785064697, | |
| "step": 3324 | |
| }, | |
| { | |
| "epoch": 2.7996632996632997, | |
| "grad_norm": 3.636491060256958, | |
| "learning_rate": 2.22050037036661e-07, | |
| "loss": 0.30255502462387085, | |
| "step": 3326 | |
| }, | |
| { | |
| "epoch": 2.8013468013468015, | |
| "grad_norm": 3.7633321285247803, | |
| "learning_rate": 2.216840257692628e-07, | |
| "loss": 0.723252534866333, | |
| "step": 3328 | |
| }, | |
| { | |
| "epoch": 2.8030303030303028, | |
| "grad_norm": 2.568369150161743, | |
| "learning_rate": 2.213210406699547e-07, | |
| "loss": 0.78731769323349, | |
| "step": 3330 | |
| }, | |
| { | |
| "epoch": 2.8047138047138045, | |
| "grad_norm": 3.9559519290924072, | |
| "learning_rate": 2.209610829893729e-07, | |
| "loss": 0.5705679655075073, | |
| "step": 3332 | |
| }, | |
| { | |
| "epoch": 2.8063973063973062, | |
| "grad_norm": 5.107378005981445, | |
| "learning_rate": 2.2060415396772337e-07, | |
| "loss": 0.4503876864910126, | |
| "step": 3334 | |
| }, | |
| { | |
| "epoch": 2.808080808080808, | |
| "grad_norm": 3.7301788330078125, | |
| "learning_rate": 2.2025025483477654e-07, | |
| "loss": 0.5614144802093506, | |
| "step": 3336 | |
| }, | |
| { | |
| "epoch": 2.8097643097643097, | |
| "grad_norm": 3.425426959991455, | |
| "learning_rate": 2.1989938680986382e-07, | |
| "loss": 0.27632904052734375, | |
| "step": 3338 | |
| }, | |
| { | |
| "epoch": 2.8114478114478114, | |
| "grad_norm": 11.55947208404541, | |
| "learning_rate": 2.1955155110187344e-07, | |
| "loss": 0.6297179460525513, | |
| "step": 3340 | |
| }, | |
| { | |
| "epoch": 2.813131313131313, | |
| "grad_norm": 5.041746139526367, | |
| "learning_rate": 2.1920674890924545e-07, | |
| "loss": 0.7801995873451233, | |
| "step": 3342 | |
| }, | |
| { | |
| "epoch": 2.814814814814815, | |
| "grad_norm": 1.9846611022949219, | |
| "learning_rate": 2.1886498141996858e-07, | |
| "loss": 0.3154934346675873, | |
| "step": 3344 | |
| }, | |
| { | |
| "epoch": 2.8164983164983166, | |
| "grad_norm": 3.4041101932525635, | |
| "learning_rate": 2.185262498115759e-07, | |
| "loss": 0.7565585374832153, | |
| "step": 3346 | |
| }, | |
| { | |
| "epoch": 2.8181818181818183, | |
| "grad_norm": 5.533918380737305, | |
| "learning_rate": 2.1819055525113995e-07, | |
| "loss": 0.5513463020324707, | |
| "step": 3348 | |
| }, | |
| { | |
| "epoch": 2.81986531986532, | |
| "grad_norm": 3.816920042037964, | |
| "learning_rate": 2.178578988952698e-07, | |
| "loss": 0.8172674179077148, | |
| "step": 3350 | |
| }, | |
| { | |
| "epoch": 2.821548821548822, | |
| "grad_norm": 4.7206573486328125, | |
| "learning_rate": 2.1752828189010677e-07, | |
| "loss": 0.7926508188247681, | |
| "step": 3352 | |
| }, | |
| { | |
| "epoch": 2.823232323232323, | |
| "grad_norm": 2.8711562156677246, | |
| "learning_rate": 2.1720170537132003e-07, | |
| "loss": 0.7785905599594116, | |
| "step": 3354 | |
| }, | |
| { | |
| "epoch": 2.824915824915825, | |
| "grad_norm": 7.083092212677002, | |
| "learning_rate": 2.16878170464103e-07, | |
| "loss": 0.8117780685424805, | |
| "step": 3356 | |
| }, | |
| { | |
| "epoch": 2.8265993265993266, | |
| "grad_norm": 6.3713178634643555, | |
| "learning_rate": 2.1655767828316967e-07, | |
| "loss": 0.4899190068244934, | |
| "step": 3358 | |
| }, | |
| { | |
| "epoch": 2.8282828282828283, | |
| "grad_norm": 8.093062400817871, | |
| "learning_rate": 2.1624022993275042e-07, | |
| "loss": 0.481950581073761, | |
| "step": 3360 | |
| }, | |
| { | |
| "epoch": 2.82996632996633, | |
| "grad_norm": 3.7031800746917725, | |
| "learning_rate": 2.1592582650658838e-07, | |
| "loss": 0.6889939308166504, | |
| "step": 3362 | |
| }, | |
| { | |
| "epoch": 2.8316498316498318, | |
| "grad_norm": 8.515325546264648, | |
| "learning_rate": 2.1561446908793575e-07, | |
| "loss": 0.5986655950546265, | |
| "step": 3364 | |
| }, | |
| { | |
| "epoch": 2.8333333333333335, | |
| "grad_norm": 2.5616695880889893, | |
| "learning_rate": 2.1530615874954978e-07, | |
| "loss": 0.4613681137561798, | |
| "step": 3366 | |
| }, | |
| { | |
| "epoch": 2.8350168350168348, | |
| "grad_norm": 6.432313919067383, | |
| "learning_rate": 2.1500089655368913e-07, | |
| "loss": 0.35357874631881714, | |
| "step": 3368 | |
| }, | |
| { | |
| "epoch": 2.8367003367003365, | |
| "grad_norm": 5.070071220397949, | |
| "learning_rate": 2.146986835521108e-07, | |
| "loss": 0.815057635307312, | |
| "step": 3370 | |
| }, | |
| { | |
| "epoch": 2.8383838383838382, | |
| "grad_norm": 1.3125436305999756, | |
| "learning_rate": 2.143995207860655e-07, | |
| "loss": 0.6456162929534912, | |
| "step": 3372 | |
| }, | |
| { | |
| "epoch": 2.84006734006734, | |
| "grad_norm": 50.76771545410156, | |
| "learning_rate": 2.1410340928629483e-07, | |
| "loss": 0.29310160875320435, | |
| "step": 3374 | |
| }, | |
| { | |
| "epoch": 2.8417508417508417, | |
| "grad_norm": 2.078246831893921, | |
| "learning_rate": 2.138103500730278e-07, | |
| "loss": 0.851909875869751, | |
| "step": 3376 | |
| }, | |
| { | |
| "epoch": 2.8434343434343434, | |
| "grad_norm": 2.2148220539093018, | |
| "learning_rate": 2.1352034415597635e-07, | |
| "loss": 0.7448092699050903, | |
| "step": 3378 | |
| }, | |
| { | |
| "epoch": 2.845117845117845, | |
| "grad_norm": 2.512826919555664, | |
| "learning_rate": 2.1323339253433309e-07, | |
| "loss": 0.5352383255958557, | |
| "step": 3380 | |
| }, | |
| { | |
| "epoch": 2.846801346801347, | |
| "grad_norm": 5.046896934509277, | |
| "learning_rate": 2.1294949619676717e-07, | |
| "loss": 0.522847056388855, | |
| "step": 3382 | |
| }, | |
| { | |
| "epoch": 2.8484848484848486, | |
| "grad_norm": 4.314877033233643, | |
| "learning_rate": 2.1266865612142064e-07, | |
| "loss": 0.5352615118026733, | |
| "step": 3384 | |
| }, | |
| { | |
| "epoch": 2.8501683501683504, | |
| "grad_norm": 3.3411834239959717, | |
| "learning_rate": 2.1239087327590582e-07, | |
| "loss": 0.7238250970840454, | |
| "step": 3386 | |
| }, | |
| { | |
| "epoch": 2.851851851851852, | |
| "grad_norm": 0.83232581615448, | |
| "learning_rate": 2.121161486173017e-07, | |
| "loss": 0.6121417284011841, | |
| "step": 3388 | |
| }, | |
| { | |
| "epoch": 2.8535353535353534, | |
| "grad_norm": 8.091914176940918, | |
| "learning_rate": 2.1184448309215015e-07, | |
| "loss": 0.4724659025669098, | |
| "step": 3390 | |
| }, | |
| { | |
| "epoch": 2.855218855218855, | |
| "grad_norm": 3.3312911987304688, | |
| "learning_rate": 2.1157587763645322e-07, | |
| "loss": 0.5098093748092651, | |
| "step": 3392 | |
| }, | |
| { | |
| "epoch": 2.856902356902357, | |
| "grad_norm": 5.780312538146973, | |
| "learning_rate": 2.113103331756698e-07, | |
| "loss": 0.9295372366905212, | |
| "step": 3394 | |
| }, | |
| { | |
| "epoch": 2.8585858585858586, | |
| "grad_norm": 2.5686521530151367, | |
| "learning_rate": 2.110478506247122e-07, | |
| "loss": 0.9365147948265076, | |
| "step": 3396 | |
| }, | |
| { | |
| "epoch": 2.8602693602693603, | |
| "grad_norm": 2.75380277633667, | |
| "learning_rate": 2.1078843088794325e-07, | |
| "loss": 0.4805770516395569, | |
| "step": 3398 | |
| }, | |
| { | |
| "epoch": 2.861952861952862, | |
| "grad_norm": 14.623507499694824, | |
| "learning_rate": 2.105320748591732e-07, | |
| "loss": 0.38062724471092224, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 2.8636363636363638, | |
| "grad_norm": 47.26361846923828, | |
| "learning_rate": 2.1027878342165624e-07, | |
| "loss": 0.4569489359855652, | |
| "step": 3402 | |
| }, | |
| { | |
| "epoch": 2.865319865319865, | |
| "grad_norm": 2.116769313812256, | |
| "learning_rate": 2.1002855744808815e-07, | |
| "loss": 0.34320202469825745, | |
| "step": 3404 | |
| }, | |
| { | |
| "epoch": 2.8670033670033668, | |
| "grad_norm": 4.610642910003662, | |
| "learning_rate": 2.0978139780060257e-07, | |
| "loss": 0.7092417478561401, | |
| "step": 3406 | |
| }, | |
| { | |
| "epoch": 2.8686868686868685, | |
| "grad_norm": 4.693014144897461, | |
| "learning_rate": 2.0953730533076862e-07, | |
| "loss": 0.29190459847450256, | |
| "step": 3408 | |
| }, | |
| { | |
| "epoch": 2.8703703703703702, | |
| "grad_norm": 3.3123207092285156, | |
| "learning_rate": 2.0929628087958734e-07, | |
| "loss": 0.7917627692222595, | |
| "step": 3410 | |
| }, | |
| { | |
| "epoch": 2.872053872053872, | |
| "grad_norm": 1.7922461032867432, | |
| "learning_rate": 2.0905832527748953e-07, | |
| "loss": 0.43554821610450745, | |
| "step": 3412 | |
| }, | |
| { | |
| "epoch": 2.8737373737373737, | |
| "grad_norm": 4.745511054992676, | |
| "learning_rate": 2.0882343934433236e-07, | |
| "loss": 0.5983174443244934, | |
| "step": 3414 | |
| }, | |
| { | |
| "epoch": 2.8754208754208754, | |
| "grad_norm": 6.916215896606445, | |
| "learning_rate": 2.085916238893966e-07, | |
| "loss": 0.17676572501659393, | |
| "step": 3416 | |
| }, | |
| { | |
| "epoch": 2.877104377104377, | |
| "grad_norm": 4.048447132110596, | |
| "learning_rate": 2.0836287971138418e-07, | |
| "loss": 0.6077107191085815, | |
| "step": 3418 | |
| }, | |
| { | |
| "epoch": 2.878787878787879, | |
| "grad_norm": 2.5704290866851807, | |
| "learning_rate": 2.0813720759841492e-07, | |
| "loss": 0.4146248400211334, | |
| "step": 3420 | |
| }, | |
| { | |
| "epoch": 2.8804713804713806, | |
| "grad_norm": 5.706145286560059, | |
| "learning_rate": 2.0791460832802423e-07, | |
| "loss": 0.7497705221176147, | |
| "step": 3422 | |
| }, | |
| { | |
| "epoch": 2.8821548821548824, | |
| "grad_norm": 1.7757506370544434, | |
| "learning_rate": 2.0769508266716027e-07, | |
| "loss": 0.5505831241607666, | |
| "step": 3424 | |
| }, | |
| { | |
| "epoch": 2.883838383838384, | |
| "grad_norm": 7.052734851837158, | |
| "learning_rate": 2.0747863137218126e-07, | |
| "loss": 0.6165893077850342, | |
| "step": 3426 | |
| }, | |
| { | |
| "epoch": 2.8855218855218854, | |
| "grad_norm": 5.826257705688477, | |
| "learning_rate": 2.0726525518885308e-07, | |
| "loss": 0.5343178510665894, | |
| "step": 3428 | |
| }, | |
| { | |
| "epoch": 2.887205387205387, | |
| "grad_norm": 8.041903495788574, | |
| "learning_rate": 2.0705495485234653e-07, | |
| "loss": 0.3310260772705078, | |
| "step": 3430 | |
| }, | |
| { | |
| "epoch": 2.888888888888889, | |
| "grad_norm": 15.362848281860352, | |
| "learning_rate": 2.0684773108723455e-07, | |
| "loss": 0.5320956707000732, | |
| "step": 3432 | |
| }, | |
| { | |
| "epoch": 2.8905723905723906, | |
| "grad_norm": 7.592126369476318, | |
| "learning_rate": 2.0664358460749018e-07, | |
| "loss": 0.29516857862472534, | |
| "step": 3434 | |
| }, | |
| { | |
| "epoch": 2.8922558922558923, | |
| "grad_norm": 1.8380248546600342, | |
| "learning_rate": 2.064425161164842e-07, | |
| "loss": 0.9136509895324707, | |
| "step": 3436 | |
| }, | |
| { | |
| "epoch": 2.893939393939394, | |
| "grad_norm": 4.288794994354248, | |
| "learning_rate": 2.0624452630698195e-07, | |
| "loss": 0.8272508382797241, | |
| "step": 3438 | |
| }, | |
| { | |
| "epoch": 2.8956228956228958, | |
| "grad_norm": 3.879866600036621, | |
| "learning_rate": 2.0604961586114163e-07, | |
| "loss": 0.744123101234436, | |
| "step": 3440 | |
| }, | |
| { | |
| "epoch": 2.897306397306397, | |
| "grad_norm": 3.288698196411133, | |
| "learning_rate": 2.0585778545051195e-07, | |
| "loss": 0.8894016742706299, | |
| "step": 3442 | |
| }, | |
| { | |
| "epoch": 2.898989898989899, | |
| "grad_norm": 15.847039222717285, | |
| "learning_rate": 2.0566903573602913e-07, | |
| "loss": 0.2585524320602417, | |
| "step": 3444 | |
| }, | |
| { | |
| "epoch": 2.9006734006734005, | |
| "grad_norm": 4.235921859741211, | |
| "learning_rate": 2.0548336736801548e-07, | |
| "loss": 0.5225664377212524, | |
| "step": 3446 | |
| }, | |
| { | |
| "epoch": 2.9023569023569022, | |
| "grad_norm": 5.334314346313477, | |
| "learning_rate": 2.0530078098617668e-07, | |
| "loss": 1.000659704208374, | |
| "step": 3448 | |
| }, | |
| { | |
| "epoch": 2.904040404040404, | |
| "grad_norm": 13.81791877746582, | |
| "learning_rate": 2.0512127721959954e-07, | |
| "loss": 0.2958747446537018, | |
| "step": 3450 | |
| }, | |
| { | |
| "epoch": 2.9057239057239057, | |
| "grad_norm": 2.8504996299743652, | |
| "learning_rate": 2.0494485668675003e-07, | |
| "loss": 0.5946668386459351, | |
| "step": 3452 | |
| }, | |
| { | |
| "epoch": 2.9074074074074074, | |
| "grad_norm": 30.945682525634766, | |
| "learning_rate": 2.0477151999547137e-07, | |
| "loss": 0.6222255229949951, | |
| "step": 3454 | |
| }, | |
| { | |
| "epoch": 2.909090909090909, | |
| "grad_norm": 2.8661885261535645, | |
| "learning_rate": 2.0460126774298115e-07, | |
| "loss": 0.9090818166732788, | |
| "step": 3456 | |
| }, | |
| { | |
| "epoch": 2.910774410774411, | |
| "grad_norm": 3.6362955570220947, | |
| "learning_rate": 2.044341005158701e-07, | |
| "loss": 0.6454827785491943, | |
| "step": 3458 | |
| }, | |
| { | |
| "epoch": 2.9124579124579126, | |
| "grad_norm": 5.509945392608643, | |
| "learning_rate": 2.042700188900996e-07, | |
| "loss": 0.8902723789215088, | |
| "step": 3460 | |
| }, | |
| { | |
| "epoch": 2.9141414141414144, | |
| "grad_norm": 4.623058795928955, | |
| "learning_rate": 2.0410902343099998e-07, | |
| "loss": 0.9835023283958435, | |
| "step": 3462 | |
| }, | |
| { | |
| "epoch": 2.915824915824916, | |
| "grad_norm": 5.559566020965576, | |
| "learning_rate": 2.039511146932683e-07, | |
| "loss": 0.725146472454071, | |
| "step": 3464 | |
| }, | |
| { | |
| "epoch": 2.9175084175084174, | |
| "grad_norm": 2.3381059169769287, | |
| "learning_rate": 2.0379629322096658e-07, | |
| "loss": 0.8742655515670776, | |
| "step": 3466 | |
| }, | |
| { | |
| "epoch": 2.919191919191919, | |
| "grad_norm": 3.1581509113311768, | |
| "learning_rate": 2.036445595475199e-07, | |
| "loss": 0.5896962881088257, | |
| "step": 3468 | |
| }, | |
| { | |
| "epoch": 2.920875420875421, | |
| "grad_norm": 2.895928382873535, | |
| "learning_rate": 2.0349591419571473e-07, | |
| "loss": 0.08913551270961761, | |
| "step": 3470 | |
| }, | |
| { | |
| "epoch": 2.9225589225589226, | |
| "grad_norm": 3.939779758453369, | |
| "learning_rate": 2.0335035767769674e-07, | |
| "loss": 0.5938529968261719, | |
| "step": 3472 | |
| }, | |
| { | |
| "epoch": 2.9242424242424243, | |
| "grad_norm": 2.6540651321411133, | |
| "learning_rate": 2.032078904949694e-07, | |
| "loss": 0.607816755771637, | |
| "step": 3474 | |
| }, | |
| { | |
| "epoch": 2.925925925925926, | |
| "grad_norm": 11.374692916870117, | |
| "learning_rate": 2.0306851313839217e-07, | |
| "loss": 0.26831308007240295, | |
| "step": 3476 | |
| }, | |
| { | |
| "epoch": 2.9276094276094278, | |
| "grad_norm": 4.051253318786621, | |
| "learning_rate": 2.0293222608817862e-07, | |
| "loss": 0.776150107383728, | |
| "step": 3478 | |
| }, | |
| { | |
| "epoch": 2.929292929292929, | |
| "grad_norm": 6.790820121765137, | |
| "learning_rate": 2.0279902981389491e-07, | |
| "loss": 0.44397690892219543, | |
| "step": 3480 | |
| }, | |
| { | |
| "epoch": 2.930976430976431, | |
| "grad_norm": 4.825781345367432, | |
| "learning_rate": 2.026689247744584e-07, | |
| "loss": 0.7775415182113647, | |
| "step": 3482 | |
| }, | |
| { | |
| "epoch": 2.9326599326599325, | |
| "grad_norm": 3.1354546546936035, | |
| "learning_rate": 2.0254191141813563e-07, | |
| "loss": 0.5349434614181519, | |
| "step": 3484 | |
| }, | |
| { | |
| "epoch": 2.9343434343434343, | |
| "grad_norm": 3.595128059387207, | |
| "learning_rate": 2.0241799018254102e-07, | |
| "loss": 0.6211014986038208, | |
| "step": 3486 | |
| }, | |
| { | |
| "epoch": 2.936026936026936, | |
| "grad_norm": 4.181585311889648, | |
| "learning_rate": 2.0229716149463543e-07, | |
| "loss": 0.6584489345550537, | |
| "step": 3488 | |
| }, | |
| { | |
| "epoch": 2.9377104377104377, | |
| "grad_norm": 5.394354343414307, | |
| "learning_rate": 2.0217942577072447e-07, | |
| "loss": 0.5959441661834717, | |
| "step": 3490 | |
| }, | |
| { | |
| "epoch": 2.9393939393939394, | |
| "grad_norm": 13.857940673828125, | |
| "learning_rate": 2.0206478341645734e-07, | |
| "loss": 0.8532196283340454, | |
| "step": 3492 | |
| }, | |
| { | |
| "epoch": 2.941077441077441, | |
| "grad_norm": 6.366513252258301, | |
| "learning_rate": 2.0195323482682508e-07, | |
| "loss": 0.3821958899497986, | |
| "step": 3494 | |
| }, | |
| { | |
| "epoch": 2.942760942760943, | |
| "grad_norm": 2.0421321392059326, | |
| "learning_rate": 2.0184478038615948e-07, | |
| "loss": 0.7394722700119019, | |
| "step": 3496 | |
| }, | |
| { | |
| "epoch": 2.9444444444444446, | |
| "grad_norm": 4.313158988952637, | |
| "learning_rate": 2.0173942046813191e-07, | |
| "loss": 0.2922773063182831, | |
| "step": 3498 | |
| }, | |
| { | |
| "epoch": 2.9461279461279464, | |
| "grad_norm": 5.628312110900879, | |
| "learning_rate": 2.016371554357515e-07, | |
| "loss": 0.608026385307312, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 2.9478114478114477, | |
| "grad_norm": 10.177474975585938, | |
| "learning_rate": 2.015379856413643e-07, | |
| "loss": 0.684483528137207, | |
| "step": 3502 | |
| }, | |
| { | |
| "epoch": 2.9494949494949494, | |
| "grad_norm": 9.977062225341797, | |
| "learning_rate": 2.01441911426652e-07, | |
| "loss": 0.36152565479278564, | |
| "step": 3504 | |
| }, | |
| { | |
| "epoch": 2.951178451178451, | |
| "grad_norm": 1.5593669414520264, | |
| "learning_rate": 2.013489331226307e-07, | |
| "loss": 0.6608873009681702, | |
| "step": 3506 | |
| }, | |
| { | |
| "epoch": 2.952861952861953, | |
| "grad_norm": 3.423954486846924, | |
| "learning_rate": 2.0125905104964978e-07, | |
| "loss": 0.8101043701171875, | |
| "step": 3508 | |
| }, | |
| { | |
| "epoch": 2.9545454545454546, | |
| "grad_norm": 4.263778209686279, | |
| "learning_rate": 2.0117226551739068e-07, | |
| "loss": 0.7046741247177124, | |
| "step": 3510 | |
| }, | |
| { | |
| "epoch": 2.9562289562289563, | |
| "grad_norm": 3.3937125205993652, | |
| "learning_rate": 2.0108857682486629e-07, | |
| "loss": 0.7705718874931335, | |
| "step": 3512 | |
| }, | |
| { | |
| "epoch": 2.957912457912458, | |
| "grad_norm": 10.03588581085205, | |
| "learning_rate": 2.0100798526041927e-07, | |
| "loss": 0.31763288378715515, | |
| "step": 3514 | |
| }, | |
| { | |
| "epoch": 2.9595959595959593, | |
| "grad_norm": 3.6547443866729736, | |
| "learning_rate": 2.009304911017215e-07, | |
| "loss": 0.8195918202400208, | |
| "step": 3516 | |
| }, | |
| { | |
| "epoch": 2.961279461279461, | |
| "grad_norm": 2.8320508003234863, | |
| "learning_rate": 2.0085609461577295e-07, | |
| "loss": 0.871679425239563, | |
| "step": 3518 | |
| }, | |
| { | |
| "epoch": 2.962962962962963, | |
| "grad_norm": 5.754692554473877, | |
| "learning_rate": 2.0078479605890064e-07, | |
| "loss": 0.3950427770614624, | |
| "step": 3520 | |
| }, | |
| { | |
| "epoch": 2.9646464646464645, | |
| "grad_norm": 3.0160629749298096, | |
| "learning_rate": 2.007165956767584e-07, | |
| "loss": 0.65765380859375, | |
| "step": 3522 | |
| }, | |
| { | |
| "epoch": 2.9663299663299663, | |
| "grad_norm": 5.943231105804443, | |
| "learning_rate": 2.00651493704325e-07, | |
| "loss": 0.2477177381515503, | |
| "step": 3524 | |
| }, | |
| { | |
| "epoch": 2.968013468013468, | |
| "grad_norm": 6.068716049194336, | |
| "learning_rate": 2.0058949036590426e-07, | |
| "loss": 0.8671658039093018, | |
| "step": 3526 | |
| }, | |
| { | |
| "epoch": 2.9696969696969697, | |
| "grad_norm": 2.297165632247925, | |
| "learning_rate": 2.0053058587512378e-07, | |
| "loss": 0.7299938201904297, | |
| "step": 3528 | |
| }, | |
| { | |
| "epoch": 2.9713804713804715, | |
| "grad_norm": 3.451326847076416, | |
| "learning_rate": 2.0047478043493418e-07, | |
| "loss": 0.7638918161392212, | |
| "step": 3530 | |
| }, | |
| { | |
| "epoch": 2.973063973063973, | |
| "grad_norm": 5.721773147583008, | |
| "learning_rate": 2.004220742376088e-07, | |
| "loss": 0.6010457873344421, | |
| "step": 3532 | |
| }, | |
| { | |
| "epoch": 2.974747474747475, | |
| "grad_norm": 11.908121109008789, | |
| "learning_rate": 2.0037246746474277e-07, | |
| "loss": 0.21666747331619263, | |
| "step": 3534 | |
| }, | |
| { | |
| "epoch": 2.9764309764309766, | |
| "grad_norm": 2.7472894191741943, | |
| "learning_rate": 2.0032596028725204e-07, | |
| "loss": 0.828637421131134, | |
| "step": 3536 | |
| }, | |
| { | |
| "epoch": 2.9781144781144784, | |
| "grad_norm": 7.899786949157715, | |
| "learning_rate": 2.0028255286537355e-07, | |
| "loss": 0.4242842197418213, | |
| "step": 3538 | |
| }, | |
| { | |
| "epoch": 2.9797979797979797, | |
| "grad_norm": 2.2358016967773438, | |
| "learning_rate": 2.0024224534866408e-07, | |
| "loss": 0.9581695795059204, | |
| "step": 3540 | |
| }, | |
| { | |
| "epoch": 2.9814814814814814, | |
| "grad_norm": 4.023903846740723, | |
| "learning_rate": 2.0020503787599998e-07, | |
| "loss": 0.8976711630821228, | |
| "step": 3542 | |
| }, | |
| { | |
| "epoch": 2.983164983164983, | |
| "grad_norm": 5.354180812835693, | |
| "learning_rate": 2.001709305755767e-07, | |
| "loss": 0.47080734372138977, | |
| "step": 3544 | |
| }, | |
| { | |
| "epoch": 2.984848484848485, | |
| "grad_norm": 6.203042507171631, | |
| "learning_rate": 2.0013992356490827e-07, | |
| "loss": 0.799166202545166, | |
| "step": 3546 | |
| }, | |
| { | |
| "epoch": 2.9865319865319866, | |
| "grad_norm": 6.4163031578063965, | |
| "learning_rate": 2.0011201695082687e-07, | |
| "loss": 0.30166110396385193, | |
| "step": 3548 | |
| }, | |
| { | |
| "epoch": 2.9882154882154883, | |
| "grad_norm": 9.541460037231445, | |
| "learning_rate": 2.0008721082948243e-07, | |
| "loss": 0.3377661108970642, | |
| "step": 3550 | |
| }, | |
| { | |
| "epoch": 2.98989898989899, | |
| "grad_norm": 12.612906455993652, | |
| "learning_rate": 2.0006550528634258e-07, | |
| "loss": 0.4944566488265991, | |
| "step": 3552 | |
| }, | |
| { | |
| "epoch": 2.9915824915824913, | |
| "grad_norm": 1.854871153831482, | |
| "learning_rate": 2.00046900396192e-07, | |
| "loss": 0.9397309422492981, | |
| "step": 3554 | |
| }, | |
| { | |
| "epoch": 2.993265993265993, | |
| "grad_norm": 2.197124719619751, | |
| "learning_rate": 2.0003139622313241e-07, | |
| "loss": 0.7814288139343262, | |
| "step": 3556 | |
| }, | |
| { | |
| "epoch": 2.994949494949495, | |
| "grad_norm": 2.3128502368927, | |
| "learning_rate": 2.0001899282058216e-07, | |
| "loss": 0.6661207675933838, | |
| "step": 3558 | |
| }, | |
| { | |
| "epoch": 2.9966329966329965, | |
| "grad_norm": 12.201488494873047, | |
| "learning_rate": 2.000096902312762e-07, | |
| "loss": 0.40893661975860596, | |
| "step": 3560 | |
| }, | |
| { | |
| "epoch": 2.9983164983164983, | |
| "grad_norm": 4.00324821472168, | |
| "learning_rate": 2.0000348848726586e-07, | |
| "loss": 0.5416642427444458, | |
| "step": 3562 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "grad_norm": 11.186657905578613, | |
| "learning_rate": 2.0000038760991877e-07, | |
| "loss": 0.361904501914978, | |
| "step": 3564 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "step": 3564, | |
| "total_flos": 4.2988160857187287e+18, | |
| "train_loss": 0.7978645538875685, | |
| "train_runtime": 6311.8591, | |
| "train_samples_per_second": 9.034, | |
| "train_steps_per_second": 0.565 | |
| } | |
| ], | |
| "logging_steps": 2, | |
| "max_steps": 3564, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 99999, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": false, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 4.2988160857187287e+18, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |