Image-Text-to-Text
Transformers
Safetensors
qwen3_5
llama-factory
full
Generated from Trainer
conversational
Instructions to use furproxy/9b-106 with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use furproxy/9b-106 with Transformers:
# Use a pipeline as a high-level helper from transformers import pipeline pipe = pipeline("image-text-to-text", model="furproxy/9b-106") messages = [ { "role": "user", "content": [ {"type": "image", "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/p-blog/candy.JPG"}, {"type": "text", "text": "What animal is on the candy?"} ] }, ] pipe(text=messages)# Load model directly from transformers import AutoProcessor, AutoModelForImageTextToText processor = AutoProcessor.from_pretrained("furproxy/9b-106") model = AutoModelForImageTextToText.from_pretrained("furproxy/9b-106") messages = [ { "role": "user", "content": [ {"type": "image", "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/p-blog/candy.JPG"}, {"type": "text", "text": "What animal is on the candy?"} ] }, ] inputs = processor.apply_chat_template( messages, add_generation_prompt=True, tokenize=True, return_dict=True, return_tensors="pt", ).to(model.device) outputs = model.generate(**inputs, max_new_tokens=40) print(processor.decode(outputs[0][inputs["input_ids"].shape[-1]:])) - Notebooks
- Google Colab
- Kaggle
- Local Apps Settings
- vLLM
How to use furproxy/9b-106 with vLLM:
Install from pip and serve model
# Install vLLM from pip: pip install vllm # Start the vLLM server: vllm serve "furproxy/9b-106" # Call the server using curl (OpenAI-compatible API): curl -X POST "http://localhost:8000/v1/chat/completions" \ -H "Content-Type: application/json" \ --data '{ "model": "furproxy/9b-106", "messages": [ { "role": "user", "content": [ { "type": "text", "text": "Describe this image in one sentence." }, { "type": "image_url", "image_url": { "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg" } } ] } ] }'Use Docker
docker model run hf.co/furproxy/9b-106
- SGLang
How to use furproxy/9b-106 with SGLang:
Install from pip and serve model
# Install SGLang from pip: pip install sglang # Start the SGLang server: python3 -m sglang.launch_server \ --model-path "furproxy/9b-106" \ --host 0.0.0.0 \ --port 30000 # Call the server using curl (OpenAI-compatible API): curl -X POST "http://localhost:30000/v1/chat/completions" \ -H "Content-Type: application/json" \ --data '{ "model": "furproxy/9b-106", "messages": [ { "role": "user", "content": [ { "type": "text", "text": "Describe this image in one sentence." }, { "type": "image_url", "image_url": { "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg" } } ] } ] }'Use Docker images
docker run --gpus all \ --shm-size 32g \ -p 30000:30000 \ -v ~/.cache/huggingface:/root/.cache/huggingface \ --env "HF_TOKEN=<secret>" \ --ipc=host \ lmsysorg/sglang:latest \ python3 -m sglang.launch_server \ --model-path "furproxy/9b-106" \ --host 0.0.0.0 \ --port 30000 # Call the server using curl (OpenAI-compatible API): curl -X POST "http://localhost:30000/v1/chat/completions" \ -H "Content-Type: application/json" \ --data '{ "model": "furproxy/9b-106", "messages": [ { "role": "user", "content": [ { "type": "text", "text": "Describe this image in one sentence." }, { "type": "image_url", "image_url": { "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg" } } ] } ] }' - Docker Model Runner
How to use furproxy/9b-106 with Docker Model Runner:
docker model run hf.co/furproxy/9b-106
| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 6.0, | |
| "eval_steps": 500, | |
| "global_step": 2844, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.004219409282700422, | |
| "grad_norm": 59.86221694946289, | |
| "learning_rate": 5.594405594405594e-08, | |
| "loss": 2.1981945037841797, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.008438818565400843, | |
| "grad_norm": 12.374919891357422, | |
| "learning_rate": 1.6783216783216782e-07, | |
| "loss": 1.7811565399169922, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.012658227848101266, | |
| "grad_norm": 2.8306868076324463, | |
| "learning_rate": 2.7972027972027973e-07, | |
| "loss": 1.9376487731933594, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.016877637130801686, | |
| "grad_norm": 5.625478267669678, | |
| "learning_rate": 3.916083916083916e-07, | |
| "loss": 1.9494853019714355, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.02109704641350211, | |
| "grad_norm": 15.797261238098145, | |
| "learning_rate": 5.034965034965036e-07, | |
| "loss": 1.849827766418457, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.02531645569620253, | |
| "grad_norm": 2.746943950653076, | |
| "learning_rate": 6.153846153846154e-07, | |
| "loss": 1.3138155937194824, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.029535864978902954, | |
| "grad_norm": 7.311520576477051, | |
| "learning_rate": 7.272727272727272e-07, | |
| "loss": 1.650458574295044, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.03375527426160337, | |
| "grad_norm": 10.263240814208984, | |
| "learning_rate": 8.391608391608391e-07, | |
| "loss": 2.136387825012207, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.0379746835443038, | |
| "grad_norm": 1.834839940071106, | |
| "learning_rate": 9.51048951048951e-07, | |
| "loss": 1.8011322021484375, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.04219409282700422, | |
| "grad_norm": 3.437499761581421, | |
| "learning_rate": 1.0629370629370628e-06, | |
| "loss": 1.872532606124878, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.046413502109704644, | |
| "grad_norm": 2.469942808151245, | |
| "learning_rate": 1.1748251748251746e-06, | |
| "loss": 1.5344078540802002, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.05063291139240506, | |
| "grad_norm": 3.802064895629883, | |
| "learning_rate": 1.2867132867132867e-06, | |
| "loss": 1.7150638103485107, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.05485232067510549, | |
| "grad_norm": 3.20348858833313, | |
| "learning_rate": 1.3986013986013985e-06, | |
| "loss": 1.5234665870666504, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.05907172995780591, | |
| "grad_norm": 3.4241185188293457, | |
| "learning_rate": 1.5104895104895103e-06, | |
| "loss": 1.8226149082183838, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.06329113924050633, | |
| "grad_norm": 9.308449745178223, | |
| "learning_rate": 1.6223776223776222e-06, | |
| "loss": 1.499394178390503, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.06751054852320675, | |
| "grad_norm": 2.2547056674957275, | |
| "learning_rate": 1.734265734265734e-06, | |
| "loss": 1.6274735927581787, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.07172995780590717, | |
| "grad_norm": 43.91905212402344, | |
| "learning_rate": 1.8461538461538462e-06, | |
| "loss": 1.1456708908081055, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.0759493670886076, | |
| "grad_norm": 3.9160234928131104, | |
| "learning_rate": 1.958041958041958e-06, | |
| "loss": 1.6436142921447754, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.08016877637130802, | |
| "grad_norm": 4.995796203613281, | |
| "learning_rate": 2.06993006993007e-06, | |
| "loss": 1.1828241348266602, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.08438818565400844, | |
| "grad_norm": 1.9018964767456055, | |
| "learning_rate": 2.1818181818181815e-06, | |
| "loss": 1.6038843393325806, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.08860759493670886, | |
| "grad_norm": 70.95392608642578, | |
| "learning_rate": 2.2937062937062938e-06, | |
| "loss": 1.1521950960159302, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.09282700421940929, | |
| "grad_norm": 5.062403202056885, | |
| "learning_rate": 2.405594405594405e-06, | |
| "loss": 1.8195090293884277, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.0970464135021097, | |
| "grad_norm": 7.01928186416626, | |
| "learning_rate": 2.5174825174825174e-06, | |
| "loss": 1.6717772483825684, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.10126582278481013, | |
| "grad_norm": 2.7374989986419678, | |
| "learning_rate": 2.629370629370629e-06, | |
| "loss": 1.5755090713500977, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.10548523206751055, | |
| "grad_norm": 3.8747036457061768, | |
| "learning_rate": 2.741258741258741e-06, | |
| "loss": 0.8314967751502991, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.10970464135021098, | |
| "grad_norm": 5.7753753662109375, | |
| "learning_rate": 2.8531468531468534e-06, | |
| "loss": 0.8825576305389404, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.11392405063291139, | |
| "grad_norm": 2.804755449295044, | |
| "learning_rate": 2.9650349650349648e-06, | |
| "loss": 0.8038457632064819, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.11814345991561181, | |
| "grad_norm": 2.4263148307800293, | |
| "learning_rate": 3.076923076923077e-06, | |
| "loss": 1.4315626621246338, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.12236286919831224, | |
| "grad_norm": 8.127001762390137, | |
| "learning_rate": 3.1888111888111884e-06, | |
| "loss": 0.7465399503707886, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.12658227848101267, | |
| "grad_norm": 2.8598272800445557, | |
| "learning_rate": 3.3006993006993007e-06, | |
| "loss": 1.4817099571228027, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.1308016877637131, | |
| "grad_norm": 3.184314489364624, | |
| "learning_rate": 3.4125874125874125e-06, | |
| "loss": 1.5089186429977417, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.1350210970464135, | |
| "grad_norm": 1.7058652639389038, | |
| "learning_rate": 3.5244755244755243e-06, | |
| "loss": 1.0980231761932373, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.13924050632911392, | |
| "grad_norm": 4.485511779785156, | |
| "learning_rate": 3.636363636363636e-06, | |
| "loss": 0.8224247694015503, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.14345991561181434, | |
| "grad_norm": 2.326599359512329, | |
| "learning_rate": 3.748251748251748e-06, | |
| "loss": 1.1156786680221558, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.14767932489451477, | |
| "grad_norm": 5.480278491973877, | |
| "learning_rate": 3.860139860139859e-06, | |
| "loss": 0.9959129095077515, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.1518987341772152, | |
| "grad_norm": 2.004271984100342, | |
| "learning_rate": 3.972027972027972e-06, | |
| "loss": 1.2949004173278809, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.15611814345991562, | |
| "grad_norm": 5.011590957641602, | |
| "learning_rate": 4.083916083916084e-06, | |
| "loss": 1.046140193939209, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.16033755274261605, | |
| "grad_norm": 5.78662633895874, | |
| "learning_rate": 4.195804195804196e-06, | |
| "loss": 0.86857008934021, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.16455696202531644, | |
| "grad_norm": 2.636563539505005, | |
| "learning_rate": 4.3076923076923076e-06, | |
| "loss": 1.3720424175262451, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.16877637130801687, | |
| "grad_norm": 2.0765841007232666, | |
| "learning_rate": 4.4195804195804185e-06, | |
| "loss": 1.3632028102874756, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.1729957805907173, | |
| "grad_norm": 2.3837273120880127, | |
| "learning_rate": 4.531468531468531e-06, | |
| "loss": 1.4058136940002441, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.17721518987341772, | |
| "grad_norm": 4.226639747619629, | |
| "learning_rate": 4.643356643356643e-06, | |
| "loss": 0.7853094339370728, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.18143459915611815, | |
| "grad_norm": 4.09359884262085, | |
| "learning_rate": 4.755244755244755e-06, | |
| "loss": 1.1315921545028687, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.18565400843881857, | |
| "grad_norm": 1.9499998092651367, | |
| "learning_rate": 4.8671328671328676e-06, | |
| "loss": 1.323297381401062, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.189873417721519, | |
| "grad_norm": 7.248386383056641, | |
| "learning_rate": 4.9790209790209785e-06, | |
| "loss": 0.6489843726158142, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.1940928270042194, | |
| "grad_norm": 3.945362091064453, | |
| "learning_rate": 5.09090909090909e-06, | |
| "loss": 1.7082250118255615, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.19831223628691982, | |
| "grad_norm": 9.237424850463867, | |
| "learning_rate": 5.202797202797202e-06, | |
| "loss": 0.9587538242340088, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.20253164556962025, | |
| "grad_norm": 2.5106499195098877, | |
| "learning_rate": 5.314685314685315e-06, | |
| "loss": 1.4145572185516357, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.20675105485232068, | |
| "grad_norm": 2.280298948287964, | |
| "learning_rate": 5.426573426573427e-06, | |
| "loss": 1.4730861186981201, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.2109704641350211, | |
| "grad_norm": 4.468693256378174, | |
| "learning_rate": 5.538461538461538e-06, | |
| "loss": 1.24980628490448, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.21518987341772153, | |
| "grad_norm": 2.57384991645813, | |
| "learning_rate": 5.6503496503496495e-06, | |
| "loss": 1.0641834735870361, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.21940928270042195, | |
| "grad_norm": 2.2377758026123047, | |
| "learning_rate": 5.762237762237762e-06, | |
| "loss": 0.9983944892883301, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.22362869198312235, | |
| "grad_norm": 7.83008337020874, | |
| "learning_rate": 5.874125874125874e-06, | |
| "loss": 0.9789789319038391, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.22784810126582278, | |
| "grad_norm": 2.1825568675994873, | |
| "learning_rate": 5.986013986013986e-06, | |
| "loss": 0.9948168992996216, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.2320675105485232, | |
| "grad_norm": 1.7740533351898193, | |
| "learning_rate": 6.097902097902097e-06, | |
| "loss": 1.0290191173553467, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.23628691983122363, | |
| "grad_norm": 1.6431820392608643, | |
| "learning_rate": 6.2097902097902095e-06, | |
| "loss": 1.3816218376159668, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.24050632911392406, | |
| "grad_norm": 4.050329208374023, | |
| "learning_rate": 6.321678321678321e-06, | |
| "loss": 1.2858781814575195, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.24472573839662448, | |
| "grad_norm": 4.519939422607422, | |
| "learning_rate": 6.433566433566433e-06, | |
| "loss": 1.6122548580169678, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.2489451476793249, | |
| "grad_norm": 1.7163703441619873, | |
| "learning_rate": 6.545454545454546e-06, | |
| "loss": 1.2705044746398926, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 0.25316455696202533, | |
| "grad_norm": 1.1608729362487793, | |
| "learning_rate": 6.657342657342657e-06, | |
| "loss": 1.0020270347595215, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.25738396624472576, | |
| "grad_norm": 4.328707695007324, | |
| "learning_rate": 6.769230769230769e-06, | |
| "loss": 1.2712280750274658, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 0.2616033755274262, | |
| "grad_norm": 1.8052810430526733, | |
| "learning_rate": 6.8811188811188805e-06, | |
| "loss": 1.2797789573669434, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.26582278481012656, | |
| "grad_norm": 2.120347023010254, | |
| "learning_rate": 6.993006993006993e-06, | |
| "loss": 1.3641468286514282, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 0.270042194092827, | |
| "grad_norm": 7.924063682556152, | |
| "learning_rate": 7.104895104895105e-06, | |
| "loss": 0.8769274950027466, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.2742616033755274, | |
| "grad_norm": 2.9971201419830322, | |
| "learning_rate": 7.216783216783216e-06, | |
| "loss": 1.1519945859909058, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.27848101265822783, | |
| "grad_norm": 4.976275444030762, | |
| "learning_rate": 7.328671328671328e-06, | |
| "loss": 1.217698335647583, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 0.28270042194092826, | |
| "grad_norm": 3.9333672523498535, | |
| "learning_rate": 7.4405594405594405e-06, | |
| "loss": 0.6807541847229004, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 0.2869198312236287, | |
| "grad_norm": 4.12578821182251, | |
| "learning_rate": 7.552447552447552e-06, | |
| "loss": 0.8635811805725098, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.2911392405063291, | |
| "grad_norm": 4.128167629241943, | |
| "learning_rate": 7.664335664335663e-06, | |
| "loss": 1.3738093376159668, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 0.29535864978902954, | |
| "grad_norm": 4.789083957672119, | |
| "learning_rate": 7.776223776223776e-06, | |
| "loss": 0.9322667717933655, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.29957805907172996, | |
| "grad_norm": 5.845694541931152, | |
| "learning_rate": 7.888111888111889e-06, | |
| "loss": 1.2719149589538574, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 0.3037974683544304, | |
| "grad_norm": 4.548868656158447, | |
| "learning_rate": 8e-06, | |
| "loss": 1.0755615234375, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.3080168776371308, | |
| "grad_norm": 2.8906826972961426, | |
| "learning_rate": 7.99999025946351e-06, | |
| "loss": 1.3829221725463867, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 0.31223628691983124, | |
| "grad_norm": 8.330571174621582, | |
| "learning_rate": 7.999961037906754e-06, | |
| "loss": 1.3621151447296143, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 0.31645569620253167, | |
| "grad_norm": 3.5093352794647217, | |
| "learning_rate": 7.999912335487857e-06, | |
| "loss": 1.2308037281036377, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.3206751054852321, | |
| "grad_norm": 6.054520606994629, | |
| "learning_rate": 7.999844152470372e-06, | |
| "loss": 1.2870557308197021, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 0.32489451476793246, | |
| "grad_norm": 1.853664755821228, | |
| "learning_rate": 7.999756489223264e-06, | |
| "loss": 1.465219259262085, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 0.3291139240506329, | |
| "grad_norm": 3.744748592376709, | |
| "learning_rate": 7.999649346220915e-06, | |
| "loss": 1.2533340454101562, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 0.3333333333333333, | |
| "grad_norm": 3.5540201663970947, | |
| "learning_rate": 7.999522724043118e-06, | |
| "loss": 1.3192243576049805, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 0.33755274261603374, | |
| "grad_norm": 1.734734058380127, | |
| "learning_rate": 7.999376623375078e-06, | |
| "loss": 1.302985429763794, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.34177215189873417, | |
| "grad_norm": 4.0876383781433105, | |
| "learning_rate": 7.999211045007407e-06, | |
| "loss": 0.8754786849021912, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 0.3459915611814346, | |
| "grad_norm": 4.771523952484131, | |
| "learning_rate": 7.999025989836115e-06, | |
| "loss": 1.2280066013336182, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 0.350210970464135, | |
| "grad_norm": 8.509572982788086, | |
| "learning_rate": 7.998821458862613e-06, | |
| "loss": 0.8188046813011169, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 0.35443037974683544, | |
| "grad_norm": 4.890702724456787, | |
| "learning_rate": 7.998597453193701e-06, | |
| "loss": 1.2213170528411865, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 0.35864978902953587, | |
| "grad_norm": 1.8209432363510132, | |
| "learning_rate": 7.998353974041564e-06, | |
| "loss": 1.312690019607544, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.3628691983122363, | |
| "grad_norm": 7.146384239196777, | |
| "learning_rate": 7.998091022723772e-06, | |
| "loss": 1.2072352170944214, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 0.3670886075949367, | |
| "grad_norm": 3.6562139987945557, | |
| "learning_rate": 7.997808600663259e-06, | |
| "loss": 1.0147764682769775, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 0.37130801687763715, | |
| "grad_norm": 1.787636637687683, | |
| "learning_rate": 7.997506709388324e-06, | |
| "loss": 1.3151808977127075, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 0.3755274261603376, | |
| "grad_norm": 3.019120454788208, | |
| "learning_rate": 7.997185350532626e-06, | |
| "loss": 1.3140928745269775, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 0.379746835443038, | |
| "grad_norm": 1.2904940843582153, | |
| "learning_rate": 7.996844525835172e-06, | |
| "loss": 1.0001540184020996, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.38396624472573837, | |
| "grad_norm": 1.6466349363327026, | |
| "learning_rate": 7.9964842371403e-06, | |
| "loss": 1.2761380672454834, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 0.3881856540084388, | |
| "grad_norm": 2.2190961837768555, | |
| "learning_rate": 7.996104486397683e-06, | |
| "loss": 1.272679090499878, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 0.3924050632911392, | |
| "grad_norm": 8.958863258361816, | |
| "learning_rate": 7.995705275662305e-06, | |
| "loss": 0.5206277966499329, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 0.39662447257383965, | |
| "grad_norm": 1.6779208183288574, | |
| "learning_rate": 7.995286607094459e-06, | |
| "loss": 0.9622843265533447, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 0.4008438818565401, | |
| "grad_norm": 1.6733808517456055, | |
| "learning_rate": 7.994848482959734e-06, | |
| "loss": 1.0529744625091553, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.4050632911392405, | |
| "grad_norm": 6.742412567138672, | |
| "learning_rate": 7.994390905628996e-06, | |
| "loss": 1.2907187938690186, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 0.4092827004219409, | |
| "grad_norm": 5.04363489151001, | |
| "learning_rate": 7.993913877578386e-06, | |
| "loss": 1.062695026397705, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 0.41350210970464135, | |
| "grad_norm": 1.8267443180084229, | |
| "learning_rate": 7.993417401389293e-06, | |
| "loss": 1.2746732234954834, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 0.4177215189873418, | |
| "grad_norm": 2.2780373096466064, | |
| "learning_rate": 7.99290147974836e-06, | |
| "loss": 1.0202131271362305, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 0.4219409282700422, | |
| "grad_norm": 2.0691215991973877, | |
| "learning_rate": 7.992366115447445e-06, | |
| "loss": 1.1179842948913574, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.42616033755274263, | |
| "grad_norm": 4.217780590057373, | |
| "learning_rate": 7.991811311383625e-06, | |
| "loss": 1.1258949041366577, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 0.43037974683544306, | |
| "grad_norm": 2.5018441677093506, | |
| "learning_rate": 7.991237070559173e-06, | |
| "loss": 0.8922556638717651, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 0.4345991561181435, | |
| "grad_norm": 2.525747776031494, | |
| "learning_rate": 7.990643396081536e-06, | |
| "loss": 1.4427449703216553, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 0.4388185654008439, | |
| "grad_norm": 2.50166916847229, | |
| "learning_rate": 7.990030291163336e-06, | |
| "loss": 0.723818838596344, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 0.4430379746835443, | |
| "grad_norm": 1.4910839796066284, | |
| "learning_rate": 7.98939775912233e-06, | |
| "loss": 1.2674278020858765, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.4472573839662447, | |
| "grad_norm": 4.068655490875244, | |
| "learning_rate": 7.98874580338141e-06, | |
| "loss": 0.7890869379043579, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 0.45147679324894513, | |
| "grad_norm": 2.0325798988342285, | |
| "learning_rate": 7.988074427468575e-06, | |
| "loss": 1.1955333948135376, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 0.45569620253164556, | |
| "grad_norm": 2.587684154510498, | |
| "learning_rate": 7.987383635016914e-06, | |
| "loss": 1.2449276447296143, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 0.459915611814346, | |
| "grad_norm": 2.6565630435943604, | |
| "learning_rate": 7.986673429764587e-06, | |
| "loss": 1.294593334197998, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 0.4641350210970464, | |
| "grad_norm": 3.8166139125823975, | |
| "learning_rate": 7.985943815554808e-06, | |
| "loss": 1.2401716709136963, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.46835443037974683, | |
| "grad_norm": 5.548577785491943, | |
| "learning_rate": 7.985194796335814e-06, | |
| "loss": 1.1999175548553467, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 0.47257383966244726, | |
| "grad_norm": 2.5439255237579346, | |
| "learning_rate": 7.98442637616086e-06, | |
| "loss": 1.0728691816329956, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 0.4767932489451477, | |
| "grad_norm": 12.275174140930176, | |
| "learning_rate": 7.983638559188175e-06, | |
| "loss": 1.2714494466781616, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 0.4810126582278481, | |
| "grad_norm": 23.638010025024414, | |
| "learning_rate": 7.982831349680965e-06, | |
| "loss": 0.7866320610046387, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 0.48523206751054854, | |
| "grad_norm": 4.956063270568848, | |
| "learning_rate": 7.982004752007367e-06, | |
| "loss": 0.921814501285553, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.48945147679324896, | |
| "grad_norm": 3.645993232727051, | |
| "learning_rate": 7.98115877064044e-06, | |
| "loss": 0.8924152851104736, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 0.4936708860759494, | |
| "grad_norm": 2.041003704071045, | |
| "learning_rate": 7.980293410158139e-06, | |
| "loss": 1.2708659172058105, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 0.4978902953586498, | |
| "grad_norm": 4.042453289031982, | |
| "learning_rate": 7.979408675243278e-06, | |
| "loss": 1.3152391910552979, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 0.5021097046413502, | |
| "grad_norm": 4.269692420959473, | |
| "learning_rate": 7.978504570683523e-06, | |
| "loss": 0.980125367641449, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 0.5063291139240507, | |
| "grad_norm": 3.801175355911255, | |
| "learning_rate": 7.977581101371354e-06, | |
| "loss": 0.4545478820800781, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.510548523206751, | |
| "grad_norm": 4.7520060539245605, | |
| "learning_rate": 7.97663827230404e-06, | |
| "loss": 1.4193034172058105, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 0.5147679324894515, | |
| "grad_norm": 3.5392112731933594, | |
| "learning_rate": 7.975676088583614e-06, | |
| "loss": 0.8708986043930054, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 0.5189873417721519, | |
| "grad_norm": 2.672224283218384, | |
| "learning_rate": 7.974694555416848e-06, | |
| "loss": 1.4755961894989014, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 0.5232067510548524, | |
| "grad_norm": 1.7014986276626587, | |
| "learning_rate": 7.973693678115218e-06, | |
| "loss": 1.186201810836792, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 0.5274261603375527, | |
| "grad_norm": 1.939778208732605, | |
| "learning_rate": 7.97267346209488e-06, | |
| "loss": 1.3046202659606934, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.5316455696202531, | |
| "grad_norm": 9.32943344116211, | |
| "learning_rate": 7.971633912876644e-06, | |
| "loss": 1.1170387268066406, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 0.5358649789029536, | |
| "grad_norm": 1.9259498119354248, | |
| "learning_rate": 7.97057503608593e-06, | |
| "loss": 1.2260360717773438, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 0.540084388185654, | |
| "grad_norm": 2.0438666343688965, | |
| "learning_rate": 7.969496837452762e-06, | |
| "loss": 1.1931499242782593, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 0.5443037974683544, | |
| "grad_norm": 5.915661811828613, | |
| "learning_rate": 7.968399322811707e-06, | |
| "loss": 1.0251163244247437, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 0.5485232067510548, | |
| "grad_norm": 5.656018257141113, | |
| "learning_rate": 7.967282498101866e-06, | |
| "loss": 0.9710787534713745, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.5527426160337553, | |
| "grad_norm": 1.6091376543045044, | |
| "learning_rate": 7.966146369366839e-06, | |
| "loss": 1.2647578716278076, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 0.5569620253164557, | |
| "grad_norm": 2.3578081130981445, | |
| "learning_rate": 7.96499094275468e-06, | |
| "loss": 1.2789413928985596, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 0.5611814345991561, | |
| "grad_norm": 2.2791264057159424, | |
| "learning_rate": 7.963816224517875e-06, | |
| "loss": 1.2268846035003662, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 0.5654008438818565, | |
| "grad_norm": 4.264853000640869, | |
| "learning_rate": 7.962622221013308e-06, | |
| "loss": 1.4937443733215332, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 0.569620253164557, | |
| "grad_norm": 3.884678602218628, | |
| "learning_rate": 7.961408938702217e-06, | |
| "loss": 1.050868034362793, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.5738396624472574, | |
| "grad_norm": 3.3875067234039307, | |
| "learning_rate": 7.96017638415017e-06, | |
| "loss": 1.3021985292434692, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 0.5780590717299579, | |
| "grad_norm": 2.571835994720459, | |
| "learning_rate": 7.958924564027025e-06, | |
| "loss": 1.1042567491531372, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 0.5822784810126582, | |
| "grad_norm": 2.283672571182251, | |
| "learning_rate": 7.957653485106894e-06, | |
| "loss": 1.2543787956237793, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 0.5864978902953587, | |
| "grad_norm": 3.0379388332366943, | |
| "learning_rate": 7.956363154268103e-06, | |
| "loss": 1.393994688987732, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 0.5907172995780591, | |
| "grad_norm": 4.789394378662109, | |
| "learning_rate": 7.95505357849316e-06, | |
| "loss": 0.9629275798797607, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.5949367088607594, | |
| "grad_norm": 7.168961524963379, | |
| "learning_rate": 7.953724764868716e-06, | |
| "loss": 1.330991268157959, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 0.5991561181434599, | |
| "grad_norm": 2.166527271270752, | |
| "learning_rate": 7.952376720585524e-06, | |
| "loss": 1.54081130027771, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 0.6033755274261603, | |
| "grad_norm": 0.7811316847801208, | |
| "learning_rate": 7.951009452938407e-06, | |
| "loss": 1.1209747791290283, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 0.6075949367088608, | |
| "grad_norm": 1.7322338819503784, | |
| "learning_rate": 7.949622969326205e-06, | |
| "loss": 1.248884916305542, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 0.6118143459915611, | |
| "grad_norm": 31.41063690185547, | |
| "learning_rate": 7.94821727725175e-06, | |
| "loss": 0.8109699487686157, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.6160337552742616, | |
| "grad_norm": 2.779398202896118, | |
| "learning_rate": 7.946792384321818e-06, | |
| "loss": 0.6689173579216003, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 0.620253164556962, | |
| "grad_norm": 2.087568521499634, | |
| "learning_rate": 7.945348298247087e-06, | |
| "loss": 1.2270828485488892, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 0.6244725738396625, | |
| "grad_norm": 5.774115085601807, | |
| "learning_rate": 7.943885026842097e-06, | |
| "loss": 0.7052218317985535, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 0.6286919831223629, | |
| "grad_norm": 8.402689933776855, | |
| "learning_rate": 7.94240257802521e-06, | |
| "loss": 0.6502029895782471, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 0.6329113924050633, | |
| "grad_norm": 6.8001556396484375, | |
| "learning_rate": 7.94090095981856e-06, | |
| "loss": 1.7823140621185303, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.6371308016877637, | |
| "grad_norm": 1.8385719060897827, | |
| "learning_rate": 7.939380180348018e-06, | |
| "loss": 1.2579293251037598, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 0.6413502109704642, | |
| "grad_norm": 3.531500816345215, | |
| "learning_rate": 7.937840247843148e-06, | |
| "loss": 0.8740752339363098, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 0.6455696202531646, | |
| "grad_norm": 4.576239109039307, | |
| "learning_rate": 7.93628117063715e-06, | |
| "loss": 1.0002870559692383, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 0.6497890295358649, | |
| "grad_norm": 1.4867647886276245, | |
| "learning_rate": 7.934702957166833e-06, | |
| "loss": 1.2564589977264404, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 0.6540084388185654, | |
| "grad_norm": 2.41863751411438, | |
| "learning_rate": 7.933105615972553e-06, | |
| "loss": 1.1982673406600952, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.6582278481012658, | |
| "grad_norm": 8.212380409240723, | |
| "learning_rate": 7.931489155698178e-06, | |
| "loss": 0.9985597729682922, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 0.6624472573839663, | |
| "grad_norm": 1.5422508716583252, | |
| "learning_rate": 7.929853585091034e-06, | |
| "loss": 1.2045118808746338, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 0.6666666666666666, | |
| "grad_norm": 2.346829652786255, | |
| "learning_rate": 7.928198913001865e-06, | |
| "loss": 1.0920261144638062, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 0.6708860759493671, | |
| "grad_norm": 2.013681173324585, | |
| "learning_rate": 7.926525148384776e-06, | |
| "loss": 1.0814929008483887, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 0.6751054852320675, | |
| "grad_norm": 6.084042072296143, | |
| "learning_rate": 7.924832300297197e-06, | |
| "loss": 1.0774112939834595, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.679324894514768, | |
| "grad_norm": 2.2710366249084473, | |
| "learning_rate": 7.923120377899818e-06, | |
| "loss": 0.872334897518158, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 0.6835443037974683, | |
| "grad_norm": 4.1272969245910645, | |
| "learning_rate": 7.921389390456549e-06, | |
| "loss": 0.866448163986206, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 0.6877637130801688, | |
| "grad_norm": 1.4605779647827148, | |
| "learning_rate": 7.919639347334477e-06, | |
| "loss": 0.8561316132545471, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 0.6919831223628692, | |
| "grad_norm": 1.4929612874984741, | |
| "learning_rate": 7.917870258003798e-06, | |
| "loss": 0.8531728982925415, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 0.6962025316455697, | |
| "grad_norm": 3.4534809589385986, | |
| "learning_rate": 7.916082132037782e-06, | |
| "loss": 1.5728954076766968, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.70042194092827, | |
| "grad_norm": 6.895410537719727, | |
| "learning_rate": 7.914274979112704e-06, | |
| "loss": 1.0785008668899536, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 0.7046413502109705, | |
| "grad_norm": 2.275595188140869, | |
| "learning_rate": 7.912448809007812e-06, | |
| "loss": 1.3434700965881348, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 0.7088607594936709, | |
| "grad_norm": 2.5494587421417236, | |
| "learning_rate": 7.910603631605259e-06, | |
| "loss": 1.2570360898971558, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 0.7130801687763713, | |
| "grad_norm": 1.906052589416504, | |
| "learning_rate": 7.908739456890056e-06, | |
| "loss": 1.2807261943817139, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 0.7172995780590717, | |
| "grad_norm": 8.606728553771973, | |
| "learning_rate": 7.906856294950012e-06, | |
| "loss": 1.254488468170166, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.7215189873417721, | |
| "grad_norm": 1.998100757598877, | |
| "learning_rate": 7.90495415597569e-06, | |
| "loss": 1.249230146408081, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 0.7257383966244726, | |
| "grad_norm": 1.9510746002197266, | |
| "learning_rate": 7.90303305026034e-06, | |
| "loss": 1.07149076461792, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 0.729957805907173, | |
| "grad_norm": 4.211492538452148, | |
| "learning_rate": 7.901092988199852e-06, | |
| "loss": 0.8842002153396606, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 0.7341772151898734, | |
| "grad_norm": 2.758723497390747, | |
| "learning_rate": 7.899133980292698e-06, | |
| "loss": 1.2383522987365723, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 0.7383966244725738, | |
| "grad_norm": 10.058371543884277, | |
| "learning_rate": 7.897156037139865e-06, | |
| "loss": 1.1752148866653442, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.7426160337552743, | |
| "grad_norm": 13.896164894104004, | |
| "learning_rate": 7.89515916944482e-06, | |
| "loss": 1.0859854221343994, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 0.7468354430379747, | |
| "grad_norm": 4.250057697296143, | |
| "learning_rate": 7.893143388013425e-06, | |
| "loss": 0.6642742156982422, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 0.7510548523206751, | |
| "grad_norm": 9.338140487670898, | |
| "learning_rate": 7.891108703753902e-06, | |
| "loss": 1.047743320465088, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 0.7552742616033755, | |
| "grad_norm": 3.788804054260254, | |
| "learning_rate": 7.88905512767676e-06, | |
| "loss": 1.0082635879516602, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 0.759493670886076, | |
| "grad_norm": 2.8348827362060547, | |
| "learning_rate": 7.886982670894736e-06, | |
| "loss": 1.210444688796997, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.7637130801687764, | |
| "grad_norm": 4.0919718742370605, | |
| "learning_rate": 7.884891344622746e-06, | |
| "loss": 0.98717200756073, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 0.7679324894514767, | |
| "grad_norm": 2.804220676422119, | |
| "learning_rate": 7.88278116017781e-06, | |
| "loss": 1.5726983547210693, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 0.7721518987341772, | |
| "grad_norm": 11.296149253845215, | |
| "learning_rate": 7.880652128978999e-06, | |
| "loss": 0.7079776525497437, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 0.7763713080168776, | |
| "grad_norm": 2.3584940433502197, | |
| "learning_rate": 7.878504262547373e-06, | |
| "loss": 1.1683130264282227, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 0.7805907172995781, | |
| "grad_norm": 4.991513252258301, | |
| "learning_rate": 7.876337572505914e-06, | |
| "loss": 1.472283124923706, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.7848101265822784, | |
| "grad_norm": 7.023402214050293, | |
| "learning_rate": 7.87415207057947e-06, | |
| "loss": 0.9845293760299683, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 0.7890295358649789, | |
| "grad_norm": 5.190583229064941, | |
| "learning_rate": 7.871947768594688e-06, | |
| "loss": 1.0484483242034912, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 0.7932489451476793, | |
| "grad_norm": 2.0012083053588867, | |
| "learning_rate": 7.869724678479944e-06, | |
| "loss": 1.152682900428772, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 0.7974683544303798, | |
| "grad_norm": 0.8431169986724854, | |
| "learning_rate": 7.86748281226529e-06, | |
| "loss": 1.119347333908081, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 0.8016877637130801, | |
| "grad_norm": 1.5772409439086914, | |
| "learning_rate": 7.865222182082384e-06, | |
| "loss": 0.7247622013092041, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.8059071729957806, | |
| "grad_norm": 4.462345123291016, | |
| "learning_rate": 7.862942800164416e-06, | |
| "loss": 1.2693476676940918, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 0.810126582278481, | |
| "grad_norm": 18.4215145111084, | |
| "learning_rate": 7.860644678846057e-06, | |
| "loss": 1.0132197141647339, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 0.8143459915611815, | |
| "grad_norm": 2.6663124561309814, | |
| "learning_rate": 7.858327830563384e-06, | |
| "loss": 1.2443773746490479, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 0.8185654008438819, | |
| "grad_norm": 7.3652448654174805, | |
| "learning_rate": 7.855992267853806e-06, | |
| "loss": 0.9400072693824768, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 0.8227848101265823, | |
| "grad_norm": 33.57978820800781, | |
| "learning_rate": 7.85363800335601e-06, | |
| "loss": 1.216599941253662, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.8270042194092827, | |
| "grad_norm": 4.298940181732178, | |
| "learning_rate": 7.851265049809886e-06, | |
| "loss": 0.9645065069198608, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 0.8312236286919831, | |
| "grad_norm": 3.8016936779022217, | |
| "learning_rate": 7.848873420056456e-06, | |
| "loss": 0.9074147939682007, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 0.8354430379746836, | |
| "grad_norm": 2.417340040206909, | |
| "learning_rate": 7.846463127037807e-06, | |
| "loss": 1.2312746047973633, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 0.8396624472573839, | |
| "grad_norm": 0.5479583144187927, | |
| "learning_rate": 7.844034183797021e-06, | |
| "loss": 1.0866131782531738, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 0.8438818565400844, | |
| "grad_norm": 1.3444585800170898, | |
| "learning_rate": 7.841586603478105e-06, | |
| "loss": 0.9458938837051392, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.8481012658227848, | |
| "grad_norm": 5.396076679229736, | |
| "learning_rate": 7.839120399325913e-06, | |
| "loss": 0.7105859518051147, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 0.8523206751054853, | |
| "grad_norm": 3.8646159172058105, | |
| "learning_rate": 7.836635584686089e-06, | |
| "loss": 1.21824049949646, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 0.8565400843881856, | |
| "grad_norm": 1.9759020805358887, | |
| "learning_rate": 7.834132173004981e-06, | |
| "loss": 0.9442010521888733, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 0.8607594936708861, | |
| "grad_norm": 2.555657148361206, | |
| "learning_rate": 7.831610177829574e-06, | |
| "loss": 0.9720205664634705, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 0.8649789029535865, | |
| "grad_norm": 15.906916618347168, | |
| "learning_rate": 7.829069612807413e-06, | |
| "loss": 1.23225998878479, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.869198312236287, | |
| "grad_norm": 2.261281967163086, | |
| "learning_rate": 7.826510491686538e-06, | |
| "loss": 0.8622678518295288, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 0.8734177215189873, | |
| "grad_norm": 30.68505859375, | |
| "learning_rate": 7.823932828315398e-06, | |
| "loss": 1.210330605506897, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 0.8776371308016878, | |
| "grad_norm": 4.517366409301758, | |
| "learning_rate": 7.82133663664279e-06, | |
| "loss": 1.0655572414398193, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 0.8818565400843882, | |
| "grad_norm": 8.33056926727295, | |
| "learning_rate": 7.81872193071776e-06, | |
| "loss": 1.163268804550171, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 0.8860759493670886, | |
| "grad_norm": 1.1660995483398438, | |
| "learning_rate": 7.81608872468956e-06, | |
| "loss": 0.9473620653152466, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.890295358649789, | |
| "grad_norm": 1.9505749940872192, | |
| "learning_rate": 7.813437032807541e-06, | |
| "loss": 1.3407762050628662, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 0.8945147679324894, | |
| "grad_norm": 13.3614501953125, | |
| "learning_rate": 7.810766869421092e-06, | |
| "loss": 0.9824624061584473, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 0.8987341772151899, | |
| "grad_norm": 4.366639137268066, | |
| "learning_rate": 7.808078248979564e-06, | |
| "loss": 1.1881823539733887, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 0.9029535864978903, | |
| "grad_norm": 1.7050063610076904, | |
| "learning_rate": 7.805371186032176e-06, | |
| "loss": 1.0488433837890625, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 0.9071729957805907, | |
| "grad_norm": 7.406988143920898, | |
| "learning_rate": 7.80264569522796e-06, | |
| "loss": 1.0144481658935547, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.9113924050632911, | |
| "grad_norm": 3.4340415000915527, | |
| "learning_rate": 7.799901791315658e-06, | |
| "loss": 1.0802500247955322, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 0.9156118143459916, | |
| "grad_norm": 3.148068428039551, | |
| "learning_rate": 7.797139489143655e-06, | |
| "loss": 1.2489020824432373, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 0.919831223628692, | |
| "grad_norm": 4.874792098999023, | |
| "learning_rate": 7.794358803659903e-06, | |
| "loss": 0.8715201616287231, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 0.9240506329113924, | |
| "grad_norm": 1.5414555072784424, | |
| "learning_rate": 7.791559749911826e-06, | |
| "loss": 1.2029755115509033, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 0.9282700421940928, | |
| "grad_norm": 1.9815410375595093, | |
| "learning_rate": 7.788742343046248e-06, | |
| "loss": 0.9946187138557434, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.9324894514767933, | |
| "grad_norm": 10.352864265441895, | |
| "learning_rate": 7.785906598309314e-06, | |
| "loss": 0.9312165975570679, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 0.9367088607594937, | |
| "grad_norm": 8.163040161132812, | |
| "learning_rate": 7.783052531046397e-06, | |
| "loss": 1.0982768535614014, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 0.9409282700421941, | |
| "grad_norm": 6.510847091674805, | |
| "learning_rate": 7.780180156702023e-06, | |
| "loss": 1.3022956848144531, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 0.9451476793248945, | |
| "grad_norm": 1.658566951751709, | |
| "learning_rate": 7.777289490819783e-06, | |
| "loss": 1.0020906925201416, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 0.9493670886075949, | |
| "grad_norm": 3.4121336936950684, | |
| "learning_rate": 7.774380549042255e-06, | |
| "loss": 0.9293044209480286, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.9535864978902954, | |
| "grad_norm": 1.8673920631408691, | |
| "learning_rate": 7.771453347110913e-06, | |
| "loss": 1.2867658138275146, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 0.9578059071729957, | |
| "grad_norm": 1.841113805770874, | |
| "learning_rate": 7.768507900866044e-06, | |
| "loss": 0.982481062412262, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 0.9620253164556962, | |
| "grad_norm": 4.477797985076904, | |
| "learning_rate": 7.765544226246663e-06, | |
| "loss": 1.1560728549957275, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 0.9662447257383966, | |
| "grad_norm": 2.086230993270874, | |
| "learning_rate": 7.762562339290425e-06, | |
| "loss": 0.8860993981361389, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 0.9704641350210971, | |
| "grad_norm": 27.607803344726562, | |
| "learning_rate": 7.759562256133541e-06, | |
| "loss": 0.7165157794952393, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.9746835443037974, | |
| "grad_norm": 9.253504753112793, | |
| "learning_rate": 7.75654399301069e-06, | |
| "loss": 0.9001080989837646, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 0.9789029535864979, | |
| "grad_norm": 1.412550449371338, | |
| "learning_rate": 7.753507566254927e-06, | |
| "loss": 1.168654441833496, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 0.9831223628691983, | |
| "grad_norm": 4.56351900100708, | |
| "learning_rate": 7.750452992297599e-06, | |
| "loss": 0.7248488664627075, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 0.9873417721518988, | |
| "grad_norm": 2.234978199005127, | |
| "learning_rate": 7.747380287668257e-06, | |
| "loss": 1.3152525424957275, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 0.9915611814345991, | |
| "grad_norm": 7.138265609741211, | |
| "learning_rate": 7.744289468994562e-06, | |
| "loss": 0.8874726891517639, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.9957805907172996, | |
| "grad_norm": 3.042675256729126, | |
| "learning_rate": 7.741180553002199e-06, | |
| "loss": 1.2144908905029297, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 1.7988324165344238, | |
| "learning_rate": 7.738053556514784e-06, | |
| "loss": 1.2585757970809937, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 1.0042194092827004, | |
| "grad_norm": 6.92104434967041, | |
| "learning_rate": 7.734908496453774e-06, | |
| "loss": 1.060208797454834, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 1.0084388185654007, | |
| "grad_norm": 1.8207498788833618, | |
| "learning_rate": 7.73174538983838e-06, | |
| "loss": 1.1424498558044434, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 1.0126582278481013, | |
| "grad_norm": 2.2936971187591553, | |
| "learning_rate": 7.72856425378546e-06, | |
| "loss": 1.1568377017974854, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 1.0168776371308017, | |
| "grad_norm": 48.475311279296875, | |
| "learning_rate": 7.725365105509444e-06, | |
| "loss": 0.8294604420661926, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 1.021097046413502, | |
| "grad_norm": 4.676328659057617, | |
| "learning_rate": 7.722147962322236e-06, | |
| "loss": 1.0818572044372559, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 1.0253164556962024, | |
| "grad_norm": 5.833449840545654, | |
| "learning_rate": 7.718912841633112e-06, | |
| "loss": 0.5055439472198486, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 1.029535864978903, | |
| "grad_norm": 4.804643630981445, | |
| "learning_rate": 7.715659760948632e-06, | |
| "loss": 0.9713239073753357, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 1.0337552742616034, | |
| "grad_norm": 3.2628095149993896, | |
| "learning_rate": 7.71238873787255e-06, | |
| "loss": 0.8403753042221069, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 1.0379746835443038, | |
| "grad_norm": 2.2691550254821777, | |
| "learning_rate": 7.709099790105707e-06, | |
| "loss": 1.1320157051086426, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 1.0421940928270041, | |
| "grad_norm": 19.545082092285156, | |
| "learning_rate": 7.705792935445948e-06, | |
| "loss": 0.8306432962417603, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 1.0464135021097047, | |
| "grad_norm": 4.1693949699401855, | |
| "learning_rate": 7.702468191788014e-06, | |
| "loss": 0.9293802976608276, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 1.0506329113924051, | |
| "grad_norm": 2.5364606380462646, | |
| "learning_rate": 7.699125577123455e-06, | |
| "loss": 1.2761287689208984, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 1.0548523206751055, | |
| "grad_norm": 11.509333610534668, | |
| "learning_rate": 7.695765109540526e-06, | |
| "loss": 1.0367153882980347, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 1.0590717299578059, | |
| "grad_norm": 2.0936784744262695, | |
| "learning_rate": 7.692386807224092e-06, | |
| "loss": 1.1410118341445923, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 1.0632911392405062, | |
| "grad_norm": 1.661012053489685, | |
| "learning_rate": 7.68899068845553e-06, | |
| "loss": 1.2624824047088623, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 1.0675105485232068, | |
| "grad_norm": 3.211272716522217, | |
| "learning_rate": 7.685576771612624e-06, | |
| "loss": 0.727929413318634, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 1.0717299578059072, | |
| "grad_norm": 2.976832151412964, | |
| "learning_rate": 7.682145075169482e-06, | |
| "loss": 0.9856585264205933, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 1.0759493670886076, | |
| "grad_norm": 1.821731448173523, | |
| "learning_rate": 7.678695617696413e-06, | |
| "loss": 1.0898807048797607, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 1.080168776371308, | |
| "grad_norm": 3.3283071517944336, | |
| "learning_rate": 7.675228417859842e-06, | |
| "loss": 1.0197210311889648, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 1.0843881856540085, | |
| "grad_norm": 2.766814708709717, | |
| "learning_rate": 7.67174349442221e-06, | |
| "loss": 1.07771897315979, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 1.0886075949367089, | |
| "grad_norm": 2.035024881362915, | |
| "learning_rate": 7.66824086624186e-06, | |
| "loss": 1.0527117252349854, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 1.0928270042194093, | |
| "grad_norm": 2.086318254470825, | |
| "learning_rate": 7.664720552272948e-06, | |
| "loss": 0.9818480014801025, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 1.0970464135021096, | |
| "grad_norm": 2.671875, | |
| "learning_rate": 7.661182571565332e-06, | |
| "loss": 0.9276726245880127, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 1.1012658227848102, | |
| "grad_norm": 8.45070743560791, | |
| "learning_rate": 7.657626943264474e-06, | |
| "loss": 0.8790248036384583, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 1.1054852320675106, | |
| "grad_norm": 2.1058099269866943, | |
| "learning_rate": 7.654053686611334e-06, | |
| "loss": 1.0137805938720703, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 1.109704641350211, | |
| "grad_norm": 8.899941444396973, | |
| "learning_rate": 7.650462820942264e-06, | |
| "loss": 0.8656354546546936, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 1.1139240506329113, | |
| "grad_norm": 2.253026247024536, | |
| "learning_rate": 7.64685436568891e-06, | |
| "loss": 1.2973110675811768, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 1.1181434599156117, | |
| "grad_norm": 1.9157750606536865, | |
| "learning_rate": 7.643228340378098e-06, | |
| "loss": 1.208802342414856, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 1.1223628691983123, | |
| "grad_norm": 3.9860875606536865, | |
| "learning_rate": 7.639584764631736e-06, | |
| "loss": 0.6784745454788208, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 1.1265822784810127, | |
| "grad_norm": 4.096652984619141, | |
| "learning_rate": 7.6359236581667e-06, | |
| "loss": 0.969964861869812, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 1.130801687763713, | |
| "grad_norm": 2.2989354133605957, | |
| "learning_rate": 7.632245040794737e-06, | |
| "loss": 1.0640895366668701, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 1.1350210970464134, | |
| "grad_norm": 2.0458743572235107, | |
| "learning_rate": 7.6285489324223534e-06, | |
| "loss": 0.8687632083892822, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 1.139240506329114, | |
| "grad_norm": 3.1451354026794434, | |
| "learning_rate": 7.6248353530507e-06, | |
| "loss": 0.8472435474395752, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 1.1434599156118144, | |
| "grad_norm": 1.7947264909744263, | |
| "learning_rate": 7.621104322775477e-06, | |
| "loss": 0.7688232660293579, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 1.1476793248945147, | |
| "grad_norm": 1.6323504447937012, | |
| "learning_rate": 7.617355861786813e-06, | |
| "loss": 0.7883434891700745, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 1.1518987341772151, | |
| "grad_norm": 3.807711601257324, | |
| "learning_rate": 7.613589990369167e-06, | |
| "loss": 1.1249892711639404, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 1.1561181434599157, | |
| "grad_norm": 3.2176730632781982, | |
| "learning_rate": 7.6098067289012086e-06, | |
| "loss": 1.1086885929107666, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 1.160337552742616, | |
| "grad_norm": 8.372694969177246, | |
| "learning_rate": 7.606006097855713e-06, | |
| "loss": 0.8941718339920044, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 1.1645569620253164, | |
| "grad_norm": 3.027027130126953, | |
| "learning_rate": 7.602188117799451e-06, | |
| "loss": 1.2869350910186768, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 1.1687763713080168, | |
| "grad_norm": 3.2669458389282227, | |
| "learning_rate": 7.598352809393074e-06, | |
| "loss": 1.1333280801773071, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 1.1729957805907172, | |
| "grad_norm": 30.656925201416016, | |
| "learning_rate": 7.594500193391006e-06, | |
| "loss": 0.6378011703491211, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 1.1772151898734178, | |
| "grad_norm": 4.539504528045654, | |
| "learning_rate": 7.590630290641327e-06, | |
| "loss": 1.1045113801956177, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 1.1814345991561181, | |
| "grad_norm": 26.276714324951172, | |
| "learning_rate": 7.586743122085666e-06, | |
| "loss": 0.5744074583053589, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 1.1856540084388185, | |
| "grad_norm": 1.9147850275039673, | |
| "learning_rate": 7.582838708759082e-06, | |
| "loss": 0.7490895986557007, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 1.189873417721519, | |
| "grad_norm": 3.1463582515716553, | |
| "learning_rate": 7.5789170717899516e-06, | |
| "loss": 1.3113162517547607, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 1.1940928270042195, | |
| "grad_norm": 2.1219849586486816, | |
| "learning_rate": 7.57497823239986e-06, | |
| "loss": 1.0440800189971924, | |
| "step": 566 | |
| }, | |
| { | |
| "epoch": 1.1983122362869199, | |
| "grad_norm": 4.773017406463623, | |
| "learning_rate": 7.571022211903475e-06, | |
| "loss": 0.8006106615066528, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 1.2025316455696202, | |
| "grad_norm": 1.4802496433258057, | |
| "learning_rate": 7.567049031708445e-06, | |
| "loss": 1.0503010749816895, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 1.2067510548523206, | |
| "grad_norm": 6.079049587249756, | |
| "learning_rate": 7.563058713315273e-06, | |
| "loss": 1.0806069374084473, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 1.2109704641350212, | |
| "grad_norm": 2.4842324256896973, | |
| "learning_rate": 7.559051278317204e-06, | |
| "loss": 1.1085004806518555, | |
| "step": 574 | |
| }, | |
| { | |
| "epoch": 1.2151898734177216, | |
| "grad_norm": 1.6897258758544922, | |
| "learning_rate": 7.5550267484001084e-06, | |
| "loss": 0.9017348289489746, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 1.219409282700422, | |
| "grad_norm": 3.8387186527252197, | |
| "learning_rate": 7.5509851453423665e-06, | |
| "loss": 0.6278250813484192, | |
| "step": 578 | |
| }, | |
| { | |
| "epoch": 1.2236286919831223, | |
| "grad_norm": 4.397797107696533, | |
| "learning_rate": 7.546926491014742e-06, | |
| "loss": 0.9347223043441772, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 1.2278481012658227, | |
| "grad_norm": 6.560842514038086, | |
| "learning_rate": 7.5428508073802765e-06, | |
| "loss": 0.8352513313293457, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 1.2320675105485233, | |
| "grad_norm": 2.411914825439453, | |
| "learning_rate": 7.538758116494163e-06, | |
| "loss": 0.8624718189239502, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 1.2362869198312236, | |
| "grad_norm": 13.554781913757324, | |
| "learning_rate": 7.534648440503624e-06, | |
| "loss": 0.9081147909164429, | |
| "step": 586 | |
| }, | |
| { | |
| "epoch": 1.240506329113924, | |
| "grad_norm": 4.150012493133545, | |
| "learning_rate": 7.530521801647799e-06, | |
| "loss": 0.8333830833435059, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 1.2447257383966246, | |
| "grad_norm": 2.9597084522247314, | |
| "learning_rate": 7.52637822225762e-06, | |
| "loss": 1.1118628978729248, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 1.248945147679325, | |
| "grad_norm": 1.8890756368637085, | |
| "learning_rate": 7.522217724755688e-06, | |
| "loss": 0.58323734998703, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 1.2531645569620253, | |
| "grad_norm": 1.4861680269241333, | |
| "learning_rate": 7.51804033165616e-06, | |
| "loss": 0.8740505576133728, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 1.2573839662447257, | |
| "grad_norm": 7.2344560623168945, | |
| "learning_rate": 7.513846065564618e-06, | |
| "loss": 0.7560818195343018, | |
| "step": 596 | |
| }, | |
| { | |
| "epoch": 1.261603375527426, | |
| "grad_norm": 5.018416404724121, | |
| "learning_rate": 7.509634949177952e-06, | |
| "loss": 0.664783239364624, | |
| "step": 598 | |
| }, | |
| { | |
| "epoch": 1.2658227848101267, | |
| "grad_norm": 1.6520812511444092, | |
| "learning_rate": 7.505407005284236e-06, | |
| "loss": 0.9736641645431519, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 1.270042194092827, | |
| "grad_norm": 6.091807842254639, | |
| "learning_rate": 7.5011622567626055e-06, | |
| "loss": 1.3401249647140503, | |
| "step": 602 | |
| }, | |
| { | |
| "epoch": 1.2742616033755274, | |
| "grad_norm": 6.131025791168213, | |
| "learning_rate": 7.4969007265831284e-06, | |
| "loss": 1.0688127279281616, | |
| "step": 604 | |
| }, | |
| { | |
| "epoch": 1.2784810126582278, | |
| "grad_norm": 5.798713207244873, | |
| "learning_rate": 7.4926224378066905e-06, | |
| "loss": 0.7948801517486572, | |
| "step": 606 | |
| }, | |
| { | |
| "epoch": 1.2827004219409281, | |
| "grad_norm": 10.305594444274902, | |
| "learning_rate": 7.488327413584863e-06, | |
| "loss": 0.8427482843399048, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 1.2869198312236287, | |
| "grad_norm": 8.171891212463379, | |
| "learning_rate": 7.484015677159779e-06, | |
| "loss": 0.9117364883422852, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 1.2911392405063291, | |
| "grad_norm": 5.7218804359436035, | |
| "learning_rate": 7.479687251864008e-06, | |
| "loss": 0.6430226564407349, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 1.2953586497890295, | |
| "grad_norm": 1.4333503246307373, | |
| "learning_rate": 7.47534216112043e-06, | |
| "loss": 0.6988530158996582, | |
| "step": 614 | |
| }, | |
| { | |
| "epoch": 1.29957805907173, | |
| "grad_norm": 2.1469638347625732, | |
| "learning_rate": 7.4709804284421096e-06, | |
| "loss": 1.0747710466384888, | |
| "step": 616 | |
| }, | |
| { | |
| "epoch": 1.3037974683544304, | |
| "grad_norm": 2.6359219551086426, | |
| "learning_rate": 7.466602077432167e-06, | |
| "loss": 1.0839927196502686, | |
| "step": 618 | |
| }, | |
| { | |
| "epoch": 1.3080168776371308, | |
| "grad_norm": 2.7480428218841553, | |
| "learning_rate": 7.4622071317836495e-06, | |
| "loss": 0.6828069090843201, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 1.3122362869198312, | |
| "grad_norm": 3.1572508811950684, | |
| "learning_rate": 7.45779561527941e-06, | |
| "loss": 0.7725319862365723, | |
| "step": 622 | |
| }, | |
| { | |
| "epoch": 1.3164556962025316, | |
| "grad_norm": 4.627418518066406, | |
| "learning_rate": 7.453367551791965e-06, | |
| "loss": 0.8618891835212708, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 1.3206751054852321, | |
| "grad_norm": 13.366847038269043, | |
| "learning_rate": 7.448922965283379e-06, | |
| "loss": 1.0350444316864014, | |
| "step": 626 | |
| }, | |
| { | |
| "epoch": 1.3248945147679325, | |
| "grad_norm": 9.86931324005127, | |
| "learning_rate": 7.44446187980513e-06, | |
| "loss": 1.5988271236419678, | |
| "step": 628 | |
| }, | |
| { | |
| "epoch": 1.3291139240506329, | |
| "grad_norm": 2.421264171600342, | |
| "learning_rate": 7.439984319497975e-06, | |
| "loss": 0.8888686895370483, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 1.3333333333333333, | |
| "grad_norm": 2.454784870147705, | |
| "learning_rate": 7.435490308591826e-06, | |
| "loss": 0.9130518436431885, | |
| "step": 632 | |
| }, | |
| { | |
| "epoch": 1.3375527426160336, | |
| "grad_norm": 1.1312861442565918, | |
| "learning_rate": 7.4309798714056145e-06, | |
| "loss": 0.7504403591156006, | |
| "step": 634 | |
| }, | |
| { | |
| "epoch": 1.3417721518987342, | |
| "grad_norm": 6.462110996246338, | |
| "learning_rate": 7.4264530323471605e-06, | |
| "loss": 0.6684986352920532, | |
| "step": 636 | |
| }, | |
| { | |
| "epoch": 1.3459915611814346, | |
| "grad_norm": 34.968597412109375, | |
| "learning_rate": 7.421909815913044e-06, | |
| "loss": 0.7958526611328125, | |
| "step": 638 | |
| }, | |
| { | |
| "epoch": 1.350210970464135, | |
| "grad_norm": 2.127056121826172, | |
| "learning_rate": 7.4173502466884655e-06, | |
| "loss": 1.2176686525344849, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 1.3544303797468356, | |
| "grad_norm": 2.8063347339630127, | |
| "learning_rate": 7.412774349347123e-06, | |
| "loss": 0.781902015209198, | |
| "step": 642 | |
| }, | |
| { | |
| "epoch": 1.358649789029536, | |
| "grad_norm": 2.1645452976226807, | |
| "learning_rate": 7.408182148651068e-06, | |
| "loss": 1.2542736530303955, | |
| "step": 644 | |
| }, | |
| { | |
| "epoch": 1.3628691983122363, | |
| "grad_norm": 2.789949417114258, | |
| "learning_rate": 7.4035736694505765e-06, | |
| "loss": 1.1398190259933472, | |
| "step": 646 | |
| }, | |
| { | |
| "epoch": 1.3670886075949367, | |
| "grad_norm": 3.9663474559783936, | |
| "learning_rate": 7.398948936684016e-06, | |
| "loss": 0.8999311923980713, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 1.371308016877637, | |
| "grad_norm": 3.7580132484436035, | |
| "learning_rate": 7.394307975377705e-06, | |
| "loss": 1.3543846607208252, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 1.3755274261603376, | |
| "grad_norm": 2.3560047149658203, | |
| "learning_rate": 7.389650810645788e-06, | |
| "loss": 1.067474365234375, | |
| "step": 652 | |
| }, | |
| { | |
| "epoch": 1.379746835443038, | |
| "grad_norm": 8.556138038635254, | |
| "learning_rate": 7.384977467690088e-06, | |
| "loss": 0.7700819373130798, | |
| "step": 654 | |
| }, | |
| { | |
| "epoch": 1.3839662447257384, | |
| "grad_norm": 2.5057106018066406, | |
| "learning_rate": 7.380287971799974e-06, | |
| "loss": 1.1854264736175537, | |
| "step": 656 | |
| }, | |
| { | |
| "epoch": 1.3881856540084387, | |
| "grad_norm": 3.844391107559204, | |
| "learning_rate": 7.37558234835223e-06, | |
| "loss": 0.6306626796722412, | |
| "step": 658 | |
| }, | |
| { | |
| "epoch": 1.3924050632911391, | |
| "grad_norm": 5.850677013397217, | |
| "learning_rate": 7.370860622810906e-06, | |
| "loss": 1.1226918697357178, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 1.3966244725738397, | |
| "grad_norm": 3.9669106006622314, | |
| "learning_rate": 7.3661228207271954e-06, | |
| "loss": 0.7194290161132812, | |
| "step": 662 | |
| }, | |
| { | |
| "epoch": 1.40084388185654, | |
| "grad_norm": 1.6219054460525513, | |
| "learning_rate": 7.3613689677392795e-06, | |
| "loss": 1.117922306060791, | |
| "step": 664 | |
| }, | |
| { | |
| "epoch": 1.4050632911392404, | |
| "grad_norm": 2.222062826156616, | |
| "learning_rate": 7.356599089572203e-06, | |
| "loss": 0.7130357027053833, | |
| "step": 666 | |
| }, | |
| { | |
| "epoch": 1.409282700421941, | |
| "grad_norm": 2.1885979175567627, | |
| "learning_rate": 7.35181321203773e-06, | |
| "loss": 1.0384951829910278, | |
| "step": 668 | |
| }, | |
| { | |
| "epoch": 1.4135021097046414, | |
| "grad_norm": 1.6668306589126587, | |
| "learning_rate": 7.3470113610342025e-06, | |
| "loss": 1.1504696607589722, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 1.4177215189873418, | |
| "grad_norm": 3.1233675479888916, | |
| "learning_rate": 7.342193562546399e-06, | |
| "loss": 1.2578648328781128, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 1.4219409282700421, | |
| "grad_norm": 1.6057863235473633, | |
| "learning_rate": 7.337359842645397e-06, | |
| "loss": 0.6594195365905762, | |
| "step": 674 | |
| }, | |
| { | |
| "epoch": 1.4261603375527425, | |
| "grad_norm": 4.513533592224121, | |
| "learning_rate": 7.332510227488436e-06, | |
| "loss": 0.6804168224334717, | |
| "step": 676 | |
| }, | |
| { | |
| "epoch": 1.4303797468354431, | |
| "grad_norm": 3.0312631130218506, | |
| "learning_rate": 7.327644743318766e-06, | |
| "loss": 0.9876019954681396, | |
| "step": 678 | |
| }, | |
| { | |
| "epoch": 1.4345991561181435, | |
| "grad_norm": 5.478196144104004, | |
| "learning_rate": 7.322763416465513e-06, | |
| "loss": 1.087882399559021, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 1.4388185654008439, | |
| "grad_norm": 4.24491024017334, | |
| "learning_rate": 7.317866273343534e-06, | |
| "loss": 0.8171271085739136, | |
| "step": 682 | |
| }, | |
| { | |
| "epoch": 1.4430379746835442, | |
| "grad_norm": 1.7392427921295166, | |
| "learning_rate": 7.312953340453274e-06, | |
| "loss": 1.1222814321517944, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 1.4472573839662446, | |
| "grad_norm": 6.056474208831787, | |
| "learning_rate": 7.308024644380625e-06, | |
| "loss": 0.7576450109481812, | |
| "step": 686 | |
| }, | |
| { | |
| "epoch": 1.4514767932489452, | |
| "grad_norm": 3.2466626167297363, | |
| "learning_rate": 7.303080211796774e-06, | |
| "loss": 0.8898618221282959, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 1.4556962025316456, | |
| "grad_norm": 8.08736515045166, | |
| "learning_rate": 7.298120069458071e-06, | |
| "loss": 1.0252546072006226, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 1.459915611814346, | |
| "grad_norm": 6.501577377319336, | |
| "learning_rate": 7.293144244205875e-06, | |
| "loss": 0.6603308916091919, | |
| "step": 692 | |
| }, | |
| { | |
| "epoch": 1.4641350210970465, | |
| "grad_norm": 2.7896158695220947, | |
| "learning_rate": 7.288152762966415e-06, | |
| "loss": 1.225158452987671, | |
| "step": 694 | |
| }, | |
| { | |
| "epoch": 1.4683544303797469, | |
| "grad_norm": 4.285532474517822, | |
| "learning_rate": 7.283145652750635e-06, | |
| "loss": 0.5561915636062622, | |
| "step": 696 | |
| }, | |
| { | |
| "epoch": 1.4725738396624473, | |
| "grad_norm": 1.7746226787567139, | |
| "learning_rate": 7.27812294065406e-06, | |
| "loss": 1.1419634819030762, | |
| "step": 698 | |
| }, | |
| { | |
| "epoch": 1.4767932489451476, | |
| "grad_norm": 1.805708408355713, | |
| "learning_rate": 7.2730846538566375e-06, | |
| "loss": 1.178218126296997, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 1.481012658227848, | |
| "grad_norm": 5.747068405151367, | |
| "learning_rate": 7.2680308196226e-06, | |
| "loss": 0.989362359046936, | |
| "step": 702 | |
| }, | |
| { | |
| "epoch": 1.4852320675105486, | |
| "grad_norm": 7.557291030883789, | |
| "learning_rate": 7.262961465300312e-06, | |
| "loss": 0.7833366394042969, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 1.489451476793249, | |
| "grad_norm": 1.8191746473312378, | |
| "learning_rate": 7.257876618322125e-06, | |
| "loss": 1.1235054731369019, | |
| "step": 706 | |
| }, | |
| { | |
| "epoch": 1.4936708860759493, | |
| "grad_norm": 2.4673242568969727, | |
| "learning_rate": 7.252776306204226e-06, | |
| "loss": 1.1172146797180176, | |
| "step": 708 | |
| }, | |
| { | |
| "epoch": 1.49789029535865, | |
| "grad_norm": 1.062907099723816, | |
| "learning_rate": 7.247660556546489e-06, | |
| "loss": 0.984178900718689, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 1.50210970464135, | |
| "grad_norm": 2.470454692840576, | |
| "learning_rate": 7.242529397032332e-06, | |
| "loss": 0.467578649520874, | |
| "step": 712 | |
| }, | |
| { | |
| "epoch": 1.5063291139240507, | |
| "grad_norm": 4.536571979522705, | |
| "learning_rate": 7.237382855428555e-06, | |
| "loss": 1.0994584560394287, | |
| "step": 714 | |
| }, | |
| { | |
| "epoch": 1.510548523206751, | |
| "grad_norm": 1.8687479496002197, | |
| "learning_rate": 7.232220959585203e-06, | |
| "loss": 1.0004863739013672, | |
| "step": 716 | |
| }, | |
| { | |
| "epoch": 1.5147679324894514, | |
| "grad_norm": 1.6920982599258423, | |
| "learning_rate": 7.227043737435406e-06, | |
| "loss": 1.0185657739639282, | |
| "step": 718 | |
| }, | |
| { | |
| "epoch": 1.518987341772152, | |
| "grad_norm": 3.6787755489349365, | |
| "learning_rate": 7.221851216995229e-06, | |
| "loss": 1.0142695903778076, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 1.5232067510548524, | |
| "grad_norm": 3.2205076217651367, | |
| "learning_rate": 7.216643426363528e-06, | |
| "loss": 1.115187644958496, | |
| "step": 722 | |
| }, | |
| { | |
| "epoch": 1.5274261603375527, | |
| "grad_norm": 2.2605745792388916, | |
| "learning_rate": 7.211420393721787e-06, | |
| "loss": 1.204482078552246, | |
| "step": 724 | |
| }, | |
| { | |
| "epoch": 1.5316455696202531, | |
| "grad_norm": 4.8364667892456055, | |
| "learning_rate": 7.206182147333974e-06, | |
| "loss": 0.6358588933944702, | |
| "step": 726 | |
| }, | |
| { | |
| "epoch": 1.5358649789029535, | |
| "grad_norm": 6.24644136428833, | |
| "learning_rate": 7.200928715546382e-06, | |
| "loss": 0.3853702247142792, | |
| "step": 728 | |
| }, | |
| { | |
| "epoch": 1.540084388185654, | |
| "grad_norm": 2.5594065189361572, | |
| "learning_rate": 7.1956601267874806e-06, | |
| "loss": 0.8702763319015503, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 1.5443037974683544, | |
| "grad_norm": 1.8882993459701538, | |
| "learning_rate": 7.1903764095677595e-06, | |
| "loss": 0.958168625831604, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 1.5485232067510548, | |
| "grad_norm": 2.2798001766204834, | |
| "learning_rate": 7.185077592479573e-06, | |
| "loss": 1.0963058471679688, | |
| "step": 734 | |
| }, | |
| { | |
| "epoch": 1.5527426160337554, | |
| "grad_norm": 3.2573153972625732, | |
| "learning_rate": 7.17976370419699e-06, | |
| "loss": 0.851632833480835, | |
| "step": 736 | |
| }, | |
| { | |
| "epoch": 1.5569620253164556, | |
| "grad_norm": 3.9381816387176514, | |
| "learning_rate": 7.174434773475635e-06, | |
| "loss": 0.9015741944313049, | |
| "step": 738 | |
| }, | |
| { | |
| "epoch": 1.5611814345991561, | |
| "grad_norm": 2.2864365577697754, | |
| "learning_rate": 7.169090829152531e-06, | |
| "loss": 1.0464608669281006, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 1.5654008438818565, | |
| "grad_norm": 1.112131118774414, | |
| "learning_rate": 7.163731900145947e-06, | |
| "loss": 0.6916845440864563, | |
| "step": 742 | |
| }, | |
| { | |
| "epoch": 1.5696202531645569, | |
| "grad_norm": 4.296708583831787, | |
| "learning_rate": 7.158358015455243e-06, | |
| "loss": 0.5111595392227173, | |
| "step": 744 | |
| }, | |
| { | |
| "epoch": 1.5738396624472575, | |
| "grad_norm": 3.6669418811798096, | |
| "learning_rate": 7.152969204160704e-06, | |
| "loss": 0.6755394339561462, | |
| "step": 746 | |
| }, | |
| { | |
| "epoch": 1.5780590717299579, | |
| "grad_norm": 14.373259544372559, | |
| "learning_rate": 7.147565495423394e-06, | |
| "loss": 0.6762098073959351, | |
| "step": 748 | |
| }, | |
| { | |
| "epoch": 1.5822784810126582, | |
| "grad_norm": 1.3357782363891602, | |
| "learning_rate": 7.142146918484996e-06, | |
| "loss": 1.0881752967834473, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 1.5864978902953588, | |
| "grad_norm": 2.055518627166748, | |
| "learning_rate": 7.13671350266764e-06, | |
| "loss": 0.6121246814727783, | |
| "step": 752 | |
| }, | |
| { | |
| "epoch": 1.590717299578059, | |
| "grad_norm": 4.617133617401123, | |
| "learning_rate": 7.131265277373768e-06, | |
| "loss": 0.7100765705108643, | |
| "step": 754 | |
| }, | |
| { | |
| "epoch": 1.5949367088607596, | |
| "grad_norm": 2.564394950866699, | |
| "learning_rate": 7.125802272085954e-06, | |
| "loss": 0.5700541734695435, | |
| "step": 756 | |
| }, | |
| { | |
| "epoch": 1.59915611814346, | |
| "grad_norm": 3.551905393600464, | |
| "learning_rate": 7.120324516366754e-06, | |
| "loss": 0.8716294765472412, | |
| "step": 758 | |
| }, | |
| { | |
| "epoch": 1.6033755274261603, | |
| "grad_norm": 2.745734930038452, | |
| "learning_rate": 7.114832039858547e-06, | |
| "loss": 1.1156964302062988, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 1.6075949367088609, | |
| "grad_norm": 2.40625262260437, | |
| "learning_rate": 7.109324872283371e-06, | |
| "loss": 0.6260151863098145, | |
| "step": 762 | |
| }, | |
| { | |
| "epoch": 1.611814345991561, | |
| "grad_norm": 1.5272423028945923, | |
| "learning_rate": 7.10380304344276e-06, | |
| "loss": 1.1335524320602417, | |
| "step": 764 | |
| }, | |
| { | |
| "epoch": 1.6160337552742616, | |
| "grad_norm": 4.155389785766602, | |
| "learning_rate": 7.098266583217592e-06, | |
| "loss": 1.0015907287597656, | |
| "step": 766 | |
| }, | |
| { | |
| "epoch": 1.620253164556962, | |
| "grad_norm": 0.7669569849967957, | |
| "learning_rate": 7.0927155215679175e-06, | |
| "loss": 0.8719363212585449, | |
| "step": 768 | |
| }, | |
| { | |
| "epoch": 1.6244725738396624, | |
| "grad_norm": 2.079810380935669, | |
| "learning_rate": 7.087149888532803e-06, | |
| "loss": 1.0807125568389893, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 1.628691983122363, | |
| "grad_norm": 1.8407090902328491, | |
| "learning_rate": 7.081569714230168e-06, | |
| "loss": 1.1782212257385254, | |
| "step": 772 | |
| }, | |
| { | |
| "epoch": 1.6329113924050633, | |
| "grad_norm": 2.1639020442962646, | |
| "learning_rate": 7.075975028856614e-06, | |
| "loss": 1.0721291303634644, | |
| "step": 774 | |
| }, | |
| { | |
| "epoch": 1.6371308016877637, | |
| "grad_norm": 4.351590633392334, | |
| "learning_rate": 7.070365862687276e-06, | |
| "loss": 1.0304412841796875, | |
| "step": 776 | |
| }, | |
| { | |
| "epoch": 1.6413502109704643, | |
| "grad_norm": 3.4083518981933594, | |
| "learning_rate": 7.064742246075647e-06, | |
| "loss": 0.8413809537887573, | |
| "step": 778 | |
| }, | |
| { | |
| "epoch": 1.6455696202531644, | |
| "grad_norm": 4.749937534332275, | |
| "learning_rate": 7.059104209453417e-06, | |
| "loss": 0.3687572479248047, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 1.649789029535865, | |
| "grad_norm": 9.06319808959961, | |
| "learning_rate": 7.0534517833303085e-06, | |
| "loss": 1.0481884479522705, | |
| "step": 782 | |
| }, | |
| { | |
| "epoch": 1.6540084388185654, | |
| "grad_norm": 4.062221527099609, | |
| "learning_rate": 7.047784998293913e-06, | |
| "loss": 0.8559701442718506, | |
| "step": 784 | |
| }, | |
| { | |
| "epoch": 1.6582278481012658, | |
| "grad_norm": 10.564360618591309, | |
| "learning_rate": 7.0421038850095235e-06, | |
| "loss": 1.074246883392334, | |
| "step": 786 | |
| }, | |
| { | |
| "epoch": 1.6624472573839664, | |
| "grad_norm": 12.672545433044434, | |
| "learning_rate": 7.036408474219966e-06, | |
| "loss": 0.5824824571609497, | |
| "step": 788 | |
| }, | |
| { | |
| "epoch": 1.6666666666666665, | |
| "grad_norm": 2.3040852546691895, | |
| "learning_rate": 7.03069879674544e-06, | |
| "loss": 1.0848541259765625, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 1.6708860759493671, | |
| "grad_norm": 4.974740982055664, | |
| "learning_rate": 7.024974883483347e-06, | |
| "loss": 0.5032600164413452, | |
| "step": 792 | |
| }, | |
| { | |
| "epoch": 1.6751054852320675, | |
| "grad_norm": 0.5375344753265381, | |
| "learning_rate": 7.019236765408122e-06, | |
| "loss": 0.7874377965927124, | |
| "step": 794 | |
| }, | |
| { | |
| "epoch": 1.6793248945147679, | |
| "grad_norm": 2.140566349029541, | |
| "learning_rate": 7.013484473571073e-06, | |
| "loss": 1.0540302991867065, | |
| "step": 796 | |
| }, | |
| { | |
| "epoch": 1.6835443037974684, | |
| "grad_norm": 6.119294166564941, | |
| "learning_rate": 7.007718039100201e-06, | |
| "loss": 0.8562701940536499, | |
| "step": 798 | |
| }, | |
| { | |
| "epoch": 1.6877637130801688, | |
| "grad_norm": 11.767963409423828, | |
| "learning_rate": 7.001937493200045e-06, | |
| "loss": 1.2052388191223145, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 1.6919831223628692, | |
| "grad_norm": 17.296886444091797, | |
| "learning_rate": 6.996142867151502e-06, | |
| "loss": 0.6549183130264282, | |
| "step": 802 | |
| }, | |
| { | |
| "epoch": 1.6962025316455698, | |
| "grad_norm": 2.24831485748291, | |
| "learning_rate": 6.990334192311668e-06, | |
| "loss": 1.2283351421356201, | |
| "step": 804 | |
| }, | |
| { | |
| "epoch": 1.70042194092827, | |
| "grad_norm": 2.4578514099121094, | |
| "learning_rate": 6.9845115001136545e-06, | |
| "loss": 1.1071836948394775, | |
| "step": 806 | |
| }, | |
| { | |
| "epoch": 1.7046413502109705, | |
| "grad_norm": 10.491120338439941, | |
| "learning_rate": 6.978674822066434e-06, | |
| "loss": 0.7744203805923462, | |
| "step": 808 | |
| }, | |
| { | |
| "epoch": 1.7088607594936709, | |
| "grad_norm": 9.302081108093262, | |
| "learning_rate": 6.97282418975466e-06, | |
| "loss": 0.8782643675804138, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 1.7130801687763713, | |
| "grad_norm": 1.8191728591918945, | |
| "learning_rate": 6.966959634838495e-06, | |
| "loss": 1.128312349319458, | |
| "step": 812 | |
| }, | |
| { | |
| "epoch": 1.7172995780590719, | |
| "grad_norm": 5.150999069213867, | |
| "learning_rate": 6.961081189053449e-06, | |
| "loss": 1.454809546470642, | |
| "step": 814 | |
| }, | |
| { | |
| "epoch": 1.721518987341772, | |
| "grad_norm": 1.7257297039031982, | |
| "learning_rate": 6.955188884210195e-06, | |
| "loss": 1.0828335285186768, | |
| "step": 816 | |
| }, | |
| { | |
| "epoch": 1.7257383966244726, | |
| "grad_norm": 4.542337894439697, | |
| "learning_rate": 6.9492827521944066e-06, | |
| "loss": 0.8022172451019287, | |
| "step": 818 | |
| }, | |
| { | |
| "epoch": 1.729957805907173, | |
| "grad_norm": 8.734732627868652, | |
| "learning_rate": 6.943362824966579e-06, | |
| "loss": 0.46849238872528076, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 1.7341772151898733, | |
| "grad_norm": 7.7328200340271, | |
| "learning_rate": 6.937429134561862e-06, | |
| "loss": 0.5579560995101929, | |
| "step": 822 | |
| }, | |
| { | |
| "epoch": 1.738396624472574, | |
| "grad_norm": 2.0381147861480713, | |
| "learning_rate": 6.9314817130898826e-06, | |
| "loss": 0.7268804311752319, | |
| "step": 824 | |
| }, | |
| { | |
| "epoch": 1.7426160337552743, | |
| "grad_norm": 1.5466476678848267, | |
| "learning_rate": 6.925520592734571e-06, | |
| "loss": 0.9139357805252075, | |
| "step": 826 | |
| }, | |
| { | |
| "epoch": 1.7468354430379747, | |
| "grad_norm": 9.312906265258789, | |
| "learning_rate": 6.919545805753988e-06, | |
| "loss": 0.9899505376815796, | |
| "step": 828 | |
| }, | |
| { | |
| "epoch": 1.7510548523206753, | |
| "grad_norm": 1.7241586446762085, | |
| "learning_rate": 6.913557384480151e-06, | |
| "loss": 1.066752314567566, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 1.7552742616033754, | |
| "grad_norm": 1.62288236618042, | |
| "learning_rate": 6.907555361318859e-06, | |
| "loss": 1.0838364362716675, | |
| "step": 832 | |
| }, | |
| { | |
| "epoch": 1.759493670886076, | |
| "grad_norm": 1.7631701231002808, | |
| "learning_rate": 6.901539768749513e-06, | |
| "loss": 0.8664329051971436, | |
| "step": 834 | |
| }, | |
| { | |
| "epoch": 1.7637130801687764, | |
| "grad_norm": 1.4140545129776, | |
| "learning_rate": 6.895510639324947e-06, | |
| "loss": 1.0501675605773926, | |
| "step": 836 | |
| }, | |
| { | |
| "epoch": 1.7679324894514767, | |
| "grad_norm": 2.3473498821258545, | |
| "learning_rate": 6.889468005671248e-06, | |
| "loss": 0.9035965204238892, | |
| "step": 838 | |
| }, | |
| { | |
| "epoch": 1.7721518987341773, | |
| "grad_norm": 4.273458003997803, | |
| "learning_rate": 6.883411900487578e-06, | |
| "loss": 0.7962709665298462, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 1.7763713080168775, | |
| "grad_norm": 7.660892963409424, | |
| "learning_rate": 6.877342356545999e-06, | |
| "loss": 0.9311078190803528, | |
| "step": 842 | |
| }, | |
| { | |
| "epoch": 1.780590717299578, | |
| "grad_norm": 1.3525784015655518, | |
| "learning_rate": 6.871259406691299e-06, | |
| "loss": 0.46452265977859497, | |
| "step": 844 | |
| }, | |
| { | |
| "epoch": 1.7848101265822784, | |
| "grad_norm": 1.2650240659713745, | |
| "learning_rate": 6.865163083840808e-06, | |
| "loss": 0.653459370136261, | |
| "step": 846 | |
| }, | |
| { | |
| "epoch": 1.7890295358649788, | |
| "grad_norm": 16.892318725585938, | |
| "learning_rate": 6.859053420984222e-06, | |
| "loss": 0.587769091129303, | |
| "step": 848 | |
| }, | |
| { | |
| "epoch": 1.7932489451476794, | |
| "grad_norm": 3.177400588989258, | |
| "learning_rate": 6.852930451183426e-06, | |
| "loss": 0.8080633878707886, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 1.7974683544303798, | |
| "grad_norm": 2.106072187423706, | |
| "learning_rate": 6.846794207572317e-06, | |
| "loss": 1.09242582321167, | |
| "step": 852 | |
| }, | |
| { | |
| "epoch": 1.8016877637130801, | |
| "grad_norm": 3.0559608936309814, | |
| "learning_rate": 6.840644723356619e-06, | |
| "loss": 1.4061119556427002, | |
| "step": 854 | |
| }, | |
| { | |
| "epoch": 1.8059071729957807, | |
| "grad_norm": 0.9994240403175354, | |
| "learning_rate": 6.834482031813709e-06, | |
| "loss": 0.7950407862663269, | |
| "step": 856 | |
| }, | |
| { | |
| "epoch": 1.810126582278481, | |
| "grad_norm": 17.731142044067383, | |
| "learning_rate": 6.82830616629243e-06, | |
| "loss": 1.0046894550323486, | |
| "step": 858 | |
| }, | |
| { | |
| "epoch": 1.8143459915611815, | |
| "grad_norm": 1.822799801826477, | |
| "learning_rate": 6.822117160212916e-06, | |
| "loss": 0.6357040405273438, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 1.8185654008438819, | |
| "grad_norm": 3.4448676109313965, | |
| "learning_rate": 6.815915047066415e-06, | |
| "loss": 1.0787222385406494, | |
| "step": 862 | |
| }, | |
| { | |
| "epoch": 1.8227848101265822, | |
| "grad_norm": 1.7101033926010132, | |
| "learning_rate": 6.809699860415097e-06, | |
| "loss": 1.0257686376571655, | |
| "step": 864 | |
| }, | |
| { | |
| "epoch": 1.8270042194092828, | |
| "grad_norm": 6.336645603179932, | |
| "learning_rate": 6.80347163389188e-06, | |
| "loss": 0.9438542127609253, | |
| "step": 866 | |
| }, | |
| { | |
| "epoch": 1.831223628691983, | |
| "grad_norm": 4.888845920562744, | |
| "learning_rate": 6.797230401200247e-06, | |
| "loss": 0.9173398017883301, | |
| "step": 868 | |
| }, | |
| { | |
| "epoch": 1.8354430379746836, | |
| "grad_norm": 1.9469853639602661, | |
| "learning_rate": 6.790976196114059e-06, | |
| "loss": 1.036512851715088, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 1.839662447257384, | |
| "grad_norm": 5.8768815994262695, | |
| "learning_rate": 6.784709052477382e-06, | |
| "loss": 0.8006809949874878, | |
| "step": 872 | |
| }, | |
| { | |
| "epoch": 1.8438818565400843, | |
| "grad_norm": 2.4700989723205566, | |
| "learning_rate": 6.7784290042042924e-06, | |
| "loss": 0.9556717276573181, | |
| "step": 874 | |
| }, | |
| { | |
| "epoch": 1.8481012658227849, | |
| "grad_norm": 3.1491811275482178, | |
| "learning_rate": 6.772136085278703e-06, | |
| "loss": 1.1224122047424316, | |
| "step": 876 | |
| }, | |
| { | |
| "epoch": 1.8523206751054853, | |
| "grad_norm": 23.419981002807617, | |
| "learning_rate": 6.765830329754171e-06, | |
| "loss": 0.7619462013244629, | |
| "step": 878 | |
| }, | |
| { | |
| "epoch": 1.8565400843881856, | |
| "grad_norm": 4.361552715301514, | |
| "learning_rate": 6.7595117717537186e-06, | |
| "loss": 0.6938849687576294, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 1.8607594936708862, | |
| "grad_norm": 3.267629623413086, | |
| "learning_rate": 6.753180445469651e-06, | |
| "loss": 0.8586090803146362, | |
| "step": 882 | |
| }, | |
| { | |
| "epoch": 1.8649789029535864, | |
| "grad_norm": 1.915306806564331, | |
| "learning_rate": 6.746836385163365e-06, | |
| "loss": 0.7172484397888184, | |
| "step": 884 | |
| }, | |
| { | |
| "epoch": 1.869198312236287, | |
| "grad_norm": 10.15339183807373, | |
| "learning_rate": 6.740479625165166e-06, | |
| "loss": 0.7663919925689697, | |
| "step": 886 | |
| }, | |
| { | |
| "epoch": 1.8734177215189873, | |
| "grad_norm": 1.4916582107543945, | |
| "learning_rate": 6.734110199874082e-06, | |
| "loss": 1.0811569690704346, | |
| "step": 888 | |
| }, | |
| { | |
| "epoch": 1.8776371308016877, | |
| "grad_norm": 3.9946820735931396, | |
| "learning_rate": 6.727728143757681e-06, | |
| "loss": 0.4816530644893646, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 1.8818565400843883, | |
| "grad_norm": 1.4981932640075684, | |
| "learning_rate": 6.7213334913518795e-06, | |
| "loss": 0.6716771721839905, | |
| "step": 892 | |
| }, | |
| { | |
| "epoch": 1.8860759493670884, | |
| "grad_norm": 1.4426230192184448, | |
| "learning_rate": 6.714926277260759e-06, | |
| "loss": 1.055748462677002, | |
| "step": 894 | |
| }, | |
| { | |
| "epoch": 1.890295358649789, | |
| "grad_norm": 2.779737949371338, | |
| "learning_rate": 6.708506536156375e-06, | |
| "loss": 1.2872055768966675, | |
| "step": 896 | |
| }, | |
| { | |
| "epoch": 1.8945147679324894, | |
| "grad_norm": 18.332468032836914, | |
| "learning_rate": 6.702074302778574e-06, | |
| "loss": 0.7888720631599426, | |
| "step": 898 | |
| }, | |
| { | |
| "epoch": 1.8987341772151898, | |
| "grad_norm": 4.269297122955322, | |
| "learning_rate": 6.695629611934803e-06, | |
| "loss": 0.9360828995704651, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 1.9029535864978904, | |
| "grad_norm": 3.4333150386810303, | |
| "learning_rate": 6.689172498499919e-06, | |
| "loss": 1.1581498384475708, | |
| "step": 902 | |
| }, | |
| { | |
| "epoch": 1.9071729957805907, | |
| "grad_norm": 3.325373888015747, | |
| "learning_rate": 6.6827029974160085e-06, | |
| "loss": 1.0004583597183228, | |
| "step": 904 | |
| }, | |
| { | |
| "epoch": 1.9113924050632911, | |
| "grad_norm": 1.337743878364563, | |
| "learning_rate": 6.676221143692186e-06, | |
| "loss": 1.2600127458572388, | |
| "step": 906 | |
| }, | |
| { | |
| "epoch": 1.9156118143459917, | |
| "grad_norm": 9.333498001098633, | |
| "learning_rate": 6.669726972404415e-06, | |
| "loss": 0.5244170427322388, | |
| "step": 908 | |
| }, | |
| { | |
| "epoch": 1.9198312236286919, | |
| "grad_norm": 1.4464713335037231, | |
| "learning_rate": 6.663220518695314e-06, | |
| "loss": 1.0309032201766968, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 1.9240506329113924, | |
| "grad_norm": 3.8824985027313232, | |
| "learning_rate": 6.656701817773966e-06, | |
| "loss": 0.7978178262710571, | |
| "step": 912 | |
| }, | |
| { | |
| "epoch": 1.9282700421940928, | |
| "grad_norm": 2.117260217666626, | |
| "learning_rate": 6.650170904915727e-06, | |
| "loss": 1.1143381595611572, | |
| "step": 914 | |
| }, | |
| { | |
| "epoch": 1.9324894514767932, | |
| "grad_norm": 2.3016726970672607, | |
| "learning_rate": 6.643627815462041e-06, | |
| "loss": 0.7327609062194824, | |
| "step": 916 | |
| }, | |
| { | |
| "epoch": 1.9367088607594938, | |
| "grad_norm": 1.3250812292099, | |
| "learning_rate": 6.637072584820241e-06, | |
| "loss": 1.0381274223327637, | |
| "step": 918 | |
| }, | |
| { | |
| "epoch": 1.9409282700421941, | |
| "grad_norm": 3.2023909091949463, | |
| "learning_rate": 6.630505248463364e-06, | |
| "loss": 0.5368826985359192, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 1.9451476793248945, | |
| "grad_norm": 1.5674402713775635, | |
| "learning_rate": 6.623925841929953e-06, | |
| "loss": 1.0610504150390625, | |
| "step": 922 | |
| }, | |
| { | |
| "epoch": 1.9493670886075949, | |
| "grad_norm": 1.7156344652175903, | |
| "learning_rate": 6.617334400823867e-06, | |
| "loss": 1.154762625694275, | |
| "step": 924 | |
| }, | |
| { | |
| "epoch": 1.9535864978902953, | |
| "grad_norm": 1.6762484312057495, | |
| "learning_rate": 6.610730960814092e-06, | |
| "loss": 0.8508365154266357, | |
| "step": 926 | |
| }, | |
| { | |
| "epoch": 1.9578059071729959, | |
| "grad_norm": 1.3070154190063477, | |
| "learning_rate": 6.604115557634545e-06, | |
| "loss": 0.7161068916320801, | |
| "step": 928 | |
| }, | |
| { | |
| "epoch": 1.9620253164556962, | |
| "grad_norm": 2.4822962284088135, | |
| "learning_rate": 6.597488227083879e-06, | |
| "loss": 1.1143286228179932, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 1.9662447257383966, | |
| "grad_norm": 2.1459968090057373, | |
| "learning_rate": 6.590849005025289e-06, | |
| "loss": 0.8785426020622253, | |
| "step": 932 | |
| }, | |
| { | |
| "epoch": 1.9704641350210972, | |
| "grad_norm": 18.12381935119629, | |
| "learning_rate": 6.584197927386326e-06, | |
| "loss": 1.200589656829834, | |
| "step": 934 | |
| }, | |
| { | |
| "epoch": 1.9746835443037973, | |
| "grad_norm": 1.572724461555481, | |
| "learning_rate": 6.577535030158689e-06, | |
| "loss": 1.1270561218261719, | |
| "step": 936 | |
| }, | |
| { | |
| "epoch": 1.978902953586498, | |
| "grad_norm": 0.8099290132522583, | |
| "learning_rate": 6.570860349398041e-06, | |
| "loss": 0.6693128347396851, | |
| "step": 938 | |
| }, | |
| { | |
| "epoch": 1.9831223628691983, | |
| "grad_norm": 1.4404888153076172, | |
| "learning_rate": 6.5641739212238136e-06, | |
| "loss": 1.1134912967681885, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 1.9873417721518987, | |
| "grad_norm": 11.569640159606934, | |
| "learning_rate": 6.557475781819004e-06, | |
| "loss": 0.9092779159545898, | |
| "step": 942 | |
| }, | |
| { | |
| "epoch": 1.9915611814345993, | |
| "grad_norm": 2.00720477104187, | |
| "learning_rate": 6.550765967429984e-06, | |
| "loss": 0.7343477010726929, | |
| "step": 944 | |
| }, | |
| { | |
| "epoch": 1.9957805907172996, | |
| "grad_norm": 8.226292610168457, | |
| "learning_rate": 6.544044514366306e-06, | |
| "loss": 1.0801680088043213, | |
| "step": 946 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 2.7631890773773193, | |
| "learning_rate": 6.537311459000502e-06, | |
| "loss": 0.5224167108535767, | |
| "step": 948 | |
| }, | |
| { | |
| "epoch": 2.0042194092827006, | |
| "grad_norm": 4.7062296867370605, | |
| "learning_rate": 6.53056683776789e-06, | |
| "loss": 0.8504010438919067, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 2.0084388185654007, | |
| "grad_norm": 8.53116512298584, | |
| "learning_rate": 6.5238106871663755e-06, | |
| "loss": 0.6483380794525146, | |
| "step": 952 | |
| }, | |
| { | |
| "epoch": 2.0126582278481013, | |
| "grad_norm": 3.5020530223846436, | |
| "learning_rate": 6.517043043756252e-06, | |
| "loss": 0.8229789733886719, | |
| "step": 954 | |
| }, | |
| { | |
| "epoch": 2.0168776371308015, | |
| "grad_norm": 2.5090668201446533, | |
| "learning_rate": 6.5102639441600086e-06, | |
| "loss": 0.868636965751648, | |
| "step": 956 | |
| }, | |
| { | |
| "epoch": 2.021097046413502, | |
| "grad_norm": 6.097753047943115, | |
| "learning_rate": 6.503473425062126e-06, | |
| "loss": 0.6441227197647095, | |
| "step": 958 | |
| }, | |
| { | |
| "epoch": 2.0253164556962027, | |
| "grad_norm": 3.8150646686553955, | |
| "learning_rate": 6.4966715232088835e-06, | |
| "loss": 0.7223113179206848, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 2.029535864978903, | |
| "grad_norm": 2.547377109527588, | |
| "learning_rate": 6.489858275408152e-06, | |
| "loss": 1.046697735786438, | |
| "step": 962 | |
| }, | |
| { | |
| "epoch": 2.0337552742616034, | |
| "grad_norm": 2.0660111904144287, | |
| "learning_rate": 6.483033718529204e-06, | |
| "loss": 0.7585334777832031, | |
| "step": 964 | |
| }, | |
| { | |
| "epoch": 2.037974683544304, | |
| "grad_norm": 1.3951258659362793, | |
| "learning_rate": 6.476197889502512e-06, | |
| "loss": 0.571182370185852, | |
| "step": 966 | |
| }, | |
| { | |
| "epoch": 2.042194092827004, | |
| "grad_norm": 5.060699939727783, | |
| "learning_rate": 6.46935082531954e-06, | |
| "loss": 0.6977952718734741, | |
| "step": 968 | |
| }, | |
| { | |
| "epoch": 2.0464135021097047, | |
| "grad_norm": 9.534734725952148, | |
| "learning_rate": 6.4624925630325555e-06, | |
| "loss": 0.924410343170166, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 2.050632911392405, | |
| "grad_norm": 10.224217414855957, | |
| "learning_rate": 6.455623139754423e-06, | |
| "loss": 0.7734869122505188, | |
| "step": 972 | |
| }, | |
| { | |
| "epoch": 2.0548523206751055, | |
| "grad_norm": 2.367072343826294, | |
| "learning_rate": 6.4487425926584005e-06, | |
| "loss": 0.762604832649231, | |
| "step": 974 | |
| }, | |
| { | |
| "epoch": 2.059071729957806, | |
| "grad_norm": 3.0896172523498535, | |
| "learning_rate": 6.441850958977945e-06, | |
| "loss": 0.6143279075622559, | |
| "step": 976 | |
| }, | |
| { | |
| "epoch": 2.0632911392405062, | |
| "grad_norm": 1.7992668151855469, | |
| "learning_rate": 6.434948276006505e-06, | |
| "loss": 0.6615221500396729, | |
| "step": 978 | |
| }, | |
| { | |
| "epoch": 2.067510548523207, | |
| "grad_norm": 3.8281936645507812, | |
| "learning_rate": 6.4280345810973225e-06, | |
| "loss": 0.6476603150367737, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 2.071729957805907, | |
| "grad_norm": 1.7640984058380127, | |
| "learning_rate": 6.42110991166323e-06, | |
| "loss": 0.8162950277328491, | |
| "step": 982 | |
| }, | |
| { | |
| "epoch": 2.0759493670886076, | |
| "grad_norm": 4.995830059051514, | |
| "learning_rate": 6.414174305176448e-06, | |
| "loss": 0.9169092774391174, | |
| "step": 984 | |
| }, | |
| { | |
| "epoch": 2.080168776371308, | |
| "grad_norm": 1.7362895011901855, | |
| "learning_rate": 6.407227799168378e-06, | |
| "loss": 0.9022603034973145, | |
| "step": 986 | |
| }, | |
| { | |
| "epoch": 2.0843881856540083, | |
| "grad_norm": 2.571808338165283, | |
| "learning_rate": 6.400270431229409e-06, | |
| "loss": 0.9147624969482422, | |
| "step": 988 | |
| }, | |
| { | |
| "epoch": 2.088607594936709, | |
| "grad_norm": 9.426867485046387, | |
| "learning_rate": 6.393302239008705e-06, | |
| "loss": 0.46702778339385986, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 2.0928270042194095, | |
| "grad_norm": 1.8141038417816162, | |
| "learning_rate": 6.386323260214006e-06, | |
| "loss": 0.49038439989089966, | |
| "step": 992 | |
| }, | |
| { | |
| "epoch": 2.0970464135021096, | |
| "grad_norm": 2.054802894592285, | |
| "learning_rate": 6.37933353261142e-06, | |
| "loss": 1.0175153017044067, | |
| "step": 994 | |
| }, | |
| { | |
| "epoch": 2.1012658227848102, | |
| "grad_norm": 5.117776870727539, | |
| "learning_rate": 6.372333094025224e-06, | |
| "loss": 0.8956054449081421, | |
| "step": 996 | |
| }, | |
| { | |
| "epoch": 2.1054852320675104, | |
| "grad_norm": 0.727590024471283, | |
| "learning_rate": 6.365321982337655e-06, | |
| "loss": 0.5565606951713562, | |
| "step": 998 | |
| }, | |
| { | |
| "epoch": 2.109704641350211, | |
| "grad_norm": 2.0183682441711426, | |
| "learning_rate": 6.3583002354887065e-06, | |
| "loss": 1.0998228788375854, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 2.1139240506329116, | |
| "grad_norm": 3.211463689804077, | |
| "learning_rate": 6.351267891475925e-06, | |
| "loss": 0.8330961465835571, | |
| "step": 1002 | |
| }, | |
| { | |
| "epoch": 2.1181434599156117, | |
| "grad_norm": 4.385818004608154, | |
| "learning_rate": 6.344224988354201e-06, | |
| "loss": 0.8911874294281006, | |
| "step": 1004 | |
| }, | |
| { | |
| "epoch": 2.1223628691983123, | |
| "grad_norm": 1.8507286310195923, | |
| "learning_rate": 6.3371715642355665e-06, | |
| "loss": 0.5850310325622559, | |
| "step": 1006 | |
| }, | |
| { | |
| "epoch": 2.1265822784810124, | |
| "grad_norm": 1.5357205867767334, | |
| "learning_rate": 6.3301076572889804e-06, | |
| "loss": 0.6495864391326904, | |
| "step": 1008 | |
| }, | |
| { | |
| "epoch": 2.130801687763713, | |
| "grad_norm": 1.9483667612075806, | |
| "learning_rate": 6.32303330574014e-06, | |
| "loss": 0.6409696340560913, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 2.1350210970464136, | |
| "grad_norm": 5.264192581176758, | |
| "learning_rate": 6.3159485478712504e-06, | |
| "loss": 0.8244346976280212, | |
| "step": 1012 | |
| }, | |
| { | |
| "epoch": 2.1392405063291138, | |
| "grad_norm": 6.72544527053833, | |
| "learning_rate": 6.308853422020838e-06, | |
| "loss": 1.0458412170410156, | |
| "step": 1014 | |
| }, | |
| { | |
| "epoch": 2.1434599156118144, | |
| "grad_norm": 4.4975738525390625, | |
| "learning_rate": 6.301747966583533e-06, | |
| "loss": 0.5240525007247925, | |
| "step": 1016 | |
| }, | |
| { | |
| "epoch": 2.147679324894515, | |
| "grad_norm": 2.4814205169677734, | |
| "learning_rate": 6.294632220009858e-06, | |
| "loss": 0.7953197360038757, | |
| "step": 1018 | |
| }, | |
| { | |
| "epoch": 2.151898734177215, | |
| "grad_norm": 1.7783337831497192, | |
| "learning_rate": 6.2875062208060345e-06, | |
| "loss": 0.6177500486373901, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 2.1561181434599157, | |
| "grad_norm": 4.14943790435791, | |
| "learning_rate": 6.280370007533755e-06, | |
| "loss": 0.7844660878181458, | |
| "step": 1022 | |
| }, | |
| { | |
| "epoch": 2.160337552742616, | |
| "grad_norm": 17.742002487182617, | |
| "learning_rate": 6.2732236188099925e-06, | |
| "loss": 0.7165024280548096, | |
| "step": 1024 | |
| }, | |
| { | |
| "epoch": 2.1645569620253164, | |
| "grad_norm": 1.9838588237762451, | |
| "learning_rate": 6.266067093306778e-06, | |
| "loss": 0.9177765846252441, | |
| "step": 1026 | |
| }, | |
| { | |
| "epoch": 2.168776371308017, | |
| "grad_norm": 1.8689168691635132, | |
| "learning_rate": 6.258900469751002e-06, | |
| "loss": 0.9903367757797241, | |
| "step": 1028 | |
| }, | |
| { | |
| "epoch": 2.172995780590717, | |
| "grad_norm": 1.8121206760406494, | |
| "learning_rate": 6.251723786924195e-06, | |
| "loss": 0.9095609188079834, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 2.1772151898734178, | |
| "grad_norm": 1.719159483909607, | |
| "learning_rate": 6.244537083662325e-06, | |
| "loss": 0.9629115462303162, | |
| "step": 1032 | |
| }, | |
| { | |
| "epoch": 2.181434599156118, | |
| "grad_norm": 3.3003413677215576, | |
| "learning_rate": 6.237340398855583e-06, | |
| "loss": 0.9314064979553223, | |
| "step": 1034 | |
| }, | |
| { | |
| "epoch": 2.1856540084388185, | |
| "grad_norm": 1.902093768119812, | |
| "learning_rate": 6.230133771448174e-06, | |
| "loss": 0.8848311305046082, | |
| "step": 1036 | |
| }, | |
| { | |
| "epoch": 2.189873417721519, | |
| "grad_norm": 4.84321403503418, | |
| "learning_rate": 6.222917240438112e-06, | |
| "loss": 0.9192149639129639, | |
| "step": 1038 | |
| }, | |
| { | |
| "epoch": 2.1940928270042193, | |
| "grad_norm": 1.7536414861679077, | |
| "learning_rate": 6.215690844876994e-06, | |
| "loss": 1.1547870635986328, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 2.19831223628692, | |
| "grad_norm": 6.852333068847656, | |
| "learning_rate": 6.208454623869805e-06, | |
| "loss": 0.32395103573799133, | |
| "step": 1042 | |
| }, | |
| { | |
| "epoch": 2.2025316455696204, | |
| "grad_norm": 1.9538402557373047, | |
| "learning_rate": 6.2012086165747e-06, | |
| "loss": 0.9581727981567383, | |
| "step": 1044 | |
| }, | |
| { | |
| "epoch": 2.2067510548523206, | |
| "grad_norm": 4.133998394012451, | |
| "learning_rate": 6.193952862202785e-06, | |
| "loss": 0.6086496710777283, | |
| "step": 1046 | |
| }, | |
| { | |
| "epoch": 2.210970464135021, | |
| "grad_norm": 1.9011021852493286, | |
| "learning_rate": 6.18668740001792e-06, | |
| "loss": 0.7543759346008301, | |
| "step": 1048 | |
| }, | |
| { | |
| "epoch": 2.2151898734177213, | |
| "grad_norm": 1.46916663646698, | |
| "learning_rate": 6.17941226933649e-06, | |
| "loss": 0.9485968947410583, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 2.219409282700422, | |
| "grad_norm": 5.856541156768799, | |
| "learning_rate": 6.172127509527205e-06, | |
| "loss": 0.8059616088867188, | |
| "step": 1052 | |
| }, | |
| { | |
| "epoch": 2.2236286919831225, | |
| "grad_norm": 4.198894023895264, | |
| "learning_rate": 6.164833160010882e-06, | |
| "loss": 0.7487938404083252, | |
| "step": 1054 | |
| }, | |
| { | |
| "epoch": 2.2278481012658227, | |
| "grad_norm": 23.342222213745117, | |
| "learning_rate": 6.157529260260229e-06, | |
| "loss": 0.7880909442901611, | |
| "step": 1056 | |
| }, | |
| { | |
| "epoch": 2.2320675105485233, | |
| "grad_norm": 23.40158462524414, | |
| "learning_rate": 6.150215849799637e-06, | |
| "loss": 0.5327481031417847, | |
| "step": 1058 | |
| }, | |
| { | |
| "epoch": 2.2362869198312234, | |
| "grad_norm": 1.634332537651062, | |
| "learning_rate": 6.142892968204963e-06, | |
| "loss": 0.883295476436615, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 2.240506329113924, | |
| "grad_norm": 0.7251645922660828, | |
| "learning_rate": 6.135560655103316e-06, | |
| "loss": 0.5540227890014648, | |
| "step": 1062 | |
| }, | |
| { | |
| "epoch": 2.2447257383966246, | |
| "grad_norm": 1.5355224609375, | |
| "learning_rate": 6.12821895017284e-06, | |
| "loss": 0.50773686170578, | |
| "step": 1064 | |
| }, | |
| { | |
| "epoch": 2.2489451476793247, | |
| "grad_norm": 2.305499792098999, | |
| "learning_rate": 6.120867893142506e-06, | |
| "loss": 0.8910026550292969, | |
| "step": 1066 | |
| }, | |
| { | |
| "epoch": 2.2531645569620253, | |
| "grad_norm": 3.746581792831421, | |
| "learning_rate": 6.1135075237918905e-06, | |
| "loss": 1.0884243249893188, | |
| "step": 1068 | |
| }, | |
| { | |
| "epoch": 2.257383966244726, | |
| "grad_norm": 3.282155752182007, | |
| "learning_rate": 6.106137881950965e-06, | |
| "loss": 1.0420414209365845, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 2.261603375527426, | |
| "grad_norm": 2.951901435852051, | |
| "learning_rate": 6.098759007499875e-06, | |
| "loss": 0.9006770849227905, | |
| "step": 1072 | |
| }, | |
| { | |
| "epoch": 2.2658227848101267, | |
| "grad_norm": 2.8723626136779785, | |
| "learning_rate": 6.091370940368729e-06, | |
| "loss": 1.1099491119384766, | |
| "step": 1074 | |
| }, | |
| { | |
| "epoch": 2.270042194092827, | |
| "grad_norm": 1.841613531112671, | |
| "learning_rate": 6.083973720537386e-06, | |
| "loss": 0.9306420087814331, | |
| "step": 1076 | |
| }, | |
| { | |
| "epoch": 2.2742616033755274, | |
| "grad_norm": 0.8245161771774292, | |
| "learning_rate": 6.0765673880352224e-06, | |
| "loss": 0.6501108407974243, | |
| "step": 1078 | |
| }, | |
| { | |
| "epoch": 2.278481012658228, | |
| "grad_norm": 16.89291763305664, | |
| "learning_rate": 6.069151982940936e-06, | |
| "loss": 0.7018378376960754, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 2.282700421940928, | |
| "grad_norm": 15.395925521850586, | |
| "learning_rate": 6.06172754538232e-06, | |
| "loss": 0.3668671250343323, | |
| "step": 1082 | |
| }, | |
| { | |
| "epoch": 2.2869198312236287, | |
| "grad_norm": 7.03673791885376, | |
| "learning_rate": 6.054294115536044e-06, | |
| "loss": 0.6594992280006409, | |
| "step": 1084 | |
| }, | |
| { | |
| "epoch": 2.291139240506329, | |
| "grad_norm": 1.275587797164917, | |
| "learning_rate": 6.046851733627436e-06, | |
| "loss": 0.48280084133148193, | |
| "step": 1086 | |
| }, | |
| { | |
| "epoch": 2.2953586497890295, | |
| "grad_norm": 3.333641290664673, | |
| "learning_rate": 6.039400439930271e-06, | |
| "loss": 0.6253411769866943, | |
| "step": 1088 | |
| }, | |
| { | |
| "epoch": 2.29957805907173, | |
| "grad_norm": 1.850312352180481, | |
| "learning_rate": 6.031940274766546e-06, | |
| "loss": 0.49555736780166626, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 2.3037974683544302, | |
| "grad_norm": 3.2576518058776855, | |
| "learning_rate": 6.024471278506269e-06, | |
| "loss": 0.7540421485900879, | |
| "step": 1092 | |
| }, | |
| { | |
| "epoch": 2.308016877637131, | |
| "grad_norm": 2.6489086151123047, | |
| "learning_rate": 6.016993491567234e-06, | |
| "loss": 0.6014547944068909, | |
| "step": 1094 | |
| }, | |
| { | |
| "epoch": 2.3122362869198314, | |
| "grad_norm": 5.111599445343018, | |
| "learning_rate": 6.0095069544148075e-06, | |
| "loss": 0.3525955379009247, | |
| "step": 1096 | |
| }, | |
| { | |
| "epoch": 2.3164556962025316, | |
| "grad_norm": 13.784951210021973, | |
| "learning_rate": 6.002011707561704e-06, | |
| "loss": 0.8247784376144409, | |
| "step": 1098 | |
| }, | |
| { | |
| "epoch": 2.320675105485232, | |
| "grad_norm": 4.951453685760498, | |
| "learning_rate": 5.9945077915677695e-06, | |
| "loss": 0.8657753467559814, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 2.3248945147679323, | |
| "grad_norm": 1.6704450845718384, | |
| "learning_rate": 5.9869952470397655e-06, | |
| "loss": 0.841392993927002, | |
| "step": 1102 | |
| }, | |
| { | |
| "epoch": 2.329113924050633, | |
| "grad_norm": 1.9911108016967773, | |
| "learning_rate": 5.979474114631144e-06, | |
| "loss": 1.0287697315216064, | |
| "step": 1104 | |
| }, | |
| { | |
| "epoch": 2.3333333333333335, | |
| "grad_norm": 1.8969277143478394, | |
| "learning_rate": 5.971944435041831e-06, | |
| "loss": 0.730893611907959, | |
| "step": 1106 | |
| }, | |
| { | |
| "epoch": 2.3375527426160336, | |
| "grad_norm": 4.81918478012085, | |
| "learning_rate": 5.9644062490180004e-06, | |
| "loss": 0.5627094507217407, | |
| "step": 1108 | |
| }, | |
| { | |
| "epoch": 2.3417721518987342, | |
| "grad_norm": 2.462564468383789, | |
| "learning_rate": 5.956859597351862e-06, | |
| "loss": 0.8845915198326111, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 2.3459915611814344, | |
| "grad_norm": 2.167839527130127, | |
| "learning_rate": 5.94930452088144e-06, | |
| "loss": 0.9817606210708618, | |
| "step": 1112 | |
| }, | |
| { | |
| "epoch": 2.350210970464135, | |
| "grad_norm": 8.067427635192871, | |
| "learning_rate": 5.941741060490339e-06, | |
| "loss": 1.1635032892227173, | |
| "step": 1114 | |
| }, | |
| { | |
| "epoch": 2.3544303797468356, | |
| "grad_norm": 0.5956460237503052, | |
| "learning_rate": 5.93416925710754e-06, | |
| "loss": 0.4855182468891144, | |
| "step": 1116 | |
| }, | |
| { | |
| "epoch": 2.3586497890295357, | |
| "grad_norm": 15.0598783493042, | |
| "learning_rate": 5.9265891517071695e-06, | |
| "loss": 0.9245091676712036, | |
| "step": 1118 | |
| }, | |
| { | |
| "epoch": 2.3628691983122363, | |
| "grad_norm": 2.6616246700286865, | |
| "learning_rate": 5.9190007853082795e-06, | |
| "loss": 0.6047594547271729, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 2.367088607594937, | |
| "grad_norm": 7.563075542449951, | |
| "learning_rate": 5.911404198974625e-06, | |
| "loss": 0.9117496013641357, | |
| "step": 1122 | |
| }, | |
| { | |
| "epoch": 2.371308016877637, | |
| "grad_norm": 5.370510101318359, | |
| "learning_rate": 5.903799433814442e-06, | |
| "loss": 0.5350353717803955, | |
| "step": 1124 | |
| }, | |
| { | |
| "epoch": 2.3755274261603376, | |
| "grad_norm": 1.819912075996399, | |
| "learning_rate": 5.8961865309802285e-06, | |
| "loss": 0.667518138885498, | |
| "step": 1126 | |
| }, | |
| { | |
| "epoch": 2.379746835443038, | |
| "grad_norm": 2.640817165374756, | |
| "learning_rate": 5.888565531668514e-06, | |
| "loss": 0.8784997463226318, | |
| "step": 1128 | |
| }, | |
| { | |
| "epoch": 2.3839662447257384, | |
| "grad_norm": 66.16091918945312, | |
| "learning_rate": 5.880936477119645e-06, | |
| "loss": 0.4616549611091614, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 2.388185654008439, | |
| "grad_norm": 5.994080066680908, | |
| "learning_rate": 5.873299408617559e-06, | |
| "loss": 0.3559979200363159, | |
| "step": 1132 | |
| }, | |
| { | |
| "epoch": 2.392405063291139, | |
| "grad_norm": 23.00125503540039, | |
| "learning_rate": 5.865654367489556e-06, | |
| "loss": 0.40349674224853516, | |
| "step": 1134 | |
| }, | |
| { | |
| "epoch": 2.3966244725738397, | |
| "grad_norm": 5.636394500732422, | |
| "learning_rate": 5.858001395106082e-06, | |
| "loss": 0.5823970437049866, | |
| "step": 1136 | |
| }, | |
| { | |
| "epoch": 2.40084388185654, | |
| "grad_norm": 2.3996365070343018, | |
| "learning_rate": 5.850340532880504e-06, | |
| "loss": 0.921074628829956, | |
| "step": 1138 | |
| }, | |
| { | |
| "epoch": 2.4050632911392404, | |
| "grad_norm": 10.836583137512207, | |
| "learning_rate": 5.842671822268878e-06, | |
| "loss": 0.7500771880149841, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 2.409282700421941, | |
| "grad_norm": 7.443431854248047, | |
| "learning_rate": 5.83499530476974e-06, | |
| "loss": 0.3230987787246704, | |
| "step": 1142 | |
| }, | |
| { | |
| "epoch": 2.413502109704641, | |
| "grad_norm": 2.597289800643921, | |
| "learning_rate": 5.827311021923863e-06, | |
| "loss": 0.732123851776123, | |
| "step": 1144 | |
| }, | |
| { | |
| "epoch": 2.4177215189873418, | |
| "grad_norm": 1.982795238494873, | |
| "learning_rate": 5.819619015314047e-06, | |
| "loss": 0.9608519077301025, | |
| "step": 1146 | |
| }, | |
| { | |
| "epoch": 2.4219409282700424, | |
| "grad_norm": 3.819395065307617, | |
| "learning_rate": 5.8119193265648865e-06, | |
| "loss": 0.6804056167602539, | |
| "step": 1148 | |
| }, | |
| { | |
| "epoch": 2.4261603375527425, | |
| "grad_norm": 6.851272106170654, | |
| "learning_rate": 5.80421199734255e-06, | |
| "loss": 1.004921555519104, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 2.430379746835443, | |
| "grad_norm": 3.0809147357940674, | |
| "learning_rate": 5.7964970693545466e-06, | |
| "loss": 0.6196656823158264, | |
| "step": 1152 | |
| }, | |
| { | |
| "epoch": 2.4345991561181437, | |
| "grad_norm": 1.8345930576324463, | |
| "learning_rate": 5.788774584349508e-06, | |
| "loss": 1.043914556503296, | |
| "step": 1154 | |
| }, | |
| { | |
| "epoch": 2.438818565400844, | |
| "grad_norm": 3.7520220279693604, | |
| "learning_rate": 5.781044584116963e-06, | |
| "loss": 0.30900609493255615, | |
| "step": 1156 | |
| }, | |
| { | |
| "epoch": 2.4430379746835444, | |
| "grad_norm": 1.6004582643508911, | |
| "learning_rate": 5.773307110487106e-06, | |
| "loss": 0.7037574052810669, | |
| "step": 1158 | |
| }, | |
| { | |
| "epoch": 2.4472573839662446, | |
| "grad_norm": 1.8472445011138916, | |
| "learning_rate": 5.765562205330568e-06, | |
| "loss": 0.9773483872413635, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 2.451476793248945, | |
| "grad_norm": 2.698925018310547, | |
| "learning_rate": 5.757809910558205e-06, | |
| "loss": 0.6617934703826904, | |
| "step": 1162 | |
| }, | |
| { | |
| "epoch": 2.4556962025316453, | |
| "grad_norm": 1.6956886053085327, | |
| "learning_rate": 5.750050268120851e-06, | |
| "loss": 0.851616382598877, | |
| "step": 1164 | |
| }, | |
| { | |
| "epoch": 2.459915611814346, | |
| "grad_norm": 1.288453221321106, | |
| "learning_rate": 5.742283320009111e-06, | |
| "loss": 0.8924407958984375, | |
| "step": 1166 | |
| }, | |
| { | |
| "epoch": 2.4641350210970465, | |
| "grad_norm": 1.4182209968566895, | |
| "learning_rate": 5.734509108253117e-06, | |
| "loss": 0.48247936367988586, | |
| "step": 1168 | |
| }, | |
| { | |
| "epoch": 2.4683544303797467, | |
| "grad_norm": 2.1459646224975586, | |
| "learning_rate": 5.726727674922309e-06, | |
| "loss": 0.8906441926956177, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 2.4725738396624473, | |
| "grad_norm": 1.393717885017395, | |
| "learning_rate": 5.718939062125207e-06, | |
| "loss": 0.876624584197998, | |
| "step": 1172 | |
| }, | |
| { | |
| "epoch": 2.476793248945148, | |
| "grad_norm": 1.8553041219711304, | |
| "learning_rate": 5.711143312009183e-06, | |
| "loss": 0.9824315309524536, | |
| "step": 1174 | |
| }, | |
| { | |
| "epoch": 2.481012658227848, | |
| "grad_norm": 2.4953160285949707, | |
| "learning_rate": 5.703340466760228e-06, | |
| "loss": 0.7499101161956787, | |
| "step": 1176 | |
| }, | |
| { | |
| "epoch": 2.4852320675105486, | |
| "grad_norm": 5.494137763977051, | |
| "learning_rate": 5.695530568602733e-06, | |
| "loss": 0.42195141315460205, | |
| "step": 1178 | |
| }, | |
| { | |
| "epoch": 2.489451476793249, | |
| "grad_norm": 4.595831394195557, | |
| "learning_rate": 5.687713659799253e-06, | |
| "loss": 0.7049263715744019, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 2.4936708860759493, | |
| "grad_norm": 5.080184459686279, | |
| "learning_rate": 5.679889782650275e-06, | |
| "loss": 0.880506157875061, | |
| "step": 1182 | |
| }, | |
| { | |
| "epoch": 2.49789029535865, | |
| "grad_norm": 27.051029205322266, | |
| "learning_rate": 5.672058979494004e-06, | |
| "loss": 0.5125079154968262, | |
| "step": 1184 | |
| }, | |
| { | |
| "epoch": 2.50210970464135, | |
| "grad_norm": 1.5325865745544434, | |
| "learning_rate": 5.6642212927061185e-06, | |
| "loss": 0.385905385017395, | |
| "step": 1186 | |
| }, | |
| { | |
| "epoch": 2.5063291139240507, | |
| "grad_norm": 7.488584995269775, | |
| "learning_rate": 5.656376764699549e-06, | |
| "loss": 0.5802481770515442, | |
| "step": 1188 | |
| }, | |
| { | |
| "epoch": 2.510548523206751, | |
| "grad_norm": 2.4869072437286377, | |
| "learning_rate": 5.648525437924244e-06, | |
| "loss": 0.810112476348877, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 2.5147679324894514, | |
| "grad_norm": 5.196420192718506, | |
| "learning_rate": 5.640667354866948e-06, | |
| "loss": 0.40649741888046265, | |
| "step": 1192 | |
| }, | |
| { | |
| "epoch": 2.518987341772152, | |
| "grad_norm": 10.881569862365723, | |
| "learning_rate": 5.632802558050964e-06, | |
| "loss": 1.1927690505981445, | |
| "step": 1194 | |
| }, | |
| { | |
| "epoch": 2.523206751054852, | |
| "grad_norm": 2.63506817817688, | |
| "learning_rate": 5.6249310900359236e-06, | |
| "loss": 0.969944179058075, | |
| "step": 1196 | |
| }, | |
| { | |
| "epoch": 2.5274261603375527, | |
| "grad_norm": 11.932506561279297, | |
| "learning_rate": 5.617052993417562e-06, | |
| "loss": 0.9280753135681152, | |
| "step": 1198 | |
| }, | |
| { | |
| "epoch": 2.5316455696202533, | |
| "grad_norm": 45.778499603271484, | |
| "learning_rate": 5.609168310827482e-06, | |
| "loss": 0.7399793267250061, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 2.5358649789029535, | |
| "grad_norm": 1.8853310346603394, | |
| "learning_rate": 5.6012770849329275e-06, | |
| "loss": 0.7420691251754761, | |
| "step": 1202 | |
| }, | |
| { | |
| "epoch": 2.540084388185654, | |
| "grad_norm": 3.9101645946502686, | |
| "learning_rate": 5.593379358436551e-06, | |
| "loss": 0.7088044285774231, | |
| "step": 1204 | |
| }, | |
| { | |
| "epoch": 2.5443037974683547, | |
| "grad_norm": 3.0373728275299072, | |
| "learning_rate": 5.585475174076184e-06, | |
| "loss": 0.8735544681549072, | |
| "step": 1206 | |
| }, | |
| { | |
| "epoch": 2.548523206751055, | |
| "grad_norm": 8.735350608825684, | |
| "learning_rate": 5.577564574624599e-06, | |
| "loss": 0.6918007135391235, | |
| "step": 1208 | |
| }, | |
| { | |
| "epoch": 2.5527426160337554, | |
| "grad_norm": 3.198167085647583, | |
| "learning_rate": 5.569647602889289e-06, | |
| "loss": 1.1403307914733887, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 2.5569620253164556, | |
| "grad_norm": 2.7564687728881836, | |
| "learning_rate": 5.561724301712225e-06, | |
| "loss": 0.9847512245178223, | |
| "step": 1212 | |
| }, | |
| { | |
| "epoch": 2.561181434599156, | |
| "grad_norm": 5.109920501708984, | |
| "learning_rate": 5.553794713969632e-06, | |
| "loss": 0.30179572105407715, | |
| "step": 1214 | |
| }, | |
| { | |
| "epoch": 2.5654008438818563, | |
| "grad_norm": 4.672483921051025, | |
| "learning_rate": 5.545858882571755e-06, | |
| "loss": 0.7192697525024414, | |
| "step": 1216 | |
| }, | |
| { | |
| "epoch": 2.569620253164557, | |
| "grad_norm": 2.7057371139526367, | |
| "learning_rate": 5.5379168504626256e-06, | |
| "loss": 0.9119170308113098, | |
| "step": 1218 | |
| }, | |
| { | |
| "epoch": 2.5738396624472575, | |
| "grad_norm": 5.782102584838867, | |
| "learning_rate": 5.5299686606198255e-06, | |
| "loss": 0.59529709815979, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 2.5780590717299576, | |
| "grad_norm": 5.356619834899902, | |
| "learning_rate": 5.522014356054264e-06, | |
| "loss": 0.888773500919342, | |
| "step": 1222 | |
| }, | |
| { | |
| "epoch": 2.5822784810126582, | |
| "grad_norm": 34.81894302368164, | |
| "learning_rate": 5.51405397980994e-06, | |
| "loss": 0.671154260635376, | |
| "step": 1224 | |
| }, | |
| { | |
| "epoch": 2.586497890295359, | |
| "grad_norm": 4.4581193923950195, | |
| "learning_rate": 5.506087574963703e-06, | |
| "loss": 0.5387101173400879, | |
| "step": 1226 | |
| }, | |
| { | |
| "epoch": 2.590717299578059, | |
| "grad_norm": 4.779836177825928, | |
| "learning_rate": 5.49811518462503e-06, | |
| "loss": 0.9294767379760742, | |
| "step": 1228 | |
| }, | |
| { | |
| "epoch": 2.5949367088607596, | |
| "grad_norm": 4.019083499908447, | |
| "learning_rate": 5.4901368519357886e-06, | |
| "loss": 0.9565463066101074, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 2.59915611814346, | |
| "grad_norm": 2.806299924850464, | |
| "learning_rate": 5.482152620070001e-06, | |
| "loss": 0.8302749991416931, | |
| "step": 1232 | |
| }, | |
| { | |
| "epoch": 2.6033755274261603, | |
| "grad_norm": 46.3029899597168, | |
| "learning_rate": 5.474162532233609e-06, | |
| "loss": 0.28912973403930664, | |
| "step": 1234 | |
| }, | |
| { | |
| "epoch": 2.607594936708861, | |
| "grad_norm": 4.818080425262451, | |
| "learning_rate": 5.4661666316642534e-06, | |
| "loss": 1.0101039409637451, | |
| "step": 1236 | |
| }, | |
| { | |
| "epoch": 2.611814345991561, | |
| "grad_norm": 4.704904556274414, | |
| "learning_rate": 5.458164961631019e-06, | |
| "loss": 1.141682505607605, | |
| "step": 1238 | |
| }, | |
| { | |
| "epoch": 2.6160337552742616, | |
| "grad_norm": 2.6413064002990723, | |
| "learning_rate": 5.450157565434217e-06, | |
| "loss": 0.7691728472709656, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 2.620253164556962, | |
| "grad_norm": 2.3116259574890137, | |
| "learning_rate": 5.442144486405146e-06, | |
| "loss": 0.8952039480209351, | |
| "step": 1242 | |
| }, | |
| { | |
| "epoch": 2.6244725738396624, | |
| "grad_norm": 3.752659797668457, | |
| "learning_rate": 5.434125767905855e-06, | |
| "loss": 0.41019898653030396, | |
| "step": 1244 | |
| }, | |
| { | |
| "epoch": 2.628691983122363, | |
| "grad_norm": 2.37690806388855, | |
| "learning_rate": 5.426101453328911e-06, | |
| "loss": 0.704147219657898, | |
| "step": 1246 | |
| }, | |
| { | |
| "epoch": 2.632911392405063, | |
| "grad_norm": 3.650939702987671, | |
| "learning_rate": 5.418071586097162e-06, | |
| "loss": 1.3898766040802002, | |
| "step": 1248 | |
| }, | |
| { | |
| "epoch": 2.6371308016877637, | |
| "grad_norm": 1.6102105379104614, | |
| "learning_rate": 5.410036209663506e-06, | |
| "loss": 0.961624026298523, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 2.6413502109704643, | |
| "grad_norm": 3.446720600128174, | |
| "learning_rate": 5.401995367510652e-06, | |
| "loss": 0.924649715423584, | |
| "step": 1252 | |
| }, | |
| { | |
| "epoch": 2.6455696202531644, | |
| "grad_norm": 4.68242073059082, | |
| "learning_rate": 5.393949103150889e-06, | |
| "loss": 0.4435887932777405, | |
| "step": 1254 | |
| }, | |
| { | |
| "epoch": 2.649789029535865, | |
| "grad_norm": 51.22077178955078, | |
| "learning_rate": 5.385897460125841e-06, | |
| "loss": 0.5546849370002747, | |
| "step": 1256 | |
| }, | |
| { | |
| "epoch": 2.6540084388185656, | |
| "grad_norm": 2.3170604705810547, | |
| "learning_rate": 5.377840482006247e-06, | |
| "loss": 0.7113304138183594, | |
| "step": 1258 | |
| }, | |
| { | |
| "epoch": 2.6582278481012658, | |
| "grad_norm": 4.746129512786865, | |
| "learning_rate": 5.369778212391713e-06, | |
| "loss": 0.8765827417373657, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 2.6624472573839664, | |
| "grad_norm": 4.732134819030762, | |
| "learning_rate": 5.361710694910476e-06, | |
| "loss": 0.8504003882408142, | |
| "step": 1262 | |
| }, | |
| { | |
| "epoch": 2.6666666666666665, | |
| "grad_norm": 1.4788029193878174, | |
| "learning_rate": 5.3536379732191735e-06, | |
| "loss": 0.5229237079620361, | |
| "step": 1264 | |
| }, | |
| { | |
| "epoch": 2.670886075949367, | |
| "grad_norm": 9.586766242980957, | |
| "learning_rate": 5.3455600910026075e-06, | |
| "loss": 0.776203989982605, | |
| "step": 1266 | |
| }, | |
| { | |
| "epoch": 2.6751054852320673, | |
| "grad_norm": 1.5792274475097656, | |
| "learning_rate": 5.337477091973503e-06, | |
| "loss": 0.7061780691146851, | |
| "step": 1268 | |
| }, | |
| { | |
| "epoch": 2.679324894514768, | |
| "grad_norm": 1.9227761030197144, | |
| "learning_rate": 5.3293890198722765e-06, | |
| "loss": 0.40927794575691223, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 2.6835443037974684, | |
| "grad_norm": 2.802013635635376, | |
| "learning_rate": 5.321295918466793e-06, | |
| "loss": 0.9143922924995422, | |
| "step": 1272 | |
| }, | |
| { | |
| "epoch": 2.6877637130801686, | |
| "grad_norm": 14.795599937438965, | |
| "learning_rate": 5.3131978315521355e-06, | |
| "loss": 0.6321116089820862, | |
| "step": 1274 | |
| }, | |
| { | |
| "epoch": 2.691983122362869, | |
| "grad_norm": 3.627547264099121, | |
| "learning_rate": 5.305094802950368e-06, | |
| "loss": 0.7536362409591675, | |
| "step": 1276 | |
| }, | |
| { | |
| "epoch": 2.6962025316455698, | |
| "grad_norm": 0.6867983937263489, | |
| "learning_rate": 5.296986876510293e-06, | |
| "loss": 0.27872833609580994, | |
| "step": 1278 | |
| }, | |
| { | |
| "epoch": 2.70042194092827, | |
| "grad_norm": 3.1109073162078857, | |
| "learning_rate": 5.288874096107218e-06, | |
| "loss": 0.8334829807281494, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 2.7046413502109705, | |
| "grad_norm": 1.4203203916549683, | |
| "learning_rate": 5.2807565056427155e-06, | |
| "loss": 0.9659562110900879, | |
| "step": 1282 | |
| }, | |
| { | |
| "epoch": 2.708860759493671, | |
| "grad_norm": 2.113590955734253, | |
| "learning_rate": 5.2726341490443915e-06, | |
| "loss": 0.3422914743423462, | |
| "step": 1284 | |
| }, | |
| { | |
| "epoch": 2.7130801687763713, | |
| "grad_norm": 4.1795759201049805, | |
| "learning_rate": 5.264507070265639e-06, | |
| "loss": 0.44313400983810425, | |
| "step": 1286 | |
| }, | |
| { | |
| "epoch": 2.717299578059072, | |
| "grad_norm": 0.8358619809150696, | |
| "learning_rate": 5.256375313285407e-06, | |
| "loss": 0.50257408618927, | |
| "step": 1288 | |
| }, | |
| { | |
| "epoch": 2.721518987341772, | |
| "grad_norm": 0.6378387808799744, | |
| "learning_rate": 5.248238922107958e-06, | |
| "loss": 0.5335341095924377, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 2.7257383966244726, | |
| "grad_norm": 2.0149025917053223, | |
| "learning_rate": 5.240097940762638e-06, | |
| "loss": 0.9738786220550537, | |
| "step": 1292 | |
| }, | |
| { | |
| "epoch": 2.7299578059071727, | |
| "grad_norm": 3.022477149963379, | |
| "learning_rate": 5.231952413303623e-06, | |
| "loss": 0.41252389550209045, | |
| "step": 1294 | |
| }, | |
| { | |
| "epoch": 2.7341772151898733, | |
| "grad_norm": 2.5130767822265625, | |
| "learning_rate": 5.2238023838097e-06, | |
| "loss": 0.9761707186698914, | |
| "step": 1296 | |
| }, | |
| { | |
| "epoch": 2.738396624472574, | |
| "grad_norm": 2.43636155128479, | |
| "learning_rate": 5.21564789638401e-06, | |
| "loss": 0.9268041253089905, | |
| "step": 1298 | |
| }, | |
| { | |
| "epoch": 2.742616033755274, | |
| "grad_norm": 1.058410406112671, | |
| "learning_rate": 5.207488995153821e-06, | |
| "loss": 0.6909565925598145, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 2.7468354430379747, | |
| "grad_norm": 1.987685203552246, | |
| "learning_rate": 5.1993257242702874e-06, | |
| "loss": 1.0122733116149902, | |
| "step": 1302 | |
| }, | |
| { | |
| "epoch": 2.7510548523206753, | |
| "grad_norm": 1.9147788286209106, | |
| "learning_rate": 5.191158127908207e-06, | |
| "loss": 0.5920695066452026, | |
| "step": 1304 | |
| }, | |
| { | |
| "epoch": 2.7552742616033754, | |
| "grad_norm": 2.714449405670166, | |
| "learning_rate": 5.182986250265786e-06, | |
| "loss": 1.0310044288635254, | |
| "step": 1306 | |
| }, | |
| { | |
| "epoch": 2.759493670886076, | |
| "grad_norm": 1.7678923606872559, | |
| "learning_rate": 5.174810135564397e-06, | |
| "loss": 0.9253189563751221, | |
| "step": 1308 | |
| }, | |
| { | |
| "epoch": 2.7637130801687766, | |
| "grad_norm": 5.892001152038574, | |
| "learning_rate": 5.1666298280483436e-06, | |
| "loss": 0.80256587266922, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 2.7679324894514767, | |
| "grad_norm": 12.360274314880371, | |
| "learning_rate": 5.158445371984614e-06, | |
| "loss": 0.9463623762130737, | |
| "step": 1312 | |
| }, | |
| { | |
| "epoch": 2.7721518987341773, | |
| "grad_norm": 2.3304073810577393, | |
| "learning_rate": 5.150256811662653e-06, | |
| "loss": 0.9907184839248657, | |
| "step": 1314 | |
| }, | |
| { | |
| "epoch": 2.7763713080168775, | |
| "grad_norm": 3.796537160873413, | |
| "learning_rate": 5.142064191394107e-06, | |
| "loss": 0.609095573425293, | |
| "step": 1316 | |
| }, | |
| { | |
| "epoch": 2.780590717299578, | |
| "grad_norm": 2.1420092582702637, | |
| "learning_rate": 5.133867555512599e-06, | |
| "loss": 0.5119812488555908, | |
| "step": 1318 | |
| }, | |
| { | |
| "epoch": 2.7848101265822782, | |
| "grad_norm": 3.1301980018615723, | |
| "learning_rate": 5.125666948373477e-06, | |
| "loss": 0.9296759366989136, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 2.789029535864979, | |
| "grad_norm": 2.0127930641174316, | |
| "learning_rate": 5.1174624143535845e-06, | |
| "loss": 0.4965199828147888, | |
| "step": 1322 | |
| }, | |
| { | |
| "epoch": 2.7932489451476794, | |
| "grad_norm": 6.2706217765808105, | |
| "learning_rate": 5.10925399785101e-06, | |
| "loss": 1.0675724744796753, | |
| "step": 1324 | |
| }, | |
| { | |
| "epoch": 2.7974683544303796, | |
| "grad_norm": 9.385963439941406, | |
| "learning_rate": 5.101041743284855e-06, | |
| "loss": 0.8606825470924377, | |
| "step": 1326 | |
| }, | |
| { | |
| "epoch": 2.80168776371308, | |
| "grad_norm": 1.8741198778152466, | |
| "learning_rate": 5.0928256950949874e-06, | |
| "loss": 0.6247942447662354, | |
| "step": 1328 | |
| }, | |
| { | |
| "epoch": 2.8059071729957807, | |
| "grad_norm": 1.5202703475952148, | |
| "learning_rate": 5.084605897741808e-06, | |
| "loss": 0.9863821268081665, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 2.810126582278481, | |
| "grad_norm": 2.4014227390289307, | |
| "learning_rate": 5.076382395706001e-06, | |
| "loss": 0.7821711301803589, | |
| "step": 1332 | |
| }, | |
| { | |
| "epoch": 2.8143459915611815, | |
| "grad_norm": 2.5113935470581055, | |
| "learning_rate": 5.0681552334883015e-06, | |
| "loss": 0.48877081274986267, | |
| "step": 1334 | |
| }, | |
| { | |
| "epoch": 2.818565400843882, | |
| "grad_norm": 1.116832971572876, | |
| "learning_rate": 5.059924455609252e-06, | |
| "loss": 0.639763593673706, | |
| "step": 1336 | |
| }, | |
| { | |
| "epoch": 2.8227848101265822, | |
| "grad_norm": 2.161829710006714, | |
| "learning_rate": 5.051690106608958e-06, | |
| "loss": 0.6487863063812256, | |
| "step": 1338 | |
| }, | |
| { | |
| "epoch": 2.827004219409283, | |
| "grad_norm": 3.6037724018096924, | |
| "learning_rate": 5.04345223104685e-06, | |
| "loss": 0.8599737882614136, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 2.831223628691983, | |
| "grad_norm": 25.895301818847656, | |
| "learning_rate": 5.035210873501446e-06, | |
| "loss": 0.8409707546234131, | |
| "step": 1342 | |
| }, | |
| { | |
| "epoch": 2.8354430379746836, | |
| "grad_norm": 1.7677528858184814, | |
| "learning_rate": 5.026966078570102e-06, | |
| "loss": 1.0647809505462646, | |
| "step": 1344 | |
| }, | |
| { | |
| "epoch": 2.8396624472573837, | |
| "grad_norm": 5.221706390380859, | |
| "learning_rate": 5.0187178908687765e-06, | |
| "loss": 0.6761691570281982, | |
| "step": 1346 | |
| }, | |
| { | |
| "epoch": 2.8438818565400843, | |
| "grad_norm": 2.069338798522949, | |
| "learning_rate": 5.010466355031788e-06, | |
| "loss": 0.5935064554214478, | |
| "step": 1348 | |
| }, | |
| { | |
| "epoch": 2.848101265822785, | |
| "grad_norm": 1.759072184562683, | |
| "learning_rate": 5.002211515711574e-06, | |
| "loss": 0.9735701680183411, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 2.852320675105485, | |
| "grad_norm": 6.272833824157715, | |
| "learning_rate": 4.993953417578447e-06, | |
| "loss": 0.6328434944152832, | |
| "step": 1352 | |
| }, | |
| { | |
| "epoch": 2.8565400843881856, | |
| "grad_norm": 8.284124374389648, | |
| "learning_rate": 4.985692105320356e-06, | |
| "loss": 0.6582671403884888, | |
| "step": 1354 | |
| }, | |
| { | |
| "epoch": 2.8607594936708862, | |
| "grad_norm": 4.635993003845215, | |
| "learning_rate": 4.977427623642641e-06, | |
| "loss": 0.56138014793396, | |
| "step": 1356 | |
| }, | |
| { | |
| "epoch": 2.8649789029535864, | |
| "grad_norm": 1.4133601188659668, | |
| "learning_rate": 4.9691600172677945e-06, | |
| "loss": 0.9400450587272644, | |
| "step": 1358 | |
| }, | |
| { | |
| "epoch": 2.869198312236287, | |
| "grad_norm": 3.7701480388641357, | |
| "learning_rate": 4.960889330935215e-06, | |
| "loss": 0.8297948837280273, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 2.8734177215189876, | |
| "grad_norm": 3.301804780960083, | |
| "learning_rate": 4.952615609400973e-06, | |
| "loss": 0.5724865794181824, | |
| "step": 1362 | |
| }, | |
| { | |
| "epoch": 2.8776371308016877, | |
| "grad_norm": 3.880316972732544, | |
| "learning_rate": 4.94433889743756e-06, | |
| "loss": 0.9015120267868042, | |
| "step": 1364 | |
| }, | |
| { | |
| "epoch": 2.8818565400843883, | |
| "grad_norm": 2.4567813873291016, | |
| "learning_rate": 4.93605923983365e-06, | |
| "loss": 1.0346885919570923, | |
| "step": 1366 | |
| }, | |
| { | |
| "epoch": 2.8860759493670884, | |
| "grad_norm": 6.220330238342285, | |
| "learning_rate": 4.92777668139386e-06, | |
| "loss": 0.9936701059341431, | |
| "step": 1368 | |
| }, | |
| { | |
| "epoch": 2.890295358649789, | |
| "grad_norm": 2.123796224594116, | |
| "learning_rate": 4.919491266938501e-06, | |
| "loss": 0.9021327495574951, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 2.894514767932489, | |
| "grad_norm": 5.658734321594238, | |
| "learning_rate": 4.911203041303342e-06, | |
| "loss": 0.4772055745124817, | |
| "step": 1372 | |
| }, | |
| { | |
| "epoch": 2.8987341772151898, | |
| "grad_norm": 3.852552890777588, | |
| "learning_rate": 4.902912049339362e-06, | |
| "loss": 0.7514923214912415, | |
| "step": 1374 | |
| }, | |
| { | |
| "epoch": 2.9029535864978904, | |
| "grad_norm": 1.6569684743881226, | |
| "learning_rate": 4.894618335912511e-06, | |
| "loss": 0.9316278696060181, | |
| "step": 1376 | |
| }, | |
| { | |
| "epoch": 2.9071729957805905, | |
| "grad_norm": 3.75467586517334, | |
| "learning_rate": 4.886321945903466e-06, | |
| "loss": 0.7876487374305725, | |
| "step": 1378 | |
| }, | |
| { | |
| "epoch": 2.911392405063291, | |
| "grad_norm": 5.47602653503418, | |
| "learning_rate": 4.8780229242073895e-06, | |
| "loss": 1.141374111175537, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 2.9156118143459917, | |
| "grad_norm": 4.051374435424805, | |
| "learning_rate": 4.86972131573368e-06, | |
| "loss": 0.640509307384491, | |
| "step": 1382 | |
| }, | |
| { | |
| "epoch": 2.919831223628692, | |
| "grad_norm": 7.645662307739258, | |
| "learning_rate": 4.86141716540574e-06, | |
| "loss": 0.6477132439613342, | |
| "step": 1384 | |
| }, | |
| { | |
| "epoch": 2.9240506329113924, | |
| "grad_norm": 2.780121088027954, | |
| "learning_rate": 4.853110518160723e-06, | |
| "loss": 0.5821589827537537, | |
| "step": 1386 | |
| }, | |
| { | |
| "epoch": 2.928270042194093, | |
| "grad_norm": 4.329258441925049, | |
| "learning_rate": 4.844801418949299e-06, | |
| "loss": 0.9183673858642578, | |
| "step": 1388 | |
| }, | |
| { | |
| "epoch": 2.932489451476793, | |
| "grad_norm": 1.9503802061080933, | |
| "learning_rate": 4.836489912735402e-06, | |
| "loss": 0.8357143402099609, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 2.9367088607594938, | |
| "grad_norm": 1.0354031324386597, | |
| "learning_rate": 4.8281760444959926e-06, | |
| "loss": 0.45355841517448425, | |
| "step": 1392 | |
| }, | |
| { | |
| "epoch": 2.9409282700421944, | |
| "grad_norm": 5.098978519439697, | |
| "learning_rate": 4.8198598592208126e-06, | |
| "loss": 0.6029504537582397, | |
| "step": 1394 | |
| }, | |
| { | |
| "epoch": 2.9451476793248945, | |
| "grad_norm": 1.7021719217300415, | |
| "learning_rate": 4.811541401912146e-06, | |
| "loss": 0.8993232250213623, | |
| "step": 1396 | |
| }, | |
| { | |
| "epoch": 2.9493670886075947, | |
| "grad_norm": 0.9127289652824402, | |
| "learning_rate": 4.803220717584566e-06, | |
| "loss": 0.7546182870864868, | |
| "step": 1398 | |
| }, | |
| { | |
| "epoch": 2.9535864978902953, | |
| "grad_norm": 4.401092529296875, | |
| "learning_rate": 4.7948978512647016e-06, | |
| "loss": 0.813082218170166, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 2.957805907172996, | |
| "grad_norm": 4.511460304260254, | |
| "learning_rate": 4.786572847990987e-06, | |
| "loss": 0.5571738481521606, | |
| "step": 1402 | |
| }, | |
| { | |
| "epoch": 2.962025316455696, | |
| "grad_norm": 1.7611995935440063, | |
| "learning_rate": 4.778245752813421e-06, | |
| "loss": 0.9406437277793884, | |
| "step": 1404 | |
| }, | |
| { | |
| "epoch": 2.9662447257383966, | |
| "grad_norm": 5.091633319854736, | |
| "learning_rate": 4.769916610793324e-06, | |
| "loss": 0.7957962155342102, | |
| "step": 1406 | |
| }, | |
| { | |
| "epoch": 2.970464135021097, | |
| "grad_norm": 4.824461460113525, | |
| "learning_rate": 4.76158546700309e-06, | |
| "loss": 0.4907096028327942, | |
| "step": 1408 | |
| }, | |
| { | |
| "epoch": 2.9746835443037973, | |
| "grad_norm": 3.2384073734283447, | |
| "learning_rate": 4.75325236652595e-06, | |
| "loss": 0.742721676826477, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 2.978902953586498, | |
| "grad_norm": 3.1998229026794434, | |
| "learning_rate": 4.744917354455715e-06, | |
| "loss": 0.9864751100540161, | |
| "step": 1412 | |
| }, | |
| { | |
| "epoch": 2.9831223628691985, | |
| "grad_norm": 8.642891883850098, | |
| "learning_rate": 4.73658047589655e-06, | |
| "loss": 0.8089879751205444, | |
| "step": 1414 | |
| }, | |
| { | |
| "epoch": 2.9873417721518987, | |
| "grad_norm": 1.6954267024993896, | |
| "learning_rate": 4.7282417759627134e-06, | |
| "loss": 0.8185816407203674, | |
| "step": 1416 | |
| }, | |
| { | |
| "epoch": 2.9915611814345993, | |
| "grad_norm": 1.4983083009719849, | |
| "learning_rate": 4.719901299778325e-06, | |
| "loss": 0.8309789896011353, | |
| "step": 1418 | |
| }, | |
| { | |
| "epoch": 2.9957805907173, | |
| "grad_norm": 5.758739948272705, | |
| "learning_rate": 4.71155909247711e-06, | |
| "loss": 0.9021912813186646, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "grad_norm": 2.6232898235321045, | |
| "learning_rate": 4.703215199202169e-06, | |
| "loss": 0.2926831841468811, | |
| "step": 1422 | |
| }, | |
| { | |
| "epoch": 3.0042194092827006, | |
| "grad_norm": 2.3824779987335205, | |
| "learning_rate": 4.6948696651057225e-06, | |
| "loss": 0.5607067346572876, | |
| "step": 1424 | |
| }, | |
| { | |
| "epoch": 3.0084388185654007, | |
| "grad_norm": 10.018150329589844, | |
| "learning_rate": 4.6865225353488675e-06, | |
| "loss": 0.5354501008987427, | |
| "step": 1426 | |
| }, | |
| { | |
| "epoch": 3.0126582278481013, | |
| "grad_norm": 16.426225662231445, | |
| "learning_rate": 4.678173855101341e-06, | |
| "loss": 0.5269479155540466, | |
| "step": 1428 | |
| }, | |
| { | |
| "epoch": 3.0168776371308015, | |
| "grad_norm": 15.212722778320312, | |
| "learning_rate": 4.669823669541266e-06, | |
| "loss": 0.39293336868286133, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 3.021097046413502, | |
| "grad_norm": 2.7751386165618896, | |
| "learning_rate": 4.661472023854916e-06, | |
| "loss": 0.8252520561218262, | |
| "step": 1432 | |
| }, | |
| { | |
| "epoch": 3.0253164556962027, | |
| "grad_norm": 2.98408842086792, | |
| "learning_rate": 4.653118963236458e-06, | |
| "loss": 0.7142210006713867, | |
| "step": 1434 | |
| }, | |
| { | |
| "epoch": 3.029535864978903, | |
| "grad_norm": 1.7343214750289917, | |
| "learning_rate": 4.644764532887726e-06, | |
| "loss": 0.8274791240692139, | |
| "step": 1436 | |
| }, | |
| { | |
| "epoch": 3.0337552742616034, | |
| "grad_norm": 2.1739771366119385, | |
| "learning_rate": 4.636408778017957e-06, | |
| "loss": 0.3643840551376343, | |
| "step": 1438 | |
| }, | |
| { | |
| "epoch": 3.037974683544304, | |
| "grad_norm": 1.6796656847000122, | |
| "learning_rate": 4.6280517438435616e-06, | |
| "loss": 0.7152677178382874, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 3.042194092827004, | |
| "grad_norm": 1.2829041481018066, | |
| "learning_rate": 4.61969347558787e-06, | |
| "loss": 0.5194791555404663, | |
| "step": 1442 | |
| }, | |
| { | |
| "epoch": 3.0464135021097047, | |
| "grad_norm": 1.6921758651733398, | |
| "learning_rate": 4.6113340184808925e-06, | |
| "loss": 0.6532431840896606, | |
| "step": 1444 | |
| }, | |
| { | |
| "epoch": 3.050632911392405, | |
| "grad_norm": 1.7007097005844116, | |
| "learning_rate": 4.602973417759071e-06, | |
| "loss": 0.7926474809646606, | |
| "step": 1446 | |
| }, | |
| { | |
| "epoch": 3.0548523206751055, | |
| "grad_norm": 2854.81396484375, | |
| "learning_rate": 4.594611718665038e-06, | |
| "loss": 0.5383695960044861, | |
| "step": 1448 | |
| }, | |
| { | |
| "epoch": 3.059071729957806, | |
| "grad_norm": 4.033429145812988, | |
| "learning_rate": 4.586248966447367e-06, | |
| "loss": 0.6349921822547913, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 3.0632911392405062, | |
| "grad_norm": 1.7910646200180054, | |
| "learning_rate": 4.577885206360334e-06, | |
| "loss": 0.7665805220603943, | |
| "step": 1452 | |
| }, | |
| { | |
| "epoch": 3.067510548523207, | |
| "grad_norm": 2.6463685035705566, | |
| "learning_rate": 4.5695204836636655e-06, | |
| "loss": 0.617534875869751, | |
| "step": 1454 | |
| }, | |
| { | |
| "epoch": 3.071729957805907, | |
| "grad_norm": 3.8018009662628174, | |
| "learning_rate": 4.561154843622299e-06, | |
| "loss": 0.4436488151550293, | |
| "step": 1456 | |
| }, | |
| { | |
| "epoch": 3.0759493670886076, | |
| "grad_norm": 1.6345051527023315, | |
| "learning_rate": 4.552788331506134e-06, | |
| "loss": 0.45668232440948486, | |
| "step": 1458 | |
| }, | |
| { | |
| "epoch": 3.080168776371308, | |
| "grad_norm": 10.521839141845703, | |
| "learning_rate": 4.544420992589792e-06, | |
| "loss": 0.5640779733657837, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 3.0843881856540083, | |
| "grad_norm": 2.4284989833831787, | |
| "learning_rate": 4.53605287215237e-06, | |
| "loss": 0.7709170579910278, | |
| "step": 1462 | |
| }, | |
| { | |
| "epoch": 3.088607594936709, | |
| "grad_norm": 1.7529208660125732, | |
| "learning_rate": 4.527684015477188e-06, | |
| "loss": 0.7688764333724976, | |
| "step": 1464 | |
| }, | |
| { | |
| "epoch": 3.0928270042194095, | |
| "grad_norm": 29.2982177734375, | |
| "learning_rate": 4.519314467851555e-06, | |
| "loss": 0.7450973987579346, | |
| "step": 1466 | |
| }, | |
| { | |
| "epoch": 3.0970464135021096, | |
| "grad_norm": 6.658877372741699, | |
| "learning_rate": 4.510944274566518e-06, | |
| "loss": 0.5714298486709595, | |
| "step": 1468 | |
| }, | |
| { | |
| "epoch": 3.1012658227848102, | |
| "grad_norm": 10.68608283996582, | |
| "learning_rate": 4.502573480916617e-06, | |
| "loss": 0.17385733127593994, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 3.1054852320675104, | |
| "grad_norm": 5.784574508666992, | |
| "learning_rate": 4.494202132199643e-06, | |
| "loss": 0.9861471652984619, | |
| "step": 1472 | |
| }, | |
| { | |
| "epoch": 3.109704641350211, | |
| "grad_norm": 0.49223047494888306, | |
| "learning_rate": 4.485830273716386e-06, | |
| "loss": 0.3651547431945801, | |
| "step": 1474 | |
| }, | |
| { | |
| "epoch": 3.1139240506329116, | |
| "grad_norm": 3.3575212955474854, | |
| "learning_rate": 4.4774579507704e-06, | |
| "loss": 0.8966869115829468, | |
| "step": 1476 | |
| }, | |
| { | |
| "epoch": 3.1181434599156117, | |
| "grad_norm": 3.334437370300293, | |
| "learning_rate": 4.46908520866775e-06, | |
| "loss": 0.9988681077957153, | |
| "step": 1478 | |
| }, | |
| { | |
| "epoch": 3.1223628691983123, | |
| "grad_norm": 1.8539319038391113, | |
| "learning_rate": 4.460712092716768e-06, | |
| "loss": 0.7239236831665039, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 3.1265822784810124, | |
| "grad_norm": 3.4966561794281006, | |
| "learning_rate": 4.452338648227813e-06, | |
| "loss": 0.8891302347183228, | |
| "step": 1482 | |
| }, | |
| { | |
| "epoch": 3.130801687763713, | |
| "grad_norm": 2.0679361820220947, | |
| "learning_rate": 4.443964920513017e-06, | |
| "loss": 0.8403599262237549, | |
| "step": 1484 | |
| }, | |
| { | |
| "epoch": 3.1350210970464136, | |
| "grad_norm": 6.140291690826416, | |
| "learning_rate": 4.435590954886047e-06, | |
| "loss": 0.5731205940246582, | |
| "step": 1486 | |
| }, | |
| { | |
| "epoch": 3.1392405063291138, | |
| "grad_norm": 2.278001070022583, | |
| "learning_rate": 4.427216796661857e-06, | |
| "loss": 0.5262531638145447, | |
| "step": 1488 | |
| }, | |
| { | |
| "epoch": 3.1434599156118144, | |
| "grad_norm": 3.5377111434936523, | |
| "learning_rate": 4.418842491156445e-06, | |
| "loss": 0.8218955993652344, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 3.147679324894515, | |
| "grad_norm": 1.6017743349075317, | |
| "learning_rate": 4.410468083686605e-06, | |
| "loss": 0.5413227081298828, | |
| "step": 1492 | |
| }, | |
| { | |
| "epoch": 3.151898734177215, | |
| "grad_norm": 3.7507541179656982, | |
| "learning_rate": 4.402093619569679e-06, | |
| "loss": 0.7044564485549927, | |
| "step": 1494 | |
| }, | |
| { | |
| "epoch": 3.1561181434599157, | |
| "grad_norm": 3.2674598693847656, | |
| "learning_rate": 4.393719144123321e-06, | |
| "loss": 0.6519253253936768, | |
| "step": 1496 | |
| }, | |
| { | |
| "epoch": 3.160337552742616, | |
| "grad_norm": 1.4527074098587036, | |
| "learning_rate": 4.385344702665246e-06, | |
| "loss": 0.3425239622592926, | |
| "step": 1498 | |
| }, | |
| { | |
| "epoch": 3.1645569620253164, | |
| "grad_norm": 1.9561620950698853, | |
| "learning_rate": 4.376970340512979e-06, | |
| "loss": 0.4482334852218628, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 3.168776371308017, | |
| "grad_norm": 2.0695550441741943, | |
| "learning_rate": 4.368596102983623e-06, | |
| "loss": 0.7770338654518127, | |
| "step": 1502 | |
| }, | |
| { | |
| "epoch": 3.172995780590717, | |
| "grad_norm": 2.4615397453308105, | |
| "learning_rate": 4.360222035393603e-06, | |
| "loss": 0.6019558906555176, | |
| "step": 1504 | |
| }, | |
| { | |
| "epoch": 3.1772151898734178, | |
| "grad_norm": 2.1790032386779785, | |
| "learning_rate": 4.351848183058427e-06, | |
| "loss": 0.8018068075180054, | |
| "step": 1506 | |
| }, | |
| { | |
| "epoch": 3.181434599156118, | |
| "grad_norm": 3.2153782844543457, | |
| "learning_rate": 4.343474591292432e-06, | |
| "loss": 0.8963441848754883, | |
| "step": 1508 | |
| }, | |
| { | |
| "epoch": 3.1856540084388185, | |
| "grad_norm": 4.124518394470215, | |
| "learning_rate": 4.335101305408552e-06, | |
| "loss": 0.7522740960121155, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 3.189873417721519, | |
| "grad_norm": 0.789757251739502, | |
| "learning_rate": 4.3267283707180635e-06, | |
| "loss": 0.2408222258090973, | |
| "step": 1512 | |
| }, | |
| { | |
| "epoch": 3.1940928270042193, | |
| "grad_norm": 2.3271267414093018, | |
| "learning_rate": 4.31835583253034e-06, | |
| "loss": 0.7659440636634827, | |
| "step": 1514 | |
| }, | |
| { | |
| "epoch": 3.19831223628692, | |
| "grad_norm": 1.0062589645385742, | |
| "learning_rate": 4.309983736152612e-06, | |
| "loss": 0.5749263763427734, | |
| "step": 1516 | |
| }, | |
| { | |
| "epoch": 3.2025316455696204, | |
| "grad_norm": 3.043958902359009, | |
| "learning_rate": 4.301612126889719e-06, | |
| "loss": 0.7307943105697632, | |
| "step": 1518 | |
| }, | |
| { | |
| "epoch": 3.2067510548523206, | |
| "grad_norm": 4.167404651641846, | |
| "learning_rate": 4.293241050043863e-06, | |
| "loss": 0.6726250648498535, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 3.210970464135021, | |
| "grad_norm": 7.460043430328369, | |
| "learning_rate": 4.284870550914368e-06, | |
| "loss": 0.27290791273117065, | |
| "step": 1522 | |
| }, | |
| { | |
| "epoch": 3.2151898734177213, | |
| "grad_norm": 7.231512546539307, | |
| "learning_rate": 4.276500674797427e-06, | |
| "loss": 0.6644264459609985, | |
| "step": 1524 | |
| }, | |
| { | |
| "epoch": 3.219409282700422, | |
| "grad_norm": 2.330780029296875, | |
| "learning_rate": 4.268131466985867e-06, | |
| "loss": 0.5520614385604858, | |
| "step": 1526 | |
| }, | |
| { | |
| "epoch": 3.2236286919831225, | |
| "grad_norm": 0.9592808485031128, | |
| "learning_rate": 4.259762972768895e-06, | |
| "loss": 0.2992947995662689, | |
| "step": 1528 | |
| }, | |
| { | |
| "epoch": 3.2278481012658227, | |
| "grad_norm": 4.56012487411499, | |
| "learning_rate": 4.2513952374318556e-06, | |
| "loss": 0.6852157115936279, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 3.2320675105485233, | |
| "grad_norm": 1.3004289865493774, | |
| "learning_rate": 4.24302830625599e-06, | |
| "loss": 0.18629956245422363, | |
| "step": 1532 | |
| }, | |
| { | |
| "epoch": 3.2362869198312234, | |
| "grad_norm": 2.980001449584961, | |
| "learning_rate": 4.2346622245181864e-06, | |
| "loss": 0.6743506193161011, | |
| "step": 1534 | |
| }, | |
| { | |
| "epoch": 3.240506329113924, | |
| "grad_norm": 2.0032145977020264, | |
| "learning_rate": 4.226297037490735e-06, | |
| "loss": 0.779093861579895, | |
| "step": 1536 | |
| }, | |
| { | |
| "epoch": 3.2447257383966246, | |
| "grad_norm": 1.4844651222229004, | |
| "learning_rate": 4.217932790441087e-06, | |
| "loss": 0.7138203382492065, | |
| "step": 1538 | |
| }, | |
| { | |
| "epoch": 3.2489451476793247, | |
| "grad_norm": 9.559041023254395, | |
| "learning_rate": 4.209569528631604e-06, | |
| "loss": 0.726833701133728, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 3.2531645569620253, | |
| "grad_norm": 5.846635341644287, | |
| "learning_rate": 4.201207297319318e-06, | |
| "loss": 0.577594518661499, | |
| "step": 1542 | |
| }, | |
| { | |
| "epoch": 3.257383966244726, | |
| "grad_norm": 14.799168586730957, | |
| "learning_rate": 4.192846141755686e-06, | |
| "loss": 0.6153043508529663, | |
| "step": 1544 | |
| }, | |
| { | |
| "epoch": 3.261603375527426, | |
| "grad_norm": 4.5415568351745605, | |
| "learning_rate": 4.184486107186338e-06, | |
| "loss": 0.3514612317085266, | |
| "step": 1546 | |
| }, | |
| { | |
| "epoch": 3.2658227848101267, | |
| "grad_norm": 1.909765362739563, | |
| "learning_rate": 4.176127238850845e-06, | |
| "loss": 0.6445936560630798, | |
| "step": 1548 | |
| }, | |
| { | |
| "epoch": 3.270042194092827, | |
| "grad_norm": 4.638195037841797, | |
| "learning_rate": 4.1677695819824615e-06, | |
| "loss": 0.32674679160118103, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 3.2742616033755274, | |
| "grad_norm": 1.878891944885254, | |
| "learning_rate": 4.159413181807891e-06, | |
| "loss": 0.2638033628463745, | |
| "step": 1552 | |
| }, | |
| { | |
| "epoch": 3.278481012658228, | |
| "grad_norm": 6.237101078033447, | |
| "learning_rate": 4.151058083547031e-06, | |
| "loss": 0.46362948417663574, | |
| "step": 1554 | |
| }, | |
| { | |
| "epoch": 3.282700421940928, | |
| "grad_norm": 2.6107330322265625, | |
| "learning_rate": 4.142704332412738e-06, | |
| "loss": 0.767645001411438, | |
| "step": 1556 | |
| }, | |
| { | |
| "epoch": 3.2869198312236287, | |
| "grad_norm": 0.5579416155815125, | |
| "learning_rate": 4.1343519736105785e-06, | |
| "loss": 0.6301885843276978, | |
| "step": 1558 | |
| }, | |
| { | |
| "epoch": 3.291139240506329, | |
| "grad_norm": 2.669329881668091, | |
| "learning_rate": 4.126001052338581e-06, | |
| "loss": 0.4373775124549866, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 3.2953586497890295, | |
| "grad_norm": 3.803680181503296, | |
| "learning_rate": 4.1176516137870004e-06, | |
| "loss": 0.5417683720588684, | |
| "step": 1562 | |
| }, | |
| { | |
| "epoch": 3.29957805907173, | |
| "grad_norm": 7.650591850280762, | |
| "learning_rate": 4.109303703138063e-06, | |
| "loss": 0.7619826793670654, | |
| "step": 1564 | |
| }, | |
| { | |
| "epoch": 3.3037974683544302, | |
| "grad_norm": 12.526813507080078, | |
| "learning_rate": 4.1009573655657295e-06, | |
| "loss": 0.696597695350647, | |
| "step": 1566 | |
| }, | |
| { | |
| "epoch": 3.308016877637131, | |
| "grad_norm": 1.6425329446792603, | |
| "learning_rate": 4.092612646235447e-06, | |
| "loss": 0.4307796359062195, | |
| "step": 1568 | |
| }, | |
| { | |
| "epoch": 3.3122362869198314, | |
| "grad_norm": 3.352663516998291, | |
| "learning_rate": 4.084269590303907e-06, | |
| "loss": 0.3921862244606018, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 3.3164556962025316, | |
| "grad_norm": 2.774632215499878, | |
| "learning_rate": 4.075928242918798e-06, | |
| "loss": 0.4460093677043915, | |
| "step": 1572 | |
| }, | |
| { | |
| "epoch": 3.320675105485232, | |
| "grad_norm": 2.935922384262085, | |
| "learning_rate": 4.067588649218564e-06, | |
| "loss": 0.935857892036438, | |
| "step": 1574 | |
| }, | |
| { | |
| "epoch": 3.3248945147679323, | |
| "grad_norm": 2.058513641357422, | |
| "learning_rate": 4.059250854332159e-06, | |
| "loss": 0.6347423791885376, | |
| "step": 1576 | |
| }, | |
| { | |
| "epoch": 3.329113924050633, | |
| "grad_norm": 1.685788869857788, | |
| "learning_rate": 4.050914903378802e-06, | |
| "loss": 0.7031244039535522, | |
| "step": 1578 | |
| }, | |
| { | |
| "epoch": 3.3333333333333335, | |
| "grad_norm": 2.3649990558624268, | |
| "learning_rate": 4.0425808414677345e-06, | |
| "loss": 0.43982017040252686, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 3.3375527426160336, | |
| "grad_norm": 1.6167001724243164, | |
| "learning_rate": 4.034248713697977e-06, | |
| "loss": 0.40530964732170105, | |
| "step": 1582 | |
| }, | |
| { | |
| "epoch": 3.3417721518987342, | |
| "grad_norm": 10.420100212097168, | |
| "learning_rate": 4.025918565158079e-06, | |
| "loss": 0.6115049123764038, | |
| "step": 1584 | |
| }, | |
| { | |
| "epoch": 3.3459915611814344, | |
| "grad_norm": 7.325076580047607, | |
| "learning_rate": 4.0175904409258844e-06, | |
| "loss": 0.5467356443405151, | |
| "step": 1586 | |
| }, | |
| { | |
| "epoch": 3.350210970464135, | |
| "grad_norm": 2.629723310470581, | |
| "learning_rate": 4.009264386068281e-06, | |
| "loss": 0.3660237789154053, | |
| "step": 1588 | |
| }, | |
| { | |
| "epoch": 3.3544303797468356, | |
| "grad_norm": 1.3594039678573608, | |
| "learning_rate": 4.000940445640959e-06, | |
| "loss": 0.8356277942657471, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 3.3586497890295357, | |
| "grad_norm": 2.2674577236175537, | |
| "learning_rate": 3.992618664688165e-06, | |
| "loss": 0.7481639981269836, | |
| "step": 1592 | |
| }, | |
| { | |
| "epoch": 3.3628691983122363, | |
| "grad_norm": 0.9013781547546387, | |
| "learning_rate": 3.98429908824246e-06, | |
| "loss": 0.3871142268180847, | |
| "step": 1594 | |
| }, | |
| { | |
| "epoch": 3.367088607594937, | |
| "grad_norm": 3.642791271209717, | |
| "learning_rate": 3.975981761324477e-06, | |
| "loss": 0.4039541482925415, | |
| "step": 1596 | |
| }, | |
| { | |
| "epoch": 3.371308016877637, | |
| "grad_norm": 1.674315333366394, | |
| "learning_rate": 3.967666728942675e-06, | |
| "loss": 0.3363262712955475, | |
| "step": 1598 | |
| }, | |
| { | |
| "epoch": 3.3755274261603376, | |
| "grad_norm": 4.324201583862305, | |
| "learning_rate": 3.959354036093097e-06, | |
| "loss": 0.45887890458106995, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 3.379746835443038, | |
| "grad_norm": 1.464799404144287, | |
| "learning_rate": 3.951043727759125e-06, | |
| "loss": 0.47278836369514465, | |
| "step": 1602 | |
| }, | |
| { | |
| "epoch": 3.3839662447257384, | |
| "grad_norm": 0.5969643592834473, | |
| "learning_rate": 3.942735848911236e-06, | |
| "loss": 0.4599458575248718, | |
| "step": 1604 | |
| }, | |
| { | |
| "epoch": 3.388185654008439, | |
| "grad_norm": 34.39630126953125, | |
| "learning_rate": 3.9344304445067644e-06, | |
| "loss": 0.8346083164215088, | |
| "step": 1606 | |
| }, | |
| { | |
| "epoch": 3.392405063291139, | |
| "grad_norm": 2.1359801292419434, | |
| "learning_rate": 3.9261275594896495e-06, | |
| "loss": 0.532837450504303, | |
| "step": 1608 | |
| }, | |
| { | |
| "epoch": 3.3966244725738397, | |
| "grad_norm": 1.2699977159500122, | |
| "learning_rate": 3.9178272387902e-06, | |
| "loss": 0.2630946636199951, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 3.40084388185654, | |
| "grad_norm": 2.12693452835083, | |
| "learning_rate": 3.909529527324849e-06, | |
| "loss": 0.7574643492698669, | |
| "step": 1612 | |
| }, | |
| { | |
| "epoch": 3.4050632911392404, | |
| "grad_norm": 2.4170689582824707, | |
| "learning_rate": 3.9012344699959045e-06, | |
| "loss": 0.2519644498825073, | |
| "step": 1614 | |
| }, | |
| { | |
| "epoch": 3.409282700421941, | |
| "grad_norm": 3.567059278488159, | |
| "learning_rate": 3.892942111691319e-06, | |
| "loss": 0.6072185039520264, | |
| "step": 1616 | |
| }, | |
| { | |
| "epoch": 3.413502109704641, | |
| "grad_norm": 2.984300136566162, | |
| "learning_rate": 3.884652497284436e-06, | |
| "loss": 0.9044985771179199, | |
| "step": 1618 | |
| }, | |
| { | |
| "epoch": 3.4177215189873418, | |
| "grad_norm": 1.356608271598816, | |
| "learning_rate": 3.8763656716337496e-06, | |
| "loss": 0.8276529908180237, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 3.4219409282700424, | |
| "grad_norm": 2.127027750015259, | |
| "learning_rate": 3.868081679582664e-06, | |
| "loss": 0.45381200313568115, | |
| "step": 1622 | |
| }, | |
| { | |
| "epoch": 3.4261603375527425, | |
| "grad_norm": 9.49200439453125, | |
| "learning_rate": 3.8598005659592505e-06, | |
| "loss": 0.35857370495796204, | |
| "step": 1624 | |
| }, | |
| { | |
| "epoch": 3.430379746835443, | |
| "grad_norm": 7.919655799865723, | |
| "learning_rate": 3.851522375576004e-06, | |
| "loss": 0.2886282801628113, | |
| "step": 1626 | |
| }, | |
| { | |
| "epoch": 3.4345991561181437, | |
| "grad_norm": 2.5264101028442383, | |
| "learning_rate": 3.843247153229598e-06, | |
| "loss": 0.7439049482345581, | |
| "step": 1628 | |
| }, | |
| { | |
| "epoch": 3.438818565400844, | |
| "grad_norm": 3.5521364212036133, | |
| "learning_rate": 3.834974943700646e-06, | |
| "loss": 0.1677057147026062, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 3.4430379746835444, | |
| "grad_norm": 3.5299649238586426, | |
| "learning_rate": 3.82670579175346e-06, | |
| "loss": 0.867900013923645, | |
| "step": 1632 | |
| }, | |
| { | |
| "epoch": 3.4472573839662446, | |
| "grad_norm": 1.0358866453170776, | |
| "learning_rate": 3.818439742135804e-06, | |
| "loss": 0.4616679549217224, | |
| "step": 1634 | |
| }, | |
| { | |
| "epoch": 3.451476793248945, | |
| "grad_norm": 17.166608810424805, | |
| "learning_rate": 3.8101768395786555e-06, | |
| "loss": 0.8641064167022705, | |
| "step": 1636 | |
| }, | |
| { | |
| "epoch": 3.4556962025316453, | |
| "grad_norm": 3.9892635345458984, | |
| "learning_rate": 3.80191712879596e-06, | |
| "loss": 0.7791248559951782, | |
| "step": 1638 | |
| }, | |
| { | |
| "epoch": 3.459915611814346, | |
| "grad_norm": 3.8166918754577637, | |
| "learning_rate": 3.7936606544843936e-06, | |
| "loss": 0.8491038084030151, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 3.4641350210970465, | |
| "grad_norm": 12.44215202331543, | |
| "learning_rate": 3.7854074613231156e-06, | |
| "loss": 0.8689329624176025, | |
| "step": 1642 | |
| }, | |
| { | |
| "epoch": 3.4683544303797467, | |
| "grad_norm": 9.996358871459961, | |
| "learning_rate": 3.777157593973531e-06, | |
| "loss": 0.1393229067325592, | |
| "step": 1644 | |
| }, | |
| { | |
| "epoch": 3.4725738396624473, | |
| "grad_norm": 2.609384298324585, | |
| "learning_rate": 3.768911097079048e-06, | |
| "loss": 0.5422978401184082, | |
| "step": 1646 | |
| }, | |
| { | |
| "epoch": 3.476793248945148, | |
| "grad_norm": 1.4818031787872314, | |
| "learning_rate": 3.7606680152648363e-06, | |
| "loss": 0.6728254556655884, | |
| "step": 1648 | |
| }, | |
| { | |
| "epoch": 3.481012658227848, | |
| "grad_norm": 2.1601715087890625, | |
| "learning_rate": 3.752428393137582e-06, | |
| "loss": 0.35271987318992615, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 3.4852320675105486, | |
| "grad_norm": 2.9078116416931152, | |
| "learning_rate": 3.744192275285254e-06, | |
| "loss": 0.6402429938316345, | |
| "step": 1652 | |
| }, | |
| { | |
| "epoch": 3.489451476793249, | |
| "grad_norm": 1.4320260286331177, | |
| "learning_rate": 3.735959706276855e-06, | |
| "loss": 0.4159366488456726, | |
| "step": 1654 | |
| }, | |
| { | |
| "epoch": 3.4936708860759493, | |
| "grad_norm": 2.64323091506958, | |
| "learning_rate": 3.727730730662185e-06, | |
| "loss": 0.45933040976524353, | |
| "step": 1656 | |
| }, | |
| { | |
| "epoch": 3.49789029535865, | |
| "grad_norm": 5.375485897064209, | |
| "learning_rate": 3.719505392971597e-06, | |
| "loss": 0.7267172336578369, | |
| "step": 1658 | |
| }, | |
| { | |
| "epoch": 3.50210970464135, | |
| "grad_norm": 1.8793143033981323, | |
| "learning_rate": 3.7112837377157595e-06, | |
| "loss": 0.750633955001831, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 3.5063291139240507, | |
| "grad_norm": 3.7220072746276855, | |
| "learning_rate": 3.7030658093854116e-06, | |
| "loss": 0.7886282205581665, | |
| "step": 1662 | |
| }, | |
| { | |
| "epoch": 3.510548523206751, | |
| "grad_norm": 4.647188186645508, | |
| "learning_rate": 3.6948516524511284e-06, | |
| "loss": 0.4952489733695984, | |
| "step": 1664 | |
| }, | |
| { | |
| "epoch": 3.5147679324894514, | |
| "grad_norm": 4.3419575691223145, | |
| "learning_rate": 3.686641311363072e-06, | |
| "loss": 0.7061523199081421, | |
| "step": 1666 | |
| }, | |
| { | |
| "epoch": 3.518987341772152, | |
| "grad_norm": 2.792799949645996, | |
| "learning_rate": 3.678434830550758e-06, | |
| "loss": 0.4294711947441101, | |
| "step": 1668 | |
| }, | |
| { | |
| "epoch": 3.523206751054852, | |
| "grad_norm": 3.9279825687408447, | |
| "learning_rate": 3.670232254422812e-06, | |
| "loss": 0.6987364888191223, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 3.5274261603375527, | |
| "grad_norm": 4.345192909240723, | |
| "learning_rate": 3.6620336273667292e-06, | |
| "loss": 0.2978661060333252, | |
| "step": 1672 | |
| }, | |
| { | |
| "epoch": 3.5316455696202533, | |
| "grad_norm": 2.069209575653076, | |
| "learning_rate": 3.6538389937486356e-06, | |
| "loss": 0.4812040627002716, | |
| "step": 1674 | |
| }, | |
| { | |
| "epoch": 3.5358649789029535, | |
| "grad_norm": 14.877073287963867, | |
| "learning_rate": 3.6456483979130477e-06, | |
| "loss": 0.5612766146659851, | |
| "step": 1676 | |
| }, | |
| { | |
| "epoch": 3.540084388185654, | |
| "grad_norm": 6.497949600219727, | |
| "learning_rate": 3.6374618841826285e-06, | |
| "loss": 0.6456748843193054, | |
| "step": 1678 | |
| }, | |
| { | |
| "epoch": 3.5443037974683547, | |
| "grad_norm": 6.732306003570557, | |
| "learning_rate": 3.629279496857955e-06, | |
| "loss": 0.713530421257019, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 3.548523206751055, | |
| "grad_norm": 2.6278674602508545, | |
| "learning_rate": 3.621101280217272e-06, | |
| "loss": 0.6881183385848999, | |
| "step": 1682 | |
| }, | |
| { | |
| "epoch": 3.5527426160337554, | |
| "grad_norm": 5.155986785888672, | |
| "learning_rate": 3.612927278516257e-06, | |
| "loss": 0.5856807827949524, | |
| "step": 1684 | |
| }, | |
| { | |
| "epoch": 3.5569620253164556, | |
| "grad_norm": 6.799246788024902, | |
| "learning_rate": 3.6047575359877768e-06, | |
| "loss": 0.36446380615234375, | |
| "step": 1686 | |
| }, | |
| { | |
| "epoch": 3.561181434599156, | |
| "grad_norm": 1.061477541923523, | |
| "learning_rate": 3.596592096841651e-06, | |
| "loss": 0.4035094976425171, | |
| "step": 1688 | |
| }, | |
| { | |
| "epoch": 3.5654008438818563, | |
| "grad_norm": 4.340595245361328, | |
| "learning_rate": 3.5884310052644127e-06, | |
| "loss": 0.7940167188644409, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 3.569620253164557, | |
| "grad_norm": 8.34933853149414, | |
| "learning_rate": 3.580274305419067e-06, | |
| "loss": 0.25536781549453735, | |
| "step": 1692 | |
| }, | |
| { | |
| "epoch": 3.5738396624472575, | |
| "grad_norm": 0.517219603061676, | |
| "learning_rate": 3.572122041444853e-06, | |
| "loss": 0.3392212688922882, | |
| "step": 1694 | |
| }, | |
| { | |
| "epoch": 3.5780590717299576, | |
| "grad_norm": 7.081967830657959, | |
| "learning_rate": 3.5639742574570084e-06, | |
| "loss": 0.24323059618473053, | |
| "step": 1696 | |
| }, | |
| { | |
| "epoch": 3.5822784810126582, | |
| "grad_norm": 1.9360688924789429, | |
| "learning_rate": 3.5558309975465256e-06, | |
| "loss": 0.600135326385498, | |
| "step": 1698 | |
| }, | |
| { | |
| "epoch": 3.586497890295359, | |
| "grad_norm": 2.5145275592803955, | |
| "learning_rate": 3.5476923057799165e-06, | |
| "loss": 0.4567859172821045, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 3.590717299578059, | |
| "grad_norm": 3.178347110748291, | |
| "learning_rate": 3.53955822619897e-06, | |
| "loss": 0.4825342893600464, | |
| "step": 1702 | |
| }, | |
| { | |
| "epoch": 3.5949367088607596, | |
| "grad_norm": 2.0541470050811768, | |
| "learning_rate": 3.531428802820521e-06, | |
| "loss": 1.0025891065597534, | |
| "step": 1704 | |
| }, | |
| { | |
| "epoch": 3.59915611814346, | |
| "grad_norm": 1.977526307106018, | |
| "learning_rate": 3.5233040796362038e-06, | |
| "loss": 0.5798022747039795, | |
| "step": 1706 | |
| }, | |
| { | |
| "epoch": 3.6033755274261603, | |
| "grad_norm": 4.426157474517822, | |
| "learning_rate": 3.515184100612222e-06, | |
| "loss": 0.5708905458450317, | |
| "step": 1708 | |
| }, | |
| { | |
| "epoch": 3.607594936708861, | |
| "grad_norm": 0.8931450843811035, | |
| "learning_rate": 3.5070689096891045e-06, | |
| "loss": 0.3289738893508911, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 3.611814345991561, | |
| "grad_norm": 2.223947048187256, | |
| "learning_rate": 3.4989585507814684e-06, | |
| "loss": 0.6438009142875671, | |
| "step": 1712 | |
| }, | |
| { | |
| "epoch": 3.6160337552742616, | |
| "grad_norm": 4.223023414611816, | |
| "learning_rate": 3.4908530677777846e-06, | |
| "loss": 0.8552393913269043, | |
| "step": 1714 | |
| }, | |
| { | |
| "epoch": 3.620253164556962, | |
| "grad_norm": 1.7854139804840088, | |
| "learning_rate": 3.482752504540138e-06, | |
| "loss": 0.4675080180168152, | |
| "step": 1716 | |
| }, | |
| { | |
| "epoch": 3.6244725738396624, | |
| "grad_norm": 2.395404577255249, | |
| "learning_rate": 3.474656904903991e-06, | |
| "loss": 0.35858801007270813, | |
| "step": 1718 | |
| }, | |
| { | |
| "epoch": 3.628691983122363, | |
| "grad_norm": 4.765064239501953, | |
| "learning_rate": 3.466566312677946e-06, | |
| "loss": 0.3300427198410034, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 3.632911392405063, | |
| "grad_norm": 2.5676372051239014, | |
| "learning_rate": 3.458480771643507e-06, | |
| "loss": 0.7667765617370605, | |
| "step": 1722 | |
| }, | |
| { | |
| "epoch": 3.6371308016877637, | |
| "grad_norm": 5.16605281829834, | |
| "learning_rate": 3.4504003255548454e-06, | |
| "loss": 0.3946114182472229, | |
| "step": 1724 | |
| }, | |
| { | |
| "epoch": 3.6413502109704643, | |
| "grad_norm": 15.37302303314209, | |
| "learning_rate": 3.44232501813856e-06, | |
| "loss": 0.31146499514579773, | |
| "step": 1726 | |
| }, | |
| { | |
| "epoch": 3.6455696202531644, | |
| "grad_norm": 11.36103343963623, | |
| "learning_rate": 3.4342548930934447e-06, | |
| "loss": 0.7634888887405396, | |
| "step": 1728 | |
| }, | |
| { | |
| "epoch": 3.649789029535865, | |
| "grad_norm": 8.875736236572266, | |
| "learning_rate": 3.426189994090249e-06, | |
| "loss": 0.20420894026756287, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 3.6540084388185656, | |
| "grad_norm": 6.140727996826172, | |
| "learning_rate": 3.418130364771438e-06, | |
| "loss": 0.7999590635299683, | |
| "step": 1732 | |
| }, | |
| { | |
| "epoch": 3.6582278481012658, | |
| "grad_norm": 3.5605967044830322, | |
| "learning_rate": 3.4100760487509677e-06, | |
| "loss": 0.22376415133476257, | |
| "step": 1734 | |
| }, | |
| { | |
| "epoch": 3.6624472573839664, | |
| "grad_norm": 2.0715627670288086, | |
| "learning_rate": 3.4020270896140338e-06, | |
| "loss": 0.30320820212364197, | |
| "step": 1736 | |
| }, | |
| { | |
| "epoch": 3.6666666666666665, | |
| "grad_norm": 1.8760136365890503, | |
| "learning_rate": 3.3939835309168494e-06, | |
| "loss": 0.5345732569694519, | |
| "step": 1738 | |
| }, | |
| { | |
| "epoch": 3.670886075949367, | |
| "grad_norm": 5.121237277984619, | |
| "learning_rate": 3.385945416186402e-06, | |
| "loss": 0.25805044174194336, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 3.6751054852320673, | |
| "grad_norm": 1.5474026203155518, | |
| "learning_rate": 3.377912788920218e-06, | |
| "loss": 0.811784029006958, | |
| "step": 1742 | |
| }, | |
| { | |
| "epoch": 3.679324894514768, | |
| "grad_norm": 1.5448044538497925, | |
| "learning_rate": 3.3698856925861306e-06, | |
| "loss": 0.4863538146018982, | |
| "step": 1744 | |
| }, | |
| { | |
| "epoch": 3.6835443037974684, | |
| "grad_norm": 4.263956069946289, | |
| "learning_rate": 3.361864170622043e-06, | |
| "loss": 0.38036102056503296, | |
| "step": 1746 | |
| }, | |
| { | |
| "epoch": 3.6877637130801686, | |
| "grad_norm": 2.2748067378997803, | |
| "learning_rate": 3.3538482664356938e-06, | |
| "loss": 0.8080613613128662, | |
| "step": 1748 | |
| }, | |
| { | |
| "epoch": 3.691983122362869, | |
| "grad_norm": 2.969224214553833, | |
| "learning_rate": 3.345838023404419e-06, | |
| "loss": 0.7013299465179443, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 3.6962025316455698, | |
| "grad_norm": 2.08278751373291, | |
| "learning_rate": 3.3378334848749193e-06, | |
| "loss": 0.6944292187690735, | |
| "step": 1752 | |
| }, | |
| { | |
| "epoch": 3.70042194092827, | |
| "grad_norm": 2.476149797439575, | |
| "learning_rate": 3.329834694163032e-06, | |
| "loss": 0.8725452423095703, | |
| "step": 1754 | |
| }, | |
| { | |
| "epoch": 3.7046413502109705, | |
| "grad_norm": 18.001956939697266, | |
| "learning_rate": 3.321841694553482e-06, | |
| "loss": 0.6215965747833252, | |
| "step": 1756 | |
| }, | |
| { | |
| "epoch": 3.708860759493671, | |
| "grad_norm": 5.473003387451172, | |
| "learning_rate": 3.3138545292996636e-06, | |
| "loss": 0.7003090977668762, | |
| "step": 1758 | |
| }, | |
| { | |
| "epoch": 3.7130801687763713, | |
| "grad_norm": 24.688859939575195, | |
| "learning_rate": 3.305873241623395e-06, | |
| "loss": 0.6492451429367065, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 3.717299578059072, | |
| "grad_norm": 5.999505996704102, | |
| "learning_rate": 3.2978978747146886e-06, | |
| "loss": 0.27890729904174805, | |
| "step": 1762 | |
| }, | |
| { | |
| "epoch": 3.721518987341772, | |
| "grad_norm": 6.559441566467285, | |
| "learning_rate": 3.28992847173152e-06, | |
| "loss": 0.382098525762558, | |
| "step": 1764 | |
| }, | |
| { | |
| "epoch": 3.7257383966244726, | |
| "grad_norm": 2.456238269805908, | |
| "learning_rate": 3.2819650757995882e-06, | |
| "loss": 0.7096537947654724, | |
| "step": 1766 | |
| }, | |
| { | |
| "epoch": 3.7299578059071727, | |
| "grad_norm": 8.340387344360352, | |
| "learning_rate": 3.2740077300120874e-06, | |
| "loss": 0.5058803558349609, | |
| "step": 1768 | |
| }, | |
| { | |
| "epoch": 3.7341772151898733, | |
| "grad_norm": 2.959620952606201, | |
| "learning_rate": 3.2660564774294698e-06, | |
| "loss": 0.5690555572509766, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 3.738396624472574, | |
| "grad_norm": 1.9937338829040527, | |
| "learning_rate": 3.2581113610792186e-06, | |
| "loss": 0.6931591033935547, | |
| "step": 1772 | |
| }, | |
| { | |
| "epoch": 3.742616033755274, | |
| "grad_norm": 1.217617392539978, | |
| "learning_rate": 3.2501724239556093e-06, | |
| "loss": 0.20921635627746582, | |
| "step": 1774 | |
| }, | |
| { | |
| "epoch": 3.7468354430379747, | |
| "grad_norm": 0.2667827904224396, | |
| "learning_rate": 3.2422397090194763e-06, | |
| "loss": 0.3903126120567322, | |
| "step": 1776 | |
| }, | |
| { | |
| "epoch": 3.7510548523206753, | |
| "grad_norm": 3.232510566711426, | |
| "learning_rate": 3.2343132591979893e-06, | |
| "loss": 0.6602214574813843, | |
| "step": 1778 | |
| }, | |
| { | |
| "epoch": 3.7552742616033754, | |
| "grad_norm": 1.6198503971099854, | |
| "learning_rate": 3.2263931173844077e-06, | |
| "loss": 0.7261852025985718, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 3.759493670886076, | |
| "grad_norm": 2.057166814804077, | |
| "learning_rate": 3.2184793264378635e-06, | |
| "loss": 0.6649327278137207, | |
| "step": 1782 | |
| }, | |
| { | |
| "epoch": 3.7637130801687766, | |
| "grad_norm": 2.829087495803833, | |
| "learning_rate": 3.210571929183115e-06, | |
| "loss": 0.6382551789283752, | |
| "step": 1784 | |
| }, | |
| { | |
| "epoch": 3.7679324894514767, | |
| "grad_norm": 2.4798736572265625, | |
| "learning_rate": 3.2026709684103248e-06, | |
| "loss": 0.6738499402999878, | |
| "step": 1786 | |
| }, | |
| { | |
| "epoch": 3.7721518987341773, | |
| "grad_norm": 10.70611572265625, | |
| "learning_rate": 3.194776486874825e-06, | |
| "loss": 0.19844934344291687, | |
| "step": 1788 | |
| }, | |
| { | |
| "epoch": 3.7763713080168775, | |
| "grad_norm": 4.095230579376221, | |
| "learning_rate": 3.186888527296885e-06, | |
| "loss": 0.5124695301055908, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 3.780590717299578, | |
| "grad_norm": 2.3026554584503174, | |
| "learning_rate": 3.1790071323614794e-06, | |
| "loss": 0.6329219937324524, | |
| "step": 1792 | |
| }, | |
| { | |
| "epoch": 3.7848101265822782, | |
| "grad_norm": 5.607376575469971, | |
| "learning_rate": 3.1711323447180637e-06, | |
| "loss": 0.5636836290359497, | |
| "step": 1794 | |
| }, | |
| { | |
| "epoch": 3.789029535864979, | |
| "grad_norm": 2.444586992263794, | |
| "learning_rate": 3.163264206980336e-06, | |
| "loss": 0.6737933158874512, | |
| "step": 1796 | |
| }, | |
| { | |
| "epoch": 3.7932489451476794, | |
| "grad_norm": 4.4093451499938965, | |
| "learning_rate": 3.155402761726006e-06, | |
| "loss": 0.8205442428588867, | |
| "step": 1798 | |
| }, | |
| { | |
| "epoch": 3.7974683544303796, | |
| "grad_norm": 2.5362284183502197, | |
| "learning_rate": 3.1475480514965733e-06, | |
| "loss": 0.7304701209068298, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 3.80168776371308, | |
| "grad_norm": 1.82133150100708, | |
| "learning_rate": 3.139700118797088e-06, | |
| "loss": 0.7703126072883606, | |
| "step": 1802 | |
| }, | |
| { | |
| "epoch": 3.8059071729957807, | |
| "grad_norm": 1.8650217056274414, | |
| "learning_rate": 3.131859006095926e-06, | |
| "loss": 0.45118463039398193, | |
| "step": 1804 | |
| }, | |
| { | |
| "epoch": 3.810126582278481, | |
| "grad_norm": 17.568998336791992, | |
| "learning_rate": 3.124024755824554e-06, | |
| "loss": 0.2017352283000946, | |
| "step": 1806 | |
| }, | |
| { | |
| "epoch": 3.8143459915611815, | |
| "grad_norm": 3.5482592582702637, | |
| "learning_rate": 3.1161974103773066e-06, | |
| "loss": 0.728500485420227, | |
| "step": 1808 | |
| }, | |
| { | |
| "epoch": 3.818565400843882, | |
| "grad_norm": 2.8701515197753906, | |
| "learning_rate": 3.108377012111154e-06, | |
| "loss": 0.7613662481307983, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 3.8227848101265822, | |
| "grad_norm": 3.2422940731048584, | |
| "learning_rate": 3.10056360334547e-06, | |
| "loss": 0.37432968616485596, | |
| "step": 1812 | |
| }, | |
| { | |
| "epoch": 3.827004219409283, | |
| "grad_norm": 1.7439910173416138, | |
| "learning_rate": 3.0927572263618062e-06, | |
| "loss": 0.7083200216293335, | |
| "step": 1814 | |
| }, | |
| { | |
| "epoch": 3.831223628691983, | |
| "grad_norm": 3.794440746307373, | |
| "learning_rate": 3.084957923403662e-06, | |
| "loss": 0.7253645658493042, | |
| "step": 1816 | |
| }, | |
| { | |
| "epoch": 3.8354430379746836, | |
| "grad_norm": 8.467775344848633, | |
| "learning_rate": 3.0771657366762586e-06, | |
| "loss": 0.6260569095611572, | |
| "step": 1818 | |
| }, | |
| { | |
| "epoch": 3.8396624472573837, | |
| "grad_norm": 6.704847812652588, | |
| "learning_rate": 3.069380708346305e-06, | |
| "loss": 0.5025795698165894, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 3.8438818565400843, | |
| "grad_norm": 1.6902318000793457, | |
| "learning_rate": 3.061602880541776e-06, | |
| "loss": 0.6335855722427368, | |
| "step": 1822 | |
| }, | |
| { | |
| "epoch": 3.848101265822785, | |
| "grad_norm": 3.424485206604004, | |
| "learning_rate": 3.0538322953516807e-06, | |
| "loss": 0.5025821328163147, | |
| "step": 1824 | |
| }, | |
| { | |
| "epoch": 3.852320675105485, | |
| "grad_norm": 3.550658941268921, | |
| "learning_rate": 3.046068994825832e-06, | |
| "loss": 0.7374518513679504, | |
| "step": 1826 | |
| }, | |
| { | |
| "epoch": 3.8565400843881856, | |
| "grad_norm": 4.101608753204346, | |
| "learning_rate": 3.0383130209746287e-06, | |
| "loss": 0.7142576575279236, | |
| "step": 1828 | |
| }, | |
| { | |
| "epoch": 3.8607594936708862, | |
| "grad_norm": 1.8561471700668335, | |
| "learning_rate": 3.0305644157688175e-06, | |
| "loss": 0.6271055936813354, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 3.8649789029535864, | |
| "grad_norm": 19.705900192260742, | |
| "learning_rate": 3.022823221139272e-06, | |
| "loss": 0.3404349088668823, | |
| "step": 1832 | |
| }, | |
| { | |
| "epoch": 3.869198312236287, | |
| "grad_norm": 9.467658042907715, | |
| "learning_rate": 3.0150894789767627e-06, | |
| "loss": 0.5793641805648804, | |
| "step": 1834 | |
| }, | |
| { | |
| "epoch": 3.8734177215189876, | |
| "grad_norm": 6.555062294006348, | |
| "learning_rate": 3.007363231131733e-06, | |
| "loss": 0.5979642868041992, | |
| "step": 1836 | |
| }, | |
| { | |
| "epoch": 3.8776371308016877, | |
| "grad_norm": 12.590143203735352, | |
| "learning_rate": 2.9996445194140723e-06, | |
| "loss": 0.49834197759628296, | |
| "step": 1838 | |
| }, | |
| { | |
| "epoch": 3.8818565400843883, | |
| "grad_norm": 11.55475902557373, | |
| "learning_rate": 2.9919333855928875e-06, | |
| "loss": 0.7811706066131592, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 3.8860759493670884, | |
| "grad_norm": 1.6321529150009155, | |
| "learning_rate": 2.9842298713962795e-06, | |
| "loss": 0.4640495777130127, | |
| "step": 1842 | |
| }, | |
| { | |
| "epoch": 3.890295358649789, | |
| "grad_norm": 1.108053207397461, | |
| "learning_rate": 2.9765340185111134e-06, | |
| "loss": 0.5240273475646973, | |
| "step": 1844 | |
| }, | |
| { | |
| "epoch": 3.894514767932489, | |
| "grad_norm": 1.4660061597824097, | |
| "learning_rate": 2.968845868582799e-06, | |
| "loss": 0.6336109042167664, | |
| "step": 1846 | |
| }, | |
| { | |
| "epoch": 3.8987341772151898, | |
| "grad_norm": 7.276936054229736, | |
| "learning_rate": 2.961165463215062e-06, | |
| "loss": 0.48461082577705383, | |
| "step": 1848 | |
| }, | |
| { | |
| "epoch": 3.9029535864978904, | |
| "grad_norm": 1.9613572359085083, | |
| "learning_rate": 2.9534928439697186e-06, | |
| "loss": 0.6677671670913696, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 3.9071729957805905, | |
| "grad_norm": 1.520216464996338, | |
| "learning_rate": 2.9458280523664493e-06, | |
| "loss": 0.8395076990127563, | |
| "step": 1852 | |
| }, | |
| { | |
| "epoch": 3.911392405063291, | |
| "grad_norm": 3.0033154487609863, | |
| "learning_rate": 2.938171129882579e-06, | |
| "loss": 0.6944848299026489, | |
| "step": 1854 | |
| }, | |
| { | |
| "epoch": 3.9156118143459917, | |
| "grad_norm": 1.6401822566986084, | |
| "learning_rate": 2.930522117952847e-06, | |
| "loss": 0.7018183469772339, | |
| "step": 1856 | |
| }, | |
| { | |
| "epoch": 3.919831223628692, | |
| "grad_norm": 2.8167307376861572, | |
| "learning_rate": 2.922881057969188e-06, | |
| "loss": 0.7709340453147888, | |
| "step": 1858 | |
| }, | |
| { | |
| "epoch": 3.9240506329113924, | |
| "grad_norm": 2.7081515789031982, | |
| "learning_rate": 2.9152479912805028e-06, | |
| "loss": 0.7548224925994873, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 3.928270042194093, | |
| "grad_norm": 3.791499137878418, | |
| "learning_rate": 2.907622959192439e-06, | |
| "loss": 0.5371965169906616, | |
| "step": 1862 | |
| }, | |
| { | |
| "epoch": 3.932489451476793, | |
| "grad_norm": 2.503772497177124, | |
| "learning_rate": 2.9000060029671644e-06, | |
| "loss": 0.5366585850715637, | |
| "step": 1864 | |
| }, | |
| { | |
| "epoch": 3.9367088607594938, | |
| "grad_norm": 6.065065383911133, | |
| "learning_rate": 2.8923971638231466e-06, | |
| "loss": 0.9665102958679199, | |
| "step": 1866 | |
| }, | |
| { | |
| "epoch": 3.9409282700421944, | |
| "grad_norm": 2.7202789783477783, | |
| "learning_rate": 2.884796482934927e-06, | |
| "loss": 0.7356393337249756, | |
| "step": 1868 | |
| }, | |
| { | |
| "epoch": 3.9451476793248945, | |
| "grad_norm": 3.0500247478485107, | |
| "learning_rate": 2.877204001432899e-06, | |
| "loss": 0.5012904405593872, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 3.9493670886075947, | |
| "grad_norm": 0.8024043440818787, | |
| "learning_rate": 2.869619760403089e-06, | |
| "loss": 0.3538365662097931, | |
| "step": 1872 | |
| }, | |
| { | |
| "epoch": 3.9535864978902953, | |
| "grad_norm": 191.6532745361328, | |
| "learning_rate": 2.8620438008869264e-06, | |
| "loss": 0.434034138917923, | |
| "step": 1874 | |
| }, | |
| { | |
| "epoch": 3.957805907172996, | |
| "grad_norm": 2.8688251972198486, | |
| "learning_rate": 2.8544761638810277e-06, | |
| "loss": 0.6301808953285217, | |
| "step": 1876 | |
| }, | |
| { | |
| "epoch": 3.962025316455696, | |
| "grad_norm": 4.15130090713501, | |
| "learning_rate": 2.8469168903369733e-06, | |
| "loss": 0.596470832824707, | |
| "step": 1878 | |
| }, | |
| { | |
| "epoch": 3.9662447257383966, | |
| "grad_norm": 2.7118093967437744, | |
| "learning_rate": 2.8393660211610864e-06, | |
| "loss": 0.4589231610298157, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 3.970464135021097, | |
| "grad_norm": 2.9497010707855225, | |
| "learning_rate": 2.8318235972142075e-06, | |
| "loss": 0.7778608798980713, | |
| "step": 1882 | |
| }, | |
| { | |
| "epoch": 3.9746835443037973, | |
| "grad_norm": 10.08464241027832, | |
| "learning_rate": 2.824289659311481e-06, | |
| "loss": 0.3298872113227844, | |
| "step": 1884 | |
| }, | |
| { | |
| "epoch": 3.978902953586498, | |
| "grad_norm": 2.5433638095855713, | |
| "learning_rate": 2.8167642482221274e-06, | |
| "loss": 0.6300213932991028, | |
| "step": 1886 | |
| }, | |
| { | |
| "epoch": 3.9831223628691985, | |
| "grad_norm": 11.90830135345459, | |
| "learning_rate": 2.8092474046692227e-06, | |
| "loss": 0.4418677091598511, | |
| "step": 1888 | |
| }, | |
| { | |
| "epoch": 3.9873417721518987, | |
| "grad_norm": 4.765434741973877, | |
| "learning_rate": 2.801739169329486e-06, | |
| "loss": 0.6927688121795654, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 3.9915611814345993, | |
| "grad_norm": 6.100020408630371, | |
| "learning_rate": 2.7942395828330477e-06, | |
| "loss": 0.5399014949798584, | |
| "step": 1892 | |
| }, | |
| { | |
| "epoch": 3.9957805907173, | |
| "grad_norm": 1.7746268510818481, | |
| "learning_rate": 2.7867486857632417e-06, | |
| "loss": 0.7801375389099121, | |
| "step": 1894 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "grad_norm": 1.9672950506210327, | |
| "learning_rate": 2.7792665186563753e-06, | |
| "loss": 0.6976273059844971, | |
| "step": 1896 | |
| }, | |
| { | |
| "epoch": 4.0042194092827, | |
| "grad_norm": 2.0892131328582764, | |
| "learning_rate": 2.771793122001518e-06, | |
| "loss": 0.5950413942337036, | |
| "step": 1898 | |
| }, | |
| { | |
| "epoch": 4.008438818565401, | |
| "grad_norm": 3.418523073196411, | |
| "learning_rate": 2.764328536240274e-06, | |
| "loss": 0.48346221446990967, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 4.012658227848101, | |
| "grad_norm": 2.4079160690307617, | |
| "learning_rate": 2.7568728017665734e-06, | |
| "loss": 0.5231744647026062, | |
| "step": 1902 | |
| }, | |
| { | |
| "epoch": 4.0168776371308015, | |
| "grad_norm": 10.42201042175293, | |
| "learning_rate": 2.749425958926447e-06, | |
| "loss": 0.36587753891944885, | |
| "step": 1904 | |
| }, | |
| { | |
| "epoch": 4.0210970464135025, | |
| "grad_norm": 0.36827781796455383, | |
| "learning_rate": 2.7419880480178055e-06, | |
| "loss": 0.18869513273239136, | |
| "step": 1906 | |
| }, | |
| { | |
| "epoch": 4.025316455696203, | |
| "grad_norm": 2.4577043056488037, | |
| "learning_rate": 2.734559109290229e-06, | |
| "loss": 0.5424115061759949, | |
| "step": 1908 | |
| }, | |
| { | |
| "epoch": 4.029535864978903, | |
| "grad_norm": 7.176480293273926, | |
| "learning_rate": 2.7271391829447447e-06, | |
| "loss": 0.09614966064691544, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 4.033755274261603, | |
| "grad_norm": 2.078049898147583, | |
| "learning_rate": 2.71972830913361e-06, | |
| "loss": 0.5041449069976807, | |
| "step": 1912 | |
| }, | |
| { | |
| "epoch": 4.037974683544304, | |
| "grad_norm": 3.0364325046539307, | |
| "learning_rate": 2.712326527960096e-06, | |
| "loss": 0.6174269914627075, | |
| "step": 1914 | |
| }, | |
| { | |
| "epoch": 4.042194092827004, | |
| "grad_norm": 0.6836444139480591, | |
| "learning_rate": 2.704933879478268e-06, | |
| "loss": 0.3205277919769287, | |
| "step": 1916 | |
| }, | |
| { | |
| "epoch": 4.046413502109704, | |
| "grad_norm": 6.195359230041504, | |
| "learning_rate": 2.697550403692773e-06, | |
| "loss": 0.14734962582588196, | |
| "step": 1918 | |
| }, | |
| { | |
| "epoch": 4.050632911392405, | |
| "grad_norm": 2.888777732849121, | |
| "learning_rate": 2.69017614055862e-06, | |
| "loss": 0.5565149784088135, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 4.0548523206751055, | |
| "grad_norm": 12.064739227294922, | |
| "learning_rate": 2.682811129980962e-06, | |
| "loss": 0.47878050804138184, | |
| "step": 1922 | |
| }, | |
| { | |
| "epoch": 4.059071729957806, | |
| "grad_norm": 1.9031803607940674, | |
| "learning_rate": 2.6754554118148857e-06, | |
| "loss": 0.3945463299751282, | |
| "step": 1924 | |
| }, | |
| { | |
| "epoch": 4.063291139240507, | |
| "grad_norm": 6.993194103240967, | |
| "learning_rate": 2.668109025865191e-06, | |
| "loss": 0.2721104919910431, | |
| "step": 1926 | |
| }, | |
| { | |
| "epoch": 4.067510548523207, | |
| "grad_norm": 7.187300205230713, | |
| "learning_rate": 2.660772011886178e-06, | |
| "loss": 0.572750449180603, | |
| "step": 1928 | |
| }, | |
| { | |
| "epoch": 4.071729957805907, | |
| "grad_norm": 9.433985710144043, | |
| "learning_rate": 2.6534444095814334e-06, | |
| "loss": 0.14224952459335327, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 4.075949367088608, | |
| "grad_norm": 6.624326705932617, | |
| "learning_rate": 2.646126258603612e-06, | |
| "loss": 0.429046630859375, | |
| "step": 1932 | |
| }, | |
| { | |
| "epoch": 4.080168776371308, | |
| "grad_norm": 5.319462776184082, | |
| "learning_rate": 2.6388175985542193e-06, | |
| "loss": 0.4175564646720886, | |
| "step": 1934 | |
| }, | |
| { | |
| "epoch": 4.084388185654008, | |
| "grad_norm": 7.918082237243652, | |
| "learning_rate": 2.631518468983407e-06, | |
| "loss": 0.5208654403686523, | |
| "step": 1936 | |
| }, | |
| { | |
| "epoch": 4.0886075949367084, | |
| "grad_norm": 2.524588108062744, | |
| "learning_rate": 2.6242289093897533e-06, | |
| "loss": 0.30576610565185547, | |
| "step": 1938 | |
| }, | |
| { | |
| "epoch": 4.0928270042194095, | |
| "grad_norm": 17.760915756225586, | |
| "learning_rate": 2.6169489592200457e-06, | |
| "loss": 0.3638699948787689, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 4.09704641350211, | |
| "grad_norm": 3.6685545444488525, | |
| "learning_rate": 2.6096786578690738e-06, | |
| "loss": 0.2502339482307434, | |
| "step": 1942 | |
| }, | |
| { | |
| "epoch": 4.10126582278481, | |
| "grad_norm": 1.735503077507019, | |
| "learning_rate": 2.6024180446794133e-06, | |
| "loss": 0.2844234108924866, | |
| "step": 1944 | |
| }, | |
| { | |
| "epoch": 4.105485232067511, | |
| "grad_norm": 2.3414032459259033, | |
| "learning_rate": 2.5951671589412127e-06, | |
| "loss": 0.5370857119560242, | |
| "step": 1946 | |
| }, | |
| { | |
| "epoch": 4.109704641350211, | |
| "grad_norm": 9.196849822998047, | |
| "learning_rate": 2.587926039891983e-06, | |
| "loss": 0.45078617334365845, | |
| "step": 1948 | |
| }, | |
| { | |
| "epoch": 4.113924050632911, | |
| "grad_norm": 0.8639876842498779, | |
| "learning_rate": 2.580694726716379e-06, | |
| "loss": 0.3761923313140869, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 4.118143459915612, | |
| "grad_norm": 1.6307646036148071, | |
| "learning_rate": 2.573473258545997e-06, | |
| "loss": 0.44236212968826294, | |
| "step": 1952 | |
| }, | |
| { | |
| "epoch": 4.122362869198312, | |
| "grad_norm": 4.56338357925415, | |
| "learning_rate": 2.566261674459156e-06, | |
| "loss": 0.707075834274292, | |
| "step": 1954 | |
| }, | |
| { | |
| "epoch": 4.1265822784810124, | |
| "grad_norm": 3.0363290309906006, | |
| "learning_rate": 2.5590600134806873e-06, | |
| "loss": 0.12159548699855804, | |
| "step": 1956 | |
| }, | |
| { | |
| "epoch": 4.1308016877637135, | |
| "grad_norm": 2.8413619995117188, | |
| "learning_rate": 2.551868314581726e-06, | |
| "loss": 0.6649860739707947, | |
| "step": 1958 | |
| }, | |
| { | |
| "epoch": 4.135021097046414, | |
| "grad_norm": 4.109986782073975, | |
| "learning_rate": 2.544686616679497e-06, | |
| "loss": 0.6205018758773804, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 4.139240506329114, | |
| "grad_norm": 6.5747504234313965, | |
| "learning_rate": 2.537514958637107e-06, | |
| "loss": 0.37222298979759216, | |
| "step": 1962 | |
| }, | |
| { | |
| "epoch": 4.143459915611814, | |
| "grad_norm": 2.581211566925049, | |
| "learning_rate": 2.5303533792633306e-06, | |
| "loss": 0.4583626687526703, | |
| "step": 1964 | |
| }, | |
| { | |
| "epoch": 4.147679324894515, | |
| "grad_norm": 0.3391718864440918, | |
| "learning_rate": 2.5232019173124043e-06, | |
| "loss": 0.24545279145240784, | |
| "step": 1966 | |
| }, | |
| { | |
| "epoch": 4.151898734177215, | |
| "grad_norm": 3.354196071624756, | |
| "learning_rate": 2.5160606114838158e-06, | |
| "loss": 0.6107680797576904, | |
| "step": 1968 | |
| }, | |
| { | |
| "epoch": 4.156118143459915, | |
| "grad_norm": 1.9463728666305542, | |
| "learning_rate": 2.5089295004220927e-06, | |
| "loss": 0.41494786739349365, | |
| "step": 1970 | |
| }, | |
| { | |
| "epoch": 4.160337552742616, | |
| "grad_norm": 3.8241024017333984, | |
| "learning_rate": 2.5018086227165937e-06, | |
| "loss": 0.5631481409072876, | |
| "step": 1972 | |
| }, | |
| { | |
| "epoch": 4.1645569620253164, | |
| "grad_norm": 3.7971303462982178, | |
| "learning_rate": 2.494698016901302e-06, | |
| "loss": 0.13252116739749908, | |
| "step": 1974 | |
| }, | |
| { | |
| "epoch": 4.168776371308017, | |
| "grad_norm": 5.456217288970947, | |
| "learning_rate": 2.487597721454616e-06, | |
| "loss": 0.4099525213241577, | |
| "step": 1976 | |
| }, | |
| { | |
| "epoch": 4.172995780590718, | |
| "grad_norm": 18.906333923339844, | |
| "learning_rate": 2.4805077747991403e-06, | |
| "loss": 0.33811259269714355, | |
| "step": 1978 | |
| }, | |
| { | |
| "epoch": 4.177215189873418, | |
| "grad_norm": 11.150616645812988, | |
| "learning_rate": 2.473428215301474e-06, | |
| "loss": 0.2853623032569885, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 4.181434599156118, | |
| "grad_norm": 23.042560577392578, | |
| "learning_rate": 2.466359081272012e-06, | |
| "loss": 0.3581426441669464, | |
| "step": 1982 | |
| }, | |
| { | |
| "epoch": 4.185654008438819, | |
| "grad_norm": 4.002007007598877, | |
| "learning_rate": 2.459300410964731e-06, | |
| "loss": 0.3014911413192749, | |
| "step": 1984 | |
| }, | |
| { | |
| "epoch": 4.189873417721519, | |
| "grad_norm": 6.566624164581299, | |
| "learning_rate": 2.452252242576984e-06, | |
| "loss": 0.11508725583553314, | |
| "step": 1986 | |
| }, | |
| { | |
| "epoch": 4.194092827004219, | |
| "grad_norm": 18.410457611083984, | |
| "learning_rate": 2.445214614249294e-06, | |
| "loss": 0.3810286521911621, | |
| "step": 1988 | |
| }, | |
| { | |
| "epoch": 4.198312236286919, | |
| "grad_norm": 6.431080341339111, | |
| "learning_rate": 2.4381875640651466e-06, | |
| "loss": 0.20014682412147522, | |
| "step": 1990 | |
| }, | |
| { | |
| "epoch": 4.2025316455696204, | |
| "grad_norm": 3.2412610054016113, | |
| "learning_rate": 2.431171130050788e-06, | |
| "loss": 0.6001700162887573, | |
| "step": 1992 | |
| }, | |
| { | |
| "epoch": 4.206751054852321, | |
| "grad_norm": 3.1228854656219482, | |
| "learning_rate": 2.4241653501750117e-06, | |
| "loss": 0.29799264669418335, | |
| "step": 1994 | |
| }, | |
| { | |
| "epoch": 4.210970464135021, | |
| "grad_norm": 2.178508996963501, | |
| "learning_rate": 2.4171702623489588e-06, | |
| "loss": 0.5007591247558594, | |
| "step": 1996 | |
| }, | |
| { | |
| "epoch": 4.215189873417722, | |
| "grad_norm": 7.447211265563965, | |
| "learning_rate": 2.410185904425912e-06, | |
| "loss": 0.7163572907447815, | |
| "step": 1998 | |
| }, | |
| { | |
| "epoch": 4.219409282700422, | |
| "grad_norm": 2.8777246475219727, | |
| "learning_rate": 2.403212314201088e-06, | |
| "loss": 0.5820721387863159, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 4.223628691983122, | |
| "grad_norm": 4.454619884490967, | |
| "learning_rate": 2.3962495294114403e-06, | |
| "loss": 0.41988158226013184, | |
| "step": 2002 | |
| }, | |
| { | |
| "epoch": 4.227848101265823, | |
| "grad_norm": 4.4292426109313965, | |
| "learning_rate": 2.3892975877354452e-06, | |
| "loss": 0.14902547001838684, | |
| "step": 2004 | |
| }, | |
| { | |
| "epoch": 4.232067510548523, | |
| "grad_norm": 2.666948080062866, | |
| "learning_rate": 2.3823565267929036e-06, | |
| "loss": 0.6181389093399048, | |
| "step": 2006 | |
| }, | |
| { | |
| "epoch": 4.236286919831223, | |
| "grad_norm": 3.547452688217163, | |
| "learning_rate": 2.375426384144735e-06, | |
| "loss": 0.33217155933380127, | |
| "step": 2008 | |
| }, | |
| { | |
| "epoch": 4.2405063291139244, | |
| "grad_norm": 2.134594440460205, | |
| "learning_rate": 2.368507197292777e-06, | |
| "loss": 0.4793064594268799, | |
| "step": 2010 | |
| }, | |
| { | |
| "epoch": 4.244725738396625, | |
| "grad_norm": 25.654151916503906, | |
| "learning_rate": 2.361599003679582e-06, | |
| "loss": 0.13546811044216156, | |
| "step": 2012 | |
| }, | |
| { | |
| "epoch": 4.248945147679325, | |
| "grad_norm": 4.229970455169678, | |
| "learning_rate": 2.3547018406882104e-06, | |
| "loss": 0.3434482216835022, | |
| "step": 2014 | |
| }, | |
| { | |
| "epoch": 4.253164556962025, | |
| "grad_norm": 4.361436367034912, | |
| "learning_rate": 2.347815745642035e-06, | |
| "loss": 0.6057535409927368, | |
| "step": 2016 | |
| }, | |
| { | |
| "epoch": 4.257383966244726, | |
| "grad_norm": 17.874441146850586, | |
| "learning_rate": 2.340940755804532e-06, | |
| "loss": 0.5280637741088867, | |
| "step": 2018 | |
| }, | |
| { | |
| "epoch": 4.261603375527426, | |
| "grad_norm": 8.038070678710938, | |
| "learning_rate": 2.334076908379086e-06, | |
| "loss": 0.07331550121307373, | |
| "step": 2020 | |
| }, | |
| { | |
| "epoch": 4.265822784810126, | |
| "grad_norm": 0.6873889565467834, | |
| "learning_rate": 2.327224240508784e-06, | |
| "loss": 0.15723557770252228, | |
| "step": 2022 | |
| }, | |
| { | |
| "epoch": 4.270042194092827, | |
| "grad_norm": 4.693041801452637, | |
| "learning_rate": 2.3203827892762136e-06, | |
| "loss": 0.45733606815338135, | |
| "step": 2024 | |
| }, | |
| { | |
| "epoch": 4.274261603375527, | |
| "grad_norm": 21.652511596679688, | |
| "learning_rate": 2.313552591703267e-06, | |
| "loss": 0.20987409353256226, | |
| "step": 2026 | |
| }, | |
| { | |
| "epoch": 4.2784810126582276, | |
| "grad_norm": 1.620386004447937, | |
| "learning_rate": 2.3067336847509405e-06, | |
| "loss": 0.18322864174842834, | |
| "step": 2028 | |
| }, | |
| { | |
| "epoch": 4.282700421940929, | |
| "grad_norm": 0.5709916949272156, | |
| "learning_rate": 2.2999261053191264e-06, | |
| "loss": 0.264180064201355, | |
| "step": 2030 | |
| }, | |
| { | |
| "epoch": 4.286919831223629, | |
| "grad_norm": 6.232452392578125, | |
| "learning_rate": 2.2931298902464242e-06, | |
| "loss": 0.581986129283905, | |
| "step": 2032 | |
| }, | |
| { | |
| "epoch": 4.291139240506329, | |
| "grad_norm": 2.427851438522339, | |
| "learning_rate": 2.286345076309935e-06, | |
| "loss": 0.08267831802368164, | |
| "step": 2034 | |
| }, | |
| { | |
| "epoch": 4.29535864978903, | |
| "grad_norm": 7.5021843910217285, | |
| "learning_rate": 2.279571700225061e-06, | |
| "loss": 0.3914198875427246, | |
| "step": 2036 | |
| }, | |
| { | |
| "epoch": 4.29957805907173, | |
| "grad_norm": 17.116886138916016, | |
| "learning_rate": 2.272809798645313e-06, | |
| "loss": 0.4527243375778198, | |
| "step": 2038 | |
| }, | |
| { | |
| "epoch": 4.30379746835443, | |
| "grad_norm": 9.568516731262207, | |
| "learning_rate": 2.2660594081621068e-06, | |
| "loss": 0.5110298991203308, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 4.308016877637131, | |
| "grad_norm": 2.8458101749420166, | |
| "learning_rate": 2.259320565304568e-06, | |
| "loss": 0.3989183306694031, | |
| "step": 2042 | |
| }, | |
| { | |
| "epoch": 4.312236286919831, | |
| "grad_norm": 3.3316569328308105, | |
| "learning_rate": 2.2525933065393316e-06, | |
| "loss": 0.4240986406803131, | |
| "step": 2044 | |
| }, | |
| { | |
| "epoch": 4.3164556962025316, | |
| "grad_norm": 3.5117201805114746, | |
| "learning_rate": 2.2458776682703478e-06, | |
| "loss": 0.5510097146034241, | |
| "step": 2046 | |
| }, | |
| { | |
| "epoch": 4.320675105485232, | |
| "grad_norm": 2.211899757385254, | |
| "learning_rate": 2.2391736868386826e-06, | |
| "loss": 0.47137928009033203, | |
| "step": 2048 | |
| }, | |
| { | |
| "epoch": 4.324894514767933, | |
| "grad_norm": 2.8007261753082275, | |
| "learning_rate": 2.2324813985223236e-06, | |
| "loss": 0.13788414001464844, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 4.329113924050633, | |
| "grad_norm": 5.883923530578613, | |
| "learning_rate": 2.2258008395359814e-06, | |
| "loss": 0.21625080704689026, | |
| "step": 2052 | |
| }, | |
| { | |
| "epoch": 4.333333333333333, | |
| "grad_norm": 2.6445043087005615, | |
| "learning_rate": 2.2191320460308913e-06, | |
| "loss": 0.43525630235671997, | |
| "step": 2054 | |
| }, | |
| { | |
| "epoch": 4.337552742616034, | |
| "grad_norm": 4.206122875213623, | |
| "learning_rate": 2.2124750540946258e-06, | |
| "loss": 0.22658753395080566, | |
| "step": 2056 | |
| }, | |
| { | |
| "epoch": 4.341772151898734, | |
| "grad_norm": 7.528255462646484, | |
| "learning_rate": 2.2058298997508916e-06, | |
| "loss": 0.19083625078201294, | |
| "step": 2058 | |
| }, | |
| { | |
| "epoch": 4.345991561181434, | |
| "grad_norm": 2.3334925174713135, | |
| "learning_rate": 2.1991966189593375e-06, | |
| "loss": 0.5279438495635986, | |
| "step": 2060 | |
| }, | |
| { | |
| "epoch": 4.350210970464135, | |
| "grad_norm": 3.07808780670166, | |
| "learning_rate": 2.1925752476153598e-06, | |
| "loss": 0.5324735641479492, | |
| "step": 2062 | |
| }, | |
| { | |
| "epoch": 4.3544303797468356, | |
| "grad_norm": 7.293347358703613, | |
| "learning_rate": 2.1859658215499094e-06, | |
| "loss": 0.4442484378814697, | |
| "step": 2064 | |
| }, | |
| { | |
| "epoch": 4.358649789029536, | |
| "grad_norm": 3.767479419708252, | |
| "learning_rate": 2.1793683765292943e-06, | |
| "loss": 0.6478234529495239, | |
| "step": 2066 | |
| }, | |
| { | |
| "epoch": 4.362869198312236, | |
| "grad_norm": 1.7366708517074585, | |
| "learning_rate": 2.172782948254989e-06, | |
| "loss": 0.22714099287986755, | |
| "step": 2068 | |
| }, | |
| { | |
| "epoch": 4.367088607594937, | |
| "grad_norm": 2.4501614570617676, | |
| "learning_rate": 2.1662095723634387e-06, | |
| "loss": 0.7067612409591675, | |
| "step": 2070 | |
| }, | |
| { | |
| "epoch": 4.371308016877637, | |
| "grad_norm": 2.0209014415740967, | |
| "learning_rate": 2.159648284425872e-06, | |
| "loss": 0.6720374226570129, | |
| "step": 2072 | |
| }, | |
| { | |
| "epoch": 4.375527426160337, | |
| "grad_norm": 2.6613192558288574, | |
| "learning_rate": 2.1530991199481e-06, | |
| "loss": 0.46383750438690186, | |
| "step": 2074 | |
| }, | |
| { | |
| "epoch": 4.379746835443038, | |
| "grad_norm": 10.552399635314941, | |
| "learning_rate": 2.1465621143703354e-06, | |
| "loss": 0.4360678195953369, | |
| "step": 2076 | |
| }, | |
| { | |
| "epoch": 4.383966244725738, | |
| "grad_norm": 2.3267464637756348, | |
| "learning_rate": 2.1400373030669878e-06, | |
| "loss": 0.32150259613990784, | |
| "step": 2078 | |
| }, | |
| { | |
| "epoch": 4.3881856540084385, | |
| "grad_norm": 11.424999237060547, | |
| "learning_rate": 2.1335247213464816e-06, | |
| "loss": 0.6122124195098877, | |
| "step": 2080 | |
| }, | |
| { | |
| "epoch": 4.3924050632911396, | |
| "grad_norm": 1.8929657936096191, | |
| "learning_rate": 2.1270244044510596e-06, | |
| "loss": 0.29143026471138, | |
| "step": 2082 | |
| }, | |
| { | |
| "epoch": 4.39662447257384, | |
| "grad_norm": 5.961505889892578, | |
| "learning_rate": 2.120536387556597e-06, | |
| "loss": 0.44119709730148315, | |
| "step": 2084 | |
| }, | |
| { | |
| "epoch": 4.40084388185654, | |
| "grad_norm": 4.30864953994751, | |
| "learning_rate": 2.114060705772409e-06, | |
| "loss": 0.7014176845550537, | |
| "step": 2086 | |
| }, | |
| { | |
| "epoch": 4.405063291139241, | |
| "grad_norm": 2.612563371658325, | |
| "learning_rate": 2.107597394141057e-06, | |
| "loss": 0.5459550023078918, | |
| "step": 2088 | |
| }, | |
| { | |
| "epoch": 4.409282700421941, | |
| "grad_norm": 2.4660723209381104, | |
| "learning_rate": 2.1011464876381663e-06, | |
| "loss": 0.46325892210006714, | |
| "step": 2090 | |
| }, | |
| { | |
| "epoch": 4.413502109704641, | |
| "grad_norm": 4.131664276123047, | |
| "learning_rate": 2.0947080211722317e-06, | |
| "loss": 0.4953617453575134, | |
| "step": 2092 | |
| }, | |
| { | |
| "epoch": 4.417721518987342, | |
| "grad_norm": 1.9574029445648193, | |
| "learning_rate": 2.0882820295844285e-06, | |
| "loss": 0.5186775922775269, | |
| "step": 2094 | |
| }, | |
| { | |
| "epoch": 4.421940928270042, | |
| "grad_norm": 3.840588092803955, | |
| "learning_rate": 2.081868547648429e-06, | |
| "loss": 0.31746193766593933, | |
| "step": 2096 | |
| }, | |
| { | |
| "epoch": 4.4261603375527425, | |
| "grad_norm": 2.727635383605957, | |
| "learning_rate": 2.0754676100702045e-06, | |
| "loss": 0.7108813524246216, | |
| "step": 2098 | |
| }, | |
| { | |
| "epoch": 4.430379746835443, | |
| "grad_norm": 4.424046039581299, | |
| "learning_rate": 2.0690792514878495e-06, | |
| "loss": 0.48461851477622986, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 4.434599156118144, | |
| "grad_norm": 2.04559326171875, | |
| "learning_rate": 2.0627035064713857e-06, | |
| "loss": 0.4159836769104004, | |
| "step": 2102 | |
| }, | |
| { | |
| "epoch": 4.438818565400844, | |
| "grad_norm": 1.8618910312652588, | |
| "learning_rate": 2.056340409522577e-06, | |
| "loss": 0.36201441287994385, | |
| "step": 2104 | |
| }, | |
| { | |
| "epoch": 4.443037974683544, | |
| "grad_norm": 2.5027105808258057, | |
| "learning_rate": 2.049989995074746e-06, | |
| "loss": 0.5959118008613586, | |
| "step": 2106 | |
| }, | |
| { | |
| "epoch": 4.447257383966245, | |
| "grad_norm": 11.552289009094238, | |
| "learning_rate": 2.043652297492583e-06, | |
| "loss": 0.3659658432006836, | |
| "step": 2108 | |
| }, | |
| { | |
| "epoch": 4.451476793248945, | |
| "grad_norm": 4.931119441986084, | |
| "learning_rate": 2.037327351071963e-06, | |
| "loss": 0.48589879274368286, | |
| "step": 2110 | |
| }, | |
| { | |
| "epoch": 4.455696202531645, | |
| "grad_norm": 4.232883930206299, | |
| "learning_rate": 2.031015190039759e-06, | |
| "loss": 0.5243382453918457, | |
| "step": 2112 | |
| }, | |
| { | |
| "epoch": 4.459915611814346, | |
| "grad_norm": 0.3998461961746216, | |
| "learning_rate": 2.0247158485536565e-06, | |
| "loss": 0.5077897310256958, | |
| "step": 2114 | |
| }, | |
| { | |
| "epoch": 4.4641350210970465, | |
| "grad_norm": 1.7971662282943726, | |
| "learning_rate": 2.0184293607019707e-06, | |
| "loss": 0.2606506943702698, | |
| "step": 2116 | |
| }, | |
| { | |
| "epoch": 4.468354430379747, | |
| "grad_norm": 2.3619842529296875, | |
| "learning_rate": 2.012155760503458e-06, | |
| "loss": 0.543289065361023, | |
| "step": 2118 | |
| }, | |
| { | |
| "epoch": 4.472573839662447, | |
| "grad_norm": 1.1135996580123901, | |
| "learning_rate": 2.0058950819071384e-06, | |
| "loss": 0.08294013142585754, | |
| "step": 2120 | |
| }, | |
| { | |
| "epoch": 4.476793248945148, | |
| "grad_norm": 6.450394630432129, | |
| "learning_rate": 1.999647358792103e-06, | |
| "loss": 0.27434927225112915, | |
| "step": 2122 | |
| }, | |
| { | |
| "epoch": 4.481012658227848, | |
| "grad_norm": 9.028851509094238, | |
| "learning_rate": 1.993412624967339e-06, | |
| "loss": 0.18550115823745728, | |
| "step": 2124 | |
| }, | |
| { | |
| "epoch": 4.485232067510548, | |
| "grad_norm": 3.7954587936401367, | |
| "learning_rate": 1.9871909141715433e-06, | |
| "loss": 0.25095483660697937, | |
| "step": 2126 | |
| }, | |
| { | |
| "epoch": 4.489451476793249, | |
| "grad_norm": 2.933171033859253, | |
| "learning_rate": 1.980982260072936e-06, | |
| "loss": 0.29782503843307495, | |
| "step": 2128 | |
| }, | |
| { | |
| "epoch": 4.493670886075949, | |
| "grad_norm": 5.5410475730896, | |
| "learning_rate": 1.9747866962690864e-06, | |
| "loss": 0.37597131729125977, | |
| "step": 2130 | |
| }, | |
| { | |
| "epoch": 4.4978902953586495, | |
| "grad_norm": 7.844871997833252, | |
| "learning_rate": 1.9686042562867247e-06, | |
| "loss": 0.591028094291687, | |
| "step": 2132 | |
| }, | |
| { | |
| "epoch": 4.5021097046413505, | |
| "grad_norm": 5.038850784301758, | |
| "learning_rate": 1.962434973581564e-06, | |
| "loss": 0.45768237113952637, | |
| "step": 2134 | |
| }, | |
| { | |
| "epoch": 4.506329113924051, | |
| "grad_norm": 6.212744235992432, | |
| "learning_rate": 1.9562788815381164e-06, | |
| "loss": 0.11174334585666656, | |
| "step": 2136 | |
| }, | |
| { | |
| "epoch": 4.510548523206751, | |
| "grad_norm": 1.0894521474838257, | |
| "learning_rate": 1.950136013469515e-06, | |
| "loss": 0.1324283480644226, | |
| "step": 2138 | |
| }, | |
| { | |
| "epoch": 4.514767932489452, | |
| "grad_norm": 5.448882579803467, | |
| "learning_rate": 1.944006402617333e-06, | |
| "loss": 0.13975661993026733, | |
| "step": 2140 | |
| }, | |
| { | |
| "epoch": 4.518987341772152, | |
| "grad_norm": 0.6249382495880127, | |
| "learning_rate": 1.937890082151403e-06, | |
| "loss": 0.32427144050598145, | |
| "step": 2142 | |
| }, | |
| { | |
| "epoch": 4.523206751054852, | |
| "grad_norm": 11.115077018737793, | |
| "learning_rate": 1.9317870851696356e-06, | |
| "loss": 0.10621624439954758, | |
| "step": 2144 | |
| }, | |
| { | |
| "epoch": 4.527426160337553, | |
| "grad_norm": 3.9892995357513428, | |
| "learning_rate": 1.9256974446978464e-06, | |
| "loss": 0.38272783160209656, | |
| "step": 2146 | |
| }, | |
| { | |
| "epoch": 4.531645569620253, | |
| "grad_norm": 2.471816301345825, | |
| "learning_rate": 1.919621193689569e-06, | |
| "loss": 0.3882204294204712, | |
| "step": 2148 | |
| }, | |
| { | |
| "epoch": 4.5358649789029535, | |
| "grad_norm": 0.016054954379796982, | |
| "learning_rate": 1.9135583650258873e-06, | |
| "loss": 0.2680031657218933, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 4.540084388185654, | |
| "grad_norm": 2.9162821769714355, | |
| "learning_rate": 1.9075089915152464e-06, | |
| "loss": 0.3421184718608856, | |
| "step": 2152 | |
| }, | |
| { | |
| "epoch": 4.544303797468355, | |
| "grad_norm": 3.484391212463379, | |
| "learning_rate": 1.9014731058932827e-06, | |
| "loss": 0.5047986507415771, | |
| "step": 2154 | |
| }, | |
| { | |
| "epoch": 4.548523206751055, | |
| "grad_norm": 1.9593169689178467, | |
| "learning_rate": 1.8954507408226409e-06, | |
| "loss": 0.46260231733322144, | |
| "step": 2156 | |
| }, | |
| { | |
| "epoch": 4.552742616033755, | |
| "grad_norm": 2.716538667678833, | |
| "learning_rate": 1.8894419288928027e-06, | |
| "loss": 0.5966385006904602, | |
| "step": 2158 | |
| }, | |
| { | |
| "epoch": 4.556962025316456, | |
| "grad_norm": 3.1801514625549316, | |
| "learning_rate": 1.883446702619909e-06, | |
| "loss": 0.37797853350639343, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 4.561181434599156, | |
| "grad_norm": 2.5519282817840576, | |
| "learning_rate": 1.8774650944465816e-06, | |
| "loss": 0.4353446960449219, | |
| "step": 2162 | |
| }, | |
| { | |
| "epoch": 4.565400843881856, | |
| "grad_norm": 3.090348243713379, | |
| "learning_rate": 1.8714971367417503e-06, | |
| "loss": 0.36761924624443054, | |
| "step": 2164 | |
| }, | |
| { | |
| "epoch": 4.569620253164557, | |
| "grad_norm": 2.526357889175415, | |
| "learning_rate": 1.8655428618004757e-06, | |
| "loss": 0.5436191558837891, | |
| "step": 2166 | |
| }, | |
| { | |
| "epoch": 4.5738396624472575, | |
| "grad_norm": 6.702995777130127, | |
| "learning_rate": 1.8596023018437756e-06, | |
| "loss": 0.5698112845420837, | |
| "step": 2168 | |
| }, | |
| { | |
| "epoch": 4.578059071729958, | |
| "grad_norm": 21.6278133392334, | |
| "learning_rate": 1.8536754890184514e-06, | |
| "loss": 0.12127143144607544, | |
| "step": 2170 | |
| }, | |
| { | |
| "epoch": 4.582278481012658, | |
| "grad_norm": 2.4644062519073486, | |
| "learning_rate": 1.8477624553969126e-06, | |
| "loss": 0.3572949767112732, | |
| "step": 2172 | |
| }, | |
| { | |
| "epoch": 4.586497890295359, | |
| "grad_norm": 4.449887275695801, | |
| "learning_rate": 1.8418632329770014e-06, | |
| "loss": 0.4991232752799988, | |
| "step": 2174 | |
| }, | |
| { | |
| "epoch": 4.590717299578059, | |
| "grad_norm": 2.306753396987915, | |
| "learning_rate": 1.8359778536818252e-06, | |
| "loss": 0.6089332103729248, | |
| "step": 2176 | |
| }, | |
| { | |
| "epoch": 4.594936708860759, | |
| "grad_norm": 9.263266563415527, | |
| "learning_rate": 1.8301063493595794e-06, | |
| "loss": 0.44372105598449707, | |
| "step": 2178 | |
| }, | |
| { | |
| "epoch": 4.59915611814346, | |
| "grad_norm": 1.82095205783844, | |
| "learning_rate": 1.824248751783377e-06, | |
| "loss": 0.3401510715484619, | |
| "step": 2180 | |
| }, | |
| { | |
| "epoch": 4.60337552742616, | |
| "grad_norm": 2.3795061111450195, | |
| "learning_rate": 1.8184050926510743e-06, | |
| "loss": 0.5080521106719971, | |
| "step": 2182 | |
| }, | |
| { | |
| "epoch": 4.6075949367088604, | |
| "grad_norm": 29.6896915435791, | |
| "learning_rate": 1.8125754035851018e-06, | |
| "loss": 0.0813543051481247, | |
| "step": 2184 | |
| }, | |
| { | |
| "epoch": 4.6118143459915615, | |
| "grad_norm": 3.2905502319335938, | |
| "learning_rate": 1.806759716132293e-06, | |
| "loss": 0.5500208139419556, | |
| "step": 2186 | |
| }, | |
| { | |
| "epoch": 4.616033755274262, | |
| "grad_norm": 2.1505532264709473, | |
| "learning_rate": 1.800958061763712e-06, | |
| "loss": 0.26043060421943665, | |
| "step": 2188 | |
| }, | |
| { | |
| "epoch": 4.620253164556962, | |
| "grad_norm": 2.0198612213134766, | |
| "learning_rate": 1.7951704718744841e-06, | |
| "loss": 0.6140601634979248, | |
| "step": 2190 | |
| }, | |
| { | |
| "epoch": 4.624472573839663, | |
| "grad_norm": 2.324085235595703, | |
| "learning_rate": 1.7893969777836265e-06, | |
| "loss": 0.20785805583000183, | |
| "step": 2192 | |
| }, | |
| { | |
| "epoch": 4.628691983122363, | |
| "grad_norm": 2.0707149505615234, | |
| "learning_rate": 1.7836376107338783e-06, | |
| "loss": 0.5573110580444336, | |
| "step": 2194 | |
| }, | |
| { | |
| "epoch": 4.632911392405063, | |
| "grad_norm": 3.6579232215881348, | |
| "learning_rate": 1.7778924018915302e-06, | |
| "loss": 0.2335490882396698, | |
| "step": 2196 | |
| }, | |
| { | |
| "epoch": 4.637130801687764, | |
| "grad_norm": 2.841978073120117, | |
| "learning_rate": 1.772161382346259e-06, | |
| "loss": 0.3419453501701355, | |
| "step": 2198 | |
| }, | |
| { | |
| "epoch": 4.641350210970464, | |
| "grad_norm": 2.595341682434082, | |
| "learning_rate": 1.7664445831109566e-06, | |
| "loss": 0.535962700843811, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 4.6455696202531644, | |
| "grad_norm": 2.8027384281158447, | |
| "learning_rate": 1.7607420351215616e-06, | |
| "loss": 0.4780561923980713, | |
| "step": 2202 | |
| }, | |
| { | |
| "epoch": 4.649789029535865, | |
| "grad_norm": 0.4611937701702118, | |
| "learning_rate": 1.7550537692368942e-06, | |
| "loss": 0.3059866428375244, | |
| "step": 2204 | |
| }, | |
| { | |
| "epoch": 4.654008438818566, | |
| "grad_norm": 1.5873767137527466, | |
| "learning_rate": 1.74937981623849e-06, | |
| "loss": 0.46250712871551514, | |
| "step": 2206 | |
| }, | |
| { | |
| "epoch": 4.658227848101266, | |
| "grad_norm": 1.6936619281768799, | |
| "learning_rate": 1.7437202068304287e-06, | |
| "loss": 0.452869713306427, | |
| "step": 2208 | |
| }, | |
| { | |
| "epoch": 4.662447257383966, | |
| "grad_norm": 2.697862386703491, | |
| "learning_rate": 1.7380749716391737e-06, | |
| "loss": 0.5035865306854248, | |
| "step": 2210 | |
| }, | |
| { | |
| "epoch": 4.666666666666667, | |
| "grad_norm": 3.739734649658203, | |
| "learning_rate": 1.7324441412134013e-06, | |
| "loss": 0.3993757367134094, | |
| "step": 2212 | |
| }, | |
| { | |
| "epoch": 4.670886075949367, | |
| "grad_norm": 7.6267852783203125, | |
| "learning_rate": 1.7268277460238397e-06, | |
| "loss": 0.3390964865684509, | |
| "step": 2214 | |
| }, | |
| { | |
| "epoch": 4.675105485232067, | |
| "grad_norm": 1.8734283447265625, | |
| "learning_rate": 1.7212258164631027e-06, | |
| "loss": 0.5280478000640869, | |
| "step": 2216 | |
| }, | |
| { | |
| "epoch": 4.679324894514768, | |
| "grad_norm": 6.668360710144043, | |
| "learning_rate": 1.7156383828455204e-06, | |
| "loss": 0.4059964418411255, | |
| "step": 2218 | |
| }, | |
| { | |
| "epoch": 4.6835443037974684, | |
| "grad_norm": 2.475369930267334, | |
| "learning_rate": 1.710065475406983e-06, | |
| "loss": 0.4801621735095978, | |
| "step": 2220 | |
| }, | |
| { | |
| "epoch": 4.687763713080169, | |
| "grad_norm": 2.3857297897338867, | |
| "learning_rate": 1.7045071243047728e-06, | |
| "loss": 0.0963069349527359, | |
| "step": 2222 | |
| }, | |
| { | |
| "epoch": 4.691983122362869, | |
| "grad_norm": 2.433400869369507, | |
| "learning_rate": 1.6989633596174029e-06, | |
| "loss": 0.47518980503082275, | |
| "step": 2224 | |
| }, | |
| { | |
| "epoch": 4.69620253164557, | |
| "grad_norm": 2.3119516372680664, | |
| "learning_rate": 1.6934342113444524e-06, | |
| "loss": 0.2933182120323181, | |
| "step": 2226 | |
| }, | |
| { | |
| "epoch": 4.70042194092827, | |
| "grad_norm": 12.671791076660156, | |
| "learning_rate": 1.6879197094064043e-06, | |
| "loss": 0.08877721428871155, | |
| "step": 2228 | |
| }, | |
| { | |
| "epoch": 4.70464135021097, | |
| "grad_norm": 2.207108497619629, | |
| "learning_rate": 1.6824198836444858e-06, | |
| "loss": 0.622957706451416, | |
| "step": 2230 | |
| }, | |
| { | |
| "epoch": 4.708860759493671, | |
| "grad_norm": 6.14840030670166, | |
| "learning_rate": 1.676934763820503e-06, | |
| "loss": 0.5102095603942871, | |
| "step": 2232 | |
| }, | |
| { | |
| "epoch": 4.713080168776371, | |
| "grad_norm": 4.767087936401367, | |
| "learning_rate": 1.6714643796166835e-06, | |
| "loss": 0.5292322635650635, | |
| "step": 2234 | |
| }, | |
| { | |
| "epoch": 4.717299578059071, | |
| "grad_norm": 0.11136994510889053, | |
| "learning_rate": 1.6660087606355153e-06, | |
| "loss": 0.31627708673477173, | |
| "step": 2236 | |
| }, | |
| { | |
| "epoch": 4.7215189873417724, | |
| "grad_norm": 4.295990467071533, | |
| "learning_rate": 1.6605679363995848e-06, | |
| "loss": 0.33531737327575684, | |
| "step": 2238 | |
| }, | |
| { | |
| "epoch": 4.725738396624473, | |
| "grad_norm": 6.078010559082031, | |
| "learning_rate": 1.6551419363514182e-06, | |
| "loss": 0.43265092372894287, | |
| "step": 2240 | |
| }, | |
| { | |
| "epoch": 4.729957805907173, | |
| "grad_norm": 3.5457258224487305, | |
| "learning_rate": 1.6497307898533218e-06, | |
| "loss": 0.6657654047012329, | |
| "step": 2242 | |
| }, | |
| { | |
| "epoch": 4.734177215189874, | |
| "grad_norm": 0.8174402713775635, | |
| "learning_rate": 1.6443345261872228e-06, | |
| "loss": 0.05635060369968414, | |
| "step": 2244 | |
| }, | |
| { | |
| "epoch": 4.738396624472574, | |
| "grad_norm": 2.277449369430542, | |
| "learning_rate": 1.6389531745545138e-06, | |
| "loss": 0.40952473878860474, | |
| "step": 2246 | |
| }, | |
| { | |
| "epoch": 4.742616033755274, | |
| "grad_norm": 2.1519405841827393, | |
| "learning_rate": 1.6335867640758876e-06, | |
| "loss": 0.6268118023872375, | |
| "step": 2248 | |
| }, | |
| { | |
| "epoch": 4.746835443037975, | |
| "grad_norm": 10.723348617553711, | |
| "learning_rate": 1.6282353237911881e-06, | |
| "loss": 0.08097459375858307, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 4.751054852320675, | |
| "grad_norm": 6.452489852905273, | |
| "learning_rate": 1.6228988826592484e-06, | |
| "loss": 0.5121550559997559, | |
| "step": 2252 | |
| }, | |
| { | |
| "epoch": 4.755274261603375, | |
| "grad_norm": 3.1199183464050293, | |
| "learning_rate": 1.617577469557735e-06, | |
| "loss": 0.417529433965683, | |
| "step": 2254 | |
| }, | |
| { | |
| "epoch": 4.759493670886076, | |
| "grad_norm": 1.670754075050354, | |
| "learning_rate": 1.6122711132829917e-06, | |
| "loss": 0.23685501515865326, | |
| "step": 2256 | |
| }, | |
| { | |
| "epoch": 4.763713080168777, | |
| "grad_norm": 3.9786603450775146, | |
| "learning_rate": 1.606979842549883e-06, | |
| "loss": 0.08441432565450668, | |
| "step": 2258 | |
| }, | |
| { | |
| "epoch": 4.767932489451477, | |
| "grad_norm": 3.4749438762664795, | |
| "learning_rate": 1.60170368599164e-06, | |
| "loss": 0.1604347825050354, | |
| "step": 2260 | |
| }, | |
| { | |
| "epoch": 4.772151898734177, | |
| "grad_norm": 2.590517044067383, | |
| "learning_rate": 1.5964426721597048e-06, | |
| "loss": 0.3043164014816284, | |
| "step": 2262 | |
| }, | |
| { | |
| "epoch": 4.776371308016878, | |
| "grad_norm": 3.320221185684204, | |
| "learning_rate": 1.5911968295235756e-06, | |
| "loss": 0.5432933568954468, | |
| "step": 2264 | |
| }, | |
| { | |
| "epoch": 4.780590717299578, | |
| "grad_norm": 6.788969993591309, | |
| "learning_rate": 1.5859661864706533e-06, | |
| "loss": 0.4840553402900696, | |
| "step": 2266 | |
| }, | |
| { | |
| "epoch": 4.784810126582278, | |
| "grad_norm": 5.413600444793701, | |
| "learning_rate": 1.5807507713060879e-06, | |
| "loss": 0.6614431142807007, | |
| "step": 2268 | |
| }, | |
| { | |
| "epoch": 4.789029535864979, | |
| "grad_norm": 3.4263274669647217, | |
| "learning_rate": 1.5755506122526248e-06, | |
| "loss": 0.4286192059516907, | |
| "step": 2270 | |
| }, | |
| { | |
| "epoch": 4.793248945147679, | |
| "grad_norm": 3.0580050945281982, | |
| "learning_rate": 1.5703657374504516e-06, | |
| "loss": 0.7800706624984741, | |
| "step": 2272 | |
| }, | |
| { | |
| "epoch": 4.7974683544303796, | |
| "grad_norm": 6.522762775421143, | |
| "learning_rate": 1.565196174957049e-06, | |
| "loss": 0.2070183902978897, | |
| "step": 2274 | |
| }, | |
| { | |
| "epoch": 4.80168776371308, | |
| "grad_norm": 1.439235806465149, | |
| "learning_rate": 1.5600419527470331e-06, | |
| "loss": 0.10173705220222473, | |
| "step": 2276 | |
| }, | |
| { | |
| "epoch": 4.805907172995781, | |
| "grad_norm": 3.4041426181793213, | |
| "learning_rate": 1.5549030987120095e-06, | |
| "loss": 0.3341836929321289, | |
| "step": 2278 | |
| }, | |
| { | |
| "epoch": 4.810126582278481, | |
| "grad_norm": 3.3857309818267822, | |
| "learning_rate": 1.5497796406604202e-06, | |
| "loss": 0.20992735028266907, | |
| "step": 2280 | |
| }, | |
| { | |
| "epoch": 4.814345991561181, | |
| "grad_norm": 2.2873666286468506, | |
| "learning_rate": 1.5446716063173935e-06, | |
| "loss": 0.424138605594635, | |
| "step": 2282 | |
| }, | |
| { | |
| "epoch": 4.818565400843882, | |
| "grad_norm": 8.471506118774414, | |
| "learning_rate": 1.5395790233245924e-06, | |
| "loss": 0.5139745473861694, | |
| "step": 2284 | |
| }, | |
| { | |
| "epoch": 4.822784810126582, | |
| "grad_norm": 2.079385757446289, | |
| "learning_rate": 1.5345019192400677e-06, | |
| "loss": 0.494828999042511, | |
| "step": 2286 | |
| }, | |
| { | |
| "epoch": 4.827004219409282, | |
| "grad_norm": 2.116379737854004, | |
| "learning_rate": 1.529440321538107e-06, | |
| "loss": 0.12557630240917206, | |
| "step": 2288 | |
| }, | |
| { | |
| "epoch": 4.831223628691983, | |
| "grad_norm": 5.065046787261963, | |
| "learning_rate": 1.5243942576090872e-06, | |
| "loss": 0.6678446531295776, | |
| "step": 2290 | |
| }, | |
| { | |
| "epoch": 4.8354430379746836, | |
| "grad_norm": 2.4034457206726074, | |
| "learning_rate": 1.5193637547593231e-06, | |
| "loss": 0.4627326428890228, | |
| "step": 2292 | |
| }, | |
| { | |
| "epoch": 4.839662447257384, | |
| "grad_norm": 2.859379291534424, | |
| "learning_rate": 1.5143488402109239e-06, | |
| "loss": 0.44882258772850037, | |
| "step": 2294 | |
| }, | |
| { | |
| "epoch": 4.843881856540085, | |
| "grad_norm": 1.7705358266830444, | |
| "learning_rate": 1.509349541101646e-06, | |
| "loss": 0.4356788694858551, | |
| "step": 2296 | |
| }, | |
| { | |
| "epoch": 4.848101265822785, | |
| "grad_norm": 2.958854913711548, | |
| "learning_rate": 1.5043658844847414e-06, | |
| "loss": 0.7101269960403442, | |
| "step": 2298 | |
| }, | |
| { | |
| "epoch": 4.852320675105485, | |
| "grad_norm": 5.127024173736572, | |
| "learning_rate": 1.499397897328815e-06, | |
| "loss": 0.6652213931083679, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 4.856540084388186, | |
| "grad_norm": 0.7210382223129272, | |
| "learning_rate": 1.4944456065176785e-06, | |
| "loss": 0.23934832215309143, | |
| "step": 2302 | |
| }, | |
| { | |
| "epoch": 4.860759493670886, | |
| "grad_norm": 2.468538284301758, | |
| "learning_rate": 1.4895090388502043e-06, | |
| "loss": 0.26671305298805237, | |
| "step": 2304 | |
| }, | |
| { | |
| "epoch": 4.864978902953586, | |
| "grad_norm": 2.5370748043060303, | |
| "learning_rate": 1.4845882210401776e-06, | |
| "loss": 0.4928842782974243, | |
| "step": 2306 | |
| }, | |
| { | |
| "epoch": 4.869198312236287, | |
| "grad_norm": 2.779625654220581, | |
| "learning_rate": 1.479683179716159e-06, | |
| "loss": 0.2867523729801178, | |
| "step": 2308 | |
| }, | |
| { | |
| "epoch": 4.8734177215189876, | |
| "grad_norm": 4.043989658355713, | |
| "learning_rate": 1.4747939414213334e-06, | |
| "loss": 0.4452981948852539, | |
| "step": 2310 | |
| }, | |
| { | |
| "epoch": 4.877637130801688, | |
| "grad_norm": 2.612654209136963, | |
| "learning_rate": 1.4699205326133696e-06, | |
| "loss": 0.47218436002731323, | |
| "step": 2312 | |
| }, | |
| { | |
| "epoch": 4.881856540084388, | |
| "grad_norm": 4.8351593017578125, | |
| "learning_rate": 1.4650629796642774e-06, | |
| "loss": 0.5447877049446106, | |
| "step": 2314 | |
| }, | |
| { | |
| "epoch": 4.886075949367089, | |
| "grad_norm": 2.5699872970581055, | |
| "learning_rate": 1.460221308860262e-06, | |
| "loss": 0.5671508312225342, | |
| "step": 2316 | |
| }, | |
| { | |
| "epoch": 4.890295358649789, | |
| "grad_norm": 3.086909055709839, | |
| "learning_rate": 1.4553955464015868e-06, | |
| "loss": 0.39557531476020813, | |
| "step": 2318 | |
| }, | |
| { | |
| "epoch": 4.894514767932489, | |
| "grad_norm": 5.661040782928467, | |
| "learning_rate": 1.4505857184024262e-06, | |
| "loss": 0.44218748807907104, | |
| "step": 2320 | |
| }, | |
| { | |
| "epoch": 4.89873417721519, | |
| "grad_norm": 3.8085811138153076, | |
| "learning_rate": 1.4457918508907268e-06, | |
| "loss": 0.3575529456138611, | |
| "step": 2322 | |
| }, | |
| { | |
| "epoch": 4.90295358649789, | |
| "grad_norm": 2.3151283264160156, | |
| "learning_rate": 1.441013969808068e-06, | |
| "loss": 0.5917726755142212, | |
| "step": 2324 | |
| }, | |
| { | |
| "epoch": 4.9071729957805905, | |
| "grad_norm": 3.560556650161743, | |
| "learning_rate": 1.4362521010095186e-06, | |
| "loss": 0.33830514550209045, | |
| "step": 2326 | |
| }, | |
| { | |
| "epoch": 4.911392405063291, | |
| "grad_norm": 2.334346294403076, | |
| "learning_rate": 1.4315062702634997e-06, | |
| "loss": 0.4876287281513214, | |
| "step": 2328 | |
| }, | |
| { | |
| "epoch": 4.915611814345992, | |
| "grad_norm": 2.1452908515930176, | |
| "learning_rate": 1.426776503251643e-06, | |
| "loss": 0.6366673111915588, | |
| "step": 2330 | |
| }, | |
| { | |
| "epoch": 4.919831223628692, | |
| "grad_norm": 0.41547343134880066, | |
| "learning_rate": 1.4220628255686533e-06, | |
| "loss": 0.25237974524497986, | |
| "step": 2332 | |
| }, | |
| { | |
| "epoch": 4.924050632911392, | |
| "grad_norm": 54.84702682495117, | |
| "learning_rate": 1.4173652627221686e-06, | |
| "loss": 0.43499624729156494, | |
| "step": 2334 | |
| }, | |
| { | |
| "epoch": 4.928270042194093, | |
| "grad_norm": 2.2124152183532715, | |
| "learning_rate": 1.4126838401326243e-06, | |
| "loss": 0.5627238750457764, | |
| "step": 2336 | |
| }, | |
| { | |
| "epoch": 4.932489451476793, | |
| "grad_norm": 7.185441017150879, | |
| "learning_rate": 1.4080185831331126e-06, | |
| "loss": 0.25834035873413086, | |
| "step": 2338 | |
| }, | |
| { | |
| "epoch": 4.936708860759493, | |
| "grad_norm": 5.250967502593994, | |
| "learning_rate": 1.4033695169692485e-06, | |
| "loss": 0.2957782447338104, | |
| "step": 2340 | |
| }, | |
| { | |
| "epoch": 4.940928270042194, | |
| "grad_norm": 6.259135723114014, | |
| "learning_rate": 1.398736666799031e-06, | |
| "loss": 0.6378402709960938, | |
| "step": 2342 | |
| }, | |
| { | |
| "epoch": 4.9451476793248945, | |
| "grad_norm": 16.73641586303711, | |
| "learning_rate": 1.3941200576927088e-06, | |
| "loss": 0.35595816373825073, | |
| "step": 2344 | |
| }, | |
| { | |
| "epoch": 4.949367088607595, | |
| "grad_norm": 3.1287739276885986, | |
| "learning_rate": 1.3895197146326414e-06, | |
| "loss": 0.7204777002334595, | |
| "step": 2346 | |
| }, | |
| { | |
| "epoch": 4.953586497890296, | |
| "grad_norm": 2.49485445022583, | |
| "learning_rate": 1.3849356625131692e-06, | |
| "loss": 0.3135877847671509, | |
| "step": 2348 | |
| }, | |
| { | |
| "epoch": 4.957805907172996, | |
| "grad_norm": 3.383075714111328, | |
| "learning_rate": 1.3803679261404716e-06, | |
| "loss": 0.49237698316574097, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 4.962025316455696, | |
| "grad_norm": 2.2556025981903076, | |
| "learning_rate": 1.3758165302324397e-06, | |
| "loss": 0.16111743450164795, | |
| "step": 2352 | |
| }, | |
| { | |
| "epoch": 4.966244725738397, | |
| "grad_norm": 4.6603546142578125, | |
| "learning_rate": 1.3712814994185395e-06, | |
| "loss": 0.6392441987991333, | |
| "step": 2354 | |
| }, | |
| { | |
| "epoch": 4.970464135021097, | |
| "grad_norm": 12.963358879089355, | |
| "learning_rate": 1.366762858239679e-06, | |
| "loss": 0.35483598709106445, | |
| "step": 2356 | |
| }, | |
| { | |
| "epoch": 4.974683544303797, | |
| "grad_norm": 3.961883068084717, | |
| "learning_rate": 1.3622606311480729e-06, | |
| "loss": 0.5934839248657227, | |
| "step": 2358 | |
| }, | |
| { | |
| "epoch": 4.978902953586498, | |
| "grad_norm": 1.0137317180633545, | |
| "learning_rate": 1.3577748425071152e-06, | |
| "loss": 0.28861305117607117, | |
| "step": 2360 | |
| }, | |
| { | |
| "epoch": 4.9831223628691985, | |
| "grad_norm": 3.8444621562957764, | |
| "learning_rate": 1.3533055165912433e-06, | |
| "loss": 0.5528509616851807, | |
| "step": 2362 | |
| }, | |
| { | |
| "epoch": 4.987341772151899, | |
| "grad_norm": 12.442330360412598, | |
| "learning_rate": 1.3488526775858087e-06, | |
| "loss": 0.6871875524520874, | |
| "step": 2364 | |
| }, | |
| { | |
| "epoch": 4.991561181434599, | |
| "grad_norm": 2.018998622894287, | |
| "learning_rate": 1.3444163495869444e-06, | |
| "loss": 0.6601129770278931, | |
| "step": 2366 | |
| }, | |
| { | |
| "epoch": 4.9957805907173, | |
| "grad_norm": 4.588871002197266, | |
| "learning_rate": 1.3399965566014363e-06, | |
| "loss": 0.3472335934638977, | |
| "step": 2368 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "grad_norm": 20.997011184692383, | |
| "learning_rate": 1.3355933225465938e-06, | |
| "loss": 0.1488598883152008, | |
| "step": 2370 | |
| }, | |
| { | |
| "epoch": 5.0042194092827, | |
| "grad_norm": 1.974225401878357, | |
| "learning_rate": 1.3312066712501176e-06, | |
| "loss": 0.4649539589881897, | |
| "step": 2372 | |
| }, | |
| { | |
| "epoch": 5.008438818565401, | |
| "grad_norm": 2.101741075515747, | |
| "learning_rate": 1.3268366264499723e-06, | |
| "loss": 0.40653684735298157, | |
| "step": 2374 | |
| }, | |
| { | |
| "epoch": 5.012658227848101, | |
| "grad_norm": 3.2043211460113525, | |
| "learning_rate": 1.322483211794259e-06, | |
| "loss": 0.20105722546577454, | |
| "step": 2376 | |
| }, | |
| { | |
| "epoch": 5.0168776371308015, | |
| "grad_norm": 3.830211877822876, | |
| "learning_rate": 1.3181464508410858e-06, | |
| "loss": 0.4869913160800934, | |
| "step": 2378 | |
| }, | |
| { | |
| "epoch": 5.0210970464135025, | |
| "grad_norm": 3.2576708793640137, | |
| "learning_rate": 1.3138263670584392e-06, | |
| "loss": 0.3144640028476715, | |
| "step": 2380 | |
| }, | |
| { | |
| "epoch": 5.025316455696203, | |
| "grad_norm": 2.3268511295318604, | |
| "learning_rate": 1.309522983824061e-06, | |
| "loss": 0.4795665144920349, | |
| "step": 2382 | |
| }, | |
| { | |
| "epoch": 5.029535864978903, | |
| "grad_norm": 4.2797112464904785, | |
| "learning_rate": 1.3052363244253188e-06, | |
| "loss": 0.303976833820343, | |
| "step": 2384 | |
| }, | |
| { | |
| "epoch": 5.033755274261603, | |
| "grad_norm": 2.950766086578369, | |
| "learning_rate": 1.3009664120590806e-06, | |
| "loss": 0.2566067576408386, | |
| "step": 2386 | |
| }, | |
| { | |
| "epoch": 5.037974683544304, | |
| "grad_norm": 4.176736354827881, | |
| "learning_rate": 1.296713269831589e-06, | |
| "loss": 0.33072197437286377, | |
| "step": 2388 | |
| }, | |
| { | |
| "epoch": 5.042194092827004, | |
| "grad_norm": 0.061758268624544144, | |
| "learning_rate": 1.2924769207583368e-06, | |
| "loss": 0.17066842317581177, | |
| "step": 2390 | |
| }, | |
| { | |
| "epoch": 5.046413502109704, | |
| "grad_norm": 4.471809387207031, | |
| "learning_rate": 1.2882573877639427e-06, | |
| "loss": 0.24980589747428894, | |
| "step": 2392 | |
| }, | |
| { | |
| "epoch": 5.050632911392405, | |
| "grad_norm": 4.955497741699219, | |
| "learning_rate": 1.2840546936820263e-06, | |
| "loss": 0.2576749622821808, | |
| "step": 2394 | |
| }, | |
| { | |
| "epoch": 5.0548523206751055, | |
| "grad_norm": 0.7454743981361389, | |
| "learning_rate": 1.2798688612550838e-06, | |
| "loss": 0.040055617690086365, | |
| "step": 2396 | |
| }, | |
| { | |
| "epoch": 5.059071729957806, | |
| "grad_norm": 2.270764112472534, | |
| "learning_rate": 1.2756999131343677e-06, | |
| "loss": 0.4545499086380005, | |
| "step": 2398 | |
| }, | |
| { | |
| "epoch": 5.063291139240507, | |
| "grad_norm": 8.608718872070312, | |
| "learning_rate": 1.271547871879762e-06, | |
| "loss": 0.46691781282424927, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 5.067510548523207, | |
| "grad_norm": 4.590333938598633, | |
| "learning_rate": 1.267412759959661e-06, | |
| "loss": 0.3534661829471588, | |
| "step": 2402 | |
| }, | |
| { | |
| "epoch": 5.071729957805907, | |
| "grad_norm": 6.176363468170166, | |
| "learning_rate": 1.2632945997508469e-06, | |
| "loss": 0.03008463606238365, | |
| "step": 2404 | |
| }, | |
| { | |
| "epoch": 5.075949367088608, | |
| "grad_norm": 4.096558570861816, | |
| "learning_rate": 1.25919341353837e-06, | |
| "loss": 0.4609118103981018, | |
| "step": 2406 | |
| }, | |
| { | |
| "epoch": 5.080168776371308, | |
| "grad_norm": 1.3339556455612183, | |
| "learning_rate": 1.2551092235154265e-06, | |
| "loss": 0.25634127855300903, | |
| "step": 2408 | |
| }, | |
| { | |
| "epoch": 5.084388185654008, | |
| "grad_norm": 3.1009860038757324, | |
| "learning_rate": 1.2510420517832399e-06, | |
| "loss": 0.3237183690071106, | |
| "step": 2410 | |
| }, | |
| { | |
| "epoch": 5.0886075949367084, | |
| "grad_norm": 6.112014293670654, | |
| "learning_rate": 1.2469919203509406e-06, | |
| "loss": 0.45163053274154663, | |
| "step": 2412 | |
| }, | |
| { | |
| "epoch": 5.0928270042194095, | |
| "grad_norm": 1.8072830438613892, | |
| "learning_rate": 1.2429588511354468e-06, | |
| "loss": 0.3245161175727844, | |
| "step": 2414 | |
| }, | |
| { | |
| "epoch": 5.09704641350211, | |
| "grad_norm": 1.5899354219436646, | |
| "learning_rate": 1.2389428659613465e-06, | |
| "loss": 0.09791871905326843, | |
| "step": 2416 | |
| }, | |
| { | |
| "epoch": 5.10126582278481, | |
| "grad_norm": 2.595155954360962, | |
| "learning_rate": 1.2349439865607783e-06, | |
| "loss": 0.20728906989097595, | |
| "step": 2418 | |
| }, | |
| { | |
| "epoch": 5.105485232067511, | |
| "grad_norm": 2.6492655277252197, | |
| "learning_rate": 1.2309622345733153e-06, | |
| "loss": 0.52880859375, | |
| "step": 2420 | |
| }, | |
| { | |
| "epoch": 5.109704641350211, | |
| "grad_norm": 3.113187789916992, | |
| "learning_rate": 1.226997631545846e-06, | |
| "loss": 0.34188008308410645, | |
| "step": 2422 | |
| }, | |
| { | |
| "epoch": 5.113924050632911, | |
| "grad_norm": 0.3923889100551605, | |
| "learning_rate": 1.2230501989324606e-06, | |
| "loss": 0.39657163619995117, | |
| "step": 2424 | |
| }, | |
| { | |
| "epoch": 5.118143459915612, | |
| "grad_norm": 2.9111595153808594, | |
| "learning_rate": 1.219119958094331e-06, | |
| "loss": 0.37215137481689453, | |
| "step": 2426 | |
| }, | |
| { | |
| "epoch": 5.122362869198312, | |
| "grad_norm": 5.908500671386719, | |
| "learning_rate": 1.215206930299599e-06, | |
| "loss": 0.28079548478126526, | |
| "step": 2428 | |
| }, | |
| { | |
| "epoch": 5.1265822784810124, | |
| "grad_norm": 0.15474487841129303, | |
| "learning_rate": 1.2113111367232582e-06, | |
| "loss": 0.16562075912952423, | |
| "step": 2430 | |
| }, | |
| { | |
| "epoch": 5.1308016877637135, | |
| "grad_norm": 2.4118106365203857, | |
| "learning_rate": 1.2074325984470428e-06, | |
| "loss": 0.3783321678638458, | |
| "step": 2432 | |
| }, | |
| { | |
| "epoch": 5.135021097046414, | |
| "grad_norm": 2.499565601348877, | |
| "learning_rate": 1.2035713364593102e-06, | |
| "loss": 0.4123075604438782, | |
| "step": 2434 | |
| }, | |
| { | |
| "epoch": 5.139240506329114, | |
| "grad_norm": 3.3785624504089355, | |
| "learning_rate": 1.1997273716549284e-06, | |
| "loss": 0.25959959626197815, | |
| "step": 2436 | |
| }, | |
| { | |
| "epoch": 5.143459915611814, | |
| "grad_norm": 3.4707491397857666, | |
| "learning_rate": 1.195900724835164e-06, | |
| "loss": 0.03673313558101654, | |
| "step": 2438 | |
| }, | |
| { | |
| "epoch": 5.147679324894515, | |
| "grad_norm": 2.67907977104187, | |
| "learning_rate": 1.1920914167075696e-06, | |
| "loss": 0.2947133779525757, | |
| "step": 2440 | |
| }, | |
| { | |
| "epoch": 5.151898734177215, | |
| "grad_norm": 3.3348686695098877, | |
| "learning_rate": 1.1882994678858675e-06, | |
| "loss": 0.3776189684867859, | |
| "step": 2442 | |
| }, | |
| { | |
| "epoch": 5.156118143459915, | |
| "grad_norm": 3.6222927570343018, | |
| "learning_rate": 1.1845248988898464e-06, | |
| "loss": 0.2443552017211914, | |
| "step": 2444 | |
| }, | |
| { | |
| "epoch": 5.160337552742616, | |
| "grad_norm": 3.27504301071167, | |
| "learning_rate": 1.1807677301452437e-06, | |
| "loss": 0.5414304733276367, | |
| "step": 2446 | |
| }, | |
| { | |
| "epoch": 5.1645569620253164, | |
| "grad_norm": 3.2731869220733643, | |
| "learning_rate": 1.1770279819836355e-06, | |
| "loss": 0.18883806467056274, | |
| "step": 2448 | |
| }, | |
| { | |
| "epoch": 5.168776371308017, | |
| "grad_norm": 3.275451421737671, | |
| "learning_rate": 1.1733056746423304e-06, | |
| "loss": 0.37931862473487854, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 5.172995780590718, | |
| "grad_norm": 0.034749243408441544, | |
| "learning_rate": 1.1696008282642559e-06, | |
| "loss": 0.20449881255626678, | |
| "step": 2452 | |
| }, | |
| { | |
| "epoch": 5.177215189873418, | |
| "grad_norm": 4.930509567260742, | |
| "learning_rate": 1.165913462897852e-06, | |
| "loss": 0.035537637770175934, | |
| "step": 2454 | |
| }, | |
| { | |
| "epoch": 5.181434599156118, | |
| "grad_norm": 1.7042666673660278, | |
| "learning_rate": 1.1622435984969602e-06, | |
| "loss": 0.20552217960357666, | |
| "step": 2456 | |
| }, | |
| { | |
| "epoch": 5.185654008438819, | |
| "grad_norm": 14.41930103302002, | |
| "learning_rate": 1.1585912549207196e-06, | |
| "loss": 0.3709006607532501, | |
| "step": 2458 | |
| }, | |
| { | |
| "epoch": 5.189873417721519, | |
| "grad_norm": 6.651240348815918, | |
| "learning_rate": 1.1549564519334556e-06, | |
| "loss": 0.18409161269664764, | |
| "step": 2460 | |
| }, | |
| { | |
| "epoch": 5.194092827004219, | |
| "grad_norm": 2.1087753772735596, | |
| "learning_rate": 1.1513392092045736e-06, | |
| "loss": 0.39856773614883423, | |
| "step": 2462 | |
| }, | |
| { | |
| "epoch": 5.198312236286919, | |
| "grad_norm": 2.5043351650238037, | |
| "learning_rate": 1.147739546308455e-06, | |
| "loss": 0.3595339059829712, | |
| "step": 2464 | |
| }, | |
| { | |
| "epoch": 5.2025316455696204, | |
| "grad_norm": 3.3729639053344727, | |
| "learning_rate": 1.1441574827243478e-06, | |
| "loss": 0.17214104533195496, | |
| "step": 2466 | |
| }, | |
| { | |
| "epoch": 5.206751054852321, | |
| "grad_norm": 2.227221965789795, | |
| "learning_rate": 1.1405930378362648e-06, | |
| "loss": 0.3033697009086609, | |
| "step": 2468 | |
| }, | |
| { | |
| "epoch": 5.210970464135021, | |
| "grad_norm": 5.146759510040283, | |
| "learning_rate": 1.1370462309328743e-06, | |
| "loss": 0.36619800329208374, | |
| "step": 2470 | |
| }, | |
| { | |
| "epoch": 5.215189873417722, | |
| "grad_norm": 3.0241358280181885, | |
| "learning_rate": 1.1335170812073999e-06, | |
| "loss": 0.30589285492897034, | |
| "step": 2472 | |
| }, | |
| { | |
| "epoch": 5.219409282700422, | |
| "grad_norm": 2.229212522506714, | |
| "learning_rate": 1.1300056077575154e-06, | |
| "loss": 0.3369923233985901, | |
| "step": 2474 | |
| }, | |
| { | |
| "epoch": 5.223628691983122, | |
| "grad_norm": 2.9154298305511475, | |
| "learning_rate": 1.1265118295852404e-06, | |
| "loss": 0.19644200801849365, | |
| "step": 2476 | |
| }, | |
| { | |
| "epoch": 5.227848101265823, | |
| "grad_norm": 5.483319282531738, | |
| "learning_rate": 1.1230357655968371e-06, | |
| "loss": 0.06274639070034027, | |
| "step": 2478 | |
| }, | |
| { | |
| "epoch": 5.232067510548523, | |
| "grad_norm": 0.5124228596687317, | |
| "learning_rate": 1.119577434602711e-06, | |
| "loss": 0.20770075917243958, | |
| "step": 2480 | |
| }, | |
| { | |
| "epoch": 5.236286919831223, | |
| "grad_norm": 2.4156696796417236, | |
| "learning_rate": 1.116136855317307e-06, | |
| "loss": 0.29468050599098206, | |
| "step": 2482 | |
| }, | |
| { | |
| "epoch": 5.2405063291139244, | |
| "grad_norm": 1.4421368837356567, | |
| "learning_rate": 1.1127140463590055e-06, | |
| "loss": 0.23361340165138245, | |
| "step": 2484 | |
| }, | |
| { | |
| "epoch": 5.244725738396625, | |
| "grad_norm": 6.403635025024414, | |
| "learning_rate": 1.1093090262500266e-06, | |
| "loss": 0.4423346519470215, | |
| "step": 2486 | |
| }, | |
| { | |
| "epoch": 5.248945147679325, | |
| "grad_norm": 4.7648606300354, | |
| "learning_rate": 1.105921813416328e-06, | |
| "loss": 0.5721250772476196, | |
| "step": 2488 | |
| }, | |
| { | |
| "epoch": 5.253164556962025, | |
| "grad_norm": 4.072231292724609, | |
| "learning_rate": 1.1025524261875041e-06, | |
| "loss": 0.5335391163825989, | |
| "step": 2490 | |
| }, | |
| { | |
| "epoch": 5.257383966244726, | |
| "grad_norm": 2.6612138748168945, | |
| "learning_rate": 1.0992008827966874e-06, | |
| "loss": 0.5658106803894043, | |
| "step": 2492 | |
| }, | |
| { | |
| "epoch": 5.261603375527426, | |
| "grad_norm": 2.578683614730835, | |
| "learning_rate": 1.095867201380451e-06, | |
| "loss": 0.41335171461105347, | |
| "step": 2494 | |
| }, | |
| { | |
| "epoch": 5.265822784810126, | |
| "grad_norm": 5.410678863525391, | |
| "learning_rate": 1.0925513999787086e-06, | |
| "loss": 0.15254725515842438, | |
| "step": 2496 | |
| }, | |
| { | |
| "epoch": 5.270042194092827, | |
| "grad_norm": 3.9477524757385254, | |
| "learning_rate": 1.0892534965346192e-06, | |
| "loss": 0.44648611545562744, | |
| "step": 2498 | |
| }, | |
| { | |
| "epoch": 5.274261603375527, | |
| "grad_norm": 7.086328506469727, | |
| "learning_rate": 1.0859735088944868e-06, | |
| "loss": 0.16064085066318512, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 5.2784810126582276, | |
| "grad_norm": 2.0906810760498047, | |
| "learning_rate": 1.0827114548076663e-06, | |
| "loss": 0.2642805874347687, | |
| "step": 2502 | |
| }, | |
| { | |
| "epoch": 5.282700421940929, | |
| "grad_norm": 2.104074716567993, | |
| "learning_rate": 1.0794673519264675e-06, | |
| "loss": 0.24389728903770447, | |
| "step": 2504 | |
| }, | |
| { | |
| "epoch": 5.286919831223629, | |
| "grad_norm": 6.062882423400879, | |
| "learning_rate": 1.0762412178060587e-06, | |
| "loss": 0.31626439094543457, | |
| "step": 2506 | |
| }, | |
| { | |
| "epoch": 5.291139240506329, | |
| "grad_norm": 2.444314956665039, | |
| "learning_rate": 1.0730330699043717e-06, | |
| "loss": 0.4520007371902466, | |
| "step": 2508 | |
| }, | |
| { | |
| "epoch": 5.29535864978903, | |
| "grad_norm": 5.062936305999756, | |
| "learning_rate": 1.0698429255820068e-06, | |
| "loss": 0.09191440790891647, | |
| "step": 2510 | |
| }, | |
| { | |
| "epoch": 5.29957805907173, | |
| "grad_norm": 2.516993761062622, | |
| "learning_rate": 1.0666708021021406e-06, | |
| "loss": 0.21818026900291443, | |
| "step": 2512 | |
| }, | |
| { | |
| "epoch": 5.30379746835443, | |
| "grad_norm": 3.7304656505584717, | |
| "learning_rate": 1.063516716630432e-06, | |
| "loss": 0.33304083347320557, | |
| "step": 2514 | |
| }, | |
| { | |
| "epoch": 5.308016877637131, | |
| "grad_norm": 2.554382562637329, | |
| "learning_rate": 1.0603806862349255e-06, | |
| "loss": 0.3670189380645752, | |
| "step": 2516 | |
| }, | |
| { | |
| "epoch": 5.312236286919831, | |
| "grad_norm": 2.6083078384399414, | |
| "learning_rate": 1.0572627278859675e-06, | |
| "loss": 0.4783245027065277, | |
| "step": 2518 | |
| }, | |
| { | |
| "epoch": 5.3164556962025316, | |
| "grad_norm": 4.347960472106934, | |
| "learning_rate": 1.0541628584561052e-06, | |
| "loss": 0.4460408687591553, | |
| "step": 2520 | |
| }, | |
| { | |
| "epoch": 5.320675105485232, | |
| "grad_norm": 4.647004127502441, | |
| "learning_rate": 1.0510810947200003e-06, | |
| "loss": 0.3045784533023834, | |
| "step": 2522 | |
| }, | |
| { | |
| "epoch": 5.324894514767933, | |
| "grad_norm": 3.011627197265625, | |
| "learning_rate": 1.0480174533543372e-06, | |
| "loss": 0.33729833364486694, | |
| "step": 2524 | |
| }, | |
| { | |
| "epoch": 5.329113924050633, | |
| "grad_norm": 2.8627593517303467, | |
| "learning_rate": 1.044971950937734e-06, | |
| "loss": 0.5005810260772705, | |
| "step": 2526 | |
| }, | |
| { | |
| "epoch": 5.333333333333333, | |
| "grad_norm": 4.140803337097168, | |
| "learning_rate": 1.041944603950649e-06, | |
| "loss": 0.44916412234306335, | |
| "step": 2528 | |
| }, | |
| { | |
| "epoch": 5.337552742616034, | |
| "grad_norm": 3.677198886871338, | |
| "learning_rate": 1.038935428775296e-06, | |
| "loss": 0.5101116299629211, | |
| "step": 2530 | |
| }, | |
| { | |
| "epoch": 5.341772151898734, | |
| "grad_norm": 3.9427692890167236, | |
| "learning_rate": 1.0359444416955528e-06, | |
| "loss": 0.3052045702934265, | |
| "step": 2532 | |
| }, | |
| { | |
| "epoch": 5.345991561181434, | |
| "grad_norm": 3.024719715118408, | |
| "learning_rate": 1.0329716588968745e-06, | |
| "loss": 0.2897722125053406, | |
| "step": 2534 | |
| }, | |
| { | |
| "epoch": 5.350210970464135, | |
| "grad_norm": 2.671980619430542, | |
| "learning_rate": 1.030017096466205e-06, | |
| "loss": 0.3393900692462921, | |
| "step": 2536 | |
| }, | |
| { | |
| "epoch": 5.3544303797468356, | |
| "grad_norm": 2.4032411575317383, | |
| "learning_rate": 1.027080770391891e-06, | |
| "loss": 0.40280789136886597, | |
| "step": 2538 | |
| }, | |
| { | |
| "epoch": 5.358649789029536, | |
| "grad_norm": 1.9715185165405273, | |
| "learning_rate": 1.0241626965635942e-06, | |
| "loss": 0.2567780017852783, | |
| "step": 2540 | |
| }, | |
| { | |
| "epoch": 5.362869198312236, | |
| "grad_norm": 14.593756675720215, | |
| "learning_rate": 1.0212628907722062e-06, | |
| "loss": 0.04668917506933212, | |
| "step": 2542 | |
| }, | |
| { | |
| "epoch": 5.367088607594937, | |
| "grad_norm": 0.3638130724430084, | |
| "learning_rate": 1.0183813687097618e-06, | |
| "loss": 0.16572636365890503, | |
| "step": 2544 | |
| }, | |
| { | |
| "epoch": 5.371308016877637, | |
| "grad_norm": 4.365072250366211, | |
| "learning_rate": 1.0155181459693565e-06, | |
| "loss": 0.3552468717098236, | |
| "step": 2546 | |
| }, | |
| { | |
| "epoch": 5.375527426160337, | |
| "grad_norm": 2.508363723754883, | |
| "learning_rate": 1.0126732380450596e-06, | |
| "loss": 0.38389939069747925, | |
| "step": 2548 | |
| }, | |
| { | |
| "epoch": 5.379746835443038, | |
| "grad_norm": 3.3055357933044434, | |
| "learning_rate": 1.0098466603318323e-06, | |
| "loss": 0.31817764043807983, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 5.383966244725738, | |
| "grad_norm": 3.0569851398468018, | |
| "learning_rate": 1.0070384281254425e-06, | |
| "loss": 0.12491938471794128, | |
| "step": 2552 | |
| }, | |
| { | |
| "epoch": 5.3881856540084385, | |
| "grad_norm": 2.883695363998413, | |
| "learning_rate": 1.0042485566223848e-06, | |
| "loss": 0.4344925284385681, | |
| "step": 2554 | |
| }, | |
| { | |
| "epoch": 5.3924050632911396, | |
| "grad_norm": 11.280695915222168, | |
| "learning_rate": 1.0014770609197957e-06, | |
| "loss": 0.3988388180732727, | |
| "step": 2556 | |
| }, | |
| { | |
| "epoch": 5.39662447257384, | |
| "grad_norm": 8.63589096069336, | |
| "learning_rate": 9.98723956015371e-07, | |
| "loss": 0.17392376065254211, | |
| "step": 2558 | |
| }, | |
| { | |
| "epoch": 5.40084388185654, | |
| "grad_norm": 10.322222709655762, | |
| "learning_rate": 9.959892568072881e-07, | |
| "loss": 0.08735622465610504, | |
| "step": 2560 | |
| }, | |
| { | |
| "epoch": 5.405063291139241, | |
| "grad_norm": 3.0921213626861572, | |
| "learning_rate": 9.932729780941237e-07, | |
| "loss": 0.20220640301704407, | |
| "step": 2562 | |
| }, | |
| { | |
| "epoch": 5.409282700421941, | |
| "grad_norm": 2.6708106994628906, | |
| "learning_rate": 9.905751345747734e-07, | |
| "loss": 0.5822624564170837, | |
| "step": 2564 | |
| }, | |
| { | |
| "epoch": 5.413502109704641, | |
| "grad_norm": 2.136763572692871, | |
| "learning_rate": 9.878957408483718e-07, | |
| "loss": 0.16230207681655884, | |
| "step": 2566 | |
| }, | |
| { | |
| "epoch": 5.417721518987342, | |
| "grad_norm": 2.1476376056671143, | |
| "learning_rate": 9.852348114142155e-07, | |
| "loss": 0.3189689517021179, | |
| "step": 2568 | |
| }, | |
| { | |
| "epoch": 5.421940928270042, | |
| "grad_norm": 27.836748123168945, | |
| "learning_rate": 9.825923606716818e-07, | |
| "loss": 0.05949246510863304, | |
| "step": 2570 | |
| }, | |
| { | |
| "epoch": 5.4261603375527425, | |
| "grad_norm": 1.7536920309066772, | |
| "learning_rate": 9.799684029201536e-07, | |
| "loss": 0.23696368932724, | |
| "step": 2572 | |
| }, | |
| { | |
| "epoch": 5.430379746835443, | |
| "grad_norm": 0.2607567608356476, | |
| "learning_rate": 9.773629523589387e-07, | |
| "loss": 0.014276674017310143, | |
| "step": 2574 | |
| }, | |
| { | |
| "epoch": 5.434599156118144, | |
| "grad_norm": 2.661586046218872, | |
| "learning_rate": 9.747760230871965e-07, | |
| "loss": 0.27866894006729126, | |
| "step": 2576 | |
| }, | |
| { | |
| "epoch": 5.438818565400844, | |
| "grad_norm": 6.1830010414123535, | |
| "learning_rate": 9.722076291038605e-07, | |
| "loss": 0.5345185399055481, | |
| "step": 2578 | |
| }, | |
| { | |
| "epoch": 5.443037974683544, | |
| "grad_norm": 2.385190010070801, | |
| "learning_rate": 9.696577843075608e-07, | |
| "loss": 0.4049319624900818, | |
| "step": 2580 | |
| }, | |
| { | |
| "epoch": 5.447257383966245, | |
| "grad_norm": 3.472625732421875, | |
| "learning_rate": 9.671265024965509e-07, | |
| "loss": 0.35417062044143677, | |
| "step": 2582 | |
| }, | |
| { | |
| "epoch": 5.451476793248945, | |
| "grad_norm": 2.2873806953430176, | |
| "learning_rate": 9.646137973686324e-07, | |
| "loss": 0.22758211195468903, | |
| "step": 2584 | |
| }, | |
| { | |
| "epoch": 5.455696202531645, | |
| "grad_norm": 3.527923345565796, | |
| "learning_rate": 9.621196825210814e-07, | |
| "loss": 0.332139790058136, | |
| "step": 2586 | |
| }, | |
| { | |
| "epoch": 5.459915611814346, | |
| "grad_norm": 2.947986364364624, | |
| "learning_rate": 9.596441714505732e-07, | |
| "loss": 0.07135351002216339, | |
| "step": 2588 | |
| }, | |
| { | |
| "epoch": 5.4641350210970465, | |
| "grad_norm": 4.305266857147217, | |
| "learning_rate": 9.57187277553111e-07, | |
| "loss": 0.5696512460708618, | |
| "step": 2590 | |
| }, | |
| { | |
| "epoch": 5.468354430379747, | |
| "grad_norm": 3.028513193130493, | |
| "learning_rate": 9.547490141239534e-07, | |
| "loss": 0.19437383115291595, | |
| "step": 2592 | |
| }, | |
| { | |
| "epoch": 5.472573839662447, | |
| "grad_norm": 2.7946903705596924, | |
| "learning_rate": 9.523293943575414e-07, | |
| "loss": 0.15654590725898743, | |
| "step": 2594 | |
| }, | |
| { | |
| "epoch": 5.476793248945148, | |
| "grad_norm": 6.811203956604004, | |
| "learning_rate": 9.499284313474276e-07, | |
| "loss": 0.11999380588531494, | |
| "step": 2596 | |
| }, | |
| { | |
| "epoch": 5.481012658227848, | |
| "grad_norm": 2.8398826122283936, | |
| "learning_rate": 9.475461380862047e-07, | |
| "loss": 0.04623116925358772, | |
| "step": 2598 | |
| }, | |
| { | |
| "epoch": 5.485232067510548, | |
| "grad_norm": 0.62392657995224, | |
| "learning_rate": 9.451825274654373e-07, | |
| "loss": 0.1718018651008606, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 5.489451476793249, | |
| "grad_norm": 1.799946904182434, | |
| "learning_rate": 9.428376122755884e-07, | |
| "loss": 0.2459963858127594, | |
| "step": 2602 | |
| }, | |
| { | |
| "epoch": 5.493670886075949, | |
| "grad_norm": 3.90554141998291, | |
| "learning_rate": 9.405114052059541e-07, | |
| "loss": 0.23852768540382385, | |
| "step": 2604 | |
| }, | |
| { | |
| "epoch": 5.4978902953586495, | |
| "grad_norm": 6.165667533874512, | |
| "learning_rate": 9.382039188445925e-07, | |
| "loss": 0.05722271651029587, | |
| "step": 2606 | |
| }, | |
| { | |
| "epoch": 5.5021097046413505, | |
| "grad_norm": 2.0570311546325684, | |
| "learning_rate": 9.359151656782567e-07, | |
| "loss": 0.19151735305786133, | |
| "step": 2608 | |
| }, | |
| { | |
| "epoch": 5.506329113924051, | |
| "grad_norm": 1.9145231246948242, | |
| "learning_rate": 9.336451580923262e-07, | |
| "loss": 0.03127627447247505, | |
| "step": 2610 | |
| }, | |
| { | |
| "epoch": 5.510548523206751, | |
| "grad_norm": 3.4458625316619873, | |
| "learning_rate": 9.313939083707413e-07, | |
| "loss": 0.1748735010623932, | |
| "step": 2612 | |
| }, | |
| { | |
| "epoch": 5.514767932489452, | |
| "grad_norm": 3.4101099967956543, | |
| "learning_rate": 9.291614286959349e-07, | |
| "loss": 0.382763147354126, | |
| "step": 2614 | |
| }, | |
| { | |
| "epoch": 5.518987341772152, | |
| "grad_norm": 0.40910443663597107, | |
| "learning_rate": 9.269477311487686e-07, | |
| "loss": 0.1556778848171234, | |
| "step": 2616 | |
| }, | |
| { | |
| "epoch": 5.523206751054852, | |
| "grad_norm": 3.5669357776641846, | |
| "learning_rate": 9.247528277084645e-07, | |
| "loss": 0.1594393253326416, | |
| "step": 2618 | |
| }, | |
| { | |
| "epoch": 5.527426160337553, | |
| "grad_norm": 3.370866537094116, | |
| "learning_rate": 9.225767302525441e-07, | |
| "loss": 0.4137956500053406, | |
| "step": 2620 | |
| }, | |
| { | |
| "epoch": 5.531645569620253, | |
| "grad_norm": 0.21743591129779816, | |
| "learning_rate": 9.20419450556761e-07, | |
| "loss": 0.4230045676231384, | |
| "step": 2622 | |
| }, | |
| { | |
| "epoch": 5.5358649789029535, | |
| "grad_norm": 2.6428186893463135, | |
| "learning_rate": 9.182810002950378e-07, | |
| "loss": 0.42899954319000244, | |
| "step": 2624 | |
| }, | |
| { | |
| "epoch": 5.540084388185654, | |
| "grad_norm": 6.077566623687744, | |
| "learning_rate": 9.16161391039404e-07, | |
| "loss": 0.20327959954738617, | |
| "step": 2626 | |
| }, | |
| { | |
| "epoch": 5.544303797468355, | |
| "grad_norm": 5.41641902923584, | |
| "learning_rate": 9.140606342599332e-07, | |
| "loss": 0.44856715202331543, | |
| "step": 2628 | |
| }, | |
| { | |
| "epoch": 5.548523206751055, | |
| "grad_norm": 4.853996753692627, | |
| "learning_rate": 9.119787413246795e-07, | |
| "loss": 0.4271373748779297, | |
| "step": 2630 | |
| }, | |
| { | |
| "epoch": 5.552742616033755, | |
| "grad_norm": 4.13206672668457, | |
| "learning_rate": 9.099157234996173e-07, | |
| "loss": 0.23560848832130432, | |
| "step": 2632 | |
| }, | |
| { | |
| "epoch": 5.556962025316456, | |
| "grad_norm": 2.2997729778289795, | |
| "learning_rate": 9.078715919485798e-07, | |
| "loss": 0.23265743255615234, | |
| "step": 2634 | |
| }, | |
| { | |
| "epoch": 5.561181434599156, | |
| "grad_norm": 1.88694167137146, | |
| "learning_rate": 9.058463577331999e-07, | |
| "loss": 0.15787991881370544, | |
| "step": 2636 | |
| }, | |
| { | |
| "epoch": 5.565400843881856, | |
| "grad_norm": 2.646512746810913, | |
| "learning_rate": 9.03840031812848e-07, | |
| "loss": 0.1242508590221405, | |
| "step": 2638 | |
| }, | |
| { | |
| "epoch": 5.569620253164557, | |
| "grad_norm": 0.22765684127807617, | |
| "learning_rate": 9.018526250445747e-07, | |
| "loss": 0.07518874108791351, | |
| "step": 2640 | |
| }, | |
| { | |
| "epoch": 5.5738396624472575, | |
| "grad_norm": 2.321202278137207, | |
| "learning_rate": 8.998841481830515e-07, | |
| "loss": 0.30490678548812866, | |
| "step": 2642 | |
| }, | |
| { | |
| "epoch": 5.578059071729958, | |
| "grad_norm": 4.0073418617248535, | |
| "learning_rate": 8.97934611880512e-07, | |
| "loss": 0.611823558807373, | |
| "step": 2644 | |
| }, | |
| { | |
| "epoch": 5.582278481012658, | |
| "grad_norm": 18.506275177001953, | |
| "learning_rate": 8.960040266866948e-07, | |
| "loss": 0.3300861120223999, | |
| "step": 2646 | |
| }, | |
| { | |
| "epoch": 5.586497890295359, | |
| "grad_norm": 2.6837949752807617, | |
| "learning_rate": 8.94092403048786e-07, | |
| "loss": 0.018273882567882538, | |
| "step": 2648 | |
| }, | |
| { | |
| "epoch": 5.590717299578059, | |
| "grad_norm": 2.3034257888793945, | |
| "learning_rate": 8.921997513113637e-07, | |
| "loss": 0.25158876180648804, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 5.594936708860759, | |
| "grad_norm": 2.5194528102874756, | |
| "learning_rate": 8.903260817163402e-07, | |
| "loss": 0.18762826919555664, | |
| "step": 2652 | |
| }, | |
| { | |
| "epoch": 5.59915611814346, | |
| "grad_norm": 4.4369096755981445, | |
| "learning_rate": 8.884714044029092e-07, | |
| "loss": 0.06930024921894073, | |
| "step": 2654 | |
| }, | |
| { | |
| "epoch": 5.60337552742616, | |
| "grad_norm": 1.652228832244873, | |
| "learning_rate": 8.86635729407488e-07, | |
| "loss": 0.2389906346797943, | |
| "step": 2656 | |
| }, | |
| { | |
| "epoch": 5.6075949367088604, | |
| "grad_norm": 5.807703971862793, | |
| "learning_rate": 8.848190666636651e-07, | |
| "loss": 0.10554240643978119, | |
| "step": 2658 | |
| }, | |
| { | |
| "epoch": 5.6118143459915615, | |
| "grad_norm": 3.1773393154144287, | |
| "learning_rate": 8.830214260021459e-07, | |
| "loss": 0.16849491000175476, | |
| "step": 2660 | |
| }, | |
| { | |
| "epoch": 5.616033755274262, | |
| "grad_norm": 23.979202270507812, | |
| "learning_rate": 8.812428171506998e-07, | |
| "loss": 0.04333914816379547, | |
| "step": 2662 | |
| }, | |
| { | |
| "epoch": 5.620253164556962, | |
| "grad_norm": 0.8357203602790833, | |
| "learning_rate": 8.794832497341065e-07, | |
| "loss": 0.30027642846107483, | |
| "step": 2664 | |
| }, | |
| { | |
| "epoch": 5.624472573839663, | |
| "grad_norm": 2.281597375869751, | |
| "learning_rate": 8.77742733274106e-07, | |
| "loss": 0.027927353978157043, | |
| "step": 2666 | |
| }, | |
| { | |
| "epoch": 5.628691983122363, | |
| "grad_norm": 2.4539661407470703, | |
| "learning_rate": 8.760212771893442e-07, | |
| "loss": 0.10624615103006363, | |
| "step": 2668 | |
| }, | |
| { | |
| "epoch": 5.632911392405063, | |
| "grad_norm": 0.6876718401908875, | |
| "learning_rate": 8.743188907953251e-07, | |
| "loss": 0.3938605487346649, | |
| "step": 2670 | |
| }, | |
| { | |
| "epoch": 5.637130801687764, | |
| "grad_norm": 3.4516823291778564, | |
| "learning_rate": 8.726355833043575e-07, | |
| "loss": 0.3330395519733429, | |
| "step": 2672 | |
| }, | |
| { | |
| "epoch": 5.641350210970464, | |
| "grad_norm": 3.539989471435547, | |
| "learning_rate": 8.709713638255074e-07, | |
| "loss": 0.6006532907485962, | |
| "step": 2674 | |
| }, | |
| { | |
| "epoch": 5.6455696202531644, | |
| "grad_norm": 3.0337464809417725, | |
| "learning_rate": 8.693262413645464e-07, | |
| "loss": 0.3575003445148468, | |
| "step": 2676 | |
| }, | |
| { | |
| "epoch": 5.649789029535865, | |
| "grad_norm": 3.694211721420288, | |
| "learning_rate": 8.677002248239066e-07, | |
| "loss": 0.2969297766685486, | |
| "step": 2678 | |
| }, | |
| { | |
| "epoch": 5.654008438818566, | |
| "grad_norm": 1.0974704027175903, | |
| "learning_rate": 8.660933230026276e-07, | |
| "loss": 0.05868370085954666, | |
| "step": 2680 | |
| }, | |
| { | |
| "epoch": 5.658227848101266, | |
| "grad_norm": 2.3777191638946533, | |
| "learning_rate": 8.645055445963135e-07, | |
| "loss": 0.31508857011795044, | |
| "step": 2682 | |
| }, | |
| { | |
| "epoch": 5.662447257383966, | |
| "grad_norm": 0.7402134537696838, | |
| "learning_rate": 8.629368981970822e-07, | |
| "loss": 0.04464399069547653, | |
| "step": 2684 | |
| }, | |
| { | |
| "epoch": 5.666666666666667, | |
| "grad_norm": 1.678879737854004, | |
| "learning_rate": 8.613873922935217e-07, | |
| "loss": 0.4207780957221985, | |
| "step": 2686 | |
| }, | |
| { | |
| "epoch": 5.670886075949367, | |
| "grad_norm": 0.16458748281002045, | |
| "learning_rate": 8.598570352706425e-07, | |
| "loss": 0.33038753271102905, | |
| "step": 2688 | |
| }, | |
| { | |
| "epoch": 5.675105485232067, | |
| "grad_norm": 2.9141294956207275, | |
| "learning_rate": 8.583458354098318e-07, | |
| "loss": 0.471355140209198, | |
| "step": 2690 | |
| }, | |
| { | |
| "epoch": 5.679324894514768, | |
| "grad_norm": 2.882704973220825, | |
| "learning_rate": 8.56853800888812e-07, | |
| "loss": 0.01554950326681137, | |
| "step": 2692 | |
| }, | |
| { | |
| "epoch": 5.6835443037974684, | |
| "grad_norm": 2.264005422592163, | |
| "learning_rate": 8.553809397815909e-07, | |
| "loss": 0.5948341488838196, | |
| "step": 2694 | |
| }, | |
| { | |
| "epoch": 5.687763713080169, | |
| "grad_norm": 5.996740341186523, | |
| "learning_rate": 8.539272600584227e-07, | |
| "loss": 0.2293516844511032, | |
| "step": 2696 | |
| }, | |
| { | |
| "epoch": 5.691983122362869, | |
| "grad_norm": 3.936495542526245, | |
| "learning_rate": 8.524927695857636e-07, | |
| "loss": 0.4448416829109192, | |
| "step": 2698 | |
| }, | |
| { | |
| "epoch": 5.69620253164557, | |
| "grad_norm": 2.383849859237671, | |
| "learning_rate": 8.510774761262285e-07, | |
| "loss": 0.3430967926979065, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 5.70042194092827, | |
| "grad_norm": 3.2722134590148926, | |
| "learning_rate": 8.496813873385494e-07, | |
| "loss": 0.38816744089126587, | |
| "step": 2702 | |
| }, | |
| { | |
| "epoch": 5.70464135021097, | |
| "grad_norm": 0.12035968899726868, | |
| "learning_rate": 8.483045107775337e-07, | |
| "loss": 0.2644461393356323, | |
| "step": 2704 | |
| }, | |
| { | |
| "epoch": 5.708860759493671, | |
| "grad_norm": 0.15902626514434814, | |
| "learning_rate": 8.469468538940241e-07, | |
| "loss": 0.15841832756996155, | |
| "step": 2706 | |
| }, | |
| { | |
| "epoch": 5.713080168776371, | |
| "grad_norm": 2.9765915870666504, | |
| "learning_rate": 8.456084240348575e-07, | |
| "loss": 0.03421106934547424, | |
| "step": 2708 | |
| }, | |
| { | |
| "epoch": 5.717299578059071, | |
| "grad_norm": 0.5361355543136597, | |
| "learning_rate": 8.44289228442825e-07, | |
| "loss": 0.21108713746070862, | |
| "step": 2710 | |
| }, | |
| { | |
| "epoch": 5.7215189873417724, | |
| "grad_norm": 3.1111233234405518, | |
| "learning_rate": 8.429892742566344e-07, | |
| "loss": 0.38604629039764404, | |
| "step": 2712 | |
| }, | |
| { | |
| "epoch": 5.725738396624473, | |
| "grad_norm": 2.709472417831421, | |
| "learning_rate": 8.417085685108695e-07, | |
| "loss": 0.4284287095069885, | |
| "step": 2714 | |
| }, | |
| { | |
| "epoch": 5.729957805907173, | |
| "grad_norm": 2.9085452556610107, | |
| "learning_rate": 8.404471181359526e-07, | |
| "loss": 0.2729555666446686, | |
| "step": 2716 | |
| }, | |
| { | |
| "epoch": 5.734177215189874, | |
| "grad_norm": 13.333749771118164, | |
| "learning_rate": 8.392049299581083e-07, | |
| "loss": 0.49384695291519165, | |
| "step": 2718 | |
| }, | |
| { | |
| "epoch": 5.738396624472574, | |
| "grad_norm": 2.3657379150390625, | |
| "learning_rate": 8.379820106993253e-07, | |
| "loss": 0.42707446217536926, | |
| "step": 2720 | |
| }, | |
| { | |
| "epoch": 5.742616033755274, | |
| "grad_norm": 4.9322943687438965, | |
| "learning_rate": 8.367783669773196e-07, | |
| "loss": 0.4772263467311859, | |
| "step": 2722 | |
| }, | |
| { | |
| "epoch": 5.746835443037975, | |
| "grad_norm": 2.417219638824463, | |
| "learning_rate": 8.355940053054999e-07, | |
| "loss": 0.11725395172834396, | |
| "step": 2724 | |
| }, | |
| { | |
| "epoch": 5.751054852320675, | |
| "grad_norm": 3.8851850032806396, | |
| "learning_rate": 8.344289320929321e-07, | |
| "loss": 0.3932940363883972, | |
| "step": 2726 | |
| }, | |
| { | |
| "epoch": 5.755274261603375, | |
| "grad_norm": 2.6408393383026123, | |
| "learning_rate": 8.332831536443035e-07, | |
| "loss": 0.3797783851623535, | |
| "step": 2728 | |
| }, | |
| { | |
| "epoch": 5.759493670886076, | |
| "grad_norm": 4.485612392425537, | |
| "learning_rate": 8.321566761598909e-07, | |
| "loss": 0.28436335921287537, | |
| "step": 2730 | |
| }, | |
| { | |
| "epoch": 5.763713080168777, | |
| "grad_norm": 2.002480983734131, | |
| "learning_rate": 8.310495057355242e-07, | |
| "loss": 0.5089020729064941, | |
| "step": 2732 | |
| }, | |
| { | |
| "epoch": 5.767932489451477, | |
| "grad_norm": 2.7712652683258057, | |
| "learning_rate": 8.299616483625561e-07, | |
| "loss": 0.09954804182052612, | |
| "step": 2734 | |
| }, | |
| { | |
| "epoch": 5.772151898734177, | |
| "grad_norm": 8.521866798400879, | |
| "learning_rate": 8.288931099278275e-07, | |
| "loss": 0.28571265935897827, | |
| "step": 2736 | |
| }, | |
| { | |
| "epoch": 5.776371308016878, | |
| "grad_norm": 3.3578455448150635, | |
| "learning_rate": 8.27843896213637e-07, | |
| "loss": 0.4965103268623352, | |
| "step": 2738 | |
| }, | |
| { | |
| "epoch": 5.780590717299578, | |
| "grad_norm": 2.0499563217163086, | |
| "learning_rate": 8.2681401289771e-07, | |
| "loss": 0.2735576629638672, | |
| "step": 2740 | |
| }, | |
| { | |
| "epoch": 5.784810126582278, | |
| "grad_norm": 3.7373149394989014, | |
| "learning_rate": 8.258034655531661e-07, | |
| "loss": 0.4888134002685547, | |
| "step": 2742 | |
| }, | |
| { | |
| "epoch": 5.789029535864979, | |
| "grad_norm": 2.028367757797241, | |
| "learning_rate": 8.248122596484903e-07, | |
| "loss": 0.16572898626327515, | |
| "step": 2744 | |
| }, | |
| { | |
| "epoch": 5.793248945147679, | |
| "grad_norm": 2.7090587615966797, | |
| "learning_rate": 8.23840400547503e-07, | |
| "loss": 0.2500470280647278, | |
| "step": 2746 | |
| }, | |
| { | |
| "epoch": 5.7974683544303796, | |
| "grad_norm": 5.770694255828857, | |
| "learning_rate": 8.228878935093327e-07, | |
| "loss": 0.6361812949180603, | |
| "step": 2748 | |
| }, | |
| { | |
| "epoch": 5.80168776371308, | |
| "grad_norm": 5.137115478515625, | |
| "learning_rate": 8.219547436883832e-07, | |
| "loss": 0.25070175528526306, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 5.805907172995781, | |
| "grad_norm": 7.0401716232299805, | |
| "learning_rate": 8.210409561343112e-07, | |
| "loss": 0.4003854990005493, | |
| "step": 2752 | |
| }, | |
| { | |
| "epoch": 5.810126582278481, | |
| "grad_norm": 8.947293281555176, | |
| "learning_rate": 8.201465357919941e-07, | |
| "loss": 0.5776923894882202, | |
| "step": 2754 | |
| }, | |
| { | |
| "epoch": 5.814345991561181, | |
| "grad_norm": 2.4191689491271973, | |
| "learning_rate": 8.192714875015071e-07, | |
| "loss": 0.21931633353233337, | |
| "step": 2756 | |
| }, | |
| { | |
| "epoch": 5.818565400843882, | |
| "grad_norm": 3.477417469024658, | |
| "learning_rate": 8.184158159980942e-07, | |
| "loss": 0.034300077706575394, | |
| "step": 2758 | |
| }, | |
| { | |
| "epoch": 5.822784810126582, | |
| "grad_norm": 2.9756200313568115, | |
| "learning_rate": 8.175795259121438e-07, | |
| "loss": 0.38680142164230347, | |
| "step": 2760 | |
| }, | |
| { | |
| "epoch": 5.827004219409282, | |
| "grad_norm": 13.028040885925293, | |
| "learning_rate": 8.167626217691641e-07, | |
| "loss": 0.3836379647254944, | |
| "step": 2762 | |
| }, | |
| { | |
| "epoch": 5.831223628691983, | |
| "grad_norm": 8.563456535339355, | |
| "learning_rate": 8.15965107989757e-07, | |
| "loss": 0.17900025844573975, | |
| "step": 2764 | |
| }, | |
| { | |
| "epoch": 5.8354430379746836, | |
| "grad_norm": 3.2769968509674072, | |
| "learning_rate": 8.151869888895971e-07, | |
| "loss": 0.41699984669685364, | |
| "step": 2766 | |
| }, | |
| { | |
| "epoch": 5.839662447257384, | |
| "grad_norm": 4.447607040405273, | |
| "learning_rate": 8.144282686794042e-07, | |
| "loss": 0.3173035979270935, | |
| "step": 2768 | |
| }, | |
| { | |
| "epoch": 5.843881856540085, | |
| "grad_norm": 3.0378546714782715, | |
| "learning_rate": 8.136889514649242e-07, | |
| "loss": 0.40285831689834595, | |
| "step": 2770 | |
| }, | |
| { | |
| "epoch": 5.848101265822785, | |
| "grad_norm": 2.4900171756744385, | |
| "learning_rate": 8.129690412469046e-07, | |
| "loss": 0.2346557378768921, | |
| "step": 2772 | |
| }, | |
| { | |
| "epoch": 5.852320675105485, | |
| "grad_norm": 4.628073215484619, | |
| "learning_rate": 8.122685419210748e-07, | |
| "loss": 0.34134355187416077, | |
| "step": 2774 | |
| }, | |
| { | |
| "epoch": 5.856540084388186, | |
| "grad_norm": 2.439875602722168, | |
| "learning_rate": 8.11587457278123e-07, | |
| "loss": 0.32176950573921204, | |
| "step": 2776 | |
| }, | |
| { | |
| "epoch": 5.860759493670886, | |
| "grad_norm": 2.549513816833496, | |
| "learning_rate": 8.109257910036767e-07, | |
| "loss": 0.4297516345977783, | |
| "step": 2778 | |
| }, | |
| { | |
| "epoch": 5.864978902953586, | |
| "grad_norm": 2.4646005630493164, | |
| "learning_rate": 8.102835466782829e-07, | |
| "loss": 0.3611939251422882, | |
| "step": 2780 | |
| }, | |
| { | |
| "epoch": 5.869198312236287, | |
| "grad_norm": 2.1484479904174805, | |
| "learning_rate": 8.096607277773885e-07, | |
| "loss": 0.3919060528278351, | |
| "step": 2782 | |
| }, | |
| { | |
| "epoch": 5.8734177215189876, | |
| "grad_norm": 4.247211456298828, | |
| "learning_rate": 8.090573376713214e-07, | |
| "loss": 0.4738028943538666, | |
| "step": 2784 | |
| }, | |
| { | |
| "epoch": 5.877637130801688, | |
| "grad_norm": 12.467236518859863, | |
| "learning_rate": 8.084733796252727e-07, | |
| "loss": 0.14323553442955017, | |
| "step": 2786 | |
| }, | |
| { | |
| "epoch": 5.881856540084388, | |
| "grad_norm": 2.3769068717956543, | |
| "learning_rate": 8.079088567992778e-07, | |
| "loss": 0.3547300100326538, | |
| "step": 2788 | |
| }, | |
| { | |
| "epoch": 5.886075949367089, | |
| "grad_norm": 1.2352712154388428, | |
| "learning_rate": 8.073637722482008e-07, | |
| "loss": 0.028360096737742424, | |
| "step": 2790 | |
| }, | |
| { | |
| "epoch": 5.890295358649789, | |
| "grad_norm": 2.2478432655334473, | |
| "learning_rate": 8.068381289217173e-07, | |
| "loss": 0.13877378404140472, | |
| "step": 2792 | |
| }, | |
| { | |
| "epoch": 5.894514767932489, | |
| "grad_norm": 2.8125381469726562, | |
| "learning_rate": 8.063319296642983e-07, | |
| "loss": 0.40060657262802124, | |
| "step": 2794 | |
| }, | |
| { | |
| "epoch": 5.89873417721519, | |
| "grad_norm": 5.122012615203857, | |
| "learning_rate": 8.058451772151953e-07, | |
| "loss": 0.40660685300827026, | |
| "step": 2796 | |
| }, | |
| { | |
| "epoch": 5.90295358649789, | |
| "grad_norm": 3.058626890182495, | |
| "learning_rate": 8.05377874208425e-07, | |
| "loss": 0.2716779410839081, | |
| "step": 2798 | |
| }, | |
| { | |
| "epoch": 5.9071729957805905, | |
| "grad_norm": 3.8683018684387207, | |
| "learning_rate": 8.049300231727548e-07, | |
| "loss": 0.3559970259666443, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 5.911392405063291, | |
| "grad_norm": 3.477356433868408, | |
| "learning_rate": 8.045016265316904e-07, | |
| "loss": 0.29196757078170776, | |
| "step": 2802 | |
| }, | |
| { | |
| "epoch": 5.915611814345992, | |
| "grad_norm": 3.347072124481201, | |
| "learning_rate": 8.04092686603461e-07, | |
| "loss": 0.24977904558181763, | |
| "step": 2804 | |
| }, | |
| { | |
| "epoch": 5.919831223628692, | |
| "grad_norm": 2.251091957092285, | |
| "learning_rate": 8.037032056010077e-07, | |
| "loss": 0.03224069997668266, | |
| "step": 2806 | |
| }, | |
| { | |
| "epoch": 5.924050632911392, | |
| "grad_norm": 7.518223762512207, | |
| "learning_rate": 8.03333185631972e-07, | |
| "loss": 0.3484126329421997, | |
| "step": 2808 | |
| }, | |
| { | |
| "epoch": 5.928270042194093, | |
| "grad_norm": 3.4275622367858887, | |
| "learning_rate": 8.02982628698683e-07, | |
| "loss": 0.059894442558288574, | |
| "step": 2810 | |
| }, | |
| { | |
| "epoch": 5.932489451476793, | |
| "grad_norm": 2.064310073852539, | |
| "learning_rate": 8.026515366981481e-07, | |
| "loss": 0.12616072595119476, | |
| "step": 2812 | |
| }, | |
| { | |
| "epoch": 5.936708860759493, | |
| "grad_norm": 0.0712597668170929, | |
| "learning_rate": 8.023399114220411e-07, | |
| "loss": 0.2311958521604538, | |
| "step": 2814 | |
| }, | |
| { | |
| "epoch": 5.940928270042194, | |
| "grad_norm": 2.0846590995788574, | |
| "learning_rate": 8.020477545566941e-07, | |
| "loss": 0.27708864212036133, | |
| "step": 2816 | |
| }, | |
| { | |
| "epoch": 5.9451476793248945, | |
| "grad_norm": 2.385714530944824, | |
| "learning_rate": 8.017750676830876e-07, | |
| "loss": 0.19477054476737976, | |
| "step": 2818 | |
| }, | |
| { | |
| "epoch": 5.949367088607595, | |
| "grad_norm": 0.2615872025489807, | |
| "learning_rate": 8.015218522768414e-07, | |
| "loss": 0.12333428859710693, | |
| "step": 2820 | |
| }, | |
| { | |
| "epoch": 5.953586497890296, | |
| "grad_norm": 2.914166212081909, | |
| "learning_rate": 8.012881097082082e-07, | |
| "loss": 0.3903350234031677, | |
| "step": 2822 | |
| }, | |
| { | |
| "epoch": 5.957805907172996, | |
| "grad_norm": 2.832524538040161, | |
| "learning_rate": 8.010738412420643e-07, | |
| "loss": 0.25948387384414673, | |
| "step": 2824 | |
| }, | |
| { | |
| "epoch": 5.962025316455696, | |
| "grad_norm": 2.6522233486175537, | |
| "learning_rate": 8.008790480379041e-07, | |
| "loss": 0.42445188760757446, | |
| "step": 2826 | |
| }, | |
| { | |
| "epoch": 5.966244725738397, | |
| "grad_norm": 0.424772173166275, | |
| "learning_rate": 8.007037311498337e-07, | |
| "loss": 0.18149511516094208, | |
| "step": 2828 | |
| }, | |
| { | |
| "epoch": 5.970464135021097, | |
| "grad_norm": 2.7508718967437744, | |
| "learning_rate": 8.005478915265643e-07, | |
| "loss": 0.08192159235477448, | |
| "step": 2830 | |
| }, | |
| { | |
| "epoch": 5.974683544303797, | |
| "grad_norm": 7.296056270599365, | |
| "learning_rate": 8.004115300114071e-07, | |
| "loss": 0.3574886918067932, | |
| "step": 2832 | |
| }, | |
| { | |
| "epoch": 5.978902953586498, | |
| "grad_norm": 0.17789922654628754, | |
| "learning_rate": 8.002946473422713e-07, | |
| "loss": 0.2169741988182068, | |
| "step": 2834 | |
| }, | |
| { | |
| "epoch": 5.9831223628691985, | |
| "grad_norm": 5.66543436050415, | |
| "learning_rate": 8.001972441516558e-07, | |
| "loss": 0.06217677891254425, | |
| "step": 2836 | |
| }, | |
| { | |
| "epoch": 5.987341772151899, | |
| "grad_norm": 2.6673474311828613, | |
| "learning_rate": 8.001193209666501e-07, | |
| "loss": 0.3183894753456116, | |
| "step": 2838 | |
| }, | |
| { | |
| "epoch": 5.991561181434599, | |
| "grad_norm": 0.4517356753349304, | |
| "learning_rate": 8.000608782089275e-07, | |
| "loss": 0.26500433683395386, | |
| "step": 2840 | |
| }, | |
| { | |
| "epoch": 5.9957805907173, | |
| "grad_norm": 13.812853813171387, | |
| "learning_rate": 8.000219161947466e-07, | |
| "loss": 0.4387038052082062, | |
| "step": 2842 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "grad_norm": 2.1090548038482666, | |
| "learning_rate": 8.000024351349457e-07, | |
| "loss": 0.4745343327522278, | |
| "step": 2844 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "step": 2844, | |
| "total_flos": 5.392281114922451e+18, | |
| "train_loss": 0.6896465788091076, | |
| "train_runtime": 6887.9733, | |
| "train_samples_per_second": 12.387, | |
| "train_steps_per_second": 0.413 | |
| } | |
| ], | |
| "logging_steps": 2, | |
| "max_steps": 2844, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 6, | |
| "save_steps": 99999, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": false, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 5.392281114922451e+18, | |
| "train_batch_size": 3, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |