Image-Text-to-Text
Transformers
Safetensors
qwen3_5
llama-factory
full
Generated from Trainer
conversational
Instructions to use furproxy/9b-88 with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use furproxy/9b-88 with Transformers:
```python
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("image-text-to-text", model="furproxy/9b-88")
messages = [
    {
        "role": "user",
        "content": [
            {"type": "image", "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/p-blog/candy.JPG"},
            {"type": "text", "text": "What animal is on the candy?"}
        ]
    },
]
pipe(text=messages)
```

```python
# Load model directly
from transformers import AutoProcessor, AutoModelForImageTextToText

processor = AutoProcessor.from_pretrained("furproxy/9b-88")
model = AutoModelForImageTextToText.from_pretrained("furproxy/9b-88")
messages = [
    {
        "role": "user",
        "content": [
            {"type": "image", "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/p-blog/candy.JPG"},
            {"type": "text", "text": "What animal is on the candy?"}
        ]
    },
]
inputs = processor.apply_chat_template(
    messages,
    add_generation_prompt=True,
    tokenize=True,
    return_dict=True,
    return_tensors="pt",
).to(model.device)

outputs = model.generate(**inputs, max_new_tokens=40)
print(processor.decode(outputs[0][inputs["input_ids"].shape[-1]:]))
```
- Notebooks
- Google Colab
- Kaggle
- Local Apps Settings
- vLLM
How to use furproxy/9b-88 with vLLM:
Install from pip and serve model
```bash
# Install vLLM from pip:
pip install vllm

# Start the vLLM server:
vllm serve "furproxy/9b-88"

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/chat/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "furproxy/9b-88",
        "messages": [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": "Describe this image in one sentence."
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
                        }
                    }
                ]
            }
        ]
    }'
```
Use Docker
docker model run hf.co/furproxy/9b-88
- SGLang
How to use furproxy/9b-88 with SGLang:
Install from pip and serve model
```bash
# Install SGLang from pip:
pip install sglang

# Start the SGLang server:
python3 -m sglang.launch_server \
    --model-path "furproxy/9b-88" \
    --host 0.0.0.0 \
    --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "furproxy/9b-88",
        "messages": [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": "Describe this image in one sentence."
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
                        }
                    }
                ]
            }
        ]
    }'
```
Use Docker images
```bash
docker run --gpus all \
    --shm-size 32g \
    -p 30000:30000 \
    -v ~/.cache/huggingface:/root/.cache/huggingface \
    --env "HF_TOKEN=<secret>" \
    --ipc=host \
    lmsysorg/sglang:latest \
    python3 -m sglang.launch_server \
        --model-path "furproxy/9b-88" \
        --host 0.0.0.0 \
        --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "furproxy/9b-88",
        "messages": [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": "Describe this image in one sentence."
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
                        }
                    }
                ]
            }
        ]
    }'
```
- Docker Model Runner
How to use furproxy/9b-88 with Docker Model Runner:
docker model run hf.co/furproxy/9b-88
| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 4.0, | |
| "eval_steps": 500, | |
| "global_step": 1804, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.004434589800443459, | |
| "grad_norm": 3.7738454341888428, | |
| "learning_rate": 5.494505494505495e-08, | |
| "loss": 1.8639533519744873, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.008869179600886918, | |
| "grad_norm": 6.427864074707031, | |
| "learning_rate": 1.6483516483516484e-07, | |
| "loss": 2.130033016204834, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.013303769401330377, | |
| "grad_norm": 4.859611511230469, | |
| "learning_rate": 2.7472527472527475e-07, | |
| "loss": 1.9051225185394287, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.017738359201773836, | |
| "grad_norm": 2.374562978744507, | |
| "learning_rate": 3.846153846153847e-07, | |
| "loss": 1.8245947360992432, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.022172949002217297, | |
| "grad_norm": 3.0884368419647217, | |
| "learning_rate": 4.945054945054946e-07, | |
| "loss": 1.6384509801864624, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.026607538802660754, | |
| "grad_norm": 1.9582329988479614, | |
| "learning_rate": 6.043956043956044e-07, | |
| "loss": 2.195298671722412, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.031042128603104215, | |
| "grad_norm": 24.451740264892578, | |
| "learning_rate": 7.142857142857143e-07, | |
| "loss": 1.955391764640808, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.03547671840354767, | |
| "grad_norm": 4.774598121643066, | |
| "learning_rate": 8.241758241758242e-07, | |
| "loss": 1.51339852809906, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.03991130820399113, | |
| "grad_norm": 3.925572156906128, | |
| "learning_rate": 9.340659340659342e-07, | |
| "loss": 1.449619174003601, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.04434589800443459, | |
| "grad_norm": 4.0381388664245605, | |
| "learning_rate": 1.0439560439560442e-06, | |
| "loss": 1.6513968706130981, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.04878048780487805, | |
| "grad_norm": 8.873974800109863, | |
| "learning_rate": 1.153846153846154e-06, | |
| "loss": 1.4907894134521484, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.05321507760532151, | |
| "grad_norm": 1.0349433422088623, | |
| "learning_rate": 1.2637362637362637e-06, | |
| "loss": 1.594868779182434, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.057649667405764965, | |
| "grad_norm": 1.1628096103668213, | |
| "learning_rate": 1.3736263736263738e-06, | |
| "loss": 1.5131434202194214, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.06208425720620843, | |
| "grad_norm": 1.3587442636489868, | |
| "learning_rate": 1.4835164835164837e-06, | |
| "loss": 1.1953634023666382, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.06651884700665188, | |
| "grad_norm": 1.4870824813842773, | |
| "learning_rate": 1.5934065934065933e-06, | |
| "loss": 1.2982568740844727, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.07095343680709534, | |
| "grad_norm": 3.842538595199585, | |
| "learning_rate": 1.7032967032967034e-06, | |
| "loss": 1.2121697664260864, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.07538802660753881, | |
| "grad_norm": 0.8271878957748413, | |
| "learning_rate": 1.8131868131868133e-06, | |
| "loss": 1.571251630783081, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.07982261640798226, | |
| "grad_norm": 38.774314880371094, | |
| "learning_rate": 1.9230769230769234e-06, | |
| "loss": 1.330682635307312, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.08425720620842572, | |
| "grad_norm": 6.1416730880737305, | |
| "learning_rate": 2.032967032967033e-06, | |
| "loss": 1.591033935546875, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.08869179600886919, | |
| "grad_norm": 2.0341765880584717, | |
| "learning_rate": 2.1428571428571427e-06, | |
| "loss": 0.8703436851501465, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.09312638580931264, | |
| "grad_norm": 1.3116881847381592, | |
| "learning_rate": 2.252747252747253e-06, | |
| "loss": 1.460440754890442, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.0975609756097561, | |
| "grad_norm": 0.6786495447158813, | |
| "learning_rate": 2.362637362637363e-06, | |
| "loss": 1.403860330581665, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.10199556541019955, | |
| "grad_norm": 1.2536464929580688, | |
| "learning_rate": 2.472527472527473e-06, | |
| "loss": 1.4209010601043701, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.10643015521064302, | |
| "grad_norm": 1.742255449295044, | |
| "learning_rate": 2.582417582417583e-06, | |
| "loss": 1.6356741189956665, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.11086474501108648, | |
| "grad_norm": 1.2730481624603271, | |
| "learning_rate": 2.6923076923076923e-06, | |
| "loss": 1.4965991973876953, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.11529933481152993, | |
| "grad_norm": 1.8393515348434448, | |
| "learning_rate": 2.8021978021978024e-06, | |
| "loss": 1.4182051420211792, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.1197339246119734, | |
| "grad_norm": 1.2401989698410034, | |
| "learning_rate": 2.9120879120879125e-06, | |
| "loss": 1.4068325757980347, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.12416851441241686, | |
| "grad_norm": 4.087460517883301, | |
| "learning_rate": 3.021978021978022e-06, | |
| "loss": 1.1563267707824707, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.1286031042128603, | |
| "grad_norm": 1.3389958143234253, | |
| "learning_rate": 3.1318681318681323e-06, | |
| "loss": 1.366070032119751, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.13303769401330376, | |
| "grad_norm": 0.9454852342605591, | |
| "learning_rate": 3.2417582417582424e-06, | |
| "loss": 1.421806812286377, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.13747228381374724, | |
| "grad_norm": 1.2951329946517944, | |
| "learning_rate": 3.3516483516483516e-06, | |
| "loss": 1.3717670440673828, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.1419068736141907, | |
| "grad_norm": 1.5734539031982422, | |
| "learning_rate": 3.4615384615384617e-06, | |
| "loss": 1.4363198280334473, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.14634146341463414, | |
| "grad_norm": 1.5510807037353516, | |
| "learning_rate": 3.5714285714285718e-06, | |
| "loss": 1.4910911321640015, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.15077605321507762, | |
| "grad_norm": 1.0450427532196045, | |
| "learning_rate": 3.681318681318682e-06, | |
| "loss": 1.355659008026123, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.15521064301552107, | |
| "grad_norm": 0.8717012405395508, | |
| "learning_rate": 3.7912087912087915e-06, | |
| "loss": 1.3062938451766968, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.15964523281596452, | |
| "grad_norm": 0.879036545753479, | |
| "learning_rate": 3.901098901098901e-06, | |
| "loss": 1.3595796823501587, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.164079822616408, | |
| "grad_norm": 0.9112725257873535, | |
| "learning_rate": 4.010989010989012e-06, | |
| "loss": 1.26053786277771, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.16851441241685144, | |
| "grad_norm": 1.1469699144363403, | |
| "learning_rate": 4.120879120879121e-06, | |
| "loss": 1.3409998416900635, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.1729490022172949, | |
| "grad_norm": 1.1423449516296387, | |
| "learning_rate": 4.230769230769231e-06, | |
| "loss": 1.3864156007766724, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.17738359201773837, | |
| "grad_norm": 3.403249502182007, | |
| "learning_rate": 4.340659340659341e-06, | |
| "loss": 1.3426871299743652, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.18181818181818182, | |
| "grad_norm": 1.019535779953003, | |
| "learning_rate": 4.45054945054945e-06, | |
| "loss": 0.8479611277580261, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.18625277161862527, | |
| "grad_norm": 0.9872419238090515, | |
| "learning_rate": 4.560439560439561e-06, | |
| "loss": 1.0275837182998657, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.19068736141906872, | |
| "grad_norm": 2.4420623779296875, | |
| "learning_rate": 4.6703296703296706e-06, | |
| "loss": 0.9826464653015137, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.1951219512195122, | |
| "grad_norm": 1.1877732276916504, | |
| "learning_rate": 4.780219780219781e-06, | |
| "loss": 1.2627744674682617, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.19955654101995565, | |
| "grad_norm": 2.3475279808044434, | |
| "learning_rate": 4.890109890109891e-06, | |
| "loss": 1.4005193710327148, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.2039911308203991, | |
| "grad_norm": 1.02353036403656, | |
| "learning_rate": 5e-06, | |
| "loss": 1.2373454570770264, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.20842572062084258, | |
| "grad_norm": 0.9194503426551819, | |
| "learning_rate": 4.999984864490455e-06, | |
| "loss": 1.2537040710449219, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.21286031042128603, | |
| "grad_norm": 2.111903190612793, | |
| "learning_rate": 4.999939458165447e-06, | |
| "loss": 1.7977383136749268, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.21729490022172948, | |
| "grad_norm": 1.7523187398910522, | |
| "learning_rate": 4.999863781635863e-06, | |
| "loss": 0.9749844074249268, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.22172949002217296, | |
| "grad_norm": 2.5243184566497803, | |
| "learning_rate": 4.999757835919841e-06, | |
| "loss": 1.164440393447876, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.2261640798226164, | |
| "grad_norm": 0.87540602684021, | |
| "learning_rate": 4.9996216224427495e-06, | |
| "loss": 1.3278536796569824, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.23059866962305986, | |
| "grad_norm": 1.5194096565246582, | |
| "learning_rate": 4.999455143037178e-06, | |
| "loss": 1.0450935363769531, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.23503325942350334, | |
| "grad_norm": 0.8938301801681519, | |
| "learning_rate": 4.999258399942903e-06, | |
| "loss": 1.2851738929748535, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.2394678492239468, | |
| "grad_norm": 1.5863221883773804, | |
| "learning_rate": 4.9990313958068645e-06, | |
| "loss": 1.4904606342315674, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.24390243902439024, | |
| "grad_norm": 2.0058369636535645, | |
| "learning_rate": 4.998774133683127e-06, | |
| "loss": 1.0308055877685547, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.24833702882483372, | |
| "grad_norm": 1.978546142578125, | |
| "learning_rate": 4.9984866170328426e-06, | |
| "loss": 1.3031495809555054, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.25277161862527714, | |
| "grad_norm": 1.6387444734573364, | |
| "learning_rate": 4.998168849724196e-06, | |
| "loss": 0.7833878397941589, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.2572062084257206, | |
| "grad_norm": 1.0411269664764404, | |
| "learning_rate": 4.997820836032363e-06, | |
| "loss": 1.3135759830474854, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.2616407982261641, | |
| "grad_norm": 0.7238956689834595, | |
| "learning_rate": 4.997442580639443e-06, | |
| "loss": 1.0706069469451904, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 0.2660753880266075, | |
| "grad_norm": 1.1347033977508545, | |
| "learning_rate": 4.997034088634404e-06, | |
| "loss": 1.252564549446106, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.270509977827051, | |
| "grad_norm": 1.396546721458435, | |
| "learning_rate": 4.996595365513012e-06, | |
| "loss": 1.1585849523544312, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 0.2749445676274945, | |
| "grad_norm": 2.234487771987915, | |
| "learning_rate": 4.9961264171777515e-06, | |
| "loss": 1.5745362043380737, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.2793791574279379, | |
| "grad_norm": 1.0086833238601685, | |
| "learning_rate": 4.995627249937755e-06, | |
| "loss": 0.9223954677581787, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 0.2838137472283814, | |
| "grad_norm": 2.619391679763794, | |
| "learning_rate": 4.995097870508711e-06, | |
| "loss": 1.2081269025802612, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.28824833702882485, | |
| "grad_norm": 4.811182022094727, | |
| "learning_rate": 4.994538286012777e-06, | |
| "loss": 0.9236148595809937, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.2926829268292683, | |
| "grad_norm": 2.1846261024475098, | |
| "learning_rate": 4.993948503978484e-06, | |
| "loss": 0.9870991706848145, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 0.29711751662971175, | |
| "grad_norm": 11.924750328063965, | |
| "learning_rate": 4.993328532340633e-06, | |
| "loss": 0.9675296545028687, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 0.30155210643015523, | |
| "grad_norm": 7.58044958114624, | |
| "learning_rate": 4.99267837944019e-06, | |
| "loss": 1.133386254310608, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.30598669623059865, | |
| "grad_norm": 1.9771738052368164, | |
| "learning_rate": 4.991998054024172e-06, | |
| "loss": 0.792849600315094, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 0.31042128603104213, | |
| "grad_norm": 1.3070610761642456, | |
| "learning_rate": 4.991287565245534e-06, | |
| "loss": 1.035190463066101, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.3148558758314856, | |
| "grad_norm": 0.9725500345230103, | |
| "learning_rate": 4.990546922663039e-06, | |
| "loss": 0.9571182727813721, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 0.31929046563192903, | |
| "grad_norm": 1.864827036857605, | |
| "learning_rate": 4.989776136241134e-06, | |
| "loss": 0.9656538367271423, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.3237250554323725, | |
| "grad_norm": 2.245912551879883, | |
| "learning_rate": 4.988975216349814e-06, | |
| "loss": 0.9354503154754639, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 0.328159645232816, | |
| "grad_norm": 1.4899214506149292, | |
| "learning_rate": 4.988144173764486e-06, | |
| "loss": 1.385457992553711, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 0.3325942350332594, | |
| "grad_norm": 3.3478896617889404, | |
| "learning_rate": 4.987283019665817e-06, | |
| "loss": 1.1480491161346436, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.3370288248337029, | |
| "grad_norm": 1.2977336645126343, | |
| "learning_rate": 4.986391765639592e-06, | |
| "loss": 1.0610523223876953, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 0.34146341463414637, | |
| "grad_norm": 0.8803158402442932, | |
| "learning_rate": 4.985470423676551e-06, | |
| "loss": 1.26231050491333, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 0.3458980044345898, | |
| "grad_norm": 1.620816946029663, | |
| "learning_rate": 4.984519006172232e-06, | |
| "loss": 1.1941092014312744, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 0.35033259423503327, | |
| "grad_norm": 1.560025930404663, | |
| "learning_rate": 4.983537525926804e-06, | |
| "loss": 1.2882779836654663, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 0.35476718403547675, | |
| "grad_norm": 0.5345287919044495, | |
| "learning_rate": 4.982525996144891e-06, | |
| "loss": 1.0993400812149048, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.35920177383592017, | |
| "grad_norm": 0.7228168249130249, | |
| "learning_rate": 4.981484430435399e-06, | |
| "loss": 0.8929040431976318, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 0.36363636363636365, | |
| "grad_norm": 1.204595685005188, | |
| "learning_rate": 4.98041284281133e-06, | |
| "loss": 0.8954707980155945, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 0.36807095343680707, | |
| "grad_norm": 0.7818751931190491, | |
| "learning_rate": 4.979311247689596e-06, | |
| "loss": 1.2652803659439087, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 0.37250554323725055, | |
| "grad_norm": 1.151502013206482, | |
| "learning_rate": 4.978179659890821e-06, | |
| "loss": 1.2041521072387695, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 0.376940133037694, | |
| "grad_norm": 1.6603472232818604, | |
| "learning_rate": 4.977018094639146e-06, | |
| "loss": 1.227750301361084, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.38137472283813745, | |
| "grad_norm": 2.9484810829162598, | |
| "learning_rate": 4.975826567562023e-06, | |
| "loss": 0.7771618366241455, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 0.3858093126385809, | |
| "grad_norm": 2.0457444190979004, | |
| "learning_rate": 4.97460509469e-06, | |
| "loss": 1.6198290586471558, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 0.3902439024390244, | |
| "grad_norm": 1.2279316186904907, | |
| "learning_rate": 4.973353692456513e-06, | |
| "loss": 1.2602885961532593, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 0.3946784922394678, | |
| "grad_norm": 1.4660323858261108, | |
| "learning_rate": 4.972072377697661e-06, | |
| "loss": 1.3121440410614014, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 0.3991130820399113, | |
| "grad_norm": 0.714910089969635, | |
| "learning_rate": 4.9707611676519775e-06, | |
| "loss": 1.0512202978134155, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.4035476718403548, | |
| "grad_norm": 1.5509921312332153, | |
| "learning_rate": 4.969420079960203e-06, | |
| "loss": 1.259682059288025, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 0.4079822616407982, | |
| "grad_norm": 1.5183836221694946, | |
| "learning_rate": 4.968049132665045e-06, | |
| "loss": 0.9290481805801392, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 0.4124168514412417, | |
| "grad_norm": 2.068286180496216, | |
| "learning_rate": 4.966648344210936e-06, | |
| "loss": 0.9747883677482605, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 0.41685144124168516, | |
| "grad_norm": 0.7379282712936401, | |
| "learning_rate": 4.965217733443782e-06, | |
| "loss": 0.9480677843093872, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 0.4212860310421286, | |
| "grad_norm": 1.9371439218521118, | |
| "learning_rate": 4.963757319610716e-06, | |
| "loss": 0.9953845143318176, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.42572062084257206, | |
| "grad_norm": 2.3159310817718506, | |
| "learning_rate": 4.962267122359835e-06, | |
| "loss": 0.8490515947341919, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 0.43015521064301554, | |
| "grad_norm": 2.2765939235687256, | |
| "learning_rate": 4.960747161739931e-06, | |
| "loss": 1.2720048427581787, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 0.43458980044345896, | |
| "grad_norm": 3.2983458042144775, | |
| "learning_rate": 4.9591974582002324e-06, | |
| "loss": 1.5371360778808594, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 0.43902439024390244, | |
| "grad_norm": 1.635307788848877, | |
| "learning_rate": 4.957618032590118e-06, | |
| "loss": 1.2765772342681885, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 0.4434589800443459, | |
| "grad_norm": 0.527726411819458, | |
| "learning_rate": 4.956008906158842e-06, | |
| "loss": 1.1208865642547607, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.44789356984478934, | |
| "grad_norm": 1.2475026845932007, | |
| "learning_rate": 4.954370100555249e-06, | |
| "loss": 1.2454034090042114, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 0.4523281596452328, | |
| "grad_norm": 0.8595089316368103, | |
| "learning_rate": 4.952701637827476e-06, | |
| "loss": 1.209128737449646, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 0.4567627494456763, | |
| "grad_norm": 1.4940823316574097, | |
| "learning_rate": 4.951003540422668e-06, | |
| "loss": 1.0659428834915161, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 0.4611973392461197, | |
| "grad_norm": 2.2497997283935547, | |
| "learning_rate": 4.949275831186663e-06, | |
| "loss": 1.0191223621368408, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 0.4656319290465632, | |
| "grad_norm": 3.156873941421509, | |
| "learning_rate": 4.947518533363691e-06, | |
| "loss": 0.6471428871154785, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.4700665188470067, | |
| "grad_norm": 9.676581382751465, | |
| "learning_rate": 4.945731670596062e-06, | |
| "loss": 0.849310040473938, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 0.4745011086474501, | |
| "grad_norm": 0.766165018081665, | |
| "learning_rate": 4.943915266923845e-06, | |
| "loss": 1.0647257566452026, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 0.4789356984478936, | |
| "grad_norm": 1.1118712425231934, | |
| "learning_rate": 4.942069346784547e-06, | |
| "loss": 1.082270622253418, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 0.48337028824833705, | |
| "grad_norm": 1.1239439249038696, | |
| "learning_rate": 4.940193935012785e-06, | |
| "loss": 1.113852858543396, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 0.4878048780487805, | |
| "grad_norm": 1.8485164642333984, | |
| "learning_rate": 4.938289056839946e-06, | |
| "loss": 1.1989140510559082, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.49223946784922396, | |
| "grad_norm": 1.7778033018112183, | |
| "learning_rate": 4.936354737893854e-06, | |
| "loss": 1.223215103149414, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 0.49667405764966743, | |
| "grad_norm": 0.9021375775337219, | |
| "learning_rate": 4.934391004198424e-06, | |
| "loss": 1.1974425315856934, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 0.5011086474501109, | |
| "grad_norm": 0.6981101036071777, | |
| "learning_rate": 4.932397882173307e-06, | |
| "loss": 1.20035982131958, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 0.5055432372505543, | |
| "grad_norm": 0.9951314330101013, | |
| "learning_rate": 4.930375398633543e-06, | |
| "loss": 1.2479407787322998, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 0.5099778270509978, | |
| "grad_norm": 2.1762142181396484, | |
| "learning_rate": 4.928323580789192e-06, | |
| "loss": 1.8489172458648682, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.5144124168514412, | |
| "grad_norm": 1.2985577583312988, | |
| "learning_rate": 4.926242456244973e-06, | |
| "loss": 0.8134359121322632, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 0.5188470066518847, | |
| "grad_norm": 1.1328946352005005, | |
| "learning_rate": 4.924132052999892e-06, | |
| "loss": 1.2583706378936768, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 0.5232815964523282, | |
| "grad_norm": 0.6931086182594299, | |
| "learning_rate": 4.921992399446861e-06, | |
| "loss": 0.9323728084564209, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 0.5277161862527716, | |
| "grad_norm": 1.0881880521774292, | |
| "learning_rate": 4.919823524372323e-06, | |
| "loss": 0.9777665734291077, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 0.532150776053215, | |
| "grad_norm": 2.0472190380096436, | |
| "learning_rate": 4.91762545695586e-06, | |
| "loss": 1.2541948556900024, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.5365853658536586, | |
| "grad_norm": 1.1526095867156982, | |
| "learning_rate": 4.9153982267698e-06, | |
| "loss": 1.3235599994659424, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 0.541019955654102, | |
| "grad_norm": 2.487705707550049, | |
| "learning_rate": 4.913141863778822e-06, | |
| "loss": 0.9698494672775269, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 0.5454545454545454, | |
| "grad_norm": 0.8203330039978027, | |
| "learning_rate": 4.910856398339553e-06, | |
| "loss": 1.2535961866378784, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 0.549889135254989, | |
| "grad_norm": 1.1372792720794678, | |
| "learning_rate": 4.9085418612001545e-06, | |
| "loss": 1.4743397235870361, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 0.5543237250554324, | |
| "grad_norm": 1.7914355993270874, | |
| "learning_rate": 4.906198283499916e-06, | |
| "loss": 1.214085340499878, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.5587583148558758, | |
| "grad_norm": 3.3823254108428955, | |
| "learning_rate": 4.903825696768829e-06, | |
| "loss": 0.6424598693847656, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 0.5631929046563193, | |
| "grad_norm": 1.305862545967102, | |
| "learning_rate": 4.901424132927172e-06, | |
| "loss": 1.470249891281128, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 0.5676274944567627, | |
| "grad_norm": 1.1991181373596191, | |
| "learning_rate": 4.898993624285069e-06, | |
| "loss": 1.3120447397232056, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 0.5720620842572062, | |
| "grad_norm": 0.9037002921104431, | |
| "learning_rate": 4.896534203542062e-06, | |
| "loss": 1.316646933555603, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 0.5764966740576497, | |
| "grad_norm": 1.464098572731018, | |
| "learning_rate": 4.894045903786675e-06, | |
| "loss": 1.2750022411346436, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.5809312638580931, | |
| "grad_norm": 2.3139257431030273, | |
| "learning_rate": 4.891528758495961e-06, | |
| "loss": 0.7120662927627563, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 0.5853658536585366, | |
| "grad_norm": 1.463733196258545, | |
| "learning_rate": 4.888982801535053e-06, | |
| "loss": 1.3662123680114746, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 0.5898004434589801, | |
| "grad_norm": 1.677898645401001, | |
| "learning_rate": 4.886408067156712e-06, | |
| "loss": 1.0462416410446167, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 0.5942350332594235, | |
| "grad_norm": 0.9817636013031006, | |
| "learning_rate": 4.883804590000865e-06, | |
| "loss": 1.5489835739135742, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 0.5986696230598669, | |
| "grad_norm": 2.1008689403533936, | |
| "learning_rate": 4.881172405094138e-06, | |
| "loss": 1.1634057760238647, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.6031042128603105, | |
| "grad_norm": 1.2963234186172485, | |
| "learning_rate": 4.878511547849383e-06, | |
| "loss": 1.2225638628005981, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 0.6075388026607539, | |
| "grad_norm": 1.1285306215286255, | |
| "learning_rate": 4.875822054065203e-06, | |
| "loss": 1.2115212678909302, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 0.6119733924611973, | |
| "grad_norm": 1.0443319082260132, | |
| "learning_rate": 4.8731039599254754e-06, | |
| "loss": 1.2144972085952759, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 0.6164079822616408, | |
| "grad_norm": 9.736445426940918, | |
| "learning_rate": 4.870357301998856e-06, | |
| "loss": 1.210550308227539, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 0.6208425720620843, | |
| "grad_norm": 0.8090811967849731, | |
| "learning_rate": 4.867582117238294e-06, | |
| "loss": 1.284433126449585, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.6252771618625277, | |
| "grad_norm": 1.3445334434509277, | |
| "learning_rate": 4.864778442980532e-06, | |
| "loss": 0.8542180061340332, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 0.6297117516629712, | |
| "grad_norm": 1.9137824773788452, | |
| "learning_rate": 4.861946316945605e-06, | |
| "loss": 1.2416703701019287, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 0.6341463414634146, | |
| "grad_norm": 1.3683984279632568, | |
| "learning_rate": 4.859085777236331e-06, | |
| "loss": 1.3251290321350098, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 0.6385809312638581, | |
| "grad_norm": 0.9978231191635132, | |
| "learning_rate": 4.8561968623377985e-06, | |
| "loss": 1.2392802238464355, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 0.6430155210643016, | |
| "grad_norm": 0.7207977175712585, | |
| "learning_rate": 4.853279611116852e-06, | |
| "loss": 1.2283697128295898, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.647450110864745, | |
| "grad_norm": 1.396116018295288, | |
| "learning_rate": 4.850334062821566e-06, | |
| "loss": 1.3465940952301025, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 0.6518847006651884, | |
| "grad_norm": 0.916964590549469, | |
| "learning_rate": 4.8473602570807185e-06, | |
| "loss": 0.9178141951560974, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 0.656319290465632, | |
| "grad_norm": 0.9746919870376587, | |
| "learning_rate": 4.844358233903254e-06, | |
| "loss": 0.9849019050598145, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 0.6607538802660754, | |
| "grad_norm": 1.0369846820831299, | |
| "learning_rate": 4.841328033677753e-06, | |
| "loss": 1.1890286207199097, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 0.6651884700665188, | |
| "grad_norm": 1.0989309549331665, | |
| "learning_rate": 4.83826969717188e-06, | |
| "loss": 1.2377430200576782, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.6696230598669624, | |
| "grad_norm": 1.5578566789627075, | |
| "learning_rate": 4.835183265531843e-06, | |
| "loss": 1.1927175521850586, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 0.6740576496674058, | |
| "grad_norm": 1.3345965147018433, | |
| "learning_rate": 4.832068780281831e-06, | |
| "loss": 1.2588074207305908, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 0.6784922394678492, | |
| "grad_norm": 1.7896565198898315, | |
| "learning_rate": 4.828926283323464e-06, | |
| "loss": 1.2271308898925781, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 0.6829268292682927, | |
| "grad_norm": 2.4165589809417725, | |
| "learning_rate": 4.8257558169352254e-06, | |
| "loss": 0.8486894965171814, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 0.6873614190687362, | |
| "grad_norm": 1.6142799854278564, | |
| "learning_rate": 4.8225574237718906e-06, | |
| "loss": 1.2312774658203125, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.6917960088691796, | |
| "grad_norm": 1.035621166229248, | |
| "learning_rate": 4.819331146863958e-06, | |
| "loss": 1.2014350891113281, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 0.6962305986696231, | |
| "grad_norm": 0.8215665817260742, | |
| "learning_rate": 4.8160770296170685e-06, | |
| "loss": 1.2188996076583862, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 0.7006651884700665, | |
| "grad_norm": 0.8507355451583862, | |
| "learning_rate": 4.812795115811419e-06, | |
| "loss": 1.3254660367965698, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 0.70509977827051, | |
| "grad_norm": 0.8648268580436707, | |
| "learning_rate": 4.809485449601177e-06, | |
| "loss": 0.978560745716095, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 0.7095343680709535, | |
| "grad_norm": 1.1685987710952759, | |
| "learning_rate": 4.806148075513883e-06, | |
| "loss": 0.964942991733551, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.7139689578713969, | |
| "grad_norm": 6.244750499725342, | |
| "learning_rate": 4.802783038449857e-06, | |
| "loss": 0.9973806142807007, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 0.7184035476718403, | |
| "grad_norm": 2.577277660369873, | |
| "learning_rate": 4.799390383681587e-06, | |
| "loss": 1.064439058303833, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 0.7228381374722838, | |
| "grad_norm": 1.7076307535171509, | |
| "learning_rate": 4.795970156853124e-06, | |
| "loss": 1.0868260860443115, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 0.7272727272727273, | |
| "grad_norm": 0.3369574248790741, | |
| "learning_rate": 4.792522403979471e-06, | |
| "loss": 0.7968674302101135, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 0.7317073170731707, | |
| "grad_norm": 0.9717708230018616, | |
| "learning_rate": 4.789047171445957e-06, | |
| "loss": 0.8146457672119141, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.7361419068736141, | |
| "grad_norm": 1.6244539022445679, | |
| "learning_rate": 4.785544506007619e-06, | |
| "loss": 1.3726072311401367, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 0.7405764966740577, | |
| "grad_norm": 3.454514980316162, | |
| "learning_rate": 4.782014454788566e-06, | |
| "loss": 1.1831202507019043, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 0.7450110864745011, | |
| "grad_norm": 0.8379015922546387, | |
| "learning_rate": 4.778457065281355e-06, | |
| "loss": 1.216602087020874, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 0.7494456762749445, | |
| "grad_norm": 2.0432989597320557, | |
| "learning_rate": 4.774872385346345e-06, | |
| "loss": 0.6669434309005737, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 0.753880266075388, | |
| "grad_norm": 2.758876085281372, | |
| "learning_rate": 4.7712604632110524e-06, | |
| "loss": 0.3195689618587494, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.7583148558758315, | |
| "grad_norm": 7.965346336364746, | |
| "learning_rate": 4.767621347469506e-06, | |
| "loss": 0.8559633493423462, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 0.7627494456762749, | |
| "grad_norm": 0.7290642261505127, | |
| "learning_rate": 4.7639550870815895e-06, | |
| "loss": 1.2730909585952759, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 0.7671840354767184, | |
| "grad_norm": 0.8464753031730652, | |
| "learning_rate": 4.760261731372388e-06, | |
| "loss": 1.2238872051239014, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 0.7716186252771619, | |
| "grad_norm": 4.048059940338135, | |
| "learning_rate": 4.75654133003152e-06, | |
| "loss": 1.084853172302246, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 0.7760532150776053, | |
| "grad_norm": 0.9353682994842529, | |
| "learning_rate": 4.752793933112469e-06, | |
| "loss": 1.2124656438827515, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.7804878048780488, | |
| "grad_norm": 1.724868655204773, | |
| "learning_rate": 4.749019591031914e-06, | |
| "loss": 1.137851595878601, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 0.7849223946784922, | |
| "grad_norm": 1.9649287462234497, | |
| "learning_rate": 4.745218354569045e-06, | |
| "loss": 1.0045366287231445, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 0.7893569844789357, | |
| "grad_norm": 2.2398102283477783, | |
| "learning_rate": 4.741390274864885e-06, | |
| "loss": 1.210289716720581, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 0.7937915742793792, | |
| "grad_norm": 2.9946892261505127, | |
| "learning_rate": 4.737535403421601e-06, | |
| "loss": 1.250780701637268, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 0.7982261640798226, | |
| "grad_norm": 1.1408754587173462, | |
| "learning_rate": 4.733653792101809e-06, | |
| "loss": 1.2131381034851074, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.802660753880266, | |
| "grad_norm": 3.3678433895111084, | |
| "learning_rate": 4.729745493127878e-06, | |
| "loss": 0.5941082835197449, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 0.8070953436807096, | |
| "grad_norm": 1.4601346254348755, | |
| "learning_rate": 4.725810559081227e-06, | |
| "loss": 1.3125383853912354, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 0.811529933481153, | |
| "grad_norm": 0.950433075428009, | |
| "learning_rate": 4.7218490429016175e-06, | |
| "loss": 1.186044692993164, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 0.8159645232815964, | |
| "grad_norm": 1.5122184753417969, | |
| "learning_rate": 4.717860997886442e-06, | |
| "loss": 0.9993484020233154, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 0.8203991130820399, | |
| "grad_norm": 1.5292415618896484, | |
| "learning_rate": 4.713846477690005e-06, | |
| "loss": 0.8163633942604065, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.8248337028824834, | |
| "grad_norm": 1.121957540512085, | |
| "learning_rate": 4.709805536322804e-06, | |
| "loss": 1.213725209236145, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 0.8292682926829268, | |
| "grad_norm": 0.9955883026123047, | |
| "learning_rate": 4.7057382281508e-06, | |
| "loss": 1.2182434797286987, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 0.8337028824833703, | |
| "grad_norm": 1.1312615871429443, | |
| "learning_rate": 4.701644607894687e-06, | |
| "loss": 1.1773383617401123, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 0.8381374722838137, | |
| "grad_norm": 1.2919820547103882, | |
| "learning_rate": 4.697524730629159e-06, | |
| "loss": 1.1870676279067993, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 0.8425720620842572, | |
| "grad_norm": 1.1439024209976196, | |
| "learning_rate": 4.693378651782162e-06, | |
| "loss": 0.7241402268409729, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.8470066518847007, | |
| "grad_norm": 2.4431052207946777, | |
| "learning_rate": 4.689206427134155e-06, | |
| "loss": 1.2909201383590698, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 0.8514412416851441, | |
| "grad_norm": 2.413292169570923, | |
| "learning_rate": 4.6850081128173595e-06, | |
| "loss": 1.1248009204864502, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 0.8558758314855875, | |
| "grad_norm": 1.0476144552230835, | |
| "learning_rate": 4.680783765314994e-06, | |
| "loss": 1.2368849515914917, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 0.8603104212860311, | |
| "grad_norm": 0.5809259414672852, | |
| "learning_rate": 4.6765334414605315e-06, | |
| "loss": 1.134302020072937, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 0.8647450110864745, | |
| "grad_norm": 0.950793981552124, | |
| "learning_rate": 4.672257198436918e-06, | |
| "loss": 1.2347341775894165, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.8691796008869179, | |
| "grad_norm": 1.9269758462905884, | |
| "learning_rate": 4.667955093775814e-06, | |
| "loss": 0.9339362978935242, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 0.8736141906873615, | |
| "grad_norm": 2.1874947547912598, | |
| "learning_rate": 4.663627185356818e-06, | |
| "loss": 1.2027480602264404, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 0.8780487804878049, | |
| "grad_norm": 0.8487191200256348, | |
| "learning_rate": 4.65927353140668e-06, | |
| "loss": 1.1914976835250854, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 0.8824833702882483, | |
| "grad_norm": 1.360918641090393, | |
| "learning_rate": 4.654894190498534e-06, | |
| "loss": 1.194710373878479, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 0.8869179600886918, | |
| "grad_norm": 0.5938453078269958, | |
| "learning_rate": 4.650489221551095e-06, | |
| "loss": 0.43390318751335144, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.8913525498891353, | |
| "grad_norm": 1.3284165859222412, | |
| "learning_rate": 4.646058683827874e-06, | |
| "loss": 1.0268417596817017, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 0.8957871396895787, | |
| "grad_norm": 0.35066622495651245, | |
| "learning_rate": 4.641602636936378e-06, | |
| "loss": 0.9203835725784302, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 0.9002217294900222, | |
| "grad_norm": 2.698913335800171, | |
| "learning_rate": 4.637121140827311e-06, | |
| "loss": 1.253874659538269, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 0.9046563192904656, | |
| "grad_norm": 1.8617788553237915, | |
| "learning_rate": 4.632614255793762e-06, | |
| "loss": 1.1092817783355713, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 0.9090909090909091, | |
| "grad_norm": 1.344133734703064, | |
| "learning_rate": 4.6280820424704e-06, | |
| "loss": 1.053206443786621, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.9135254988913526, | |
| "grad_norm": 1.601177453994751, | |
| "learning_rate": 4.623524561832653e-06, | |
| "loss": 1.221002459526062, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 0.917960088691796, | |
| "grad_norm": 0.8422732353210449, | |
| "learning_rate": 4.618941875195893e-06, | |
| "loss": 1.2346315383911133, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 0.9223946784922394, | |
| "grad_norm": 1.5042940378189087, | |
| "learning_rate": 4.614334044214606e-06, | |
| "loss": 0.959531307220459, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 0.926829268292683, | |
| "grad_norm": 30.954843521118164, | |
| "learning_rate": 4.6097011308815645e-06, | |
| "loss": 1.2459607124328613, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 0.9312638580931264, | |
| "grad_norm": 36.690757751464844, | |
| "learning_rate": 4.605043197526996e-06, | |
| "loss": 0.7548654079437256, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.9356984478935698, | |
| "grad_norm": 3.826604127883911, | |
| "learning_rate": 4.600360306817738e-06, | |
| "loss": 1.448906421661377, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 0.9401330376940134, | |
| "grad_norm": 12.927244186401367, | |
| "learning_rate": 4.595652521756403e-06, | |
| "loss": 1.0475653409957886, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 0.9445676274944568, | |
| "grad_norm": 6.169033527374268, | |
| "learning_rate": 4.590919905680524e-06, | |
| "loss": 1.184989094734192, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 0.9490022172949002, | |
| "grad_norm": 0.7973089814186096, | |
| "learning_rate": 4.5861625222617065e-06, | |
| "loss": 1.069338321685791, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 0.9534368070953437, | |
| "grad_norm": 2.250502586364746, | |
| "learning_rate": 4.58138043550477e-06, | |
| "loss": 0.6720293164253235, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.9578713968957872, | |
| "grad_norm": 0.9973225593566895, | |
| "learning_rate": 4.576573709746887e-06, | |
| "loss": 1.2099367380142212, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 0.9623059866962306, | |
| "grad_norm": 1.777103304862976, | |
| "learning_rate": 4.5717424096567205e-06, | |
| "loss": 1.053672194480896, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 0.9667405764966741, | |
| "grad_norm": 1.1499074697494507, | |
| "learning_rate": 4.566886600233547e-06, | |
| "loss": 1.2612353563308716, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 0.9711751662971175, | |
| "grad_norm": 0.8228787779808044, | |
| "learning_rate": 4.56200634680639e-06, | |
| "loss": 1.2220462560653687, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 0.975609756097561, | |
| "grad_norm": 2.36310076713562, | |
| "learning_rate": 4.557101715033136e-06, | |
| "loss": 0.7764140367507935, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.9800443458980045, | |
| "grad_norm": 0.8385096192359924, | |
| "learning_rate": 4.552172770899652e-06, | |
| "loss": 1.01227867603302, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 0.9844789356984479, | |
| "grad_norm": 1.1155610084533691, | |
| "learning_rate": 4.547219580718899e-06, | |
| "loss": 1.261846661567688, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 0.9889135254988913, | |
| "grad_norm": 1.4040062427520752, | |
| "learning_rate": 4.542242211130039e-06, | |
| "loss": 1.1938276290893555, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 0.9933481152993349, | |
| "grad_norm": 1.3951795101165771, | |
| "learning_rate": 4.537240729097539e-06, | |
| "loss": 1.228947401046753, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 0.9977827050997783, | |
| "grad_norm": 1.0445842742919922, | |
| "learning_rate": 4.532215201910269e-06, | |
| "loss": 1.021051287651062, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 1.0022172949002217, | |
| "grad_norm": 3.247546911239624, | |
| "learning_rate": 4.527165697180598e-06, | |
| "loss": 1.091888189315796, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 1.0066518847006651, | |
| "grad_norm": 1.5735362768173218, | |
| "learning_rate": 4.522092282843481e-06, | |
| "loss": 1.2799968719482422, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 1.0110864745011086, | |
| "grad_norm": 37.99199676513672, | |
| "learning_rate": 4.516995027155554e-06, | |
| "loss": 1.2860794067382812, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 1.0155210643015522, | |
| "grad_norm": 0.8153271079063416, | |
| "learning_rate": 4.511873998694204e-06, | |
| "loss": 0.8918830156326294, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 1.0199556541019956, | |
| "grad_norm": 0.31422239542007446, | |
| "learning_rate": 4.506729266356651e-06, | |
| "loss": 0.8340937495231628, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 1.024390243902439, | |
| "grad_norm": 1.3617857694625854, | |
| "learning_rate": 4.5015608993590276e-06, | |
| "loss": 0.5565242171287537, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 1.0288248337028825, | |
| "grad_norm": 1.1211755275726318, | |
| "learning_rate": 4.4963689672354375e-06, | |
| "loss": 0.864283561706543, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 1.033259423503326, | |
| "grad_norm": 1.0255306959152222, | |
| "learning_rate": 4.491153539837026e-06, | |
| "loss": 0.720465898513794, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 1.0376940133037693, | |
| "grad_norm": 1.1541537046432495, | |
| "learning_rate": 4.4859146873310375e-06, | |
| "loss": 1.0676193237304688, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 1.042128603104213, | |
| "grad_norm": 1.8817100524902344, | |
| "learning_rate": 4.480652480199873e-06, | |
| "loss": 0.6701173782348633, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 1.0465631929046564, | |
| "grad_norm": 2.1447463035583496, | |
| "learning_rate": 4.475366989240147e-06, | |
| "loss": 1.1118934154510498, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 1.0509977827050998, | |
| "grad_norm": 1.540354609489441, | |
| "learning_rate": 4.470058285561721e-06, | |
| "loss": 1.037009835243225, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 1.0554323725055432, | |
| "grad_norm": 0.8637385368347168, | |
| "learning_rate": 4.464726440586761e-06, | |
| "loss": 1.051493525505066, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 1.0598669623059866, | |
| "grad_norm": 1.036024570465088, | |
| "learning_rate": 4.45937152604877e-06, | |
| "loss": 1.0941234827041626, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 1.06430155210643, | |
| "grad_norm": 1.068137526512146, | |
| "learning_rate": 4.453993613991622e-06, | |
| "loss": 0.6281552910804749, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 1.0687361419068737, | |
| "grad_norm": 1.500739574432373, | |
| "learning_rate": 4.4485927767685995e-06, | |
| "loss": 1.1264927387237549, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 1.0731707317073171, | |
| "grad_norm": 0.5921480059623718, | |
| "learning_rate": 4.443169087041409e-06, | |
| "loss": 0.9420715570449829, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 1.0776053215077606, | |
| "grad_norm": 0.7887612581253052, | |
| "learning_rate": 4.4377226177792145e-06, | |
| "loss": 1.0924605131149292, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 1.082039911308204, | |
| "grad_norm": 1.0860843658447266, | |
| "learning_rate": 4.432253442257649e-06, | |
| "loss": 0.7638985514640808, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 1.0864745011086474, | |
| "grad_norm": 1.021136999130249, | |
| "learning_rate": 4.426761634057831e-06, | |
| "loss": 0.9648894667625427, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 1.0909090909090908, | |
| "grad_norm": 1.4795594215393066, | |
| "learning_rate": 4.421247267065375e-06, | |
| "loss": 1.113416314125061, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 1.0953436807095343, | |
| "grad_norm": 4.473145484924316, | |
| "learning_rate": 4.415710415469394e-06, | |
| "loss": 1.06260085105896, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 1.099778270509978, | |
| "grad_norm": 2.0926437377929688, | |
| "learning_rate": 4.410151153761506e-06, | |
| "loss": 0.8711175918579102, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 1.1042128603104213, | |
| "grad_norm": 2.1538195610046387, | |
| "learning_rate": 4.404569556734832e-06, | |
| "loss": 1.0097200870513916, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 1.1086474501108647, | |
| "grad_norm": 0.7072110176086426, | |
| "learning_rate": 4.398965699482984e-06, | |
| "loss": 0.7574429512023926, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 1.1130820399113082, | |
| "grad_norm": 0.6986474990844727, | |
| "learning_rate": 4.39333965739906e-06, | |
| "loss": 1.0583008527755737, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 1.1175166297117516, | |
| "grad_norm": 2.5247440338134766, | |
| "learning_rate": 4.3876915061746275e-06, | |
| "loss": 0.9256818294525146, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 1.1219512195121952, | |
| "grad_norm": 2.264343738555908, | |
| "learning_rate": 4.382021321798707e-06, | |
| "loss": 1.0718340873718262, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 1.1263858093126387, | |
| "grad_norm": 2.3817667961120605, | |
| "learning_rate": 4.376329180556745e-06, | |
| "loss": 0.9168416857719421, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 1.130820399113082, | |
| "grad_norm": 1.3641257286071777, | |
| "learning_rate": 4.370615159029594e-06, | |
| "loss": 1.1087809801101685, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 1.1352549889135255, | |
| "grad_norm": 2.9134786128997803, | |
| "learning_rate": 4.36487933409248e-06, | |
| "loss": 0.5451399683952332, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 1.139689578713969, | |
| "grad_norm": 1.1057777404785156, | |
| "learning_rate": 4.359121782913964e-06, | |
| "loss": 0.4450605511665344, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 1.1441241685144123, | |
| "grad_norm": 1.3963892459869385, | |
| "learning_rate": 4.3533425829549085e-06, | |
| "loss": 0.9792951345443726, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 1.1485587583148558, | |
| "grad_norm": 1.4983371496200562, | |
| "learning_rate": 4.347541811967436e-06, | |
| "loss": 1.1121540069580078, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 1.1529933481152994, | |
| "grad_norm": 0.9903774261474609, | |
| "learning_rate": 4.341719547993879e-06, | |
| "loss": 1.130444049835205, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 1.1574279379157428, | |
| "grad_norm": 1.0208805799484253, | |
| "learning_rate": 4.335875869365732e-06, | |
| "loss": 0.5359130501747131, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 1.1618625277161863, | |
| "grad_norm": 1.0656358003616333, | |
| "learning_rate": 4.330010854702598e-06, | |
| "loss": 1.042147159576416, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 1.1662971175166297, | |
| "grad_norm": 2.328760862350464, | |
| "learning_rate": 4.3241245829111324e-06, | |
| "loss": 1.1488664150238037, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 1.170731707317073, | |
| "grad_norm": 1.1009130477905273, | |
| "learning_rate": 4.318217133183978e-06, | |
| "loss": 0.7168905138969421, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 1.1751662971175167, | |
| "grad_norm": 0.8763664364814758, | |
| "learning_rate": 4.312288584998697e-06, | |
| "loss": 0.8310704827308655, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 1.1796008869179602, | |
| "grad_norm": 0.7050794363021851, | |
| "learning_rate": 4.306339018116714e-06, | |
| "loss": 0.9871019124984741, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 1.1840354767184036, | |
| "grad_norm": 1.0369445085525513, | |
| "learning_rate": 4.300368512582227e-06, | |
| "loss": 1.096502423286438, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 1.188470066518847, | |
| "grad_norm": 0.4809003472328186, | |
| "learning_rate": 4.294377148721144e-06, | |
| "loss": 0.8078370094299316, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 1.1929046563192904, | |
| "grad_norm": 1.3127416372299194, | |
| "learning_rate": 4.288365007139991e-06, | |
| "loss": 1.0696966648101807, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 1.1973392461197339, | |
| "grad_norm": 1.225541591644287, | |
| "learning_rate": 4.2823321687248386e-06, | |
| "loss": 0.6737713813781738, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 1.2017738359201773, | |
| "grad_norm": 4.297253608703613, | |
| "learning_rate": 4.276278714640203e-06, | |
| "loss": 0.7530418634414673, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 1.206208425720621, | |
| "grad_norm": 2.4006433486938477, | |
| "learning_rate": 4.270204726327963e-06, | |
| "loss": 1.1115124225616455, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 1.2106430155210643, | |
| "grad_norm": 0.5028179883956909, | |
| "learning_rate": 4.264110285506259e-06, | |
| "loss": 0.8297025561332703, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 1.2150776053215078, | |
| "grad_norm": 1.1112703084945679, | |
| "learning_rate": 4.257995474168395e-06, | |
| "loss": 1.2000458240509033, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 1.2195121951219512, | |
| "grad_norm": 7.750529766082764, | |
| "learning_rate": 4.251860374581736e-06, | |
| "loss": 0.5883951783180237, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 1.2239467849223946, | |
| "grad_norm": 1.483959436416626, | |
| "learning_rate": 4.245705069286601e-06, | |
| "loss": 1.0737543106079102, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 1.2283813747228383, | |
| "grad_norm": 4.382837772369385, | |
| "learning_rate": 4.239529641095149e-06, | |
| "loss": 0.7496063709259033, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 1.2328159645232817, | |
| "grad_norm": 0.482736200094223, | |
| "learning_rate": 4.233334173090274e-06, | |
| "loss": 0.7637969255447388, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 1.237250554323725, | |
| "grad_norm": 1.713169813156128, | |
| "learning_rate": 4.227118748624478e-06, | |
| "loss": 0.73581862449646, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 1.2416851441241685, | |
| "grad_norm": 2.0154945850372314, | |
| "learning_rate": 4.220883451318753e-06, | |
| "loss": 1.2916665077209473, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 1.246119733924612, | |
| "grad_norm": 1.1945637464523315, | |
| "learning_rate": 4.2146283650614545e-06, | |
| "loss": 0.9394684433937073, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 1.2505543237250554, | |
| "grad_norm": 0.8182294368743896, | |
| "learning_rate": 4.208353574007179e-06, | |
| "loss": 0.6380811333656311, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 1.2549889135254988, | |
| "grad_norm": 1.0067088603973389, | |
| "learning_rate": 4.202059162575622e-06, | |
| "loss": 0.9561277627944946, | |
| "step": 566 | |
| }, | |
| { | |
| "epoch": 1.2594235033259422, | |
| "grad_norm": 1.1284490823745728, | |
| "learning_rate": 4.195745215450451e-06, | |
| "loss": 0.9579231142997742, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 1.2638580931263859, | |
| "grad_norm": 0.6526016592979431, | |
| "learning_rate": 4.189411817578159e-06, | |
| "loss": 1.1114004850387573, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 1.2682926829268293, | |
| "grad_norm": 0.7916569113731384, | |
| "learning_rate": 4.1830590541669304e-06, | |
| "loss": 0.7596962451934814, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 1.2727272727272727, | |
| "grad_norm": 1.7166008949279785, | |
| "learning_rate": 4.176687010685484e-06, | |
| "loss": 1.2456161975860596, | |
| "step": 574 | |
| }, | |
| { | |
| "epoch": 1.2771618625277161, | |
| "grad_norm": 0.9254941344261169, | |
| "learning_rate": 4.170295772861931e-06, | |
| "loss": 0.9804297685623169, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 1.2815964523281598, | |
| "grad_norm": 0.8669252395629883, | |
| "learning_rate": 4.163885426682619e-06, | |
| "loss": 1.1345466375350952, | |
| "step": 578 | |
| }, | |
| { | |
| "epoch": 1.2860310421286032, | |
| "grad_norm": 0.9402420520782471, | |
| "learning_rate": 4.157456058390977e-06, | |
| "loss": 1.0704562664031982, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 1.2904656319290466, | |
| "grad_norm": 6.258746147155762, | |
| "learning_rate": 4.151007754486351e-06, | |
| "loss": 1.2947583198547363, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 1.29490022172949, | |
| "grad_norm": 0.737083375453949, | |
| "learning_rate": 4.144540601722843e-06, | |
| "loss": 0.782565712928772, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 1.2993348115299335, | |
| "grad_norm": 1.295681118965149, | |
| "learning_rate": 4.138054687108143e-06, | |
| "loss": 0.829471230506897, | |
| "step": 586 | |
| }, | |
| { | |
| "epoch": 1.3037694013303769, | |
| "grad_norm": 2.530069351196289, | |
| "learning_rate": 4.131550097902361e-06, | |
| "loss": 0.5070698857307434, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 1.3082039911308203, | |
| "grad_norm": 4.558071613311768, | |
| "learning_rate": 4.125026921616852e-06, | |
| "loss": 1.0417896509170532, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 1.3126385809312637, | |
| "grad_norm": 1.0858033895492554, | |
| "learning_rate": 4.118485246013031e-06, | |
| "loss": 1.0292699337005615, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 1.3170731707317074, | |
| "grad_norm": 2.0192573070526123, | |
| "learning_rate": 4.111925159101208e-06, | |
| "loss": 1.0664730072021484, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 1.3215077605321508, | |
| "grad_norm": 0.6075845956802368, | |
| "learning_rate": 4.1053467491393864e-06, | |
| "loss": 1.077016830444336, | |
| "step": 596 | |
| }, | |
| { | |
| "epoch": 1.3259423503325942, | |
| "grad_norm": 1.1085582971572876, | |
| "learning_rate": 4.098750104632091e-06, | |
| "loss": 1.0554149150848389, | |
| "step": 598 | |
| }, | |
| { | |
| "epoch": 1.3303769401330376, | |
| "grad_norm": 0.8516935110092163, | |
| "learning_rate": 4.092135314329165e-06, | |
| "loss": 0.41985565423965454, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 1.3348115299334813, | |
| "grad_norm": 1.4308011531829834, | |
| "learning_rate": 4.085502467224583e-06, | |
| "loss": 1.0022186040878296, | |
| "step": 602 | |
| }, | |
| { | |
| "epoch": 1.3392461197339247, | |
| "grad_norm": 0.866374135017395, | |
| "learning_rate": 4.078851652555254e-06, | |
| "loss": 0.8239214420318604, | |
| "step": 604 | |
| }, | |
| { | |
| "epoch": 1.3436807095343681, | |
| "grad_norm": 0.39041706919670105, | |
| "learning_rate": 4.072182959799816e-06, | |
| "loss": 0.7566671967506409, | |
| "step": 606 | |
| }, | |
| { | |
| "epoch": 1.3481152993348116, | |
| "grad_norm": 3.046255588531494, | |
| "learning_rate": 4.065496478677436e-06, | |
| "loss": 1.0992430448532104, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 1.352549889135255, | |
| "grad_norm": 2.365644693374634, | |
| "learning_rate": 4.058792299146602e-06, | |
| "loss": 1.1112772226333618, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 1.3569844789356984, | |
| "grad_norm": 0.9957023859024048, | |
| "learning_rate": 4.052070511403912e-06, | |
| "loss": 0.6697097420692444, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 1.3614190687361418, | |
| "grad_norm": 1.1376887559890747, | |
| "learning_rate": 4.045331205882863e-06, | |
| "loss": 1.117611050605774, | |
| "step": 614 | |
| }, | |
| { | |
| "epoch": 1.3658536585365852, | |
| "grad_norm": 0.6812132596969604, | |
| "learning_rate": 4.038574473252629e-06, | |
| "loss": 0.7285422086715698, | |
| "step": 616 | |
| }, | |
| { | |
| "epoch": 1.370288248337029, | |
| "grad_norm": 1.371950626373291, | |
| "learning_rate": 4.031800404416849e-06, | |
| "loss": 0.9932656288146973, | |
| "step": 618 | |
| }, | |
| { | |
| "epoch": 1.3747228381374723, | |
| "grad_norm": 0.4800332188606262, | |
| "learning_rate": 4.025009090512394e-06, | |
| "loss": 0.05759064108133316, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 1.3791574279379157, | |
| "grad_norm": 0.9680930972099304, | |
| "learning_rate": 4.018200622908153e-06, | |
| "loss": 1.0663511753082275, | |
| "step": 622 | |
| }, | |
| { | |
| "epoch": 1.3835920177383592, | |
| "grad_norm": 1.1375492811203003, | |
| "learning_rate": 4.011375093203793e-06, | |
| "loss": 0.9414310455322266, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 1.3880266075388026, | |
| "grad_norm": 1.8018168210983276, | |
| "learning_rate": 4.004532593228531e-06, | |
| "loss": 0.9270746111869812, | |
| "step": 626 | |
| }, | |
| { | |
| "epoch": 1.3924611973392462, | |
| "grad_norm": 0.7995503544807434, | |
| "learning_rate": 3.997673215039899e-06, | |
| "loss": 1.022778868675232, | |
| "step": 628 | |
| }, | |
| { | |
| "epoch": 1.3968957871396896, | |
| "grad_norm": 0.6482570171356201, | |
| "learning_rate": 3.990797050922506e-06, | |
| "loss": 1.0561681985855103, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 1.401330376940133, | |
| "grad_norm": 0.9474309086799622, | |
| "learning_rate": 3.9839041933867954e-06, | |
| "loss": 0.7767112851142883, | |
| "step": 632 | |
| }, | |
| { | |
| "epoch": 1.4057649667405765, | |
| "grad_norm": 0.9471772313117981, | |
| "learning_rate": 3.976994735167796e-06, | |
| "loss": 0.9378006458282471, | |
| "step": 634 | |
| }, | |
| { | |
| "epoch": 1.41019955654102, | |
| "grad_norm": 3.042051315307617, | |
| "learning_rate": 3.970068769223884e-06, | |
| "loss": 1.0586607456207275, | |
| "step": 636 | |
| }, | |
| { | |
| "epoch": 1.4146341463414633, | |
| "grad_norm": 1.6717044115066528, | |
| "learning_rate": 3.963126388735525e-06, | |
| "loss": 0.7560147047042847, | |
| "step": 638 | |
| }, | |
| { | |
| "epoch": 1.4190687361419068, | |
| "grad_norm": 0.8269588351249695, | |
| "learning_rate": 3.956167687104021e-06, | |
| "loss": 0.6895716190338135, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 1.4235033259423504, | |
| "grad_norm": 2.3829591274261475, | |
| "learning_rate": 3.9491927579502584e-06, | |
| "loss": 0.8010018467903137, | |
| "step": 642 | |
| }, | |
| { | |
| "epoch": 1.4279379157427938, | |
| "grad_norm": 5.464539527893066, | |
| "learning_rate": 3.9422016951134415e-06, | |
| "loss": 0.6167224645614624, | |
| "step": 644 | |
| }, | |
| { | |
| "epoch": 1.4323725055432373, | |
| "grad_norm": 0.9182257056236267, | |
| "learning_rate": 3.935194592649836e-06, | |
| "loss": 1.2061957120895386, | |
| "step": 646 | |
| }, | |
| { | |
| "epoch": 1.4368070953436807, | |
| "grad_norm": 1.2997310161590576, | |
| "learning_rate": 3.928171544831501e-06, | |
| "loss": 1.0815800428390503, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 1.441241685144124, | |
| "grad_norm": 0.9832943081855774, | |
| "learning_rate": 3.921132646145019e-06, | |
| "loss": 1.1462043523788452, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 1.4456762749445677, | |
| "grad_norm": 4.495982646942139, | |
| "learning_rate": 3.914077991290232e-06, | |
| "loss": 0.9017251133918762, | |
| "step": 652 | |
| }, | |
| { | |
| "epoch": 1.4501108647450112, | |
| "grad_norm": 3.023244619369507, | |
| "learning_rate": 3.907007675178956e-06, | |
| "loss": 1.071535348892212, | |
| "step": 654 | |
| }, | |
| { | |
| "epoch": 1.4545454545454546, | |
| "grad_norm": 1.0500205755233765, | |
| "learning_rate": 3.899921792933713e-06, | |
| "loss": 0.8729349374771118, | |
| "step": 656 | |
| }, | |
| { | |
| "epoch": 1.458980044345898, | |
| "grad_norm": 0.5229496359825134, | |
| "learning_rate": 3.892820439886448e-06, | |
| "loss": 0.7276408076286316, | |
| "step": 658 | |
| }, | |
| { | |
| "epoch": 1.4634146341463414, | |
| "grad_norm": 1.74760901927948, | |
| "learning_rate": 3.885703711577249e-06, | |
| "loss": 1.0792577266693115, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 1.4678492239467849, | |
| "grad_norm": 0.6744844913482666, | |
| "learning_rate": 3.8785717037530555e-06, | |
| "loss": 1.04912269115448, | |
| "step": 662 | |
| }, | |
| { | |
| "epoch": 1.4722838137472283, | |
| "grad_norm": 0.9498162865638733, | |
| "learning_rate": 3.871424512366377e-06, | |
| "loss": 0.7877933382987976, | |
| "step": 664 | |
| }, | |
| { | |
| "epoch": 1.476718403547672, | |
| "grad_norm": 0.9392883777618408, | |
| "learning_rate": 3.864262233574e-06, | |
| "loss": 0.5171116590499878, | |
| "step": 666 | |
| }, | |
| { | |
| "epoch": 1.4811529933481153, | |
| "grad_norm": 2.990640640258789, | |
| "learning_rate": 3.857084963735689e-06, | |
| "loss": 0.6348138451576233, | |
| "step": 668 | |
| }, | |
| { | |
| "epoch": 1.4855875831485588, | |
| "grad_norm": 1.4360038042068481, | |
| "learning_rate": 3.849892799412902e-06, | |
| "loss": 0.9652891159057617, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 1.4900221729490022, | |
| "grad_norm": 0.7203241586685181, | |
| "learning_rate": 3.84268583736748e-06, | |
| "loss": 1.0772919654846191, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 1.4944567627494456, | |
| "grad_norm": 0.7038840055465698, | |
| "learning_rate": 3.835464174560349e-06, | |
| "loss": 0.6485112905502319, | |
| "step": 674 | |
| }, | |
| { | |
| "epoch": 1.4988913525498893, | |
| "grad_norm": 5.711329460144043, | |
| "learning_rate": 3.828227908150217e-06, | |
| "loss": 0.7235292196273804, | |
| "step": 676 | |
| }, | |
| { | |
| "epoch": 1.5033259423503327, | |
| "grad_norm": 0.828093409538269, | |
| "learning_rate": 3.820977135492266e-06, | |
| "loss": 1.063615083694458, | |
| "step": 678 | |
| }, | |
| { | |
| "epoch": 1.507760532150776, | |
| "grad_norm": 0.7944739460945129, | |
| "learning_rate": 3.8137119541368415e-06, | |
| "loss": 0.659883975982666, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 1.5121951219512195, | |
| "grad_norm": 7.873995304107666, | |
| "learning_rate": 3.80643246182814e-06, | |
| "loss": 0.5578336715698242, | |
| "step": 682 | |
| }, | |
| { | |
| "epoch": 1.516629711751663, | |
| "grad_norm": 1.194551706314087, | |
| "learning_rate": 3.7991387565028963e-06, | |
| "loss": 1.11123526096344, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 1.5210643015521064, | |
| "grad_norm": 1.1621936559677124, | |
| "learning_rate": 3.791830936289062e-06, | |
| "loss": 0.9926000833511353, | |
| "step": 686 | |
| }, | |
| { | |
| "epoch": 1.5254988913525498, | |
| "grad_norm": 1.093554139137268, | |
| "learning_rate": 3.784509099504488e-06, | |
| "loss": 0.5783771872520447, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 1.5299334811529932, | |
| "grad_norm": 0.998589277267456, | |
| "learning_rate": 3.7771733446556025e-06, | |
| "loss": 0.25470170378685, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 1.5343680709534369, | |
| "grad_norm": 1.2918857336044312, | |
| "learning_rate": 3.7698237704360826e-06, | |
| "loss": 0.8159583806991577, | |
| "step": 692 | |
| }, | |
| { | |
| "epoch": 1.5388026607538803, | |
| "grad_norm": 3.156062364578247, | |
| "learning_rate": 3.7624604757255297e-06, | |
| "loss": 0.914296567440033, | |
| "step": 694 | |
| }, | |
| { | |
| "epoch": 1.5432372505543237, | |
| "grad_norm": 1.473464012145996, | |
| "learning_rate": 3.7550835595881365e-06, | |
| "loss": 0.6349583268165588, | |
| "step": 696 | |
| }, | |
| { | |
| "epoch": 1.5476718403547673, | |
| "grad_norm": 0.6629686951637268, | |
| "learning_rate": 3.747693121271355e-06, | |
| "loss": 1.1163161993026733, | |
| "step": 698 | |
| }, | |
| { | |
| "epoch": 1.5521064301552108, | |
| "grad_norm": 1.4136548042297363, | |
| "learning_rate": 3.740289260204565e-06, | |
| "loss": 1.0595505237579346, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 1.5565410199556542, | |
| "grad_norm": 0.9893305897712708, | |
| "learning_rate": 3.732872075997729e-06, | |
| "loss": 1.0329627990722656, | |
| "step": 702 | |
| }, | |
| { | |
| "epoch": 1.5609756097560976, | |
| "grad_norm": 1.2765549421310425, | |
| "learning_rate": 3.725441668440058e-06, | |
| "loss": 0.8997711539268494, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 1.565410199556541, | |
| "grad_norm": 25.827316284179688, | |
| "learning_rate": 3.7179981374986683e-06, | |
| "loss": 0.47515010833740234, | |
| "step": 706 | |
| }, | |
| { | |
| "epoch": 1.5698447893569845, | |
| "grad_norm": 1.0069860219955444, | |
| "learning_rate": 3.710541583317233e-06, | |
| "loss": 1.101843237876892, | |
| "step": 708 | |
| }, | |
| { | |
| "epoch": 1.5742793791574279, | |
| "grad_norm": 1.7606300115585327, | |
| "learning_rate": 3.70307210621464e-06, | |
| "loss": 0.8734806776046753, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 1.5787139689578713, | |
| "grad_norm": 4.4100141525268555, | |
| "learning_rate": 3.695589806683636e-06, | |
| "loss": 0.6428739428520203, | |
| "step": 712 | |
| }, | |
| { | |
| "epoch": 1.5831485587583147, | |
| "grad_norm": 0.7855361104011536, | |
| "learning_rate": 3.68809478538948e-06, | |
| "loss": 1.0313284397125244, | |
| "step": 714 | |
| }, | |
| { | |
| "epoch": 1.5875831485587582, | |
| "grad_norm": 1.0333298444747925, | |
| "learning_rate": 3.6805871431685875e-06, | |
| "loss": 1.0764654874801636, | |
| "step": 716 | |
| }, | |
| { | |
| "epoch": 1.5920177383592018, | |
| "grad_norm": 3.6170754432678223, | |
| "learning_rate": 3.6730669810271707e-06, | |
| "loss": 1.3448973894119263, | |
| "step": 718 | |
| }, | |
| { | |
| "epoch": 1.5964523281596452, | |
| "grad_norm": 1.6662654876708984, | |
| "learning_rate": 3.665534400139885e-06, | |
| "loss": 0.5263369679450989, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 1.6008869179600886, | |
| "grad_norm": 0.41445672512054443, | |
| "learning_rate": 3.6579895018484635e-06, | |
| "loss": 0.5438380837440491, | |
| "step": 722 | |
| }, | |
| { | |
| "epoch": 1.6053215077605323, | |
| "grad_norm": 4.3432183265686035, | |
| "learning_rate": 3.650432387660354e-06, | |
| "loss": 1.0684770345687866, | |
| "step": 724 | |
| }, | |
| { | |
| "epoch": 1.6097560975609757, | |
| "grad_norm": 0.9178838133811951, | |
| "learning_rate": 3.6428631592473584e-06, | |
| "loss": 1.0635815858840942, | |
| "step": 726 | |
| }, | |
| { | |
| "epoch": 1.6141906873614191, | |
| "grad_norm": 1.0494548082351685, | |
| "learning_rate": 3.6352819184442552e-06, | |
| "loss": 0.7950181365013123, | |
| "step": 728 | |
| }, | |
| { | |
| "epoch": 1.6186252771618626, | |
| "grad_norm": 0.6856399178504944, | |
| "learning_rate": 3.6276887672474374e-06, | |
| "loss": 1.0625864267349243, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 1.623059866962306, | |
| "grad_norm": 0.8747734427452087, | |
| "learning_rate": 3.620083807813541e-06, | |
| "loss": 1.0343061685562134, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 1.6274944567627494, | |
| "grad_norm": 0.7687981128692627, | |
| "learning_rate": 3.6124671424580633e-06, | |
| "loss": 1.0561656951904297, | |
| "step": 734 | |
| }, | |
| { | |
| "epoch": 1.6319290465631928, | |
| "grad_norm": 0.4724279046058655, | |
| "learning_rate": 3.604838873653991e-06, | |
| "loss": 0.7322379350662231, | |
| "step": 736 | |
| }, | |
| { | |
| "epoch": 1.6363636363636362, | |
| "grad_norm": 0.8618527054786682, | |
| "learning_rate": 3.597199104030424e-06, | |
| "loss": 1.174364447593689, | |
| "step": 738 | |
| }, | |
| { | |
| "epoch": 1.6407982261640797, | |
| "grad_norm": 1.1735891103744507, | |
| "learning_rate": 3.589547936371189e-06, | |
| "loss": 1.0586884021759033, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 1.6452328159645233, | |
| "grad_norm": 1.6278800964355469, | |
| "learning_rate": 3.58188547361346e-06, | |
| "loss": 1.2099087238311768, | |
| "step": 742 | |
| }, | |
| { | |
| "epoch": 1.6496674057649667, | |
| "grad_norm": 8.375886917114258, | |
| "learning_rate": 3.574211818846374e-06, | |
| "loss": 0.8451070189476013, | |
| "step": 744 | |
| }, | |
| { | |
| "epoch": 1.6541019955654102, | |
| "grad_norm": 4.998831272125244, | |
| "learning_rate": 3.566527075309641e-06, | |
| "loss": 0.9085059762001038, | |
| "step": 746 | |
| }, | |
| { | |
| "epoch": 1.6585365853658538, | |
| "grad_norm": 2.622978687286377, | |
| "learning_rate": 3.558831346392159e-06, | |
| "loss": 0.7893009185791016, | |
| "step": 748 | |
| }, | |
| { | |
| "epoch": 1.6629711751662972, | |
| "grad_norm": 1.087239384651184, | |
| "learning_rate": 3.5511247356306205e-06, | |
| "loss": 1.0533859729766846, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 1.6674057649667406, | |
| "grad_norm": 0.8943765163421631, | |
| "learning_rate": 3.5434073467081183e-06, | |
| "loss": 0.9193379282951355, | |
| "step": 752 | |
| }, | |
| { | |
| "epoch": 1.671840354767184, | |
| "grad_norm": 0.7251817584037781, | |
| "learning_rate": 3.5356792834527533e-06, | |
| "loss": 0.555644154548645, | |
| "step": 754 | |
| }, | |
| { | |
| "epoch": 1.6762749445676275, | |
| "grad_norm": 0.9888861775398254, | |
| "learning_rate": 3.527940649836238e-06, | |
| "loss": 0.9255459308624268, | |
| "step": 756 | |
| }, | |
| { | |
| "epoch": 1.680709534368071, | |
| "grad_norm": 0.9948540925979614, | |
| "learning_rate": 3.520191549972494e-06, | |
| "loss": 1.0330082178115845, | |
| "step": 758 | |
| }, | |
| { | |
| "epoch": 1.6851441241685143, | |
| "grad_norm": 1.4498493671417236, | |
| "learning_rate": 3.512432088116255e-06, | |
| "loss": 0.9195749759674072, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 1.6895787139689578, | |
| "grad_norm": 2.5919599533081055, | |
| "learning_rate": 3.5046623686616627e-06, | |
| "loss": 1.0231900215148926, | |
| "step": 762 | |
| }, | |
| { | |
| "epoch": 1.6940133037694012, | |
| "grad_norm": 1.6839133501052856, | |
| "learning_rate": 3.496882496140861e-06, | |
| "loss": 1.0705158710479736, | |
| "step": 764 | |
| }, | |
| { | |
| "epoch": 1.6984478935698448, | |
| "grad_norm": 2.3896384239196777, | |
| "learning_rate": 3.4890925752225935e-06, | |
| "loss": 0.9851964116096497, | |
| "step": 766 | |
| }, | |
| { | |
| "epoch": 1.7028824833702882, | |
| "grad_norm": 1.7617751359939575, | |
| "learning_rate": 3.48129271071079e-06, | |
| "loss": 0.8785147666931152, | |
| "step": 768 | |
| }, | |
| { | |
| "epoch": 1.7073170731707317, | |
| "grad_norm": 0.7179524302482605, | |
| "learning_rate": 3.4734830075431605e-06, | |
| "loss": 1.0462019443511963, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 1.7117516629711753, | |
| "grad_norm": 99.32469177246094, | |
| "learning_rate": 3.4656635707897823e-06, | |
| "loss": 1.0003533363342285, | |
| "step": 772 | |
| }, | |
| { | |
| "epoch": 1.7161862527716187, | |
| "grad_norm": 1.4013028144836426, | |
| "learning_rate": 3.457834505651687e-06, | |
| "loss": 0.9330251216888428, | |
| "step": 774 | |
| }, | |
| { | |
| "epoch": 1.7206208425720622, | |
| "grad_norm": 1.3153235912322998, | |
| "learning_rate": 3.449995917459442e-06, | |
| "loss": 1.068703532218933, | |
| "step": 776 | |
| }, | |
| { | |
| "epoch": 1.7250554323725056, | |
| "grad_norm": 1.7329002618789673, | |
| "learning_rate": 3.4421479116717394e-06, | |
| "loss": 1.0447511672973633, | |
| "step": 778 | |
| }, | |
| { | |
| "epoch": 1.729490022172949, | |
| "grad_norm": 1.7924240827560425, | |
| "learning_rate": 3.4342905938739707e-06, | |
| "loss": 0.6974395513534546, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 1.7339246119733924, | |
| "grad_norm": 0.8617153763771057, | |
| "learning_rate": 3.4264240697768096e-06, | |
| "loss": 1.005650281906128, | |
| "step": 782 | |
| }, | |
| { | |
| "epoch": 1.7383592017738358, | |
| "grad_norm": 2.041933298110962, | |
| "learning_rate": 3.418548445214791e-06, | |
| "loss": 0.7548515200614929, | |
| "step": 784 | |
| }, | |
| { | |
| "epoch": 1.7427937915742793, | |
| "grad_norm": 1.0399845838546753, | |
| "learning_rate": 3.410663826144884e-06, | |
| "loss": 0.7364926338195801, | |
| "step": 786 | |
| }, | |
| { | |
| "epoch": 1.7472283813747227, | |
| "grad_norm": 1.0632048845291138, | |
| "learning_rate": 3.4027703186450672e-06, | |
| "loss": 0.6422973871231079, | |
| "step": 788 | |
| }, | |
| { | |
| "epoch": 1.7516629711751663, | |
| "grad_norm": 2.727888822555542, | |
| "learning_rate": 3.394868028912906e-06, | |
| "loss": 0.6838661432266235, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 1.7560975609756098, | |
| "grad_norm": 0.47422733902931213, | |
| "learning_rate": 3.386957063264115e-06, | |
| "loss": 0.6767208576202393, | |
| "step": 792 | |
| }, | |
| { | |
| "epoch": 1.7605321507760532, | |
| "grad_norm": 0.7666518688201904, | |
| "learning_rate": 3.3790375281311355e-06, | |
| "loss": 0.8840131759643555, | |
| "step": 794 | |
| }, | |
| { | |
| "epoch": 1.7649667405764968, | |
| "grad_norm": 1.755710244178772, | |
| "learning_rate": 3.3711095300617015e-06, | |
| "loss": 0.9250625371932983, | |
| "step": 796 | |
| }, | |
| { | |
| "epoch": 1.7694013303769403, | |
| "grad_norm": 0.984292209148407, | |
| "learning_rate": 3.3631731757174048e-06, | |
| "loss": 1.0221278667449951, | |
| "step": 798 | |
| }, | |
| { | |
| "epoch": 1.7738359201773837, | |
| "grad_norm": 2.9353697299957275, | |
| "learning_rate": 3.3552285718722593e-06, | |
| "loss": 1.0566647052764893, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 1.778270509977827, | |
| "grad_norm": 2.145174980163574, | |
| "learning_rate": 3.3472758254112662e-06, | |
| "loss": 1.1013846397399902, | |
| "step": 802 | |
| }, | |
| { | |
| "epoch": 1.7827050997782705, | |
| "grad_norm": 1.3626816272735596, | |
| "learning_rate": 3.3393150433289795e-06, | |
| "loss": 1.1163952350616455, | |
| "step": 804 | |
| }, | |
| { | |
| "epoch": 1.787139689578714, | |
| "grad_norm": 1.1192476749420166, | |
| "learning_rate": 3.3313463327280576e-06, | |
| "loss": 0.7770203948020935, | |
| "step": 806 | |
| }, | |
| { | |
| "epoch": 1.7915742793791574, | |
| "grad_norm": 1.5108779668807983, | |
| "learning_rate": 3.3233698008178306e-06, | |
| "loss": 1.042984962463379, | |
| "step": 808 | |
| }, | |
| { | |
| "epoch": 1.7960088691796008, | |
| "grad_norm": 1.3956575393676758, | |
| "learning_rate": 3.3153855549128537e-06, | |
| "loss": 0.5932509899139404, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 1.8004434589800442, | |
| "grad_norm": 5.858518600463867, | |
| "learning_rate": 3.3073937024314647e-06, | |
| "loss": 0.5802476406097412, | |
| "step": 812 | |
| }, | |
| { | |
| "epoch": 1.8048780487804879, | |
| "grad_norm": 1.5974634885787964, | |
| "learning_rate": 3.2993943508943386e-06, | |
| "loss": 1.1237906217575073, | |
| "step": 814 | |
| }, | |
| { | |
| "epoch": 1.8093126385809313, | |
| "grad_norm": 2.2132883071899414, | |
| "learning_rate": 3.291387607923041e-06, | |
| "loss": 0.9052677154541016, | |
| "step": 816 | |
| }, | |
| { | |
| "epoch": 1.8137472283813747, | |
| "grad_norm": 1.916467547416687, | |
| "learning_rate": 3.283373581238582e-06, | |
| "loss": 0.4801388084888458, | |
| "step": 818 | |
| }, | |
| { | |
| "epoch": 1.8181818181818183, | |
| "grad_norm": 0.9333745241165161, | |
| "learning_rate": 3.2753523786599618e-06, | |
| "loss": 1.098004698753357, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 1.8226164079822618, | |
| "grad_norm": 0.9473229050636292, | |
| "learning_rate": 3.2673241081027263e-06, | |
| "loss": 1.0623825788497925, | |
| "step": 822 | |
| }, | |
| { | |
| "epoch": 1.8270509977827052, | |
| "grad_norm": 1.0046288967132568, | |
| "learning_rate": 3.259288877577512e-06, | |
| "loss": 1.1435964107513428, | |
| "step": 824 | |
| }, | |
| { | |
| "epoch": 1.8314855875831486, | |
| "grad_norm": 1.3672471046447754, | |
| "learning_rate": 3.251246795188592e-06, | |
| "loss": 0.9531443119049072, | |
| "step": 826 | |
| }, | |
| { | |
| "epoch": 1.835920177383592, | |
| "grad_norm": 4.6819353103637695, | |
| "learning_rate": 3.243197969132425e-06, | |
| "loss": 0.785484790802002, | |
| "step": 828 | |
| }, | |
| { | |
| "epoch": 1.8403547671840355, | |
| "grad_norm": 0.6413378715515137, | |
| "learning_rate": 3.2351425076961957e-06, | |
| "loss": 1.0066115856170654, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 1.8447893569844789, | |
| "grad_norm": 0.9913519620895386, | |
| "learning_rate": 3.22708051925636e-06, | |
| "loss": 1.0463634729385376, | |
| "step": 832 | |
| }, | |
| { | |
| "epoch": 1.8492239467849223, | |
| "grad_norm": 0.8798125386238098, | |
| "learning_rate": 3.219012112277189e-06, | |
| "loss": 1.014890432357788, | |
| "step": 834 | |
| }, | |
| { | |
| "epoch": 1.8536585365853657, | |
| "grad_norm": 0.8148372769355774, | |
| "learning_rate": 3.210937395309304e-06, | |
| "loss": 1.0661628246307373, | |
| "step": 836 | |
| }, | |
| { | |
| "epoch": 1.8580931263858091, | |
| "grad_norm": 3.0656192302703857, | |
| "learning_rate": 3.202856476988222e-06, | |
| "loss": 1.011720061302185, | |
| "step": 838 | |
| }, | |
| { | |
| "epoch": 1.8625277161862528, | |
| "grad_norm": 0.9508707523345947, | |
| "learning_rate": 3.1947694660328914e-06, | |
| "loss": 1.1549465656280518, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 1.8669623059866962, | |
| "grad_norm": 1.2188172340393066, | |
| "learning_rate": 3.1866764712442273e-06, | |
| "loss": 0.6235454082489014, | |
| "step": 842 | |
| }, | |
| { | |
| "epoch": 1.8713968957871396, | |
| "grad_norm": 1.0734593868255615, | |
| "learning_rate": 3.1785776015036533e-06, | |
| "loss": 0.7762396335601807, | |
| "step": 844 | |
| }, | |
| { | |
| "epoch": 1.8758314855875833, | |
| "grad_norm": 8.548871994018555, | |
| "learning_rate": 3.1704729657716314e-06, | |
| "loss": 0.8318262696266174, | |
| "step": 846 | |
| }, | |
| { | |
| "epoch": 1.8802660753880267, | |
| "grad_norm": 1.0601602792739868, | |
| "learning_rate": 3.1623626730861996e-06, | |
| "loss": 1.0204439163208008, | |
| "step": 848 | |
| }, | |
| { | |
| "epoch": 1.8847006651884701, | |
| "grad_norm": 0.9268496632575989, | |
| "learning_rate": 3.1542468325615e-06, | |
| "loss": 1.2579779624938965, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 1.8891352549889135, | |
| "grad_norm": 1.1618844270706177, | |
| "learning_rate": 3.1461255533863183e-06, | |
| "loss": 0.7151064872741699, | |
| "step": 852 | |
| }, | |
| { | |
| "epoch": 1.893569844789357, | |
| "grad_norm": 1.2911403179168701, | |
| "learning_rate": 3.1379989448226077e-06, | |
| "loss": 1.0588139295578003, | |
| "step": 854 | |
| }, | |
| { | |
| "epoch": 1.8980044345898004, | |
| "grad_norm": 1.2581367492675781, | |
| "learning_rate": 3.1298671162040236e-06, | |
| "loss": 0.9134914875030518, | |
| "step": 856 | |
| }, | |
| { | |
| "epoch": 1.9024390243902438, | |
| "grad_norm": 0.9134765267372131, | |
| "learning_rate": 3.1217301769344488e-06, | |
| "loss": 0.723232090473175, | |
| "step": 858 | |
| }, | |
| { | |
| "epoch": 1.9068736141906872, | |
| "grad_norm": 1.5111923217773438, | |
| "learning_rate": 3.1135882364865262e-06, | |
| "loss": 0.7724399566650391, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 1.9113082039911307, | |
| "grad_norm": 2.6118454933166504, | |
| "learning_rate": 3.105441404400183e-06, | |
| "loss": 1.0999490022659302, | |
| "step": 862 | |
| }, | |
| { | |
| "epoch": 1.9157427937915743, | |
| "grad_norm": 0.951991617679596, | |
| "learning_rate": 3.097289790281155e-06, | |
| "loss": 0.7594307661056519, | |
| "step": 864 | |
| }, | |
| { | |
| "epoch": 1.9201773835920177, | |
| "grad_norm": 5.733903884887695, | |
| "learning_rate": 3.089133503799517e-06, | |
| "loss": 0.6187224388122559, | |
| "step": 866 | |
| }, | |
| { | |
| "epoch": 1.9246119733924612, | |
| "grad_norm": 0.7913883924484253, | |
| "learning_rate": 3.0809726546882045e-06, | |
| "loss": 1.1264593601226807, | |
| "step": 868 | |
| }, | |
| { | |
| "epoch": 1.9290465631929048, | |
| "grad_norm": 0.7077911496162415, | |
| "learning_rate": 3.0728073527415376e-06, | |
| "loss": 1.0579288005828857, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 1.9334811529933482, | |
| "grad_norm": 0.4359738826751709, | |
| "learning_rate": 3.0646377078137424e-06, | |
| "loss": 0.693384051322937, | |
| "step": 872 | |
| }, | |
| { | |
| "epoch": 1.9379157427937916, | |
| "grad_norm": 1.4608731269836426, | |
| "learning_rate": 3.056463829817475e-06, | |
| "loss": 0.6210131049156189, | |
| "step": 874 | |
| }, | |
| { | |
| "epoch": 1.942350332594235, | |
| "grad_norm": 1.902843952178955, | |
| "learning_rate": 3.048285828722345e-06, | |
| "loss": 0.6315315961837769, | |
| "step": 876 | |
| }, | |
| { | |
| "epoch": 1.9467849223946785, | |
| "grad_norm": 0.7998486757278442, | |
| "learning_rate": 3.0401038145534297e-06, | |
| "loss": 0.8021946549415588, | |
| "step": 878 | |
| }, | |
| { | |
| "epoch": 1.951219512195122, | |
| "grad_norm": 1.0074536800384521, | |
| "learning_rate": 3.031917897389799e-06, | |
| "loss": 0.8259488344192505, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 1.9556541019955653, | |
| "grad_norm": 2.5570080280303955, | |
| "learning_rate": 3.0237281873630335e-06, | |
| "loss": 0.6797860860824585, | |
| "step": 882 | |
| }, | |
| { | |
| "epoch": 1.9600886917960088, | |
| "grad_norm": 0.7867917418479919, | |
| "learning_rate": 3.0155347946557407e-06, | |
| "loss": 0.8724421262741089, | |
| "step": 884 | |
| }, | |
| { | |
| "epoch": 1.9645232815964522, | |
| "grad_norm": 0.7262649536132812, | |
| "learning_rate": 3.007337829500075e-06, | |
| "loss": 1.0460387468338013, | |
| "step": 886 | |
| }, | |
| { | |
| "epoch": 1.9689578713968958, | |
| "grad_norm": 0.5823050737380981, | |
| "learning_rate": 2.999137402176255e-06, | |
| "loss": 0.9942973852157593, | |
| "step": 888 | |
| }, | |
| { | |
| "epoch": 1.9733924611973392, | |
| "grad_norm": 0.9097557067871094, | |
| "learning_rate": 2.9909336230110747e-06, | |
| "loss": 1.0488195419311523, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 1.9778270509977827, | |
| "grad_norm": 0.9082350730895996, | |
| "learning_rate": 2.9827266023764274e-06, | |
| "loss": 1.0950303077697754, | |
| "step": 892 | |
| }, | |
| { | |
| "epoch": 1.9822616407982263, | |
| "grad_norm": 1.0125060081481934, | |
| "learning_rate": 2.9745164506878134e-06, | |
| "loss": 1.032450556755066, | |
| "step": 894 | |
| }, | |
| { | |
| "epoch": 1.9866962305986697, | |
| "grad_norm": 2.043154001235962, | |
| "learning_rate": 2.9663032784028596e-06, | |
| "loss": 0.8691053986549377, | |
| "step": 896 | |
| }, | |
| { | |
| "epoch": 1.9911308203991132, | |
| "grad_norm": 1.362161636352539, | |
| "learning_rate": 2.9580871960198297e-06, | |
| "loss": 1.0347143411636353, | |
| "step": 898 | |
| }, | |
| { | |
| "epoch": 1.9955654101995566, | |
| "grad_norm": 0.7828076481819153, | |
| "learning_rate": 2.949868314076142e-06, | |
| "loss": 0.7763835191726685, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 0.5598412752151489, | |
| "learning_rate": 2.941646743146875e-06, | |
| "loss": 1.0524572134017944, | |
| "step": 902 | |
| }, | |
| { | |
| "epoch": 2.0044345898004434, | |
| "grad_norm": 0.6863189339637756, | |
| "learning_rate": 2.9334225938432868e-06, | |
| "loss": 0.8581877946853638, | |
| "step": 904 | |
| }, | |
| { | |
| "epoch": 2.008869179600887, | |
| "grad_norm": 0.7125548720359802, | |
| "learning_rate": 2.925195976811326e-06, | |
| "loss": 0.8792709708213806, | |
| "step": 906 | |
| }, | |
| { | |
| "epoch": 2.0133037694013303, | |
| "grad_norm": 0.6354836821556091, | |
| "learning_rate": 2.9169670027301387e-06, | |
| "loss": 1.0257556438446045, | |
| "step": 908 | |
| }, | |
| { | |
| "epoch": 2.0177383592017737, | |
| "grad_norm": 0.9187865853309631, | |
| "learning_rate": 2.9087357823105843e-06, | |
| "loss": 0.8405720591545105, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 2.022172949002217, | |
| "grad_norm": 1.2554885149002075, | |
| "learning_rate": 2.9005024262937427e-06, | |
| "loss": 0.5283271074295044, | |
| "step": 912 | |
| }, | |
| { | |
| "epoch": 2.0266075388026605, | |
| "grad_norm": 1.8454920053482056, | |
| "learning_rate": 2.8922670454494247e-06, | |
| "loss": 0.6945717334747314, | |
| "step": 914 | |
| }, | |
| { | |
| "epoch": 2.0310421286031044, | |
| "grad_norm": 1.0446429252624512, | |
| "learning_rate": 2.8840297505746843e-06, | |
| "loss": 0.5962017774581909, | |
| "step": 916 | |
| }, | |
| { | |
| "epoch": 2.035476718403548, | |
| "grad_norm": 0.8583077192306519, | |
| "learning_rate": 2.8757906524923286e-06, | |
| "loss": 0.9059169888496399, | |
| "step": 918 | |
| }, | |
| { | |
| "epoch": 2.0399113082039912, | |
| "grad_norm": 0.7749403715133667, | |
| "learning_rate": 2.867549862049419e-06, | |
| "loss": 0.6539976000785828, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 2.0443458980044347, | |
| "grad_norm": 0.7740585207939148, | |
| "learning_rate": 2.859307490115791e-06, | |
| "loss": 0.7305862307548523, | |
| "step": 922 | |
| }, | |
| { | |
| "epoch": 2.048780487804878, | |
| "grad_norm": 0.5781393051147461, | |
| "learning_rate": 2.8510636475825533e-06, | |
| "loss": 0.24691088497638702, | |
| "step": 924 | |
| }, | |
| { | |
| "epoch": 2.0532150776053215, | |
| "grad_norm": 0.9956364035606384, | |
| "learning_rate": 2.8428184453606027e-06, | |
| "loss": 0.8243362307548523, | |
| "step": 926 | |
| }, | |
| { | |
| "epoch": 2.057649667405765, | |
| "grad_norm": 6.8051276206970215, | |
| "learning_rate": 2.8345719943791266e-06, | |
| "loss": 0.6835483312606812, | |
| "step": 928 | |
| }, | |
| { | |
| "epoch": 2.0620842572062084, | |
| "grad_norm": 0.4145214855670929, | |
| "learning_rate": 2.826324405584114e-06, | |
| "loss": 0.5555226802825928, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 2.066518847006652, | |
| "grad_norm": 15.418578147888184, | |
| "learning_rate": 2.818075789936863e-06, | |
| "loss": 0.6951206922531128, | |
| "step": 932 | |
| }, | |
| { | |
| "epoch": 2.070953436807095, | |
| "grad_norm": 2.4657557010650635, | |
| "learning_rate": 2.8098262584124834e-06, | |
| "loss": 0.9028570652008057, | |
| "step": 934 | |
| }, | |
| { | |
| "epoch": 2.0753880266075386, | |
| "grad_norm": 1.7383790016174316, | |
| "learning_rate": 2.801575921998411e-06, | |
| "loss": 0.8572009205818176, | |
| "step": 936 | |
| }, | |
| { | |
| "epoch": 2.079822616407982, | |
| "grad_norm": 0.8971472382545471, | |
| "learning_rate": 2.7933248916929066e-06, | |
| "loss": 0.6242398023605347, | |
| "step": 938 | |
| }, | |
| { | |
| "epoch": 2.084257206208426, | |
| "grad_norm": 0.8894023299217224, | |
| "learning_rate": 2.7850732785035705e-06, | |
| "loss": 0.5844857692718506, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 2.0886917960088693, | |
| "grad_norm": 1.6542750597000122, | |
| "learning_rate": 2.7768211934458417e-06, | |
| "loss": 0.8088274002075195, | |
| "step": 942 | |
| }, | |
| { | |
| "epoch": 2.0931263858093128, | |
| "grad_norm": 1.1526122093200684, | |
| "learning_rate": 2.768568747541509e-06, | |
| "loss": 0.48685911297798157, | |
| "step": 944 | |
| }, | |
| { | |
| "epoch": 2.097560975609756, | |
| "grad_norm": 0.9583385586738586, | |
| "learning_rate": 2.7603160518172152e-06, | |
| "loss": 0.8856199979782104, | |
| "step": 946 | |
| }, | |
| { | |
| "epoch": 2.1019955654101996, | |
| "grad_norm": 1.043729543685913, | |
| "learning_rate": 2.752063217302966e-06, | |
| "loss": 0.8684977293014526, | |
| "step": 948 | |
| }, | |
| { | |
| "epoch": 2.106430155210643, | |
| "grad_norm": 1.6375235319137573, | |
| "learning_rate": 2.743810355030631e-06, | |
| "loss": 0.5918598175048828, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 2.1108647450110865, | |
| "grad_norm": 0.9976754188537598, | |
| "learning_rate": 2.735557576032458e-06, | |
| "loss": 0.7557308673858643, | |
| "step": 952 | |
| }, | |
| { | |
| "epoch": 2.11529933481153, | |
| "grad_norm": 1.8317605257034302, | |
| "learning_rate": 2.727304991339569e-06, | |
| "loss": 0.7505866289138794, | |
| "step": 954 | |
| }, | |
| { | |
| "epoch": 2.1197339246119733, | |
| "grad_norm": 0.8461515307426453, | |
| "learning_rate": 2.7190527119804762e-06, | |
| "loss": 0.8690387010574341, | |
| "step": 956 | |
| }, | |
| { | |
| "epoch": 2.1241685144124167, | |
| "grad_norm": 1.2172333002090454, | |
| "learning_rate": 2.710800848979582e-06, | |
| "loss": 0.7549737691879272, | |
| "step": 958 | |
| }, | |
| { | |
| "epoch": 2.12860310421286, | |
| "grad_norm": 2.6961116790771484, | |
| "learning_rate": 2.702549513355687e-06, | |
| "loss": 0.6273566484451294, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 2.1330376940133036, | |
| "grad_norm": 2.817370653152466, | |
| "learning_rate": 2.694298816120497e-06, | |
| "loss": 0.507022500038147, | |
| "step": 962 | |
| }, | |
| { | |
| "epoch": 2.1374722838137474, | |
| "grad_norm": 3.4701592922210693, | |
| "learning_rate": 2.6860488682771306e-06, | |
| "loss": 0.9122434854507446, | |
| "step": 964 | |
| }, | |
| { | |
| "epoch": 2.141906873614191, | |
| "grad_norm": 1.0491613149642944, | |
| "learning_rate": 2.67779978081862e-06, | |
| "loss": 0.8000502586364746, | |
| "step": 966 | |
| }, | |
| { | |
| "epoch": 2.1463414634146343, | |
| "grad_norm": 0.8666864037513733, | |
| "learning_rate": 2.669551664726428e-06, | |
| "loss": 0.5371144413948059, | |
| "step": 968 | |
| }, | |
| { | |
| "epoch": 2.1507760532150777, | |
| "grad_norm": 0.929885983467102, | |
| "learning_rate": 2.6613046309689433e-06, | |
| "loss": 0.7838866710662842, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 2.155210643015521, | |
| "grad_norm": 1.449052095413208, | |
| "learning_rate": 2.6530587904999966e-06, | |
| "loss": 0.6714781522750854, | |
| "step": 972 | |
| }, | |
| { | |
| "epoch": 2.1596452328159645, | |
| "grad_norm": 1.6018487215042114, | |
| "learning_rate": 2.6448142542573624e-06, | |
| "loss": 0.8650703430175781, | |
| "step": 974 | |
| }, | |
| { | |
| "epoch": 2.164079822616408, | |
| "grad_norm": 1.1995538473129272, | |
| "learning_rate": 2.6365711331612692e-06, | |
| "loss": 0.9092912077903748, | |
| "step": 976 | |
| }, | |
| { | |
| "epoch": 2.1685144124168514, | |
| "grad_norm": 4.522655010223389, | |
| "learning_rate": 2.6283295381129066e-06, | |
| "loss": 0.623193621635437, | |
| "step": 978 | |
| }, | |
| { | |
| "epoch": 2.172949002217295, | |
| "grad_norm": 1.066388487815857, | |
| "learning_rate": 2.620089579992933e-06, | |
| "loss": 0.8715465664863586, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 2.1773835920177382, | |
| "grad_norm": 0.7982610464096069, | |
| "learning_rate": 2.6118513696599823e-06, | |
| "loss": 0.4557168781757355, | |
| "step": 982 | |
| }, | |
| { | |
| "epoch": 2.1818181818181817, | |
| "grad_norm": 2.2363100051879883, | |
| "learning_rate": 2.603615017949178e-06, | |
| "loss": 0.2402995377779007, | |
| "step": 984 | |
| }, | |
| { | |
| "epoch": 2.186252771618625, | |
| "grad_norm": 1.7150230407714844, | |
| "learning_rate": 2.595380635670634e-06, | |
| "loss": 0.6125180721282959, | |
| "step": 986 | |
| }, | |
| { | |
| "epoch": 2.1906873614190685, | |
| "grad_norm": 2.03562331199646, | |
| "learning_rate": 2.5871483336079694e-06, | |
| "loss": 0.3789454698562622, | |
| "step": 988 | |
| }, | |
| { | |
| "epoch": 2.1951219512195124, | |
| "grad_norm": 0.9788453578948975, | |
| "learning_rate": 2.578918222516818e-06, | |
| "loss": 0.8006269335746765, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 2.199556541019956, | |
| "grad_norm": 3.577479600906372, | |
| "learning_rate": 2.5706904131233336e-06, | |
| "loss": 1.1745080947875977, | |
| "step": 992 | |
| }, | |
| { | |
| "epoch": 2.203991130820399, | |
| "grad_norm": 0.9117245078086853, | |
| "learning_rate": 2.5624650161227073e-06, | |
| "loss": 0.752780556678772, | |
| "step": 994 | |
| }, | |
| { | |
| "epoch": 2.2084257206208426, | |
| "grad_norm": 1.0597925186157227, | |
| "learning_rate": 2.5542421421776696e-06, | |
| "loss": 0.92555171251297, | |
| "step": 996 | |
| }, | |
| { | |
| "epoch": 2.212860310421286, | |
| "grad_norm": 2.012413740158081, | |
| "learning_rate": 2.5460219019170097e-06, | |
| "loss": 0.7486417293548584, | |
| "step": 998 | |
| }, | |
| { | |
| "epoch": 2.2172949002217295, | |
| "grad_norm": 1.388223648071289, | |
| "learning_rate": 2.5378044059340845e-06, | |
| "loss": 0.6380313038825989, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 2.221729490022173, | |
| "grad_norm": 2.087331533432007, | |
| "learning_rate": 2.5295897647853283e-06, | |
| "loss": 0.42272722721099854, | |
| "step": 1002 | |
| }, | |
| { | |
| "epoch": 2.2261640798226163, | |
| "grad_norm": 1.6069846153259277, | |
| "learning_rate": 2.521378088988767e-06, | |
| "loss": 0.9431757926940918, | |
| "step": 1004 | |
| }, | |
| { | |
| "epoch": 2.2305986696230597, | |
| "grad_norm": 1.133226990699768, | |
| "learning_rate": 2.513169489022531e-06, | |
| "loss": 0.7965492010116577, | |
| "step": 1006 | |
| }, | |
| { | |
| "epoch": 2.235033259423503, | |
| "grad_norm": 2.120872735977173, | |
| "learning_rate": 2.5049640753233705e-06, | |
| "loss": 0.8802080750465393, | |
| "step": 1008 | |
| }, | |
| { | |
| "epoch": 2.2394678492239466, | |
| "grad_norm": 0.9329570531845093, | |
| "learning_rate": 2.496761958285167e-06, | |
| "loss": 0.5040686130523682, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 2.2439024390243905, | |
| "grad_norm": 0.7371305227279663, | |
| "learning_rate": 2.488563248257451e-06, | |
| "loss": 0.8810028433799744, | |
| "step": 1012 | |
| }, | |
| { | |
| "epoch": 2.248337028824834, | |
| "grad_norm": 5.761690139770508, | |
| "learning_rate": 2.4803680555439136e-06, | |
| "loss": 0.7554865479469299, | |
| "step": 1014 | |
| }, | |
| { | |
| "epoch": 2.2527716186252773, | |
| "grad_norm": 1.0782852172851562, | |
| "learning_rate": 2.4721764904009272e-06, | |
| "loss": 0.8898205161094666, | |
| "step": 1016 | |
| }, | |
| { | |
| "epoch": 2.2572062084257207, | |
| "grad_norm": 1.2129498720169067, | |
| "learning_rate": 2.4639886630360574e-06, | |
| "loss": 0.40009406208992004, | |
| "step": 1018 | |
| }, | |
| { | |
| "epoch": 2.261640798226164, | |
| "grad_norm": 0.9777927994728088, | |
| "learning_rate": 2.455804683606584e-06, | |
| "loss": 0.8841888904571533, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 2.2660753880266076, | |
| "grad_norm": 0.3243866562843323, | |
| "learning_rate": 2.4476246622180174e-06, | |
| "loss": 0.44640085101127625, | |
| "step": 1022 | |
| }, | |
| { | |
| "epoch": 2.270509977827051, | |
| "grad_norm": 2.328533411026001, | |
| "learning_rate": 2.4394487089226158e-06, | |
| "loss": 1.2326172590255737, | |
| "step": 1024 | |
| }, | |
| { | |
| "epoch": 2.2749445676274944, | |
| "grad_norm": 0.8293269872665405, | |
| "learning_rate": 2.43127693371791e-06, | |
| "loss": 0.6043848395347595, | |
| "step": 1026 | |
| }, | |
| { | |
| "epoch": 2.279379157427938, | |
| "grad_norm": 1.0006300210952759, | |
| "learning_rate": 2.423109446545213e-06, | |
| "loss": 0.9160785675048828, | |
| "step": 1028 | |
| }, | |
| { | |
| "epoch": 2.2838137472283813, | |
| "grad_norm": 1.3495242595672607, | |
| "learning_rate": 2.4149463572881537e-06, | |
| "loss": 1.0066651105880737, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 2.2882483370288247, | |
| "grad_norm": 3.227001190185547, | |
| "learning_rate": 2.4067877757711907e-06, | |
| "loss": 0.6590787172317505, | |
| "step": 1032 | |
| }, | |
| { | |
| "epoch": 2.292682926829268, | |
| "grad_norm": 1.3093198537826538, | |
| "learning_rate": 2.3986338117581357e-06, | |
| "loss": 0.6363720893859863, | |
| "step": 1034 | |
| }, | |
| { | |
| "epoch": 2.2971175166297115, | |
| "grad_norm": 8.721003532409668, | |
| "learning_rate": 2.390484574950677e-06, | |
| "loss": 1.0154914855957031, | |
| "step": 1036 | |
| }, | |
| { | |
| "epoch": 2.3015521064301554, | |
| "grad_norm": 2.5220930576324463, | |
| "learning_rate": 2.382340174986906e-06, | |
| "loss": 0.7293195724487305, | |
| "step": 1038 | |
| }, | |
| { | |
| "epoch": 2.305986696230599, | |
| "grad_norm": 1.6561472415924072, | |
| "learning_rate": 2.374200721439837e-06, | |
| "loss": 0.8729753494262695, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 2.3104212860310422, | |
| "grad_norm": 1.198063611984253, | |
| "learning_rate": 2.3660663238159405e-06, | |
| "loss": 0.8993839621543884, | |
| "step": 1042 | |
| }, | |
| { | |
| "epoch": 2.3148558758314857, | |
| "grad_norm": 0.6974464654922485, | |
| "learning_rate": 2.357937091553662e-06, | |
| "loss": 0.6564696431159973, | |
| "step": 1044 | |
| }, | |
| { | |
| "epoch": 2.319290465631929, | |
| "grad_norm": 2.7228505611419678, | |
| "learning_rate": 2.3498131340219554e-06, | |
| "loss": 0.8685024380683899, | |
| "step": 1046 | |
| }, | |
| { | |
| "epoch": 2.3237250554323725, | |
| "grad_norm": 1.095363974571228, | |
| "learning_rate": 2.341694560518809e-06, | |
| "loss": 0.8607962727546692, | |
| "step": 1048 | |
| }, | |
| { | |
| "epoch": 2.328159645232816, | |
| "grad_norm": 3.3042118549346924, | |
| "learning_rate": 2.333581480269776e-06, | |
| "loss": 0.8638893365859985, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 2.3325942350332594, | |
| "grad_norm": 2.431647539138794, | |
| "learning_rate": 2.325474002426503e-06, | |
| "loss": 0.7261286973953247, | |
| "step": 1052 | |
| }, | |
| { | |
| "epoch": 2.337028824833703, | |
| "grad_norm": 1.1630088090896606, | |
| "learning_rate": 2.3173722360652644e-06, | |
| "loss": 0.6455481052398682, | |
| "step": 1054 | |
| }, | |
| { | |
| "epoch": 2.341463414634146, | |
| "grad_norm": 0.9199745655059814, | |
| "learning_rate": 2.309276290185494e-06, | |
| "loss": 0.79603511095047, | |
| "step": 1056 | |
| }, | |
| { | |
| "epoch": 2.3458980044345896, | |
| "grad_norm": 0.4536246955394745, | |
| "learning_rate": 2.3011862737083162e-06, | |
| "loss": 0.508949875831604, | |
| "step": 1058 | |
| }, | |
| { | |
| "epoch": 2.3503325942350335, | |
| "grad_norm": 0.858386218547821, | |
| "learning_rate": 2.2931022954750843e-06, | |
| "loss": 0.9388585090637207, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 2.354767184035477, | |
| "grad_norm": 6.024104118347168, | |
| "learning_rate": 2.285024464245912e-06, | |
| "loss": 0.5697469115257263, | |
| "step": 1062 | |
| }, | |
| { | |
| "epoch": 2.3592017738359203, | |
| "grad_norm": 0.5417823195457458, | |
| "learning_rate": 2.2769528886982158e-06, | |
| "loss": 0.48910415172576904, | |
| "step": 1064 | |
| }, | |
| { | |
| "epoch": 2.3636363636363638, | |
| "grad_norm": 3.6564247608184814, | |
| "learning_rate": 2.268887677425248e-06, | |
| "loss": 0.706069827079773, | |
| "step": 1066 | |
| }, | |
| { | |
| "epoch": 2.368070953436807, | |
| "grad_norm": 1.85207998752594, | |
| "learning_rate": 2.2608289389346362e-06, | |
| "loss": 0.9581363797187805, | |
| "step": 1068 | |
| }, | |
| { | |
| "epoch": 2.3725055432372506, | |
| "grad_norm": 1.0115586519241333, | |
| "learning_rate": 2.2527767816469263e-06, | |
| "loss": 0.847273051738739, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 2.376940133037694, | |
| "grad_norm": 1.160576343536377, | |
| "learning_rate": 2.244731313894121e-06, | |
| "loss": 0.9410176873207092, | |
| "step": 1072 | |
| }, | |
| { | |
| "epoch": 2.3813747228381374, | |
| "grad_norm": 0.41236647963523865, | |
| "learning_rate": 2.236692643918224e-06, | |
| "loss": 0.03157289698719978, | |
| "step": 1074 | |
| }, | |
| { | |
| "epoch": 2.385809312638581, | |
| "grad_norm": 1.3738679885864258, | |
| "learning_rate": 2.2286608798697834e-06, | |
| "loss": 0.652180016040802, | |
| "step": 1076 | |
| }, | |
| { | |
| "epoch": 2.3902439024390243, | |
| "grad_norm": 1.2516071796417236, | |
| "learning_rate": 2.2206361298064343e-06, | |
| "loss": 0.2624179422855377, | |
| "step": 1078 | |
| }, | |
| { | |
| "epoch": 2.3946784922394677, | |
| "grad_norm": 1.7923249006271362, | |
| "learning_rate": 2.2126185016914515e-06, | |
| "loss": 0.7395876049995422, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 2.399113082039911, | |
| "grad_norm": 0.4336431622505188, | |
| "learning_rate": 2.2046081033922884e-06, | |
| "loss": 0.5255239009857178, | |
| "step": 1082 | |
| }, | |
| { | |
| "epoch": 2.4035476718403546, | |
| "grad_norm": 2.713783025741577, | |
| "learning_rate": 2.1966050426791325e-06, | |
| "loss": 0.7811592817306519, | |
| "step": 1084 | |
| }, | |
| { | |
| "epoch": 2.4079822616407984, | |
| "grad_norm": 1.2868162393569946, | |
| "learning_rate": 2.1886094272234508e-06, | |
| "loss": 0.8133082985877991, | |
| "step": 1086 | |
| }, | |
| { | |
| "epoch": 2.412416851441242, | |
| "grad_norm": 0.6888259053230286, | |
| "learning_rate": 2.1806213645965457e-06, | |
| "loss": 0.2685484290122986, | |
| "step": 1088 | |
| }, | |
| { | |
| "epoch": 2.4168514412416853, | |
| "grad_norm": 3.5159847736358643, | |
| "learning_rate": 2.172640962268104e-06, | |
| "loss": 0.8924703598022461, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 2.4212860310421287, | |
| "grad_norm": 1.7184735536575317, | |
| "learning_rate": 2.1646683276047525e-06, | |
| "loss": 0.8745633959770203, | |
| "step": 1092 | |
| }, | |
| { | |
| "epoch": 2.425720620842572, | |
| "grad_norm": 2.238689661026001, | |
| "learning_rate": 2.156703567868615e-06, | |
| "loss": 0.40802738070487976, | |
| "step": 1094 | |
| }, | |
| { | |
| "epoch": 2.4301552106430155, | |
| "grad_norm": 3.6044087409973145, | |
| "learning_rate": 2.148746790215866e-06, | |
| "loss": 0.538159191608429, | |
| "step": 1096 | |
| }, | |
| { | |
| "epoch": 2.434589800443459, | |
| "grad_norm": 1.0334011316299438, | |
| "learning_rate": 2.140798101695291e-06, | |
| "loss": 0.5009183287620544, | |
| "step": 1098 | |
| }, | |
| { | |
| "epoch": 2.4390243902439024, | |
| "grad_norm": 3.447432518005371, | |
| "learning_rate": 2.1328576092468476e-06, | |
| "loss": 0.8873782157897949, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 2.443458980044346, | |
| "grad_norm": 19.567584991455078, | |
| "learning_rate": 2.124925419700223e-06, | |
| "loss": 0.9391869902610779, | |
| "step": 1102 | |
| }, | |
| { | |
| "epoch": 2.4478935698447892, | |
| "grad_norm": 1.3884862661361694, | |
| "learning_rate": 2.1170016397734e-06, | |
| "loss": 0.4363320469856262, | |
| "step": 1104 | |
| }, | |
| { | |
| "epoch": 2.4523281596452327, | |
| "grad_norm": 0.8786958456039429, | |
| "learning_rate": 2.109086376071221e-06, | |
| "loss": 0.8789792060852051, | |
| "step": 1106 | |
| }, | |
| { | |
| "epoch": 2.4567627494456765, | |
| "grad_norm": 1.4285457134246826, | |
| "learning_rate": 2.1011797350839513e-06, | |
| "loss": 0.7075852751731873, | |
| "step": 1108 | |
| }, | |
| { | |
| "epoch": 2.4611973392461195, | |
| "grad_norm": 1.5161508321762085, | |
| "learning_rate": 2.093281823185848e-06, | |
| "loss": 0.9112823009490967, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 2.4656319290465634, | |
| "grad_norm": 0.9512194991111755, | |
| "learning_rate": 2.0853927466337315e-06, | |
| "loss": 0.6104337573051453, | |
| "step": 1112 | |
| }, | |
| { | |
| "epoch": 2.470066518847007, | |
| "grad_norm": 2.540562391281128, | |
| "learning_rate": 2.077512611565551e-06, | |
| "loss": 0.8170069456100464, | |
| "step": 1114 | |
| }, | |
| { | |
| "epoch": 2.47450110864745, | |
| "grad_norm": 3.007708787918091, | |
| "learning_rate": 2.0696415239989593e-06, | |
| "loss": 0.3202068507671356, | |
| "step": 1116 | |
| }, | |
| { | |
| "epoch": 2.4789356984478936, | |
| "grad_norm": 1.8201653957366943, | |
| "learning_rate": 2.0617795898298855e-06, | |
| "loss": 0.8330751657485962, | |
| "step": 1118 | |
| }, | |
| { | |
| "epoch": 2.483370288248337, | |
| "grad_norm": 2.480975389480591, | |
| "learning_rate": 2.053926914831112e-06, | |
| "loss": 0.7699635028839111, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 2.4878048780487805, | |
| "grad_norm": 1.417551040649414, | |
| "learning_rate": 2.04608360465085e-06, | |
| "loss": 0.8326176404953003, | |
| "step": 1122 | |
| }, | |
| { | |
| "epoch": 2.492239467849224, | |
| "grad_norm": 3.1235289573669434, | |
| "learning_rate": 2.038249764811318e-06, | |
| "loss": 1.0917719602584839, | |
| "step": 1124 | |
| }, | |
| { | |
| "epoch": 2.4966740576496673, | |
| "grad_norm": 1.4231172800064087, | |
| "learning_rate": 2.0304255007073227e-06, | |
| "loss": 0.924064040184021, | |
| "step": 1126 | |
| }, | |
| { | |
| "epoch": 2.5011086474501107, | |
| "grad_norm": 1.0138386487960815, | |
| "learning_rate": 2.022610917604842e-06, | |
| "loss": 0.5847758650779724, | |
| "step": 1128 | |
| }, | |
| { | |
| "epoch": 2.505543237250554, | |
| "grad_norm": 12.797927856445312, | |
| "learning_rate": 2.014806120639605e-06, | |
| "loss": 0.6378868818283081, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 2.5099778270509976, | |
| "grad_norm": 1.234104037284851, | |
| "learning_rate": 2.007011214815684e-06, | |
| "loss": 0.7710060477256775, | |
| "step": 1132 | |
| }, | |
| { | |
| "epoch": 2.5144124168514415, | |
| "grad_norm": 1.5984342098236084, | |
| "learning_rate": 1.9992263050040737e-06, | |
| "loss": 0.5080645680427551, | |
| "step": 1134 | |
| }, | |
| { | |
| "epoch": 2.5188470066518844, | |
| "grad_norm": 0.7567741274833679, | |
| "learning_rate": 1.991451495941289e-06, | |
| "loss": 0.9128345847129822, | |
| "step": 1136 | |
| }, | |
| { | |
| "epoch": 2.5232815964523283, | |
| "grad_norm": 0.9700871706008911, | |
| "learning_rate": 1.983686892227948e-06, | |
| "loss": 0.7003995776176453, | |
| "step": 1138 | |
| }, | |
| { | |
| "epoch": 2.5277161862527717, | |
| "grad_norm": 1.3000694513320923, | |
| "learning_rate": 1.975932598327369e-06, | |
| "loss": 0.8861632943153381, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 2.532150776053215, | |
| "grad_norm": 1.9337971210479736, | |
| "learning_rate": 1.9681887185641646e-06, | |
| "loss": 0.4743580222129822, | |
| "step": 1142 | |
| }, | |
| { | |
| "epoch": 2.5365853658536586, | |
| "grad_norm": 3.3465616703033447, | |
| "learning_rate": 1.9604553571228395e-06, | |
| "loss": 0.5982359647750854, | |
| "step": 1144 | |
| }, | |
| { | |
| "epoch": 2.541019955654102, | |
| "grad_norm": 0.9957235455513, | |
| "learning_rate": 1.9527326180463855e-06, | |
| "loss": 0.8827557563781738, | |
| "step": 1146 | |
| }, | |
| { | |
| "epoch": 2.5454545454545454, | |
| "grad_norm": 0.8949941992759705, | |
| "learning_rate": 1.9450206052348823e-06, | |
| "loss": 0.9071930646896362, | |
| "step": 1148 | |
| }, | |
| { | |
| "epoch": 2.549889135254989, | |
| "grad_norm": 0.8225301504135132, | |
| "learning_rate": 1.9373194224441028e-06, | |
| "loss": 0.8257545232772827, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 2.5543237250554323, | |
| "grad_norm": 0.8032196760177612, | |
| "learning_rate": 1.929629173284114e-06, | |
| "loss": 0.5126382112503052, | |
| "step": 1152 | |
| }, | |
| { | |
| "epoch": 2.5587583148558757, | |
| "grad_norm": 0.5240678191184998, | |
| "learning_rate": 1.9219499612178836e-06, | |
| "loss": 0.3800676763057709, | |
| "step": 1154 | |
| }, | |
| { | |
| "epoch": 2.5631929046563195, | |
| "grad_norm": 0.8303525447845459, | |
| "learning_rate": 1.9142818895598908e-06, | |
| "loss": 0.49963274598121643, | |
| "step": 1156 | |
| }, | |
| { | |
| "epoch": 2.5676274944567625, | |
| "grad_norm": 3.7417755126953125, | |
| "learning_rate": 1.9066250614747317e-06, | |
| "loss": 0.4743386507034302, | |
| "step": 1158 | |
| }, | |
| { | |
| "epoch": 2.5720620842572064, | |
| "grad_norm": 2.17212176322937, | |
| "learning_rate": 1.8989795799757348e-06, | |
| "loss": 0.7726760506629944, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 2.57649667405765, | |
| "grad_norm": 0.7126960158348083, | |
| "learning_rate": 1.8913455479235754e-06, | |
| "loss": 0.9239242076873779, | |
| "step": 1162 | |
| }, | |
| { | |
| "epoch": 2.5809312638580932, | |
| "grad_norm": 0.8875038027763367, | |
| "learning_rate": 1.8837230680248874e-06, | |
| "loss": 0.9501176476478577, | |
| "step": 1164 | |
| }, | |
| { | |
| "epoch": 2.5853658536585367, | |
| "grad_norm": 0.9675799012184143, | |
| "learning_rate": 1.8761122428308875e-06, | |
| "loss": 0.6145892143249512, | |
| "step": 1166 | |
| }, | |
| { | |
| "epoch": 2.58980044345898, | |
| "grad_norm": 0.7325379252433777, | |
| "learning_rate": 1.8685131747359902e-06, | |
| "loss": 0.8216499090194702, | |
| "step": 1168 | |
| }, | |
| { | |
| "epoch": 2.5942350332594235, | |
| "grad_norm": 5.725553035736084, | |
| "learning_rate": 1.8609259659764345e-06, | |
| "loss": 0.8852095007896423, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 2.598669623059867, | |
| "grad_norm": 3.2101731300354004, | |
| "learning_rate": 1.853350718628904e-06, | |
| "loss": 0.42097488045692444, | |
| "step": 1172 | |
| }, | |
| { | |
| "epoch": 2.6031042128603104, | |
| "grad_norm": 2.247046947479248, | |
| "learning_rate": 1.845787534609157e-06, | |
| "loss": 0.6113148331642151, | |
| "step": 1174 | |
| }, | |
| { | |
| "epoch": 2.6075388026607538, | |
| "grad_norm": 4.695516586303711, | |
| "learning_rate": 1.8382365156706566e-06, | |
| "loss": 0.6265615224838257, | |
| "step": 1176 | |
| }, | |
| { | |
| "epoch": 2.611973392461197, | |
| "grad_norm": 1.2384305000305176, | |
| "learning_rate": 1.8306977634031976e-06, | |
| "loss": 0.627465546131134, | |
| "step": 1178 | |
| }, | |
| { | |
| "epoch": 2.6164079822616406, | |
| "grad_norm": 9.685011863708496, | |
| "learning_rate": 1.8231713792315403e-06, | |
| "loss": 0.7403496503829956, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 2.6208425720620845, | |
| "grad_norm": 0.927742063999176, | |
| "learning_rate": 1.8156574644140495e-06, | |
| "loss": 0.85722416639328, | |
| "step": 1182 | |
| }, | |
| { | |
| "epoch": 2.6252771618625275, | |
| "grad_norm": 1.9018654823303223, | |
| "learning_rate": 1.8081561200413295e-06, | |
| "loss": 0.853569507598877, | |
| "step": 1184 | |
| }, | |
| { | |
| "epoch": 2.6297117516629713, | |
| "grad_norm": 0.8076664805412292, | |
| "learning_rate": 1.800667447034864e-06, | |
| "loss": 0.8907285332679749, | |
| "step": 1186 | |
| }, | |
| { | |
| "epoch": 2.6341463414634148, | |
| "grad_norm": 2.0217840671539307, | |
| "learning_rate": 1.7931915461456573e-06, | |
| "loss": 0.9535523653030396, | |
| "step": 1188 | |
| }, | |
| { | |
| "epoch": 2.638580931263858, | |
| "grad_norm": 0.8926487565040588, | |
| "learning_rate": 1.7857285179528838e-06, | |
| "loss": 0.489310622215271, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 2.6430155210643016, | |
| "grad_norm": 3.029690742492676, | |
| "learning_rate": 1.7782784628625305e-06, | |
| "loss": 0.6923867464065552, | |
| "step": 1192 | |
| }, | |
| { | |
| "epoch": 2.647450110864745, | |
| "grad_norm": 2.82142972946167, | |
| "learning_rate": 1.7708414811060437e-06, | |
| "loss": 0.582843542098999, | |
| "step": 1194 | |
| }, | |
| { | |
| "epoch": 2.6518847006651884, | |
| "grad_norm": 1.2220163345336914, | |
| "learning_rate": 1.763417672738989e-06, | |
| "loss": 0.7117786407470703, | |
| "step": 1196 | |
| }, | |
| { | |
| "epoch": 2.656319290465632, | |
| "grad_norm": 0.4822169244289398, | |
| "learning_rate": 1.7560071376396953e-06, | |
| "loss": 0.2628706693649292, | |
| "step": 1198 | |
| }, | |
| { | |
| "epoch": 2.6607538802660753, | |
| "grad_norm": 1.2988340854644775, | |
| "learning_rate": 1.7486099755079197e-06, | |
| "loss": 0.9527356624603271, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 2.6651884700665187, | |
| "grad_norm": 1.3238375186920166, | |
| "learning_rate": 1.7412262858634987e-06, | |
| "loss": 0.8897156119346619, | |
| "step": 1202 | |
| }, | |
| { | |
| "epoch": 2.6696230598669626, | |
| "grad_norm": 1.0240246057510376, | |
| "learning_rate": 1.7338561680450171e-06, | |
| "loss": 0.46362125873565674, | |
| "step": 1204 | |
| }, | |
| { | |
| "epoch": 2.6740576496674056, | |
| "grad_norm": 1.75541353225708, | |
| "learning_rate": 1.7264997212084616e-06, | |
| "loss": 0.7587183713912964, | |
| "step": 1206 | |
| }, | |
| { | |
| "epoch": 2.6784922394678494, | |
| "grad_norm": 1.5657193660736084, | |
| "learning_rate": 1.7191570443258976e-06, | |
| "loss": 0.9052755832672119, | |
| "step": 1208 | |
| }, | |
| { | |
| "epoch": 2.682926829268293, | |
| "grad_norm": 1.5885916948318481, | |
| "learning_rate": 1.711828236184131e-06, | |
| "loss": 0.4789600968360901, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 2.6873614190687363, | |
| "grad_norm": 71.83616638183594, | |
| "learning_rate": 1.704513395383378e-06, | |
| "loss": 0.2857840955257416, | |
| "step": 1212 | |
| }, | |
| { | |
| "epoch": 2.6917960088691797, | |
| "grad_norm": 1.3978385925292969, | |
| "learning_rate": 1.6972126203359454e-06, | |
| "loss": 0.5244172215461731, | |
| "step": 1214 | |
| }, | |
| { | |
| "epoch": 2.696230598669623, | |
| "grad_norm": 6.41267204284668, | |
| "learning_rate": 1.6899260092648995e-06, | |
| "loss": 0.5827531814575195, | |
| "step": 1216 | |
| }, | |
| { | |
| "epoch": 2.7006651884700665, | |
| "grad_norm": 3.540893316268921, | |
| "learning_rate": 1.6826536602027471e-06, | |
| "loss": 0.5931687355041504, | |
| "step": 1218 | |
| }, | |
| { | |
| "epoch": 2.70509977827051, | |
| "grad_norm": 1.408761978149414, | |
| "learning_rate": 1.6753956709901202e-06, | |
| "loss": 0.9201699495315552, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 2.7095343680709534, | |
| "grad_norm": 3.0229973793029785, | |
| "learning_rate": 1.6681521392744515e-06, | |
| "loss": 0.7630390524864197, | |
| "step": 1222 | |
| }, | |
| { | |
| "epoch": 2.713968957871397, | |
| "grad_norm": 3.136180877685547, | |
| "learning_rate": 1.660923162508671e-06, | |
| "loss": 0.8200241923332214, | |
| "step": 1224 | |
| }, | |
| { | |
| "epoch": 2.7184035476718402, | |
| "grad_norm": 5.015387535095215, | |
| "learning_rate": 1.6537088379498872e-06, | |
| "loss": 0.41038981080055237, | |
| "step": 1226 | |
| }, | |
| { | |
| "epoch": 2.7228381374722836, | |
| "grad_norm": 0.8245871067047119, | |
| "learning_rate": 1.6465092626580787e-06, | |
| "loss": 0.967170238494873, | |
| "step": 1228 | |
| }, | |
| { | |
| "epoch": 2.7272727272727275, | |
| "grad_norm": 1.9440391063690186, | |
| "learning_rate": 1.6393245334947942e-06, | |
| "loss": 0.6494452357292175, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 2.7317073170731705, | |
| "grad_norm": 0.9693624973297119, | |
| "learning_rate": 1.6321547471218432e-06, | |
| "loss": 0.8679260611534119, | |
| "step": 1232 | |
| }, | |
| { | |
| "epoch": 2.7361419068736144, | |
| "grad_norm": 6.900158882141113, | |
| "learning_rate": 1.6250000000000007e-06, | |
| "loss": 0.5012823343276978, | |
| "step": 1234 | |
| }, | |
| { | |
| "epoch": 2.740576496674058, | |
| "grad_norm": 0.9100331664085388, | |
| "learning_rate": 1.6178603883877032e-06, | |
| "loss": 0.8535019755363464, | |
| "step": 1236 | |
| }, | |
| { | |
| "epoch": 2.745011086474501, | |
| "grad_norm": 0.8146524429321289, | |
| "learning_rate": 1.6107360083397604e-06, | |
| "loss": 0.5447841286659241, | |
| "step": 1238 | |
| }, | |
| { | |
| "epoch": 2.7494456762749446, | |
| "grad_norm": 0.8178804516792297, | |
| "learning_rate": 1.6036269557060594e-06, | |
| "loss": 0.5961492657661438, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 2.753880266075388, | |
| "grad_norm": 1.5546802282333374, | |
| "learning_rate": 1.5965333261302735e-06, | |
| "loss": 0.8809974193572998, | |
| "step": 1242 | |
| }, | |
| { | |
| "epoch": 2.7583148558758315, | |
| "grad_norm": 2.939823865890503, | |
| "learning_rate": 1.5894552150485801e-06, | |
| "loss": 0.9211047291755676, | |
| "step": 1244 | |
| }, | |
| { | |
| "epoch": 2.762749445676275, | |
| "grad_norm": 0.6600274443626404, | |
| "learning_rate": 1.5823927176883725e-06, | |
| "loss": 0.3387180268764496, | |
| "step": 1246 | |
| }, | |
| { | |
| "epoch": 2.7671840354767183, | |
| "grad_norm": 10.310997009277344, | |
| "learning_rate": 1.5753459290669792e-06, | |
| "loss": 0.6843352317810059, | |
| "step": 1248 | |
| }, | |
| { | |
| "epoch": 2.7716186252771617, | |
| "grad_norm": 1.5545812845230103, | |
| "learning_rate": 1.5683149439903905e-06, | |
| "loss": 0.5795391201972961, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 2.776053215077605, | |
| "grad_norm": 0.9581854939460754, | |
| "learning_rate": 1.5612998570519746e-06, | |
| "loss": 0.9656073451042175, | |
| "step": 1252 | |
| }, | |
| { | |
| "epoch": 2.7804878048780486, | |
| "grad_norm": 1.066094160079956, | |
| "learning_rate": 1.5543007626312129e-06, | |
| "loss": 0.9749020338058472, | |
| "step": 1254 | |
| }, | |
| { | |
| "epoch": 2.7849223946784925, | |
| "grad_norm": 2.5097603797912598, | |
| "learning_rate": 1.5473177548924267e-06, | |
| "loss": 0.9311152696609497, | |
| "step": 1256 | |
| }, | |
| { | |
| "epoch": 2.7893569844789354, | |
| "grad_norm": 1.2054038047790527, | |
| "learning_rate": 1.5403509277835077e-06, | |
| "loss": 0.2651883065700531, | |
| "step": 1258 | |
| }, | |
| { | |
| "epoch": 2.7937915742793793, | |
| "grad_norm": 1.640947699546814, | |
| "learning_rate": 1.5334003750346608e-06, | |
| "loss": 0.873029887676239, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 2.7982261640798227, | |
| "grad_norm": 3.502584218978882, | |
| "learning_rate": 1.5264661901571349e-06, | |
| "loss": 0.6039642691612244, | |
| "step": 1262 | |
| }, | |
| { | |
| "epoch": 2.802660753880266, | |
| "grad_norm": 0.6926102042198181, | |
| "learning_rate": 1.5195484664419732e-06, | |
| "loss": 0.485037237405777, | |
| "step": 1264 | |
| }, | |
| { | |
| "epoch": 2.8070953436807096, | |
| "grad_norm": 1.0856491327285767, | |
| "learning_rate": 1.5126472969587502e-06, | |
| "loss": 0.810798704624176, | |
| "step": 1266 | |
| }, | |
| { | |
| "epoch": 2.811529933481153, | |
| "grad_norm": 5.212289810180664, | |
| "learning_rate": 1.5057627745543269e-06, | |
| "loss": 0.8525525331497192, | |
| "step": 1268 | |
| }, | |
| { | |
| "epoch": 2.8159645232815964, | |
| "grad_norm": 0.6678111553192139, | |
| "learning_rate": 1.4988949918515947e-06, | |
| "loss": 0.8990007638931274, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 2.82039911308204, | |
| "grad_norm": 6.717563152313232, | |
| "learning_rate": 1.4920440412482345e-06, | |
| "loss": 0.3461105525493622, | |
| "step": 1272 | |
| }, | |
| { | |
| "epoch": 2.8248337028824833, | |
| "grad_norm": 0.8497708439826965, | |
| "learning_rate": 1.485210014915473e-06, | |
| "loss": 0.5556339025497437, | |
| "step": 1274 | |
| }, | |
| { | |
| "epoch": 2.8292682926829267, | |
| "grad_norm": 1.1711769104003906, | |
| "learning_rate": 1.4783930047968388e-06, | |
| "loss": 0.9120653867721558, | |
| "step": 1276 | |
| }, | |
| { | |
| "epoch": 2.8337028824833705, | |
| "grad_norm": 1.5863559246063232, | |
| "learning_rate": 1.4715931026069273e-06, | |
| "loss": 0.8935397267341614, | |
| "step": 1278 | |
| }, | |
| { | |
| "epoch": 2.8381374722838135, | |
| "grad_norm": 1.5529717206954956, | |
| "learning_rate": 1.4648103998301716e-06, | |
| "loss": 0.5035147070884705, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 2.8425720620842574, | |
| "grad_norm": 5.072021484375, | |
| "learning_rate": 1.4580449877196035e-06, | |
| "loss": 0.6653071045875549, | |
| "step": 1282 | |
| }, | |
| { | |
| "epoch": 2.847006651884701, | |
| "grad_norm": 0.8926497101783752, | |
| "learning_rate": 1.4512969572956328e-06, | |
| "loss": 0.6203804612159729, | |
| "step": 1284 | |
| }, | |
| { | |
| "epoch": 2.8514412416851442, | |
| "grad_norm": 4.661787986755371, | |
| "learning_rate": 1.4445663993448173e-06, | |
| "loss": 0.9532814621925354, | |
| "step": 1286 | |
| }, | |
| { | |
| "epoch": 2.8558758314855877, | |
| "grad_norm": 3.233835458755493, | |
| "learning_rate": 1.437853404418646e-06, | |
| "loss": 0.32333725690841675, | |
| "step": 1288 | |
| }, | |
| { | |
| "epoch": 2.860310421286031, | |
| "grad_norm": 1.3348687887191772, | |
| "learning_rate": 1.431158062832318e-06, | |
| "loss": 0.8358311653137207, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 2.8647450110864745, | |
| "grad_norm": 2.6443517208099365, | |
| "learning_rate": 1.4244804646635266e-06, | |
| "loss": 0.8612353205680847, | |
| "step": 1292 | |
| }, | |
| { | |
| "epoch": 2.869179600886918, | |
| "grad_norm": 0.6927613615989685, | |
| "learning_rate": 1.4178206997512522e-06, | |
| "loss": 0.8790969252586365, | |
| "step": 1294 | |
| }, | |
| { | |
| "epoch": 2.8736141906873613, | |
| "grad_norm": 0.8676153421401978, | |
| "learning_rate": 1.4111788576945467e-06, | |
| "loss": 0.8564894795417786, | |
| "step": 1296 | |
| }, | |
| { | |
| "epoch": 2.8780487804878048, | |
| "grad_norm": 14.382169723510742, | |
| "learning_rate": 1.4045550278513351e-06, | |
| "loss": 0.5821776986122131, | |
| "step": 1298 | |
| }, | |
| { | |
| "epoch": 2.882483370288248, | |
| "grad_norm": 0.7985261678695679, | |
| "learning_rate": 1.3979492993372074e-06, | |
| "loss": 0.8649250268936157, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 2.8869179600886916, | |
| "grad_norm": 1.3638763427734375, | |
| "learning_rate": 1.391361761024222e-06, | |
| "loss": 0.23123106360435486, | |
| "step": 1302 | |
| }, | |
| { | |
| "epoch": 2.8913525498891355, | |
| "grad_norm": 1.07351815700531, | |
| "learning_rate": 1.3847925015397146e-06, | |
| "loss": 0.8127425909042358, | |
| "step": 1304 | |
| }, | |
| { | |
| "epoch": 2.8957871396895785, | |
| "grad_norm": 1.6091636419296265, | |
| "learning_rate": 1.3782416092650957e-06, | |
| "loss": 0.9045838117599487, | |
| "step": 1306 | |
| }, | |
| { | |
| "epoch": 2.9002217294900223, | |
| "grad_norm": 0.37830138206481934, | |
| "learning_rate": 1.3717091723346699e-06, | |
| "loss": 0.36935049295425415, | |
| "step": 1308 | |
| }, | |
| { | |
| "epoch": 2.9046563192904657, | |
| "grad_norm": 0.7938516736030579, | |
| "learning_rate": 1.3651952786344485e-06, | |
| "loss": 0.44842761754989624, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 2.909090909090909, | |
| "grad_norm": 1.4822460412979126, | |
| "learning_rate": 1.3587000158009638e-06, | |
| "loss": 0.8571957349777222, | |
| "step": 1312 | |
| }, | |
| { | |
| "epoch": 2.9135254988913526, | |
| "grad_norm": 0.6906547546386719, | |
| "learning_rate": 1.3522234712200954e-06, | |
| "loss": 0.8675597906112671, | |
| "step": 1314 | |
| }, | |
| { | |
| "epoch": 2.917960088691796, | |
| "grad_norm": 2.601785898208618, | |
| "learning_rate": 1.3457657320258878e-06, | |
| "loss": 0.7463323473930359, | |
| "step": 1316 | |
| }, | |
| { | |
| "epoch": 2.9223946784922394, | |
| "grad_norm": 2.53776216506958, | |
| "learning_rate": 1.3393268850993852e-06, | |
| "loss": 0.8853073716163635, | |
| "step": 1318 | |
| }, | |
| { | |
| "epoch": 2.926829268292683, | |
| "grad_norm": 2.7623400688171387, | |
| "learning_rate": 1.332907017067458e-06, | |
| "loss": 0.5930368304252625, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 2.9312638580931263, | |
| "grad_norm": 1.027145504951477, | |
| "learning_rate": 1.3265062143016378e-06, | |
| "loss": 0.8478403687477112, | |
| "step": 1322 | |
| }, | |
| { | |
| "epoch": 2.9356984478935697, | |
| "grad_norm": 0.7706930041313171, | |
| "learning_rate": 1.3201245629169574e-06, | |
| "loss": 0.8749006390571594, | |
| "step": 1324 | |
| }, | |
| { | |
| "epoch": 2.9401330376940136, | |
| "grad_norm": 1.0428484678268433, | |
| "learning_rate": 1.3137621487707902e-06, | |
| "loss": 0.9431027173995972, | |
| "step": 1326 | |
| }, | |
| { | |
| "epoch": 2.9445676274944566, | |
| "grad_norm": 1.0761144161224365, | |
| "learning_rate": 1.307419057461697e-06, | |
| "loss": 1.0074454545974731, | |
| "step": 1328 | |
| }, | |
| { | |
| "epoch": 2.9490022172949004, | |
| "grad_norm": 0.6524636149406433, | |
| "learning_rate": 1.3010953743282724e-06, | |
| "loss": 0.844873309135437, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 2.953436807095344, | |
| "grad_norm": 1.394280195236206, | |
| "learning_rate": 1.294791184447996e-06, | |
| "loss": 0.9639885425567627, | |
| "step": 1332 | |
| }, | |
| { | |
| "epoch": 2.9578713968957873, | |
| "grad_norm": 0.7140282988548279, | |
| "learning_rate": 1.2885065726360925e-06, | |
| "loss": 0.48626944422721863, | |
| "step": 1334 | |
| }, | |
| { | |
| "epoch": 2.9623059866962307, | |
| "grad_norm": 0.707069456577301, | |
| "learning_rate": 1.282241623444386e-06, | |
| "loss": 0.8297737836837769, | |
| "step": 1336 | |
| }, | |
| { | |
| "epoch": 2.966740576496674, | |
| "grad_norm": 0.9522484540939331, | |
| "learning_rate": 1.2759964211601633e-06, | |
| "loss": 0.8412789106369019, | |
| "step": 1338 | |
| }, | |
| { | |
| "epoch": 2.9711751662971175, | |
| "grad_norm": 0.9063006043434143, | |
| "learning_rate": 1.269771049805042e-06, | |
| "loss": 0.7291353940963745, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 2.975609756097561, | |
| "grad_norm": 2.06269907951355, | |
| "learning_rate": 1.2635655931338364e-06, | |
| "loss": 0.5531010627746582, | |
| "step": 1342 | |
| }, | |
| { | |
| "epoch": 2.9800443458980044, | |
| "grad_norm": 0.36816778779029846, | |
| "learning_rate": 1.2573801346334355e-06, | |
| "loss": 0.16619227826595306, | |
| "step": 1344 | |
| }, | |
| { | |
| "epoch": 2.984478935698448, | |
| "grad_norm": 0.8877193331718445, | |
| "learning_rate": 1.251214757521675e-06, | |
| "loss": 0.6579598784446716, | |
| "step": 1346 | |
| }, | |
| { | |
| "epoch": 2.988913525498891, | |
| "grad_norm": 0.42023998498916626, | |
| "learning_rate": 1.2450695447462214e-06, | |
| "loss": 0.5335787534713745, | |
| "step": 1348 | |
| }, | |
| { | |
| "epoch": 2.9933481152993346, | |
| "grad_norm": 1.5073217153549194, | |
| "learning_rate": 1.2389445789834534e-06, | |
| "loss": 0.5976958870887756, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 2.9977827050997785, | |
| "grad_norm": 2.1122334003448486, | |
| "learning_rate": 1.2328399426373511e-06, | |
| "loss": 0.6611562967300415, | |
| "step": 1352 | |
| }, | |
| { | |
| "epoch": 3.002217294900222, | |
| "grad_norm": 0.7240795493125916, | |
| "learning_rate": 1.2267557178383886e-06, | |
| "loss": 0.783865749835968, | |
| "step": 1354 | |
| }, | |
| { | |
| "epoch": 3.0066518847006654, | |
| "grad_norm": 0.8436267375946045, | |
| "learning_rate": 1.220691986442424e-06, | |
| "loss": 0.394011527299881, | |
| "step": 1356 | |
| }, | |
| { | |
| "epoch": 3.011086474501109, | |
| "grad_norm": 2.886348009109497, | |
| "learning_rate": 1.2146488300296047e-06, | |
| "loss": 0.6734086871147156, | |
| "step": 1358 | |
| }, | |
| { | |
| "epoch": 3.015521064301552, | |
| "grad_norm": 0.9459258913993835, | |
| "learning_rate": 1.2086263299032652e-06, | |
| "loss": 0.7187186479568481, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 3.0199556541019956, | |
| "grad_norm": 0.9276299476623535, | |
| "learning_rate": 1.2026245670888343e-06, | |
| "loss": 0.6644557118415833, | |
| "step": 1362 | |
| }, | |
| { | |
| "epoch": 3.024390243902439, | |
| "grad_norm": 1.1041805744171143, | |
| "learning_rate": 1.196643622332747e-06, | |
| "loss": 0.78998863697052, | |
| "step": 1364 | |
| }, | |
| { | |
| "epoch": 3.0288248337028825, | |
| "grad_norm": 0.25377586483955383, | |
| "learning_rate": 1.1906835761013547e-06, | |
| "loss": 0.3418872356414795, | |
| "step": 1366 | |
| }, | |
| { | |
| "epoch": 3.033259423503326, | |
| "grad_norm": 1.3526285886764526, | |
| "learning_rate": 1.184744508579846e-06, | |
| "loss": 0.423952579498291, | |
| "step": 1368 | |
| }, | |
| { | |
| "epoch": 3.0376940133037693, | |
| "grad_norm": 1.1649706363677979, | |
| "learning_rate": 1.178826499671167e-06, | |
| "loss": 0.4472143352031708, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 3.0421286031042127, | |
| "grad_norm": 1.4715780019760132, | |
| "learning_rate": 1.172929628994943e-06, | |
| "loss": 0.8062811493873596, | |
| "step": 1372 | |
| }, | |
| { | |
| "epoch": 3.046563192904656, | |
| "grad_norm": 1.3881590366363525, | |
| "learning_rate": 1.167053975886413e-06, | |
| "loss": 0.3142853379249573, | |
| "step": 1374 | |
| }, | |
| { | |
| "epoch": 3.0509977827050996, | |
| "grad_norm": 3.416389226913452, | |
| "learning_rate": 1.1611996193953569e-06, | |
| "loss": 0.43006041646003723, | |
| "step": 1376 | |
| }, | |
| { | |
| "epoch": 3.0554323725055434, | |
| "grad_norm": 1.048660159111023, | |
| "learning_rate": 1.1553666382850366e-06, | |
| "loss": 0.3392511308193207, | |
| "step": 1378 | |
| }, | |
| { | |
| "epoch": 3.059866962305987, | |
| "grad_norm": 0.3364250659942627, | |
| "learning_rate": 1.1495551110311324e-06, | |
| "loss": 0.2998298704624176, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 3.0643015521064303, | |
| "grad_norm": 6.17432165145874, | |
| "learning_rate": 1.1437651158206904e-06, | |
| "loss": 0.17229698598384857, | |
| "step": 1382 | |
| }, | |
| { | |
| "epoch": 3.0687361419068737, | |
| "grad_norm": 1.7204508781433105, | |
| "learning_rate": 1.137996730551069e-06, | |
| "loss": 0.4303905963897705, | |
| "step": 1384 | |
| }, | |
| { | |
| "epoch": 3.073170731707317, | |
| "grad_norm": 0.39900439977645874, | |
| "learning_rate": 1.1322500328288897e-06, | |
| "loss": 0.2888520658016205, | |
| "step": 1386 | |
| }, | |
| { | |
| "epoch": 3.0776053215077606, | |
| "grad_norm": 0.15358883142471313, | |
| "learning_rate": 1.1265250999689966e-06, | |
| "loss": 0.32026374340057373, | |
| "step": 1388 | |
| }, | |
| { | |
| "epoch": 3.082039911308204, | |
| "grad_norm": 0.947392463684082, | |
| "learning_rate": 1.1208220089934118e-06, | |
| "loss": 0.6729350090026855, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 3.0864745011086474, | |
| "grad_norm": 3.0764870643615723, | |
| "learning_rate": 1.1151408366303024e-06, | |
| "loss": 0.325135737657547, | |
| "step": 1392 | |
| }, | |
| { | |
| "epoch": 3.090909090909091, | |
| "grad_norm": 4.003721237182617, | |
| "learning_rate": 1.1094816593129475e-06, | |
| "loss": 0.4179095923900604, | |
| "step": 1394 | |
| }, | |
| { | |
| "epoch": 3.0953436807095343, | |
| "grad_norm": 1.155935287475586, | |
| "learning_rate": 1.1038445531787083e-06, | |
| "loss": 0.715002715587616, | |
| "step": 1396 | |
| }, | |
| { | |
| "epoch": 3.0997782705099777, | |
| "grad_norm": 1.3893831968307495, | |
| "learning_rate": 1.098229594068007e-06, | |
| "loss": 0.6728289127349854, | |
| "step": 1398 | |
| }, | |
| { | |
| "epoch": 3.104212860310421, | |
| "grad_norm": 1.6952826976776123, | |
| "learning_rate": 1.0926368575233032e-06, | |
| "loss": 0.6240461468696594, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 3.1086474501108645, | |
| "grad_norm": 0.4904409945011139, | |
| "learning_rate": 1.087066418788078e-06, | |
| "loss": 0.19736936688423157, | |
| "step": 1402 | |
| }, | |
| { | |
| "epoch": 3.1130820399113084, | |
| "grad_norm": 1.5871460437774658, | |
| "learning_rate": 1.0815183528058248e-06, | |
| "loss": 0.38163045048713684, | |
| "step": 1404 | |
| }, | |
| { | |
| "epoch": 3.117516629711752, | |
| "grad_norm": 1.395402193069458, | |
| "learning_rate": 1.0759927342190362e-06, | |
| "loss": 0.7312334179878235, | |
| "step": 1406 | |
| }, | |
| { | |
| "epoch": 3.1219512195121952, | |
| "grad_norm": 1.0713874101638794, | |
| "learning_rate": 1.0704896373682052e-06, | |
| "loss": 0.7383747696876526, | |
| "step": 1408 | |
| }, | |
| { | |
| "epoch": 3.1263858093126387, | |
| "grad_norm": 1.2227498292922974, | |
| "learning_rate": 1.0650091362908189e-06, | |
| "loss": 0.6887333393096924, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 3.130820399113082, | |
| "grad_norm": 1.1990351676940918, | |
| "learning_rate": 1.0595513047203693e-06, | |
| "loss": 0.6441534757614136, | |
| "step": 1412 | |
| }, | |
| { | |
| "epoch": 3.1352549889135255, | |
| "grad_norm": 1.021954894065857, | |
| "learning_rate": 1.0541162160853538e-06, | |
| "loss": 0.4102858901023865, | |
| "step": 1414 | |
| }, | |
| { | |
| "epoch": 3.139689578713969, | |
| "grad_norm": 0.33726537227630615, | |
| "learning_rate": 1.0487039435082941e-06, | |
| "loss": 0.056999024003744125, | |
| "step": 1416 | |
| }, | |
| { | |
| "epoch": 3.1441241685144123, | |
| "grad_norm": 1.8328347206115723, | |
| "learning_rate": 1.0433145598047495e-06, | |
| "loss": 0.6035170555114746, | |
| "step": 1418 | |
| }, | |
| { | |
| "epoch": 3.1485587583148558, | |
| "grad_norm": 1.524192214012146, | |
| "learning_rate": 1.0379481374823358e-06, | |
| "loss": 0.7395703196525574, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 3.152993348115299, | |
| "grad_norm": 1.5557146072387695, | |
| "learning_rate": 1.032604748739751e-06, | |
| "loss": 0.6392835974693298, | |
| "step": 1422 | |
| }, | |
| { | |
| "epoch": 3.1574279379157426, | |
| "grad_norm": 8.86196231842041, | |
| "learning_rate": 1.0272844654658069e-06, | |
| "loss": 0.1004699245095253, | |
| "step": 1424 | |
| }, | |
| { | |
| "epoch": 3.1618625277161865, | |
| "grad_norm": 1.7001556158065796, | |
| "learning_rate": 1.0219873592384556e-06, | |
| "loss": 0.6165364384651184, | |
| "step": 1426 | |
| }, | |
| { | |
| "epoch": 3.16629711751663, | |
| "grad_norm": 2.5815086364746094, | |
| "learning_rate": 1.016713501323834e-06, | |
| "loss": 0.7140083312988281, | |
| "step": 1428 | |
| }, | |
| { | |
| "epoch": 3.1707317073170733, | |
| "grad_norm": 2.0112009048461914, | |
| "learning_rate": 1.0114629626752973e-06, | |
| "loss": 0.3635685443878174, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 3.1751662971175167, | |
| "grad_norm": 0.9459992051124573, | |
| "learning_rate": 1.0062358139324715e-06, | |
| "loss": 0.34838780760765076, | |
| "step": 1432 | |
| }, | |
| { | |
| "epoch": 3.17960088691796, | |
| "grad_norm": 1.4729267358779907, | |
| "learning_rate": 1.0010321254202992e-06, | |
| "loss": 0.5929923057556152, | |
| "step": 1434 | |
| }, | |
| { | |
| "epoch": 3.1840354767184036, | |
| "grad_norm": 2.3258347511291504, | |
| "learning_rate": 9.958519671480919e-07, | |
| "loss": 0.2461414337158203, | |
| "step": 1436 | |
| }, | |
| { | |
| "epoch": 3.188470066518847, | |
| "grad_norm": 0.8266603946685791, | |
| "learning_rate": 9.906954088085929e-07, | |
| "loss": 0.421371191740036, | |
| "step": 1438 | |
| }, | |
| { | |
| "epoch": 3.1929046563192904, | |
| "grad_norm": 4.340923309326172, | |
| "learning_rate": 9.85562519777035e-07, | |
| "loss": 0.49519577622413635, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 3.197339246119734, | |
| "grad_norm": 1.8006068468093872, | |
| "learning_rate": 9.804533691102112e-07, | |
| "loss": 0.6714183688163757, | |
| "step": 1442 | |
| }, | |
| { | |
| "epoch": 3.2017738359201773, | |
| "grad_norm": 1.2195959091186523, | |
| "learning_rate": 9.75368025545542e-07, | |
| "loss": 0.6853227615356445, | |
| "step": 1444 | |
| }, | |
| { | |
| "epoch": 3.2062084257206207, | |
| "grad_norm": 1.3917747735977173, | |
| "learning_rate": 9.703065575001518e-07, | |
| "loss": 0.7487331628799438, | |
| "step": 1446 | |
| }, | |
| { | |
| "epoch": 3.210643015521064, | |
| "grad_norm": 7.102336883544922, | |
| "learning_rate": 9.65269033069952e-07, | |
| "loss": 0.45779871940612793, | |
| "step": 1448 | |
| }, | |
| { | |
| "epoch": 3.2150776053215075, | |
| "grad_norm": 1.3083984851837158, | |
| "learning_rate": 9.602555200287184e-07, | |
| "loss": 0.7010431289672852, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 3.2195121951219514, | |
| "grad_norm": 5.842024803161621, | |
| "learning_rate": 9.552660858271835e-07, | |
| "loss": 0.4435151517391205, | |
| "step": 1452 | |
| }, | |
| { | |
| "epoch": 3.223946784922395, | |
| "grad_norm": 0.8788203001022339, | |
| "learning_rate": 9.503007975921294e-07, | |
| "loss": 0.45844289660453796, | |
| "step": 1454 | |
| }, | |
| { | |
| "epoch": 3.2283813747228383, | |
| "grad_norm": 3.6687796115875244, | |
| "learning_rate": 9.453597221254821e-07, | |
| "loss": 0.8120266795158386, | |
| "step": 1456 | |
| }, | |
| { | |
| "epoch": 3.2328159645232817, | |
| "grad_norm": 2.3230605125427246, | |
| "learning_rate": 9.404429259034156e-07, | |
| "loss": 0.39245831966400146, | |
| "step": 1458 | |
| }, | |
| { | |
| "epoch": 3.237250554323725, | |
| "grad_norm": 1.0407835245132446, | |
| "learning_rate": 9.355504750754543e-07, | |
| "loss": 0.7990567684173584, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 3.2416851441241685, | |
| "grad_norm": 1.1758776903152466, | |
| "learning_rate": 9.306824354635866e-07, | |
| "loss": 0.4867308437824249, | |
| "step": 1462 | |
| }, | |
| { | |
| "epoch": 3.246119733924612, | |
| "grad_norm": 1.7548372745513916, | |
| "learning_rate": 9.258388725613776e-07, | |
| "loss": 0.5504351258277893, | |
| "step": 1464 | |
| }, | |
| { | |
| "epoch": 3.2505543237250554, | |
| "grad_norm": 1.3209401369094849, | |
| "learning_rate": 9.21019851533086e-07, | |
| "loss": 0.7579896450042725, | |
| "step": 1466 | |
| }, | |
| { | |
| "epoch": 3.254988913525499, | |
| "grad_norm": 0.2684461176395416, | |
| "learning_rate": 9.162254372127921e-07, | |
| "loss": 0.07358714938163757, | |
| "step": 1468 | |
| }, | |
| { | |
| "epoch": 3.259423503325942, | |
| "grad_norm": 2.430288791656494, | |
| "learning_rate": 9.114556941035199e-07, | |
| "loss": 0.6224230527877808, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 3.2638580931263856, | |
| "grad_norm": 1.1718429327011108, | |
| "learning_rate": 9.067106863763752e-07, | |
| "loss": 0.9210297465324402, | |
| "step": 1472 | |
| }, | |
| { | |
| "epoch": 3.2682926829268295, | |
| "grad_norm": 1.4180784225463867, | |
| "learning_rate": 9.01990477869677e-07, | |
| "loss": 0.30042320489883423, | |
| "step": 1474 | |
| }, | |
| { | |
| "epoch": 3.2727272727272725, | |
| "grad_norm": 1.123668909072876, | |
| "learning_rate": 8.972951320881014e-07, | |
| "loss": 0.5489499568939209, | |
| "step": 1476 | |
| }, | |
| { | |
| "epoch": 3.2771618625277164, | |
| "grad_norm": 0.8793950080871582, | |
| "learning_rate": 8.92624712201827e-07, | |
| "loss": 0.3639003336429596, | |
| "step": 1478 | |
| }, | |
| { | |
| "epoch": 3.2815964523281598, | |
| "grad_norm": 1.9588091373443604, | |
| "learning_rate": 8.879792810456861e-07, | |
| "loss": 0.4639153778553009, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 3.286031042128603, | |
| "grad_norm": 1.0029566287994385, | |
| "learning_rate": 8.833589011183147e-07, | |
| "loss": 0.7829899191856384, | |
| "step": 1482 | |
| }, | |
| { | |
| "epoch": 3.2904656319290466, | |
| "grad_norm": 0.9496995210647583, | |
| "learning_rate": 8.78763634581318e-07, | |
| "loss": 0.5258970260620117, | |
| "step": 1484 | |
| }, | |
| { | |
| "epoch": 3.29490022172949, | |
| "grad_norm": 0.913833737373352, | |
| "learning_rate": 8.741935432584292e-07, | |
| "loss": 0.47597554326057434, | |
| "step": 1486 | |
| }, | |
| { | |
| "epoch": 3.2993348115299335, | |
| "grad_norm": 2.7132110595703125, | |
| "learning_rate": 8.696486886346805e-07, | |
| "loss": 0.43635129928588867, | |
| "step": 1488 | |
| }, | |
| { | |
| "epoch": 3.303769401330377, | |
| "grad_norm": 2.0939266681671143, | |
| "learning_rate": 8.651291318555745e-07, | |
| "loss": 0.23065295815467834, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 3.3082039911308203, | |
| "grad_norm": 0.9052222967147827, | |
| "learning_rate": 8.606349337262623e-07, | |
| "loss": 0.8228150606155396, | |
| "step": 1492 | |
| }, | |
| { | |
| "epoch": 3.3126385809312637, | |
| "grad_norm": 1.733860731124878, | |
| "learning_rate": 8.561661547107243e-07, | |
| "loss": 0.47642073035240173, | |
| "step": 1494 | |
| }, | |
| { | |
| "epoch": 3.317073170731707, | |
| "grad_norm": 1.4319210052490234, | |
| "learning_rate": 8.517228549309588e-07, | |
| "loss": 0.6294840574264526, | |
| "step": 1496 | |
| }, | |
| { | |
| "epoch": 3.3215077605321506, | |
| "grad_norm": 3.192908525466919, | |
| "learning_rate": 8.473050941661717e-07, | |
| "loss": 0.767174482345581, | |
| "step": 1498 | |
| }, | |
| { | |
| "epoch": 3.3259423503325944, | |
| "grad_norm": 1.09794020652771, | |
| "learning_rate": 8.429129318519711e-07, | |
| "loss": 0.3739165961742401, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 3.330376940133038, | |
| "grad_norm": 1.1555298566818237, | |
| "learning_rate": 8.38546427079571e-07, | |
| "loss": 0.6726783514022827, | |
| "step": 1502 | |
| }, | |
| { | |
| "epoch": 3.3348115299334813, | |
| "grad_norm": 0.9871981739997864, | |
| "learning_rate": 8.342056385949929e-07, | |
| "loss": 0.7142524123191833, | |
| "step": 1504 | |
| }, | |
| { | |
| "epoch": 3.3392461197339247, | |
| "grad_norm": 0.46822428703308105, | |
| "learning_rate": 8.298906247982768e-07, | |
| "loss": 0.3226020932197571, | |
| "step": 1506 | |
| }, | |
| { | |
| "epoch": 3.343680709534368, | |
| "grad_norm": 1.3958603143692017, | |
| "learning_rate": 8.25601443742697e-07, | |
| "loss": 0.48669373989105225, | |
| "step": 1508 | |
| }, | |
| { | |
| "epoch": 3.3481152993348116, | |
| "grad_norm": 2.5492188930511475, | |
| "learning_rate": 8.213381531339776e-07, | |
| "loss": 0.732366681098938, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 3.352549889135255, | |
| "grad_norm": 0.35939931869506836, | |
| "learning_rate": 8.1710081032952e-07, | |
| "loss": 0.46142342686653137, | |
| "step": 1512 | |
| }, | |
| { | |
| "epoch": 3.3569844789356984, | |
| "grad_norm": 1.1625959873199463, | |
| "learning_rate": 8.128894723376285e-07, | |
| "loss": 0.8541035056114197, | |
| "step": 1514 | |
| }, | |
| { | |
| "epoch": 3.361419068736142, | |
| "grad_norm": 3.446476697921753, | |
| "learning_rate": 8.087041958167438e-07, | |
| "loss": 0.47667139768600464, | |
| "step": 1516 | |
| }, | |
| { | |
| "epoch": 3.3658536585365852, | |
| "grad_norm": 0.9094996452331543, | |
| "learning_rate": 8.04545037074683e-07, | |
| "loss": 0.5068199634552002, | |
| "step": 1518 | |
| }, | |
| { | |
| "epoch": 3.3702882483370287, | |
| "grad_norm": 0.8286429047584534, | |
| "learning_rate": 8.004120520678768e-07, | |
| "loss": 0.703849732875824, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 3.374722838137472, | |
| "grad_norm": 0.2944241166114807, | |
| "learning_rate": 7.963052964006243e-07, | |
| "loss": 0.4450077414512634, | |
| "step": 1522 | |
| }, | |
| { | |
| "epoch": 3.3791574279379155, | |
| "grad_norm": 2.379061698913574, | |
| "learning_rate": 7.922248253243367e-07, | |
| "loss": 0.6920251250267029, | |
| "step": 1524 | |
| }, | |
| { | |
| "epoch": 3.3835920177383594, | |
| "grad_norm": 6.14056921005249, | |
| "learning_rate": 7.881706937368005e-07, | |
| "loss": 0.6898224353790283, | |
| "step": 1526 | |
| }, | |
| { | |
| "epoch": 3.388026607538803, | |
| "grad_norm": 1.5987882614135742, | |
| "learning_rate": 7.84142956181436e-07, | |
| "loss": 0.45382753014564514, | |
| "step": 1528 | |
| }, | |
| { | |
| "epoch": 3.3924611973392462, | |
| "grad_norm": 0.6767929196357727, | |
| "learning_rate": 7.801416668465621e-07, | |
| "loss": 0.3358471691608429, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 3.3968957871396896, | |
| "grad_norm": 5.22633695602417, | |
| "learning_rate": 7.76166879564672e-07, | |
| "loss": 0.14293606579303741, | |
| "step": 1532 | |
| }, | |
| { | |
| "epoch": 3.401330376940133, | |
| "grad_norm": 1.0749104022979736, | |
| "learning_rate": 7.722186478117031e-07, | |
| "loss": 0.6599565148353577, | |
| "step": 1534 | |
| }, | |
| { | |
| "epoch": 3.4057649667405765, | |
| "grad_norm": 1.8686710596084595, | |
| "learning_rate": 7.682970247063212e-07, | |
| "loss": 0.7604563236236572, | |
| "step": 1536 | |
| }, | |
| { | |
| "epoch": 3.41019955654102, | |
| "grad_norm": 1.1680110692977905, | |
| "learning_rate": 7.644020630092066e-07, | |
| "loss": 0.7063665986061096, | |
| "step": 1538 | |
| }, | |
| { | |
| "epoch": 3.4146341463414633, | |
| "grad_norm": 1.0432260036468506, | |
| "learning_rate": 7.605338151223401e-07, | |
| "loss": 0.727014422416687, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 3.4190687361419068, | |
| "grad_norm": 1.5552973747253418, | |
| "learning_rate": 7.566923330883029e-07, | |
| "loss": 0.4836811125278473, | |
| "step": 1542 | |
| }, | |
| { | |
| "epoch": 3.42350332594235, | |
| "grad_norm": 1.57466459274292, | |
| "learning_rate": 7.528776685895731e-07, | |
| "loss": 0.5649837255477905, | |
| "step": 1544 | |
| }, | |
| { | |
| "epoch": 3.4279379157427936, | |
| "grad_norm": 4.2533650398254395, | |
| "learning_rate": 7.490898729478312e-07, | |
| "loss": 0.23879516124725342, | |
| "step": 1546 | |
| }, | |
| { | |
| "epoch": 3.4323725055432375, | |
| "grad_norm": 0.1743544042110443, | |
| "learning_rate": 7.45328997123271e-07, | |
| "loss": 0.1827031672000885, | |
| "step": 1548 | |
| }, | |
| { | |
| "epoch": 3.436807095343681, | |
| "grad_norm": 0.9885973930358887, | |
| "learning_rate": 7.415950917139106e-07, | |
| "loss": 0.7187482118606567, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 3.4412416851441243, | |
| "grad_norm": 1.0436469316482544, | |
| "learning_rate": 7.378882069549166e-07, | |
| "loss": 0.6883783936500549, | |
| "step": 1552 | |
| }, | |
| { | |
| "epoch": 3.4456762749445677, | |
| "grad_norm": 0.17162276804447174, | |
| "learning_rate": 7.342083927179235e-07, | |
| "loss": 0.3528411388397217, | |
| "step": 1554 | |
| }, | |
| { | |
| "epoch": 3.450110864745011, | |
| "grad_norm": 0.9677134156227112, | |
| "learning_rate": 7.30555698510366e-07, | |
| "loss": 0.7020009756088257, | |
| "step": 1556 | |
| }, | |
| { | |
| "epoch": 3.4545454545454546, | |
| "grad_norm": 2.290982961654663, | |
| "learning_rate": 7.269301734748107e-07, | |
| "loss": 0.6964924931526184, | |
| "step": 1558 | |
| }, | |
| { | |
| "epoch": 3.458980044345898, | |
| "grad_norm": 0.9703812003135681, | |
| "learning_rate": 7.233318663882968e-07, | |
| "loss": 0.7445743083953857, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 3.4634146341463414, | |
| "grad_norm": 2.6579227447509766, | |
| "learning_rate": 7.197608256616792e-07, | |
| "loss": 0.19093935191631317, | |
| "step": 1562 | |
| }, | |
| { | |
| "epoch": 3.467849223946785, | |
| "grad_norm": 1.445381999015808, | |
| "learning_rate": 7.162170993389763e-07, | |
| "loss": 0.7774098515510559, | |
| "step": 1564 | |
| }, | |
| { | |
| "epoch": 3.4722838137472283, | |
| "grad_norm": 1.2807323932647705, | |
| "learning_rate": 7.127007350967241e-07, | |
| "loss": 0.3930266201496124, | |
| "step": 1566 | |
| }, | |
| { | |
| "epoch": 3.4767184035476717, | |
| "grad_norm": 1.6947154998779297, | |
| "learning_rate": 7.092117802433362e-07, | |
| "loss": 0.8776201009750366, | |
| "step": 1568 | |
| }, | |
| { | |
| "epoch": 3.481152993348115, | |
| "grad_norm": 1.315631628036499, | |
| "learning_rate": 7.057502817184648e-07, | |
| "loss": 0.4978081285953522, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 3.4855875831485585, | |
| "grad_norm": 0.36691853404045105, | |
| "learning_rate": 7.023162860923722e-07, | |
| "loss": 0.5448426604270935, | |
| "step": 1572 | |
| }, | |
| { | |
| "epoch": 3.4900221729490024, | |
| "grad_norm": 1.9715549945831299, | |
| "learning_rate": 6.989098395653005e-07, | |
| "loss": 0.8805798292160034, | |
| "step": 1574 | |
| }, | |
| { | |
| "epoch": 3.494456762749446, | |
| "grad_norm": 2.96600341796875, | |
| "learning_rate": 6.955309879668537e-07, | |
| "loss": 0.3866577744483948, | |
| "step": 1576 | |
| }, | |
| { | |
| "epoch": 3.4988913525498893, | |
| "grad_norm": 0.8945562839508057, | |
| "learning_rate": 6.921797767553794e-07, | |
| "loss": 0.413793683052063, | |
| "step": 1578 | |
| }, | |
| { | |
| "epoch": 3.5033259423503327, | |
| "grad_norm": 1.501956582069397, | |
| "learning_rate": 6.88856251017356e-07, | |
| "loss": 0.6843544244766235, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 3.507760532150776, | |
| "grad_norm": 3.895604372024536, | |
| "learning_rate": 6.855604554667897e-07, | |
| "loss": 0.8031338453292847, | |
| "step": 1582 | |
| }, | |
| { | |
| "epoch": 3.5121951219512195, | |
| "grad_norm": 1.0929995775222778, | |
| "learning_rate": 6.822924344446081e-07, | |
| "loss": 0.46741926670074463, | |
| "step": 1584 | |
| }, | |
| { | |
| "epoch": 3.516629711751663, | |
| "grad_norm": 1.4208186864852905, | |
| "learning_rate": 6.790522319180687e-07, | |
| "loss": 0.4863869249820709, | |
| "step": 1586 | |
| }, | |
| { | |
| "epoch": 3.5210643015521064, | |
| "grad_norm": 1.2825591564178467, | |
| "learning_rate": 6.758398914801628e-07, | |
| "loss": 0.8202866315841675, | |
| "step": 1588 | |
| }, | |
| { | |
| "epoch": 3.52549889135255, | |
| "grad_norm": 2.9333906173706055, | |
| "learning_rate": 6.726554563490321e-07, | |
| "loss": 0.4186065196990967, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 3.529933481152993, | |
| "grad_norm": 4.726576805114746, | |
| "learning_rate": 6.694989693673872e-07, | |
| "loss": 0.4944823384284973, | |
| "step": 1592 | |
| }, | |
| { | |
| "epoch": 3.5343680709534366, | |
| "grad_norm": 1.305874228477478, | |
| "learning_rate": 6.663704730019285e-07, | |
| "loss": 0.8017009496688843, | |
| "step": 1594 | |
| }, | |
| { | |
| "epoch": 3.5388026607538805, | |
| "grad_norm": 4.202098846435547, | |
| "learning_rate": 6.632700093427774e-07, | |
| "loss": 0.23011818528175354, | |
| "step": 1596 | |
| }, | |
| { | |
| "epoch": 3.5432372505543235, | |
| "grad_norm": 2.405488967895508, | |
| "learning_rate": 6.601976201029095e-07, | |
| "loss": 0.44181007146835327, | |
| "step": 1598 | |
| }, | |
| { | |
| "epoch": 3.5476718403547673, | |
| "grad_norm": 1.209696888923645, | |
| "learning_rate": 6.571533466175928e-07, | |
| "loss": 0.5328426957130432, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 3.5521064301552108, | |
| "grad_norm": 0.9200423359870911, | |
| "learning_rate": 6.541372298438325e-07, | |
| "loss": 0.7708749175071716, | |
| "step": 1602 | |
| }, | |
| { | |
| "epoch": 3.556541019955654, | |
| "grad_norm": 0.8987447023391724, | |
| "learning_rate": 6.511493103598184e-07, | |
| "loss": 0.8243938088417053, | |
| "step": 1604 | |
| }, | |
| { | |
| "epoch": 3.5609756097560976, | |
| "grad_norm": 1.0883064270019531, | |
| "learning_rate": 6.481896283643808e-07, | |
| "loss": 0.7721865177154541, | |
| "step": 1606 | |
| }, | |
| { | |
| "epoch": 3.565410199556541, | |
| "grad_norm": 0.14681853353977203, | |
| "learning_rate": 6.452582236764495e-07, | |
| "loss": 0.08870165795087814, | |
| "step": 1608 | |
| }, | |
| { | |
| "epoch": 3.5698447893569845, | |
| "grad_norm": 7.72481632232666, | |
| "learning_rate": 6.423551357345154e-07, | |
| "loss": 0.5422983169555664, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 3.574279379157428, | |
| "grad_norm": 6.495584487915039, | |
| "learning_rate": 6.394804035961038e-07, | |
| "loss": 0.14715789258480072, | |
| "step": 1612 | |
| }, | |
| { | |
| "epoch": 3.5787139689578713, | |
| "grad_norm": 2.7080962657928467, | |
| "learning_rate": 6.366340659372462e-07, | |
| "loss": 0.5568990111351013, | |
| "step": 1614 | |
| }, | |
| { | |
| "epoch": 3.5831485587583147, | |
| "grad_norm": 1.5873769521713257, | |
| "learning_rate": 6.338161610519618e-07, | |
| "loss": 0.7860218286514282, | |
| "step": 1616 | |
| }, | |
| { | |
| "epoch": 3.587583148558758, | |
| "grad_norm": 0.8671674132347107, | |
| "learning_rate": 6.310267268517397e-07, | |
| "loss": 0.4535370171070099, | |
| "step": 1618 | |
| }, | |
| { | |
| "epoch": 3.5920177383592016, | |
| "grad_norm": 5.442802906036377, | |
| "learning_rate": 6.282658008650318e-07, | |
| "loss": 0.5291122198104858, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 3.5964523281596454, | |
| "grad_norm": 0.8540799021720886, | |
| "learning_rate": 6.255334202367462e-07, | |
| "loss": 0.647901713848114, | |
| "step": 1622 | |
| }, | |
| { | |
| "epoch": 3.6008869179600884, | |
| "grad_norm": 1.4981876611709595, | |
| "learning_rate": 6.228296217277481e-07, | |
| "loss": 0.5466744899749756, | |
| "step": 1624 | |
| }, | |
| { | |
| "epoch": 3.6053215077605323, | |
| "grad_norm": 0.7501009106636047, | |
| "learning_rate": 6.201544417143641e-07, | |
| "loss": 0.20443859696388245, | |
| "step": 1626 | |
| }, | |
| { | |
| "epoch": 3.6097560975609757, | |
| "grad_norm": 2.2188422679901123, | |
| "learning_rate": 6.175079161878951e-07, | |
| "loss": 0.6764265894889832, | |
| "step": 1628 | |
| }, | |
| { | |
| "epoch": 3.614190687361419, | |
| "grad_norm": 3.0544168949127197, | |
| "learning_rate": 6.148900807541295e-07, | |
| "loss": 0.5957894325256348, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 3.6186252771618626, | |
| "grad_norm": 1.2711377143859863, | |
| "learning_rate": 6.123009706328659e-07, | |
| "loss": 0.5789898037910461, | |
| "step": 1632 | |
| }, | |
| { | |
| "epoch": 3.623059866962306, | |
| "grad_norm": 3.054331064224243, | |
| "learning_rate": 6.097406206574378e-07, | |
| "loss": 0.6961408257484436, | |
| "step": 1634 | |
| }, | |
| { | |
| "epoch": 3.6274944567627494, | |
| "grad_norm": 1.0726757049560547, | |
| "learning_rate": 6.072090652742475e-07, | |
| "loss": 0.47810229659080505, | |
| "step": 1636 | |
| }, | |
| { | |
| "epoch": 3.631929046563193, | |
| "grad_norm": 2.5502026081085205, | |
| "learning_rate": 6.047063385422993e-07, | |
| "loss": 0.6304081678390503, | |
| "step": 1638 | |
| }, | |
| { | |
| "epoch": 3.6363636363636362, | |
| "grad_norm": 2.4343056678771973, | |
| "learning_rate": 6.022324741327438e-07, | |
| "loss": 0.6360555291175842, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 3.6407982261640797, | |
| "grad_norm": 0.8209331035614014, | |
| "learning_rate": 5.997875053284248e-07, | |
| "loss": 0.32580727338790894, | |
| "step": 1642 | |
| }, | |
| { | |
| "epoch": 3.6452328159645235, | |
| "grad_norm": 1.0483583211898804, | |
| "learning_rate": 5.973714650234287e-07, | |
| "loss": 0.563210666179657, | |
| "step": 1644 | |
| }, | |
| { | |
| "epoch": 3.6496674057649665, | |
| "grad_norm": 0.1659860759973526, | |
| "learning_rate": 5.949843857226466e-07, | |
| "loss": 0.376314640045166, | |
| "step": 1646 | |
| }, | |
| { | |
| "epoch": 3.6541019955654104, | |
| "grad_norm": 1.0978350639343262, | |
| "learning_rate": 5.926262995413329e-07, | |
| "loss": 0.36298975348472595, | |
| "step": 1648 | |
| }, | |
| { | |
| "epoch": 3.658536585365854, | |
| "grad_norm": 5.020273685455322, | |
| "learning_rate": 5.902972382046742e-07, | |
| "loss": 0.4144408106803894, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 3.662971175166297, | |
| "grad_norm": 0.9348329901695251, | |
| "learning_rate": 5.879972330473651e-07, | |
| "loss": 0.677159309387207, | |
| "step": 1652 | |
| }, | |
| { | |
| "epoch": 3.6674057649667406, | |
| "grad_norm": 0.8802472949028015, | |
| "learning_rate": 5.857263150131825e-07, | |
| "loss": 0.3901694416999817, | |
| "step": 1654 | |
| }, | |
| { | |
| "epoch": 3.671840354767184, | |
| "grad_norm": 0.9511557817459106, | |
| "learning_rate": 5.834845146545726e-07, | |
| "loss": 0.7068908214569092, | |
| "step": 1656 | |
| }, | |
| { | |
| "epoch": 3.6762749445676275, | |
| "grad_norm": 2.1888368129730225, | |
| "learning_rate": 5.812718621322386e-07, | |
| "loss": 0.5586342215538025, | |
| "step": 1658 | |
| }, | |
| { | |
| "epoch": 3.680709534368071, | |
| "grad_norm": 1.1138534545898438, | |
| "learning_rate": 5.790883872147341e-07, | |
| "loss": 0.4197966456413269, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 3.6851441241685143, | |
| "grad_norm": 0.3132419288158417, | |
| "learning_rate": 5.769341192780643e-07, | |
| "loss": 0.3502754867076874, | |
| "step": 1662 | |
| }, | |
| { | |
| "epoch": 3.6895787139689578, | |
| "grad_norm": 1.2125017642974854, | |
| "learning_rate": 5.748090873052892e-07, | |
| "loss": 0.4432962238788605, | |
| "step": 1664 | |
| }, | |
| { | |
| "epoch": 3.694013303769401, | |
| "grad_norm": 1.3100497722625732, | |
| "learning_rate": 5.727133198861353e-07, | |
| "loss": 0.6520885229110718, | |
| "step": 1666 | |
| }, | |
| { | |
| "epoch": 3.6984478935698446, | |
| "grad_norm": 1.8288265466690063, | |
| "learning_rate": 5.706468452166091e-07, | |
| "loss": 0.5817002654075623, | |
| "step": 1668 | |
| }, | |
| { | |
| "epoch": 3.7028824833702885, | |
| "grad_norm": 0.997337818145752, | |
| "learning_rate": 5.686096910986189e-07, | |
| "loss": 0.6981693506240845, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 3.7073170731707314, | |
| "grad_norm": 1.4828516244888306, | |
| "learning_rate": 5.666018849396016e-07, | |
| "loss": 0.6814447045326233, | |
| "step": 1672 | |
| }, | |
| { | |
| "epoch": 3.7117516629711753, | |
| "grad_norm": 0.9741377234458923, | |
| "learning_rate": 5.646234537521513e-07, | |
| "loss": 0.6926964521408081, | |
| "step": 1674 | |
| }, | |
| { | |
| "epoch": 3.7161862527716187, | |
| "grad_norm": 1.1306507587432861, | |
| "learning_rate": 5.626744241536589e-07, | |
| "loss": 0.7342678308486938, | |
| "step": 1676 | |
| }, | |
| { | |
| "epoch": 3.720620842572062, | |
| "grad_norm": 1.6748311519622803, | |
| "learning_rate": 5.607548223659519e-07, | |
| "loss": 0.7321768999099731, | |
| "step": 1678 | |
| }, | |
| { | |
| "epoch": 3.7250554323725056, | |
| "grad_norm": 4.892649173736572, | |
| "learning_rate": 5.58864674214942e-07, | |
| "loss": 0.5290915369987488, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 3.729490022172949, | |
| "grad_norm": 0.8138172030448914, | |
| "learning_rate": 5.57004005130279e-07, | |
| "loss": 0.5308825969696045, | |
| "step": 1682 | |
| }, | |
| { | |
| "epoch": 3.7339246119733924, | |
| "grad_norm": 1.0458836555480957, | |
| "learning_rate": 5.551728401450067e-07, | |
| "loss": 0.35428744554519653, | |
| "step": 1684 | |
| }, | |
| { | |
| "epoch": 3.738359201773836, | |
| "grad_norm": 2.807513475418091, | |
| "learning_rate": 5.533712038952278e-07, | |
| "loss": 0.6030918955802917, | |
| "step": 1686 | |
| }, | |
| { | |
| "epoch": 3.7427937915742793, | |
| "grad_norm": 0.6136502623558044, | |
| "learning_rate": 5.51599120619771e-07, | |
| "loss": 0.17880572378635406, | |
| "step": 1688 | |
| }, | |
| { | |
| "epoch": 3.7472283813747227, | |
| "grad_norm": 1.1024833917617798, | |
| "learning_rate": 5.498566141598662e-07, | |
| "loss": 0.6866117715835571, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 3.7516629711751666, | |
| "grad_norm": 1.260596513748169, | |
| "learning_rate": 5.481437079588227e-07, | |
| "loss": 0.6682636737823486, | |
| "step": 1692 | |
| }, | |
| { | |
| "epoch": 3.7560975609756095, | |
| "grad_norm": 0.8481225371360779, | |
| "learning_rate": 5.464604250617143e-07, | |
| "loss": 0.7785466909408569, | |
| "step": 1694 | |
| }, | |
| { | |
| "epoch": 3.7605321507760534, | |
| "grad_norm": 1.0920374393463135, | |
| "learning_rate": 5.448067881150697e-07, | |
| "loss": 0.7681268453598022, | |
| "step": 1696 | |
| }, | |
| { | |
| "epoch": 3.764966740576497, | |
| "grad_norm": 1.2000559568405151, | |
| "learning_rate": 5.431828193665664e-07, | |
| "loss": 0.3771549463272095, | |
| "step": 1698 | |
| }, | |
| { | |
| "epoch": 3.7694013303769403, | |
| "grad_norm": 4.952986240386963, | |
| "learning_rate": 5.415885406647334e-07, | |
| "loss": 0.5442360639572144, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 3.7738359201773837, | |
| "grad_norm": 1.6961767673492432, | |
| "learning_rate": 5.400239734586551e-07, | |
| "loss": 0.9097031950950623, | |
| "step": 1702 | |
| }, | |
| { | |
| "epoch": 3.778270509977827, | |
| "grad_norm": 0.585956871509552, | |
| "learning_rate": 5.384891387976845e-07, | |
| "loss": 0.11114199459552765, | |
| "step": 1704 | |
| }, | |
| { | |
| "epoch": 3.7827050997782705, | |
| "grad_norm": 0.36832162737846375, | |
| "learning_rate": 5.369840573311593e-07, | |
| "loss": 0.41135963797569275, | |
| "step": 1706 | |
| }, | |
| { | |
| "epoch": 3.787139689578714, | |
| "grad_norm": 1.0942734479904175, | |
| "learning_rate": 5.355087493081236e-07, | |
| "loss": 0.5182826519012451, | |
| "step": 1708 | |
| }, | |
| { | |
| "epoch": 3.7915742793791574, | |
| "grad_norm": 0.9384496212005615, | |
| "learning_rate": 5.340632345770564e-07, | |
| "loss": 0.8278499841690063, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 3.796008869179601, | |
| "grad_norm": 1.8050825595855713, | |
| "learning_rate": 5.326475325856036e-07, | |
| "loss": 0.4890661835670471, | |
| "step": 1712 | |
| }, | |
| { | |
| "epoch": 3.800443458980044, | |
| "grad_norm": 4.0279364585876465, | |
| "learning_rate": 5.312616623803174e-07, | |
| "loss": 0.4427688717842102, | |
| "step": 1714 | |
| }, | |
| { | |
| "epoch": 3.8048780487804876, | |
| "grad_norm": 1.8224685192108154, | |
| "learning_rate": 5.299056426063995e-07, | |
| "loss": 0.7666689157485962, | |
| "step": 1716 | |
| }, | |
| { | |
| "epoch": 3.8093126385809315, | |
| "grad_norm": 1.169655203819275, | |
| "learning_rate": 5.2857949150745e-07, | |
| "loss": 0.5377134084701538, | |
| "step": 1718 | |
| }, | |
| { | |
| "epoch": 3.8137472283813745, | |
| "grad_norm": 1.5467808246612549, | |
| "learning_rate": 5.27283226925222e-07, | |
| "loss": 0.5351519584655762, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 3.8181818181818183, | |
| "grad_norm": 0.9303249716758728, | |
| "learning_rate": 5.260168662993824e-07, | |
| "loss": 0.7153096199035645, | |
| "step": 1722 | |
| }, | |
| { | |
| "epoch": 3.8226164079822618, | |
| "grad_norm": 1.7566769123077393, | |
| "learning_rate": 5.247804266672765e-07, | |
| "loss": 0.7039221525192261, | |
| "step": 1724 | |
| }, | |
| { | |
| "epoch": 3.827050997782705, | |
| "grad_norm": 1.3865876197814941, | |
| "learning_rate": 5.235739246636988e-07, | |
| "loss": 0.6029395461082458, | |
| "step": 1726 | |
| }, | |
| { | |
| "epoch": 3.8314855875831486, | |
| "grad_norm": 1.0234519243240356, | |
| "learning_rate": 5.223973765206694e-07, | |
| "loss": 0.3769378960132599, | |
| "step": 1728 | |
| }, | |
| { | |
| "epoch": 3.835920177383592, | |
| "grad_norm": 1.2475616931915283, | |
| "learning_rate": 5.212507980672155e-07, | |
| "loss": 0.4270702302455902, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 3.8403547671840355, | |
| "grad_norm": 1.0097404718399048, | |
| "learning_rate": 5.201342047291587e-07, | |
| "loss": 0.7340813279151917, | |
| "step": 1732 | |
| }, | |
| { | |
| "epoch": 3.844789356984479, | |
| "grad_norm": 3.0468151569366455, | |
| "learning_rate": 5.190476115289063e-07, | |
| "loss": 0.8035828471183777, | |
| "step": 1734 | |
| }, | |
| { | |
| "epoch": 3.8492239467849223, | |
| "grad_norm": 1.1387958526611328, | |
| "learning_rate": 5.179910330852521e-07, | |
| "loss": 0.7475385069847107, | |
| "step": 1736 | |
| }, | |
| { | |
| "epoch": 3.8536585365853657, | |
| "grad_norm": 1.7286393642425537, | |
| "learning_rate": 5.169644836131759e-07, | |
| "loss": 0.6196325421333313, | |
| "step": 1738 | |
| }, | |
| { | |
| "epoch": 3.858093126385809, | |
| "grad_norm": 1.653084397315979, | |
| "learning_rate": 5.159679769236553e-07, | |
| "loss": 0.37859052419662476, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 3.8625277161862526, | |
| "grad_norm": 2.3914763927459717, | |
| "learning_rate": 5.150015264234782e-07, | |
| "loss": 0.4192189574241638, | |
| "step": 1742 | |
| }, | |
| { | |
| "epoch": 3.8669623059866964, | |
| "grad_norm": 1.2099565267562866, | |
| "learning_rate": 5.140651451150627e-07, | |
| "loss": 0.6012130379676819, | |
| "step": 1744 | |
| }, | |
| { | |
| "epoch": 3.8713968957871394, | |
| "grad_norm": 1.0337554216384888, | |
| "learning_rate": 5.131588455962835e-07, | |
| "loss": 0.6700254678726196, | |
| "step": 1746 | |
| }, | |
| { | |
| "epoch": 3.8758314855875833, | |
| "grad_norm": 1.2129027843475342, | |
| "learning_rate": 5.122826400602999e-07, | |
| "loss": 0.3560533821582794, | |
| "step": 1748 | |
| }, | |
| { | |
| "epoch": 3.8802660753880267, | |
| "grad_norm": 1.0586822032928467, | |
| "learning_rate": 5.114365402953946e-07, | |
| "loss": 0.4538826048374176, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 3.88470066518847, | |
| "grad_norm": 2.09010910987854, | |
| "learning_rate": 5.106205576848123e-07, | |
| "loss": 0.6869809031486511, | |
| "step": 1752 | |
| }, | |
| { | |
| "epoch": 3.8891352549889135, | |
| "grad_norm": 1.5580908060073853, | |
| "learning_rate": 5.09834703206609e-07, | |
| "loss": 0.7175853848457336, | |
| "step": 1754 | |
| }, | |
| { | |
| "epoch": 3.893569844789357, | |
| "grad_norm": 3.137685537338257, | |
| "learning_rate": 5.090789874335027e-07, | |
| "loss": 0.5131061673164368, | |
| "step": 1756 | |
| }, | |
| { | |
| "epoch": 3.8980044345898004, | |
| "grad_norm": 1.276068091392517, | |
| "learning_rate": 5.083534205327321e-07, | |
| "loss": 0.8281271457672119, | |
| "step": 1758 | |
| }, | |
| { | |
| "epoch": 3.902439024390244, | |
| "grad_norm": 0.07891691476106644, | |
| "learning_rate": 5.076580122659192e-07, | |
| "loss": 0.0017504242714494467, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 3.9068736141906872, | |
| "grad_norm": 3.17092227935791, | |
| "learning_rate": 5.069927719889383e-07, | |
| "loss": 0.37903013825416565, | |
| "step": 1762 | |
| }, | |
| { | |
| "epoch": 3.9113082039911307, | |
| "grad_norm": 0.18284766376018524, | |
| "learning_rate": 5.063577086517894e-07, | |
| "loss": 0.3342430591583252, | |
| "step": 1764 | |
| }, | |
| { | |
| "epoch": 3.9157427937915745, | |
| "grad_norm": 7.076814651489258, | |
| "learning_rate": 5.057528307984792e-07, | |
| "loss": 0.4555712938308716, | |
| "step": 1766 | |
| }, | |
| { | |
| "epoch": 3.9201773835920175, | |
| "grad_norm": 1.618480920791626, | |
| "learning_rate": 5.051781465669053e-07, | |
| "loss": 0.8074356913566589, | |
| "step": 1768 | |
| }, | |
| { | |
| "epoch": 3.9246119733924614, | |
| "grad_norm": 1.1312144994735718, | |
| "learning_rate": 5.04633663688746e-07, | |
| "loss": 0.6972216963768005, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 3.929046563192905, | |
| "grad_norm": 1.7633752822875977, | |
| "learning_rate": 5.04119389489358e-07, | |
| "loss": 0.2154180407524109, | |
| "step": 1772 | |
| }, | |
| { | |
| "epoch": 3.933481152993348, | |
| "grad_norm": 1.3447540998458862, | |
| "learning_rate": 5.036353308876764e-07, | |
| "loss": 0.637127697467804, | |
| "step": 1774 | |
| }, | |
| { | |
| "epoch": 3.9379157427937916, | |
| "grad_norm": 1.6075332164764404, | |
| "learning_rate": 5.031814943961221e-07, | |
| "loss": 0.7520142197608948, | |
| "step": 1776 | |
| }, | |
| { | |
| "epoch": 3.942350332594235, | |
| "grad_norm": 0.8913193941116333, | |
| "learning_rate": 5.027578861205139e-07, | |
| "loss": 0.08660762012004852, | |
| "step": 1778 | |
| }, | |
| { | |
| "epoch": 3.9467849223946785, | |
| "grad_norm": 2.475306510925293, | |
| "learning_rate": 5.023645117599877e-07, | |
| "loss": 0.42116767168045044, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 3.951219512195122, | |
| "grad_norm": 0.3929775357246399, | |
| "learning_rate": 5.020013766069176e-07, | |
| "loss": 0.3883530795574188, | |
| "step": 1782 | |
| }, | |
| { | |
| "epoch": 3.9556541019955653, | |
| "grad_norm": 1.1025030612945557, | |
| "learning_rate": 5.016684855468464e-07, | |
| "loss": 0.44370949268341064, | |
| "step": 1784 | |
| }, | |
| { | |
| "epoch": 3.9600886917960088, | |
| "grad_norm": 1.1644660234451294, | |
| "learning_rate": 5.013658430584194e-07, | |
| "loss": 0.651877224445343, | |
| "step": 1786 | |
| }, | |
| { | |
| "epoch": 3.964523281596452, | |
| "grad_norm": 0.9477538466453552, | |
| "learning_rate": 5.010934532133236e-07, | |
| "loss": 0.43285292387008667, | |
| "step": 1788 | |
| }, | |
| { | |
| "epoch": 3.9689578713968956, | |
| "grad_norm": 0.9940587878227234, | |
| "learning_rate": 5.008513196762342e-07, | |
| "loss": 0.5851073861122131, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 3.9733924611973395, | |
| "grad_norm": 1.0942143201828003, | |
| "learning_rate": 5.006394457047638e-07, | |
| "loss": 0.5996626615524292, | |
| "step": 1792 | |
| }, | |
| { | |
| "epoch": 3.9778270509977824, | |
| "grad_norm": 1.2127927541732788, | |
| "learning_rate": 5.004578341494197e-07, | |
| "loss": 0.3695821464061737, | |
| "step": 1794 | |
| }, | |
| { | |
| "epoch": 3.9822616407982263, | |
| "grad_norm": 1.090599536895752, | |
| "learning_rate": 5.003064874535649e-07, | |
| "loss": 0.7109208106994629, | |
| "step": 1796 | |
| }, | |
| { | |
| "epoch": 3.9866962305986697, | |
| "grad_norm": 1.2731367349624634, | |
| "learning_rate": 5.00185407653385e-07, | |
| "loss": 0.7177985906600952, | |
| "step": 1798 | |
| }, | |
| { | |
| "epoch": 3.991130820399113, | |
| "grad_norm": 2.5619304180145264, | |
| "learning_rate": 5.000945963778627e-07, | |
| "loss": 0.9041726589202881, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 3.9955654101995566, | |
| "grad_norm": 1.3043817281723022, | |
| "learning_rate": 5.000340548487528e-07, | |
| "loss": 0.6277958154678345, | |
| "step": 1802 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "grad_norm": 1.6461491584777832, | |
| "learning_rate": 5.000037838805682e-07, | |
| "loss": 0.2714325189590454, | |
| "step": 1804 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "step": 1804, | |
| "total_flos": 3.4175049861232067e+18, | |
| "train_loss": 0.8386258969696222, | |
| "train_runtime": 7909.6987, | |
| "train_samples_per_second": 6.842, | |
| "train_steps_per_second": 0.228 | |
| } | |
| ], | |
| "logging_steps": 2, | |
| "max_steps": 1804, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 4, | |
| "save_steps": 99999, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": false, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 3.4175049861232067e+18, | |
| "train_batch_size": 3, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |