Image-Text-to-Text
Transformers
Safetensors
qwen3_5
llama-factory
full
Generated from Trainer
conversational
Instructions to use furproxy/9b-52 with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use furproxy/9b-52 with Transformers:
```python
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("image-text-to-text", model="furproxy/9b-52")
messages = [
    {
        "role": "user",
        "content": [
            {"type": "image", "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/p-blog/candy.JPG"},
            {"type": "text", "text": "What animal is on the candy?"}
        ]
    },
]
pipe(text=messages)
```

```python
# Load model directly
from transformers import AutoProcessor, AutoModelForImageTextToText

processor = AutoProcessor.from_pretrained("furproxy/9b-52")
model = AutoModelForImageTextToText.from_pretrained("furproxy/9b-52")
messages = [
    {
        "role": "user",
        "content": [
            {"type": "image", "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/p-blog/candy.JPG"},
            {"type": "text", "text": "What animal is on the candy?"}
        ]
    },
]
inputs = processor.apply_chat_template(
    messages,
    add_generation_prompt=True,
    tokenize=True,
    return_dict=True,
    return_tensors="pt",
).to(model.device)

outputs = model.generate(**inputs, max_new_tokens=40)
print(processor.decode(outputs[0][inputs["input_ids"].shape[-1]:]))
```
- Notebooks
- Google Colab
- Kaggle
- Local Apps Settings
- vLLM
How to use furproxy/9b-52 with vLLM:
Install from pip and serve model
```bash
# Install vLLM from pip:
pip install vllm

# Start the vLLM server:
vllm serve "furproxy/9b-52"

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/chat/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "furproxy/9b-52",
        "messages": [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": "Describe this image in one sentence."
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
                        }
                    }
                ]
            }
        ]
    }'
```
Use Docker
docker model run hf.co/furproxy/9b-52
- SGLang
How to use furproxy/9b-52 with SGLang:
Install from pip and serve model
```bash
# Install SGLang from pip:
pip install sglang

# Start the SGLang server:
python3 -m sglang.launch_server \
    --model-path "furproxy/9b-52" \
    --host 0.0.0.0 \
    --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "furproxy/9b-52",
        "messages": [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": "Describe this image in one sentence."
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
                        }
                    }
                ]
            }
        ]
    }'
```
Use Docker images
```bash
docker run --gpus all \
    --shm-size 32g \
    -p 30000:30000 \
    -v ~/.cache/huggingface:/root/.cache/huggingface \
    --env "HF_TOKEN=<secret>" \
    --ipc=host \
    lmsysorg/sglang:latest \
    python3 -m sglang.launch_server \
        --model-path "furproxy/9b-52" \
        --host 0.0.0.0 \
        --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "furproxy/9b-52",
        "messages": [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": "Describe this image in one sentence."
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
                        }
                    }
                ]
            }
        ]
    }'
```
- Docker Model Runner
How to use furproxy/9b-52 with Docker Model Runner:
docker model run hf.co/furproxy/9b-52
| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 3.0, | |
| "eval_steps": 500, | |
| "global_step": 1098, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.00546448087431694, | |
| "grad_norm": 0.8349518775939941, | |
| "learning_rate": 5.454545454545455e-07, | |
| "loss": 2.6585090160369873, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.01092896174863388, | |
| "grad_norm": 1.1113650798797607, | |
| "learning_rate": 1.6363636363636363e-06, | |
| "loss": 1.9362050294876099, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.01639344262295082, | |
| "grad_norm": 0.5255424380302429, | |
| "learning_rate": 2.7272727272727272e-06, | |
| "loss": 1.8687975406646729, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.02185792349726776, | |
| "grad_norm": 0.6620073318481445, | |
| "learning_rate": 3.818181818181818e-06, | |
| "loss": 1.7159302234649658, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.0273224043715847, | |
| "grad_norm": 0.5295394062995911, | |
| "learning_rate": 4.90909090909091e-06, | |
| "loss": 1.685194492340088, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.03278688524590164, | |
| "grad_norm": 0.713146984577179, | |
| "learning_rate": 6e-06, | |
| "loss": 1.5490000247955322, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.03825136612021858, | |
| "grad_norm": 0.35955384373664856, | |
| "learning_rate": 7.090909090909091e-06, | |
| "loss": 1.4553167819976807, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.04371584699453552, | |
| "grad_norm": 0.28743666410446167, | |
| "learning_rate": 8.181818181818181e-06, | |
| "loss": 1.1914829015731812, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.04918032786885246, | |
| "grad_norm": 0.36817532777786255, | |
| "learning_rate": 9.272727272727273e-06, | |
| "loss": 1.1589792966842651, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.0546448087431694, | |
| "grad_norm": 0.32375410199165344, | |
| "learning_rate": 1.0363636363636364e-05, | |
| "loss": 1.3819025754928589, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.060109289617486336, | |
| "grad_norm": 0.30952924489974976, | |
| "learning_rate": 1.1454545454545455e-05, | |
| "loss": 1.3256053924560547, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.06557377049180328, | |
| "grad_norm": 0.38788944482803345, | |
| "learning_rate": 1.2545454545454545e-05, | |
| "loss": 1.3872653245925903, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.07103825136612021, | |
| "grad_norm": 6.16391658782959, | |
| "learning_rate": 1.3636363636363637e-05, | |
| "loss": 1.5898637771606445, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.07650273224043716, | |
| "grad_norm": 0.41342800855636597, | |
| "learning_rate": 1.4727272727272728e-05, | |
| "loss": 1.346091389656067, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.08196721311475409, | |
| "grad_norm": 0.4514571726322174, | |
| "learning_rate": 1.5818181818181818e-05, | |
| "loss": 1.2832849025726318, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.08743169398907104, | |
| "grad_norm": 0.8185935616493225, | |
| "learning_rate": 1.6909090909090907e-05, | |
| "loss": 1.2825489044189453, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.09289617486338798, | |
| "grad_norm": 0.431914359331131, | |
| "learning_rate": 1.8e-05, | |
| "loss": 1.01780104637146, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.09836065573770492, | |
| "grad_norm": 0.23881161212921143, | |
| "learning_rate": 1.909090909090909e-05, | |
| "loss": 1.1156284809112549, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.10382513661202186, | |
| "grad_norm": 0.32558315992355347, | |
| "learning_rate": 2.0181818181818183e-05, | |
| "loss": 1.3640450239181519, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.1092896174863388, | |
| "grad_norm": 0.26043546199798584, | |
| "learning_rate": 2.1272727272727273e-05, | |
| "loss": 1.302004337310791, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.11475409836065574, | |
| "grad_norm": 3.590569257736206, | |
| "learning_rate": 2.2363636363636366e-05, | |
| "loss": 0.9626105427742004, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.12021857923497267, | |
| "grad_norm": 0.34414249658584595, | |
| "learning_rate": 2.3454545454545456e-05, | |
| "loss": 0.9383161067962646, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.12568306010928962, | |
| "grad_norm": 0.33813583850860596, | |
| "learning_rate": 2.454545454545455e-05, | |
| "loss": 1.3467686176300049, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.13114754098360656, | |
| "grad_norm": 0.29904571175575256, | |
| "learning_rate": 2.5636363636363635e-05, | |
| "loss": 1.3344920873641968, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.1366120218579235, | |
| "grad_norm": 0.5444883704185486, | |
| "learning_rate": 2.6727272727272728e-05, | |
| "loss": 1.2999744415283203, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.14207650273224043, | |
| "grad_norm": 0.36373546719551086, | |
| "learning_rate": 2.7818181818181818e-05, | |
| "loss": 1.2705168724060059, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.14754098360655737, | |
| "grad_norm": 3.3880348205566406, | |
| "learning_rate": 2.890909090909091e-05, | |
| "loss": 0.7357171773910522, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.15300546448087432, | |
| "grad_norm": 0.40625038743019104, | |
| "learning_rate": 3e-05, | |
| "loss": 1.4833019971847534, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.15846994535519127, | |
| "grad_norm": 0.3753228783607483, | |
| "learning_rate": 2.9999755040899112e-05, | |
| "loss": 1.464920163154602, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.16393442622950818, | |
| "grad_norm": 0.24931401014328003, | |
| "learning_rate": 2.9999020172486062e-05, | |
| "loss": 1.337968349456787, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.16939890710382513, | |
| "grad_norm": 0.46426111459732056, | |
| "learning_rate": 2.9997795421429404e-05, | |
| "loss": 1.5345537662506104, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.17486338797814208, | |
| "grad_norm": 0.7521016001701355, | |
| "learning_rate": 2.9996080832175648e-05, | |
| "loss": 1.175337553024292, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.18032786885245902, | |
| "grad_norm": 0.47603926062583923, | |
| "learning_rate": 2.9993876466947636e-05, | |
| "loss": 1.3400994539260864, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.18579234972677597, | |
| "grad_norm": 0.28464552760124207, | |
| "learning_rate": 2.9991182405742313e-05, | |
| "loss": 1.3063408136367798, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.1912568306010929, | |
| "grad_norm": 0.4081110954284668, | |
| "learning_rate": 2.9987998746327793e-05, | |
| "loss": 1.3260284662246704, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.19672131147540983, | |
| "grad_norm": 0.36022159457206726, | |
| "learning_rate": 2.9984325604239832e-05, | |
| "loss": 1.0935287475585938, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.20218579234972678, | |
| "grad_norm": 0.2852855622768402, | |
| "learning_rate": 2.9980163112777633e-05, | |
| "loss": 1.1231757402420044, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.20765027322404372, | |
| "grad_norm": 0.776136577129364, | |
| "learning_rate": 2.9975511422998992e-05, | |
| "loss": 1.3375043869018555, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.21311475409836064, | |
| "grad_norm": 2.1257131099700928, | |
| "learning_rate": 2.9970370703714842e-05, | |
| "loss": 1.4646670818328857, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.2185792349726776, | |
| "grad_norm": 0.3119620382785797, | |
| "learning_rate": 2.9964741141483095e-05, | |
| "loss": 0.6720016002655029, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.22404371584699453, | |
| "grad_norm": 0.37047696113586426, | |
| "learning_rate": 2.9958622940601907e-05, | |
| "loss": 1.3742259740829468, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.22950819672131148, | |
| "grad_norm": 0.804155170917511, | |
| "learning_rate": 2.9952016323102237e-05, | |
| "loss": 0.7848977446556091, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.23497267759562843, | |
| "grad_norm": 0.20921625196933746, | |
| "learning_rate": 2.9944921528739798e-05, | |
| "loss": 1.3509215116500854, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.24043715846994534, | |
| "grad_norm": 0.4230685234069824, | |
| "learning_rate": 2.993733881498636e-05, | |
| "loss": 1.2749418020248413, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.2459016393442623, | |
| "grad_norm": 1.2742705345153809, | |
| "learning_rate": 2.9929268457020412e-05, | |
| "loss": 1.0279054641723633, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.25136612021857924, | |
| "grad_norm": 0.7286893129348755, | |
| "learning_rate": 2.9920710747717152e-05, | |
| "loss": 1.3933875560760498, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.2568306010928962, | |
| "grad_norm": 0.3418238162994385, | |
| "learning_rate": 2.991166599763788e-05, | |
| "loss": 1.2102015018463135, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.26229508196721313, | |
| "grad_norm": 0.6196624040603638, | |
| "learning_rate": 2.990213453501872e-05, | |
| "loss": 1.5771762132644653, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.2677595628415301, | |
| "grad_norm": 0.40388840436935425, | |
| "learning_rate": 2.9892116705758716e-05, | |
| "loss": 1.2043923139572144, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.273224043715847, | |
| "grad_norm": 0.37436801195144653, | |
| "learning_rate": 2.988161287340726e-05, | |
| "loss": 1.1322193145751953, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.2786885245901639, | |
| "grad_norm": 0.4332585334777832, | |
| "learning_rate": 2.9870623419150935e-05, | |
| "loss": 1.3135957717895508, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.28415300546448086, | |
| "grad_norm": 0.4765128791332245, | |
| "learning_rate": 2.9859148741799622e-05, | |
| "loss": 1.0530325174331665, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.2896174863387978, | |
| "grad_norm": 2.4587535858154297, | |
| "learning_rate": 2.98471892577721e-05, | |
| "loss": 1.297810435295105, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.29508196721311475, | |
| "grad_norm": 0.46058765053749084, | |
| "learning_rate": 2.983474540108087e-05, | |
| "loss": 1.4367880821228027, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.3005464480874317, | |
| "grad_norm": 0.5404685139656067, | |
| "learning_rate": 2.9821817623316448e-05, | |
| "loss": 1.2867742776870728, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.30601092896174864, | |
| "grad_norm": 0.3982601761817932, | |
| "learning_rate": 2.9808406393630953e-05, | |
| "loss": 1.2902946472167969, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.3114754098360656, | |
| "grad_norm": 0.7301350831985474, | |
| "learning_rate": 2.9794512198721096e-05, | |
| "loss": 1.445720911026001, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.31693989071038253, | |
| "grad_norm": 0.3251919150352478, | |
| "learning_rate": 2.9780135542810495e-05, | |
| "loss": 1.282047152519226, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.3224043715846995, | |
| "grad_norm": 0.36070072650909424, | |
| "learning_rate": 2.9765276947631414e-05, | |
| "loss": 1.0572261810302734, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 0.32786885245901637, | |
| "grad_norm": 1.8308000564575195, | |
| "learning_rate": 2.974993695240579e-05, | |
| "loss": 1.2791436910629272, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.3333333333333333, | |
| "grad_norm": 0.5521191954612732, | |
| "learning_rate": 2.9734116113825688e-05, | |
| "loss": 1.3651310205459595, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 0.33879781420765026, | |
| "grad_norm": 0.2895878255367279, | |
| "learning_rate": 2.9717815006033093e-05, | |
| "loss": 1.1527748107910156, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.3442622950819672, | |
| "grad_norm": 0.2666403651237488, | |
| "learning_rate": 2.9701034220599074e-05, | |
| "loss": 1.0741976499557495, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 0.34972677595628415, | |
| "grad_norm": 2.9272992610931396, | |
| "learning_rate": 2.9683774366502314e-05, | |
| "loss": 1.3308141231536865, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.3551912568306011, | |
| "grad_norm": 0.17278704047203064, | |
| "learning_rate": 2.9666036070107015e-05, | |
| "loss": 1.2851102352142334, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.36065573770491804, | |
| "grad_norm": 0.19944007694721222, | |
| "learning_rate": 2.964781997514015e-05, | |
| "loss": 1.2214155197143555, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 0.366120218579235, | |
| "grad_norm": 0.48982882499694824, | |
| "learning_rate": 2.9629126742668136e-05, | |
| "loss": 1.1284376382827759, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 0.37158469945355194, | |
| "grad_norm": 0.5427325367927551, | |
| "learning_rate": 2.9609957051072805e-05, | |
| "loss": 1.0665885210037231, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.3770491803278688, | |
| "grad_norm": 0.2552976906299591, | |
| "learning_rate": 2.959031159602682e-05, | |
| "loss": 1.0383477210998535, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 0.3825136612021858, | |
| "grad_norm": 0.25971028208732605, | |
| "learning_rate": 2.9570191090468393e-05, | |
| "loss": 0.6707891225814819, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.3879781420765027, | |
| "grad_norm": 0.26162365078926086, | |
| "learning_rate": 2.9549596264575452e-05, | |
| "loss": 1.2587742805480957, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 0.39344262295081966, | |
| "grad_norm": 0.3446868658065796, | |
| "learning_rate": 2.952852786573911e-05, | |
| "loss": 1.2872346639633179, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.3989071038251366, | |
| "grad_norm": 0.24461157619953156, | |
| "learning_rate": 2.9506986658536562e-05, | |
| "loss": 1.2417794466018677, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 0.40437158469945356, | |
| "grad_norm": 0.41741102933883667, | |
| "learning_rate": 2.9484973424703315e-05, | |
| "loss": 1.2326178550720215, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 0.4098360655737705, | |
| "grad_norm": 0.615510880947113, | |
| "learning_rate": 2.946248896310486e-05, | |
| "loss": 1.1736871004104614, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.41530054644808745, | |
| "grad_norm": 0.7076267004013062, | |
| "learning_rate": 2.9439534089707624e-05, | |
| "loss": 1.2764538526535034, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 0.4207650273224044, | |
| "grad_norm": 0.3268417418003082, | |
| "learning_rate": 2.9416109637549423e-05, | |
| "loss": 1.258562684059143, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 0.4262295081967213, | |
| "grad_norm": 0.2503832280635834, | |
| "learning_rate": 2.9392216456709174e-05, | |
| "loss": 1.294039249420166, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 0.43169398907103823, | |
| "grad_norm": 1.2436753511428833, | |
| "learning_rate": 2.9367855414276077e-05, | |
| "loss": 1.033632755279541, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 0.4371584699453552, | |
| "grad_norm": 0.2629822790622711, | |
| "learning_rate": 2.9343027394318137e-05, | |
| "loss": 1.2340978384017944, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.4426229508196721, | |
| "grad_norm": 0.43004974722862244, | |
| "learning_rate": 2.9317733297850092e-05, | |
| "loss": 0.7392095923423767, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 0.44808743169398907, | |
| "grad_norm": 0.29098740220069885, | |
| "learning_rate": 2.9291974042800696e-05, | |
| "loss": 1.6267766952514648, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 0.453551912568306, | |
| "grad_norm": 1.0974997282028198, | |
| "learning_rate": 2.9265750563979432e-05, | |
| "loss": 0.7922143340110779, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 0.45901639344262296, | |
| "grad_norm": 0.3675383925437927, | |
| "learning_rate": 2.9239063813042557e-05, | |
| "loss": 1.1761270761489868, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 0.4644808743169399, | |
| "grad_norm": 0.3152557611465454, | |
| "learning_rate": 2.9211914758458602e-05, | |
| "loss": 1.265891432762146, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.46994535519125685, | |
| "grad_norm": 0.4289422631263733, | |
| "learning_rate": 2.9184304385473183e-05, | |
| "loss": 1.3408340215682983, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 0.47540983606557374, | |
| "grad_norm": 0.4991217851638794, | |
| "learning_rate": 2.9156233696073293e-05, | |
| "loss": 1.4506194591522217, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 0.4808743169398907, | |
| "grad_norm": 0.39125722646713257, | |
| "learning_rate": 2.912770370895091e-05, | |
| "loss": 1.236718773841858, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 0.48633879781420764, | |
| "grad_norm": 0.5120674967765808, | |
| "learning_rate": 2.909871545946603e-05, | |
| "loss": 1.1975741386413574, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 0.4918032786885246, | |
| "grad_norm": 0.3231360614299774, | |
| "learning_rate": 2.9069269999609095e-05, | |
| "loss": 1.1961040496826172, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.4972677595628415, | |
| "grad_norm": 0.8365263938903809, | |
| "learning_rate": 2.903936839796284e-05, | |
| "loss": 1.1858608722686768, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 0.5027322404371585, | |
| "grad_norm": 1.0185188055038452, | |
| "learning_rate": 2.9009011739663467e-05, | |
| "loss": 1.3475292921066284, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 0.5081967213114754, | |
| "grad_norm": 0.767466127872467, | |
| "learning_rate": 2.897820112636132e-05, | |
| "loss": 1.2032495737075806, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 0.5136612021857924, | |
| "grad_norm": 2.586745023727417, | |
| "learning_rate": 2.894693767618085e-05, | |
| "loss": 1.1861158609390259, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 0.5191256830601093, | |
| "grad_norm": 0.3434876799583435, | |
| "learning_rate": 2.8915222523680082e-05, | |
| "loss": 1.2496143579483032, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.5245901639344263, | |
| "grad_norm": 0.37436240911483765, | |
| "learning_rate": 2.8883056819809428e-05, | |
| "loss": 1.21352219581604, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 0.5300546448087432, | |
| "grad_norm": 0.30402809381484985, | |
| "learning_rate": 2.8850441731869903e-05, | |
| "loss": 1.2435191869735718, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 0.5355191256830601, | |
| "grad_norm": 0.2915107309818268, | |
| "learning_rate": 2.8817378443470787e-05, | |
| "loss": 1.2945222854614258, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 0.5409836065573771, | |
| "grad_norm": 0.3910680115222931, | |
| "learning_rate": 2.8783868154486656e-05, | |
| "loss": 1.1279659271240234, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 0.546448087431694, | |
| "grad_norm": 0.3353728950023651, | |
| "learning_rate": 2.8749912081013853e-05, | |
| "loss": 1.2710188627243042, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.5519125683060109, | |
| "grad_norm": 0.3767080008983612, | |
| "learning_rate": 2.8715511455326328e-05, | |
| "loss": 1.1226139068603516, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 0.5573770491803278, | |
| "grad_norm": 0.48755672574043274, | |
| "learning_rate": 2.8680667525830946e-05, | |
| "loss": 0.951927661895752, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 0.5628415300546448, | |
| "grad_norm": 0.34329354763031006, | |
| "learning_rate": 2.8645381557022175e-05, | |
| "loss": 1.2716200351715088, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 0.5683060109289617, | |
| "grad_norm": 0.3423449993133545, | |
| "learning_rate": 2.8609654829436193e-05, | |
| "loss": 1.1820414066314697, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 0.5737704918032787, | |
| "grad_norm": 0.4888955056667328, | |
| "learning_rate": 2.8573488639604418e-05, | |
| "loss": 1.227460265159607, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.5792349726775956, | |
| "grad_norm": 1.592087984085083, | |
| "learning_rate": 2.853688430000645e-05, | |
| "loss": 1.5284310579299927, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 0.5846994535519126, | |
| "grad_norm": 0.2334529608488083, | |
| "learning_rate": 2.8499843139022463e-05, | |
| "loss": 1.2371737957000732, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 0.5901639344262295, | |
| "grad_norm": 1.1139333248138428, | |
| "learning_rate": 2.846236650088497e-05, | |
| "loss": 1.6006219387054443, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 0.5956284153005464, | |
| "grad_norm": 0.23051802814006805, | |
| "learning_rate": 2.8424455745630055e-05, | |
| "loss": 1.214034914970398, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 0.6010928961748634, | |
| "grad_norm": 0.33928921818733215, | |
| "learning_rate": 2.838611224904802e-05, | |
| "loss": 1.2665811777114868, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.6065573770491803, | |
| "grad_norm": 0.4052067995071411, | |
| "learning_rate": 2.8347337402633456e-05, | |
| "loss": 1.1260387897491455, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 0.6120218579234973, | |
| "grad_norm": 7.600457191467285, | |
| "learning_rate": 2.830813261353472e-05, | |
| "loss": 1.0921545028686523, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 0.6174863387978142, | |
| "grad_norm": 0.22103650867938995, | |
| "learning_rate": 2.8268499304502923e-05, | |
| "loss": 1.2517577409744263, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 0.6229508196721312, | |
| "grad_norm": 2.4157559871673584, | |
| "learning_rate": 2.822843891384024e-05, | |
| "loss": 1.264587640762329, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 0.6284153005464481, | |
| "grad_norm": 0.24047903716564178, | |
| "learning_rate": 2.818795289534775e-05, | |
| "loss": 0.739978015422821, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.6338797814207651, | |
| "grad_norm": 0.3653843402862549, | |
| "learning_rate": 2.8147042718272675e-05, | |
| "loss": 1.270297884941101, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 0.639344262295082, | |
| "grad_norm": 0.8584589958190918, | |
| "learning_rate": 2.8105709867255028e-05, | |
| "loss": 1.1542037725448608, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 0.644808743169399, | |
| "grad_norm": 0.9217857718467712, | |
| "learning_rate": 2.806395584227379e-05, | |
| "loss": 0.9023241996765137, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 0.6502732240437158, | |
| "grad_norm": 0.4675036370754242, | |
| "learning_rate": 2.8021782158592416e-05, | |
| "loss": 1.3440523147583008, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 0.6557377049180327, | |
| "grad_norm": 0.28667768836021423, | |
| "learning_rate": 2.7979190346703905e-05, | |
| "loss": 1.251165747642517, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.6612021857923497, | |
| "grad_norm": 0.5557416081428528, | |
| "learning_rate": 2.7936181952275202e-05, | |
| "loss": 1.1947143077850342, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 0.6666666666666666, | |
| "grad_norm": 0.5286179184913635, | |
| "learning_rate": 2.789275853609114e-05, | |
| "loss": 1.1044130325317383, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 0.6721311475409836, | |
| "grad_norm": 0.19599549472332, | |
| "learning_rate": 2.7848921673997802e-05, | |
| "loss": 1.2250175476074219, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 0.6775956284153005, | |
| "grad_norm": 0.3402693271636963, | |
| "learning_rate": 2.7804672956845295e-05, | |
| "loss": 1.2818059921264648, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 0.6830601092896175, | |
| "grad_norm": 0.49632683396339417, | |
| "learning_rate": 2.776001399043007e-05, | |
| "loss": 1.239612340927124, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.6885245901639344, | |
| "grad_norm": 0.27504825592041016, | |
| "learning_rate": 2.7714946395436603e-05, | |
| "loss": 1.1610487699508667, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 0.6939890710382514, | |
| "grad_norm": 0.3069857358932495, | |
| "learning_rate": 2.766947180737861e-05, | |
| "loss": 1.200732707977295, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 0.6994535519125683, | |
| "grad_norm": 0.2805365025997162, | |
| "learning_rate": 2.762359187653968e-05, | |
| "loss": 1.1845617294311523, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 0.7049180327868853, | |
| "grad_norm": 0.37796005606651306, | |
| "learning_rate": 2.7577308267913373e-05, | |
| "loss": 1.149518609046936, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 0.7103825136612022, | |
| "grad_norm": 0.3511449694633484, | |
| "learning_rate": 2.7530622661142833e-05, | |
| "loss": 1.1933660507202148, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.7158469945355191, | |
| "grad_norm": 0.6162945628166199, | |
| "learning_rate": 2.7483536750459794e-05, | |
| "loss": 1.289069414138794, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 0.7213114754098361, | |
| "grad_norm": 0.3724265396595001, | |
| "learning_rate": 2.7436052244623127e-05, | |
| "loss": 1.2320224046707153, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 0.726775956284153, | |
| "grad_norm": 0.28604817390441895, | |
| "learning_rate": 2.7388170866856816e-05, | |
| "loss": 1.080925464630127, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 0.73224043715847, | |
| "grad_norm": 0.9405566453933716, | |
| "learning_rate": 2.7339894354787406e-05, | |
| "loss": 1.2446898221969604, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 0.7377049180327869, | |
| "grad_norm": 0.46044522523880005, | |
| "learning_rate": 2.7291224460380973e-05, | |
| "loss": 1.2270363569259644, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.7431693989071039, | |
| "grad_norm": 0.5795265436172485, | |
| "learning_rate": 2.7242162949879533e-05, | |
| "loss": 1.1811434030532837, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 0.7486338797814208, | |
| "grad_norm": 0.5641931295394897, | |
| "learning_rate": 2.719271160373693e-05, | |
| "loss": 1.1956827640533447, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 0.7540983606557377, | |
| "grad_norm": 0.4614900052547455, | |
| "learning_rate": 2.7142872216554246e-05, | |
| "loss": 1.2421934604644775, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 0.7595628415300546, | |
| "grad_norm": 0.4700526297092438, | |
| "learning_rate": 2.709264659701467e-05, | |
| "loss": 1.206323504447937, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 0.7650273224043715, | |
| "grad_norm": 0.4233056604862213, | |
| "learning_rate": 2.7042036567817838e-05, | |
| "loss": 0.9012216925621033, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.7704918032786885, | |
| "grad_norm": 0.3025325536727905, | |
| "learning_rate": 2.6991043965613715e-05, | |
| "loss": 1.2100359201431274, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 0.7759562841530054, | |
| "grad_norm": 0.3858908712863922, | |
| "learning_rate": 2.6939670640935933e-05, | |
| "loss": 1.1925559043884277, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 0.7814207650273224, | |
| "grad_norm": 0.7173455953598022, | |
| "learning_rate": 2.6887918458134625e-05, | |
| "loss": 1.2038462162017822, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 0.7868852459016393, | |
| "grad_norm": 0.3233148157596588, | |
| "learning_rate": 2.6835789295308775e-05, | |
| "loss": 1.181785225868225, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 0.7923497267759563, | |
| "grad_norm": 0.5368382930755615, | |
| "learning_rate": 2.6783285044238075e-05, | |
| "loss": 1.183975100517273, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.7978142076502732, | |
| "grad_norm": 1.3916505575180054, | |
| "learning_rate": 2.6730407610314235e-05, | |
| "loss": 1.1650279760360718, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 0.8032786885245902, | |
| "grad_norm": 0.5467023849487305, | |
| "learning_rate": 2.6677158912471888e-05, | |
| "loss": 1.1808955669403076, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 0.8087431693989071, | |
| "grad_norm": 0.45474594831466675, | |
| "learning_rate": 2.66235408831189e-05, | |
| "loss": 1.5349675416946411, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 0.8142076502732241, | |
| "grad_norm": 1.3314640522003174, | |
| "learning_rate": 2.6569555468066274e-05, | |
| "loss": 1.0294817686080933, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 0.819672131147541, | |
| "grad_norm": 0.4061235189437866, | |
| "learning_rate": 2.6515204626457522e-05, | |
| "loss": 1.599696397781372, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.825136612021858, | |
| "grad_norm": 0.6883937120437622, | |
| "learning_rate": 2.646049033069758e-05, | |
| "loss": 1.204469084739685, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 0.8306010928961749, | |
| "grad_norm": 0.5468365550041199, | |
| "learning_rate": 2.6405414566381223e-05, | |
| "loss": 0.8217574954032898, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 0.8360655737704918, | |
| "grad_norm": 1.1285362243652344, | |
| "learning_rate": 2.6349979332220992e-05, | |
| "loss": 1.5492712259292603, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 0.8415300546448088, | |
| "grad_norm": 0.3900437355041504, | |
| "learning_rate": 2.6294186639974684e-05, | |
| "loss": 0.8982808589935303, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 0.8469945355191257, | |
| "grad_norm": 0.695273220539093, | |
| "learning_rate": 2.6238038514372337e-05, | |
| "loss": 1.19428288936615, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.8524590163934426, | |
| "grad_norm": 0.5239560008049011, | |
| "learning_rate": 2.618153699304274e-05, | |
| "loss": 1.208155632019043, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 0.8579234972677595, | |
| "grad_norm": 0.4538440704345703, | |
| "learning_rate": 2.6124684126439502e-05, | |
| "loss": 1.22481107711792, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 0.8633879781420765, | |
| "grad_norm": 0.3862628936767578, | |
| "learning_rate": 2.6067481977766644e-05, | |
| "loss": 1.2237530946731567, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 0.8688524590163934, | |
| "grad_norm": 0.42930173873901367, | |
| "learning_rate": 2.6009932622903703e-05, | |
| "loss": 1.1499100923538208, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 0.8743169398907104, | |
| "grad_norm": 0.27683964371681213, | |
| "learning_rate": 2.5952038150330414e-05, | |
| "loss": 1.3338992595672607, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.8797814207650273, | |
| "grad_norm": 0.5416029691696167, | |
| "learning_rate": 2.589380066105092e-05, | |
| "loss": 1.1807247400283813, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 0.8852459016393442, | |
| "grad_norm": 0.44596952199935913, | |
| "learning_rate": 2.583522226851752e-05, | |
| "loss": 0.6371968388557434, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 0.8907103825136612, | |
| "grad_norm": 0.25335779786109924, | |
| "learning_rate": 2.5776305098553967e-05, | |
| "loss": 1.2608835697174072, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 0.8961748633879781, | |
| "grad_norm": 0.64796382188797, | |
| "learning_rate": 2.5717051289278337e-05, | |
| "loss": 1.1109199523925781, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 0.9016393442622951, | |
| "grad_norm": 1.1634434461593628, | |
| "learning_rate": 2.565746299102542e-05, | |
| "loss": 1.2793126106262207, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.907103825136612, | |
| "grad_norm": 0.7927089333534241, | |
| "learning_rate": 2.5597542366268693e-05, | |
| "loss": 1.3189245462417603, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 0.912568306010929, | |
| "grad_norm": 0.3011920154094696, | |
| "learning_rate": 2.553729158954184e-05, | |
| "loss": 1.1566650867462158, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 0.9180327868852459, | |
| "grad_norm": 0.2131953090429306, | |
| "learning_rate": 2.5476712847359837e-05, | |
| "loss": 1.1993520259857178, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 0.9234972677595629, | |
| "grad_norm": 0.510997474193573, | |
| "learning_rate": 2.5415808338139605e-05, | |
| "loss": 1.2385536432266235, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 0.9289617486338798, | |
| "grad_norm": 0.37556126713752747, | |
| "learning_rate": 2.5354580272120225e-05, | |
| "loss": 1.2194688320159912, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.9344262295081968, | |
| "grad_norm": 0.5696521997451782, | |
| "learning_rate": 2.529303087128274e-05, | |
| "loss": 1.259057641029358, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 0.9398907103825137, | |
| "grad_norm": 8.865042686462402, | |
| "learning_rate": 2.5231162369269498e-05, | |
| "loss": 1.2176750898361206, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 0.9453551912568307, | |
| "grad_norm": 0.2281872183084488, | |
| "learning_rate": 2.5168977011303122e-05, | |
| "loss": 1.6343557834625244, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 0.9508196721311475, | |
| "grad_norm": 0.9919049143791199, | |
| "learning_rate": 2.5106477054104993e-05, | |
| "loss": 0.9662102460861206, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 0.9562841530054644, | |
| "grad_norm": 0.21607114374637604, | |
| "learning_rate": 2.504366476581338e-05, | |
| "loss": 0.9071711897850037, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.9617486338797814, | |
| "grad_norm": 0.22804783284664154, | |
| "learning_rate": 2.498054242590112e-05, | |
| "loss": 1.2166848182678223, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 0.9672131147540983, | |
| "grad_norm": 0.31356871128082275, | |
| "learning_rate": 2.4917112325092904e-05, | |
| "loss": 0.9337010979652405, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 0.9726775956284153, | |
| "grad_norm": 0.3371138572692871, | |
| "learning_rate": 2.485337676528213e-05, | |
| "loss": 1.1291182041168213, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 0.9781420765027322, | |
| "grad_norm": 0.40679609775543213, | |
| "learning_rate": 2.478933805944739e-05, | |
| "loss": 0.7860838770866394, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 0.9836065573770492, | |
| "grad_norm": 0.2649918496608734, | |
| "learning_rate": 2.4724998531568485e-05, | |
| "loss": 1.2208523750305176, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.9890710382513661, | |
| "grad_norm": 0.21613630652427673, | |
| "learning_rate": 2.4660360516542164e-05, | |
| "loss": 0.9990649819374084, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 0.994535519125683, | |
| "grad_norm": 0.2330239862203598, | |
| "learning_rate": 2.4595426360097314e-05, | |
| "loss": 1.1983908414840698, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 0.1835353672504425, | |
| "learning_rate": 2.4530198418709876e-05, | |
| "loss": 1.1861966848373413, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 1.005464480874317, | |
| "grad_norm": 0.1733914166688919, | |
| "learning_rate": 2.4464679059517298e-05, | |
| "loss": 0.9823465347290039, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 1.010928961748634, | |
| "grad_norm": 0.3794485330581665, | |
| "learning_rate": 2.4398870660232684e-05, | |
| "loss": 0.8994698524475098, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 1.0163934426229508, | |
| "grad_norm": 0.2840877175331116, | |
| "learning_rate": 2.433277560905844e-05, | |
| "loss": 0.8950268626213074, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 1.0218579234972678, | |
| "grad_norm": 0.30615735054016113, | |
| "learning_rate": 2.426639630459965e-05, | |
| "loss": 1.0142794847488403, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 1.0273224043715847, | |
| "grad_norm": 2.2517504692077637, | |
| "learning_rate": 2.4199735155777017e-05, | |
| "loss": 0.7609462738037109, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 1.0327868852459017, | |
| "grad_norm": 0.6455325484275818, | |
| "learning_rate": 2.4132794581739447e-05, | |
| "loss": 0.8975200653076172, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 1.0382513661202186, | |
| "grad_norm": 0.2738470137119293, | |
| "learning_rate": 2.4065577011776237e-05, | |
| "loss": 0.6991083025932312, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 1.0437158469945356, | |
| "grad_norm": 0.4126173257827759, | |
| "learning_rate": 2.399808488522896e-05, | |
| "loss": 0.8340121507644653, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 1.0491803278688525, | |
| "grad_norm": 0.3450702130794525, | |
| "learning_rate": 2.3930320651402877e-05, | |
| "loss": 0.8715396523475647, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 1.0546448087431695, | |
| "grad_norm": 0.3349602222442627, | |
| "learning_rate": 2.386228676947811e-05, | |
| "loss": 0.72311931848526, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 1.0601092896174864, | |
| "grad_norm": 0.49791452288627625, | |
| "learning_rate": 2.3793985708420378e-05, | |
| "loss": 0.708154559135437, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 1.0655737704918034, | |
| "grad_norm": 0.24925543367862701, | |
| "learning_rate": 2.3725419946891376e-05, | |
| "loss": 1.1333740949630737, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 1.0710382513661203, | |
| "grad_norm": 0.19225291907787323, | |
| "learning_rate": 2.3656591973158865e-05, | |
| "loss": 0.9638100862503052, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 1.0765027322404372, | |
| "grad_norm": 0.355923056602478, | |
| "learning_rate": 2.358750428500632e-05, | |
| "loss": 0.6823252439498901, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 1.0819672131147542, | |
| "grad_norm": 0.1856895536184311, | |
| "learning_rate": 2.3518159389642338e-05, | |
| "loss": 1.0084342956542969, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 1.0874316939890711, | |
| "grad_norm": 0.34302467107772827, | |
| "learning_rate": 2.344855980360961e-05, | |
| "loss": 0.9227741360664368, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 1.092896174863388, | |
| "grad_norm": 0.5094830989837646, | |
| "learning_rate": 2.3378708052693634e-05, | |
| "loss": 1.070932388305664, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 1.098360655737705, | |
| "grad_norm": 0.7553594708442688, | |
| "learning_rate": 2.3308606671831005e-05, | |
| "loss": 1.1846566200256348, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 1.1038251366120218, | |
| "grad_norm": 0.38487228751182556, | |
| "learning_rate": 2.3238258205017463e-05, | |
| "loss": 1.138014316558838, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 1.1092896174863387, | |
| "grad_norm": 0.8587226271629333, | |
| "learning_rate": 2.3167665205215557e-05, | |
| "loss": 0.9664843678474426, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 1.1147540983606556, | |
| "grad_norm": 0.4061746597290039, | |
| "learning_rate": 2.3096830234261996e-05, | |
| "loss": 0.6715931296348572, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 1.1202185792349726, | |
| "grad_norm": 0.24730029702186584, | |
| "learning_rate": 2.302575586277466e-05, | |
| "loss": 1.2493940591812134, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 1.1256830601092895, | |
| "grad_norm": 0.5724151730537415, | |
| "learning_rate": 2.2954444670059364e-05, | |
| "loss": 0.8063728213310242, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 1.1311475409836065, | |
| "grad_norm": 0.36146894097328186, | |
| "learning_rate": 2.2882899244016197e-05, | |
| "loss": 0.7969419360160828, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 1.1366120218579234, | |
| "grad_norm": 0.5529263019561768, | |
| "learning_rate": 2.281112218104564e-05, | |
| "loss": 0.70838463306427, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 1.1420765027322404, | |
| "grad_norm": 0.1716151237487793, | |
| "learning_rate": 2.2739116085954323e-05, | |
| "loss": 0.9043253660202026, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 1.1475409836065573, | |
| "grad_norm": 0.19492962956428528, | |
| "learning_rate": 2.266688357186053e-05, | |
| "loss": 0.9036260843276978, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 1.1530054644808743, | |
| "grad_norm": 0.2229461371898651, | |
| "learning_rate": 2.2594427260099317e-05, | |
| "loss": 0.908428430557251, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 1.1584699453551912, | |
| "grad_norm": 0.726727306842804, | |
| "learning_rate": 2.2521749780127423e-05, | |
| "loss": 0.7567380666732788, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 1.1639344262295082, | |
| "grad_norm": 0.3632301390171051, | |
| "learning_rate": 2.2448853769427842e-05, | |
| "loss": 0.7121322154998779, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 1.169398907103825, | |
| "grad_norm": 0.2174682468175888, | |
| "learning_rate": 2.2375741873414082e-05, | |
| "loss": 1.1349387168884277, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 1.174863387978142, | |
| "grad_norm": 0.20269879698753357, | |
| "learning_rate": 2.230241674533419e-05, | |
| "loss": 0.8598660230636597, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 1.180327868852459, | |
| "grad_norm": 0.3191964030265808, | |
| "learning_rate": 2.2228881046174457e-05, | |
| "loss": 1.0039303302764893, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 1.185792349726776, | |
| "grad_norm": 0.7402692437171936, | |
| "learning_rate": 2.215513744456285e-05, | |
| "loss": 0.8903926014900208, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 1.1912568306010929, | |
| "grad_norm": 0.2626335322856903, | |
| "learning_rate": 2.2081188616672154e-05, | |
| "loss": 1.094512939453125, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 1.1967213114754098, | |
| "grad_norm": 0.1579216867685318, | |
| "learning_rate": 2.2007037246122874e-05, | |
| "loss": 0.9891207814216614, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 1.2021857923497268, | |
| "grad_norm": 0.2660261392593384, | |
| "learning_rate": 2.193268602388583e-05, | |
| "loss": 0.9108368158340454, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 1.2076502732240437, | |
| "grad_norm": 0.27433860301971436, | |
| "learning_rate": 2.185813764818452e-05, | |
| "loss": 1.0370347499847412, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 1.2131147540983607, | |
| "grad_norm": 0.22014309465885162, | |
| "learning_rate": 2.178339482439717e-05, | |
| "loss": 0.8952693939208984, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 1.2185792349726776, | |
| "grad_norm": 0.20270608365535736, | |
| "learning_rate": 2.1708460264958592e-05, | |
| "loss": 0.5965734720230103, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 1.2240437158469946, | |
| "grad_norm": 0.33037033677101135, | |
| "learning_rate": 2.1633336689261704e-05, | |
| "loss": 0.9093762040138245, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 1.2295081967213115, | |
| "grad_norm": 0.27016597986221313, | |
| "learning_rate": 2.15580268235589e-05, | |
| "loss": 1.0521835088729858, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 1.2349726775956285, | |
| "grad_norm": 0.3782821595668793, | |
| "learning_rate": 2.1482533400863062e-05, | |
| "loss": 0.9290086627006531, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 1.2404371584699454, | |
| "grad_norm": 0.48837918043136597, | |
| "learning_rate": 2.1406859160848404e-05, | |
| "loss": 1.107755184173584, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 1.2459016393442623, | |
| "grad_norm": 1.515816569328308, | |
| "learning_rate": 2.133100684975104e-05, | |
| "loss": 0.7968311905860901, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 1.2513661202185793, | |
| "grad_norm": 0.5359959006309509, | |
| "learning_rate": 2.1254979220269334e-05, | |
| "loss": 0.7603551745414734, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 1.2568306010928962, | |
| "grad_norm": 0.22337506711483002, | |
| "learning_rate": 2.1178779031463995e-05, | |
| "loss": 0.9174489974975586, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 1.2622950819672132, | |
| "grad_norm": 0.31684499979019165, | |
| "learning_rate": 2.110240904865794e-05, | |
| "loss": 0.7280531525611877, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 1.2677595628415301, | |
| "grad_norm": 0.5560224652290344, | |
| "learning_rate": 2.1025872043335962e-05, | |
| "loss": 1.0027216672897339, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 1.273224043715847, | |
| "grad_norm": 0.45167967677116394, | |
| "learning_rate": 2.094917079304413e-05, | |
| "loss": 0.956573486328125, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 1.278688524590164, | |
| "grad_norm": 0.25001007318496704, | |
| "learning_rate": 2.087230808128902e-05, | |
| "loss": 0.9677035808563232, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 1.2841530054644807, | |
| "grad_norm": 0.33943644165992737, | |
| "learning_rate": 2.079528669743666e-05, | |
| "loss": 0.9677251577377319, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 1.289617486338798, | |
| "grad_norm": 0.18122142553329468, | |
| "learning_rate": 2.0718109436611344e-05, | |
| "loss": 1.3085139989852905, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 1.2950819672131146, | |
| "grad_norm": 0.17135341465473175, | |
| "learning_rate": 2.0640779099594168e-05, | |
| "loss": 0.9852219820022583, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 1.3005464480874318, | |
| "grad_norm": 0.1457795798778534, | |
| "learning_rate": 2.0563298492721405e-05, | |
| "loss": 0.4569844901561737, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 1.3060109289617485, | |
| "grad_norm": 0.1674627661705017, | |
| "learning_rate": 2.048567042778265e-05, | |
| "loss": 0.9698683023452759, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 1.3114754098360657, | |
| "grad_norm": 0.268148273229599, | |
| "learning_rate": 2.0407897721918795e-05, | |
| "loss": 1.0876888036727905, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 1.3169398907103824, | |
| "grad_norm": 0.20479606091976166, | |
| "learning_rate": 2.0329983197519776e-05, | |
| "loss": 0.9315165877342224, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 1.3224043715846996, | |
| "grad_norm": 0.3611004948616028, | |
| "learning_rate": 2.025192968212217e-05, | |
| "loss": 0.6297778487205505, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 1.3278688524590163, | |
| "grad_norm": 0.27092111110687256, | |
| "learning_rate": 2.0173740008306553e-05, | |
| "loss": 1.0578755140304565, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 1.3333333333333333, | |
| "grad_norm": 0.20668600499629974, | |
| "learning_rate": 2.0095417013594744e-05, | |
| "loss": 0.8676777482032776, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 1.3387978142076502, | |
| "grad_norm": 0.49492204189300537, | |
| "learning_rate": 2.0016963540346783e-05, | |
| "loss": 0.6902498602867126, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 1.3442622950819672, | |
| "grad_norm": 0.20374695956707, | |
| "learning_rate": 1.9938382435657833e-05, | |
| "loss": 1.097572922706604, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 1.349726775956284, | |
| "grad_norm": 0.14068134129047394, | |
| "learning_rate": 1.9859676551254815e-05, | |
| "loss": 0.8731942772865295, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 1.355191256830601, | |
| "grad_norm": 0.26389455795288086, | |
| "learning_rate": 1.9780848743392944e-05, | |
| "loss": 1.0970230102539062, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 1.360655737704918, | |
| "grad_norm": 1.5849382877349854, | |
| "learning_rate": 1.970190187275205e-05, | |
| "loss": 0.9134294986724854, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 1.366120218579235, | |
| "grad_norm": 0.8538482189178467, | |
| "learning_rate": 1.962283880433281e-05, | |
| "loss": 1.1809808015823364, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 1.3715846994535519, | |
| "grad_norm": 0.20208939909934998, | |
| "learning_rate": 1.9543662407352717e-05, | |
| "loss": 0.9382311105728149, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 1.3770491803278688, | |
| "grad_norm": 0.24416698515415192, | |
| "learning_rate": 1.9464375555142e-05, | |
| "loss": 0.651623010635376, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 1.3825136612021858, | |
| "grad_norm": 0.2169644832611084, | |
| "learning_rate": 1.938498112503933e-05, | |
| "loss": 0.9664312601089478, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 1.3879781420765027, | |
| "grad_norm": 0.14838415384292603, | |
| "learning_rate": 1.9305481998287402e-05, | |
| "loss": 0.9927853941917419, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 1.3934426229508197, | |
| "grad_norm": 0.28129279613494873, | |
| "learning_rate": 1.9225881059928384e-05, | |
| "loss": 0.9994415044784546, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 1.3989071038251366, | |
| "grad_norm": 0.21077130734920502, | |
| "learning_rate": 1.9146181198699206e-05, | |
| "loss": 1.0227320194244385, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 1.4043715846994536, | |
| "grad_norm": 0.19347411394119263, | |
| "learning_rate": 1.9066385306926735e-05, | |
| "loss": 1.0043479204177856, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 1.4098360655737705, | |
| "grad_norm": 0.3762439787387848, | |
| "learning_rate": 1.898649628042281e-05, | |
| "loss": 0.6620403528213501, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 1.4153005464480874, | |
| "grad_norm": 0.28557923436164856, | |
| "learning_rate": 1.890651701837915e-05, | |
| "loss": 1.0753734111785889, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 1.4207650273224044, | |
| "grad_norm": 0.8509463667869568, | |
| "learning_rate": 1.882645042326214e-05, | |
| "loss": 0.7662308216094971, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 1.4262295081967213, | |
| "grad_norm": 0.20371294021606445, | |
| "learning_rate": 1.8746299400707507e-05, | |
| "loss": 1.0270415544509888, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 1.4316939890710383, | |
| "grad_norm": 1.3119806051254272, | |
| "learning_rate": 1.8666066859414873e-05, | |
| "loss": 0.944159209728241, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 1.4371584699453552, | |
| "grad_norm": 0.2648325562477112, | |
| "learning_rate": 1.8585755711042185e-05, | |
| "loss": 0.9048633575439453, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 1.4426229508196722, | |
| "grad_norm": 0.26933619379997253, | |
| "learning_rate": 1.8505368870100067e-05, | |
| "loss": 0.7111335396766663, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 1.4480874316939891, | |
| "grad_norm": 0.4313851594924927, | |
| "learning_rate": 1.8424909253846042e-05, | |
| "loss": 0.7980619668960571, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 1.453551912568306, | |
| "grad_norm": 0.4982314705848694, | |
| "learning_rate": 1.8344379782178663e-05, | |
| "loss": 0.6696621775627136, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 1.459016393442623, | |
| "grad_norm": 0.19477106630802155, | |
| "learning_rate": 1.826378337753156e-05, | |
| "loss": 0.9805558919906616, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 1.46448087431694, | |
| "grad_norm": 0.20714399218559265, | |
| "learning_rate": 1.8183122964767375e-05, | |
| "loss": 0.9931337237358093, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 1.469945355191257, | |
| "grad_norm": 0.22592397034168243, | |
| "learning_rate": 1.8102401471071608e-05, | |
| "loss": 0.9873198866844177, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 1.4754098360655736, | |
| "grad_norm": 0.19923336803913116, | |
| "learning_rate": 1.8021621825846425e-05, | |
| "loss": 0.9330631494522095, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 1.4808743169398908, | |
| "grad_norm": 0.20338214933872223, | |
| "learning_rate": 1.794078696060429e-05, | |
| "loss": 0.942470908164978, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 1.4863387978142075, | |
| "grad_norm": 0.24082419276237488, | |
| "learning_rate": 1.7859899808861654e-05, | |
| "loss": 0.8958922624588013, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 1.4918032786885247, | |
| "grad_norm": 0.6995041966438293, | |
| "learning_rate": 1.7778963306032424e-05, | |
| "loss": 1.174452781677246, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 1.4972677595628414, | |
| "grad_norm": 0.48068925738334656, | |
| "learning_rate": 1.769798038932149e-05, | |
| "loss": 0.552976667881012, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 1.5027322404371586, | |
| "grad_norm": 0.26130443811416626, | |
| "learning_rate": 1.7616953997618098e-05, | |
| "loss": 0.9382733106613159, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 1.5081967213114753, | |
| "grad_norm": 0.36456573009490967, | |
| "learning_rate": 1.753588707138923e-05, | |
| "loss": 0.9734442234039307, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 1.5136612021857925, | |
| "grad_norm": 0.2951594293117523, | |
| "learning_rate": 1.745478255257286e-05, | |
| "loss": 0.968082070350647, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 1.5191256830601092, | |
| "grad_norm": 0.20367763936519623, | |
| "learning_rate": 1.7373643384471216e-05, | |
| "loss": 0.9755566716194153, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 1.5245901639344264, | |
| "grad_norm": 0.3831861913204193, | |
| "learning_rate": 1.7292472511643944e-05, | |
| "loss": 0.8060733079910278, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 1.530054644808743, | |
| "grad_norm": 0.2314150035381317, | |
| "learning_rate": 1.7211272879801276e-05, | |
| "loss": 0.6359631419181824, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 1.5355191256830603, | |
| "grad_norm": 0.48030155897140503, | |
| "learning_rate": 1.7130047435697108e-05, | |
| "loss": 0.9271581172943115, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 1.540983606557377, | |
| "grad_norm": 0.30352166295051575, | |
| "learning_rate": 1.704879912702207e-05, | |
| "loss": 0.767744779586792, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 1.5464480874316942, | |
| "grad_norm": 0.20077812671661377, | |
| "learning_rate": 1.6967530902296557e-05, | |
| "loss": 0.9996009469032288, | |
| "step": 566 | |
| }, | |
| { | |
| "epoch": 1.5519125683060109, | |
| "grad_norm": 0.537512481212616, | |
| "learning_rate": 1.6886245710763714e-05, | |
| "loss": 0.8161935210227966, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 1.5573770491803278, | |
| "grad_norm": 0.17293407022953033, | |
| "learning_rate": 1.6804946502282418e-05, | |
| "loss": 1.02741539478302, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 1.5628415300546448, | |
| "grad_norm": 0.26286932826042175, | |
| "learning_rate": 1.672363622722024e-05, | |
| "loss": 0.9144545197486877, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 1.5683060109289617, | |
| "grad_norm": 0.15417145192623138, | |
| "learning_rate": 1.6642317836346327e-05, | |
| "loss": 0.9791783690452576, | |
| "step": 574 | |
| }, | |
| { | |
| "epoch": 1.5737704918032787, | |
| "grad_norm": 0.28066548705101013, | |
| "learning_rate": 1.656099428072438e-05, | |
| "loss": 0.757041871547699, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 1.5792349726775956, | |
| "grad_norm": 0.24074207246303558, | |
| "learning_rate": 1.647966851160553e-05, | |
| "loss": 1.023717999458313, | |
| "step": 578 | |
| }, | |
| { | |
| "epoch": 1.5846994535519126, | |
| "grad_norm": 0.2572455406188965, | |
| "learning_rate": 1.6398343480321203e-05, | |
| "loss": 0.5426998138427734, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 1.5901639344262295, | |
| "grad_norm": 0.2543684244155884, | |
| "learning_rate": 1.631702213817609e-05, | |
| "loss": 0.6681845188140869, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 1.5956284153005464, | |
| "grad_norm": 0.4130007028579712, | |
| "learning_rate": 1.623570743634098e-05, | |
| "loss": 0.6237643957138062, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 1.6010928961748634, | |
| "grad_norm": 0.2560160458087921, | |
| "learning_rate": 1.6154402325745684e-05, | |
| "loss": 1.0540562868118286, | |
| "step": 586 | |
| }, | |
| { | |
| "epoch": 1.6065573770491803, | |
| "grad_norm": 0.2821918725967407, | |
| "learning_rate": 1.6073109756971954e-05, | |
| "loss": 1.0359805822372437, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 1.6120218579234973, | |
| "grad_norm": 0.16164234280586243, | |
| "learning_rate": 1.5991832680146395e-05, | |
| "loss": 0.7261804938316345, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 1.6174863387978142, | |
| "grad_norm": 0.2133316546678543, | |
| "learning_rate": 1.5910574044833388e-05, | |
| "loss": 0.9429523944854736, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 1.6229508196721312, | |
| "grad_norm": 0.42204782366752625, | |
| "learning_rate": 1.5829336799928086e-05, | |
| "loss": 0.9018955826759338, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 1.6284153005464481, | |
| "grad_norm": 0.28047484159469604, | |
| "learning_rate": 1.5748123893549376e-05, | |
| "loss": 0.42149481177330017, | |
| "step": 596 | |
| }, | |
| { | |
| "epoch": 1.633879781420765, | |
| "grad_norm": 0.19302046298980713, | |
| "learning_rate": 1.56669382729329e-05, | |
| "loss": 0.9071238040924072, | |
| "step": 598 | |
| }, | |
| { | |
| "epoch": 1.639344262295082, | |
| "grad_norm": 0.5831403136253357, | |
| "learning_rate": 1.5585782884324064e-05, | |
| "loss": 1.0509930849075317, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 1.644808743169399, | |
| "grad_norm": 0.29063358902931213, | |
| "learning_rate": 1.5504660672871184e-05, | |
| "loss": 1.0455665588378906, | |
| "step": 602 | |
| }, | |
| { | |
| "epoch": 1.650273224043716, | |
| "grad_norm": 0.1813768744468689, | |
| "learning_rate": 1.542357458251855e-05, | |
| "loss": 1.003379225730896, | |
| "step": 604 | |
| }, | |
| { | |
| "epoch": 1.6557377049180326, | |
| "grad_norm": 0.2480282336473465, | |
| "learning_rate": 1.5342527555899614e-05, | |
| "loss": 1.0220948457717896, | |
| "step": 606 | |
| }, | |
| { | |
| "epoch": 1.6612021857923498, | |
| "grad_norm": 0.27286913990974426, | |
| "learning_rate": 1.5261522534230188e-05, | |
| "loss": 0.7884365320205688, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 1.6666666666666665, | |
| "grad_norm": 0.18817579746246338, | |
| "learning_rate": 1.5180562457201718e-05, | |
| "loss": 1.040960431098938, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 1.6721311475409837, | |
| "grad_norm": 1.272721529006958, | |
| "learning_rate": 1.5099650262874609e-05, | |
| "loss": 0.7100189924240112, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 1.6775956284153004, | |
| "grad_norm": 0.1792086809873581, | |
| "learning_rate": 1.5018788887571583e-05, | |
| "loss": 0.9524511694908142, | |
| "step": 614 | |
| }, | |
| { | |
| "epoch": 1.6830601092896176, | |
| "grad_norm": 0.17531287670135498, | |
| "learning_rate": 1.4937981265771125e-05, | |
| "loss": 1.0636396408081055, | |
| "step": 616 | |
| }, | |
| { | |
| "epoch": 1.6885245901639343, | |
| "grad_norm": 0.281142920255661, | |
| "learning_rate": 1.4857230330000991e-05, | |
| "loss": 0.5498223304748535, | |
| "step": 618 | |
| }, | |
| { | |
| "epoch": 1.6939890710382515, | |
| "grad_norm": 0.2615275979042053, | |
| "learning_rate": 1.4776539010731797e-05, | |
| "loss": 0.8987367749214172, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 1.6994535519125682, | |
| "grad_norm": 0.3065132796764374, | |
| "learning_rate": 1.4695910236270654e-05, | |
| "loss": 1.20211923122406, | |
| "step": 622 | |
| }, | |
| { | |
| "epoch": 1.7049180327868854, | |
| "grad_norm": 0.19763752818107605, | |
| "learning_rate": 1.461534693265491e-05, | |
| "loss": 0.6365981698036194, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 1.710382513661202, | |
| "grad_norm": 0.1835319548845291, | |
| "learning_rate": 1.4534852023545962e-05, | |
| "loss": 0.5819370150566101, | |
| "step": 626 | |
| }, | |
| { | |
| "epoch": 1.7158469945355193, | |
| "grad_norm": 2.9100160598754883, | |
| "learning_rate": 1.4454428430123156e-05, | |
| "loss": 1.0549867153167725, | |
| "step": 628 | |
| }, | |
| { | |
| "epoch": 1.721311475409836, | |
| "grad_norm": 0.22998031973838806, | |
| "learning_rate": 1.4374079070977762e-05, | |
| "loss": 0.7337666749954224, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 1.7267759562841531, | |
| "grad_norm": 0.182816281914711, | |
| "learning_rate": 1.4293806862007085e-05, | |
| "loss": 1.005784034729004, | |
| "step": 632 | |
| }, | |
| { | |
| "epoch": 1.7322404371584699, | |
| "grad_norm": 0.1910344958305359, | |
| "learning_rate": 1.4213614716308627e-05, | |
| "loss": 1.0956374406814575, | |
| "step": 634 | |
| }, | |
| { | |
| "epoch": 1.737704918032787, | |
| "grad_norm": 0.21634313464164734, | |
| "learning_rate": 1.4133505544074378e-05, | |
| "loss": 0.8941826820373535, | |
| "step": 636 | |
| }, | |
| { | |
| "epoch": 1.7431693989071038, | |
| "grad_norm": 0.21307677030563354, | |
| "learning_rate": 1.4053482252485186e-05, | |
| "loss": 0.946021318435669, | |
| "step": 638 | |
| }, | |
| { | |
| "epoch": 1.748633879781421, | |
| "grad_norm": 0.6593809723854065, | |
| "learning_rate": 1.3973547745605293e-05, | |
| "loss": 0.8592963814735413, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 1.7540983606557377, | |
| "grad_norm": 0.16732850670814514, | |
| "learning_rate": 1.389370492427691e-05, | |
| "loss": 1.0226490497589111, | |
| "step": 642 | |
| }, | |
| { | |
| "epoch": 1.7595628415300546, | |
| "grad_norm": 0.1760009229183197, | |
| "learning_rate": 1.3813956686014959e-05, | |
| "loss": 1.0081747770309448, | |
| "step": 644 | |
| }, | |
| { | |
| "epoch": 1.7650273224043715, | |
| "grad_norm": 0.4435974955558777, | |
| "learning_rate": 1.3734305924901915e-05, | |
| "loss": 0.6210038661956787, | |
| "step": 646 | |
| }, | |
| { | |
| "epoch": 1.7704918032786885, | |
| "grad_norm": 0.19363141059875488, | |
| "learning_rate": 1.3654755531482788e-05, | |
| "loss": 0.9575290679931641, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 1.7759562841530054, | |
| "grad_norm": 0.16297434270381927, | |
| "learning_rate": 1.3575308392660233e-05, | |
| "loss": 0.9905856251716614, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 1.7814207650273224, | |
| "grad_norm": 0.19367915391921997, | |
| "learning_rate": 1.3495967391589758e-05, | |
| "loss": 0.9013992547988892, | |
| "step": 652 | |
| }, | |
| { | |
| "epoch": 1.7868852459016393, | |
| "grad_norm": 0.19683308899402618, | |
| "learning_rate": 1.34167354075751e-05, | |
| "loss": 0.9609240293502808, | |
| "step": 654 | |
| }, | |
| { | |
| "epoch": 1.7923497267759563, | |
| "grad_norm": 0.9740826487541199, | |
| "learning_rate": 1.3337615315963759e-05, | |
| "loss": 0.8889397382736206, | |
| "step": 656 | |
| }, | |
| { | |
| "epoch": 1.7978142076502732, | |
| "grad_norm": 0.27928921580314636, | |
| "learning_rate": 1.325860998804262e-05, | |
| "loss": 0.7899873852729797, | |
| "step": 658 | |
| }, | |
| { | |
| "epoch": 1.8032786885245902, | |
| "grad_norm": 2.0662078857421875, | |
| "learning_rate": 1.3179722290933771e-05, | |
| "loss": 0.8331339955329895, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 1.8087431693989071, | |
| "grad_norm": 0.20343828201293945, | |
| "learning_rate": 1.3100955087490452e-05, | |
| "loss": 0.9290274381637573, | |
| "step": 662 | |
| }, | |
| { | |
| "epoch": 1.814207650273224, | |
| "grad_norm": 0.2821129560470581, | |
| "learning_rate": 1.3022311236193156e-05, | |
| "loss": 1.1835439205169678, | |
| "step": 664 | |
| }, | |
| { | |
| "epoch": 1.819672131147541, | |
| "grad_norm": 0.1843961477279663, | |
| "learning_rate": 1.2943793591045901e-05, | |
| "loss": 1.011027455329895, | |
| "step": 666 | |
| }, | |
| { | |
| "epoch": 1.825136612021858, | |
| "grad_norm": 0.16920003294944763, | |
| "learning_rate": 1.2865405001472647e-05, | |
| "loss": 1.0091710090637207, | |
| "step": 668 | |
| }, | |
| { | |
| "epoch": 1.830601092896175, | |
| "grad_norm": 0.31314581632614136, | |
| "learning_rate": 1.2787148312213901e-05, | |
| "loss": 0.6333152055740356, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 1.8360655737704918, | |
| "grad_norm": 0.55765300989151, | |
| "learning_rate": 1.2709026363223477e-05, | |
| "loss": 1.0525486469268799, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 1.8415300546448088, | |
| "grad_norm": 0.19167383015155792, | |
| "learning_rate": 1.263104198956544e-05, | |
| "loss": 0.9962109327316284, | |
| "step": 674 | |
| }, | |
| { | |
| "epoch": 1.8469945355191257, | |
| "grad_norm": 0.1964281052350998, | |
| "learning_rate": 1.2553198021311191e-05, | |
| "loss": 0.9106393456459045, | |
| "step": 676 | |
| }, | |
| { | |
| "epoch": 1.8524590163934427, | |
| "grad_norm": 0.09598688036203384, | |
| "learning_rate": 1.247549728343681e-05, | |
| "loss": 0.22874563932418823, | |
| "step": 678 | |
| }, | |
| { | |
| "epoch": 1.8579234972677594, | |
| "grad_norm": 0.1871129870414734, | |
| "learning_rate": 1.2397942595720501e-05, | |
| "loss": 0.9396315217018127, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 1.8633879781420766, | |
| "grad_norm": 0.1745700091123581, | |
| "learning_rate": 1.2320536772640275e-05, | |
| "loss": 0.9384387731552124, | |
| "step": 682 | |
| }, | |
| { | |
| "epoch": 1.8688524590163933, | |
| "grad_norm": 0.1895333081483841, | |
| "learning_rate": 1.2243282623271807e-05, | |
| "loss": 0.8838564157485962, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 1.8743169398907105, | |
| "grad_norm": 0.3600115180015564, | |
| "learning_rate": 1.2166182951186503e-05, | |
| "loss": 0.6965083479881287, | |
| "step": 686 | |
| }, | |
| { | |
| "epoch": 1.8797814207650272, | |
| "grad_norm": 0.2689080238342285, | |
| "learning_rate": 1.2089240554349744e-05, | |
| "loss": 0.995834231376648, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 1.8852459016393444, | |
| "grad_norm": 0.18152876198291779, | |
| "learning_rate": 1.2012458225019368e-05, | |
| "loss": 1.0340481996536255, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 1.890710382513661, | |
| "grad_norm": 0.32161420583724976, | |
| "learning_rate": 1.193583874964431e-05, | |
| "loss": 0.6437407732009888, | |
| "step": 692 | |
| }, | |
| { | |
| "epoch": 1.8961748633879782, | |
| "grad_norm": 0.43549007177352905, | |
| "learning_rate": 1.1859384908763506e-05, | |
| "loss": 1.045131802558899, | |
| "step": 694 | |
| }, | |
| { | |
| "epoch": 1.901639344262295, | |
| "grad_norm": 0.16052097082138062, | |
| "learning_rate": 1.1783099476904972e-05, | |
| "loss": 0.9282304644584656, | |
| "step": 696 | |
| }, | |
| { | |
| "epoch": 1.9071038251366121, | |
| "grad_norm": 0.30643054842948914, | |
| "learning_rate": 1.1706985222485122e-05, | |
| "loss": 0.8635981678962708, | |
| "step": 698 | |
| }, | |
| { | |
| "epoch": 1.9125683060109289, | |
| "grad_norm": 0.1655147671699524, | |
| "learning_rate": 1.1631044907708304e-05, | |
| "loss": 0.9465705156326294, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 1.918032786885246, | |
| "grad_norm": 0.22872859239578247, | |
| "learning_rate": 1.1555281288466546e-05, | |
| "loss": 0.8689624071121216, | |
| "step": 702 | |
| }, | |
| { | |
| "epoch": 1.9234972677595628, | |
| "grad_norm": 0.2155844122171402, | |
| "learning_rate": 1.1479697114239569e-05, | |
| "loss": 0.9470728039741516, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 1.92896174863388, | |
| "grad_norm": 0.34669604897499084, | |
| "learning_rate": 1.1404295127994979e-05, | |
| "loss": 0.992061972618103, | |
| "step": 706 | |
| }, | |
| { | |
| "epoch": 1.9344262295081966, | |
| "grad_norm": 0.1591545194387436, | |
| "learning_rate": 1.1329078066088732e-05, | |
| "loss": 0.8454440832138062, | |
| "step": 708 | |
| }, | |
| { | |
| "epoch": 1.9398907103825138, | |
| "grad_norm": 0.14875464141368866, | |
| "learning_rate": 1.125404865816585e-05, | |
| "loss": 0.6483862400054932, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 1.9453551912568305, | |
| "grad_norm": 0.17959018051624298, | |
| "learning_rate": 1.1179209627061345e-05, | |
| "loss": 0.5884549021720886, | |
| "step": 712 | |
| }, | |
| { | |
| "epoch": 1.9508196721311475, | |
| "grad_norm": 0.29064252972602844, | |
| "learning_rate": 1.1104563688701392e-05, | |
| "loss": 0.9911268353462219, | |
| "step": 714 | |
| }, | |
| { | |
| "epoch": 1.9562841530054644, | |
| "grad_norm": 0.303365021944046, | |
| "learning_rate": 1.1030113552004807e-05, | |
| "loss": 0.7614367008209229, | |
| "step": 716 | |
| }, | |
| { | |
| "epoch": 1.9617486338797814, | |
| "grad_norm": 0.22090303897857666, | |
| "learning_rate": 1.0955861918784711e-05, | |
| "loss": 0.9217373132705688, | |
| "step": 718 | |
| }, | |
| { | |
| "epoch": 1.9672131147540983, | |
| "grad_norm": 0.3355685770511627, | |
| "learning_rate": 1.088181148365048e-05, | |
| "loss": 1.0169565677642822, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 1.9726775956284153, | |
| "grad_norm": 0.3264828622341156, | |
| "learning_rate": 1.0807964933909968e-05, | |
| "loss": 0.989068865776062, | |
| "step": 722 | |
| }, | |
| { | |
| "epoch": 1.9781420765027322, | |
| "grad_norm": 0.22458317875862122, | |
| "learning_rate": 1.0734324949471974e-05, | |
| "loss": 0.9663054347038269, | |
| "step": 724 | |
| }, | |
| { | |
| "epoch": 1.9836065573770492, | |
| "grad_norm": 0.5973827242851257, | |
| "learning_rate": 1.0660894202749005e-05, | |
| "loss": 1.044677972793579, | |
| "step": 726 | |
| }, | |
| { | |
| "epoch": 1.989071038251366, | |
| "grad_norm": 0.17764398455619812, | |
| "learning_rate": 1.0587675358560278e-05, | |
| "loss": 0.9577726125717163, | |
| "step": 728 | |
| }, | |
| { | |
| "epoch": 1.994535519125683, | |
| "grad_norm": 0.32449790835380554, | |
| "learning_rate": 1.0514671074035003e-05, | |
| "loss": 0.9161467552185059, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 0.22582565248012543, | |
| "learning_rate": 1.0441883998515987e-05, | |
| "loss": 1.003676176071167, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 2.0054644808743167, | |
| "grad_norm": 0.14095257222652435, | |
| "learning_rate": 1.0369316773463461e-05, | |
| "loss": 0.3889709413051605, | |
| "step": 734 | |
| }, | |
| { | |
| "epoch": 2.010928961748634, | |
| "grad_norm": 0.25997933745384216, | |
| "learning_rate": 1.029697203235924e-05, | |
| "loss": 0.5877363681793213, | |
| "step": 736 | |
| }, | |
| { | |
| "epoch": 2.0163934426229506, | |
| "grad_norm": 0.1753978580236435, | |
| "learning_rate": 1.0224852400611125e-05, | |
| "loss": 0.6436468958854675, | |
| "step": 738 | |
| }, | |
| { | |
| "epoch": 2.021857923497268, | |
| "grad_norm": 0.2699877619743347, | |
| "learning_rate": 1.0152960495457662e-05, | |
| "loss": 0.621852457523346, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 2.0273224043715845, | |
| "grad_norm": 0.4822777509689331, | |
| "learning_rate": 1.008129892587314e-05, | |
| "loss": 0.45858633518218994, | |
| "step": 742 | |
| }, | |
| { | |
| "epoch": 2.0327868852459017, | |
| "grad_norm": 3.2039668560028076, | |
| "learning_rate": 1.0009870292472921e-05, | |
| "loss": 0.5885698199272156, | |
| "step": 744 | |
| }, | |
| { | |
| "epoch": 2.0382513661202184, | |
| "grad_norm": 0.3171454668045044, | |
| "learning_rate": 9.938677187419039e-06, | |
| "loss": 0.4143402576446533, | |
| "step": 746 | |
| }, | |
| { | |
| "epoch": 2.0437158469945356, | |
| "grad_norm": 0.3015151917934418, | |
| "learning_rate": 9.867722194326169e-06, | |
| "loss": 0.6467013359069824, | |
| "step": 748 | |
| }, | |
| { | |
| "epoch": 2.0491803278688523, | |
| "grad_norm": 0.1930829882621765, | |
| "learning_rate": 9.797007888167837e-06, | |
| "loss": 0.3702736496925354, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 2.0546448087431695, | |
| "grad_norm": 1.4496486186981201, | |
| "learning_rate": 9.72653683518299e-06, | |
| "loss": 0.40615609288215637, | |
| "step": 752 | |
| }, | |
| { | |
| "epoch": 2.060109289617486, | |
| "grad_norm": 0.8078898191452026, | |
| "learning_rate": 9.656311592782845e-06, | |
| "loss": 0.665539026260376, | |
| "step": 754 | |
| }, | |
| { | |
| "epoch": 2.0655737704918034, | |
| "grad_norm": 0.5273921489715576, | |
| "learning_rate": 9.586334709458108e-06, | |
| "loss": 0.7102009654045105, | |
| "step": 756 | |
| }, | |
| { | |
| "epoch": 2.07103825136612, | |
| "grad_norm": 0.3275352716445923, | |
| "learning_rate": 9.516608724686478e-06, | |
| "loss": 0.6393009424209595, | |
| "step": 758 | |
| }, | |
| { | |
| "epoch": 2.0765027322404372, | |
| "grad_norm": 0.2689841091632843, | |
| "learning_rate": 9.447136168840466e-06, | |
| "loss": 0.6792262196540833, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 2.081967213114754, | |
| "grad_norm": 0.26896512508392334, | |
| "learning_rate": 9.37791956309561e-06, | |
| "loss": 0.8553983569145203, | |
| "step": 762 | |
| }, | |
| { | |
| "epoch": 2.087431693989071, | |
| "grad_norm": 0.21515020728111267, | |
| "learning_rate": 9.308961419338943e-06, | |
| "loss": 0.6522472500801086, | |
| "step": 764 | |
| }, | |
| { | |
| "epoch": 2.092896174863388, | |
| "grad_norm": 0.35790205001831055, | |
| "learning_rate": 9.240264240077867e-06, | |
| "loss": 0.4608457088470459, | |
| "step": 766 | |
| }, | |
| { | |
| "epoch": 2.098360655737705, | |
| "grad_norm": 0.2197129875421524, | |
| "learning_rate": 9.171830518349296e-06, | |
| "loss": 0.5415955781936646, | |
| "step": 768 | |
| }, | |
| { | |
| "epoch": 2.1038251366120218, | |
| "grad_norm": 0.29368406534194946, | |
| "learning_rate": 9.10366273762923e-06, | |
| "loss": 0.5925282835960388, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 2.109289617486339, | |
| "grad_norm": 1.7829784154891968, | |
| "learning_rate": 9.035763371742596e-06, | |
| "loss": 0.5528727173805237, | |
| "step": 772 | |
| }, | |
| { | |
| "epoch": 2.1147540983606556, | |
| "grad_norm": 0.2791202962398529, | |
| "learning_rate": 8.968134884773492e-06, | |
| "loss": 0.5645961761474609, | |
| "step": 774 | |
| }, | |
| { | |
| "epoch": 2.120218579234973, | |
| "grad_norm": 0.31881171464920044, | |
| "learning_rate": 8.900779730975739e-06, | |
| "loss": 0.4499683082103729, | |
| "step": 776 | |
| }, | |
| { | |
| "epoch": 2.1256830601092895, | |
| "grad_norm": 1.1030975580215454, | |
| "learning_rate": 8.83370035468385e-06, | |
| "loss": 0.5326492786407471, | |
| "step": 778 | |
| }, | |
| { | |
| "epoch": 2.1311475409836067, | |
| "grad_norm": 0.23826377093791962, | |
| "learning_rate": 8.766899190224297e-06, | |
| "loss": 0.5621083974838257, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 2.1366120218579234, | |
| "grad_norm": 0.22605067491531372, | |
| "learning_rate": 8.700378661827188e-06, | |
| "loss": 0.5861777663230896, | |
| "step": 782 | |
| }, | |
| { | |
| "epoch": 2.1420765027322406, | |
| "grad_norm": 0.2230478972196579, | |
| "learning_rate": 8.634141183538265e-06, | |
| "loss": 0.6019716858863831, | |
| "step": 784 | |
| }, | |
| { | |
| "epoch": 2.1475409836065573, | |
| "grad_norm": 0.13455802202224731, | |
| "learning_rate": 8.568189159131328e-06, | |
| "loss": 0.2907164692878723, | |
| "step": 786 | |
| }, | |
| { | |
| "epoch": 2.1530054644808745, | |
| "grad_norm": 0.8440735340118408, | |
| "learning_rate": 8.502524982020986e-06, | |
| "loss": 0.5836420059204102, | |
| "step": 788 | |
| }, | |
| { | |
| "epoch": 2.158469945355191, | |
| "grad_norm": 0.3224068284034729, | |
| "learning_rate": 8.4371510351758e-06, | |
| "loss": 0.5517048239707947, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 2.1639344262295084, | |
| "grad_norm": 0.2886510491371155, | |
| "learning_rate": 8.372069691031804e-06, | |
| "loss": 0.44468042254447937, | |
| "step": 792 | |
| }, | |
| { | |
| "epoch": 2.169398907103825, | |
| "grad_norm": 1.749972939491272, | |
| "learning_rate": 8.307283311406416e-06, | |
| "loss": 0.5100968480110168, | |
| "step": 794 | |
| }, | |
| { | |
| "epoch": 2.1748633879781423, | |
| "grad_norm": 0.2291492521762848, | |
| "learning_rate": 8.242794247412717e-06, | |
| "loss": 0.49309858679771423, | |
| "step": 796 | |
| }, | |
| { | |
| "epoch": 2.180327868852459, | |
| "grad_norm": 0.23026710748672485, | |
| "learning_rate": 8.178604839374129e-06, | |
| "loss": 0.5055176615715027, | |
| "step": 798 | |
| }, | |
| { | |
| "epoch": 2.185792349726776, | |
| "grad_norm": 0.6971049904823303, | |
| "learning_rate": 8.114717416739497e-06, | |
| "loss": 0.6149801015853882, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 2.191256830601093, | |
| "grad_norm": 0.46303942799568176, | |
| "learning_rate": 8.051134297998542e-06, | |
| "loss": 0.5053858757019043, | |
| "step": 802 | |
| }, | |
| { | |
| "epoch": 2.19672131147541, | |
| "grad_norm": 0.2438529133796692, | |
| "learning_rate": 7.987857790597723e-06, | |
| "loss": 0.739100992679596, | |
| "step": 804 | |
| }, | |
| { | |
| "epoch": 2.202185792349727, | |
| "grad_norm": 0.4665067791938782, | |
| "learning_rate": 7.924890190856498e-06, | |
| "loss": 0.6623398661613464, | |
| "step": 806 | |
| }, | |
| { | |
| "epoch": 2.2076502732240435, | |
| "grad_norm": 0.11752183735370636, | |
| "learning_rate": 7.862233783883996e-06, | |
| "loss": 0.30094265937805176, | |
| "step": 808 | |
| }, | |
| { | |
| "epoch": 2.2131147540983607, | |
| "grad_norm": 0.2757886052131653, | |
| "learning_rate": 7.799890843496084e-06, | |
| "loss": 0.30672988295555115, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 2.2185792349726774, | |
| "grad_norm": 0.23019720613956451, | |
| "learning_rate": 7.737863632132867e-06, | |
| "loss": 0.608177900314331, | |
| "step": 812 | |
| }, | |
| { | |
| "epoch": 2.2240437158469946, | |
| "grad_norm": 0.27627524733543396, | |
| "learning_rate": 7.67615440077654e-06, | |
| "loss": 0.6392920017242432, | |
| "step": 814 | |
| }, | |
| { | |
| "epoch": 2.2295081967213113, | |
| "grad_norm": 0.24705146253108978, | |
| "learning_rate": 7.614765388869751e-06, | |
| "loss": 0.594744861125946, | |
| "step": 816 | |
| }, | |
| { | |
| "epoch": 2.2349726775956285, | |
| "grad_norm": 0.26148557662963867, | |
| "learning_rate": 7.553698824234304e-06, | |
| "loss": 0.5958356261253357, | |
| "step": 818 | |
| }, | |
| { | |
| "epoch": 2.240437158469945, | |
| "grad_norm": 0.18818399310112, | |
| "learning_rate": 7.492956922990313e-06, | |
| "loss": 0.4794551134109497, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 2.2459016393442623, | |
| "grad_norm": 0.4666682481765747, | |
| "learning_rate": 7.4325418894757735e-06, | |
| "loss": 0.5876472592353821, | |
| "step": 822 | |
| }, | |
| { | |
| "epoch": 2.251366120218579, | |
| "grad_norm": 0.3914511799812317, | |
| "learning_rate": 7.3724559161665876e-06, | |
| "loss": 0.7053080797195435, | |
| "step": 824 | |
| }, | |
| { | |
| "epoch": 2.2568306010928962, | |
| "grad_norm": 5.045498371124268, | |
| "learning_rate": 7.312701183596972e-06, | |
| "loss": 0.4529895484447479, | |
| "step": 826 | |
| }, | |
| { | |
| "epoch": 2.262295081967213, | |
| "grad_norm": 0.6430360674858093, | |
| "learning_rate": 7.25327986028035e-06, | |
| "loss": 0.4203529953956604, | |
| "step": 828 | |
| }, | |
| { | |
| "epoch": 2.26775956284153, | |
| "grad_norm": 0.22158369421958923, | |
| "learning_rate": 7.19419410263063e-06, | |
| "loss": 0.578734815120697, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 2.273224043715847, | |
| "grad_norm": 0.2735978364944458, | |
| "learning_rate": 7.135446054883974e-06, | |
| "loss": 0.3725210726261139, | |
| "step": 832 | |
| }, | |
| { | |
| "epoch": 2.278688524590164, | |
| "grad_norm": 0.24813127517700195, | |
| "learning_rate": 7.077037849020968e-06, | |
| "loss": 0.524337649345398, | |
| "step": 834 | |
| }, | |
| { | |
| "epoch": 2.2841530054644807, | |
| "grad_norm": 0.4815691113471985, | |
| "learning_rate": 7.01897160468926e-06, | |
| "loss": 0.7133941054344177, | |
| "step": 836 | |
| }, | |
| { | |
| "epoch": 2.289617486338798, | |
| "grad_norm": 0.9871602058410645, | |
| "learning_rate": 6.9612494291266196e-06, | |
| "loss": 0.6212149858474731, | |
| "step": 838 | |
| }, | |
| { | |
| "epoch": 2.2950819672131146, | |
| "grad_norm": 0.3345879912376404, | |
| "learning_rate": 6.903873417084491e-06, | |
| "loss": 0.4452197253704071, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 2.300546448087432, | |
| "grad_norm": 0.20456527173519135, | |
| "learning_rate": 6.846845650751961e-06, | |
| "loss": 0.6149989366531372, | |
| "step": 842 | |
| }, | |
| { | |
| "epoch": 2.3060109289617485, | |
| "grad_norm": 0.27311307191848755, | |
| "learning_rate": 6.790168199680199e-06, | |
| "loss": 0.578364908695221, | |
| "step": 844 | |
| }, | |
| { | |
| "epoch": 2.3114754098360657, | |
| "grad_norm": 0.21572433412075043, | |
| "learning_rate": 6.733843120707336e-06, | |
| "loss": 0.3441348075866699, | |
| "step": 846 | |
| }, | |
| { | |
| "epoch": 2.3169398907103824, | |
| "grad_norm": 0.1840299516916275, | |
| "learning_rate": 6.677872457883853e-06, | |
| "loss": 0.5099568963050842, | |
| "step": 848 | |
| }, | |
| { | |
| "epoch": 2.3224043715846996, | |
| "grad_norm": 0.22911682724952698, | |
| "learning_rate": 6.622258242398381e-06, | |
| "loss": 0.5536096692085266, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 2.3278688524590163, | |
| "grad_norm": 0.43230611085891724, | |
| "learning_rate": 6.567002492503983e-06, | |
| "loss": 0.5580962300300598, | |
| "step": 852 | |
| }, | |
| { | |
| "epoch": 2.3333333333333335, | |
| "grad_norm": 0.272631973028183, | |
| "learning_rate": 6.512107213444929e-06, | |
| "loss": 0.476674348115921, | |
| "step": 854 | |
| }, | |
| { | |
| "epoch": 2.33879781420765, | |
| "grad_norm": 0.1946922391653061, | |
| "learning_rate": 6.457574397383919e-06, | |
| "loss": 0.7618679404258728, | |
| "step": 856 | |
| }, | |
| { | |
| "epoch": 2.3442622950819674, | |
| "grad_norm": 0.259090781211853, | |
| "learning_rate": 6.403406023329784e-06, | |
| "loss": 0.4828750193119049, | |
| "step": 858 | |
| }, | |
| { | |
| "epoch": 2.349726775956284, | |
| "grad_norm": 0.2866133153438568, | |
| "learning_rate": 6.349604057065658e-06, | |
| "loss": 0.5762677192687988, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 2.3551912568306013, | |
| "grad_norm": 0.25988462567329407, | |
| "learning_rate": 6.296170451077658e-06, | |
| "loss": 0.5723397731781006, | |
| "step": 862 | |
| }, | |
| { | |
| "epoch": 2.360655737704918, | |
| "grad_norm": 0.22316047549247742, | |
| "learning_rate": 6.243107144484023e-06, | |
| "loss": 0.44569721817970276, | |
| "step": 864 | |
| }, | |
| { | |
| "epoch": 2.366120218579235, | |
| "grad_norm": 0.5619032382965088, | |
| "learning_rate": 6.190416062964731e-06, | |
| "loss": 0.5945219397544861, | |
| "step": 866 | |
| }, | |
| { | |
| "epoch": 2.371584699453552, | |
| "grad_norm": 0.2615291476249695, | |
| "learning_rate": 6.138099118691626e-06, | |
| "loss": 0.5078909993171692, | |
| "step": 868 | |
| }, | |
| { | |
| "epoch": 2.3770491803278686, | |
| "grad_norm": 0.2805182933807373, | |
| "learning_rate": 6.086158210259026e-06, | |
| "loss": 0.560880184173584, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 2.3825136612021858, | |
| "grad_norm": 0.21691034734249115, | |
| "learning_rate": 6.03459522261482e-06, | |
| "loss": 0.7180853486061096, | |
| "step": 872 | |
| }, | |
| { | |
| "epoch": 2.387978142076503, | |
| "grad_norm": 0.20864130556583405, | |
| "learning_rate": 5.9834120269920505e-06, | |
| "loss": 0.5531293749809265, | |
| "step": 874 | |
| }, | |
| { | |
| "epoch": 2.3934426229508197, | |
| "grad_norm": 4.040912628173828, | |
| "learning_rate": 5.9326104808410305e-06, | |
| "loss": 0.6732316017150879, | |
| "step": 876 | |
| }, | |
| { | |
| "epoch": 2.3989071038251364, | |
| "grad_norm": 0.21024088561534882, | |
| "learning_rate": 5.882192427761917e-06, | |
| "loss": 0.5328571200370789, | |
| "step": 878 | |
| }, | |
| { | |
| "epoch": 2.4043715846994536, | |
| "grad_norm": 0.1811281442642212, | |
| "learning_rate": 5.832159697437816e-06, | |
| "loss": 0.5717867016792297, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 2.4098360655737707, | |
| "grad_norm": 0.25271499156951904, | |
| "learning_rate": 5.7825141055683785e-06, | |
| "loss": 0.36495983600616455, | |
| "step": 882 | |
| }, | |
| { | |
| "epoch": 2.4153005464480874, | |
| "grad_norm": 0.2296585738658905, | |
| "learning_rate": 5.733257453803902e-06, | |
| "loss": 0.6167256832122803, | |
| "step": 884 | |
| }, | |
| { | |
| "epoch": 2.420765027322404, | |
| "grad_norm": 0.2678411900997162, | |
| "learning_rate": 5.684391529679966e-06, | |
| "loss": 0.6026586294174194, | |
| "step": 886 | |
| }, | |
| { | |
| "epoch": 2.4262295081967213, | |
| "grad_norm": 0.2298223376274109, | |
| "learning_rate": 5.635918106552542e-06, | |
| "loss": 0.3028765618801117, | |
| "step": 888 | |
| }, | |
| { | |
| "epoch": 2.431693989071038, | |
| "grad_norm": 0.19897599518299103, | |
| "learning_rate": 5.587838943533659e-06, | |
| "loss": 0.4490368664264679, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 2.4371584699453552, | |
| "grad_norm": 0.43332573771476746, | |
| "learning_rate": 5.540155785427538e-06, | |
| "loss": 0.6402949690818787, | |
| "step": 892 | |
| }, | |
| { | |
| "epoch": 2.442622950819672, | |
| "grad_norm": 0.22874437272548676, | |
| "learning_rate": 5.4928703626672996e-06, | |
| "loss": 0.3874991536140442, | |
| "step": 894 | |
| }, | |
| { | |
| "epoch": 2.448087431693989, | |
| "grad_norm": 0.21198917925357819, | |
| "learning_rate": 5.445984391252152e-06, | |
| "loss": 0.5726388096809387, | |
| "step": 896 | |
| }, | |
| { | |
| "epoch": 2.453551912568306, | |
| "grad_norm": 0.30260413885116577, | |
| "learning_rate": 5.3994995726851136e-06, | |
| "loss": 0.6022024750709534, | |
| "step": 898 | |
| }, | |
| { | |
| "epoch": 2.459016393442623, | |
| "grad_norm": 0.29864200949668884, | |
| "learning_rate": 5.353417593911275e-06, | |
| "loss": 0.5415441989898682, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 2.4644808743169397, | |
| "grad_norm": 0.454162061214447, | |
| "learning_rate": 5.30774012725658e-06, | |
| "loss": 0.6477721333503723, | |
| "step": 902 | |
| }, | |
| { | |
| "epoch": 2.469945355191257, | |
| "grad_norm": 0.211436927318573, | |
| "learning_rate": 5.262468830367125e-06, | |
| "loss": 0.6445870995521545, | |
| "step": 904 | |
| }, | |
| { | |
| "epoch": 2.4754098360655736, | |
| "grad_norm": 0.5261885523796082, | |
| "learning_rate": 5.217605346149008e-06, | |
| "loss": 0.6112150549888611, | |
| "step": 906 | |
| }, | |
| { | |
| "epoch": 2.480874316939891, | |
| "grad_norm": 0.07862430065870285, | |
| "learning_rate": 5.173151302708711e-06, | |
| "loss": 0.1697760969400406, | |
| "step": 908 | |
| }, | |
| { | |
| "epoch": 2.4863387978142075, | |
| "grad_norm": 0.08352333307266235, | |
| "learning_rate": 5.129108313294017e-06, | |
| "loss": 0.5381441712379456, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 2.4918032786885247, | |
| "grad_norm": 0.17970986664295197, | |
| "learning_rate": 5.085477976235461e-06, | |
| "loss": 0.3261277377605438, | |
| "step": 912 | |
| }, | |
| { | |
| "epoch": 2.4972677595628414, | |
| "grad_norm": 0.27545392513275146, | |
| "learning_rate": 5.042261874888313e-06, | |
| "loss": 0.5439776182174683, | |
| "step": 914 | |
| }, | |
| { | |
| "epoch": 2.5027322404371586, | |
| "grad_norm": 0.18720275163650513, | |
| "learning_rate": 4.9994615775751455e-06, | |
| "loss": 0.47730955481529236, | |
| "step": 916 | |
| }, | |
| { | |
| "epoch": 2.5081967213114753, | |
| "grad_norm": 0.23059923946857452, | |
| "learning_rate": 4.957078637528901e-06, | |
| "loss": 0.4923263192176819, | |
| "step": 918 | |
| }, | |
| { | |
| "epoch": 2.5136612021857925, | |
| "grad_norm": 0.24093477427959442, | |
| "learning_rate": 4.915114592836523e-06, | |
| "loss": 0.47829610109329224, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 2.519125683060109, | |
| "grad_norm": 0.19941234588623047, | |
| "learning_rate": 4.873570966383147e-06, | |
| "loss": 0.5637532472610474, | |
| "step": 922 | |
| }, | |
| { | |
| "epoch": 2.5245901639344264, | |
| "grad_norm": 0.3805748522281647, | |
| "learning_rate": 4.832449265796828e-06, | |
| "loss": 0.46285977959632874, | |
| "step": 924 | |
| }, | |
| { | |
| "epoch": 2.530054644808743, | |
| "grad_norm": 0.18005476891994476, | |
| "learning_rate": 4.791750983393834e-06, | |
| "loss": 0.4361620843410492, | |
| "step": 926 | |
| }, | |
| { | |
| "epoch": 2.5355191256830603, | |
| "grad_norm": 0.49426957964897156, | |
| "learning_rate": 4.751477596124489e-06, | |
| "loss": 0.3771613836288452, | |
| "step": 928 | |
| }, | |
| { | |
| "epoch": 2.540983606557377, | |
| "grad_norm": 0.349456250667572, | |
| "learning_rate": 4.711630565519564e-06, | |
| "loss": 0.5420392155647278, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 2.546448087431694, | |
| "grad_norm": 0.7472839951515198, | |
| "learning_rate": 4.6722113376372515e-06, | |
| "loss": 0.48701003193855286, | |
| "step": 932 | |
| }, | |
| { | |
| "epoch": 2.551912568306011, | |
| "grad_norm": 0.656858503818512, | |
| "learning_rate": 4.63322134301068e-06, | |
| "loss": 0.6057359576225281, | |
| "step": 934 | |
| }, | |
| { | |
| "epoch": 2.557377049180328, | |
| "grad_norm": 0.6535321474075317, | |
| "learning_rate": 4.594661996596006e-06, | |
| "loss": 0.5973996520042419, | |
| "step": 936 | |
| }, | |
| { | |
| "epoch": 2.5628415300546448, | |
| "grad_norm": 0.903624951839447, | |
| "learning_rate": 4.556534697721051e-06, | |
| "loss": 0.37523767352104187, | |
| "step": 938 | |
| }, | |
| { | |
| "epoch": 2.5683060109289615, | |
| "grad_norm": 0.5161249041557312, | |
| "learning_rate": 4.518840830034534e-06, | |
| "loss": 0.6633897423744202, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 2.5737704918032787, | |
| "grad_norm": 0.31110140681266785, | |
| "learning_rate": 4.4815817614558535e-06, | |
| "loss": 0.44261860847473145, | |
| "step": 942 | |
| }, | |
| { | |
| "epoch": 2.579234972677596, | |
| "grad_norm": 0.25676819682121277, | |
| "learning_rate": 4.444758844125447e-06, | |
| "loss": 0.5755467414855957, | |
| "step": 944 | |
| }, | |
| { | |
| "epoch": 2.5846994535519126, | |
| "grad_norm": 0.07289361208677292, | |
| "learning_rate": 4.408373414355716e-06, | |
| "loss": 0.1789170801639557, | |
| "step": 946 | |
| }, | |
| { | |
| "epoch": 2.5901639344262293, | |
| "grad_norm": 0.22974097728729248, | |
| "learning_rate": 4.37242679258254e-06, | |
| "loss": 0.4640808701515198, | |
| "step": 948 | |
| }, | |
| { | |
| "epoch": 2.5956284153005464, | |
| "grad_norm": 0.4629270136356354, | |
| "learning_rate": 4.336920283317344e-06, | |
| "loss": 0.3182026743888855, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 2.6010928961748636, | |
| "grad_norm": 0.37970679998397827, | |
| "learning_rate": 4.3018551750997694e-06, | |
| "loss": 0.6208051443099976, | |
| "step": 952 | |
| }, | |
| { | |
| "epoch": 2.6065573770491803, | |
| "grad_norm": 0.3654148578643799, | |
| "learning_rate": 4.267232740450912e-06, | |
| "loss": 0.378692626953125, | |
| "step": 954 | |
| }, | |
| { | |
| "epoch": 2.612021857923497, | |
| "grad_norm": 0.09711217880249023, | |
| "learning_rate": 4.233054235827138e-06, | |
| "loss": 0.19142156839370728, | |
| "step": 956 | |
| }, | |
| { | |
| "epoch": 2.6174863387978142, | |
| "grad_norm": 0.3945452570915222, | |
| "learning_rate": 4.199320901574488e-06, | |
| "loss": 0.3060496747493744, | |
| "step": 958 | |
| }, | |
| { | |
| "epoch": 2.6229508196721314, | |
| "grad_norm": 0.26368603110313416, | |
| "learning_rate": 4.166033961883656e-06, | |
| "loss": 0.5142332911491394, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 2.628415300546448, | |
| "grad_norm": 0.2471064180135727, | |
| "learning_rate": 4.133194624745581e-06, | |
| "loss": 0.3456544578075409, | |
| "step": 962 | |
| }, | |
| { | |
| "epoch": 2.633879781420765, | |
| "grad_norm": 0.24647603929042816, | |
| "learning_rate": 4.100804081907598e-06, | |
| "loss": 0.5130555033683777, | |
| "step": 964 | |
| }, | |
| { | |
| "epoch": 2.639344262295082, | |
| "grad_norm": 0.24391725659370422, | |
| "learning_rate": 4.068863508830188e-06, | |
| "loss": 0.5546358823776245, | |
| "step": 966 | |
| }, | |
| { | |
| "epoch": 2.644808743169399, | |
| "grad_norm": 0.2519559860229492, | |
| "learning_rate": 4.037374064644322e-06, | |
| "loss": 0.45380809903144836, | |
| "step": 968 | |
| }, | |
| { | |
| "epoch": 2.650273224043716, | |
| "grad_norm": 0.38750696182250977, | |
| "learning_rate": 4.006336892109398e-06, | |
| "loss": 0.5361884236335754, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 2.6557377049180326, | |
| "grad_norm": 0.2832329273223877, | |
| "learning_rate": 3.9757531175717695e-06, | |
| "loss": 0.5075885653495789, | |
| "step": 972 | |
| }, | |
| { | |
| "epoch": 2.66120218579235, | |
| "grad_norm": 0.28929048776626587, | |
| "learning_rate": 3.945623850923867e-06, | |
| "loss": 0.40441015362739563, | |
| "step": 974 | |
| }, | |
| { | |
| "epoch": 2.6666666666666665, | |
| "grad_norm": 0.26623356342315674, | |
| "learning_rate": 3.915950185563923e-06, | |
| "loss": 0.5119766592979431, | |
| "step": 976 | |
| }, | |
| { | |
| "epoch": 2.6721311475409837, | |
| "grad_norm": 0.22525067627429962, | |
| "learning_rate": 3.886733198356292e-06, | |
| "loss": 0.5085029602050781, | |
| "step": 978 | |
| }, | |
| { | |
| "epoch": 2.6775956284153004, | |
| "grad_norm": 0.3791970908641815, | |
| "learning_rate": 3.857973949592365e-06, | |
| "loss": 0.8541042804718018, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 2.6830601092896176, | |
| "grad_norm": 0.23859044909477234, | |
| "learning_rate": 3.829673482952108e-06, | |
| "loss": 0.4586790204048157, | |
| "step": 982 | |
| }, | |
| { | |
| "epoch": 2.6885245901639343, | |
| "grad_norm": 0.15074767172336578, | |
| "learning_rate": 3.8018328254661618e-06, | |
| "loss": 0.3177950382232666, | |
| "step": 984 | |
| }, | |
| { | |
| "epoch": 2.6939890710382515, | |
| "grad_norm": 0.2635786533355713, | |
| "learning_rate": 3.77445298747859e-06, | |
| "loss": 0.5969187021255493, | |
| "step": 986 | |
| }, | |
| { | |
| "epoch": 2.699453551912568, | |
| "grad_norm": 0.25986602902412415, | |
| "learning_rate": 3.7475349626102107e-06, | |
| "loss": 0.6235385537147522, | |
| "step": 988 | |
| }, | |
| { | |
| "epoch": 2.7049180327868854, | |
| "grad_norm": 0.3215150237083435, | |
| "learning_rate": 3.721079727722524e-06, | |
| "loss": 0.6164737939834595, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 2.710382513661202, | |
| "grad_norm": 0.19917510449886322, | |
| "learning_rate": 3.6950882428822834e-06, | |
| "loss": 0.28230756521224976, | |
| "step": 992 | |
| }, | |
| { | |
| "epoch": 2.7158469945355193, | |
| "grad_norm": 0.18247978389263153, | |
| "learning_rate": 3.669561451326635e-06, | |
| "loss": 0.5849312543869019, | |
| "step": 994 | |
| }, | |
| { | |
| "epoch": 2.721311475409836, | |
| "grad_norm": 0.18750226497650146, | |
| "learning_rate": 3.6445002794289026e-06, | |
| "loss": 0.6486452221870422, | |
| "step": 996 | |
| }, | |
| { | |
| "epoch": 2.726775956284153, | |
| "grad_norm": 0.23418167233467102, | |
| "learning_rate": 3.6199056366649534e-06, | |
| "loss": 0.5458055734634399, | |
| "step": 998 | |
| }, | |
| { | |
| "epoch": 2.73224043715847, | |
| "grad_norm": 0.5310984253883362, | |
| "learning_rate": 3.5957784155802096e-06, | |
| "loss": 0.575606107711792, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 2.737704918032787, | |
| "grad_norm": 0.2529151141643524, | |
| "learning_rate": 3.572119491757245e-06, | |
| "loss": 0.5274743437767029, | |
| "step": 1002 | |
| }, | |
| { | |
| "epoch": 2.7431693989071038, | |
| "grad_norm": 0.2487734705209732, | |
| "learning_rate": 3.5489297237840187e-06, | |
| "loss": 0.5259593725204468, | |
| "step": 1004 | |
| }, | |
| { | |
| "epoch": 2.748633879781421, | |
| "grad_norm": 0.5198191404342651, | |
| "learning_rate": 3.5262099532227064e-06, | |
| "loss": 0.32947495579719543, | |
| "step": 1006 | |
| }, | |
| { | |
| "epoch": 2.7540983606557377, | |
| "grad_norm": 0.32850825786590576, | |
| "learning_rate": 3.5039610045791717e-06, | |
| "loss": 0.5193657875061035, | |
| "step": 1008 | |
| }, | |
| { | |
| "epoch": 2.7595628415300544, | |
| "grad_norm": 0.33162155747413635, | |
| "learning_rate": 3.4821836852730397e-06, | |
| "loss": 0.5816477537155151, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 2.7650273224043715, | |
| "grad_norm": 0.39614418148994446, | |
| "learning_rate": 3.460878785608393e-06, | |
| "loss": 0.5574301481246948, | |
| "step": 1012 | |
| }, | |
| { | |
| "epoch": 2.7704918032786887, | |
| "grad_norm": 0.32036471366882324, | |
| "learning_rate": 3.440047078745089e-06, | |
| "loss": 0.5831928253173828, | |
| "step": 1014 | |
| }, | |
| { | |
| "epoch": 2.7759562841530054, | |
| "grad_norm": 0.34923821687698364, | |
| "learning_rate": 3.4196893206707136e-06, | |
| "loss": 0.49546846747398376, | |
| "step": 1016 | |
| }, | |
| { | |
| "epoch": 2.781420765027322, | |
| "grad_norm": 0.18867231905460358, | |
| "learning_rate": 3.3998062501731353e-06, | |
| "loss": 0.4363085925579071, | |
| "step": 1018 | |
| }, | |
| { | |
| "epoch": 2.7868852459016393, | |
| "grad_norm": 0.534398078918457, | |
| "learning_rate": 3.3803985888136974e-06, | |
| "loss": 0.49442851543426514, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 2.7923497267759565, | |
| "grad_norm": 2.4884400367736816, | |
| "learning_rate": 3.3614670409010353e-06, | |
| "loss": 0.4803468585014343, | |
| "step": 1022 | |
| }, | |
| { | |
| "epoch": 2.797814207650273, | |
| "grad_norm": 0.29524970054626465, | |
| "learning_rate": 3.3430122934655115e-06, | |
| "loss": 0.3448233902454376, | |
| "step": 1024 | |
| }, | |
| { | |
| "epoch": 2.80327868852459, | |
| "grad_norm": 0.2110273241996765, | |
| "learning_rate": 3.3250350162342862e-06, | |
| "loss": 0.744547963142395, | |
| "step": 1026 | |
| }, | |
| { | |
| "epoch": 2.808743169398907, | |
| "grad_norm": 0.7826948761940002, | |
| "learning_rate": 3.3075358616070156e-06, | |
| "loss": 0.29925408959388733, | |
| "step": 1028 | |
| }, | |
| { | |
| "epoch": 2.8142076502732243, | |
| "grad_norm": 0.2872641980648041, | |
| "learning_rate": 3.2905154646321698e-06, | |
| "loss": 0.6437289714813232, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 2.819672131147541, | |
| "grad_norm": 0.1935124546289444, | |
| "learning_rate": 3.273974442983994e-06, | |
| "loss": 0.4832029342651367, | |
| "step": 1032 | |
| }, | |
| { | |
| "epoch": 2.8251366120218577, | |
| "grad_norm": 0.689404308795929, | |
| "learning_rate": 3.2579133969400855e-06, | |
| "loss": 0.44030508399009705, | |
| "step": 1034 | |
| }, | |
| { | |
| "epoch": 2.830601092896175, | |
| "grad_norm": 0.4986112117767334, | |
| "learning_rate": 3.2423329093596135e-06, | |
| "loss": 0.5633320212364197, | |
| "step": 1036 | |
| }, | |
| { | |
| "epoch": 2.836065573770492, | |
| "grad_norm": 0.3007761240005493, | |
| "learning_rate": 3.2272335456621674e-06, | |
| "loss": 0.5471495389938354, | |
| "step": 1038 | |
| }, | |
| { | |
| "epoch": 2.841530054644809, | |
| "grad_norm": 0.31454378366470337, | |
| "learning_rate": 3.2126158538072387e-06, | |
| "loss": 0.6150797605514526, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 2.8469945355191255, | |
| "grad_norm": 0.23484109342098236, | |
| "learning_rate": 3.1984803642743314e-06, | |
| "loss": 0.5517333149909973, | |
| "step": 1042 | |
| }, | |
| { | |
| "epoch": 2.8524590163934427, | |
| "grad_norm": 0.17831581830978394, | |
| "learning_rate": 3.184827590043711e-06, | |
| "loss": 0.39860761165618896, | |
| "step": 1044 | |
| }, | |
| { | |
| "epoch": 2.8579234972677594, | |
| "grad_norm": 0.17937374114990234, | |
| "learning_rate": 3.1716580265777947e-06, | |
| "loss": 0.5375534892082214, | |
| "step": 1046 | |
| }, | |
| { | |
| "epoch": 2.8633879781420766, | |
| "grad_norm": 0.06227630004286766, | |
| "learning_rate": 3.158972151803164e-06, | |
| "loss": 0.2906738519668579, | |
| "step": 1048 | |
| }, | |
| { | |
| "epoch": 2.8688524590163933, | |
| "grad_norm": 0.9109938740730286, | |
| "learning_rate": 3.1467704260932225e-06, | |
| "loss": 0.5708995461463928, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 2.8743169398907105, | |
| "grad_norm": 0.26088184118270874, | |
| "learning_rate": 3.135053292251489e-06, | |
| "loss": 0.4835635721683502, | |
| "step": 1052 | |
| }, | |
| { | |
| "epoch": 2.879781420765027, | |
| "grad_norm": 0.3253296911716461, | |
| "learning_rate": 3.123821175495531e-06, | |
| "loss": 0.5986778140068054, | |
| "step": 1054 | |
| }, | |
| { | |
| "epoch": 2.8852459016393444, | |
| "grad_norm": 1.1687480211257935, | |
| "learning_rate": 3.1130744834415284e-06, | |
| "loss": 0.6116738319396973, | |
| "step": 1056 | |
| }, | |
| { | |
| "epoch": 2.890710382513661, | |
| "grad_norm": 0.525957465171814, | |
| "learning_rate": 3.1028136060894867e-06, | |
| "loss": 0.381021648645401, | |
| "step": 1058 | |
| }, | |
| { | |
| "epoch": 2.8961748633879782, | |
| "grad_norm": 0.23480363190174103, | |
| "learning_rate": 3.0930389158090754e-06, | |
| "loss": 0.5317255258560181, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 2.901639344262295, | |
| "grad_norm": 0.21484972536563873, | |
| "learning_rate": 3.0837507673261266e-06, | |
| "loss": 0.5115377306938171, | |
| "step": 1062 | |
| }, | |
| { | |
| "epoch": 2.907103825136612, | |
| "grad_norm": 0.74167799949646, | |
| "learning_rate": 3.0749494977097482e-06, | |
| "loss": 0.6704295873641968, | |
| "step": 1064 | |
| }, | |
| { | |
| "epoch": 2.912568306010929, | |
| "grad_norm": 0.1196637749671936, | |
| "learning_rate": 3.066635426360105e-06, | |
| "loss": 0.40651005506515503, | |
| "step": 1066 | |
| }, | |
| { | |
| "epoch": 2.918032786885246, | |
| "grad_norm": 0.197874054312706, | |
| "learning_rate": 3.058808854996815e-06, | |
| "loss": 0.39443740248680115, | |
| "step": 1068 | |
| }, | |
| { | |
| "epoch": 2.9234972677595628, | |
| "grad_norm": 0.3340167999267578, | |
| "learning_rate": 3.051470067648011e-06, | |
| "loss": 0.6645128726959229, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 2.92896174863388, | |
| "grad_norm": 0.21869197487831116, | |
| "learning_rate": 3.044619330640027e-06, | |
| "loss": 0.5734466314315796, | |
| "step": 1072 | |
| }, | |
| { | |
| "epoch": 2.9344262295081966, | |
| "grad_norm": 0.2982349991798401, | |
| "learning_rate": 3.038256892587734e-06, | |
| "loss": 0.24794718623161316, | |
| "step": 1074 | |
| }, | |
| { | |
| "epoch": 2.939890710382514, | |
| "grad_norm": 0.29161694645881653, | |
| "learning_rate": 3.032382984385516e-06, | |
| "loss": 0.26806071400642395, | |
| "step": 1076 | |
| }, | |
| { | |
| "epoch": 2.9453551912568305, | |
| "grad_norm": 0.288613885641098, | |
| "learning_rate": 3.026997819198895e-06, | |
| "loss": 0.43651899695396423, | |
| "step": 1078 | |
| }, | |
| { | |
| "epoch": 2.9508196721311473, | |
| "grad_norm": 0.2575402557849884, | |
| "learning_rate": 3.022101592456795e-06, | |
| "loss": 0.4721393287181854, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 2.9562841530054644, | |
| "grad_norm": 0.2845239043235779, | |
| "learning_rate": 3.0176944818444437e-06, | |
| "loss": 0.45804911851882935, | |
| "step": 1082 | |
| }, | |
| { | |
| "epoch": 2.9617486338797816, | |
| "grad_norm": 0.1906892955303192, | |
| "learning_rate": 3.013776647296931e-06, | |
| "loss": 0.49305209517478943, | |
| "step": 1084 | |
| }, | |
| { | |
| "epoch": 2.9672131147540983, | |
| "grad_norm": 0.2843456268310547, | |
| "learning_rate": 3.010348230993402e-06, | |
| "loss": 0.35434451699256897, | |
| "step": 1086 | |
| }, | |
| { | |
| "epoch": 2.972677595628415, | |
| "grad_norm": 0.32488515973091125, | |
| "learning_rate": 3.007409357351896e-06, | |
| "loss": 0.631571352481842, | |
| "step": 1088 | |
| }, | |
| { | |
| "epoch": 2.978142076502732, | |
| "grad_norm": 0.23308050632476807, | |
| "learning_rate": 3.004960133024837e-06, | |
| "loss": 0.40969452261924744, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 2.9836065573770494, | |
| "grad_norm": 0.20882023870944977, | |
| "learning_rate": 3.0030006468951557e-06, | |
| "loss": 0.5743120908737183, | |
| "step": 1092 | |
| }, | |
| { | |
| "epoch": 2.989071038251366, | |
| "grad_norm": 0.3456875681877136, | |
| "learning_rate": 3.001530970073067e-06, | |
| "loss": 0.6334488391876221, | |
| "step": 1094 | |
| }, | |
| { | |
| "epoch": 2.994535519125683, | |
| "grad_norm": 1.099088430404663, | |
| "learning_rate": 3.0005511558934945e-06, | |
| "loss": 0.37412285804748535, | |
| "step": 1096 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "grad_norm": 0.584098756313324, | |
| "learning_rate": 3.0000612399141247e-06, | |
| "loss": 0.5203018188476562, | |
| "step": 1098 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "step": 1098, | |
| "total_flos": 4.931331991116186e+18, | |
| "train_loss": 0.88325994085853, | |
| "train_runtime": 11968.8346, | |
| "train_samples_per_second": 5.504, | |
| "train_steps_per_second": 0.092 | |
| } | |
| ], | |
| "logging_steps": 2, | |
| "max_steps": 1098, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 99999, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": false, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 4.931331991116186e+18, | |
| "train_batch_size": 3, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |