Image-Text-to-Text
Transformers
Safetensors
qwen3_5
llama-factory
full
Generated from Trainer
conversational
Instructions to use furproxy/9b-35 with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use furproxy/9b-35 with Transformers:
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("image-text-to-text", model="furproxy/9b-35")
messages = [
    {
        "role": "user",
        "content": [
            {"type": "image", "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/p-blog/candy.JPG"},
            {"type": "text", "text": "What animal is on the candy?"}
        ]
    },
]
pipe(text=messages)

# Load model directly
from transformers import AutoProcessor, AutoModelForImageTextToText

processor = AutoProcessor.from_pretrained("furproxy/9b-35")
model = AutoModelForImageTextToText.from_pretrained("furproxy/9b-35")
messages = [
    {
        "role": "user",
        "content": [
            {"type": "image", "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/p-blog/candy.JPG"},
            {"type": "text", "text": "What animal is on the candy?"}
        ]
    },
]
inputs = processor.apply_chat_template(
    messages,
    add_generation_prompt=True,
    tokenize=True,
    return_dict=True,
    return_tensors="pt",
).to(model.device)

outputs = model.generate(**inputs, max_new_tokens=40)
print(processor.decode(outputs[0][inputs["input_ids"].shape[-1]:]))
- Notebooks
- Google Colab
- Kaggle
- Local Apps Settings
- vLLM
How to use furproxy/9b-35 with vLLM:
Install from pip and serve model
# Install vLLM from pip:
pip install vllm
# Start the vLLM server:
vllm serve "furproxy/9b-35"
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/chat/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "furproxy/9b-35",
    "messages": [
      {
        "role": "user",
        "content": [
          { "type": "text", "text": "Describe this image in one sentence." },
          { "type": "image_url", "image_url": { "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg" } }
        ]
      }
    ]
  }'
Use Docker
docker model run hf.co/furproxy/9b-35
- SGLang
How to use furproxy/9b-35 with SGLang:
Install from pip and serve model
# Install SGLang from pip:
pip install sglang
# Start the SGLang server:
python3 -m sglang.launch_server \
  --model-path "furproxy/9b-35" \
  --host 0.0.0.0 \
  --port 30000
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "furproxy/9b-35",
    "messages": [
      {
        "role": "user",
        "content": [
          { "type": "text", "text": "Describe this image in one sentence." },
          { "type": "image_url", "image_url": { "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg" } }
        ]
      }
    ]
  }'
Use Docker images
docker run --gpus all \
  --shm-size 32g \
  -p 30000:30000 \
  -v ~/.cache/huggingface:/root/.cache/huggingface \
  --env "HF_TOKEN=<secret>" \
  --ipc=host \
  lmsysorg/sglang:latest \
  python3 -m sglang.launch_server \
    --model-path "furproxy/9b-35" \
    --host 0.0.0.0 \
    --port 30000
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "furproxy/9b-35",
    "messages": [
      {
        "role": "user",
        "content": [
          { "type": "text", "text": "Describe this image in one sentence." },
          { "type": "image_url", "image_url": { "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg" } }
        ]
      }
    ]
  }'
- Docker Model Runner
How to use furproxy/9b-35 with Docker Model Runner:
docker model run hf.co/furproxy/9b-35
| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 3.0, | |
| "eval_steps": 500, | |
| "global_step": 1410, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.004259850905218318, | |
| "grad_norm": 1.125, | |
| "learning_rate": 2.8169014084507043e-07, | |
| "loss": 1.820163607597351, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.008519701810436636, | |
| "grad_norm": 1.2734375, | |
| "learning_rate": 8.450704225352114e-07, | |
| "loss": 1.8241215944290161, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.012779552715654952, | |
| "grad_norm": 0.96875, | |
| "learning_rate": 1.4084507042253523e-06, | |
| "loss": 1.7736045122146606, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.01703940362087327, | |
| "grad_norm": 1.6953125, | |
| "learning_rate": 1.971830985915493e-06, | |
| "loss": 1.9067131280899048, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.021299254526091587, | |
| "grad_norm": 1.453125, | |
| "learning_rate": 2.535211267605634e-06, | |
| "loss": 1.8296759128570557, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.025559105431309903, | |
| "grad_norm": 1.3984375, | |
| "learning_rate": 3.0985915492957746e-06, | |
| "loss": 1.698632001876831, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.029818956336528223, | |
| "grad_norm": 1.734375, | |
| "learning_rate": 3.6619718309859158e-06, | |
| "loss": 1.7496201992034912, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.03407880724174654, | |
| "grad_norm": 1.0703125, | |
| "learning_rate": 4.225352112676057e-06, | |
| "loss": 1.7501145601272583, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.038338658146964855, | |
| "grad_norm": 1.3984375, | |
| "learning_rate": 4.788732394366197e-06, | |
| "loss": 1.8459789752960205, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.042598509052183174, | |
| "grad_norm": 0.83203125, | |
| "learning_rate": 5.352112676056338e-06, | |
| "loss": 1.8062622547149658, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.046858359957401494, | |
| "grad_norm": 1.9140625, | |
| "learning_rate": 5.915492957746479e-06, | |
| "loss": 1.6213020086288452, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.051118210862619806, | |
| "grad_norm": 0.921875, | |
| "learning_rate": 6.478873239436621e-06, | |
| "loss": 1.6766525506973267, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.055378061767838126, | |
| "grad_norm": 0.97265625, | |
| "learning_rate": 7.042253521126761e-06, | |
| "loss": 1.6821609735488892, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.059637912673056445, | |
| "grad_norm": 0.84765625, | |
| "learning_rate": 7.6056338028169015e-06, | |
| "loss": 1.6819344758987427, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.06389776357827476, | |
| "grad_norm": 1.0546875, | |
| "learning_rate": 8.169014084507043e-06, | |
| "loss": 1.668290376663208, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.06815761448349308, | |
| "grad_norm": 0.796875, | |
| "learning_rate": 8.732394366197183e-06, | |
| "loss": 1.7105621099472046, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.0724174653887114, | |
| "grad_norm": 0.70703125, | |
| "learning_rate": 9.295774647887325e-06, | |
| "loss": 1.640573501586914, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.07667731629392971, | |
| "grad_norm": 0.90625, | |
| "learning_rate": 9.859154929577466e-06, | |
| "loss": 1.7408779859542847, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.08093716719914804, | |
| "grad_norm": 0.87109375, | |
| "learning_rate": 1.0422535211267606e-05, | |
| "loss": 1.7062400579452515, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.08519701810436635, | |
| "grad_norm": 0.85546875, | |
| "learning_rate": 1.0985915492957748e-05, | |
| "loss": 1.6299972534179688, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.08945686900958466, | |
| "grad_norm": 0.71484375, | |
| "learning_rate": 1.1549295774647888e-05, | |
| "loss": 1.7348750829696655, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.09371671991480299, | |
| "grad_norm": 0.63671875, | |
| "learning_rate": 1.211267605633803e-05, | |
| "loss": 1.5983866453170776, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.0979765708200213, | |
| "grad_norm": 0.9296875, | |
| "learning_rate": 1.2676056338028171e-05, | |
| "loss": 1.6268177032470703, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.10223642172523961, | |
| "grad_norm": 0.609375, | |
| "learning_rate": 1.323943661971831e-05, | |
| "loss": 1.6371753215789795, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.10649627263045794, | |
| "grad_norm": 0.52734375, | |
| "learning_rate": 1.380281690140845e-05, | |
| "loss": 1.4547175168991089, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.11075612353567625, | |
| "grad_norm": 0.734375, | |
| "learning_rate": 1.4366197183098594e-05, | |
| "loss": 1.5619021654129028, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.11501597444089456, | |
| "grad_norm": 1.359375, | |
| "learning_rate": 1.4929577464788734e-05, | |
| "loss": 1.5433554649353027, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.11927582534611289, | |
| "grad_norm": 1.0625, | |
| "learning_rate": 1.5492957746478872e-05, | |
| "loss": 1.4989991188049316, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.1235356762513312, | |
| "grad_norm": 0.63671875, | |
| "learning_rate": 1.6056338028169017e-05, | |
| "loss": 1.549402117729187, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.12779552715654952, | |
| "grad_norm": 0.6953125, | |
| "learning_rate": 1.6619718309859155e-05, | |
| "loss": 1.434061050415039, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.13205537806176784, | |
| "grad_norm": 0.73828125, | |
| "learning_rate": 1.7183098591549297e-05, | |
| "loss": 1.5171629190444946, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.13631522896698617, | |
| "grad_norm": 0.7734375, | |
| "learning_rate": 1.774647887323944e-05, | |
| "loss": 1.3912996053695679, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.14057507987220447, | |
| "grad_norm": 0.62890625, | |
| "learning_rate": 1.830985915492958e-05, | |
| "loss": 1.4431164264678955, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.1448349307774228, | |
| "grad_norm": 0.625, | |
| "learning_rate": 1.887323943661972e-05, | |
| "loss": 1.385206699371338, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.14909478168264112, | |
| "grad_norm": 0.74609375, | |
| "learning_rate": 1.943661971830986e-05, | |
| "loss": 1.4210329055786133, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.15335463258785942, | |
| "grad_norm": 0.57421875, | |
| "learning_rate": 2e-05, | |
| "loss": 1.4347320795059204, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.15761448349307774, | |
| "grad_norm": 0.6484375, | |
| "learning_rate": 1.9999911923960593e-05, | |
| "loss": 1.4454214572906494, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.16187433439829607, | |
| "grad_norm": 0.71484375, | |
| "learning_rate": 1.9999647697781703e-05, | |
| "loss": 1.34307861328125, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.16613418530351437, | |
| "grad_norm": 0.71484375, | |
| "learning_rate": 1.9999207327281333e-05, | |
| "loss": 1.3641051054000854, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.1703940362087327, | |
| "grad_norm": 1.2890625, | |
| "learning_rate": 1.9998590822156014e-05, | |
| "loss": 1.3956571817398071, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.17465388711395102, | |
| "grad_norm": 0.69140625, | |
| "learning_rate": 1.999779819598057e-05, | |
| "loss": 1.3472541570663452, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.17891373801916932, | |
| "grad_norm": 0.703125, | |
| "learning_rate": 1.999682946620784e-05, | |
| "loss": 1.3230623006820679, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.18317358892438765, | |
| "grad_norm": 0.64453125, | |
| "learning_rate": 1.999568465416831e-05, | |
| "loss": 1.3147826194763184, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.18743343982960597, | |
| "grad_norm": 1.21875, | |
| "learning_rate": 1.9994363785069595e-05, | |
| "loss": 1.3436126708984375, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.19169329073482427, | |
| "grad_norm": 1.0390625, | |
| "learning_rate": 1.9992866887995928e-05, | |
| "loss": 1.2736291885375977, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.1959531416400426, | |
| "grad_norm": 0.8828125, | |
| "learning_rate": 1.99911939959075e-05, | |
| "loss": 1.4134427309036255, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.20021299254526093, | |
| "grad_norm": 1.6171875, | |
| "learning_rate": 1.9989345145639734e-05, | |
| "loss": 1.3293910026550293, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.20447284345047922, | |
| "grad_norm": 0.609375, | |
| "learning_rate": 1.998732037790249e-05, | |
| "loss": 1.3805630207061768, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.20873269435569755, | |
| "grad_norm": 0.578125, | |
| "learning_rate": 1.9985119737279156e-05, | |
| "loss": 1.3184444904327393, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.21299254526091588, | |
| "grad_norm": 0.6640625, | |
| "learning_rate": 1.9982743272225637e-05, | |
| "loss": 1.2683892250061035, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.21725239616613418, | |
| "grad_norm": 0.609375, | |
| "learning_rate": 1.9980191035069357e-05, | |
| "loss": 1.2953828573226929, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.2215122470713525, | |
| "grad_norm": 0.609375, | |
| "learning_rate": 1.9977463082008048e-05, | |
| "loss": 1.3181703090667725, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.22577209797657083, | |
| "grad_norm": 1.4375, | |
| "learning_rate": 1.997455947310854e-05, | |
| "loss": 1.3661359548568726, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.23003194888178913, | |
| "grad_norm": 1.2578125, | |
| "learning_rate": 1.997148027230541e-05, | |
| "loss": 1.277463436126709, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.23429179978700745, | |
| "grad_norm": 0.7265625, | |
| "learning_rate": 1.9968225547399624e-05, | |
| "loss": 1.244004249572754, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.23855165069222578, | |
| "grad_norm": 0.77734375, | |
| "learning_rate": 1.9964795370057004e-05, | |
| "loss": 1.247603416442871, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.24281150159744408, | |
| "grad_norm": 1.0234375, | |
| "learning_rate": 1.996118981580665e-05, | |
| "loss": 1.2560734748840332, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.2470713525026624, | |
| "grad_norm": 1.0859375, | |
| "learning_rate": 1.99574089640393e-05, | |
| "loss": 1.2559478282928467, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.25133120340788073, | |
| "grad_norm": 0.65625, | |
| "learning_rate": 1.9953452898005564e-05, | |
| "loss": 1.2884942293167114, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 0.25559105431309903, | |
| "grad_norm": 1.4921875, | |
| "learning_rate": 1.9949321704814103e-05, | |
| "loss": 1.2233449220657349, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.2598509052183174, | |
| "grad_norm": 0.69921875, | |
| "learning_rate": 1.9945015475429704e-05, | |
| "loss": 1.282986044883728, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 0.2641107561235357, | |
| "grad_norm": 0.5546875, | |
| "learning_rate": 1.9940534304671266e-05, | |
| "loss": 1.2828234434127808, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.268370607028754, | |
| "grad_norm": 0.609375, | |
| "learning_rate": 1.9935878291209737e-05, | |
| "loss": 1.2087836265563965, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 0.27263045793397234, | |
| "grad_norm": 0.86328125, | |
| "learning_rate": 1.9931047537565917e-05, | |
| "loss": 1.2843331098556519, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.27689030883919064, | |
| "grad_norm": 0.85546875, | |
| "learning_rate": 1.9926042150108228e-05, | |
| "loss": 1.337433099746704, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.28115015974440893, | |
| "grad_norm": 0.515625, | |
| "learning_rate": 1.9920862239050333e-05, | |
| "loss": 1.244279146194458, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 0.2854100106496273, | |
| "grad_norm": 0.6796875, | |
| "learning_rate": 1.9915507918448763e-05, | |
| "loss": 1.191616415977478, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 0.2896698615548456, | |
| "grad_norm": 1.7421875, | |
| "learning_rate": 1.9909979306200337e-05, | |
| "loss": 1.3028016090393066, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.2939297124600639, | |
| "grad_norm": 0.84765625, | |
| "learning_rate": 1.9904276524039633e-05, | |
| "loss": 1.2914124727249146, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 0.29818956336528224, | |
| "grad_norm": 0.66015625, | |
| "learning_rate": 1.9898399697536263e-05, | |
| "loss": 1.318739652633667, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.30244941427050054, | |
| "grad_norm": 0.6328125, | |
| "learning_rate": 1.9892348956092136e-05, | |
| "loss": 1.2955219745635986, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 0.30670926517571884, | |
| "grad_norm": 0.5859375, | |
| "learning_rate": 1.9886124432938582e-05, | |
| "loss": 1.2421311140060425, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.3109691160809372, | |
| "grad_norm": 1.34375, | |
| "learning_rate": 1.9879726265133432e-05, | |
| "loss": 1.2619209289550781, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 0.3152289669861555, | |
| "grad_norm": 1.875, | |
| "learning_rate": 1.987315459355801e-05, | |
| "loss": 1.2909760475158691, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 0.3194888178913738, | |
| "grad_norm": 0.7734375, | |
| "learning_rate": 1.9866409562914022e-05, | |
| "loss": 1.2878739833831787, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.32374866879659214, | |
| "grad_norm": 2.515625, | |
| "learning_rate": 1.985949132172036e-05, | |
| "loss": 1.2502833604812622, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 0.32800851970181044, | |
| "grad_norm": 0.58984375, | |
| "learning_rate": 1.9852400022309845e-05, | |
| "loss": 1.2763196229934692, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 0.33226837060702874, | |
| "grad_norm": 0.609375, | |
| "learning_rate": 1.984513582082587e-05, | |
| "loss": 1.3070451021194458, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 0.3365282215122471, | |
| "grad_norm": 0.67578125, | |
| "learning_rate": 1.9837698877218955e-05, | |
| "loss": 1.2980157136917114, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 0.3407880724174654, | |
| "grad_norm": 0.482421875, | |
| "learning_rate": 1.983008935524324e-05, | |
| "loss": 1.2509483098983765, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.3450479233226837, | |
| "grad_norm": 0.640625, | |
| "learning_rate": 1.9822307422452862e-05, | |
| "loss": 1.1746965646743774, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 0.34930777422790205, | |
| "grad_norm": 0.546875, | |
| "learning_rate": 1.9814353250198275e-05, | |
| "loss": 1.1982603073120117, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 0.35356762513312034, | |
| "grad_norm": 0.62890625, | |
| "learning_rate": 1.9806227013622483e-05, | |
| "loss": 1.2470253705978394, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 0.35782747603833864, | |
| "grad_norm": 1.734375, | |
| "learning_rate": 1.9797928891657156e-05, | |
| "loss": 1.2994393110275269, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 0.362087326943557, | |
| "grad_norm": 0.76171875, | |
| "learning_rate": 1.9789459067018733e-05, | |
| "loss": 1.2793241739273071, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.3663471778487753, | |
| "grad_norm": 0.67578125, | |
| "learning_rate": 1.9780817726204363e-05, | |
| "loss": 1.2065937519073486, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 0.3706070287539936, | |
| "grad_norm": 0.5390625, | |
| "learning_rate": 1.9772005059487815e-05, | |
| "loss": 1.290358543395996, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 0.37486687965921195, | |
| "grad_norm": 0.6171875, | |
| "learning_rate": 1.9763021260915283e-05, | |
| "loss": 1.2577617168426514, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 0.37912673056443025, | |
| "grad_norm": 0.4765625, | |
| "learning_rate": 1.9753866528301128e-05, | |
| "loss": 1.1449509859085083, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 0.38338658146964855, | |
| "grad_norm": 0.6171875, | |
| "learning_rate": 1.9744541063223477e-05, | |
| "loss": 1.2480742931365967, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.3876464323748669, | |
| "grad_norm": 0.54296875, | |
| "learning_rate": 1.9735045071019853e-05, | |
| "loss": 1.245701789855957, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 0.3919062832800852, | |
| "grad_norm": 0.7578125, | |
| "learning_rate": 1.9725378760782593e-05, | |
| "loss": 1.2594965696334839, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 0.3961661341853035, | |
| "grad_norm": 0.71875, | |
| "learning_rate": 1.9715542345354285e-05, | |
| "loss": 1.112318992614746, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 0.40042598509052185, | |
| "grad_norm": 0.71484375, | |
| "learning_rate": 1.9705536041323056e-05, | |
| "loss": 1.240614891052246, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 0.40468583599574015, | |
| "grad_norm": 0.435546875, | |
| "learning_rate": 1.9695360069017798e-05, | |
| "loss": 1.219789981842041, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.40894568690095845, | |
| "grad_norm": 0.69921875, | |
| "learning_rate": 1.9685014652503368e-05, | |
| "loss": 1.2046880722045898, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 0.4132055378061768, | |
| "grad_norm": 0.65625, | |
| "learning_rate": 1.9674500019575585e-05, | |
| "loss": 1.3095979690551758, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 0.4174653887113951, | |
| "grad_norm": 2.515625, | |
| "learning_rate": 1.966381640175625e-05, | |
| "loss": 1.236427903175354, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 0.4217252396166134, | |
| "grad_norm": 0.62109375, | |
| "learning_rate": 1.9652964034288054e-05, | |
| "loss": 1.2324111461639404, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 0.42598509052183176, | |
| "grad_norm": 0.72265625, | |
| "learning_rate": 1.9641943156129385e-05, | |
| "loss": 1.296306848526001, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.43024494142705005, | |
| "grad_norm": 0.91015625, | |
| "learning_rate": 1.9630754009949062e-05, | |
| "loss": 1.2453858852386475, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 0.43450479233226835, | |
| "grad_norm": 0.984375, | |
| "learning_rate": 1.961939684212101e-05, | |
| "loss": 1.2566311359405518, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 0.4387646432374867, | |
| "grad_norm": 0.546875, | |
| "learning_rate": 1.9607871902718817e-05, | |
| "loss": 1.1997429132461548, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 0.443024494142705, | |
| "grad_norm": 0.55078125, | |
| "learning_rate": 1.959617944551024e-05, | |
| "loss": 1.2592103481292725, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 0.4472843450479233, | |
| "grad_norm": 1.5546875, | |
| "learning_rate": 1.9584319727951608e-05, | |
| "loss": 1.2379335165023804, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.45154419595314166, | |
| "grad_norm": 0.3984375, | |
| "learning_rate": 1.9572293011182153e-05, | |
| "loss": 1.2336797714233398, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 0.45580404685835996, | |
| "grad_norm": 1.3671875, | |
| "learning_rate": 1.9560099560018276e-05, | |
| "loss": 1.2236690521240234, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 0.46006389776357826, | |
| "grad_norm": 0.53515625, | |
| "learning_rate": 1.95477396429477e-05, | |
| "loss": 1.2254308462142944, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 0.4643237486687966, | |
| "grad_norm": 0.578125, | |
| "learning_rate": 1.9535213532123554e-05, | |
| "loss": 1.1878576278686523, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 0.4685835995740149, | |
| "grad_norm": 0.5078125, | |
| "learning_rate": 1.95225215033584e-05, | |
| "loss": 1.1797441244125366, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.4728434504792332, | |
| "grad_norm": 0.8203125, | |
| "learning_rate": 1.950966383611815e-05, | |
| "loss": 1.2125729322433472, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 0.47710330138445156, | |
| "grad_norm": 0.486328125, | |
| "learning_rate": 1.9496640813515896e-05, | |
| "loss": 1.3302891254425049, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 0.48136315228966986, | |
| "grad_norm": 0.486328125, | |
| "learning_rate": 1.9483452722305705e-05, | |
| "loss": 1.206823468208313, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 0.48562300319488816, | |
| "grad_norm": 1.1484375, | |
| "learning_rate": 1.9470099852876284e-05, | |
| "loss": 1.1781184673309326, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 0.4898828541001065, | |
| "grad_norm": 1.2265625, | |
| "learning_rate": 1.9456582499244607e-05, | |
| "loss": 1.1779451370239258, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.4941427050053248, | |
| "grad_norm": 0.51953125, | |
| "learning_rate": 1.9442900959049402e-05, | |
| "loss": 1.2148332595825195, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 0.4984025559105431, | |
| "grad_norm": 0.9453125, | |
| "learning_rate": 1.942905553354465e-05, | |
| "loss": 1.2204636335372925, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 0.5026624068157615, | |
| "grad_norm": 0.5859375, | |
| "learning_rate": 1.9415046527592905e-05, | |
| "loss": 1.19704008102417, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 0.5069222577209798, | |
| "grad_norm": 0.6796875, | |
| "learning_rate": 1.9400874249658606e-05, | |
| "loss": 1.230122685432434, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 0.5111821086261981, | |
| "grad_norm": 1.015625, | |
| "learning_rate": 1.9386539011801288e-05, | |
| "loss": 1.2034316062927246, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.5154419595314164, | |
| "grad_norm": 0.6484375, | |
| "learning_rate": 1.9372041129668688e-05, | |
| "loss": 1.310076355934143, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 0.5197018104366348, | |
| "grad_norm": 0.64453125, | |
| "learning_rate": 1.935738092248981e-05, | |
| "loss": 1.2447279691696167, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 0.5239616613418531, | |
| "grad_norm": 0.72265625, | |
| "learning_rate": 1.9342558713067916e-05, | |
| "loss": 1.249355673789978, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 0.5282215122470714, | |
| "grad_norm": 0.54296875, | |
| "learning_rate": 1.932757482777337e-05, | |
| "loss": 1.2074915170669556, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 0.5324813631522897, | |
| "grad_norm": 0.87109375, | |
| "learning_rate": 1.9312429596536493e-05, | |
| "loss": 1.2255703210830688, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.536741214057508, | |
| "grad_norm": 0.9609375, | |
| "learning_rate": 1.929712335284028e-05, | |
| "loss": 1.174513339996338, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 0.5410010649627263, | |
| "grad_norm": 1.2265625, | |
| "learning_rate": 1.9281656433713065e-05, | |
| "loss": 1.186693787574768, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 0.5452609158679447, | |
| "grad_norm": 0.84765625, | |
| "learning_rate": 1.9266029179721087e-05, | |
| "loss": 1.1817073822021484, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 0.549520766773163, | |
| "grad_norm": 0.4453125, | |
| "learning_rate": 1.925024193496101e-05, | |
| "loss": 1.2056523561477661, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 0.5537806176783813, | |
| "grad_norm": 0.87109375, | |
| "learning_rate": 1.923429504705233e-05, | |
| "loss": 1.2128726243972778, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.5580404685835996, | |
| "grad_norm": 0.55078125, | |
| "learning_rate": 1.921818886712973e-05, | |
| "loss": 1.2569330930709839, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 0.5623003194888179, | |
| "grad_norm": 1.1484375, | |
| "learning_rate": 1.9201923749835343e-05, | |
| "loss": 1.2348226308822632, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 0.5665601703940362, | |
| "grad_norm": 0.73828125, | |
| "learning_rate": 1.9185500053310947e-05, | |
| "loss": 1.2059990167617798, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 0.5708200212992546, | |
| "grad_norm": 0.5546875, | |
| "learning_rate": 1.916891813919008e-05, | |
| "loss": 1.2185040712356567, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 0.5750798722044729, | |
| "grad_norm": 0.90234375, | |
| "learning_rate": 1.915217837259007e-05, | |
| "loss": 1.1790536642074585, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.5793397231096912, | |
| "grad_norm": 0.72265625, | |
| "learning_rate": 1.9135281122104e-05, | |
| "loss": 1.2463805675506592, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 0.5835995740149095, | |
| "grad_norm": 0.60546875, | |
| "learning_rate": 1.9118226759792606e-05, | |
| "loss": 1.2532294988632202, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 0.5878594249201278, | |
| "grad_norm": 1.1171875, | |
| "learning_rate": 1.910101566117605e-05, | |
| "loss": 1.257046103477478, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 0.5921192758253461, | |
| "grad_norm": 0.57421875, | |
| "learning_rate": 1.908364820522568e-05, | |
| "loss": 1.2225136756896973, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 0.5963791267305645, | |
| "grad_norm": 0.6328125, | |
| "learning_rate": 1.906612477435569e-05, | |
| "loss": 1.1029458045959473, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.6006389776357828, | |
| "grad_norm": 0.65625, | |
| "learning_rate": 1.9048445754414667e-05, | |
| "loss": 1.2707245349884033, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 0.6048988285410011, | |
| "grad_norm": 0.72265625, | |
| "learning_rate": 1.903061153467713e-05, | |
| "loss": 1.2144739627838135, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 0.6091586794462194, | |
| "grad_norm": 0.52734375, | |
| "learning_rate": 1.9012622507834943e-05, | |
| "loss": 1.267938494682312, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 0.6134185303514377, | |
| "grad_norm": 0.828125, | |
| "learning_rate": 1.8994479069988654e-05, | |
| "loss": 1.158606767654419, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 0.617678381256656, | |
| "grad_norm": 0.96484375, | |
| "learning_rate": 1.897618162063881e-05, | |
| "loss": 1.3172389268875122, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.6219382321618744, | |
| "grad_norm": 0.55859375, | |
| "learning_rate": 1.895773056267712e-05, | |
| "loss": 1.1937861442565918, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 0.6261980830670927, | |
| "grad_norm": 0.484375, | |
| "learning_rate": 1.893912630237762e-05, | |
| "loss": 1.2631405591964722, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 0.630457933972311, | |
| "grad_norm": 0.67578125, | |
| "learning_rate": 1.8920369249387697e-05, | |
| "loss": 1.2060716152191162, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 0.6347177848775293, | |
| "grad_norm": 0.78125, | |
| "learning_rate": 1.8901459816719085e-05, | |
| "loss": 1.1968940496444702, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 0.6389776357827476, | |
| "grad_norm": 0.6953125, | |
| "learning_rate": 1.8882398420738766e-05, | |
| "loss": 1.2191784381866455, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.6432374866879659, | |
| "grad_norm": 0.4765625, | |
| "learning_rate": 1.8863185481159815e-05, | |
| "loss": 1.1612508296966553, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 0.6474973375931843, | |
| "grad_norm": 0.84765625, | |
| "learning_rate": 1.8843821421032124e-05, | |
| "loss": 1.2527700662612915, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 0.6517571884984026, | |
| "grad_norm": 2.484375, | |
| "learning_rate": 1.8824306666733133e-05, | |
| "loss": 1.2492940425872803, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 0.6560170394036209, | |
| "grad_norm": 0.91796875, | |
| "learning_rate": 1.880464164795841e-05, | |
| "loss": 1.2013378143310547, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 0.6602768903088392, | |
| "grad_norm": 0.6015625, | |
| "learning_rate": 1.8784826797712195e-05, | |
| "loss": 1.1568100452423096, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.6645367412140575, | |
| "grad_norm": 3.8125, | |
| "learning_rate": 1.8764862552297865e-05, | |
| "loss": 1.2038320302963257, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 0.6687965921192758, | |
| "grad_norm": 0.5234375, | |
| "learning_rate": 1.874474935130835e-05, | |
| "loss": 1.1380341053009033, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 0.6730564430244942, | |
| "grad_norm": 0.578125, | |
| "learning_rate": 1.8724487637616405e-05, | |
| "loss": 1.1762800216674805, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 0.6773162939297125, | |
| "grad_norm": 0.69921875, | |
| "learning_rate": 1.8704077857364913e-05, | |
| "loss": 1.2073653936386108, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 0.6815761448349308, | |
| "grad_norm": 0.76953125, | |
| "learning_rate": 1.8683520459957027e-05, | |
| "loss": 1.2313804626464844, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.6858359957401491, | |
| "grad_norm": 0.53125, | |
| "learning_rate": 1.8662815898046277e-05, | |
| "loss": 1.1902257204055786, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 0.6900958466453674, | |
| "grad_norm": 0.7578125, | |
| "learning_rate": 1.8641964627526623e-05, | |
| "loss": 1.177359938621521, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 0.6943556975505857, | |
| "grad_norm": 0.640625, | |
| "learning_rate": 1.8620967107522393e-05, | |
| "loss": 1.1724225282669067, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 0.6986155484558041, | |
| "grad_norm": 0.8203125, | |
| "learning_rate": 1.8599823800378183e-05, | |
| "loss": 1.2089399099349976, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 0.7028753993610224, | |
| "grad_norm": 0.90234375, | |
| "learning_rate": 1.8578535171648683e-05, | |
| "loss": 1.1843369007110596, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.7071352502662407, | |
| "grad_norm": 0.625, | |
| "learning_rate": 1.8557101690088423e-05, | |
| "loss": 1.2020741701126099, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 0.711395101171459, | |
| "grad_norm": 0.578125, | |
| "learning_rate": 1.853552382764144e-05, | |
| "loss": 1.2178312540054321, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 0.7156549520766773, | |
| "grad_norm": 0.5625, | |
| "learning_rate": 1.8513802059430906e-05, | |
| "loss": 1.3234312534332275, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 0.7199148029818956, | |
| "grad_norm": 0.65625, | |
| "learning_rate": 1.8491936863748638e-05, | |
| "loss": 1.2087228298187256, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 0.724174653887114, | |
| "grad_norm": 0.6171875, | |
| "learning_rate": 1.8469928722044607e-05, | |
| "loss": 1.213451862335205, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.7284345047923323, | |
| "grad_norm": 0.58984375, | |
| "learning_rate": 1.8447778118916297e-05, | |
| "loss": 1.2303829193115234, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 0.7326943556975506, | |
| "grad_norm": 0.65625, | |
| "learning_rate": 1.8425485542098055e-05, | |
| "loss": 1.226832389831543, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 0.7369542066027689, | |
| "grad_norm": 0.94921875, | |
| "learning_rate": 1.8403051482450352e-05, | |
| "loss": 1.1740474700927734, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 0.7412140575079872, | |
| "grad_norm": 0.44921875, | |
| "learning_rate": 1.8380476433948967e-05, | |
| "loss": 1.234078288078308, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 0.7454739084132055, | |
| "grad_norm": 0.62109375, | |
| "learning_rate": 1.8357760893674118e-05, | |
| "loss": 1.1722126007080078, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.7497337593184239, | |
| "grad_norm": 0.455078125, | |
| "learning_rate": 1.8334905361799505e-05, | |
| "loss": 1.1766189336776733, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 0.7539936102236422, | |
| "grad_norm": 0.734375, | |
| "learning_rate": 1.8311910341581317e-05, | |
| "loss": 1.2379800081253052, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 0.7582534611288605, | |
| "grad_norm": 1.9375, | |
| "learning_rate": 1.8288776339347123e-05, | |
| "loss": 1.3067048788070679, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 0.7625133120340788, | |
| "grad_norm": 0.984375, | |
| "learning_rate": 1.826550386448475e-05, | |
| "loss": 1.249035120010376, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 0.7667731629392971, | |
| "grad_norm": 0.609375, | |
| "learning_rate": 1.8242093429431054e-05, | |
| "loss": 1.153590440750122, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.7710330138445154, | |
| "grad_norm": 1.515625, | |
| "learning_rate": 1.8218545549660627e-05, | |
| "loss": 1.2533551454544067, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 0.7752928647497338, | |
| "grad_norm": 0.9375, | |
| "learning_rate": 1.8194860743674475e-05, | |
| "loss": 1.2324496507644653, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 0.7795527156549521, | |
| "grad_norm": 0.5078125, | |
| "learning_rate": 1.817103953298857e-05, | |
| "loss": 1.1727190017700195, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 0.7838125665601704, | |
| "grad_norm": 0.435546875, | |
| "learning_rate": 1.814708244212238e-05, | |
| "loss": 1.202933430671692, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 0.7880724174653887, | |
| "grad_norm": 0.70703125, | |
| "learning_rate": 1.8122989998587334e-05, | |
| "loss": 1.1554442644119263, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.792332268370607, | |
| "grad_norm": 0.38671875, | |
| "learning_rate": 1.8098762732875176e-05, | |
| "loss": 1.2414895296096802, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 0.7965921192758253, | |
| "grad_norm": 0.70703125, | |
| "learning_rate": 1.807440117844632e-05, | |
| "loss": 1.196638584136963, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 0.8008519701810437, | |
| "grad_norm": 0.625, | |
| "learning_rate": 1.8049905871718057e-05, | |
| "loss": 1.1261889934539795, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 0.805111821086262, | |
| "grad_norm": 0.5703125, | |
| "learning_rate": 1.80252773520528e-05, | |
| "loss": 1.1604783535003662, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 0.8093716719914803, | |
| "grad_norm": 1.28125, | |
| "learning_rate": 1.8000516161746158e-05, | |
| "loss": 1.2182838916778564, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.8136315228966986, | |
| "grad_norm": 0.60546875, | |
| "learning_rate": 1.7975622846015025e-05, | |
| "loss": 1.118109941482544, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 0.8178913738019169, | |
| "grad_norm": 0.734375, | |
| "learning_rate": 1.795059795298557e-05, | |
| "loss": 1.171169638633728, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 0.8221512247071352, | |
| "grad_norm": 0.6640625, | |
| "learning_rate": 1.792544203368116e-05, | |
| "loss": 1.2411582469940186, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 0.8264110756123536, | |
| "grad_norm": 0.9140625, | |
| "learning_rate": 1.7900155642010224e-05, | |
| "loss": 1.2089407444000244, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 0.8306709265175719, | |
| "grad_norm": 0.5078125, | |
| "learning_rate": 1.7874739334754072e-05, | |
| "loss": 1.1152267456054688, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.8349307774227902, | |
| "grad_norm": 0.625, | |
| "learning_rate": 1.7849193671554625e-05, | |
| "loss": 1.1154651641845703, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 0.8391906283280085, | |
| "grad_norm": 0.458984375, | |
| "learning_rate": 1.7823519214902093e-05, | |
| "loss": 1.1766343116760254, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 0.8434504792332268, | |
| "grad_norm": 0.56640625, | |
| "learning_rate": 1.7797716530122588e-05, | |
| "loss": 1.061235785484314, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 0.8477103301384451, | |
| "grad_norm": 0.54296875, | |
| "learning_rate": 1.777178618536568e-05, | |
| "loss": 1.1713075637817383, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 0.8519701810436635, | |
| "grad_norm": 0.5, | |
| "learning_rate": 1.774572875159189e-05, | |
| "loss": 1.1845115423202515, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.8562300319488818, | |
| "grad_norm": 0.51953125, | |
| "learning_rate": 1.77195448025601e-05, | |
| "loss": 1.1638309955596924, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 0.8604898828541001, | |
| "grad_norm": 0.6875, | |
| "learning_rate": 1.7693234914814955e-05, | |
| "loss": 1.1716026067733765, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 0.8647497337593184, | |
| "grad_norm": 0.80078125, | |
| "learning_rate": 1.7666799667674123e-05, | |
| "loss": 1.2654908895492554, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 0.8690095846645367, | |
| "grad_norm": 1.140625, | |
| "learning_rate": 1.7640239643215574e-05, | |
| "loss": 1.219713568687439, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 0.873269435569755, | |
| "grad_norm": 1.15625, | |
| "learning_rate": 1.761355542626475e-05, | |
| "loss": 1.3021862506866455, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.8775292864749734, | |
| "grad_norm": 0.52734375, | |
| "learning_rate": 1.758674760438168e-05, | |
| "loss": 1.2263654470443726, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 0.8817891373801917, | |
| "grad_norm": 0.75390625, | |
| "learning_rate": 1.7559816767848063e-05, | |
| "loss": 1.212475061416626, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 0.88604898828541, | |
| "grad_norm": 1.0859375, | |
| "learning_rate": 1.7532763509654257e-05, | |
| "loss": 1.1879194974899292, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 0.8903088391906283, | |
| "grad_norm": 0.53125, | |
| "learning_rate": 1.7505588425486216e-05, | |
| "loss": 1.1831648349761963, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 0.8945686900958466, | |
| "grad_norm": 0.75, | |
| "learning_rate": 1.7478292113712387e-05, | |
| "loss": 1.2148785591125488, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.898828541001065, | |
| "grad_norm": 0.48046875, | |
| "learning_rate": 1.7450875175370532e-05, | |
| "loss": 1.1164610385894775, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 0.9030883919062833, | |
| "grad_norm": 0.478515625, | |
| "learning_rate": 1.7423338214154476e-05, | |
| "loss": 1.1142120361328125, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 0.9073482428115016, | |
| "grad_norm": 0.69921875, | |
| "learning_rate": 1.7395681836400856e-05, | |
| "loss": 1.1910451650619507, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 0.9116080937167199, | |
| "grad_norm": 0.828125, | |
| "learning_rate": 1.736790665107571e-05, | |
| "loss": 1.1739782094955444, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 0.9158679446219382, | |
| "grad_norm": 0.431640625, | |
| "learning_rate": 1.7340013269761127e-05, | |
| "loss": 1.156859278678894, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.9201277955271565, | |
| "grad_norm": 0.46875, | |
| "learning_rate": 1.731200230664174e-05, | |
| "loss": 1.1874017715454102, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 0.9243876464323749, | |
| "grad_norm": 0.376953125, | |
| "learning_rate": 1.7283874378491226e-05, | |
| "loss": 1.1091042757034302, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 0.9286474973375932, | |
| "grad_norm": 0.546875, | |
| "learning_rate": 1.7255630104658707e-05, | |
| "loss": 1.2352137565612793, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 0.9329073482428115, | |
| "grad_norm": 0.55859375, | |
| "learning_rate": 1.7227270107055134e-05, | |
| "loss": 1.1861075162887573, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 0.9371671991480298, | |
| "grad_norm": 0.84375, | |
| "learning_rate": 1.7198795010139565e-05, | |
| "loss": 1.1825189590454102, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.9414270500532481, | |
| "grad_norm": 0.51953125, | |
| "learning_rate": 1.7170205440905444e-05, | |
| "loss": 1.1355581283569336, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 0.9456869009584664, | |
| "grad_norm": 0.578125, | |
| "learning_rate": 1.7141502028866782e-05, | |
| "loss": 1.1689903736114502, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 0.9499467518636848, | |
| "grad_norm": 0.66015625, | |
| "learning_rate": 1.7112685406044283e-05, | |
| "loss": 1.1596486568450928, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 0.9542066027689031, | |
| "grad_norm": 0.62109375, | |
| "learning_rate": 1.7083756206951457e-05, | |
| "loss": 1.2047994136810303, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 0.9584664536741214, | |
| "grad_norm": 0.9140625, | |
| "learning_rate": 1.705471506858062e-05, | |
| "loss": 1.1618238687515259, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.9627263045793397, | |
| "grad_norm": 0.70703125, | |
| "learning_rate": 1.702556263038888e-05, | |
| "loss": 1.158185601234436, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 0.966986155484558, | |
| "grad_norm": 0.41015625, | |
| "learning_rate": 1.699629953428407e-05, | |
| "loss": 1.182377576828003, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 0.9712460063897763, | |
| "grad_norm": 0.609375, | |
| "learning_rate": 1.696692642461058e-05, | |
| "loss": 1.1322381496429443, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 0.9755058572949947, | |
| "grad_norm": 0.58203125, | |
| "learning_rate": 1.693744394813521e-05, | |
| "loss": 1.1632483005523682, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 0.979765708200213, | |
| "grad_norm": 0.5859375, | |
| "learning_rate": 1.6907852754032897e-05, | |
| "loss": 1.1526472568511963, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.9840255591054313, | |
| "grad_norm": 0.68359375, | |
| "learning_rate": 1.6878153493872438e-05, | |
| "loss": 1.212873101234436, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 0.9882854100106496, | |
| "grad_norm": 0.46484375, | |
| "learning_rate": 1.684834682160213e-05, | |
| "loss": 1.1329699754714966, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 0.9925452609158679, | |
| "grad_norm": 0.56640625, | |
| "learning_rate": 1.681843339353539e-05, | |
| "loss": 1.2156919240951538, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 0.9968051118210862, | |
| "grad_norm": 0.515625, | |
| "learning_rate": 1.678841386833628e-05, | |
| "loss": 1.1542456150054932, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 0.6328125, | |
| "learning_rate": 1.675828890700502e-05, | |
| "loss": 1.2353312969207764, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 1.0042598509052183, | |
| "grad_norm": 0.4296875, | |
| "learning_rate": 1.6728059172863434e-05, | |
| "loss": 1.0001182556152344, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 1.0085197018104366, | |
| "grad_norm": 0.447265625, | |
| "learning_rate": 1.6697725331540337e-05, | |
| "loss": 1.0624783039093018, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 1.012779552715655, | |
| "grad_norm": 0.458984375, | |
| "learning_rate": 1.666728805095688e-05, | |
| "loss": 1.0327106714248657, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 1.0170394036208732, | |
| "grad_norm": 0.609375, | |
| "learning_rate": 1.663674800131184e-05, | |
| "loss": 0.9451117515563965, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 1.0212992545260915, | |
| "grad_norm": 0.515625, | |
| "learning_rate": 1.660610585506687e-05, | |
| "loss": 1.0440083742141724, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 1.0255591054313098, | |
| "grad_norm": 0.4921875, | |
| "learning_rate": 1.65753622869317e-05, | |
| "loss": 1.0152018070220947, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 1.0298189563365283, | |
| "grad_norm": 3.34375, | |
| "learning_rate": 1.6544517973849247e-05, | |
| "loss": 1.005466341972351, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 1.0340788072417466, | |
| "grad_norm": 0.50390625, | |
| "learning_rate": 1.6513573594980753e-05, | |
| "loss": 1.0366133451461792, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 1.038338658146965, | |
| "grad_norm": 1.171875, | |
| "learning_rate": 1.6482529831690805e-05, | |
| "loss": 1.0141640901565552, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 1.0425985090521832, | |
| "grad_norm": 0.54296875, | |
| "learning_rate": 1.6451387367532316e-05, | |
| "loss": 0.9699265956878662, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 1.0468583599574015, | |
| "grad_norm": 0.546875, | |
| "learning_rate": 1.642014688823153e-05, | |
| "loss": 1.0510493516921997, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 1.0511182108626198, | |
| "grad_norm": 0.490234375, | |
| "learning_rate": 1.6388809081672854e-05, | |
| "loss": 0.9604178071022034, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 1.055378061767838, | |
| "grad_norm": 0.59375, | |
| "learning_rate": 1.635737463788377e-05, | |
| "loss": 1.0432132482528687, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 1.0596379126730564, | |
| "grad_norm": 0.921875, | |
| "learning_rate": 1.63258442490196e-05, | |
| "loss": 0.9879993796348572, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 1.0638977635782747, | |
| "grad_norm": 1.8671875, | |
| "learning_rate": 1.6294218609348292e-05, | |
| "loss": 0.989188015460968, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 1.068157614483493, | |
| "grad_norm": 0.490234375, | |
| "learning_rate": 1.626249841523511e-05, | |
| "loss": 1.0020525455474854, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 1.0724174653887113, | |
| "grad_norm": 0.6328125, | |
| "learning_rate": 1.623068436512732e-05, | |
| "loss": 1.0052525997161865, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 1.0766773162939298, | |
| "grad_norm": 0.50390625, | |
| "learning_rate": 1.619877715953882e-05, | |
| "loss": 1.030534029006958, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 1.0809371671991481, | |
| "grad_norm": 0.53515625, | |
| "learning_rate": 1.616677750103467e-05, | |
| "loss": 1.1060301065444946, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 1.0851970181043664, | |
| "grad_norm": 1.5703125, | |
| "learning_rate": 1.6134686094215663e-05, | |
| "loss": 0.976584255695343, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 1.0894568690095847, | |
| "grad_norm": 0.46484375, | |
| "learning_rate": 1.6102503645702813e-05, | |
| "loss": 0.9827842712402344, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 1.093716719914803, | |
| "grad_norm": 0.5703125, | |
| "learning_rate": 1.6070230864121765e-05, | |
| "loss": 0.9941512942314148, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 1.0979765708200213, | |
| "grad_norm": 0.640625, | |
| "learning_rate": 1.603786846008722e-05, | |
| "loss": 0.9916704893112183, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 1.1022364217252396, | |
| "grad_norm": 0.7734375, | |
| "learning_rate": 1.6005417146187262e-05, | |
| "loss": 0.9758402109146118, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 1.106496272630458, | |
| "grad_norm": 0.546875, | |
| "learning_rate": 1.59728776369677e-05, | |
| "loss": 1.0404748916625977, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 1.1107561235356762, | |
| "grad_norm": 0.63671875, | |
| "learning_rate": 1.5940250648916305e-05, | |
| "loss": 1.0453121662139893, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 1.1150159744408945, | |
| "grad_norm": 0.796875, | |
| "learning_rate": 1.5907536900447046e-05, | |
| "loss": 0.9855365753173828, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 1.1192758253461128, | |
| "grad_norm": 0.76953125, | |
| "learning_rate": 1.587473711188428e-05, | |
| "loss": 0.9536042213439941, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 1.123535676251331, | |
| "grad_norm": 0.50390625, | |
| "learning_rate": 1.5841852005446866e-05, | |
| "loss": 1.0321474075317383, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 1.1277955271565494, | |
| "grad_norm": 0.5625, | |
| "learning_rate": 1.5808882305232284e-05, | |
| "loss": 0.9870891571044922, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 1.132055378061768, | |
| "grad_norm": 0.6484375, | |
| "learning_rate": 1.5775828737200708e-05, | |
| "loss": 1.0295721292495728, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 1.1363152289669862, | |
| "grad_norm": 0.439453125, | |
| "learning_rate": 1.5742692029158956e-05, | |
| "loss": 0.9844380021095276, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 1.1405750798722045, | |
| "grad_norm": 0.53125, | |
| "learning_rate": 1.5709472910744535e-05, | |
| "loss": 1.0327517986297607, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 1.1448349307774228, | |
| "grad_norm": 0.54296875, | |
| "learning_rate": 1.567617211340955e-05, | |
| "loss": 1.015358328819275, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 1.1490947816826411, | |
| "grad_norm": 0.5390625, | |
| "learning_rate": 1.5642790370404572e-05, | |
| "loss": 0.9756560325622559, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 1.1533546325878594, | |
| "grad_norm": 0.62890625, | |
| "learning_rate": 1.5609328416762538e-05, | |
| "loss": 0.9365598559379578, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 1.1576144834930777, | |
| "grad_norm": 8.3125, | |
| "learning_rate": 1.557578698928253e-05, | |
| "loss": 1.0103881359100342, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 1.161874334398296, | |
| "grad_norm": 1.25, | |
| "learning_rate": 1.5542166826513576e-05, | |
| "loss": 0.972273051738739, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 1.1661341853035143, | |
| "grad_norm": 0.90625, | |
| "learning_rate": 1.550846866873837e-05, | |
| "loss": 1.016709804534912, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 1.1703940362087326, | |
| "grad_norm": 0.55859375, | |
| "learning_rate": 1.5474693257956976e-05, | |
| "loss": 1.0085346698760986, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 1.174653887113951, | |
| "grad_norm": 1.328125, | |
| "learning_rate": 1.5440841337870497e-05, | |
| "loss": 0.9733573794364929, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 1.1789137380191694, | |
| "grad_norm": 0.64453125, | |
| "learning_rate": 1.54069136538647e-05, | |
| "loss": 1.0755430459976196, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 1.1831735889243877, | |
| "grad_norm": 0.640625, | |
| "learning_rate": 1.5372910952993588e-05, | |
| "loss": 1.1009080410003662, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 1.187433439829606, | |
| "grad_norm": 0.953125, | |
| "learning_rate": 1.5338833983962972e-05, | |
| "loss": 0.9900831580162048, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 1.1916932907348243, | |
| "grad_norm": 0.466796875, | |
| "learning_rate": 1.5304683497113963e-05, | |
| "loss": 1.0196163654327393, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 1.1959531416400426, | |
| "grad_norm": 0.73046875, | |
| "learning_rate": 1.5270460244406467e-05, | |
| "loss": 1.012573480606079, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 1.200212992545261, | |
| "grad_norm": 0.5703125, | |
| "learning_rate": 1.523616497940263e-05, | |
| "loss": 0.9771086573600769, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 1.2044728434504792, | |
| "grad_norm": 0.45703125, | |
| "learning_rate": 1.5201798457250227e-05, | |
| "loss": 1.048552393913269, | |
| "step": 566 | |
| }, | |
| { | |
| "epoch": 1.2087326943556975, | |
| "grad_norm": 0.4609375, | |
| "learning_rate": 1.5167361434666044e-05, | |
| "loss": 0.9992176294326782, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 1.2129925452609158, | |
| "grad_norm": 0.765625, | |
| "learning_rate": 1.5132854669919223e-05, | |
| "loss": 0.9978233575820923, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 1.2172523961661341, | |
| "grad_norm": 0.42578125, | |
| "learning_rate": 1.5098278922814555e-05, | |
| "loss": 0.9533007740974426, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 1.2215122470713524, | |
| "grad_norm": 0.59375, | |
| "learning_rate": 1.5063634954675758e-05, | |
| "loss": 1.05226469039917, | |
| "step": 574 | |
| }, | |
| { | |
| "epoch": 1.225772097976571, | |
| "grad_norm": 0.70703125, | |
| "learning_rate": 1.5028923528328712e-05, | |
| "loss": 1.0399305820465088, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 1.230031948881789, | |
| "grad_norm": 0.439453125, | |
| "learning_rate": 1.499414540808465e-05, | |
| "loss": 1.0016851425170898, | |
| "step": 578 | |
| }, | |
| { | |
| "epoch": 1.2342917997870075, | |
| "grad_norm": 0.42578125, | |
| "learning_rate": 1.4959301359723348e-05, | |
| "loss": 0.9840012192726135, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 1.2385516506922258, | |
| "grad_norm": 0.4609375, | |
| "learning_rate": 1.4924392150476247e-05, | |
| "loss": 0.9944754838943481, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 1.2428115015974441, | |
| "grad_norm": 0.58984375, | |
| "learning_rate": 1.488941854900957e-05, | |
| "loss": 1.0165916681289673, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 1.2470713525026624, | |
| "grad_norm": 0.6484375, | |
| "learning_rate": 1.4854381325407393e-05, | |
| "loss": 1.0132522583007812, | |
| "step": 586 | |
| }, | |
| { | |
| "epoch": 1.2513312034078807, | |
| "grad_norm": 0.51171875, | |
| "learning_rate": 1.4819281251154686e-05, | |
| "loss": 1.0050327777862549, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 1.255591054313099, | |
| "grad_norm": 0.703125, | |
| "learning_rate": 1.4784119099120338e-05, | |
| "loss": 1.0251795053482056, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 1.2598509052183173, | |
| "grad_norm": 0.671875, | |
| "learning_rate": 1.4748895643540109e-05, | |
| "loss": 1.0751694440841675, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 1.2641107561235356, | |
| "grad_norm": 0.66015625, | |
| "learning_rate": 1.4713611659999618e-05, | |
| "loss": 0.9825355410575867, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 1.268370607028754, | |
| "grad_norm": 0.578125, | |
| "learning_rate": 1.4678267925417254e-05, | |
| "loss": 1.0707963705062866, | |
| "step": 596 | |
| }, | |
| { | |
| "epoch": 1.2726304579339724, | |
| "grad_norm": 0.466796875, | |
| "learning_rate": 1.4642865218027047e-05, | |
| "loss": 0.9957266449928284, | |
| "step": 598 | |
| }, | |
| { | |
| "epoch": 1.2768903088391905, | |
| "grad_norm": 1.7265625, | |
| "learning_rate": 1.4607404317361566e-05, | |
| "loss": 0.9615009427070618, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 1.281150159744409, | |
| "grad_norm": 0.515625, | |
| "learning_rate": 1.4571886004234728e-05, | |
| "loss": 0.9674661159515381, | |
| "step": 602 | |
| }, | |
| { | |
| "epoch": 1.2854100106496273, | |
| "grad_norm": 0.484375, | |
| "learning_rate": 1.4536311060724622e-05, | |
| "loss": 1.0484395027160645, | |
| "step": 604 | |
| }, | |
| { | |
| "epoch": 1.2896698615548456, | |
| "grad_norm": 0.5625, | |
| "learning_rate": 1.4500680270156285e-05, | |
| "loss": 1.0365606546401978, | |
| "step": 606 | |
| }, | |
| { | |
| "epoch": 1.293929712460064, | |
| "grad_norm": 3.34375, | |
| "learning_rate": 1.4464994417084434e-05, | |
| "loss": 0.9990244507789612, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 1.2981895633652822, | |
| "grad_norm": 0.5234375, | |
| "learning_rate": 1.442925428727623e-05, | |
| "loss": 0.9851716756820679, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 1.3024494142705005, | |
| "grad_norm": 0.61328125, | |
| "learning_rate": 1.4393460667693937e-05, | |
| "loss": 1.04202139377594, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 1.3067092651757188, | |
| "grad_norm": 0.58203125, | |
| "learning_rate": 1.4357614346477622e-05, | |
| "loss": 1.027279019355774, | |
| "step": 614 | |
| }, | |
| { | |
| "epoch": 1.3109691160809371, | |
| "grad_norm": 0.40234375, | |
| "learning_rate": 1.4321716112927777e-05, | |
| "loss": 0.987048864364624, | |
| "step": 616 | |
| }, | |
| { | |
| "epoch": 1.3152289669861554, | |
| "grad_norm": 0.66015625, | |
| "learning_rate": 1.4285766757487966e-05, | |
| "loss": 0.9855493307113647, | |
| "step": 618 | |
| }, | |
| { | |
| "epoch": 1.3194888178913737, | |
| "grad_norm": 0.68359375, | |
| "learning_rate": 1.4249767071727406e-05, | |
| "loss": 0.9904506802558899, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 1.323748668796592, | |
| "grad_norm": 0.50390625, | |
| "learning_rate": 1.4213717848323523e-05, | |
| "loss": 1.0218966007232666, | |
| "step": 622 | |
| }, | |
| { | |
| "epoch": 1.3280085197018106, | |
| "grad_norm": 0.64453125, | |
| "learning_rate": 1.4177619881044527e-05, | |
| "loss": 1.0790345668792725, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 1.3322683706070286, | |
| "grad_norm": 0.478515625, | |
| "learning_rate": 1.4141473964731915e-05, | |
| "loss": 0.9817459583282471, | |
| "step": 626 | |
| }, | |
| { | |
| "epoch": 1.3365282215122471, | |
| "grad_norm": 1.0859375, | |
| "learning_rate": 1.4105280895282972e-05, | |
| "loss": 0.9989323019981384, | |
| "step": 628 | |
| }, | |
| { | |
| "epoch": 1.3407880724174654, | |
| "grad_norm": 0.61328125, | |
| "learning_rate": 1.4069041469633251e-05, | |
| "loss": 0.9893896579742432, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 1.3450479233226837, | |
| "grad_norm": 0.58203125, | |
| "learning_rate": 1.4032756485739025e-05, | |
| "loss": 1.0551786422729492, | |
| "step": 632 | |
| }, | |
| { | |
| "epoch": 1.349307774227902, | |
| "grad_norm": 0.48828125, | |
| "learning_rate": 1.3996426742559712e-05, | |
| "loss": 0.9710611701011658, | |
| "step": 634 | |
| }, | |
| { | |
| "epoch": 1.3535676251331203, | |
| "grad_norm": 0.546875, | |
| "learning_rate": 1.3960053040040284e-05, | |
| "loss": 0.9912348985671997, | |
| "step": 636 | |
| }, | |
| { | |
| "epoch": 1.3578274760383386, | |
| "grad_norm": 0.6171875, | |
| "learning_rate": 1.3923636179093653e-05, | |
| "loss": 0.9898839592933655, | |
| "step": 638 | |
| }, | |
| { | |
| "epoch": 1.362087326943557, | |
| "grad_norm": 0.6171875, | |
| "learning_rate": 1.3887176961583043e-05, | |
| "loss": 0.9827688932418823, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 1.3663471778487752, | |
| "grad_norm": 0.478515625, | |
| "learning_rate": 1.3850676190304322e-05, | |
| "loss": 0.9909817576408386, | |
| "step": 642 | |
| }, | |
| { | |
| "epoch": 1.3706070287539935, | |
| "grad_norm": 1.1796875, | |
| "learning_rate": 1.3814134668968331e-05, | |
| "loss": 1.054543375968933, | |
| "step": 644 | |
| }, | |
| { | |
| "epoch": 1.374866879659212, | |
| "grad_norm": 0.6015625, | |
| "learning_rate": 1.3777553202183186e-05, | |
| "loss": 1.0329318046569824, | |
| "step": 646 | |
| }, | |
| { | |
| "epoch": 1.3791267305644301, | |
| "grad_norm": 0.56640625, | |
| "learning_rate": 1.3740932595436574e-05, | |
| "loss": 0.9633697867393494, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 1.3833865814696487, | |
| "grad_norm": 0.48046875, | |
| "learning_rate": 1.3704273655077986e-05, | |
| "loss": 0.9963237643241882, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 1.387646432374867, | |
| "grad_norm": 0.486328125, | |
| "learning_rate": 1.3667577188300994e-05, | |
| "loss": 0.9815477728843689, | |
| "step": 652 | |
| }, | |
| { | |
| "epoch": 1.3919062832800853, | |
| "grad_norm": 0.69921875, | |
| "learning_rate": 1.3630844003125466e-05, | |
| "loss": 0.9907623529434204, | |
| "step": 654 | |
| }, | |
| { | |
| "epoch": 1.3961661341853036, | |
| "grad_norm": 0.76171875, | |
| "learning_rate": 1.3594074908379759e-05, | |
| "loss": 1.0208251476287842, | |
| "step": 656 | |
| }, | |
| { | |
| "epoch": 1.4004259850905219, | |
| "grad_norm": 0.5859375, | |
| "learning_rate": 1.3557270713682944e-05, | |
| "loss": 1.0356820821762085, | |
| "step": 658 | |
| }, | |
| { | |
| "epoch": 1.4046858359957402, | |
| "grad_norm": 0.4453125, | |
| "learning_rate": 1.3520432229426933e-05, | |
| "loss": 0.9749876260757446, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 1.4089456869009584, | |
| "grad_norm": 0.486328125, | |
| "learning_rate": 1.3483560266758688e-05, | |
| "loss": 0.9936063885688782, | |
| "step": 662 | |
| }, | |
| { | |
| "epoch": 1.4132055378061767, | |
| "grad_norm": 0.5859375, | |
| "learning_rate": 1.344665563756231e-05, | |
| "loss": 0.9732779264450073, | |
| "step": 664 | |
| }, | |
| { | |
| "epoch": 1.417465388711395, | |
| "grad_norm": 0.482421875, | |
| "learning_rate": 1.3409719154441194e-05, | |
| "loss": 1.0013114213943481, | |
| "step": 666 | |
| }, | |
| { | |
| "epoch": 1.4217252396166133, | |
| "grad_norm": 0.78125, | |
| "learning_rate": 1.3372751630700127e-05, | |
| "loss": 0.9554456472396851, | |
| "step": 668 | |
| }, | |
| { | |
| "epoch": 1.4259850905218316, | |
| "grad_norm": 0.5546875, | |
| "learning_rate": 1.3335753880327378e-05, | |
| "loss": 1.013936161994934, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 1.4302449414270502, | |
| "grad_norm": 0.5078125, | |
| "learning_rate": 1.3298726717976776e-05, | |
| "loss": 0.9992549419403076, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 1.4345047923322682, | |
| "grad_norm": 0.546875, | |
| "learning_rate": 1.3261670958949778e-05, | |
| "loss": 0.9462178945541382, | |
| "step": 674 | |
| }, | |
| { | |
| "epoch": 1.4387646432374868, | |
| "grad_norm": 0.486328125, | |
| "learning_rate": 1.32245874191775e-05, | |
| "loss": 1.0281662940979004, | |
| "step": 676 | |
| }, | |
| { | |
| "epoch": 1.443024494142705, | |
| "grad_norm": 0.79296875, | |
| "learning_rate": 1.3187476915202772e-05, | |
| "loss": 1.080543875694275, | |
| "step": 678 | |
| }, | |
| { | |
| "epoch": 1.4472843450479234, | |
| "grad_norm": 0.80078125, | |
| "learning_rate": 1.315034026416215e-05, | |
| "loss": 0.9739678502082825, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 1.4515441959531417, | |
| "grad_norm": 0.58984375, | |
| "learning_rate": 1.3113178283767916e-05, | |
| "loss": 1.058993935585022, | |
| "step": 682 | |
| }, | |
| { | |
| "epoch": 1.45580404685836, | |
| "grad_norm": 0.515625, | |
| "learning_rate": 1.3075991792290087e-05, | |
| "loss": 0.9686048626899719, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 1.4600638977635783, | |
| "grad_norm": 0.494140625, | |
| "learning_rate": 1.303878160853838e-05, | |
| "loss": 1.0147266387939453, | |
| "step": 686 | |
| }, | |
| { | |
| "epoch": 1.4643237486687966, | |
| "grad_norm": 0.5234375, | |
| "learning_rate": 1.3001548551844198e-05, | |
| "loss": 0.9957408905029297, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 1.4685835995740149, | |
| "grad_norm": 0.64453125, | |
| "learning_rate": 1.2964293442042583e-05, | |
| "loss": 1.0241044759750366, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 1.4728434504792332, | |
| "grad_norm": 0.6484375, | |
| "learning_rate": 1.2927017099454174e-05, | |
| "loss": 0.9795851707458496, | |
| "step": 692 | |
| }, | |
| { | |
| "epoch": 1.4771033013844517, | |
| "grad_norm": 0.5390625, | |
| "learning_rate": 1.288972034486711e-05, | |
| "loss": 1.0294287204742432, | |
| "step": 694 | |
| }, | |
| { | |
| "epoch": 1.4813631522896697, | |
| "grad_norm": 0.45703125, | |
| "learning_rate": 1.2852403999519009e-05, | |
| "loss": 1.04385507106781, | |
| "step": 696 | |
| }, | |
| { | |
| "epoch": 1.4856230031948883, | |
| "grad_norm": 0.462890625, | |
| "learning_rate": 1.2815068885078848e-05, | |
| "loss": 1.0272358655929565, | |
| "step": 698 | |
| }, | |
| { | |
| "epoch": 1.4898828541001066, | |
| "grad_norm": 0.5625, | |
| "learning_rate": 1.2777715823628869e-05, | |
| "loss": 1.0158170461654663, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 1.4941427050053249, | |
| "grad_norm": 3.25, | |
| "learning_rate": 1.274034563764651e-05, | |
| "loss": 1.0180643796920776, | |
| "step": 702 | |
| }, | |
| { | |
| "epoch": 1.4984025559105432, | |
| "grad_norm": 0.55859375, | |
| "learning_rate": 1.2702959149986263e-05, | |
| "loss": 0.9400717616081238, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 1.5026624068157615, | |
| "grad_norm": 1.46875, | |
| "learning_rate": 1.2665557183861563e-05, | |
| "loss": 1.0463502407073975, | |
| "step": 706 | |
| }, | |
| { | |
| "epoch": 1.5069222577209798, | |
| "grad_norm": 0.4453125, | |
| "learning_rate": 1.2628140562826674e-05, | |
| "loss": 0.9682408571243286, | |
| "step": 708 | |
| }, | |
| { | |
| "epoch": 1.511182108626198, | |
| "grad_norm": 2.0, | |
| "learning_rate": 1.2590710110758542e-05, | |
| "loss": 0.9887672066688538, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 1.5154419595314164, | |
| "grad_norm": 0.65234375, | |
| "learning_rate": 1.2553266651838664e-05, | |
| "loss": 0.9790204167366028, | |
| "step": 712 | |
| }, | |
| { | |
| "epoch": 1.5197018104366347, | |
| "grad_norm": 0.55078125, | |
| "learning_rate": 1.2515811010534926e-05, | |
| "loss": 1.048098087310791, | |
| "step": 714 | |
| }, | |
| { | |
| "epoch": 1.5239616613418532, | |
| "grad_norm": 0.91796875, | |
| "learning_rate": 1.2478344011583463e-05, | |
| "loss": 0.9795944094657898, | |
| "step": 716 | |
| }, | |
| { | |
| "epoch": 1.5282215122470713, | |
| "grad_norm": 0.60546875, | |
| "learning_rate": 1.2440866479970501e-05, | |
| "loss": 1.1048449277877808, | |
| "step": 718 | |
| }, | |
| { | |
| "epoch": 1.5324813631522898, | |
| "grad_norm": 0.5, | |
| "learning_rate": 1.2403379240914171e-05, | |
| "loss": 1.1177802085876465, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 1.5367412140575079, | |
| "grad_norm": 0.5859375, | |
| "learning_rate": 1.2365883119846358e-05, | |
| "loss": 0.9775673747062683, | |
| "step": 722 | |
| }, | |
| { | |
| "epoch": 1.5410010649627264, | |
| "grad_norm": 0.56640625, | |
| "learning_rate": 1.2328378942394526e-05, | |
| "loss": 1.0114903450012207, | |
| "step": 724 | |
| }, | |
| { | |
| "epoch": 1.5452609158679447, | |
| "grad_norm": 0.6640625, | |
| "learning_rate": 1.2290867534363523e-05, | |
| "loss": 0.8942697048187256, | |
| "step": 726 | |
| }, | |
| { | |
| "epoch": 1.549520766773163, | |
| "grad_norm": 0.490234375, | |
| "learning_rate": 1.225334972171742e-05, | |
| "loss": 1.0175893306732178, | |
| "step": 728 | |
| }, | |
| { | |
| "epoch": 1.5537806176783813, | |
| "grad_norm": 0.578125, | |
| "learning_rate": 1.2215826330561296e-05, | |
| "loss": 1.027167797088623, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 1.5580404685835996, | |
| "grad_norm": 0.5625, | |
| "learning_rate": 1.2178298187123072e-05, | |
| "loss": 0.9816038608551025, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 1.5623003194888179, | |
| "grad_norm": 0.78515625, | |
| "learning_rate": 1.2140766117735309e-05, | |
| "loss": 1.032719612121582, | |
| "step": 734 | |
| }, | |
| { | |
| "epoch": 1.5665601703940362, | |
| "grad_norm": 0.85546875, | |
| "learning_rate": 1.210323094881701e-05, | |
| "loss": 1.034834384918213, | |
| "step": 736 | |
| }, | |
| { | |
| "epoch": 1.5708200212992547, | |
| "grad_norm": 0.6171875, | |
| "learning_rate": 1.2065693506855438e-05, | |
| "loss": 0.9969629645347595, | |
| "step": 738 | |
| }, | |
| { | |
| "epoch": 1.5750798722044728, | |
| "grad_norm": 0.6484375, | |
| "learning_rate": 1.2028154618387888e-05, | |
| "loss": 0.9849120378494263, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 1.5793397231096913, | |
| "grad_norm": 0.41796875, | |
| "learning_rate": 1.1990615109983516e-05, | |
| "loss": 1.042475700378418, | |
| "step": 742 | |
| }, | |
| { | |
| "epoch": 1.5835995740149094, | |
| "grad_norm": 0.416015625, | |
| "learning_rate": 1.1953075808225134e-05, | |
| "loss": 0.9866620898246765, | |
| "step": 744 | |
| }, | |
| { | |
| "epoch": 1.5878594249201279, | |
| "grad_norm": 0.484375, | |
| "learning_rate": 1.191553753969099e-05, | |
| "loss": 1.012044906616211, | |
| "step": 746 | |
| }, | |
| { | |
| "epoch": 1.592119275825346, | |
| "grad_norm": 0.50390625, | |
| "learning_rate": 1.1878001130936596e-05, | |
| "loss": 0.9986369609832764, | |
| "step": 748 | |
| }, | |
| { | |
| "epoch": 1.5963791267305645, | |
| "grad_norm": 0.67578125, | |
| "learning_rate": 1.1840467408476495e-05, | |
| "loss": 0.9213187098503113, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 1.6006389776357828, | |
| "grad_norm": 0.5234375, | |
| "learning_rate": 1.1802937198766105e-05, | |
| "loss": 1.0106372833251953, | |
| "step": 752 | |
| }, | |
| { | |
| "epoch": 1.604898828541001, | |
| "grad_norm": 0.66796875, | |
| "learning_rate": 1.1765411328183474e-05, | |
| "loss": 1.030936360359192, | |
| "step": 754 | |
| }, | |
| { | |
| "epoch": 1.6091586794462194, | |
| "grad_norm": 0.421875, | |
| "learning_rate": 1.172789062301112e-05, | |
| "loss": 0.9806941747665405, | |
| "step": 756 | |
| }, | |
| { | |
| "epoch": 1.6134185303514377, | |
| "grad_norm": 0.466796875, | |
| "learning_rate": 1.1690375909417817e-05, | |
| "loss": 1.0275804996490479, | |
| "step": 758 | |
| }, | |
| { | |
| "epoch": 1.617678381256656, | |
| "grad_norm": 0.48046875, | |
| "learning_rate": 1.165286801344042e-05, | |
| "loss": 0.9850936532020569, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 1.6219382321618743, | |
| "grad_norm": 0.5, | |
| "learning_rate": 1.1615367760965652e-05, | |
| "loss": 0.9898473620414734, | |
| "step": 762 | |
| }, | |
| { | |
| "epoch": 1.6261980830670928, | |
| "grad_norm": 0.71875, | |
| "learning_rate": 1.1577875977711943e-05, | |
| "loss": 0.9166401624679565, | |
| "step": 764 | |
| }, | |
| { | |
| "epoch": 1.6304579339723109, | |
| "grad_norm": 0.90234375, | |
| "learning_rate": 1.1540393489211246e-05, | |
| "loss": 1.0430673360824585, | |
| "step": 766 | |
| }, | |
| { | |
| "epoch": 1.6347177848775294, | |
| "grad_norm": 0.86328125, | |
| "learning_rate": 1.1502921120790832e-05, | |
| "loss": 0.9577189683914185, | |
| "step": 768 | |
| }, | |
| { | |
| "epoch": 1.6389776357827475, | |
| "grad_norm": 0.6015625, | |
| "learning_rate": 1.146545969755516e-05, | |
| "loss": 1.046250581741333, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 1.643237486687966, | |
| "grad_norm": 0.4453125, | |
| "learning_rate": 1.1428010044367676e-05, | |
| "loss": 0.9433165788650513, | |
| "step": 772 | |
| }, | |
| { | |
| "epoch": 1.6474973375931843, | |
| "grad_norm": 0.53125, | |
| "learning_rate": 1.139057298583266e-05, | |
| "loss": 1.0670093297958374, | |
| "step": 774 | |
| }, | |
| { | |
| "epoch": 1.6517571884984026, | |
| "grad_norm": 0.8671875, | |
| "learning_rate": 1.1353149346277071e-05, | |
| "loss": 1.0747054815292358, | |
| "step": 776 | |
| }, | |
| { | |
| "epoch": 1.6560170394036209, | |
| "grad_norm": 0.462890625, | |
| "learning_rate": 1.1315739949732403e-05, | |
| "loss": 0.9709829092025757, | |
| "step": 778 | |
| }, | |
| { | |
| "epoch": 1.6602768903088392, | |
| "grad_norm": 0.55859375, | |
| "learning_rate": 1.1278345619916519e-05, | |
| "loss": 0.9456352591514587, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 1.6645367412140575, | |
| "grad_norm": 0.61328125, | |
| "learning_rate": 1.1240967180215546e-05, | |
| "loss": 0.9906789064407349, | |
| "step": 782 | |
| }, | |
| { | |
| "epoch": 1.6687965921192758, | |
| "grad_norm": 0.9453125, | |
| "learning_rate": 1.1203605453665706e-05, | |
| "loss": 0.9821105599403381, | |
| "step": 784 | |
| }, | |
| { | |
| "epoch": 1.6730564430244943, | |
| "grad_norm": 0.76953125, | |
| "learning_rate": 1.1166261262935234e-05, | |
| "loss": 0.9833469390869141, | |
| "step": 786 | |
| }, | |
| { | |
| "epoch": 1.6773162939297124, | |
| "grad_norm": 0.384765625, | |
| "learning_rate": 1.1128935430306224e-05, | |
| "loss": 0.9621955752372742, | |
| "step": 788 | |
| }, | |
| { | |
| "epoch": 1.681576144834931, | |
| "grad_norm": 0.76953125, | |
| "learning_rate": 1.1091628777656558e-05, | |
| "loss": 1.0520445108413696, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 1.685835995740149, | |
| "grad_norm": 1.734375, | |
| "learning_rate": 1.105434212644179e-05, | |
| "loss": 1.00932776927948, | |
| "step": 792 | |
| }, | |
| { | |
| "epoch": 1.6900958466453675, | |
| "grad_norm": 2.109375, | |
| "learning_rate": 1.1017076297677054e-05, | |
| "loss": 1.0297919511795044, | |
| "step": 794 | |
| }, | |
| { | |
| "epoch": 1.6943556975505856, | |
| "grad_norm": 0.6171875, | |
| "learning_rate": 1.0979832111919002e-05, | |
| "loss": 0.9979485273361206, | |
| "step": 796 | |
| }, | |
| { | |
| "epoch": 1.698615548455804, | |
| "grad_norm": 0.71875, | |
| "learning_rate": 1.0942610389247735e-05, | |
| "loss": 1.0736881494522095, | |
| "step": 798 | |
| }, | |
| { | |
| "epoch": 1.7028753993610224, | |
| "grad_norm": 0.48046875, | |
| "learning_rate": 1.0905411949248722e-05, | |
| "loss": 0.9282923936843872, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 1.7071352502662407, | |
| "grad_norm": 1.2734375, | |
| "learning_rate": 1.086823761099478e-05, | |
| "loss": 0.9751482605934143, | |
| "step": 802 | |
| }, | |
| { | |
| "epoch": 1.711395101171459, | |
| "grad_norm": 0.61328125, | |
| "learning_rate": 1.0831088193028037e-05, | |
| "loss": 1.050538182258606, | |
| "step": 804 | |
| }, | |
| { | |
| "epoch": 1.7156549520766773, | |
| "grad_norm": 0.4375, | |
| "learning_rate": 1.0793964513341892e-05, | |
| "loss": 0.8432624936103821, | |
| "step": 806 | |
| }, | |
| { | |
| "epoch": 1.7199148029818956, | |
| "grad_norm": 0.8671875, | |
| "learning_rate": 1.0756867389363007e-05, | |
| "loss": 1.0375334024429321, | |
| "step": 808 | |
| }, | |
| { | |
| "epoch": 1.7241746538871139, | |
| "grad_norm": 0.55078125, | |
| "learning_rate": 1.0719797637933325e-05, | |
| "loss": 1.0315303802490234, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 1.7284345047923324, | |
| "grad_norm": 0.50390625, | |
| "learning_rate": 1.068275607529207e-05, | |
| "loss": 1.0522267818450928, | |
| "step": 812 | |
| }, | |
| { | |
| "epoch": 1.7326943556975505, | |
| "grad_norm": 0.7265625, | |
| "learning_rate": 1.0645743517057766e-05, | |
| "loss": 0.9734303951263428, | |
| "step": 814 | |
| }, | |
| { | |
| "epoch": 1.736954206602769, | |
| "grad_norm": 0.5, | |
| "learning_rate": 1.0608760778210305e-05, | |
| "loss": 0.956866979598999, | |
| "step": 816 | |
| }, | |
| { | |
| "epoch": 1.741214057507987, | |
| "grad_norm": 0.58984375, | |
| "learning_rate": 1.057180867307298e-05, | |
| "loss": 0.9936807751655579, | |
| "step": 818 | |
| }, | |
| { | |
| "epoch": 1.7454739084132056, | |
| "grad_norm": 0.78515625, | |
| "learning_rate": 1.053488801529455e-05, | |
| "loss": 1.0136879682540894, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 1.749733759318424, | |
| "grad_norm": 0.5078125, | |
| "learning_rate": 1.0497999617831352e-05, | |
| "loss": 0.9871541261672974, | |
| "step": 822 | |
| }, | |
| { | |
| "epoch": 1.7539936102236422, | |
| "grad_norm": 0.76171875, | |
| "learning_rate": 1.0461144292929361e-05, | |
| "loss": 0.990527868270874, | |
| "step": 824 | |
| }, | |
| { | |
| "epoch": 1.7582534611288605, | |
| "grad_norm": 1.25, | |
| "learning_rate": 1.0424322852106354e-05, | |
| "loss": 1.039267897605896, | |
| "step": 826 | |
| }, | |
| { | |
| "epoch": 1.7625133120340788, | |
| "grad_norm": 0.470703125, | |
| "learning_rate": 1.0387536106133989e-05, | |
| "loss": 1.069290280342102, | |
| "step": 828 | |
| }, | |
| { | |
| "epoch": 1.766773162939297, | |
| "grad_norm": 0.6328125, | |
| "learning_rate": 1.0350784865019987e-05, | |
| "loss": 0.9809550642967224, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 1.7710330138445154, | |
| "grad_norm": 0.48046875, | |
| "learning_rate": 1.0314069937990298e-05, | |
| "loss": 0.9778714776039124, | |
| "step": 832 | |
| }, | |
| { | |
| "epoch": 1.775292864749734, | |
| "grad_norm": 1.015625, | |
| "learning_rate": 1.027739213347125e-05, | |
| "loss": 0.9834805130958557, | |
| "step": 834 | |
| }, | |
| { | |
| "epoch": 1.779552715654952, | |
| "grad_norm": 0.6640625, | |
| "learning_rate": 1.0240752259071792e-05, | |
| "loss": 1.0088666677474976, | |
| "step": 836 | |
| }, | |
| { | |
| "epoch": 1.7838125665601705, | |
| "grad_norm": 0.47265625, | |
| "learning_rate": 1.0204151121565675e-05, | |
| "loss": 0.958771288394928, | |
| "step": 838 | |
| }, | |
| { | |
| "epoch": 1.7880724174653886, | |
| "grad_norm": 0.59375, | |
| "learning_rate": 1.0167589526873706e-05, | |
| "loss": 1.0218135118484497, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 1.792332268370607, | |
| "grad_norm": 0.71484375, | |
| "learning_rate": 1.0131068280045997e-05, | |
| "loss": 1.027093768119812, | |
| "step": 842 | |
| }, | |
| { | |
| "epoch": 1.7965921192758252, | |
| "grad_norm": 0.91015625, | |
| "learning_rate": 1.0094588185244248e-05, | |
| "loss": 1.0224063396453857, | |
| "step": 844 | |
| }, | |
| { | |
| "epoch": 1.8008519701810437, | |
| "grad_norm": 0.60546875, | |
| "learning_rate": 1.0058150045724022e-05, | |
| "loss": 1.0029429197311401, | |
| "step": 846 | |
| }, | |
| { | |
| "epoch": 1.805111821086262, | |
| "grad_norm": 0.6484375, | |
| "learning_rate": 1.002175466381707e-05, | |
| "loss": 0.9247448444366455, | |
| "step": 848 | |
| }, | |
| { | |
| "epoch": 1.8093716719914803, | |
| "grad_norm": 0.59375, | |
| "learning_rate": 9.985402840913666e-06, | |
| "loss": 1.0142505168914795, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 1.8136315228966986, | |
| "grad_norm": 0.5078125, | |
| "learning_rate": 9.949095377444961e-06, | |
| "loss": 0.9922072291374207, | |
| "step": 852 | |
| }, | |
| { | |
| "epoch": 1.817891373801917, | |
| "grad_norm": 0.59375, | |
| "learning_rate": 9.912833072865345e-06, | |
| "loss": 1.0142475366592407, | |
| "step": 854 | |
| }, | |
| { | |
| "epoch": 1.8221512247071352, | |
| "grad_norm": 0.91015625, | |
| "learning_rate": 9.876616725634864e-06, | |
| "loss": 0.9615439176559448, | |
| "step": 856 | |
| }, | |
| { | |
| "epoch": 1.8264110756123535, | |
| "grad_norm": 0.408203125, | |
| "learning_rate": 9.840447133201623e-06, | |
| "loss": 0.9525391459465027, | |
| "step": 858 | |
| }, | |
| { | |
| "epoch": 1.830670926517572, | |
| "grad_norm": 0.57421875, | |
| "learning_rate": 9.804325091984234e-06, | |
| "loss": 1.0148870944976807, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 1.83493077742279, | |
| "grad_norm": 0.53125, | |
| "learning_rate": 9.768251397354276e-06, | |
| "loss": 0.9480238556861877, | |
| "step": 862 | |
| }, | |
| { | |
| "epoch": 1.8391906283280086, | |
| "grad_norm": 0.8046875, | |
| "learning_rate": 9.73222684361879e-06, | |
| "loss": 1.0006335973739624, | |
| "step": 864 | |
| }, | |
| { | |
| "epoch": 1.8434504792332267, | |
| "grad_norm": 0.71484375, | |
| "learning_rate": 9.696252224002783e-06, | |
| "loss": 0.9949763417243958, | |
| "step": 866 | |
| }, | |
| { | |
| "epoch": 1.8477103301384452, | |
| "grad_norm": 0.51953125, | |
| "learning_rate": 9.660328330631752e-06, | |
| "loss": 0.9215972423553467, | |
| "step": 868 | |
| }, | |
| { | |
| "epoch": 1.8519701810436635, | |
| "grad_norm": 0.53515625, | |
| "learning_rate": 9.624455954514258e-06, | |
| "loss": 0.9809213280677795, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 1.8562300319488818, | |
| "grad_norm": 0.55078125, | |
| "learning_rate": 9.588635885524514e-06, | |
| "loss": 0.9926417469978333, | |
| "step": 872 | |
| }, | |
| { | |
| "epoch": 1.8604898828541, | |
| "grad_norm": 3.5625, | |
| "learning_rate": 9.552868912384962e-06, | |
| "loss": 1.0632542371749878, | |
| "step": 874 | |
| }, | |
| { | |
| "epoch": 1.8647497337593184, | |
| "grad_norm": 0.54296875, | |
| "learning_rate": 9.517155822648938e-06, | |
| "loss": 1.0109238624572754, | |
| "step": 876 | |
| }, | |
| { | |
| "epoch": 1.8690095846645367, | |
| "grad_norm": 0.66015625, | |
| "learning_rate": 9.481497402683325e-06, | |
| "loss": 0.9798102974891663, | |
| "step": 878 | |
| }, | |
| { | |
| "epoch": 1.873269435569755, | |
| "grad_norm": 0.50390625, | |
| "learning_rate": 9.445894437651211e-06, | |
| "loss": 1.0319267511367798, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 1.8775292864749735, | |
| "grad_norm": 0.451171875, | |
| "learning_rate": 9.41034771149464e-06, | |
| "loss": 1.0264664888381958, | |
| "step": 882 | |
| }, | |
| { | |
| "epoch": 1.8817891373801916, | |
| "grad_norm": 0.51171875, | |
| "learning_rate": 9.374858006917328e-06, | |
| "loss": 1.014363169670105, | |
| "step": 884 | |
| }, | |
| { | |
| "epoch": 1.8860489882854101, | |
| "grad_norm": 0.53125, | |
| "learning_rate": 9.339426105367428e-06, | |
| "loss": 0.9093478322029114, | |
| "step": 886 | |
| }, | |
| { | |
| "epoch": 1.8903088391906282, | |
| "grad_norm": 0.4921875, | |
| "learning_rate": 9.304052787020326e-06, | |
| "loss": 0.9274075627326965, | |
| "step": 888 | |
| }, | |
| { | |
| "epoch": 1.8945686900958467, | |
| "grad_norm": 0.484375, | |
| "learning_rate": 9.26873883076147e-06, | |
| "loss": 1.0133625268936157, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 1.898828541001065, | |
| "grad_norm": 0.5234375, | |
| "learning_rate": 9.233485014169214e-06, | |
| "loss": 1.0017213821411133, | |
| "step": 892 | |
| }, | |
| { | |
| "epoch": 1.9030883919062833, | |
| "grad_norm": 0.515625, | |
| "learning_rate": 9.198292113497686e-06, | |
| "loss": 1.0270392894744873, | |
| "step": 894 | |
| }, | |
| { | |
| "epoch": 1.9073482428115016, | |
| "grad_norm": 0.546875, | |
| "learning_rate": 9.163160903659715e-06, | |
| "loss": 0.9596851468086243, | |
| "step": 896 | |
| }, | |
| { | |
| "epoch": 1.91160809371672, | |
| "grad_norm": 0.54296875, | |
| "learning_rate": 9.128092158209762e-06, | |
| "loss": 1.0112367868423462, | |
| "step": 898 | |
| }, | |
| { | |
| "epoch": 1.9158679446219382, | |
| "grad_norm": 0.6796875, | |
| "learning_rate": 9.093086649326873e-06, | |
| "loss": 1.0255273580551147, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 1.9201277955271565, | |
| "grad_norm": 0.62890625, | |
| "learning_rate": 9.05814514779769e-06, | |
| "loss": 0.9857978820800781, | |
| "step": 902 | |
| }, | |
| { | |
| "epoch": 1.924387646432375, | |
| "grad_norm": 0.546875, | |
| "learning_rate": 9.02326842299949e-06, | |
| "loss": 0.9689497351646423, | |
| "step": 904 | |
| }, | |
| { | |
| "epoch": 1.928647497337593, | |
| "grad_norm": 0.640625, | |
| "learning_rate": 8.98845724288321e-06, | |
| "loss": 1.039789080619812, | |
| "step": 906 | |
| }, | |
| { | |
| "epoch": 1.9329073482428116, | |
| "grad_norm": 0.38671875, | |
| "learning_rate": 8.95371237395657e-06, | |
| "loss": 0.9854997396469116, | |
| "step": 908 | |
| }, | |
| { | |
| "epoch": 1.9371671991480297, | |
| "grad_norm": 0.5859375, | |
| "learning_rate": 8.919034581267185e-06, | |
| "loss": 0.863085150718689, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 1.9414270500532482, | |
| "grad_norm": 0.6328125, | |
| "learning_rate": 8.884424628385713e-06, | |
| "loss": 0.9845672845840454, | |
| "step": 912 | |
| }, | |
| { | |
| "epoch": 1.9456869009584663, | |
| "grad_norm": 0.609375, | |
| "learning_rate": 8.849883277389046e-06, | |
| "loss": 0.9393159747123718, | |
| "step": 914 | |
| }, | |
| { | |
| "epoch": 1.9499467518636848, | |
| "grad_norm": 1.0546875, | |
| "learning_rate": 8.815411288843531e-06, | |
| "loss": 1.019649624824524, | |
| "step": 916 | |
| }, | |
| { | |
| "epoch": 1.9542066027689031, | |
| "grad_norm": 0.53125, | |
| "learning_rate": 8.781009421788226e-06, | |
| "loss": 1.001703143119812, | |
| "step": 918 | |
| }, | |
| { | |
| "epoch": 1.9584664536741214, | |
| "grad_norm": 0.478515625, | |
| "learning_rate": 8.74667843371818e-06, | |
| "loss": 1.0148494243621826, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 1.9627263045793397, | |
| "grad_norm": 0.60546875, | |
| "learning_rate": 8.712419080567753e-06, | |
| "loss": 0.9227896332740784, | |
| "step": 922 | |
| }, | |
| { | |
| "epoch": 1.966986155484558, | |
| "grad_norm": 0.578125, | |
| "learning_rate": 8.678232116693988e-06, | |
| "loss": 0.9633923768997192, | |
| "step": 924 | |
| }, | |
| { | |
| "epoch": 1.9712460063897763, | |
| "grad_norm": 0.6640625, | |
| "learning_rate": 8.644118294859972e-06, | |
| "loss": 0.9478899836540222, | |
| "step": 926 | |
| }, | |
| { | |
| "epoch": 1.9755058572949946, | |
| "grad_norm": 0.55078125, | |
| "learning_rate": 8.610078366218287e-06, | |
| "loss": 1.0053707361221313, | |
| "step": 928 | |
| }, | |
| { | |
| "epoch": 1.9797657082002131, | |
| "grad_norm": 0.640625, | |
| "learning_rate": 8.576113080294459e-06, | |
| "loss": 0.9101724624633789, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 1.9840255591054312, | |
| "grad_norm": 0.63671875, | |
| "learning_rate": 8.542223184970451e-06, | |
| "loss": 1.0562280416488647, | |
| "step": 932 | |
| }, | |
| { | |
| "epoch": 1.9882854100106497, | |
| "grad_norm": 0.494140625, | |
| "learning_rate": 8.5084094264682e-06, | |
| "loss": 0.9434181451797485, | |
| "step": 934 | |
| }, | |
| { | |
| "epoch": 1.9925452609158678, | |
| "grad_norm": 0.75390625, | |
| "learning_rate": 8.474672549333191e-06, | |
| "loss": 1.0050933361053467, | |
| "step": 936 | |
| }, | |
| { | |
| "epoch": 1.9968051118210863, | |
| "grad_norm": 0.61328125, | |
| "learning_rate": 8.441013296418057e-06, | |
| "loss": 1.014664649963379, | |
| "step": 938 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 1.6171875, | |
| "learning_rate": 8.407432408866211e-06, | |
| "loss": 0.9959129095077515, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 2.0042598509052185, | |
| "grad_norm": 0.91015625, | |
| "learning_rate": 8.373930626095557e-06, | |
| "loss": 0.8519871830940247, | |
| "step": 942 | |
| }, | |
| { | |
| "epoch": 2.0085197018104366, | |
| "grad_norm": 0.50390625, | |
| "learning_rate": 8.340508685782173e-06, | |
| "loss": 0.8345139622688293, | |
| "step": 944 | |
| }, | |
| { | |
| "epoch": 2.012779552715655, | |
| "grad_norm": 0.64453125, | |
| "learning_rate": 8.30716732384409e-06, | |
| "loss": 0.8144312500953674, | |
| "step": 946 | |
| }, | |
| { | |
| "epoch": 2.017039403620873, | |
| "grad_norm": 0.6796875, | |
| "learning_rate": 8.27390727442509e-06, | |
| "loss": 0.8890555500984192, | |
| "step": 948 | |
| }, | |
| { | |
| "epoch": 2.0212992545260917, | |
| "grad_norm": 0.54296875, | |
| "learning_rate": 8.24072926987853e-06, | |
| "loss": 0.9254493117332458, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 2.02555910543131, | |
| "grad_norm": 0.431640625, | |
| "learning_rate": 8.207634040751218e-06, | |
| "loss": 0.8629240989685059, | |
| "step": 952 | |
| }, | |
| { | |
| "epoch": 2.0298189563365283, | |
| "grad_norm": 0.419921875, | |
| "learning_rate": 8.174622315767325e-06, | |
| "loss": 0.9234293699264526, | |
| "step": 954 | |
| }, | |
| { | |
| "epoch": 2.0340788072417464, | |
| "grad_norm": 0.67578125, | |
| "learning_rate": 8.141694821812352e-06, | |
| "loss": 0.8904613852500916, | |
| "step": 956 | |
| }, | |
| { | |
| "epoch": 2.038338658146965, | |
| "grad_norm": 0.56640625, | |
| "learning_rate": 8.108852283917116e-06, | |
| "loss": 0.8621918559074402, | |
| "step": 958 | |
| }, | |
| { | |
| "epoch": 2.042598509052183, | |
| "grad_norm": 0.6796875, | |
| "learning_rate": 8.07609542524178e-06, | |
| "loss": 0.8154129981994629, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 2.0468583599574015, | |
| "grad_norm": 0.388671875, | |
| "learning_rate": 8.043424967059936e-06, | |
| "loss": 0.7776715755462646, | |
| "step": 962 | |
| }, | |
| { | |
| "epoch": 2.0511182108626196, | |
| "grad_norm": 0.490234375, | |
| "learning_rate": 8.010841628742728e-06, | |
| "loss": 0.9061995148658752, | |
| "step": 964 | |
| }, | |
| { | |
| "epoch": 2.055378061767838, | |
| "grad_norm": 0.45703125, | |
| "learning_rate": 7.978346127742999e-06, | |
| "loss": 0.8465338349342346, | |
| "step": 966 | |
| }, | |
| { | |
| "epoch": 2.0596379126730566, | |
| "grad_norm": 0.71484375, | |
| "learning_rate": 7.945939179579509e-06, | |
| "loss": 0.8736094832420349, | |
| "step": 968 | |
| }, | |
| { | |
| "epoch": 2.0638977635782747, | |
| "grad_norm": 0.625, | |
| "learning_rate": 7.913621497821174e-06, | |
| "loss": 0.8707051277160645, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 2.0681576144834932, | |
| "grad_norm": 0.6796875, | |
| "learning_rate": 7.881393794071338e-06, | |
| "loss": 0.8639911413192749, | |
| "step": 972 | |
| }, | |
| { | |
| "epoch": 2.0724174653887113, | |
| "grad_norm": 0.65625, | |
| "learning_rate": 7.84925677795213e-06, | |
| "loss": 0.8878441452980042, | |
| "step": 974 | |
| }, | |
| { | |
| "epoch": 2.07667731629393, | |
| "grad_norm": 0.73828125, | |
| "learning_rate": 7.817211157088823e-06, | |
| "loss": 0.8697801232337952, | |
| "step": 976 | |
| }, | |
| { | |
| "epoch": 2.080937167199148, | |
| "grad_norm": 0.6015625, | |
| "learning_rate": 7.785257637094265e-06, | |
| "loss": 0.9132862687110901, | |
| "step": 978 | |
| }, | |
| { | |
| "epoch": 2.0851970181043664, | |
| "grad_norm": 0.81640625, | |
| "learning_rate": 7.753396921553325e-06, | |
| "loss": 0.9692128896713257, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 2.0894568690095845, | |
| "grad_norm": 0.6953125, | |
| "learning_rate": 7.721629712007408e-06, | |
| "loss": 0.8117839694023132, | |
| "step": 982 | |
| }, | |
| { | |
| "epoch": 2.093716719914803, | |
| "grad_norm": 0.65234375, | |
| "learning_rate": 7.68995670793902e-06, | |
| "loss": 0.8652855157852173, | |
| "step": 984 | |
| }, | |
| { | |
| "epoch": 2.097976570820021, | |
| "grad_norm": 0.671875, | |
| "learning_rate": 7.658378606756342e-06, | |
| "loss": 0.8414390683174133, | |
| "step": 986 | |
| }, | |
| { | |
| "epoch": 2.1022364217252396, | |
| "grad_norm": 0.78125, | |
| "learning_rate": 7.626896103777901e-06, | |
| "loss": 0.975841224193573, | |
| "step": 988 | |
| }, | |
| { | |
| "epoch": 2.106496272630458, | |
| "grad_norm": 0.5078125, | |
| "learning_rate": 7.595509892217243e-06, | |
| "loss": 0.8733735680580139, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 2.110756123535676, | |
| "grad_norm": 0.7578125, | |
| "learning_rate": 7.5642206631676655e-06, | |
| "loss": 0.8554286360740662, | |
| "step": 992 | |
| }, | |
| { | |
| "epoch": 2.1150159744408947, | |
| "grad_norm": 0.7109375, | |
| "learning_rate": 7.533029105587006e-06, | |
| "loss": 0.8418490290641785, | |
| "step": 994 | |
| }, | |
| { | |
| "epoch": 2.119275825346113, | |
| "grad_norm": 0.578125, | |
| "learning_rate": 7.501935906282486e-06, | |
| "loss": 0.8224227428436279, | |
| "step": 996 | |
| }, | |
| { | |
| "epoch": 2.1235356762513313, | |
| "grad_norm": 1.1015625, | |
| "learning_rate": 7.4709417498955676e-06, | |
| "loss": 0.8177536129951477, | |
| "step": 998 | |
| }, | |
| { | |
| "epoch": 2.1277955271565494, | |
| "grad_norm": 0.60546875, | |
| "learning_rate": 7.440047318886882e-06, | |
| "loss": 0.8571978211402893, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 2.132055378061768, | |
| "grad_norm": 0.64453125, | |
| "learning_rate": 7.409253293521209e-06, | |
| "loss": 0.8153810501098633, | |
| "step": 1002 | |
| }, | |
| { | |
| "epoch": 2.136315228966986, | |
| "grad_norm": 0.75, | |
| "learning_rate": 7.378560351852502e-06, | |
| "loss": 0.8463664650917053, | |
| "step": 1004 | |
| }, | |
| { | |
| "epoch": 2.1405750798722045, | |
| "grad_norm": 0.578125, | |
| "learning_rate": 7.347969169708937e-06, | |
| "loss": 0.8821415305137634, | |
| "step": 1006 | |
| }, | |
| { | |
| "epoch": 2.1448349307774226, | |
| "grad_norm": 0.7265625, | |
| "learning_rate": 7.317480420678064e-06, | |
| "loss": 0.9291739463806152, | |
| "step": 1008 | |
| }, | |
| { | |
| "epoch": 2.149094781682641, | |
| "grad_norm": 0.70703125, | |
| "learning_rate": 7.2870947760919486e-06, | |
| "loss": 0.772282063961029, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 2.1533546325878596, | |
| "grad_norm": 0.48046875, | |
| "learning_rate": 7.256812905012389e-06, | |
| "loss": 0.8808871507644653, | |
| "step": 1012 | |
| }, | |
| { | |
| "epoch": 2.1576144834930777, | |
| "grad_norm": 0.486328125, | |
| "learning_rate": 7.2266354742162095e-06, | |
| "loss": 0.8753591179847717, | |
| "step": 1014 | |
| }, | |
| { | |
| "epoch": 2.1618743343982962, | |
| "grad_norm": 0.7890625, | |
| "learning_rate": 7.196563148180554e-06, | |
| "loss": 0.8668736815452576, | |
| "step": 1016 | |
| }, | |
| { | |
| "epoch": 2.1661341853035143, | |
| "grad_norm": 0.50390625, | |
| "learning_rate": 7.166596589068271e-06, | |
| "loss": 0.8054329752922058, | |
| "step": 1018 | |
| }, | |
| { | |
| "epoch": 2.170394036208733, | |
| "grad_norm": 1.953125, | |
| "learning_rate": 7.136736456713316e-06, | |
| "loss": 0.8477678298950195, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 2.174653887113951, | |
| "grad_norm": 0.53125, | |
| "learning_rate": 7.1069834086062366e-06, | |
| "loss": 0.7990297079086304, | |
| "step": 1022 | |
| }, | |
| { | |
| "epoch": 2.1789137380191694, | |
| "grad_norm": 0.84765625, | |
| "learning_rate": 7.077338099879696e-06, | |
| "loss": 0.87007075548172, | |
| "step": 1024 | |
| }, | |
| { | |
| "epoch": 2.1831735889243875, | |
| "grad_norm": 0.8671875, | |
| "learning_rate": 7.047801183294034e-06, | |
| "loss": 0.8642436265945435, | |
| "step": 1026 | |
| }, | |
| { | |
| "epoch": 2.187433439829606, | |
| "grad_norm": 0.546875, | |
| "learning_rate": 7.018373309222914e-06, | |
| "loss": 0.9285038113594055, | |
| "step": 1028 | |
| }, | |
| { | |
| "epoch": 2.191693290734824, | |
| "grad_norm": 0.6640625, | |
| "learning_rate": 6.989055125638982e-06, | |
| "loss": 0.861228346824646, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 2.1959531416400426, | |
| "grad_norm": 0.6328125, | |
| "learning_rate": 6.95984727809961e-06, | |
| "loss": 0.8538438677787781, | |
| "step": 1032 | |
| }, | |
| { | |
| "epoch": 2.2002129925452607, | |
| "grad_norm": 0.62890625, | |
| "learning_rate": 6.930750409732679e-06, | |
| "loss": 0.9002454876899719, | |
| "step": 1034 | |
| }, | |
| { | |
| "epoch": 2.2044728434504792, | |
| "grad_norm": 0.828125, | |
| "learning_rate": 6.901765161222426e-06, | |
| "loss": 0.8670125603675842, | |
| "step": 1036 | |
| }, | |
| { | |
| "epoch": 2.2087326943556977, | |
| "grad_norm": 0.53515625, | |
| "learning_rate": 6.8728921707953225e-06, | |
| "loss": 0.8965079188346863, | |
| "step": 1038 | |
| }, | |
| { | |
| "epoch": 2.212992545260916, | |
| "grad_norm": 0.63671875, | |
| "learning_rate": 6.844132074206028e-06, | |
| "loss": 0.9224348664283752, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 2.2172523961661343, | |
| "grad_norm": 0.859375, | |
| "learning_rate": 6.815485504723389e-06, | |
| "loss": 0.8701453804969788, | |
| "step": 1042 | |
| }, | |
| { | |
| "epoch": 2.2215122470713524, | |
| "grad_norm": 1.0234375, | |
| "learning_rate": 6.786953093116508e-06, | |
| "loss": 0.8607268333435059, | |
| "step": 1044 | |
| }, | |
| { | |
| "epoch": 2.225772097976571, | |
| "grad_norm": 0.6875, | |
| "learning_rate": 6.758535467640832e-06, | |
| "loss": 0.8190476298332214, | |
| "step": 1046 | |
| }, | |
| { | |
| "epoch": 2.230031948881789, | |
| "grad_norm": 0.51171875, | |
| "learning_rate": 6.730233254024339e-06, | |
| "loss": 0.7581257820129395, | |
| "step": 1048 | |
| }, | |
| { | |
| "epoch": 2.2342917997870075, | |
| "grad_norm": 0.53125, | |
| "learning_rate": 6.702047075453751e-06, | |
| "loss": 0.867296576499939, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 2.2385516506922256, | |
| "grad_norm": 0.8203125, | |
| "learning_rate": 6.673977552560804e-06, | |
| "loss": 0.8582524061203003, | |
| "step": 1052 | |
| }, | |
| { | |
| "epoch": 2.242811501597444, | |
| "grad_norm": 0.5078125, | |
| "learning_rate": 6.646025303408606e-06, | |
| "loss": 0.8571093082427979, | |
| "step": 1054 | |
| }, | |
| { | |
| "epoch": 2.247071352502662, | |
| "grad_norm": 0.6484375, | |
| "learning_rate": 6.618190943478003e-06, | |
| "loss": 0.867594301700592, | |
| "step": 1056 | |
| }, | |
| { | |
| "epoch": 2.2513312034078807, | |
| "grad_norm": 0.66015625, | |
| "learning_rate": 6.590475085654047e-06, | |
| "loss": 0.7921283841133118, | |
| "step": 1058 | |
| }, | |
| { | |
| "epoch": 2.255591054313099, | |
| "grad_norm": 0.8125, | |
| "learning_rate": 6.56287834021248e-06, | |
| "loss": 0.8648768067359924, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 2.2598509052183173, | |
| "grad_norm": 0.82421875, | |
| "learning_rate": 6.535401314806309e-06, | |
| "loss": 0.8589727282524109, | |
| "step": 1062 | |
| }, | |
| { | |
| "epoch": 2.264110756123536, | |
| "grad_norm": 0.48828125, | |
| "learning_rate": 6.508044614452431e-06, | |
| "loss": 0.8992140889167786, | |
| "step": 1064 | |
| }, | |
| { | |
| "epoch": 2.268370607028754, | |
| "grad_norm": 0.69921875, | |
| "learning_rate": 6.480808841518298e-06, | |
| "loss": 0.8318920135498047, | |
| "step": 1066 | |
| }, | |
| { | |
| "epoch": 2.2726304579339724, | |
| "grad_norm": 0.80859375, | |
| "learning_rate": 6.453694595708667e-06, | |
| "loss": 0.8525157570838928, | |
| "step": 1068 | |
| }, | |
| { | |
| "epoch": 2.2768903088391905, | |
| "grad_norm": 0.6953125, | |
| "learning_rate": 6.426702474052381e-06, | |
| "loss": 0.8547057509422302, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 2.281150159744409, | |
| "grad_norm": 0.890625, | |
| "learning_rate": 6.39983307088923e-06, | |
| "loss": 0.8861790299415588, | |
| "step": 1072 | |
| }, | |
| { | |
| "epoch": 2.285410010649627, | |
| "grad_norm": 0.62890625, | |
| "learning_rate": 6.373086977856868e-06, | |
| "loss": 0.8417205810546875, | |
| "step": 1074 | |
| }, | |
| { | |
| "epoch": 2.2896698615548456, | |
| "grad_norm": 0.46484375, | |
| "learning_rate": 6.346464783877784e-06, | |
| "loss": 0.9120402932167053, | |
| "step": 1076 | |
| }, | |
| { | |
| "epoch": 2.2939297124600637, | |
| "grad_norm": 0.53515625, | |
| "learning_rate": 6.319967075146332e-06, | |
| "loss": 0.8795621991157532, | |
| "step": 1078 | |
| }, | |
| { | |
| "epoch": 2.2981895633652822, | |
| "grad_norm": 1.5078125, | |
| "learning_rate": 6.293594435115814e-06, | |
| "loss": 0.884793758392334, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 2.3024494142705008, | |
| "grad_norm": 0.482421875, | |
| "learning_rate": 6.267347444485654e-06, | |
| "loss": 0.9300041198730469, | |
| "step": 1082 | |
| }, | |
| { | |
| "epoch": 2.306709265175719, | |
| "grad_norm": 0.6796875, | |
| "learning_rate": 6.2412266811886005e-06, | |
| "loss": 0.8604894876480103, | |
| "step": 1084 | |
| }, | |
| { | |
| "epoch": 2.3109691160809374, | |
| "grad_norm": 0.65625, | |
| "learning_rate": 6.215232720377991e-06, | |
| "loss": 0.8723834753036499, | |
| "step": 1086 | |
| }, | |
| { | |
| "epoch": 2.3152289669861554, | |
| "grad_norm": 0.482421875, | |
| "learning_rate": 6.189366134415112e-06, | |
| "loss": 0.8270635008811951, | |
| "step": 1088 | |
| }, | |
| { | |
| "epoch": 2.319488817891374, | |
| "grad_norm": 0.84375, | |
| "learning_rate": 6.163627492856572e-06, | |
| "loss": 0.873162567615509, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 2.323748668796592, | |
| "grad_norm": 0.6796875, | |
| "learning_rate": 6.13801736244177e-06, | |
| "loss": 0.8589759469032288, | |
| "step": 1092 | |
| }, | |
| { | |
| "epoch": 2.3280085197018106, | |
| "grad_norm": 1.078125, | |
| "learning_rate": 6.112536307080422e-06, | |
| "loss": 0.8987887501716614, | |
| "step": 1094 | |
| }, | |
| { | |
| "epoch": 2.3322683706070286, | |
| "grad_norm": 0.6640625, | |
| "learning_rate": 6.087184887840139e-06, | |
| "loss": 0.8068996071815491, | |
| "step": 1096 | |
| }, | |
| { | |
| "epoch": 2.336528221512247, | |
| "grad_norm": 0.4921875, | |
| "learning_rate": 6.061963662934072e-06, | |
| "loss": 0.802603006362915, | |
| "step": 1098 | |
| }, | |
| { | |
| "epoch": 2.3407880724174652, | |
| "grad_norm": 0.5234375, | |
| "learning_rate": 6.036873187708615e-06, | |
| "loss": 0.8735599517822266, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 2.3450479233226837, | |
| "grad_norm": 0.5546875, | |
| "learning_rate": 6.011914014631191e-06, | |
| "loss": 0.7997400760650635, | |
| "step": 1102 | |
| }, | |
| { | |
| "epoch": 2.349307774227902, | |
| "grad_norm": 0.59765625, | |
| "learning_rate": 5.987086693278083e-06, | |
| "loss": 0.8167827129364014, | |
| "step": 1104 | |
| }, | |
| { | |
| "epoch": 2.3535676251331203, | |
| "grad_norm": 1.078125, | |
| "learning_rate": 5.962391770322319e-06, | |
| "loss": 0.853871762752533, | |
| "step": 1106 | |
| }, | |
| { | |
| "epoch": 2.357827476038339, | |
| "grad_norm": 0.59765625, | |
| "learning_rate": 5.937829789521655e-06, | |
| "loss": 0.8599565029144287, | |
| "step": 1108 | |
| }, | |
| { | |
| "epoch": 2.362087326943557, | |
| "grad_norm": 0.5625, | |
| "learning_rate": 5.913401291706585e-06, | |
| "loss": 0.8716296553611755, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 2.3663471778487755, | |
| "grad_norm": 0.671875, | |
| "learning_rate": 5.889106814768442e-06, | |
| "loss": 0.8599187731742859, | |
| "step": 1112 | |
| }, | |
| { | |
| "epoch": 2.3706070287539935, | |
| "grad_norm": 0.96484375, | |
| "learning_rate": 5.864946893647556e-06, | |
| "loss": 0.8140082955360413, | |
| "step": 1114 | |
| }, | |
| { | |
| "epoch": 2.374866879659212, | |
| "grad_norm": 0.8125, | |
| "learning_rate": 5.8409220603214704e-06, | |
| "loss": 0.8339557647705078, | |
| "step": 1116 | |
| }, | |
| { | |
| "epoch": 2.37912673056443, | |
| "grad_norm": 0.447265625, | |
| "learning_rate": 5.817032843793222e-06, | |
| "loss": 0.828040599822998, | |
| "step": 1118 | |
| }, | |
| { | |
| "epoch": 2.3833865814696487, | |
| "grad_norm": 0.80078125, | |
| "learning_rate": 5.793279770079706e-06, | |
| "loss": 0.869745135307312, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 2.3876464323748667, | |
| "grad_norm": 1.140625, | |
| "learning_rate": 5.769663362200086e-06, | |
| "loss": 0.8580743074417114, | |
| "step": 1122 | |
| }, | |
| { | |
| "epoch": 2.3919062832800853, | |
| "grad_norm": 0.70703125, | |
| "learning_rate": 5.746184140164285e-06, | |
| "loss": 0.8233816027641296, | |
| "step": 1124 | |
| }, | |
| { | |
| "epoch": 2.3961661341853033, | |
| "grad_norm": 0.4375, | |
| "learning_rate": 5.722842620961518e-06, | |
| "loss": 0.8709803819656372, | |
| "step": 1126 | |
| }, | |
| { | |
| "epoch": 2.400425985090522, | |
| "grad_norm": 0.58203125, | |
| "learning_rate": 5.69963931854893e-06, | |
| "loss": 0.9047388434410095, | |
| "step": 1128 | |
| }, | |
| { | |
| "epoch": 2.40468583599574, | |
| "grad_norm": 1.890625, | |
| "learning_rate": 5.676574743840264e-06, | |
| "loss": 0.8460210561752319, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 2.4089456869009584, | |
| "grad_norm": 0.73046875, | |
| "learning_rate": 5.6536494046946174e-06, | |
| "loss": 0.9233959913253784, | |
| "step": 1132 | |
| }, | |
| { | |
| "epoch": 2.413205537806177, | |
| "grad_norm": 0.40234375, | |
| "learning_rate": 5.630863805905259e-06, | |
| "loss": 0.8673460483551025, | |
| "step": 1134 | |
| }, | |
| { | |
| "epoch": 2.417465388711395, | |
| "grad_norm": 1.0234375, | |
| "learning_rate": 5.608218449188516e-06, | |
| "loss": 0.9345380663871765, | |
| "step": 1136 | |
| }, | |
| { | |
| "epoch": 2.4217252396166136, | |
| "grad_norm": 1.1328125, | |
| "learning_rate": 5.585713833172718e-06, | |
| "loss": 0.9205328226089478, | |
| "step": 1138 | |
| }, | |
| { | |
| "epoch": 2.4259850905218316, | |
| "grad_norm": 0.61328125, | |
| "learning_rate": 5.563350453387224e-06, | |
| "loss": 0.7964959740638733, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 2.43024494142705, | |
| "grad_norm": 0.49609375, | |
| "learning_rate": 5.541128802251518e-06, | |
| "loss": 0.811107873916626, | |
| "step": 1142 | |
| }, | |
| { | |
| "epoch": 2.4345047923322682, | |
| "grad_norm": 0.51953125, | |
| "learning_rate": 5.5190493690643566e-06, | |
| "loss": 0.787108838558197, | |
| "step": 1144 | |
| }, | |
| { | |
| "epoch": 2.4387646432374868, | |
| "grad_norm": 17.375, | |
| "learning_rate": 5.497112639992993e-06, | |
| "loss": 0.9524678587913513, | |
| "step": 1146 | |
| }, | |
| { | |
| "epoch": 2.443024494142705, | |
| "grad_norm": 1.109375, | |
| "learning_rate": 5.4753190980624834e-06, | |
| "loss": 0.7983661890029907, | |
| "step": 1148 | |
| }, | |
| { | |
| "epoch": 2.4472843450479234, | |
| "grad_norm": 1.109375, | |
| "learning_rate": 5.453669223145042e-06, | |
| "loss": 0.8548356294631958, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 2.451544195953142, | |
| "grad_norm": 0.6328125, | |
| "learning_rate": 5.432163491949475e-06, | |
| "loss": 0.9406867027282715, | |
| "step": 1152 | |
| }, | |
| { | |
| "epoch": 2.45580404685836, | |
| "grad_norm": 0.6015625, | |
| "learning_rate": 5.410802378010691e-06, | |
| "loss": 0.8843863010406494, | |
| "step": 1154 | |
| }, | |
| { | |
| "epoch": 2.460063897763578, | |
| "grad_norm": 1.125, | |
| "learning_rate": 5.389586351679272e-06, | |
| "loss": 0.8945695757865906, | |
| "step": 1156 | |
| }, | |
| { | |
| "epoch": 2.4643237486687966, | |
| "grad_norm": 0.58203125, | |
| "learning_rate": 5.368515880111107e-06, | |
| "loss": 0.8422825336456299, | |
| "step": 1158 | |
| }, | |
| { | |
| "epoch": 2.468583599574015, | |
| "grad_norm": 0.84375, | |
| "learning_rate": 5.347591427257115e-06, | |
| "loss": 0.940974771976471, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 2.472843450479233, | |
| "grad_norm": 0.484375, | |
| "learning_rate": 5.326813453853033e-06, | |
| "loss": 0.849635124206543, | |
| "step": 1162 | |
| }, | |
| { | |
| "epoch": 2.4771033013844517, | |
| "grad_norm": 0.62109375, | |
| "learning_rate": 5.3061824174092645e-06, | |
| "loss": 0.8890464901924133, | |
| "step": 1164 | |
| }, | |
| { | |
| "epoch": 2.4813631522896697, | |
| "grad_norm": 1.1015625, | |
| "learning_rate": 5.2856987722008004e-06, | |
| "loss": 0.8929099440574646, | |
| "step": 1166 | |
| }, | |
| { | |
| "epoch": 2.4856230031948883, | |
| "grad_norm": 0.52734375, | |
| "learning_rate": 5.265362969257225e-06, | |
| "loss": 0.811379075050354, | |
| "step": 1168 | |
| }, | |
| { | |
| "epoch": 2.4898828541001063, | |
| "grad_norm": 0.56640625, | |
| "learning_rate": 5.245175456352788e-06, | |
| "loss": 0.8641577959060669, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 2.494142705005325, | |
| "grad_norm": 0.79296875, | |
| "learning_rate": 5.225136677996531e-06, | |
| "loss": 0.8928030133247375, | |
| "step": 1172 | |
| }, | |
| { | |
| "epoch": 2.498402555910543, | |
| "grad_norm": 1.0546875, | |
| "learning_rate": 5.205247075422514e-06, | |
| "loss": 0.8985695838928223, | |
| "step": 1174 | |
| }, | |
| { | |
| "epoch": 2.5026624068157615, | |
| "grad_norm": 0.53515625, | |
| "learning_rate": 5.185507086580094e-06, | |
| "loss": 0.825484037399292, | |
| "step": 1176 | |
| }, | |
| { | |
| "epoch": 2.50692225772098, | |
| "grad_norm": 0.71484375, | |
| "learning_rate": 5.165917146124276e-06, | |
| "loss": 0.8295057415962219, | |
| "step": 1178 | |
| }, | |
| { | |
| "epoch": 2.511182108626198, | |
| "grad_norm": 0.66796875, | |
| "learning_rate": 5.146477685406154e-06, | |
| "loss": 0.8838967084884644, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 2.515441959531416, | |
| "grad_norm": 0.71875, | |
| "learning_rate": 5.127189132463404e-06, | |
| "loss": 0.8865538239479065, | |
| "step": 1182 | |
| }, | |
| { | |
| "epoch": 2.5197018104366347, | |
| "grad_norm": 0.984375, | |
| "learning_rate": 5.108051912010865e-06, | |
| "loss": 0.839586079120636, | |
| "step": 1184 | |
| }, | |
| { | |
| "epoch": 2.523961661341853, | |
| "grad_norm": 0.70703125, | |
| "learning_rate": 5.089066445431183e-06, | |
| "loss": 0.8859398365020752, | |
| "step": 1186 | |
| }, | |
| { | |
| "epoch": 2.5282215122470713, | |
| "grad_norm": 1.1328125, | |
| "learning_rate": 5.070233150765531e-06, | |
| "loss": 0.8947311639785767, | |
| "step": 1188 | |
| }, | |
| { | |
| "epoch": 2.5324813631522898, | |
| "grad_norm": 0.7265625, | |
| "learning_rate": 5.051552442704411e-06, | |
| "loss": 0.8858785033226013, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 2.536741214057508, | |
| "grad_norm": 0.5546875, | |
| "learning_rate": 5.033024732578518e-06, | |
| "loss": 0.8125391006469727, | |
| "step": 1192 | |
| }, | |
| { | |
| "epoch": 2.5410010649627264, | |
| "grad_norm": 1.0703125, | |
| "learning_rate": 5.014650428349683e-06, | |
| "loss": 0.8854655027389526, | |
| "step": 1194 | |
| }, | |
| { | |
| "epoch": 2.545260915867945, | |
| "grad_norm": 0.65234375, | |
| "learning_rate": 4.996429934601895e-06, | |
| "loss": 0.8128026127815247, | |
| "step": 1196 | |
| }, | |
| { | |
| "epoch": 2.549520766773163, | |
| "grad_norm": 0.9140625, | |
| "learning_rate": 4.978363652532383e-06, | |
| "loss": 0.8405453562736511, | |
| "step": 1198 | |
| }, | |
| { | |
| "epoch": 2.553780617678381, | |
| "grad_norm": 0.6953125, | |
| "learning_rate": 4.960451979942789e-06, | |
| "loss": 0.959130585193634, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 2.5580404685835996, | |
| "grad_norm": 5.28125, | |
| "learning_rate": 4.942695311230408e-06, | |
| "loss": 0.7953628897666931, | |
| "step": 1202 | |
| }, | |
| { | |
| "epoch": 2.562300319488818, | |
| "grad_norm": 0.84765625, | |
| "learning_rate": 4.925094037379507e-06, | |
| "loss": 0.7662147283554077, | |
| "step": 1204 | |
| }, | |
| { | |
| "epoch": 2.566560170394036, | |
| "grad_norm": 0.9453125, | |
| "learning_rate": 4.907648545952706e-06, | |
| "loss": 0.8059567213058472, | |
| "step": 1206 | |
| }, | |
| { | |
| "epoch": 2.5708200212992547, | |
| "grad_norm": 0.7578125, | |
| "learning_rate": 4.890359221082451e-06, | |
| "loss": 0.9323968291282654, | |
| "step": 1208 | |
| }, | |
| { | |
| "epoch": 2.5750798722044728, | |
| "grad_norm": 1.1171875, | |
| "learning_rate": 4.87322644346256e-06, | |
| "loss": 0.8555244207382202, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 2.5793397231096913, | |
| "grad_norm": 0.5390625, | |
| "learning_rate": 4.856250590339828e-06, | |
| "loss": 0.8834168314933777, | |
| "step": 1212 | |
| }, | |
| { | |
| "epoch": 2.5835995740149094, | |
| "grad_norm": 0.58984375, | |
| "learning_rate": 4.8394320355057355e-06, | |
| "loss": 0.9264652132987976, | |
| "step": 1214 | |
| }, | |
| { | |
| "epoch": 2.587859424920128, | |
| "grad_norm": 0.69140625, | |
| "learning_rate": 4.822771149288208e-06, | |
| "loss": 0.8777004480361938, | |
| "step": 1216 | |
| }, | |
| { | |
| "epoch": 2.592119275825346, | |
| "grad_norm": 0.63671875, | |
| "learning_rate": 4.80626829854346e-06, | |
| "loss": 0.8719021081924438, | |
| "step": 1218 | |
| }, | |
| { | |
| "epoch": 2.5963791267305645, | |
| "grad_norm": 0.62109375, | |
| "learning_rate": 4.789923846647926e-06, | |
| "loss": 0.8178717494010925, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 2.600638977635783, | |
| "grad_norm": 0.70703125, | |
| "learning_rate": 4.773738153490252e-06, | |
| "loss": 0.9265211820602417, | |
| "step": 1222 | |
| }, | |
| { | |
| "epoch": 2.604898828541001, | |
| "grad_norm": 0.765625, | |
| "learning_rate": 4.75771157546338e-06, | |
| "loss": 0.9368568658828735, | |
| "step": 1224 | |
| }, | |
| { | |
| "epoch": 2.609158679446219, | |
| "grad_norm": 0.4453125, | |
| "learning_rate": 4.7418444654566845e-06, | |
| "loss": 0.8576453328132629, | |
| "step": 1226 | |
| }, | |
| { | |
| "epoch": 2.6134185303514377, | |
| "grad_norm": 0.64453125, | |
| "learning_rate": 4.726137172848219e-06, | |
| "loss": 0.8761544823646545, | |
| "step": 1228 | |
| }, | |
| { | |
| "epoch": 2.617678381256656, | |
| "grad_norm": 0.58203125, | |
| "learning_rate": 4.710590043497013e-06, | |
| "loss": 0.8335532546043396, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 2.6219382321618743, | |
| "grad_norm": 0.7578125, | |
| "learning_rate": 4.695203419735462e-06, | |
| "loss": 0.8370216488838196, | |
| "step": 1232 | |
| }, | |
| { | |
| "epoch": 2.626198083067093, | |
| "grad_norm": 0.578125, | |
| "learning_rate": 4.6799776403617875e-06, | |
| "loss": 0.844344973564148, | |
| "step": 1234 | |
| }, | |
| { | |
| "epoch": 2.630457933972311, | |
| "grad_norm": 0.89453125, | |
| "learning_rate": 4.6649130406325726e-06, | |
| "loss": 0.9227336645126343, | |
| "step": 1236 | |
| }, | |
| { | |
| "epoch": 2.6347177848775294, | |
| "grad_norm": 0.61328125, | |
| "learning_rate": 4.650009952255389e-06, | |
| "loss": 0.8581987023353577, | |
| "step": 1238 | |
| }, | |
| { | |
| "epoch": 2.6389776357827475, | |
| "grad_norm": 1.4140625, | |
| "learning_rate": 4.635268703381484e-06, | |
| "loss": 0.8400872349739075, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 2.643237486687966, | |
| "grad_norm": 0.6796875, | |
| "learning_rate": 4.620689618598568e-06, | |
| "loss": 0.8582939505577087, | |
| "step": 1242 | |
| }, | |
| { | |
| "epoch": 2.647497337593184, | |
| "grad_norm": 0.6953125, | |
| "learning_rate": 4.606273018923644e-06, | |
| "loss": 0.9465791583061218, | |
| "step": 1244 | |
| }, | |
| { | |
| "epoch": 2.6517571884984026, | |
| "grad_norm": 0.70703125, | |
| "learning_rate": 4.592019221795969e-06, | |
| "loss": 0.8765722513198853, | |
| "step": 1246 | |
| }, | |
| { | |
| "epoch": 2.656017039403621, | |
| "grad_norm": 0.8125, | |
| "learning_rate": 4.57792854107004e-06, | |
| "loss": 0.8363880515098572, | |
| "step": 1248 | |
| }, | |
| { | |
| "epoch": 2.660276890308839, | |
| "grad_norm": 0.625, | |
| "learning_rate": 4.564001287008695e-06, | |
| "loss": 0.9550789594650269, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 2.6645367412140573, | |
| "grad_norm": 0.52734375, | |
| "learning_rate": 4.550237766276277e-06, | |
| "loss": 0.8685204982757568, | |
| "step": 1252 | |
| }, | |
| { | |
| "epoch": 2.668796592119276, | |
| "grad_norm": 0.58984375, | |
| "learning_rate": 4.536638281931887e-06, | |
| "loss": 0.8562531471252441, | |
| "step": 1254 | |
| }, | |
| { | |
| "epoch": 2.6730564430244943, | |
| "grad_norm": 0.69921875, | |
| "learning_rate": 4.523203133422705e-06, | |
| "loss": 0.8494756817817688, | |
| "step": 1256 | |
| }, | |
| { | |
| "epoch": 2.6773162939297124, | |
| "grad_norm": 1.5390625, | |
| "learning_rate": 4.509932616577394e-06, | |
| "loss": 0.899334728717804, | |
| "step": 1258 | |
| }, | |
| { | |
| "epoch": 2.681576144834931, | |
| "grad_norm": 0.796875, | |
| "learning_rate": 4.4968270235995995e-06, | |
| "loss": 0.8993234634399414, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 2.685835995740149, | |
| "grad_norm": 0.51953125, | |
| "learning_rate": 4.483886643061501e-06, | |
| "loss": 0.8710511326789856, | |
| "step": 1262 | |
| }, | |
| { | |
| "epoch": 2.6900958466453675, | |
| "grad_norm": 0.66796875, | |
| "learning_rate": 4.471111759897464e-06, | |
| "loss": 0.8785170912742615, | |
| "step": 1264 | |
| }, | |
| { | |
| "epoch": 2.6943556975505856, | |
| "grad_norm": 1.6796875, | |
| "learning_rate": 4.45850265539777e-06, | |
| "loss": 0.8322556018829346, | |
| "step": 1266 | |
| }, | |
| { | |
| "epoch": 2.698615548455804, | |
| "grad_norm": 0.5390625, | |
| "learning_rate": 4.4460596072024114e-06, | |
| "loss": 0.8931154012680054, | |
| "step": 1268 | |
| }, | |
| { | |
| "epoch": 2.702875399361022, | |
| "grad_norm": 0.60546875, | |
| "learning_rate": 4.433782889294992e-06, | |
| "loss": 0.8494217395782471, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 2.7071352502662407, | |
| "grad_norm": 1.0, | |
| "learning_rate": 4.4216727719966825e-06, | |
| "loss": 0.8380395174026489, | |
| "step": 1272 | |
| }, | |
| { | |
| "epoch": 2.711395101171459, | |
| "grad_norm": 0.79296875, | |
| "learning_rate": 4.409729521960276e-06, | |
| "loss": 0.89030522108078, | |
| "step": 1274 | |
| }, | |
| { | |
| "epoch": 2.7156549520766773, | |
| "grad_norm": 0.74609375, | |
| "learning_rate": 4.397953402164312e-06, | |
| "loss": 0.8374807238578796, | |
| "step": 1276 | |
| }, | |
| { | |
| "epoch": 2.7199148029818954, | |
| "grad_norm": 0.59765625, | |
| "learning_rate": 4.386344671907289e-06, | |
| "loss": 0.8489108085632324, | |
| "step": 1278 | |
| }, | |
| { | |
| "epoch": 2.724174653887114, | |
| "grad_norm": 0.53515625, | |
| "learning_rate": 4.374903586801952e-06, | |
| "loss": 0.9123415946960449, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 2.7284345047923324, | |
| "grad_norm": 0.5625, | |
| "learning_rate": 4.363630398769667e-06, | |
| "loss": 0.8218215107917786, | |
| "step": 1282 | |
| }, | |
| { | |
| "epoch": 2.7326943556975505, | |
| "grad_norm": 0.6015625, | |
| "learning_rate": 4.352525356034874e-06, | |
| "loss": 0.8863515853881836, | |
| "step": 1284 | |
| }, | |
| { | |
| "epoch": 2.736954206602769, | |
| "grad_norm": 0.494140625, | |
| "learning_rate": 4.341588703119615e-06, | |
| "loss": 0.8300585150718689, | |
| "step": 1286 | |
| }, | |
| { | |
| "epoch": 2.741214057507987, | |
| "grad_norm": 1.96875, | |
| "learning_rate": 4.330820680838162e-06, | |
| "loss": 0.8769638538360596, | |
| "step": 1288 | |
| }, | |
| { | |
| "epoch": 2.7454739084132056, | |
| "grad_norm": 0.5234375, | |
| "learning_rate": 4.320221526291701e-06, | |
| "loss": 0.8285037279129028, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 2.749733759318424, | |
| "grad_norm": 0.81640625, | |
| "learning_rate": 4.309791472863121e-06, | |
| "loss": 0.8763640522956848, | |
| "step": 1292 | |
| }, | |
| { | |
| "epoch": 2.753993610223642, | |
| "grad_norm": 0.6640625, | |
| "learning_rate": 4.2995307502118735e-06, | |
| "loss": 0.8772199749946594, | |
| "step": 1294 | |
| }, | |
| { | |
| "epoch": 2.7582534611288603, | |
| "grad_norm": 1.0234375, | |
| "learning_rate": 4.289439584268909e-06, | |
| "loss": 0.9215362668037415, | |
| "step": 1296 | |
| }, | |
| { | |
| "epoch": 2.762513312034079, | |
| "grad_norm": 0.6015625, | |
| "learning_rate": 4.279518197231709e-06, | |
| "loss": 0.781262218952179, | |
| "step": 1298 | |
| }, | |
| { | |
| "epoch": 2.7667731629392973, | |
| "grad_norm": 2.09375, | |
| "learning_rate": 4.2697668075593955e-06, | |
| "loss": 0.8765016198158264, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 2.7710330138445154, | |
| "grad_norm": 0.455078125, | |
| "learning_rate": 4.260185629967912e-06, | |
| "loss": 0.9362728595733643, | |
| "step": 1302 | |
| }, | |
| { | |
| "epoch": 2.775292864749734, | |
| "grad_norm": 0.671875, | |
| "learning_rate": 4.250774875425303e-06, | |
| "loss": 0.9154821634292603, | |
| "step": 1304 | |
| }, | |
| { | |
| "epoch": 2.779552715654952, | |
| "grad_norm": 0.5859375, | |
| "learning_rate": 4.241534751147065e-06, | |
| "loss": 0.8477409482002258, | |
| "step": 1306 | |
| }, | |
| { | |
| "epoch": 2.7838125665601705, | |
| "grad_norm": 0.81640625, | |
| "learning_rate": 4.232465460591584e-06, | |
| "loss": 0.8393524289131165, | |
| "step": 1308 | |
| }, | |
| { | |
| "epoch": 2.7880724174653886, | |
| "grad_norm": 0.7109375, | |
| "learning_rate": 4.223567203455662e-06, | |
| "loss": 1.008418083190918, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 2.792332268370607, | |
| "grad_norm": 0.5859375, | |
| "learning_rate": 4.214840175670106e-06, | |
| "loss": 0.8442473411560059, | |
| "step": 1312 | |
| }, | |
| { | |
| "epoch": 2.796592119275825, | |
| "grad_norm": 0.5703125, | |
| "learning_rate": 4.2062845693954315e-06, | |
| "loss": 0.8037950992584229, | |
| "step": 1314 | |
| }, | |
| { | |
| "epoch": 2.8008519701810437, | |
| "grad_norm": 0.57421875, | |
| "learning_rate": 4.197900573017613e-06, | |
| "loss": 0.9449222683906555, | |
| "step": 1316 | |
| }, | |
| { | |
| "epoch": 2.8051118210862622, | |
| "grad_norm": 1.71875, | |
| "learning_rate": 4.189688371143952e-06, | |
| "loss": 0.9002764225006104, | |
| "step": 1318 | |
| }, | |
| { | |
| "epoch": 2.8093716719914803, | |
| "grad_norm": 0.74609375, | |
| "learning_rate": 4.181648144599001e-06, | |
| "loss": 0.9393887519836426, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 2.8136315228966984, | |
| "grad_norm": 0.69921875, | |
| "learning_rate": 4.17378007042059e-06, | |
| "loss": 0.9079067707061768, | |
| "step": 1322 | |
| }, | |
| { | |
| "epoch": 2.817891373801917, | |
| "grad_norm": 0.7109375, | |
| "learning_rate": 4.166084321855919e-06, | |
| "loss": 0.9096862077713013, | |
| "step": 1324 | |
| }, | |
| { | |
| "epoch": 2.8221512247071354, | |
| "grad_norm": 1.1640625, | |
| "learning_rate": 4.158561068357751e-06, | |
| "loss": 0.9297478199005127, | |
| "step": 1326 | |
| }, | |
| { | |
| "epoch": 2.8264110756123535, | |
| "grad_norm": 0.55859375, | |
| "learning_rate": 4.151210475580683e-06, | |
| "loss": 0.7916253805160522, | |
| "step": 1328 | |
| }, | |
| { | |
| "epoch": 2.830670926517572, | |
| "grad_norm": 0.71875, | |
| "learning_rate": 4.144032705377486e-06, | |
| "loss": 0.7997951507568359, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 2.83493077742279, | |
| "grad_norm": 0.5625, | |
| "learning_rate": 4.137027915795555e-06, | |
| "loss": 0.8519441485404968, | |
| "step": 1332 | |
| }, | |
| { | |
| "epoch": 2.8391906283280086, | |
| "grad_norm": 0.5, | |
| "learning_rate": 4.130196261073419e-06, | |
| "loss": 0.8826906681060791, | |
| "step": 1334 | |
| }, | |
| { | |
| "epoch": 2.8434504792332267, | |
| "grad_norm": 0.62109375, | |
| "learning_rate": 4.123537891637352e-06, | |
| "loss": 0.9272059798240662, | |
| "step": 1336 | |
| }, | |
| { | |
| "epoch": 2.847710330138445, | |
| "grad_norm": 0.66015625, | |
| "learning_rate": 4.117052954098055e-06, | |
| "loss": 0.8484643697738647, | |
| "step": 1338 | |
| }, | |
| { | |
| "epoch": 2.8519701810436633, | |
| "grad_norm": 0.92578125, | |
| "learning_rate": 4.110741591247433e-06, | |
| "loss": 0.8416329026222229, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 2.856230031948882, | |
| "grad_norm": 1.21875, | |
| "learning_rate": 4.1046039420554465e-06, | |
| "loss": 0.7760175466537476, | |
| "step": 1342 | |
| }, | |
| { | |
| "epoch": 2.8604898828541003, | |
| "grad_norm": 0.67578125, | |
| "learning_rate": 4.0986401416670535e-06, | |
| "loss": 0.8102884888648987, | |
| "step": 1344 | |
| }, | |
| { | |
| "epoch": 2.8647497337593184, | |
| "grad_norm": 0.65625, | |
| "learning_rate": 4.092850321399232e-06, | |
| "loss": 0.8387068510055542, | |
| "step": 1346 | |
| }, | |
| { | |
| "epoch": 2.8690095846645365, | |
| "grad_norm": 0.72265625, | |
| "learning_rate": 4.0872346087380924e-06, | |
| "loss": 0.8194290399551392, | |
| "step": 1348 | |
| }, | |
| { | |
| "epoch": 2.873269435569755, | |
| "grad_norm": 0.6796875, | |
| "learning_rate": 4.0817931273360686e-06, | |
| "loss": 0.8201800584793091, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 2.8775292864749735, | |
| "grad_norm": 0.51953125, | |
| "learning_rate": 4.076525997009191e-06, | |
| "loss": 0.8607783913612366, | |
| "step": 1352 | |
| }, | |
| { | |
| "epoch": 2.8817891373801916, | |
| "grad_norm": 0.83984375, | |
| "learning_rate": 4.0714333337344565e-06, | |
| "loss": 0.8402998447418213, | |
| "step": 1354 | |
| }, | |
| { | |
| "epoch": 2.88604898828541, | |
| "grad_norm": 0.52734375, | |
| "learning_rate": 4.066515249647266e-06, | |
| "loss": 0.8459066152572632, | |
| "step": 1356 | |
| }, | |
| { | |
| "epoch": 2.890308839190628, | |
| "grad_norm": 0.63671875, | |
| "learning_rate": 4.061771853038961e-06, | |
| "loss": 0.8821372985839844, | |
| "step": 1358 | |
| }, | |
| { | |
| "epoch": 2.8945686900958467, | |
| "grad_norm": 0.53515625, | |
| "learning_rate": 4.05720324835444e-06, | |
| "loss": 0.8806694746017456, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 2.8988285410010652, | |
| "grad_norm": 0.7421875, | |
| "learning_rate": 4.052809536189854e-06, | |
| "loss": 0.9144023656845093, | |
| "step": 1362 | |
| }, | |
| { | |
| "epoch": 2.9030883919062833, | |
| "grad_norm": 1.25, | |
| "learning_rate": 4.048590813290395e-06, | |
| "loss": 0.8206741213798523, | |
| "step": 1364 | |
| }, | |
| { | |
| "epoch": 2.9073482428115014, | |
| "grad_norm": 0.67578125, | |
| "learning_rate": 4.044547172548163e-06, | |
| "loss": 0.8178682923316956, | |
| "step": 1366 | |
| }, | |
| { | |
| "epoch": 2.91160809371672, | |
| "grad_norm": 0.66015625, | |
| "learning_rate": 4.040678703000123e-06, | |
| "loss": 0.8602153658866882, | |
| "step": 1368 | |
| }, | |
| { | |
| "epoch": 2.9158679446219384, | |
| "grad_norm": 0.53515625, | |
| "learning_rate": 4.036985489826145e-06, | |
| "loss": 0.8640646934509277, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 2.9201277955271565, | |
| "grad_norm": 0.494140625, | |
| "learning_rate": 4.033467614347126e-06, | |
| "loss": 0.8199517130851746, | |
| "step": 1372 | |
| }, | |
| { | |
| "epoch": 2.924387646432375, | |
| "grad_norm": 0.462890625, | |
| "learning_rate": 4.030125154023201e-06, | |
| "loss": 0.8197891712188721, | |
| "step": 1374 | |
| }, | |
| { | |
| "epoch": 2.928647497337593, | |
| "grad_norm": 0.6015625, | |
| "learning_rate": 4.026958182452037e-06, | |
| "loss": 0.9104003310203552, | |
| "step": 1376 | |
| }, | |
| { | |
| "epoch": 2.9329073482428116, | |
| "grad_norm": 0.78125, | |
| "learning_rate": 4.023966769367212e-06, | |
| "loss": 0.9194601774215698, | |
| "step": 1378 | |
| }, | |
| { | |
| "epoch": 2.9371671991480297, | |
| "grad_norm": 0.6875, | |
| "learning_rate": 4.021150980636679e-06, | |
| "loss": 0.8961591720581055, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 2.9414270500532482, | |
| "grad_norm": 1.1640625, | |
| "learning_rate": 4.01851087826132e-06, | |
| "loss": 0.9252373576164246, | |
| "step": 1382 | |
| }, | |
| { | |
| "epoch": 2.9456869009584663, | |
| "grad_norm": 0.703125, | |
| "learning_rate": 4.016046520373573e-06, | |
| "loss": 0.8721043467521667, | |
| "step": 1384 | |
| }, | |
| { | |
| "epoch": 2.949946751863685, | |
| "grad_norm": 0.55859375, | |
| "learning_rate": 4.013757961236162e-06, | |
| "loss": 0.8307064771652222, | |
| "step": 1386 | |
| }, | |
| { | |
| "epoch": 2.9542066027689033, | |
| "grad_norm": 0.87109375, | |
| "learning_rate": 4.011645251240889e-06, | |
| "loss": 0.8707568049430847, | |
| "step": 1388 | |
| }, | |
| { | |
| "epoch": 2.9584664536741214, | |
| "grad_norm": 0.69921875, | |
| "learning_rate": 4.009708436907538e-06, | |
| "loss": 0.826111376285553, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 2.9627263045793395, | |
| "grad_norm": 0.7734375, | |
| "learning_rate": 4.007947560882844e-06, | |
| "loss": 0.8147805333137512, | |
| "step": 1392 | |
| }, | |
| { | |
| "epoch": 2.966986155484558, | |
| "grad_norm": 5.5625, | |
| "learning_rate": 4.006362661939552e-06, | |
| "loss": 0.9187301993370056, | |
| "step": 1394 | |
| }, | |
| { | |
| "epoch": 2.9712460063897765, | |
| "grad_norm": 0.94140625, | |
| "learning_rate": 4.0049537749755685e-06, | |
| "loss": 0.7911329865455627, | |
| "step": 1396 | |
| }, | |
| { | |
| "epoch": 2.9755058572949946, | |
| "grad_norm": 0.60546875, | |
| "learning_rate": 4.003720931013188e-06, | |
| "loss": 0.8627235293388367, | |
| "step": 1398 | |
| }, | |
| { | |
| "epoch": 2.979765708200213, | |
| "grad_norm": 1.75, | |
| "learning_rate": 4.002664157198416e-06, | |
| "loss": 0.7859567999839783, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 2.984025559105431, | |
| "grad_norm": 1.5703125, | |
| "learning_rate": 4.001783476800364e-06, | |
| "loss": 0.8008852005004883, | |
| "step": 1402 | |
| }, | |
| { | |
| "epoch": 2.9882854100106497, | |
| "grad_norm": 0.78515625, | |
| "learning_rate": 4.001078909210742e-06, | |
| "loss": 0.8270421624183655, | |
| "step": 1404 | |
| }, | |
| { | |
| "epoch": 2.992545260915868, | |
| "grad_norm": 0.6171875, | |
| "learning_rate": 4.000550469943431e-06, | |
| "loss": 0.8458138704299927, | |
| "step": 1406 | |
| }, | |
| { | |
| "epoch": 2.9968051118210863, | |
| "grad_norm": 0.5546875, | |
| "learning_rate": 4.0001981706341416e-06, | |
| "loss": 0.8161488771438599, | |
| "step": 1408 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "grad_norm": 1.1015625, | |
| "learning_rate": 4.000022019040156e-06, | |
| "loss": 0.8455473780632019, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "step": 1410, | |
| "total_flos": 4.4194007019526554e+18, | |
| "train_loss": 1.0514326345836018, | |
| "train_runtime": 11229.2105, | |
| "train_samples_per_second": 4.014, | |
| "train_steps_per_second": 0.126 | |
| } | |
| ], | |
| "logging_steps": 2, | |
| "max_steps": 1410, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 99999, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 4.4194007019526554e+18, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |