Text Generation
Transformers
Safetensors
qwen2
llama-factory
full
Generated from Trainer
conversational
text-generation-inference
Instructions to use cjiao/OpenThinker3-1.5B with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use cjiao/OpenThinker3-1.5B with Transformers:
```python
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("text-generation", model="cjiao/OpenThinker3-1.5B")
messages = [
    {"role": "user", "content": "Who are you?"},
]
pipe(messages)
```

```python
# Load model directly
from transformers import AutoTokenizer, AutoModelForCausalLM

tokenizer = AutoTokenizer.from_pretrained("cjiao/OpenThinker3-1.5B")
model = AutoModelForCausalLM.from_pretrained("cjiao/OpenThinker3-1.5B")
messages = [
    {"role": "user", "content": "Who are you?"},
]
inputs = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt=True,
    tokenize=True,
    return_dict=True,
    return_tensors="pt",
).to(model.device)

outputs = model.generate(**inputs, max_new_tokens=40)
print(tokenizer.decode(outputs[0][inputs["input_ids"].shape[-1]:]))
```
- Inference
- Notebooks
- Google Colab
- Kaggle
- Local Apps
- vLLM
How to use cjiao/OpenThinker3-1.5B with vLLM:
Install from pip and serve model
```bash
# Install vLLM from pip:
pip install vllm

# Start the vLLM server:
vllm serve "cjiao/OpenThinker3-1.5B"

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/chat/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "cjiao/OpenThinker3-1.5B",
        "messages": [
            {
                "role": "user",
                "content": "What is the capital of France?"
            }
        ]
    }'
```
Use Docker
docker model run hf.co/cjiao/OpenThinker3-1.5B
- SGLang
How to use cjiao/OpenThinker3-1.5B with SGLang:
Install from pip and serve model
```bash
# Install SGLang from pip:
pip install sglang

# Start the SGLang server:
python3 -m sglang.launch_server \
    --model-path "cjiao/OpenThinker3-1.5B" \
    --host 0.0.0.0 \
    --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "cjiao/OpenThinker3-1.5B",
        "messages": [
            {
                "role": "user",
                "content": "What is the capital of France?"
            }
        ]
    }'
```
Use Docker images
```bash
docker run --gpus all \
    --shm-size 32g \
    -p 30000:30000 \
    -v ~/.cache/huggingface:/root/.cache/huggingface \
    --env "HF_TOKEN=<secret>" \
    --ipc=host \
    lmsysorg/sglang:latest \
    python3 -m sglang.launch_server \
        --model-path "cjiao/OpenThinker3-1.5B" \
        --host 0.0.0.0 \
        --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "cjiao/OpenThinker3-1.5B",
        "messages": [
            {
                "role": "user",
                "content": "What is the capital of France?"
            }
        ]
    }'
```
- Docker Model Runner
How to use cjiao/OpenThinker3-1.5B with Docker Model Runner:
docker model run hf.co/cjiao/OpenThinker3-1.5B
| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 2.9983155530600785, | |
| "eval_steps": 500, | |
| "global_step": 1335, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0022459292532285235, | |
| "grad_norm": 1.9819730520248413, | |
| "learning_rate": 1.1940298507462686e-06, | |
| "loss": 0.8684, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.004491858506457047, | |
| "grad_norm": 2.0259573459625244, | |
| "learning_rate": 2.3880597014925373e-06, | |
| "loss": 0.8821, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.00673778775968557, | |
| "grad_norm": 2.085845947265625, | |
| "learning_rate": 3.582089552238806e-06, | |
| "loss": 0.9416, | |
| "step": 3 | |
| }, | |
| { | |
| "epoch": 0.008983717012914094, | |
| "grad_norm": 1.9758139848709106, | |
| "learning_rate": 4.7761194029850745e-06, | |
| "loss": 0.9074, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.011229646266142616, | |
| "grad_norm": 1.7997628450393677, | |
| "learning_rate": 5.970149253731343e-06, | |
| "loss": 0.9069, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.01347557551937114, | |
| "grad_norm": 1.4710899591445923, | |
| "learning_rate": 7.164179104477612e-06, | |
| "loss": 0.8754, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.015721504772599662, | |
| "grad_norm": 1.4286643266677856, | |
| "learning_rate": 8.35820895522388e-06, | |
| "loss": 0.8896, | |
| "step": 7 | |
| }, | |
| { | |
| "epoch": 0.017967434025828188, | |
| "grad_norm": 1.2477623224258423, | |
| "learning_rate": 9.552238805970149e-06, | |
| "loss": 0.8545, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.02021336327905671, | |
| "grad_norm": 1.2966960668563843, | |
| "learning_rate": 1.074626865671642e-05, | |
| "loss": 0.839, | |
| "step": 9 | |
| }, | |
| { | |
| "epoch": 0.022459292532285232, | |
| "grad_norm": 1.4356369972229004, | |
| "learning_rate": 1.1940298507462686e-05, | |
| "loss": 0.8639, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.024705221785513758, | |
| "grad_norm": 1.1653496026992798, | |
| "learning_rate": 1.3134328358208957e-05, | |
| "loss": 0.8142, | |
| "step": 11 | |
| }, | |
| { | |
| "epoch": 0.02695115103874228, | |
| "grad_norm": 0.9282035231590271, | |
| "learning_rate": 1.4328358208955224e-05, | |
| "loss": 0.8022, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.029197080291970802, | |
| "grad_norm": 1.086950421333313, | |
| "learning_rate": 1.5522388059701494e-05, | |
| "loss": 0.7908, | |
| "step": 13 | |
| }, | |
| { | |
| "epoch": 0.031443009545199324, | |
| "grad_norm": 0.7987905144691467, | |
| "learning_rate": 1.671641791044776e-05, | |
| "loss": 0.7838, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.033688938798427846, | |
| "grad_norm": 0.7030066847801208, | |
| "learning_rate": 1.791044776119403e-05, | |
| "loss": 0.7856, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.035934868051656375, | |
| "grad_norm": 0.7216812372207642, | |
| "learning_rate": 1.9104477611940298e-05, | |
| "loss": 0.7666, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.0381807973048849, | |
| "grad_norm": 0.7004393935203552, | |
| "learning_rate": 2.029850746268657e-05, | |
| "loss": 0.7602, | |
| "step": 17 | |
| }, | |
| { | |
| "epoch": 0.04042672655811342, | |
| "grad_norm": 0.5651209950447083, | |
| "learning_rate": 2.149253731343284e-05, | |
| "loss": 0.7637, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.04267265581134194, | |
| "grad_norm": 0.5799914598464966, | |
| "learning_rate": 2.2686567164179106e-05, | |
| "loss": 0.7357, | |
| "step": 19 | |
| }, | |
| { | |
| "epoch": 0.044918585064570464, | |
| "grad_norm": 0.531233549118042, | |
| "learning_rate": 2.3880597014925373e-05, | |
| "loss": 0.7552, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.047164514317798986, | |
| "grad_norm": 0.5684418678283691, | |
| "learning_rate": 2.5074626865671646e-05, | |
| "loss": 0.7671, | |
| "step": 21 | |
| }, | |
| { | |
| "epoch": 0.049410443571027515, | |
| "grad_norm": 0.4873054623603821, | |
| "learning_rate": 2.6268656716417913e-05, | |
| "loss": 0.737, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.05165637282425604, | |
| "grad_norm": 0.49275314807891846, | |
| "learning_rate": 2.746268656716418e-05, | |
| "loss": 0.7362, | |
| "step": 23 | |
| }, | |
| { | |
| "epoch": 0.05390230207748456, | |
| "grad_norm": 0.47075843811035156, | |
| "learning_rate": 2.8656716417910447e-05, | |
| "loss": 0.7234, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.05614823133071308, | |
| "grad_norm": 0.3865251839160919, | |
| "learning_rate": 2.985074626865672e-05, | |
| "loss": 0.7169, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.058394160583941604, | |
| "grad_norm": 0.4154004156589508, | |
| "learning_rate": 3.104477611940299e-05, | |
| "loss": 0.7119, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.060640089837170126, | |
| "grad_norm": 0.37092125415802, | |
| "learning_rate": 3.2238805970149255e-05, | |
| "loss": 0.7138, | |
| "step": 27 | |
| }, | |
| { | |
| "epoch": 0.06288601909039865, | |
| "grad_norm": 0.3488411605358124, | |
| "learning_rate": 3.343283582089552e-05, | |
| "loss": 0.7216, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.06513194834362718, | |
| "grad_norm": 0.32693928480148315, | |
| "learning_rate": 3.462686567164179e-05, | |
| "loss": 0.6925, | |
| "step": 29 | |
| }, | |
| { | |
| "epoch": 0.06737787759685569, | |
| "grad_norm": 0.34904295206069946, | |
| "learning_rate": 3.582089552238806e-05, | |
| "loss": 0.7105, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.06962380685008422, | |
| "grad_norm": 0.32673367857933044, | |
| "learning_rate": 3.701492537313433e-05, | |
| "loss": 0.696, | |
| "step": 31 | |
| }, | |
| { | |
| "epoch": 0.07186973610331275, | |
| "grad_norm": 0.32177790999412537, | |
| "learning_rate": 3.8208955223880596e-05, | |
| "loss": 0.7064, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.07411566535654127, | |
| "grad_norm": 0.3286134600639343, | |
| "learning_rate": 3.940298507462687e-05, | |
| "loss": 0.7091, | |
| "step": 33 | |
| }, | |
| { | |
| "epoch": 0.0763615946097698, | |
| "grad_norm": 0.3438747525215149, | |
| "learning_rate": 4.059701492537314e-05, | |
| "loss": 0.7149, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.07860752386299831, | |
| "grad_norm": 0.29362648725509644, | |
| "learning_rate": 4.1791044776119404e-05, | |
| "loss": 0.685, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.08085345311622684, | |
| "grad_norm": 0.30074256658554077, | |
| "learning_rate": 4.298507462686568e-05, | |
| "loss": 0.7011, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.08309938236945537, | |
| "grad_norm": 0.3120618462562561, | |
| "learning_rate": 4.4179104477611944e-05, | |
| "loss": 0.684, | |
| "step": 37 | |
| }, | |
| { | |
| "epoch": 0.08534531162268388, | |
| "grad_norm": 0.2569892406463623, | |
| "learning_rate": 4.537313432835821e-05, | |
| "loss": 0.684, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.08759124087591241, | |
| "grad_norm": 0.28327882289886475, | |
| "learning_rate": 4.6567164179104485e-05, | |
| "loss": 0.6968, | |
| "step": 39 | |
| }, | |
| { | |
| "epoch": 0.08983717012914093, | |
| "grad_norm": 0.26424843072891235, | |
| "learning_rate": 4.7761194029850745e-05, | |
| "loss": 0.6915, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.09208309938236946, | |
| "grad_norm": 0.2620261609554291, | |
| "learning_rate": 4.895522388059702e-05, | |
| "loss": 0.6744, | |
| "step": 41 | |
| }, | |
| { | |
| "epoch": 0.09432902863559797, | |
| "grad_norm": 0.32121092081069946, | |
| "learning_rate": 5.014925373134329e-05, | |
| "loss": 0.6746, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.0965749578888265, | |
| "grad_norm": 0.3997937738895416, | |
| "learning_rate": 5.134328358208955e-05, | |
| "loss": 0.6806, | |
| "step": 43 | |
| }, | |
| { | |
| "epoch": 0.09882088714205503, | |
| "grad_norm": 0.3264799416065216, | |
| "learning_rate": 5.2537313432835826e-05, | |
| "loss": 0.6729, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.10106681639528355, | |
| "grad_norm": 0.33052176237106323, | |
| "learning_rate": 5.37313432835821e-05, | |
| "loss": 0.6758, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.10331274564851207, | |
| "grad_norm": 0.43345314264297485, | |
| "learning_rate": 5.492537313432836e-05, | |
| "loss": 0.6767, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.10555867490174059, | |
| "grad_norm": 0.37080681324005127, | |
| "learning_rate": 5.6119402985074634e-05, | |
| "loss": 0.6526, | |
| "step": 47 | |
| }, | |
| { | |
| "epoch": 0.10780460415496912, | |
| "grad_norm": 0.381356418132782, | |
| "learning_rate": 5.7313432835820894e-05, | |
| "loss": 0.6739, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.11005053340819765, | |
| "grad_norm": 0.36677348613739014, | |
| "learning_rate": 5.850746268656717e-05, | |
| "loss": 0.6782, | |
| "step": 49 | |
| }, | |
| { | |
| "epoch": 0.11229646266142616, | |
| "grad_norm": 0.40393349528312683, | |
| "learning_rate": 5.970149253731344e-05, | |
| "loss": 0.6528, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.11454239191465469, | |
| "grad_norm": 0.5078914165496826, | |
| "learning_rate": 6.08955223880597e-05, | |
| "loss": 0.6697, | |
| "step": 51 | |
| }, | |
| { | |
| "epoch": 0.11678832116788321, | |
| "grad_norm": 0.742857813835144, | |
| "learning_rate": 6.208955223880598e-05, | |
| "loss": 0.6341, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.11903425042111174, | |
| "grad_norm": 0.8604367971420288, | |
| "learning_rate": 6.328358208955224e-05, | |
| "loss": 0.662, | |
| "step": 53 | |
| }, | |
| { | |
| "epoch": 0.12128017967434025, | |
| "grad_norm": 0.7391287684440613, | |
| "learning_rate": 6.447761194029851e-05, | |
| "loss": 0.6696, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.12352610892756878, | |
| "grad_norm": 0.5703966617584229, | |
| "learning_rate": 6.567164179104479e-05, | |
| "loss": 0.6619, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.1257720381807973, | |
| "grad_norm": 0.7210264801979065, | |
| "learning_rate": 6.686567164179104e-05, | |
| "loss": 0.6647, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.12801796743402583, | |
| "grad_norm": 0.8133912682533264, | |
| "learning_rate": 6.805970149253732e-05, | |
| "loss": 0.658, | |
| "step": 57 | |
| }, | |
| { | |
| "epoch": 0.13026389668725435, | |
| "grad_norm": 0.9062953591346741, | |
| "learning_rate": 6.925373134328358e-05, | |
| "loss": 0.6732, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.13250982594048288, | |
| "grad_norm": 0.9497516751289368, | |
| "learning_rate": 7.044776119402986e-05, | |
| "loss": 0.6743, | |
| "step": 59 | |
| }, | |
| { | |
| "epoch": 0.13475575519371139, | |
| "grad_norm": 0.5923281908035278, | |
| "learning_rate": 7.164179104477612e-05, | |
| "loss": 0.6609, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.13700168444693991, | |
| "grad_norm": 0.839241087436676, | |
| "learning_rate": 7.283582089552239e-05, | |
| "loss": 0.6673, | |
| "step": 61 | |
| }, | |
| { | |
| "epoch": 0.13924761370016844, | |
| "grad_norm": 0.9110313653945923, | |
| "learning_rate": 7.402985074626866e-05, | |
| "loss": 0.6795, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.14149354295339697, | |
| "grad_norm": 0.6465680599212646, | |
| "learning_rate": 7.522388059701494e-05, | |
| "loss": 0.6634, | |
| "step": 63 | |
| }, | |
| { | |
| "epoch": 0.1437394722066255, | |
| "grad_norm": 0.5419987440109253, | |
| "learning_rate": 7.641791044776119e-05, | |
| "loss": 0.6489, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.145985401459854, | |
| "grad_norm": 0.6124593019485474, | |
| "learning_rate": 7.761194029850747e-05, | |
| "loss": 0.6617, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.14823133071308253, | |
| "grad_norm": 0.5836852788925171, | |
| "learning_rate": 7.880597014925374e-05, | |
| "loss": 0.6319, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.15047725996631106, | |
| "grad_norm": 0.6319289207458496, | |
| "learning_rate": 8e-05, | |
| "loss": 0.6504, | |
| "step": 67 | |
| }, | |
| { | |
| "epoch": 0.1527231892195396, | |
| "grad_norm": 0.6081493496894836, | |
| "learning_rate": 8.119402985074627e-05, | |
| "loss": 0.65, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.15496911847276812, | |
| "grad_norm": 0.5973412394523621, | |
| "learning_rate": 8.238805970149255e-05, | |
| "loss": 0.6449, | |
| "step": 69 | |
| }, | |
| { | |
| "epoch": 0.15721504772599662, | |
| "grad_norm": 0.6423139572143555, | |
| "learning_rate": 8.358208955223881e-05, | |
| "loss": 0.6584, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.15946097697922515, | |
| "grad_norm": 0.7579260468482971, | |
| "learning_rate": 8.477611940298507e-05, | |
| "loss": 0.6472, | |
| "step": 71 | |
| }, | |
| { | |
| "epoch": 0.16170690623245368, | |
| "grad_norm": 0.8475743532180786, | |
| "learning_rate": 8.597014925373135e-05, | |
| "loss": 0.6405, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.1639528354856822, | |
| "grad_norm": 0.5964512228965759, | |
| "learning_rate": 8.716417910447762e-05, | |
| "loss": 0.633, | |
| "step": 73 | |
| }, | |
| { | |
| "epoch": 0.16619876473891074, | |
| "grad_norm": 0.47265729308128357, | |
| "learning_rate": 8.835820895522389e-05, | |
| "loss": 0.6453, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.16844469399213924, | |
| "grad_norm": 0.7188097238540649, | |
| "learning_rate": 8.955223880597014e-05, | |
| "loss": 0.6603, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.17069062324536777, | |
| "grad_norm": 0.49939826130867004, | |
| "learning_rate": 9.074626865671642e-05, | |
| "loss": 0.6339, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.1729365524985963, | |
| "grad_norm": 0.5468081831932068, | |
| "learning_rate": 9.194029850746269e-05, | |
| "loss": 0.639, | |
| "step": 77 | |
| }, | |
| { | |
| "epoch": 0.17518248175182483, | |
| "grad_norm": 0.6105530858039856, | |
| "learning_rate": 9.313432835820897e-05, | |
| "loss": 0.6537, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.17742841100505333, | |
| "grad_norm": 0.48114606738090515, | |
| "learning_rate": 9.432835820895524e-05, | |
| "loss": 0.6579, | |
| "step": 79 | |
| }, | |
| { | |
| "epoch": 0.17967434025828186, | |
| "grad_norm": 0.6263488531112671, | |
| "learning_rate": 9.552238805970149e-05, | |
| "loss": 0.6292, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.18192026951151039, | |
| "grad_norm": 0.5369325280189514, | |
| "learning_rate": 9.671641791044777e-05, | |
| "loss": 0.6608, | |
| "step": 81 | |
| }, | |
| { | |
| "epoch": 0.18416619876473891, | |
| "grad_norm": 0.7140039801597595, | |
| "learning_rate": 9.791044776119404e-05, | |
| "loss": 0.6339, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.18641212801796744, | |
| "grad_norm": 0.9011125564575195, | |
| "learning_rate": 9.91044776119403e-05, | |
| "loss": 0.6342, | |
| "step": 83 | |
| }, | |
| { | |
| "epoch": 0.18865805727119594, | |
| "grad_norm": 1.1369616985321045, | |
| "learning_rate": 0.00010029850746268659, | |
| "loss": 0.6442, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.19090398652442447, | |
| "grad_norm": 1.0306285619735718, | |
| "learning_rate": 0.00010149253731343285, | |
| "loss": 0.6419, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.193149915777653, | |
| "grad_norm": 0.8979660272598267, | |
| "learning_rate": 0.0001026865671641791, | |
| "loss": 0.632, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.19539584503088153, | |
| "grad_norm": 0.6676183342933655, | |
| "learning_rate": 0.00010388059701492539, | |
| "loss": 0.6386, | |
| "step": 87 | |
| }, | |
| { | |
| "epoch": 0.19764177428411006, | |
| "grad_norm": 0.7217721939086914, | |
| "learning_rate": 0.00010507462686567165, | |
| "loss": 0.6546, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.19988770353733856, | |
| "grad_norm": 0.7290446162223816, | |
| "learning_rate": 0.00010626865671641792, | |
| "loss": 0.6328, | |
| "step": 89 | |
| }, | |
| { | |
| "epoch": 0.2021336327905671, | |
| "grad_norm": 0.8381432890892029, | |
| "learning_rate": 0.0001074626865671642, | |
| "loss": 0.6311, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.20437956204379562, | |
| "grad_norm": 1.0938982963562012, | |
| "learning_rate": 0.00010865671641791045, | |
| "loss": 0.6559, | |
| "step": 91 | |
| }, | |
| { | |
| "epoch": 0.20662549129702415, | |
| "grad_norm": 0.8039063215255737, | |
| "learning_rate": 0.00010985074626865672, | |
| "loss": 0.636, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.20887142055025268, | |
| "grad_norm": 0.7171061635017395, | |
| "learning_rate": 0.000111044776119403, | |
| "loss": 0.6456, | |
| "step": 93 | |
| }, | |
| { | |
| "epoch": 0.21111734980348118, | |
| "grad_norm": 0.7186174988746643, | |
| "learning_rate": 0.00011223880597014927, | |
| "loss": 0.6285, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.2133632790567097, | |
| "grad_norm": 0.6290779113769531, | |
| "learning_rate": 0.00011343283582089553, | |
| "loss": 0.6336, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.21560920830993824, | |
| "grad_norm": 0.7359249591827393, | |
| "learning_rate": 0.00011462686567164179, | |
| "loss": 0.6542, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.21785513756316677, | |
| "grad_norm": 0.775365948677063, | |
| "learning_rate": 0.00011582089552238807, | |
| "loss": 0.6369, | |
| "step": 97 | |
| }, | |
| { | |
| "epoch": 0.2201010668163953, | |
| "grad_norm": 0.8260976076126099, | |
| "learning_rate": 0.00011701492537313434, | |
| "loss": 0.6142, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.2223469960696238, | |
| "grad_norm": 0.704872727394104, | |
| "learning_rate": 0.00011820895522388062, | |
| "loss": 0.6473, | |
| "step": 99 | |
| }, | |
| { | |
| "epoch": 0.22459292532285233, | |
| "grad_norm": 0.5987293124198914, | |
| "learning_rate": 0.00011940298507462688, | |
| "loss": 0.6458, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.22683885457608086, | |
| "grad_norm": 0.7472802400588989, | |
| "learning_rate": 0.00012059701492537314, | |
| "loss": 0.6235, | |
| "step": 101 | |
| }, | |
| { | |
| "epoch": 0.22908478382930939, | |
| "grad_norm": 0.7303177118301392, | |
| "learning_rate": 0.0001217910447761194, | |
| "loss": 0.6432, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.2313307130825379, | |
| "grad_norm": 0.5669957995414734, | |
| "learning_rate": 0.00012298507462686568, | |
| "loss": 0.6276, | |
| "step": 103 | |
| }, | |
| { | |
| "epoch": 0.23357664233576642, | |
| "grad_norm": 0.47117286920547485, | |
| "learning_rate": 0.00012417910447761195, | |
| "loss": 0.6429, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.23582257158899494, | |
| "grad_norm": 0.6563988327980042, | |
| "learning_rate": 0.00012537313432835822, | |
| "loss": 0.6276, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.23806850084222347, | |
| "grad_norm": 0.5849066972732544, | |
| "learning_rate": 0.00012656716417910448, | |
| "loss": 0.6309, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.240314430095452, | |
| "grad_norm": 0.7347849607467651, | |
| "learning_rate": 0.00012776119402985075, | |
| "loss": 0.6382, | |
| "step": 107 | |
| }, | |
| { | |
| "epoch": 0.2425603593486805, | |
| "grad_norm": 0.6520137190818787, | |
| "learning_rate": 0.00012895522388059702, | |
| "loss": 0.6386, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.24480628860190903, | |
| "grad_norm": 0.60540372133255, | |
| "learning_rate": 0.00013014925373134329, | |
| "loss": 0.613, | |
| "step": 109 | |
| }, | |
| { | |
| "epoch": 0.24705221785513756, | |
| "grad_norm": 0.7710558176040649, | |
| "learning_rate": 0.00013134328358208958, | |
| "loss": 0.6104, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.2492981471083661, | |
| "grad_norm": 0.6582499742507935, | |
| "learning_rate": 0.00013253731343283582, | |
| "loss": 0.628, | |
| "step": 111 | |
| }, | |
| { | |
| "epoch": 0.2515440763615946, | |
| "grad_norm": 0.6089588403701782, | |
| "learning_rate": 0.00013373134328358209, | |
| "loss": 0.6313, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.2537900056148231, | |
| "grad_norm": 0.5754179358482361, | |
| "learning_rate": 0.00013492537313432838, | |
| "loss": 0.6283, | |
| "step": 113 | |
| }, | |
| { | |
| "epoch": 0.25603593486805165, | |
| "grad_norm": 0.617273211479187, | |
| "learning_rate": 0.00013611940298507465, | |
| "loss": 0.6187, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.2582818641212802, | |
| "grad_norm": 0.6104961037635803, | |
| "learning_rate": 0.00013731343283582091, | |
| "loss": 0.6267, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.2605277933745087, | |
| "grad_norm": 0.691856861114502, | |
| "learning_rate": 0.00013850746268656715, | |
| "loss": 0.6202, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.26277372262773724, | |
| "grad_norm": 0.8089864253997803, | |
| "learning_rate": 0.00013970149253731345, | |
| "loss": 0.635, | |
| "step": 117 | |
| }, | |
| { | |
| "epoch": 0.26501965188096577, | |
| "grad_norm": 1.1346023082733154, | |
| "learning_rate": 0.00014089552238805972, | |
| "loss": 0.6462, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 0.2672655811341943, | |
| "grad_norm": 0.8319297432899475, | |
| "learning_rate": 0.00014208955223880598, | |
| "loss": 0.6179, | |
| "step": 119 | |
| }, | |
| { | |
| "epoch": 0.26951151038742277, | |
| "grad_norm": 0.5904942154884338, | |
| "learning_rate": 0.00014328358208955225, | |
| "loss": 0.629, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.2717574396406513, | |
| "grad_norm": 0.5950160026550293, | |
| "learning_rate": 0.00014447761194029852, | |
| "loss": 0.6245, | |
| "step": 121 | |
| }, | |
| { | |
| "epoch": 0.27400336889387983, | |
| "grad_norm": 0.6426451802253723, | |
| "learning_rate": 0.00014567164179104478, | |
| "loss": 0.614, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 0.27624929814710836, | |
| "grad_norm": 0.6028596758842468, | |
| "learning_rate": 0.00014686567164179105, | |
| "loss": 0.6127, | |
| "step": 123 | |
| }, | |
| { | |
| "epoch": 0.2784952274003369, | |
| "grad_norm": 0.6075330972671509, | |
| "learning_rate": 0.00014805970149253732, | |
| "loss": 0.6283, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.2807411566535654, | |
| "grad_norm": 0.6084921360015869, | |
| "learning_rate": 0.0001492537313432836, | |
| "loss": 0.6351, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.28298708590679394, | |
| "grad_norm": 0.627112865447998, | |
| "learning_rate": 0.00015044776119402988, | |
| "loss": 0.6393, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 0.2852330151600225, | |
| "grad_norm": 0.6501988172531128, | |
| "learning_rate": 0.00015164179104477612, | |
| "loss": 0.6097, | |
| "step": 127 | |
| }, | |
| { | |
| "epoch": 0.287478944413251, | |
| "grad_norm": 0.6280235648155212, | |
| "learning_rate": 0.00015283582089552238, | |
| "loss": 0.6281, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.28972487366647953, | |
| "grad_norm": 0.49232786893844604, | |
| "learning_rate": 0.00015402985074626868, | |
| "loss": 0.6341, | |
| "step": 129 | |
| }, | |
| { | |
| "epoch": 0.291970802919708, | |
| "grad_norm": 0.5303974747657776, | |
| "learning_rate": 0.00015522388059701495, | |
| "loss": 0.6098, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.29421673217293653, | |
| "grad_norm": 0.5729207992553711, | |
| "learning_rate": 0.0001564179104477612, | |
| "loss": 0.617, | |
| "step": 131 | |
| }, | |
| { | |
| "epoch": 0.29646266142616506, | |
| "grad_norm": 0.6265519857406616, | |
| "learning_rate": 0.00015761194029850748, | |
| "loss": 0.5968, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 0.2987085906793936, | |
| "grad_norm": 0.6463232636451721, | |
| "learning_rate": 0.00015880597014925375, | |
| "loss": 0.6391, | |
| "step": 133 | |
| }, | |
| { | |
| "epoch": 0.3009545199326221, | |
| "grad_norm": 0.593257486820221, | |
| "learning_rate": 0.00016, | |
| "loss": 0.6189, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 0.30320044918585065, | |
| "grad_norm": 0.5925970077514648, | |
| "learning_rate": 0.00015999972630083387, | |
| "loss": 0.6139, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.3054463784390792, | |
| "grad_norm": 0.6394967436790466, | |
| "learning_rate": 0.00015999890520520824, | |
| "loss": 0.6038, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.3076923076923077, | |
| "grad_norm": 0.649044394493103, | |
| "learning_rate": 0.00015999753671874147, | |
| "loss": 0.614, | |
| "step": 137 | |
| }, | |
| { | |
| "epoch": 0.30993823694553624, | |
| "grad_norm": 0.6179019808769226, | |
| "learning_rate": 0.00015999562085079733, | |
| "loss": 0.6171, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 0.3121841661987647, | |
| "grad_norm": 0.5114040374755859, | |
| "learning_rate": 0.0001599931576144852, | |
| "loss": 0.6076, | |
| "step": 139 | |
| }, | |
| { | |
| "epoch": 0.31443009545199324, | |
| "grad_norm": 0.5721436142921448, | |
| "learning_rate": 0.00015999014702665964, | |
| "loss": 0.6173, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.31667602470522177, | |
| "grad_norm": 0.7164266109466553, | |
| "learning_rate": 0.00015998658910792058, | |
| "loss": 0.611, | |
| "step": 141 | |
| }, | |
| { | |
| "epoch": 0.3189219539584503, | |
| "grad_norm": 0.8134217858314514, | |
| "learning_rate": 0.00015998248388261302, | |
| "loss": 0.6296, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 0.32116788321167883, | |
| "grad_norm": 0.828131377696991, | |
| "learning_rate": 0.00015997783137882682, | |
| "loss": 0.6331, | |
| "step": 143 | |
| }, | |
| { | |
| "epoch": 0.32341381246490736, | |
| "grad_norm": 0.7628505825996399, | |
| "learning_rate": 0.00015997263162839667, | |
| "loss": 0.6524, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.3256597417181359, | |
| "grad_norm": 0.6403250098228455, | |
| "learning_rate": 0.0001599668846669018, | |
| "loss": 0.6097, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.3279056709713644, | |
| "grad_norm": 0.5496403574943542, | |
| "learning_rate": 0.00015996059053366562, | |
| "loss": 0.6187, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 0.33015160022459294, | |
| "grad_norm": 0.6352928876876831, | |
| "learning_rate": 0.0001599537492717556, | |
| "loss": 0.619, | |
| "step": 147 | |
| }, | |
| { | |
| "epoch": 0.3323975294778215, | |
| "grad_norm": 0.6073532104492188, | |
| "learning_rate": 0.00015994636092798295, | |
| "loss": 0.6218, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 0.33464345873104995, | |
| "grad_norm": 0.40914225578308105, | |
| "learning_rate": 0.00015993842555290226, | |
| "loss": 0.6161, | |
| "step": 149 | |
| }, | |
| { | |
| "epoch": 0.3368893879842785, | |
| "grad_norm": 0.4364437758922577, | |
| "learning_rate": 0.0001599299432008112, | |
| "loss": 0.637, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.339135317237507, | |
| "grad_norm": 0.5311095118522644, | |
| "learning_rate": 0.00015992091392975002, | |
| "loss": 0.5972, | |
| "step": 151 | |
| }, | |
| { | |
| "epoch": 0.34138124649073553, | |
| "grad_norm": 0.545671284198761, | |
| "learning_rate": 0.00015991133780150136, | |
| "loss": 0.6103, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 0.34362717574396406, | |
| "grad_norm": 0.4276280105113983, | |
| "learning_rate": 0.00015990121488158968, | |
| "loss": 0.6148, | |
| "step": 153 | |
| }, | |
| { | |
| "epoch": 0.3458731049971926, | |
| "grad_norm": 0.4059518575668335, | |
| "learning_rate": 0.00015989054523928085, | |
| "loss": 0.6332, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 0.3481190342504211, | |
| "grad_norm": 0.42028188705444336, | |
| "learning_rate": 0.00015987932894758164, | |
| "loss": 0.5972, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 0.35036496350364965, | |
| "grad_norm": 0.44919151067733765, | |
| "learning_rate": 0.00015986756608323932, | |
| "loss": 0.6017, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 0.3526108927568782, | |
| "grad_norm": 0.3990235924720764, | |
| "learning_rate": 0.00015985525672674103, | |
| "loss": 0.6146, | |
| "step": 157 | |
| }, | |
| { | |
| "epoch": 0.35485682201010665, | |
| "grad_norm": 0.42787206172943115, | |
| "learning_rate": 0.0001598424009623133, | |
| "loss": 0.6199, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 0.3571027512633352, | |
| "grad_norm": 0.44234439730644226, | |
| "learning_rate": 0.00015982899887792145, | |
| "loss": 0.6279, | |
| "step": 159 | |
| }, | |
| { | |
| "epoch": 0.3593486805165637, | |
| "grad_norm": 0.4152110815048218, | |
| "learning_rate": 0.00015981505056526893, | |
| "loss": 0.6032, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.36159460976979224, | |
| "grad_norm": 0.36194872856140137, | |
| "learning_rate": 0.0001598005561197968, | |
| "loss": 0.6286, | |
| "step": 161 | |
| }, | |
| { | |
| "epoch": 0.36384053902302077, | |
| "grad_norm": 0.4214819669723511, | |
| "learning_rate": 0.00015978551564068295, | |
| "loss": 0.6006, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 0.3660864682762493, | |
| "grad_norm": 0.41948559880256653, | |
| "learning_rate": 0.00015976992923084161, | |
| "loss": 0.615, | |
| "step": 163 | |
| }, | |
| { | |
| "epoch": 0.36833239752947783, | |
| "grad_norm": 0.44141775369644165, | |
| "learning_rate": 0.00015975379699692245, | |
| "loss": 0.6236, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 0.37057832678270636, | |
| "grad_norm": 0.47903239727020264, | |
| "learning_rate": 0.00015973711904930993, | |
| "loss": 0.5979, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 0.3728242560359349, | |
| "grad_norm": 0.45099982619285583, | |
| "learning_rate": 0.00015971989550212255, | |
| "loss": 0.6229, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 0.3750701852891634, | |
| "grad_norm": 0.4214828312397003, | |
| "learning_rate": 0.00015970212647321207, | |
| "loss": 0.6146, | |
| "step": 167 | |
| }, | |
| { | |
| "epoch": 0.3773161145423919, | |
| "grad_norm": 0.43835896253585815, | |
| "learning_rate": 0.00015968381208416273, | |
| "loss": 0.6162, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 0.3795620437956204, | |
| "grad_norm": 0.4372192621231079, | |
| "learning_rate": 0.00015966495246029033, | |
| "loss": 0.6152, | |
| "step": 169 | |
| }, | |
| { | |
| "epoch": 0.38180797304884895, | |
| "grad_norm": 0.45570138096809387, | |
| "learning_rate": 0.00015964554773064148, | |
| "loss": 0.6107, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.3840539023020775, | |
| "grad_norm": 0.5014758706092834, | |
| "learning_rate": 0.0001596255980279926, | |
| "loss": 0.5861, | |
| "step": 171 | |
| }, | |
| { | |
| "epoch": 0.386299831555306, | |
| "grad_norm": 0.46727222204208374, | |
| "learning_rate": 0.00015960510348884914, | |
| "loss": 0.6104, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 0.38854576080853453, | |
| "grad_norm": 0.5081140398979187, | |
| "learning_rate": 0.00015958406425344455, | |
| "loss": 0.5948, | |
| "step": 173 | |
| }, | |
| { | |
| "epoch": 0.39079169006176306, | |
| "grad_norm": 0.4470350742340088, | |
| "learning_rate": 0.00015956248046573938, | |
| "loss": 0.5924, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 0.3930376193149916, | |
| "grad_norm": 0.36340662837028503, | |
| "learning_rate": 0.00015954035227342019, | |
| "loss": 0.5972, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.3952835485682201, | |
| "grad_norm": 0.34276771545410156, | |
| "learning_rate": 0.00015951767982789875, | |
| "loss": 0.5955, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 0.39752947782144865, | |
| "grad_norm": 0.35867977142333984, | |
| "learning_rate": 0.00015949446328431075, | |
| "loss": 0.611, | |
| "step": 177 | |
| }, | |
| { | |
| "epoch": 0.3997754070746771, | |
| "grad_norm": 0.3728366792201996, | |
| "learning_rate": 0.00015947070280151492, | |
| "loss": 0.6117, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 0.40202133632790565, | |
| "grad_norm": 0.32302939891815186, | |
| "learning_rate": 0.00015944639854209184, | |
| "loss": 0.6225, | |
| "step": 179 | |
| }, | |
| { | |
| "epoch": 0.4042672655811342, | |
| "grad_norm": 0.33579641580581665, | |
| "learning_rate": 0.00015942155067234293, | |
| "loss": 0.5915, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.4065131948343627, | |
| "grad_norm": 0.29243412613868713, | |
| "learning_rate": 0.00015939615936228922, | |
| "loss": 0.5915, | |
| "step": 181 | |
| }, | |
| { | |
| "epoch": 0.40875912408759124, | |
| "grad_norm": 0.32980793714523315, | |
| "learning_rate": 0.00015937022478567023, | |
| "loss": 0.6172, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 0.41100505334081977, | |
| "grad_norm": 0.30575114488601685, | |
| "learning_rate": 0.0001593437471199427, | |
| "loss": 0.5958, | |
| "step": 183 | |
| }, | |
| { | |
| "epoch": 0.4132509825940483, | |
| "grad_norm": 0.3298383951187134, | |
| "learning_rate": 0.00015931672654627958, | |
| "loss": 0.5949, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 0.41549691184727683, | |
| "grad_norm": 0.330642431974411, | |
| "learning_rate": 0.00015928916324956855, | |
| "loss": 0.5929, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 0.41774284110050536, | |
| "grad_norm": 0.33059626817703247, | |
| "learning_rate": 0.00015926105741841088, | |
| "loss": 0.609, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 0.41998877035373383, | |
| "grad_norm": 0.3915445804595947, | |
| "learning_rate": 0.00015923240924512014, | |
| "loss": 0.6045, | |
| "step": 187 | |
| }, | |
| { | |
| "epoch": 0.42223469960696236, | |
| "grad_norm": 0.3589101731777191, | |
| "learning_rate": 0.00015920321892572088, | |
| "loss": 0.6175, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 0.4244806288601909, | |
| "grad_norm": 0.399964302778244, | |
| "learning_rate": 0.00015917348665994723, | |
| "loss": 0.6157, | |
| "step": 189 | |
| }, | |
| { | |
| "epoch": 0.4267265581134194, | |
| "grad_norm": 0.3923908770084381, | |
| "learning_rate": 0.0001591432126512416, | |
| "loss": 0.6041, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.42897248736664795, | |
| "grad_norm": 0.38844165205955505, | |
| "learning_rate": 0.0001591123971067533, | |
| "loss": 0.5865, | |
| "step": 191 | |
| }, | |
| { | |
| "epoch": 0.4312184166198765, | |
| "grad_norm": 0.41744178533554077, | |
| "learning_rate": 0.00015908104023733697, | |
| "loss": 0.5823, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 0.433464345873105, | |
| "grad_norm": 0.3478281795978546, | |
| "learning_rate": 0.0001590491422575514, | |
| "loss": 0.6064, | |
| "step": 193 | |
| }, | |
| { | |
| "epoch": 0.43571027512633353, | |
| "grad_norm": 0.38580065965652466, | |
| "learning_rate": 0.00015901670338565785, | |
| "loss": 0.6119, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 0.43795620437956206, | |
| "grad_norm": 0.5283933877944946, | |
| "learning_rate": 0.0001589837238436186, | |
| "loss": 0.5945, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 0.4402021336327906, | |
| "grad_norm": 0.48087194561958313, | |
| "learning_rate": 0.00015895020385709553, | |
| "loss": 0.6058, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 0.44244806288601907, | |
| "grad_norm": 0.35071608424186707, | |
| "learning_rate": 0.00015891614365544837, | |
| "loss": 0.5672, | |
| "step": 197 | |
| }, | |
| { | |
| "epoch": 0.4446939921392476, | |
| "grad_norm": 0.3820844888687134, | |
| "learning_rate": 0.0001588815434717334, | |
| "loss": 0.5898, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 0.4469399213924761, | |
| "grad_norm": 0.3622789680957794, | |
| "learning_rate": 0.0001588464035427016, | |
| "loss": 0.5842, | |
| "step": 199 | |
| }, | |
| { | |
| "epoch": 0.44918585064570465, | |
| "grad_norm": 0.348568856716156, | |
| "learning_rate": 0.00015881072410879726, | |
| "loss": 0.6025, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.4514317798989332, | |
| "grad_norm": 0.36718496680259705, | |
| "learning_rate": 0.00015877450541415615, | |
| "loss": 0.5888, | |
| "step": 201 | |
| }, | |
| { | |
| "epoch": 0.4536777091521617, | |
| "grad_norm": 0.39695170521736145, | |
| "learning_rate": 0.0001587377477066039, | |
| "loss": 0.6159, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 0.45592363840539024, | |
| "grad_norm": 0.4380107522010803, | |
| "learning_rate": 0.0001587004512376544, | |
| "loss": 0.6001, | |
| "step": 203 | |
| }, | |
| { | |
| "epoch": 0.45816956765861877, | |
| "grad_norm": 0.40494075417518616, | |
| "learning_rate": 0.00015866261626250794, | |
| "loss": 0.6016, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 0.4604154969118473, | |
| "grad_norm": 0.3275372385978699, | |
| "learning_rate": 0.00015862424304004954, | |
| "loss": 0.5918, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 0.4626614261650758, | |
| "grad_norm": 0.3288284242153168, | |
| "learning_rate": 0.00015858533183284718, | |
| "loss": 0.608, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 0.4649073554183043, | |
| "grad_norm": 0.32171040773391724, | |
| "learning_rate": 0.00015854588290714999, | |
| "loss": 0.5816, | |
| "step": 207 | |
| }, | |
| { | |
| "epoch": 0.46715328467153283, | |
| "grad_norm": 0.3992040157318115, | |
| "learning_rate": 0.00015850589653288642, | |
| "loss": 0.591, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 0.46939921392476136, | |
| "grad_norm": 0.38158226013183594, | |
| "learning_rate": 0.00015846537298366242, | |
| "loss": 0.5831, | |
| "step": 209 | |
| }, | |
| { | |
| "epoch": 0.4716451431779899, | |
| "grad_norm": 0.32366326451301575, | |
| "learning_rate": 0.0001584243125367595, | |
| "loss": 0.5822, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.4738910724312184, | |
| "grad_norm": 0.41187676787376404, | |
| "learning_rate": 0.00015838271547313293, | |
| "loss": 0.6027, | |
| "step": 211 | |
| }, | |
| { | |
| "epoch": 0.47613700168444695, | |
| "grad_norm": 0.48473531007766724, | |
| "learning_rate": 0.00015834058207740974, | |
| "loss": 0.5819, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 0.4783829309376755, | |
| "grad_norm": 0.3934939205646515, | |
| "learning_rate": 0.00015829791263788682, | |
| "loss": 0.6042, | |
| "step": 213 | |
| }, | |
| { | |
| "epoch": 0.480628860190904, | |
| "grad_norm": 0.32344624400138855, | |
| "learning_rate": 0.00015825470744652894, | |
| "loss": 0.5717, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 0.48287478944413254, | |
| "grad_norm": 0.27189725637435913, | |
| "learning_rate": 0.0001582109667989667, | |
| "loss": 0.6015, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 0.485120718697361, | |
| "grad_norm": 0.349128395318985, | |
| "learning_rate": 0.00015816669099449454, | |
| "loss": 0.6037, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 0.48736664795058954, | |
| "grad_norm": 0.3456957936286926, | |
| "learning_rate": 0.00015812188033606877, | |
| "loss": 0.5974, | |
| "step": 217 | |
| }, | |
| { | |
| "epoch": 0.48961257720381807, | |
| "grad_norm": 0.29926273226737976, | |
| "learning_rate": 0.00015807653513030538, | |
| "loss": 0.6, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 0.4918585064570466, | |
| "grad_norm": 0.3260749280452728, | |
| "learning_rate": 0.00015803065568747798, | |
| "loss": 0.5955, | |
| "step": 219 | |
| }, | |
| { | |
| "epoch": 0.4941044357102751, | |
| "grad_norm": 0.4071785509586334, | |
| "learning_rate": 0.00015798424232151573, | |
| "loss": 0.5899, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.49635036496350365, | |
| "grad_norm": 0.37568220496177673, | |
| "learning_rate": 0.00015793729535000108, | |
| "loss": 0.6008, | |
| "step": 221 | |
| }, | |
| { | |
| "epoch": 0.4985962942167322, | |
| "grad_norm": 0.4158768355846405, | |
| "learning_rate": 0.00015788981509416773, | |
| "loss": 0.5897, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 0.5008422234699607, | |
| "grad_norm": 0.44514065980911255, | |
| "learning_rate": 0.00015784180187889833, | |
| "loss": 0.5807, | |
| "step": 223 | |
| }, | |
| { | |
| "epoch": 0.5030881527231892, | |
| "grad_norm": 0.37475013732910156, | |
| "learning_rate": 0.00015779325603272232, | |
| "loss": 0.586, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 0.5053340819764177, | |
| "grad_norm": 0.4093579649925232, | |
| "learning_rate": 0.0001577441778878136, | |
| "loss": 0.5966, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.5075800112296462, | |
| "grad_norm": 0.4048860967159271, | |
| "learning_rate": 0.00015769456777998842, | |
| "loss": 0.6107, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 0.5098259404828748, | |
| "grad_norm": 0.31557515263557434, | |
| "learning_rate": 0.00015764442604870285, | |
| "loss": 0.609, | |
| "step": 227 | |
| }, | |
| { | |
| "epoch": 0.5120718697361033, | |
| "grad_norm": 0.33514130115509033, | |
| "learning_rate": 0.0001575937530370507, | |
| "loss": 0.5866, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 0.5143177989893318, | |
| "grad_norm": 0.3601367771625519, | |
| "learning_rate": 0.0001575425490917609, | |
| "loss": 0.586, | |
| "step": 229 | |
| }, | |
| { | |
| "epoch": 0.5165637282425604, | |
| "grad_norm": 0.3701965808868408, | |
| "learning_rate": 0.00015749081456319544, | |
| "loss": 0.5755, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.5188096574957889, | |
| "grad_norm": 0.3042786419391632, | |
| "learning_rate": 0.0001574385498053468, | |
| "loss": 0.5978, | |
| "step": 231 | |
| }, | |
| { | |
| "epoch": 0.5210555867490174, | |
| "grad_norm": 0.33692997694015503, | |
| "learning_rate": 0.00015738575517583542, | |
| "loss": 0.6078, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 0.523301516002246, | |
| "grad_norm": 0.36644524335861206, | |
| "learning_rate": 0.00015733243103590748, | |
| "loss": 0.575, | |
| "step": 233 | |
| }, | |
| { | |
| "epoch": 0.5255474452554745, | |
| "grad_norm": 0.3802548050880432, | |
| "learning_rate": 0.00015727857775043227, | |
| "loss": 0.6041, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 0.527793374508703, | |
| "grad_norm": 0.37767213582992554, | |
| "learning_rate": 0.00015722419568789983, | |
| "loss": 0.591, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 0.5300393037619315, | |
| "grad_norm": 0.38524535298347473, | |
| "learning_rate": 0.00015716928522041825, | |
| "loss": 0.601, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 0.5322852330151601, | |
| "grad_norm": 0.4806888699531555, | |
| "learning_rate": 0.00015711384672371126, | |
| "loss": 0.5935, | |
| "step": 237 | |
| }, | |
| { | |
| "epoch": 0.5345311622683886, | |
| "grad_norm": 0.3960248827934265, | |
| "learning_rate": 0.0001570578805771156, | |
| "loss": 0.5789, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 0.5367770915216171, | |
| "grad_norm": 0.29831525683403015, | |
| "learning_rate": 0.00015700138716357852, | |
| "loss": 0.5917, | |
| "step": 239 | |
| }, | |
| { | |
| "epoch": 0.5390230207748455, | |
| "grad_norm": 0.30690401792526245, | |
| "learning_rate": 0.00015694436686965497, | |
| "loss": 0.5819, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.5412689500280741, | |
| "grad_norm": 0.30107825994491577, | |
| "learning_rate": 0.00015688682008550514, | |
| "loss": 0.5965, | |
| "step": 241 | |
| }, | |
| { | |
| "epoch": 0.5435148792813026, | |
| "grad_norm": 0.30696406960487366, | |
| "learning_rate": 0.0001568287472048917, | |
| "loss": 0.6025, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 0.5457608085345311, | |
| "grad_norm": 0.32280731201171875, | |
| "learning_rate": 0.00015677014862517714, | |
| "loss": 0.5868, | |
| "step": 243 | |
| }, | |
| { | |
| "epoch": 0.5480067377877597, | |
| "grad_norm": 0.31739377975463867, | |
| "learning_rate": 0.000156711024747321, | |
| "loss": 0.5898, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 0.5502526670409882, | |
| "grad_norm": 0.3620510995388031, | |
| "learning_rate": 0.0001566513759758772, | |
| "loss": 0.5621, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 0.5524985962942167, | |
| "grad_norm": 0.26646366715431213, | |
| "learning_rate": 0.00015659120271899118, | |
| "loss": 0.5731, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 0.5547445255474452, | |
| "grad_norm": 0.3814524710178375, | |
| "learning_rate": 0.00015653050538839722, | |
| "loss": 0.5947, | |
| "step": 247 | |
| }, | |
| { | |
| "epoch": 0.5569904548006738, | |
| "grad_norm": 0.4031396210193634, | |
| "learning_rate": 0.00015646928439941557, | |
| "loss": 0.612, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 0.5592363840539023, | |
| "grad_norm": 0.38268253207206726, | |
| "learning_rate": 0.00015640754017094954, | |
| "loss": 0.5792, | |
| "step": 249 | |
| }, | |
| { | |
| "epoch": 0.5614823133071308, | |
| "grad_norm": 0.37941139936447144, | |
| "learning_rate": 0.0001563452731254827, | |
| "loss": 0.6071, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.5637282425603594, | |
| "grad_norm": 0.3618276119232178, | |
| "learning_rate": 0.00015628248368907603, | |
| "loss": 0.5776, | |
| "step": 251 | |
| }, | |
| { | |
| "epoch": 0.5659741718135879, | |
| "grad_norm": 0.3906313180923462, | |
| "learning_rate": 0.000156219172291365, | |
| "loss": 0.5732, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 0.5682201010668164, | |
| "grad_norm": 0.4234972894191742, | |
| "learning_rate": 0.0001561553393655564, | |
| "loss": 0.5674, | |
| "step": 253 | |
| }, | |
| { | |
| "epoch": 0.570466030320045, | |
| "grad_norm": 0.4400922954082489, | |
| "learning_rate": 0.00015609098534842582, | |
| "loss": 0.5894, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 0.5727119595732735, | |
| "grad_norm": 0.38799750804901123, | |
| "learning_rate": 0.0001560261106803142, | |
| "loss": 0.5833, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 0.574957888826502, | |
| "grad_norm": 0.31524044275283813, | |
| "learning_rate": 0.00015596071580512515, | |
| "loss": 0.5841, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 0.5772038180797305, | |
| "grad_norm": 0.3451038599014282, | |
| "learning_rate": 0.00015589480117032174, | |
| "loss": 0.6003, | |
| "step": 257 | |
| }, | |
| { | |
| "epoch": 0.5794497473329591, | |
| "grad_norm": 0.3648560047149658, | |
| "learning_rate": 0.00015582836722692346, | |
| "loss": 0.5787, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 0.5816956765861875, | |
| "grad_norm": 0.37476226687431335, | |
| "learning_rate": 0.00015576141442950317, | |
| "loss": 0.5719, | |
| "step": 259 | |
| }, | |
| { | |
| "epoch": 0.583941605839416, | |
| "grad_norm": 0.33187106251716614, | |
| "learning_rate": 0.00015569394323618403, | |
| "loss": 0.5785, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.5861875350926445, | |
| "grad_norm": 0.36073818802833557, | |
| "learning_rate": 0.00015562595410863626, | |
| "loss": 0.5965, | |
| "step": 261 | |
| }, | |
| { | |
| "epoch": 0.5884334643458731, | |
| "grad_norm": 0.3586486577987671, | |
| "learning_rate": 0.00015555744751207404, | |
| "loss": 0.5857, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 0.5906793935991016, | |
| "grad_norm": 0.44820883870124817, | |
| "learning_rate": 0.0001554884239152523, | |
| "loss": 0.5804, | |
| "step": 263 | |
| }, | |
| { | |
| "epoch": 0.5929253228523301, | |
| "grad_norm": 0.43128344416618347, | |
| "learning_rate": 0.00015541888379046366, | |
| "loss": 0.5613, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 0.5951712521055587, | |
| "grad_norm": 0.38606396317481995, | |
| "learning_rate": 0.0001553488276135349, | |
| "loss": 0.5958, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 0.5974171813587872, | |
| "grad_norm": 0.36493563652038574, | |
| "learning_rate": 0.0001552782558638239, | |
| "loss": 0.5663, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 0.5996631106120157, | |
| "grad_norm": 0.40545809268951416, | |
| "learning_rate": 0.00015520716902421648, | |
| "loss": 0.5934, | |
| "step": 267 | |
| }, | |
| { | |
| "epoch": 0.6019090398652442, | |
| "grad_norm": 0.42288488149642944, | |
| "learning_rate": 0.00015513556758112282, | |
| "loss": 0.5729, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 0.6041549691184728, | |
| "grad_norm": 0.2895568311214447, | |
| "learning_rate": 0.00015506345202447432, | |
| "loss": 0.6046, | |
| "step": 269 | |
| }, | |
| { | |
| "epoch": 0.6064008983717013, | |
| "grad_norm": 0.3440837562084198, | |
| "learning_rate": 0.00015499082284772017, | |
| "loss": 0.5654, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.6086468276249298, | |
| "grad_norm": 0.36002352833747864, | |
| "learning_rate": 0.00015491768054782395, | |
| "loss": 0.5923, | |
| "step": 271 | |
| }, | |
| { | |
| "epoch": 0.6108927568781584, | |
| "grad_norm": 0.28700196743011475, | |
| "learning_rate": 0.00015484402562526036, | |
| "loss": 0.5826, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 0.6131386861313869, | |
| "grad_norm": 0.32599133253097534, | |
| "learning_rate": 0.0001547698585840117, | |
| "loss": 0.5783, | |
| "step": 273 | |
| }, | |
| { | |
| "epoch": 0.6153846153846154, | |
| "grad_norm": 0.37215182185173035, | |
| "learning_rate": 0.00015469517993156435, | |
| "loss": 0.583, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 0.617630544637844, | |
| "grad_norm": 0.3325370252132416, | |
| "learning_rate": 0.0001546199901789055, | |
| "loss": 0.582, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 0.6198764738910725, | |
| "grad_norm": 0.3477807939052582, | |
| "learning_rate": 0.00015454428984051937, | |
| "loss": 0.5726, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 0.622122403144301, | |
| "grad_norm": 0.37678489089012146, | |
| "learning_rate": 0.000154468079434384, | |
| "loss": 0.5786, | |
| "step": 277 | |
| }, | |
| { | |
| "epoch": 0.6243683323975294, | |
| "grad_norm": 0.3045758008956909, | |
| "learning_rate": 0.00015439135948196756, | |
| "loss": 0.5829, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 0.626614261650758, | |
| "grad_norm": 0.3221797049045563, | |
| "learning_rate": 0.0001543141305082246, | |
| "loss": 0.5811, | |
| "step": 279 | |
| }, | |
| { | |
| "epoch": 0.6288601909039865, | |
| "grad_norm": 0.35202842950820923, | |
| "learning_rate": 0.00015423639304159288, | |
| "loss": 0.5655, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.631106120157215, | |
| "grad_norm": 0.2838123142719269, | |
| "learning_rate": 0.00015415814761398936, | |
| "loss": 0.5991, | |
| "step": 281 | |
| }, | |
| { | |
| "epoch": 0.6333520494104435, | |
| "grad_norm": 0.33944493532180786, | |
| "learning_rate": 0.0001540793947608067, | |
| "loss": 0.5764, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 0.6355979786636721, | |
| "grad_norm": 0.29667678475379944, | |
| "learning_rate": 0.0001540001350209097, | |
| "loss": 0.5745, | |
| "step": 283 | |
| }, | |
| { | |
| "epoch": 0.6378439079169006, | |
| "grad_norm": 0.37716934084892273, | |
| "learning_rate": 0.00015392036893663148, | |
| "loss": 0.5739, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 0.6400898371701291, | |
| "grad_norm": 0.3955274522304535, | |
| "learning_rate": 0.00015384009705376978, | |
| "loss": 0.574, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 0.6423357664233577, | |
| "grad_norm": 0.29740408062934875, | |
| "learning_rate": 0.00015375931992158331, | |
| "loss": 0.567, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 0.6445816956765862, | |
| "grad_norm": 0.3198919892311096, | |
| "learning_rate": 0.0001536780380927879, | |
| "loss": 0.5672, | |
| "step": 287 | |
| }, | |
| { | |
| "epoch": 0.6468276249298147, | |
| "grad_norm": 0.3355892598628998, | |
| "learning_rate": 0.0001535962521235528, | |
| "loss": 0.57, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 0.6490735541830432, | |
| "grad_norm": 0.32803425192832947, | |
| "learning_rate": 0.00015351396257349675, | |
| "loss": 0.5839, | |
| "step": 289 | |
| }, | |
| { | |
| "epoch": 0.6513194834362718, | |
| "grad_norm": 0.3538999557495117, | |
| "learning_rate": 0.00015343117000568432, | |
| "loss": 0.5864, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.6535654126895003, | |
| "grad_norm": 0.3156984746456146, | |
| "learning_rate": 0.00015334787498662192, | |
| "loss": 0.5872, | |
| "step": 291 | |
| }, | |
| { | |
| "epoch": 0.6558113419427288, | |
| "grad_norm": 0.336056113243103, | |
| "learning_rate": 0.00015326407808625395, | |
| "loss": 0.578, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 0.6580572711959574, | |
| "grad_norm": 0.3894708454608917, | |
| "learning_rate": 0.00015317977987795898, | |
| "loss": 0.5682, | |
| "step": 293 | |
| }, | |
| { | |
| "epoch": 0.6603032004491859, | |
| "grad_norm": 0.3500683605670929, | |
| "learning_rate": 0.00015309498093854577, | |
| "loss": 0.5934, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 0.6625491297024144, | |
| "grad_norm": 0.331767201423645, | |
| "learning_rate": 0.00015300968184824926, | |
| "loss": 0.5781, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 0.664795058955643, | |
| "grad_norm": 0.4042721092700958, | |
| "learning_rate": 0.0001529238831907267, | |
| "loss": 0.5811, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 0.6670409882088714, | |
| "grad_norm": 0.2907451093196869, | |
| "learning_rate": 0.00015283758555305362, | |
| "loss": 0.5925, | |
| "step": 297 | |
| }, | |
| { | |
| "epoch": 0.6692869174620999, | |
| "grad_norm": 0.28044381737709045, | |
| "learning_rate": 0.0001527507895257198, | |
| "loss": 0.5717, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 0.6715328467153284, | |
| "grad_norm": 0.2812747359275818, | |
| "learning_rate": 0.00015266349570262528, | |
| "loss": 0.5796, | |
| "step": 299 | |
| }, | |
| { | |
| "epoch": 0.673778775968557, | |
| "grad_norm": 0.28039273619651794, | |
| "learning_rate": 0.00015257570468107617, | |
| "loss": 0.5682, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.6760247052217855, | |
| "grad_norm": 0.2821033000946045, | |
| "learning_rate": 0.00015248741706178073, | |
| "loss": 0.5939, | |
| "step": 301 | |
| }, | |
| { | |
| "epoch": 0.678270634475014, | |
| "grad_norm": 0.31085771322250366, | |
| "learning_rate": 0.0001523986334488452, | |
| "loss": 0.5829, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 0.6805165637282425, | |
| "grad_norm": 0.31658798456192017, | |
| "learning_rate": 0.00015230935444976955, | |
| "loss": 0.6073, | |
| "step": 303 | |
| }, | |
| { | |
| "epoch": 0.6827624929814711, | |
| "grad_norm": 0.28057488799095154, | |
| "learning_rate": 0.00015221958067544348, | |
| "loss": 0.5888, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 0.6850084222346996, | |
| "grad_norm": 0.29499179124832153, | |
| "learning_rate": 0.00015212931274014214, | |
| "loss": 0.5713, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 0.6872543514879281, | |
| "grad_norm": 0.31696656346321106, | |
| "learning_rate": 0.00015203855126152204, | |
| "loss": 0.5956, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 0.6895002807411567, | |
| "grad_norm": 0.2905656695365906, | |
| "learning_rate": 0.00015194729686061672, | |
| "loss": 0.56, | |
| "step": 307 | |
| }, | |
| { | |
| "epoch": 0.6917462099943852, | |
| "grad_norm": 0.33711618185043335, | |
| "learning_rate": 0.00015185555016183246, | |
| "loss": 0.5816, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 0.6939921392476137, | |
| "grad_norm": 0.3962436616420746, | |
| "learning_rate": 0.00015176331179294416, | |
| "loss": 0.5933, | |
| "step": 309 | |
| }, | |
| { | |
| "epoch": 0.6962380685008422, | |
| "grad_norm": 0.2827875316143036, | |
| "learning_rate": 0.00015167058238509093, | |
| "loss": 0.5529, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.6984839977540708, | |
| "grad_norm": 0.252986878156662, | |
| "learning_rate": 0.00015157736257277182, | |
| "loss": 0.5915, | |
| "step": 311 | |
| }, | |
| { | |
| "epoch": 0.7007299270072993, | |
| "grad_norm": 0.28363773226737976, | |
| "learning_rate": 0.00015148365299384145, | |
| "loss": 0.5621, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 0.7029758562605278, | |
| "grad_norm": 0.26527139544487, | |
| "learning_rate": 0.00015138945428950566, | |
| "loss": 0.5791, | |
| "step": 313 | |
| }, | |
| { | |
| "epoch": 0.7052217855137564, | |
| "grad_norm": 0.24393455684185028, | |
| "learning_rate": 0.0001512947671043171, | |
| "loss": 0.5549, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 0.7074677147669849, | |
| "grad_norm": 0.24904131889343262, | |
| "learning_rate": 0.00015119959208617092, | |
| "loss": 0.5627, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 0.7097136440202133, | |
| "grad_norm": 0.3018868863582611, | |
| "learning_rate": 0.00015110392988630016, | |
| "loss": 0.5802, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 0.7119595732734418, | |
| "grad_norm": 0.34517163038253784, | |
| "learning_rate": 0.0001510077811592714, | |
| "loss": 0.5831, | |
| "step": 317 | |
| }, | |
| { | |
| "epoch": 0.7142055025266704, | |
| "grad_norm": 0.3295687437057495, | |
| "learning_rate": 0.00015091114656298033, | |
| "loss": 0.5978, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 0.7164514317798989, | |
| "grad_norm": 0.3116067945957184, | |
| "learning_rate": 0.00015081402675864717, | |
| "loss": 0.58, | |
| "step": 319 | |
| }, | |
| { | |
| "epoch": 0.7186973610331274, | |
| "grad_norm": 0.2843012809753418, | |
| "learning_rate": 0.00015071642241081212, | |
| "loss": 0.5837, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.720943290286356, | |
| "grad_norm": 0.27185961604118347, | |
| "learning_rate": 0.00015061833418733095, | |
| "loss": 0.5746, | |
| "step": 321 | |
| }, | |
| { | |
| "epoch": 0.7231892195395845, | |
| "grad_norm": 0.26890790462493896, | |
| "learning_rate": 0.00015051976275937023, | |
| "loss": 0.5642, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 0.725435148792813, | |
| "grad_norm": 0.29379114508628845, | |
| "learning_rate": 0.00015042070880140292, | |
| "loss": 0.5796, | |
| "step": 323 | |
| }, | |
| { | |
| "epoch": 0.7276810780460415, | |
| "grad_norm": 0.2906297743320465, | |
| "learning_rate": 0.0001503211729912037, | |
| "loss": 0.5666, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 0.7299270072992701, | |
| "grad_norm": 0.2815559506416321, | |
| "learning_rate": 0.00015022115600984423, | |
| "loss": 0.5582, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 0.7321729365524986, | |
| "grad_norm": 0.3286380469799042, | |
| "learning_rate": 0.0001501206585416886, | |
| "loss": 0.5462, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 0.7344188658057271, | |
| "grad_norm": 0.3522341549396515, | |
| "learning_rate": 0.00015001968127438872, | |
| "loss": 0.5654, | |
| "step": 327 | |
| }, | |
| { | |
| "epoch": 0.7366647950589557, | |
| "grad_norm": 0.33905208110809326, | |
| "learning_rate": 0.00014991822489887938, | |
| "loss": 0.5606, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 0.7389107243121842, | |
| "grad_norm": 0.29921072721481323, | |
| "learning_rate": 0.00014981629010937372, | |
| "loss": 0.5772, | |
| "step": 329 | |
| }, | |
| { | |
| "epoch": 0.7411566535654127, | |
| "grad_norm": 0.2822812497615814, | |
| "learning_rate": 0.00014971387760335841, | |
| "loss": 0.5772, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.7434025828186412, | |
| "grad_norm": 0.3244154155254364, | |
| "learning_rate": 0.0001496109880815889, | |
| "loss": 0.5736, | |
| "step": 331 | |
| }, | |
| { | |
| "epoch": 0.7456485120718698, | |
| "grad_norm": 0.3305480480194092, | |
| "learning_rate": 0.0001495076222480846, | |
| "loss": 0.586, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 0.7478944413250983, | |
| "grad_norm": 0.3018239140510559, | |
| "learning_rate": 0.00014940378081012407, | |
| "loss": 0.579, | |
| "step": 333 | |
| }, | |
| { | |
| "epoch": 0.7501403705783268, | |
| "grad_norm": 0.3692958652973175, | |
| "learning_rate": 0.00014929946447824014, | |
| "loss": 0.5767, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 0.7523862998315554, | |
| "grad_norm": 0.3724178373813629, | |
| "learning_rate": 0.00014919467396621523, | |
| "loss": 0.5721, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 0.7546322290847838, | |
| "grad_norm": 0.3226647973060608, | |
| "learning_rate": 0.00014908940999107615, | |
| "loss": 0.553, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 0.7568781583380123, | |
| "grad_norm": 0.28518086671829224, | |
| "learning_rate": 0.00014898367327308945, | |
| "loss": 0.566, | |
| "step": 337 | |
| }, | |
| { | |
| "epoch": 0.7591240875912408, | |
| "grad_norm": 0.2642190158367157, | |
| "learning_rate": 0.0001488774645357565, | |
| "loss": 0.5732, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 0.7613700168444694, | |
| "grad_norm": 0.2713199555873871, | |
| "learning_rate": 0.0001487707845058083, | |
| "loss": 0.5679, | |
| "step": 339 | |
| }, | |
| { | |
| "epoch": 0.7636159460976979, | |
| "grad_norm": 0.28339532017707825, | |
| "learning_rate": 0.00014866363391320076, | |
| "loss": 0.5664, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.7658618753509264, | |
| "grad_norm": 0.26976078748703003, | |
| "learning_rate": 0.0001485560134911096, | |
| "loss": 0.5917, | |
| "step": 341 | |
| }, | |
| { | |
| "epoch": 0.768107804604155, | |
| "grad_norm": 0.31055644154548645, | |
| "learning_rate": 0.00014844792397592524, | |
| "loss": 0.5609, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 0.7703537338573835, | |
| "grad_norm": 0.28089481592178345, | |
| "learning_rate": 0.000148339366107248, | |
| "loss": 0.5553, | |
| "step": 343 | |
| }, | |
| { | |
| "epoch": 0.772599663110612, | |
| "grad_norm": 0.3059735894203186, | |
| "learning_rate": 0.00014823034062788282, | |
| "loss": 0.5827, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 0.7748455923638405, | |
| "grad_norm": 0.3540654480457306, | |
| "learning_rate": 0.00014812084828383425, | |
| "loss": 0.5417, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 0.7770915216170691, | |
| "grad_norm": 0.3125968277454376, | |
| "learning_rate": 0.0001480108898243014, | |
| "loss": 0.5676, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 0.7793374508702976, | |
| "grad_norm": 0.2534315884113312, | |
| "learning_rate": 0.0001479004660016727, | |
| "loss": 0.5724, | |
| "step": 347 | |
| }, | |
| { | |
| "epoch": 0.7815833801235261, | |
| "grad_norm": 0.30985814332962036, | |
| "learning_rate": 0.0001477895775715209, | |
| "loss": 0.5682, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 0.7838293093767547, | |
| "grad_norm": 0.334831178188324, | |
| "learning_rate": 0.00014767822529259772, | |
| "loss": 0.5653, | |
| "step": 349 | |
| }, | |
| { | |
| "epoch": 0.7860752386299832, | |
| "grad_norm": 0.29639920592308044, | |
| "learning_rate": 0.00014756640992682883, | |
| "loss": 0.5959, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.7883211678832117, | |
| "grad_norm": 0.33278346061706543, | |
| "learning_rate": 0.00014745413223930858, | |
| "loss": 0.57, | |
| "step": 351 | |
| }, | |
| { | |
| "epoch": 0.7905670971364402, | |
| "grad_norm": 0.26434555649757385, | |
| "learning_rate": 0.00014734139299829466, | |
| "loss": 0.5847, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 0.7928130263896688, | |
| "grad_norm": 0.295564204454422, | |
| "learning_rate": 0.00014722819297520296, | |
| "loss": 0.5345, | |
| "step": 353 | |
| }, | |
| { | |
| "epoch": 0.7950589556428973, | |
| "grad_norm": 0.32043787837028503, | |
| "learning_rate": 0.00014711453294460235, | |
| "loss": 0.5751, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 0.7973048848961257, | |
| "grad_norm": 0.35145339369773865, | |
| "learning_rate": 0.00014700041368420914, | |
| "loss": 0.5782, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 0.7995508141493542, | |
| "grad_norm": 0.2663813531398773, | |
| "learning_rate": 0.00014688583597488204, | |
| "loss": 0.5457, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 0.8017967434025828, | |
| "grad_norm": 0.3394940197467804, | |
| "learning_rate": 0.00014677080060061662, | |
| "loss": 0.5669, | |
| "step": 357 | |
| }, | |
| { | |
| "epoch": 0.8040426726558113, | |
| "grad_norm": 0.28702473640441895, | |
| "learning_rate": 0.00014665530834854002, | |
| "loss": 0.5715, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 0.8062886019090398, | |
| "grad_norm": 0.3419654071331024, | |
| "learning_rate": 0.0001465393600089056, | |
| "loss": 0.5804, | |
| "step": 359 | |
| }, | |
| { | |
| "epoch": 0.8085345311622684, | |
| "grad_norm": 0.35292762517929077, | |
| "learning_rate": 0.00014642295637508742, | |
| "loss": 0.5666, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.8107804604154969, | |
| "grad_norm": 0.31325843930244446, | |
| "learning_rate": 0.00014630609824357494, | |
| "loss": 0.5857, | |
| "step": 361 | |
| }, | |
| { | |
| "epoch": 0.8130263896687254, | |
| "grad_norm": 0.27262774109840393, | |
| "learning_rate": 0.00014618878641396748, | |
| "loss": 0.5797, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 0.815272318921954, | |
| "grad_norm": 0.2780674397945404, | |
| "learning_rate": 0.00014607102168896882, | |
| "loss": 0.5552, | |
| "step": 363 | |
| }, | |
| { | |
| "epoch": 0.8175182481751825, | |
| "grad_norm": 0.2732245922088623, | |
| "learning_rate": 0.00014595280487438158, | |
| "loss": 0.5716, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 0.819764177428411, | |
| "grad_norm": 0.33612555265426636, | |
| "learning_rate": 0.0001458341367791019, | |
| "loss": 0.5756, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 0.8220101066816395, | |
| "grad_norm": 0.267904669046402, | |
| "learning_rate": 0.0001457150182151137, | |
| "loss": 0.5694, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 0.8242560359348681, | |
| "grad_norm": 0.2547987401485443, | |
| "learning_rate": 0.0001455954499974833, | |
| "loss": 0.5718, | |
| "step": 367 | |
| }, | |
| { | |
| "epoch": 0.8265019651880966, | |
| "grad_norm": 0.2813619375228882, | |
| "learning_rate": 0.00014547543294435376, | |
| "loss": 0.5521, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 0.8287478944413251, | |
| "grad_norm": 0.2692398428916931, | |
| "learning_rate": 0.0001453549678769392, | |
| "loss": 0.5644, | |
| "step": 369 | |
| }, | |
| { | |
| "epoch": 0.8309938236945537, | |
| "grad_norm": 0.24875199794769287, | |
| "learning_rate": 0.0001452340556195194, | |
| "loss": 0.5563, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.8332397529477822, | |
| "grad_norm": 0.24863849580287933, | |
| "learning_rate": 0.00014511269699943392, | |
| "loss": 0.5479, | |
| "step": 371 | |
| }, | |
| { | |
| "epoch": 0.8354856822010107, | |
| "grad_norm": 0.2492000311613083, | |
| "learning_rate": 0.00014499089284707658, | |
| "loss": 0.5742, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 0.8377316114542392, | |
| "grad_norm": 0.2373623251914978, | |
| "learning_rate": 0.0001448686439958898, | |
| "loss": 0.5688, | |
| "step": 373 | |
| }, | |
| { | |
| "epoch": 0.8399775407074677, | |
| "grad_norm": 0.265248566865921, | |
| "learning_rate": 0.00014474595128235876, | |
| "loss": 0.5616, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 0.8422234699606962, | |
| "grad_norm": 0.2871013879776001, | |
| "learning_rate": 0.00014462281554600577, | |
| "loss": 0.556, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.8444693992139247, | |
| "grad_norm": 0.31418806314468384, | |
| "learning_rate": 0.00014449923762938462, | |
| "loss": 0.5644, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 0.8467153284671532, | |
| "grad_norm": 0.3332020044326782, | |
| "learning_rate": 0.00014437521837807455, | |
| "loss": 0.5611, | |
| "step": 377 | |
| }, | |
| { | |
| "epoch": 0.8489612577203818, | |
| "grad_norm": 0.2672823965549469, | |
| "learning_rate": 0.00014425075864067473, | |
| "loss": 0.5575, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 0.8512071869736103, | |
| "grad_norm": 0.23632559180259705, | |
| "learning_rate": 0.00014412585926879833, | |
| "loss": 0.578, | |
| "step": 379 | |
| }, | |
| { | |
| "epoch": 0.8534531162268388, | |
| "grad_norm": 0.31967830657958984, | |
| "learning_rate": 0.00014400052111706668, | |
| "loss": 0.5738, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.8556990454800674, | |
| "grad_norm": 0.3274000287055969, | |
| "learning_rate": 0.0001438747450431035, | |
| "loss": 0.5606, | |
| "step": 381 | |
| }, | |
| { | |
| "epoch": 0.8579449747332959, | |
| "grad_norm": 0.32115650177001953, | |
| "learning_rate": 0.00014374853190752892, | |
| "loss": 0.601, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 0.8601909039865244, | |
| "grad_norm": 0.3195722997188568, | |
| "learning_rate": 0.00014362188257395367, | |
| "loss": 0.5794, | |
| "step": 383 | |
| }, | |
| { | |
| "epoch": 0.862436833239753, | |
| "grad_norm": 0.32217174768447876, | |
| "learning_rate": 0.00014349479790897325, | |
| "loss": 0.5687, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 0.8646827624929815, | |
| "grad_norm": 0.3338417410850525, | |
| "learning_rate": 0.00014336727878216178, | |
| "loss": 0.5513, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 0.86692869174621, | |
| "grad_norm": 0.2939014732837677, | |
| "learning_rate": 0.00014323932606606624, | |
| "loss": 0.5845, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 0.8691746209994385, | |
| "grad_norm": 0.34269392490386963, | |
| "learning_rate": 0.00014311094063620036, | |
| "loss": 0.5721, | |
| "step": 387 | |
| }, | |
| { | |
| "epoch": 0.8714205502526671, | |
| "grad_norm": 0.3684992492198944, | |
| "learning_rate": 0.00014298212337103888, | |
| "loss": 0.5924, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 0.8736664795058956, | |
| "grad_norm": 0.27671441435813904, | |
| "learning_rate": 0.0001428528751520112, | |
| "loss": 0.5536, | |
| "step": 389 | |
| }, | |
| { | |
| "epoch": 0.8759124087591241, | |
| "grad_norm": 0.3508271276950836, | |
| "learning_rate": 0.0001427231968634955, | |
| "loss": 0.5499, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.8781583380123527, | |
| "grad_norm": 0.3735405504703522, | |
| "learning_rate": 0.00014259308939281292, | |
| "loss": 0.5472, | |
| "step": 391 | |
| }, | |
| { | |
| "epoch": 0.8804042672655812, | |
| "grad_norm": 0.30313754081726074, | |
| "learning_rate": 0.00014246255363022095, | |
| "loss": 0.5598, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 0.8826501965188096, | |
| "grad_norm": 0.28613924980163574, | |
| "learning_rate": 0.00014233159046890792, | |
| "loss": 0.5589, | |
| "step": 393 | |
| }, | |
| { | |
| "epoch": 0.8848961257720381, | |
| "grad_norm": 0.3396552503108978, | |
| "learning_rate": 0.00014220020080498648, | |
| "loss": 0.5722, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 0.8871420550252667, | |
| "grad_norm": 0.24562208354473114, | |
| "learning_rate": 0.00014206838553748773, | |
| "loss": 0.5617, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 0.8893879842784952, | |
| "grad_norm": 0.26731806993484497, | |
| "learning_rate": 0.00014193614556835482, | |
| "loss": 0.5876, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 0.8916339135317237, | |
| "grad_norm": 0.30024391412734985, | |
| "learning_rate": 0.00014180348180243706, | |
| "loss": 0.5457, | |
| "step": 397 | |
| }, | |
| { | |
| "epoch": 0.8938798427849522, | |
| "grad_norm": 0.23074807226657867, | |
| "learning_rate": 0.0001416703951474834, | |
| "loss": 0.5767, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 0.8961257720381808, | |
| "grad_norm": 0.2882399260997772, | |
| "learning_rate": 0.00014153688651413662, | |
| "loss": 0.548, | |
| "step": 399 | |
| }, | |
| { | |
| "epoch": 0.8983717012914093, | |
| "grad_norm": 0.30070793628692627, | |
| "learning_rate": 0.00014140295681592667, | |
| "loss": 0.5483, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.9006176305446378, | |
| "grad_norm": 0.261349081993103, | |
| "learning_rate": 0.00014126860696926473, | |
| "loss": 0.5568, | |
| "step": 401 | |
| }, | |
| { | |
| "epoch": 0.9028635597978664, | |
| "grad_norm": 0.2514486610889435, | |
| "learning_rate": 0.00014113383789343686, | |
| "loss": 0.5656, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 0.9051094890510949, | |
| "grad_norm": 0.28470492362976074, | |
| "learning_rate": 0.00014099865051059765, | |
| "loss": 0.5877, | |
| "step": 403 | |
| }, | |
| { | |
| "epoch": 0.9073554183043234, | |
| "grad_norm": 0.28581055998802185, | |
| "learning_rate": 0.00014086304574576394, | |
| "loss": 0.5703, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 0.909601347557552, | |
| "grad_norm": 0.22891870141029358, | |
| "learning_rate": 0.00014072702452680848, | |
| "loss": 0.5631, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 0.9118472768107805, | |
| "grad_norm": 0.27686670422554016, | |
| "learning_rate": 0.00014059058778445363, | |
| "loss": 0.542, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 0.914093206064009, | |
| "grad_norm": 0.27244243025779724, | |
| "learning_rate": 0.000140453736452265, | |
| "loss": 0.5444, | |
| "step": 407 | |
| }, | |
| { | |
| "epoch": 0.9163391353172375, | |
| "grad_norm": 0.2376582771539688, | |
| "learning_rate": 0.00014031647146664494, | |
| "loss": 0.5624, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 0.9185850645704661, | |
| "grad_norm": 0.29739177227020264, | |
| "learning_rate": 0.00014017879376682627, | |
| "loss": 0.5579, | |
| "step": 409 | |
| }, | |
| { | |
| "epoch": 0.9208309938236946, | |
| "grad_norm": 0.24522463977336884, | |
| "learning_rate": 0.00014004070429486575, | |
| "loss": 0.5778, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.9230769230769231, | |
| "grad_norm": 0.28420501947402954, | |
| "learning_rate": 0.00013990220399563775, | |
| "loss": 0.582, | |
| "step": 411 | |
| }, | |
| { | |
| "epoch": 0.9253228523301515, | |
| "grad_norm": 0.3128701150417328, | |
| "learning_rate": 0.0001397632938168277, | |
| "loss": 0.5597, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 0.9275687815833801, | |
| "grad_norm": 0.2503584921360016, | |
| "learning_rate": 0.0001396239747089255, | |
| "loss": 0.557, | |
| "step": 413 | |
| }, | |
| { | |
| "epoch": 0.9298147108366086, | |
| "grad_norm": 0.2346629947423935, | |
| "learning_rate": 0.00013948424762521937, | |
| "loss": 0.5567, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 0.9320606400898371, | |
| "grad_norm": 0.26330509781837463, | |
| "learning_rate": 0.00013934411352178888, | |
| "loss": 0.5556, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 0.9343065693430657, | |
| "grad_norm": 0.2683373689651489, | |
| "learning_rate": 0.00013920357335749873, | |
| "loss": 0.5585, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 0.9365524985962942, | |
| "grad_norm": 0.21568100154399872, | |
| "learning_rate": 0.0001390626280939921, | |
| "loss": 0.5837, | |
| "step": 417 | |
| }, | |
| { | |
| "epoch": 0.9387984278495227, | |
| "grad_norm": 0.2531348168849945, | |
| "learning_rate": 0.00013892127869568396, | |
| "loss": 0.5505, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 0.9410443571027512, | |
| "grad_norm": 0.2542068362236023, | |
| "learning_rate": 0.00013877952612975465, | |
| "loss": 0.5834, | |
| "step": 419 | |
| }, | |
| { | |
| "epoch": 0.9432902863559798, | |
| "grad_norm": 0.22806115448474884, | |
| "learning_rate": 0.00013863737136614318, | |
| "loss": 0.5648, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.9455362156092083, | |
| "grad_norm": 0.3068370819091797, | |
| "learning_rate": 0.00013849481537754054, | |
| "loss": 0.5488, | |
| "step": 421 | |
| }, | |
| { | |
| "epoch": 0.9477821448624368, | |
| "grad_norm": 0.31988707184791565, | |
| "learning_rate": 0.00013835185913938305, | |
| "loss": 0.5679, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 0.9500280741156654, | |
| "grad_norm": 0.2480153888463974, | |
| "learning_rate": 0.00013820850362984585, | |
| "loss": 0.5481, | |
| "step": 423 | |
| }, | |
| { | |
| "epoch": 0.9522740033688939, | |
| "grad_norm": 0.2835778295993805, | |
| "learning_rate": 0.00013806474982983602, | |
| "loss": 0.5575, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 0.9545199326221224, | |
| "grad_norm": 0.28375929594039917, | |
| "learning_rate": 0.0001379205987229859, | |
| "loss": 0.5522, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 0.956765861875351, | |
| "grad_norm": 0.32451605796813965, | |
| "learning_rate": 0.00013777605129564649, | |
| "loss": 0.5531, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 0.9590117911285795, | |
| "grad_norm": 0.2671431005001068, | |
| "learning_rate": 0.00013763110853688053, | |
| "loss": 0.5597, | |
| "step": 427 | |
| }, | |
| { | |
| "epoch": 0.961257720381808, | |
| "grad_norm": 0.31597959995269775, | |
| "learning_rate": 0.0001374857714384558, | |
| "loss": 0.5668, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 0.9635036496350365, | |
| "grad_norm": 0.3307490050792694, | |
| "learning_rate": 0.00013734004099483842, | |
| "loss": 0.5412, | |
| "step": 429 | |
| }, | |
| { | |
| "epoch": 0.9657495788882651, | |
| "grad_norm": 0.2594424784183502, | |
| "learning_rate": 0.00013719391820318585, | |
| "loss": 0.534, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.9679955081414935, | |
| "grad_norm": 0.24068522453308105, | |
| "learning_rate": 0.00013704740406334027, | |
| "loss": 0.567, | |
| "step": 431 | |
| }, | |
| { | |
| "epoch": 0.970241437394722, | |
| "grad_norm": 0.27227339148521423, | |
| "learning_rate": 0.00013690049957782162, | |
| "loss": 0.55, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 0.9724873666479505, | |
| "grad_norm": 0.2331283837556839, | |
| "learning_rate": 0.0001367532057518208, | |
| "loss": 0.5296, | |
| "step": 433 | |
| }, | |
| { | |
| "epoch": 0.9747332959011791, | |
| "grad_norm": 0.25519710779190063, | |
| "learning_rate": 0.00013660552359319274, | |
| "loss": 0.5759, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 0.9769792251544076, | |
| "grad_norm": 0.25783583521842957, | |
| "learning_rate": 0.0001364574541124495, | |
| "loss": 0.5642, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 0.9792251544076361, | |
| "grad_norm": 0.2404668927192688, | |
| "learning_rate": 0.00013630899832275348, | |
| "loss": 0.5566, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 0.9814710836608647, | |
| "grad_norm": 0.29142558574676514, | |
| "learning_rate": 0.00013616015723991027, | |
| "loss": 0.5666, | |
| "step": 437 | |
| }, | |
| { | |
| "epoch": 0.9837170129140932, | |
| "grad_norm": 0.2782052755355835, | |
| "learning_rate": 0.00013601093188236188, | |
| "loss": 0.5507, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 0.9859629421673217, | |
| "grad_norm": 0.21326802670955658, | |
| "learning_rate": 0.00013586132327117974, | |
| "loss": 0.5685, | |
| "step": 439 | |
| }, | |
| { | |
| "epoch": 0.9882088714205502, | |
| "grad_norm": 0.25155940651893616, | |
| "learning_rate": 0.00013571133243005763, | |
| "loss": 0.5803, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.9904548006737788, | |
| "grad_norm": 0.2218320518732071, | |
| "learning_rate": 0.00013556096038530474, | |
| "loss": 0.5488, | |
| "step": 441 | |
| }, | |
| { | |
| "epoch": 0.9927007299270073, | |
| "grad_norm": 0.27737680077552795, | |
| "learning_rate": 0.00013541020816583869, | |
| "loss": 0.5651, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 0.9949466591802358, | |
| "grad_norm": 0.2509002387523651, | |
| "learning_rate": 0.00013525907680317836, | |
| "loss": 0.5525, | |
| "step": 443 | |
| }, | |
| { | |
| "epoch": 0.9971925884334644, | |
| "grad_norm": 0.25884488224983215, | |
| "learning_rate": 0.000135107567331437, | |
| "loss": 0.567, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 0.9994385176866929, | |
| "grad_norm": 0.2978728711605072, | |
| "learning_rate": 0.00013495568078731495, | |
| "loss": 0.5405, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 1.0016844469399213, | |
| "grad_norm": 0.31027480959892273, | |
| "learning_rate": 0.00013480341821009277, | |
| "loss": 0.5251, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 1.00393037619315, | |
| "grad_norm": 0.3249771296977997, | |
| "learning_rate": 0.00013465078064162393, | |
| "loss": 0.5197, | |
| "step": 447 | |
| }, | |
| { | |
| "epoch": 1.0061763054463784, | |
| "grad_norm": 0.330244243144989, | |
| "learning_rate": 0.00013449776912632784, | |
| "loss": 0.5177, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 1.008422234699607, | |
| "grad_norm": 0.31560900807380676, | |
| "learning_rate": 0.00013434438471118262, | |
| "loss": 0.5108, | |
| "step": 449 | |
| }, | |
| { | |
| "epoch": 1.0106681639528354, | |
| "grad_norm": 0.32275712490081787, | |
| "learning_rate": 0.00013419062844571784, | |
| "loss": 0.498, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 1.012914093206064, | |
| "grad_norm": 0.30424079298973083, | |
| "learning_rate": 0.0001340365013820077, | |
| "loss": 0.5394, | |
| "step": 451 | |
| }, | |
| { | |
| "epoch": 1.0151600224592925, | |
| "grad_norm": 0.26794448494911194, | |
| "learning_rate": 0.00013388200457466326, | |
| "loss": 0.4944, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 1.0174059517125211, | |
| "grad_norm": 0.31360936164855957, | |
| "learning_rate": 0.00013372713908082578, | |
| "loss": 0.5062, | |
| "step": 453 | |
| }, | |
| { | |
| "epoch": 1.0196518809657495, | |
| "grad_norm": 0.33009976148605347, | |
| "learning_rate": 0.00013357190596015919, | |
| "loss": 0.5105, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 1.0218978102189782, | |
| "grad_norm": 0.2470821887254715, | |
| "learning_rate": 0.00013341630627484286, | |
| "loss": 0.5185, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 1.0241437394722066, | |
| "grad_norm": 0.304426908493042, | |
| "learning_rate": 0.00013326034108956437, | |
| "loss": 0.5292, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 1.0263896687254352, | |
| "grad_norm": 0.3242713510990143, | |
| "learning_rate": 0.0001331040114715123, | |
| "loss": 0.5214, | |
| "step": 457 | |
| }, | |
| { | |
| "epoch": 1.0286355979786637, | |
| "grad_norm": 0.31412094831466675, | |
| "learning_rate": 0.00013294731849036875, | |
| "loss": 0.5106, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 1.0308815272318923, | |
| "grad_norm": 0.27217480540275574, | |
| "learning_rate": 0.0001327902632183022, | |
| "loss": 0.5344, | |
| "step": 459 | |
| }, | |
| { | |
| "epoch": 1.0331274564851207, | |
| "grad_norm": 0.2789839208126068, | |
| "learning_rate": 0.00013263284672996009, | |
| "loss": 0.521, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 1.0353733857383491, | |
| "grad_norm": 0.27859795093536377, | |
| "learning_rate": 0.00013247507010246144, | |
| "loss": 0.5316, | |
| "step": 461 | |
| }, | |
| { | |
| "epoch": 1.0376193149915778, | |
| "grad_norm": 0.30018481612205505, | |
| "learning_rate": 0.00013231693441538952, | |
| "loss": 0.5083, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 1.0398652442448062, | |
| "grad_norm": 0.2683006525039673, | |
| "learning_rate": 0.0001321584407507845, | |
| "loss": 0.5378, | |
| "step": 463 | |
| }, | |
| { | |
| "epoch": 1.0421111734980348, | |
| "grad_norm": 0.27185767889022827, | |
| "learning_rate": 0.000131999590193136, | |
| "loss": 0.5117, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 1.0443571027512633, | |
| "grad_norm": 0.2839741110801697, | |
| "learning_rate": 0.0001318403838293756, | |
| "loss": 0.5282, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 1.046603032004492, | |
| "grad_norm": 0.2537892460823059, | |
| "learning_rate": 0.00013168082274886953, | |
| "loss": 0.5096, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 1.0488489612577203, | |
| "grad_norm": 0.2625972032546997, | |
| "learning_rate": 0.00013152090804341118, | |
| "loss": 0.5188, | |
| "step": 467 | |
| }, | |
| { | |
| "epoch": 1.051094890510949, | |
| "grad_norm": 0.3052925169467926, | |
| "learning_rate": 0.00013136064080721354, | |
| "loss": 0.5409, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 1.0533408197641774, | |
| "grad_norm": 0.2866557538509369, | |
| "learning_rate": 0.00013120002213690192, | |
| "loss": 0.5101, | |
| "step": 469 | |
| }, | |
| { | |
| "epoch": 1.055586749017406, | |
| "grad_norm": 0.26804205775260925, | |
| "learning_rate": 0.00013103905313150617, | |
| "loss": 0.5221, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 1.0578326782706344, | |
| "grad_norm": 0.2677738070487976, | |
| "learning_rate": 0.00013087773489245334, | |
| "loss": 0.5203, | |
| "step": 471 | |
| }, | |
| { | |
| "epoch": 1.060078607523863, | |
| "grad_norm": 0.273448646068573, | |
| "learning_rate": 0.00013071606852356013, | |
| "loss": 0.5349, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 1.0623245367770915, | |
| "grad_norm": 0.27046024799346924, | |
| "learning_rate": 0.00013055405513102533, | |
| "loss": 0.5132, | |
| "step": 473 | |
| }, | |
| { | |
| "epoch": 1.0645704660303201, | |
| "grad_norm": 0.25829020142555237, | |
| "learning_rate": 0.00013039169582342215, | |
| "loss": 0.4968, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 1.0668163952835485, | |
| "grad_norm": 0.27012374997138977, | |
| "learning_rate": 0.0001302289917116908, | |
| "loss": 0.5166, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 1.0690623245367772, | |
| "grad_norm": 0.2819938063621521, | |
| "learning_rate": 0.00013006594390913077, | |
| "loss": 0.5238, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 1.0713082537900056, | |
| "grad_norm": 0.24958448112010956, | |
| "learning_rate": 0.00012990255353139324, | |
| "loss": 0.5031, | |
| "step": 477 | |
| }, | |
| { | |
| "epoch": 1.073554183043234, | |
| "grad_norm": 0.23778881132602692, | |
| "learning_rate": 0.0001297388216964735, | |
| "loss": 0.5297, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 1.0758001122964627, | |
| "grad_norm": 0.25948163866996765, | |
| "learning_rate": 0.00012957474952470313, | |
| "loss": 0.5146, | |
| "step": 479 | |
| }, | |
| { | |
| "epoch": 1.078046041549691, | |
| "grad_norm": 0.22898133099079132, | |
| "learning_rate": 0.00012941033813874264, | |
| "loss": 0.5137, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 1.0802919708029197, | |
| "grad_norm": 0.2507185637950897, | |
| "learning_rate": 0.00012924558866357343, | |
| "loss": 0.5241, | |
| "step": 481 | |
| }, | |
| { | |
| "epoch": 1.0825379000561481, | |
| "grad_norm": 0.2403927892446518, | |
| "learning_rate": 0.00012908050222649043, | |
| "loss": 0.5036, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 1.0847838293093768, | |
| "grad_norm": 0.23922879993915558, | |
| "learning_rate": 0.00012891507995709412, | |
| "loss": 0.528, | |
| "step": 483 | |
| }, | |
| { | |
| "epoch": 1.0870297585626052, | |
| "grad_norm": 0.2286342829465866, | |
| "learning_rate": 0.00012874932298728286, | |
| "loss": 0.5202, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 1.0892756878158338, | |
| "grad_norm": 0.258478045463562, | |
| "learning_rate": 0.00012858323245124538, | |
| "loss": 0.5041, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 1.0915216170690623, | |
| "grad_norm": 0.27987441420555115, | |
| "learning_rate": 0.0001284168094854526, | |
| "loss": 0.5021, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 1.093767546322291, | |
| "grad_norm": 0.22872576117515564, | |
| "learning_rate": 0.00012825005522865027, | |
| "loss": 0.5243, | |
| "step": 487 | |
| }, | |
| { | |
| "epoch": 1.0960134755755193, | |
| "grad_norm": 0.22990728914737701, | |
| "learning_rate": 0.00012808297082185087, | |
| "loss": 0.5186, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 1.098259404828748, | |
| "grad_norm": 0.21057239174842834, | |
| "learning_rate": 0.000127915557408326, | |
| "loss": 0.5074, | |
| "step": 489 | |
| }, | |
| { | |
| "epoch": 1.1005053340819764, | |
| "grad_norm": 0.2562633752822876, | |
| "learning_rate": 0.00012774781613359841, | |
| "loss": 0.5205, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 1.102751263335205, | |
| "grad_norm": 0.23108799755573273, | |
| "learning_rate": 0.0001275797481454343, | |
| "loss": 0.5289, | |
| "step": 491 | |
| }, | |
| { | |
| "epoch": 1.1049971925884334, | |
| "grad_norm": 0.2631300389766693, | |
| "learning_rate": 0.00012741135459383543, | |
| "loss": 0.5198, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 1.107243121841662, | |
| "grad_norm": 0.2443421483039856, | |
| "learning_rate": 0.00012724263663103108, | |
| "loss": 0.535, | |
| "step": 493 | |
| }, | |
| { | |
| "epoch": 1.1094890510948905, | |
| "grad_norm": 0.22926633059978485, | |
| "learning_rate": 0.00012707359541147043, | |
| "loss": 0.4935, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 1.1117349803481191, | |
| "grad_norm": 0.25909942388534546, | |
| "learning_rate": 0.00012690423209181452, | |
| "loss": 0.4998, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 1.1139809096013475, | |
| "grad_norm": 0.24831925332546234, | |
| "learning_rate": 0.0001267345478309283, | |
| "loss": 0.5246, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 1.1162268388545762, | |
| "grad_norm": 0.26700034737586975, | |
| "learning_rate": 0.00012656454378987282, | |
| "loss": 0.5276, | |
| "step": 497 | |
| }, | |
| { | |
| "epoch": 1.1184727681078046, | |
| "grad_norm": 0.24582357704639435, | |
| "learning_rate": 0.00012639422113189713, | |
| "loss": 0.5274, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 1.120718697361033, | |
| "grad_norm": 0.2464480996131897, | |
| "learning_rate": 0.00012622358102243054, | |
| "loss": 0.514, | |
| "step": 499 | |
| }, | |
| { | |
| "epoch": 1.1229646266142617, | |
| "grad_norm": 0.28942957520484924, | |
| "learning_rate": 0.0001260526246290744, | |
| "loss": 0.5216, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 1.12521055586749, | |
| "grad_norm": 0.29417484998703003, | |
| "learning_rate": 0.00012588135312159427, | |
| "loss": 0.5214, | |
| "step": 501 | |
| }, | |
| { | |
| "epoch": 1.1274564851207187, | |
| "grad_norm": 0.27026209235191345, | |
| "learning_rate": 0.00012570976767191188, | |
| "loss": 0.5206, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 1.1297024143739471, | |
| "grad_norm": 0.2554686963558197, | |
| "learning_rate": 0.0001255378694540971, | |
| "loss": 0.5285, | |
| "step": 503 | |
| }, | |
| { | |
| "epoch": 1.1319483436271758, | |
| "grad_norm": 0.28773826360702515, | |
| "learning_rate": 0.00012536565964435986, | |
| "loss": 0.4933, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 1.1341942728804042, | |
| "grad_norm": 0.28885528445243835, | |
| "learning_rate": 0.00012519313942104224, | |
| "loss": 0.5392, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 1.1364402021336328, | |
| "grad_norm": 0.31166213750839233, | |
| "learning_rate": 0.00012502030996461023, | |
| "loss": 0.5333, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 1.1386861313868613, | |
| "grad_norm": 0.3064601719379425, | |
| "learning_rate": 0.00012484717245764585, | |
| "loss": 0.5261, | |
| "step": 507 | |
| }, | |
| { | |
| "epoch": 1.14093206064009, | |
| "grad_norm": 0.3036741018295288, | |
| "learning_rate": 0.00012467372808483882, | |
| "loss": 0.5309, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 1.1431779898933183, | |
| "grad_norm": 0.2402871996164322, | |
| "learning_rate": 0.00012449997803297866, | |
| "loss": 0.4906, | |
| "step": 509 | |
| }, | |
| { | |
| "epoch": 1.145423919146547, | |
| "grad_norm": 0.26572084426879883, | |
| "learning_rate": 0.0001243259234909465, | |
| "loss": 0.5152, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 1.1476698483997754, | |
| "grad_norm": 0.26166555285453796, | |
| "learning_rate": 0.00012415156564970687, | |
| "loss": 0.5266, | |
| "step": 511 | |
| }, | |
| { | |
| "epoch": 1.149915777653004, | |
| "grad_norm": 0.26020121574401855, | |
| "learning_rate": 0.0001239769057022997, | |
| "loss": 0.5063, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 1.1521617069062324, | |
| "grad_norm": 0.2840318977832794, | |
| "learning_rate": 0.00012380194484383201, | |
| "loss": 0.5301, | |
| "step": 513 | |
| }, | |
| { | |
| "epoch": 1.154407636159461, | |
| "grad_norm": 0.2320166826248169, | |
| "learning_rate": 0.00012362668427146986, | |
| "loss": 0.5074, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 1.1566535654126895, | |
| "grad_norm": 0.26712101697921753, | |
| "learning_rate": 0.00012345112518443008, | |
| "loss": 0.5247, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 1.158899494665918, | |
| "grad_norm": 0.2772868871688843, | |
| "learning_rate": 0.000123275268783972, | |
| "loss": 0.5113, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 1.1611454239191465, | |
| "grad_norm": 0.23757833242416382, | |
| "learning_rate": 0.00012309911627338943, | |
| "loss": 0.5383, | |
| "step": 517 | |
| }, | |
| { | |
| "epoch": 1.1633913531723752, | |
| "grad_norm": 0.24388740956783295, | |
| "learning_rate": 0.00012292266885800221, | |
| "loss": 0.5404, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 1.1656372824256036, | |
| "grad_norm": 0.32931777834892273, | |
| "learning_rate": 0.00012274592774514812, | |
| "loss": 0.5304, | |
| "step": 519 | |
| }, | |
| { | |
| "epoch": 1.167883211678832, | |
| "grad_norm": 0.2616422176361084, | |
| "learning_rate": 0.00012256889414417456, | |
| "loss": 0.5111, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 1.1701291409320607, | |
| "grad_norm": 0.20813870429992676, | |
| "learning_rate": 0.0001223915692664302, | |
| "loss": 0.4817, | |
| "step": 521 | |
| }, | |
| { | |
| "epoch": 1.172375070185289, | |
| "grad_norm": 0.2631247639656067, | |
| "learning_rate": 0.00012221395432525687, | |
| "loss": 0.5119, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 1.1746209994385177, | |
| "grad_norm": 0.22986264526844025, | |
| "learning_rate": 0.0001220360505359811, | |
| "loss": 0.5136, | |
| "step": 523 | |
| }, | |
| { | |
| "epoch": 1.1768669286917461, | |
| "grad_norm": 0.23806849122047424, | |
| "learning_rate": 0.00012185785911590583, | |
| "loss": 0.5247, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 1.1791128579449748, | |
| "grad_norm": 0.2917364537715912, | |
| "learning_rate": 0.00012167938128430216, | |
| "loss": 0.5286, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 1.1813587871982032, | |
| "grad_norm": 0.24546997249126434, | |
| "learning_rate": 0.00012150061826240091, | |
| "loss": 0.5197, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 1.1836047164514318, | |
| "grad_norm": 0.22644369304180145, | |
| "learning_rate": 0.00012132157127338435, | |
| "loss": 0.5369, | |
| "step": 527 | |
| }, | |
| { | |
| "epoch": 1.1858506457046603, | |
| "grad_norm": 0.2547290623188019, | |
| "learning_rate": 0.00012114224154237777, | |
| "loss": 0.5108, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 1.188096574957889, | |
| "grad_norm": 0.2384437471628189, | |
| "learning_rate": 0.00012096263029644112, | |
| "loss": 0.528, | |
| "step": 529 | |
| }, | |
| { | |
| "epoch": 1.1903425042111173, | |
| "grad_norm": 0.2654406726360321, | |
| "learning_rate": 0.0001207827387645606, | |
| "loss": 0.5179, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 1.192588433464346, | |
| "grad_norm": 0.19757139682769775, | |
| "learning_rate": 0.00012060256817764025, | |
| "loss": 0.5126, | |
| "step": 531 | |
| }, | |
| { | |
| "epoch": 1.1948343627175744, | |
| "grad_norm": 0.21663667261600494, | |
| "learning_rate": 0.00012042211976849356, | |
| "loss": 0.5136, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 1.197080291970803, | |
| "grad_norm": 0.21993404626846313, | |
| "learning_rate": 0.00012024139477183504, | |
| "loss": 0.5185, | |
| "step": 533 | |
| }, | |
| { | |
| "epoch": 1.1993262212240314, | |
| "grad_norm": 0.2317759096622467, | |
| "learning_rate": 0.00012006039442427167, | |
| "loss": 0.5139, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 1.20157215047726, | |
| "grad_norm": 0.21483832597732544, | |
| "learning_rate": 0.0001198791199642946, | |
| "loss": 0.5231, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 1.2038180797304885, | |
| "grad_norm": 0.2653373181819916, | |
| "learning_rate": 0.0001196975726322705, | |
| "loss": 0.5177, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 1.206064008983717, | |
| "grad_norm": 0.19980397820472717, | |
| "learning_rate": 0.00011951575367043321, | |
| "loss": 0.5081, | |
| "step": 537 | |
| }, | |
| { | |
| "epoch": 1.2083099382369455, | |
| "grad_norm": 0.2335788607597351, | |
| "learning_rate": 0.00011933366432287522, | |
| "loss": 0.5283, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 1.210555867490174, | |
| "grad_norm": 0.20896217226982117, | |
| "learning_rate": 0.00011915130583553906, | |
| "loss": 0.5009, | |
| "step": 539 | |
| }, | |
| { | |
| "epoch": 1.2128017967434026, | |
| "grad_norm": 0.2064492404460907, | |
| "learning_rate": 0.00011896867945620891, | |
| "loss": 0.5072, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 1.215047725996631, | |
| "grad_norm": 0.22994771599769592, | |
| "learning_rate": 0.00011878578643450191, | |
| "loss": 0.506, | |
| "step": 541 | |
| }, | |
| { | |
| "epoch": 1.2172936552498597, | |
| "grad_norm": 0.21593116223812103, | |
| "learning_rate": 0.00011860262802185982, | |
| "loss": 0.5304, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 1.219539584503088, | |
| "grad_norm": 0.21689918637275696, | |
| "learning_rate": 0.0001184192054715402, | |
| "loss": 0.5163, | |
| "step": 543 | |
| }, | |
| { | |
| "epoch": 1.2217855137563167, | |
| "grad_norm": 0.20837046205997467, | |
| "learning_rate": 0.00011823552003860805, | |
| "loss": 0.5247, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 1.2240314430095451, | |
| "grad_norm": 0.2125036120414734, | |
| "learning_rate": 0.00011805157297992715, | |
| "loss": 0.5118, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 1.2262773722627738, | |
| "grad_norm": 0.21233297884464264, | |
| "learning_rate": 0.00011786736555415134, | |
| "loss": 0.5091, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 1.2285233015160022, | |
| "grad_norm": 0.2236490547657013, | |
| "learning_rate": 0.00011768289902171612, | |
| "loss": 0.5168, | |
| "step": 547 | |
| }, | |
| { | |
| "epoch": 1.2307692307692308, | |
| "grad_norm": 0.2149861603975296, | |
| "learning_rate": 0.00011749817464482995, | |
| "loss": 0.5221, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 1.2330151600224593, | |
| "grad_norm": 0.23652967810630798, | |
| "learning_rate": 0.00011731319368746545, | |
| "loss": 0.5132, | |
| "step": 549 | |
| }, | |
| { | |
| "epoch": 1.235261089275688, | |
| "grad_norm": 0.2397671788930893, | |
| "learning_rate": 0.00011712795741535098, | |
| "loss": 0.5085, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 1.2375070185289163, | |
| "grad_norm": 0.1940278857946396, | |
| "learning_rate": 0.00011694246709596195, | |
| "loss": 0.5429, | |
| "step": 551 | |
| }, | |
| { | |
| "epoch": 1.239752947782145, | |
| "grad_norm": 0.24372558295726776, | |
| "learning_rate": 0.00011675672399851188, | |
| "loss": 0.5091, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 1.2419988770353734, | |
| "grad_norm": 0.21898634731769562, | |
| "learning_rate": 0.00011657072939394413, | |
| "loss": 0.5164, | |
| "step": 553 | |
| }, | |
| { | |
| "epoch": 1.2442448062886018, | |
| "grad_norm": 0.2210114300251007, | |
| "learning_rate": 0.00011638448455492287, | |
| "loss": 0.5133, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 1.2464907355418304, | |
| "grad_norm": 0.2156367301940918, | |
| "learning_rate": 0.00011619799075582452, | |
| "loss": 0.5109, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 1.248736664795059, | |
| "grad_norm": 0.1969204545021057, | |
| "learning_rate": 0.00011601124927272906, | |
| "loss": 0.5143, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 1.2509825940482875, | |
| "grad_norm": 0.19980621337890625, | |
| "learning_rate": 0.00011582426138341111, | |
| "loss": 0.5087, | |
| "step": 557 | |
| }, | |
| { | |
| "epoch": 1.253228523301516, | |
| "grad_norm": 0.2064458578824997, | |
| "learning_rate": 0.00011563702836733152, | |
| "loss": 0.505, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 1.2554744525547445, | |
| "grad_norm": 0.24166250228881836, | |
| "learning_rate": 0.00011544955150562819, | |
| "loss": 0.5204, | |
| "step": 559 | |
| }, | |
| { | |
| "epoch": 1.2577203818079732, | |
| "grad_norm": 0.251028448343277, | |
| "learning_rate": 0.0001152618320811077, | |
| "loss": 0.5071, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 1.2599663110612016, | |
| "grad_norm": 0.1982237845659256, | |
| "learning_rate": 0.0001150738713782363, | |
| "loss": 0.5059, | |
| "step": 561 | |
| }, | |
| { | |
| "epoch": 1.26221224031443, | |
| "grad_norm": 0.26162639260292053, | |
| "learning_rate": 0.00011488567068313114, | |
| "loss": 0.5172, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 1.2644581695676587, | |
| "grad_norm": 0.2098427712917328, | |
| "learning_rate": 0.0001146972312835516, | |
| "loss": 0.5135, | |
| "step": 563 | |
| }, | |
| { | |
| "epoch": 1.266704098820887, | |
| "grad_norm": 0.2430814802646637, | |
| "learning_rate": 0.00011450855446889031, | |
| "loss": 0.5125, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 1.2689500280741157, | |
| "grad_norm": 0.21262916922569275, | |
| "learning_rate": 0.00011431964153016444, | |
| "loss": 0.5114, | |
| "step": 565 | |
| }, | |
| { | |
| "epoch": 1.2711959573273441, | |
| "grad_norm": 0.20545636117458344, | |
| "learning_rate": 0.00011413049376000686, | |
| "loss": 0.5095, | |
| "step": 566 | |
| }, | |
| { | |
| "epoch": 1.2734418865805728, | |
| "grad_norm": 0.23621973395347595, | |
| "learning_rate": 0.00011394111245265724, | |
| "loss": 0.5231, | |
| "step": 567 | |
| }, | |
| { | |
| "epoch": 1.2756878158338012, | |
| "grad_norm": 0.21574462950229645, | |
| "learning_rate": 0.00011375149890395321, | |
| "loss": 0.5292, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 1.2779337450870298, | |
| "grad_norm": 0.22070422768592834, | |
| "learning_rate": 0.00011356165441132152, | |
| "loss": 0.5157, | |
| "step": 569 | |
| }, | |
| { | |
| "epoch": 1.2801796743402583, | |
| "grad_norm": 0.19420836865901947, | |
| "learning_rate": 0.00011337158027376918, | |
| "loss": 0.5179, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 1.2824256035934867, | |
| "grad_norm": 0.26924458146095276, | |
| "learning_rate": 0.0001131812777918745, | |
| "loss": 0.5408, | |
| "step": 571 | |
| }, | |
| { | |
| "epoch": 1.2846715328467153, | |
| "grad_norm": 0.22928448021411896, | |
| "learning_rate": 0.00011299074826777824, | |
| "loss": 0.5146, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 1.286917462099944, | |
| "grad_norm": 0.24480290710926056, | |
| "learning_rate": 0.00011279999300517471, | |
| "loss": 0.5151, | |
| "step": 573 | |
| }, | |
| { | |
| "epoch": 1.2891633913531724, | |
| "grad_norm": 0.2365870326757431, | |
| "learning_rate": 0.0001126090133093028, | |
| "loss": 0.5088, | |
| "step": 574 | |
| }, | |
| { | |
| "epoch": 1.2914093206064008, | |
| "grad_norm": 0.2634016275405884, | |
| "learning_rate": 0.0001124178104869371, | |
| "loss": 0.519, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 1.2936552498596294, | |
| "grad_norm": 0.275654673576355, | |
| "learning_rate": 0.00011222638584637897, | |
| "loss": 0.5276, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 1.295901179112858, | |
| "grad_norm": 0.2414851039648056, | |
| "learning_rate": 0.00011203474069744747, | |
| "loss": 0.4996, | |
| "step": 577 | |
| }, | |
| { | |
| "epoch": 1.2981471083660865, | |
| "grad_norm": 0.23619700968265533, | |
| "learning_rate": 0.00011184287635147058, | |
| "loss": 0.5116, | |
| "step": 578 | |
| }, | |
| { | |
| "epoch": 1.300393037619315, | |
| "grad_norm": 0.25254112482070923, | |
| "learning_rate": 0.00011165079412127607, | |
| "loss": 0.5133, | |
| "step": 579 | |
| }, | |
| { | |
| "epoch": 1.3026389668725435, | |
| "grad_norm": 0.21320711076259613, | |
| "learning_rate": 0.00011145849532118258, | |
| "loss": 0.5049, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 1.304884896125772, | |
| "grad_norm": 0.24191851913928986, | |
| "learning_rate": 0.00011126598126699068, | |
| "loss": 0.5226, | |
| "step": 581 | |
| }, | |
| { | |
| "epoch": 1.3071308253790006, | |
| "grad_norm": 0.20547953248023987, | |
| "learning_rate": 0.00011107325327597372, | |
| "loss": 0.5196, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 1.309376754632229, | |
| "grad_norm": 0.2211044281721115, | |
| "learning_rate": 0.00011088031266686902, | |
| "loss": 0.5135, | |
| "step": 583 | |
| }, | |
| { | |
| "epoch": 1.3116226838854577, | |
| "grad_norm": 0.226315438747406, | |
| "learning_rate": 0.00011068716075986863, | |
| "loss": 0.5155, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 1.313868613138686, | |
| "grad_norm": 0.1992364525794983, | |
| "learning_rate": 0.00011049379887661044, | |
| "loss": 0.5135, | |
| "step": 585 | |
| }, | |
| { | |
| "epoch": 1.3161145423919147, | |
| "grad_norm": 0.20736606419086456, | |
| "learning_rate": 0.00011030022834016916, | |
| "loss": 0.5107, | |
| "step": 586 | |
| }, | |
| { | |
| "epoch": 1.3183604716451431, | |
| "grad_norm": 0.20780953764915466, | |
| "learning_rate": 0.00011010645047504712, | |
| "loss": 0.5072, | |
| "step": 587 | |
| }, | |
| { | |
| "epoch": 1.3206064008983718, | |
| "grad_norm": 0.20156902074813843, | |
| "learning_rate": 0.0001099124666071653, | |
| "loss": 0.5037, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 1.3228523301516002, | |
| "grad_norm": 0.18280163407325745, | |
| "learning_rate": 0.00010971827806385431, | |
| "loss": 0.5308, | |
| "step": 589 | |
| }, | |
| { | |
| "epoch": 1.3250982594048288, | |
| "grad_norm": 0.20286300778388977, | |
| "learning_rate": 0.00010952388617384519, | |
| "loss": 0.5239, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 1.3273441886580573, | |
| "grad_norm": 0.20476078987121582, | |
| "learning_rate": 0.00010932929226726041, | |
| "loss": 0.5339, | |
| "step": 591 | |
| }, | |
| { | |
| "epoch": 1.3295901179112857, | |
| "grad_norm": 0.19983462989330292, | |
| "learning_rate": 0.00010913449767560468, | |
| "loss": 0.5166, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 1.3318360471645143, | |
| "grad_norm": 0.22195865213871002, | |
| "learning_rate": 0.00010893950373175597, | |
| "loss": 0.514, | |
| "step": 593 | |
| }, | |
| { | |
| "epoch": 1.334081976417743, | |
| "grad_norm": 0.20715545117855072, | |
| "learning_rate": 0.00010874431176995627, | |
| "loss": 0.5296, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 1.3363279056709714, | |
| "grad_norm": 0.21173766255378723, | |
| "learning_rate": 0.00010854892312580249, | |
| "loss": 0.4918, | |
| "step": 595 | |
| }, | |
| { | |
| "epoch": 1.3385738349241998, | |
| "grad_norm": 0.2034001350402832, | |
| "learning_rate": 0.0001083533391362374, | |
| "loss": 0.5176, | |
| "step": 596 | |
| }, | |
| { | |
| "epoch": 1.3408197641774284, | |
| "grad_norm": 0.23540934920310974, | |
| "learning_rate": 0.00010815756113954031, | |
| "loss": 0.5145, | |
| "step": 597 | |
| }, | |
| { | |
| "epoch": 1.343065693430657, | |
| "grad_norm": 0.19440345466136932, | |
| "learning_rate": 0.00010796159047531811, | |
| "loss": 0.5167, | |
| "step": 598 | |
| }, | |
| { | |
| "epoch": 1.3453116226838855, | |
| "grad_norm": 0.2172805666923523, | |
| "learning_rate": 0.00010776542848449602, | |
| "loss": 0.5235, | |
| "step": 599 | |
| }, | |
| { | |
| "epoch": 1.347557551937114, | |
| "grad_norm": 0.19153092801570892, | |
| "learning_rate": 0.00010756907650930831, | |
| "loss": 0.4961, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 1.3498034811903425, | |
| "grad_norm": 0.2150796353816986, | |
| "learning_rate": 0.00010737253589328933, | |
| "loss": 0.5154, | |
| "step": 601 | |
| }, | |
| { | |
| "epoch": 1.352049410443571, | |
| "grad_norm": 0.21939396858215332, | |
| "learning_rate": 0.0001071758079812641, | |
| "loss": 0.5387, | |
| "step": 602 | |
| }, | |
| { | |
| "epoch": 1.3542953396967996, | |
| "grad_norm": 0.20470492541790009, | |
| "learning_rate": 0.00010697889411933928, | |
| "loss": 0.4978, | |
| "step": 603 | |
| }, | |
| { | |
| "epoch": 1.356541268950028, | |
| "grad_norm": 0.21058504283428192, | |
| "learning_rate": 0.00010678179565489388, | |
| "loss": 0.5096, | |
| "step": 604 | |
| }, | |
| { | |
| "epoch": 1.3587871982032567, | |
| "grad_norm": 0.1950283795595169, | |
| "learning_rate": 0.00010658451393656999, | |
| "loss": 0.5089, | |
| "step": 605 | |
| }, | |
| { | |
| "epoch": 1.361033127456485, | |
| "grad_norm": 0.21830430626869202, | |
| "learning_rate": 0.00010638705031426371, | |
| "loss": 0.4892, | |
| "step": 606 | |
| }, | |
| { | |
| "epoch": 1.3632790567097137, | |
| "grad_norm": 0.19007915258407593, | |
| "learning_rate": 0.00010618940613911576, | |
| "loss": 0.5309, | |
| "step": 607 | |
| }, | |
| { | |
| "epoch": 1.3655249859629421, | |
| "grad_norm": 0.20983009040355682, | |
| "learning_rate": 0.0001059915827635022, | |
| "loss": 0.5171, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 1.3677709152161706, | |
| "grad_norm": 0.20747217535972595, | |
| "learning_rate": 0.00010579358154102548, | |
| "loss": 0.4915, | |
| "step": 609 | |
| }, | |
| { | |
| "epoch": 1.3700168444693992, | |
| "grad_norm": 0.20381350815296173, | |
| "learning_rate": 0.00010559540382650474, | |
| "loss": 0.503, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 1.3722627737226278, | |
| "grad_norm": 0.2014596313238144, | |
| "learning_rate": 0.00010539705097596689, | |
| "loss": 0.5124, | |
| "step": 611 | |
| }, | |
| { | |
| "epoch": 1.3745087029758563, | |
| "grad_norm": 0.2117050141096115, | |
| "learning_rate": 0.00010519852434663721, | |
| "loss": 0.4996, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 1.3767546322290847, | |
| "grad_norm": 0.21098558604717255, | |
| "learning_rate": 0.00010499982529692996, | |
| "loss": 0.492, | |
| "step": 613 | |
| }, | |
| { | |
| "epoch": 1.3790005614823133, | |
| "grad_norm": 0.22107858955860138, | |
| "learning_rate": 0.00010480095518643929, | |
| "loss": 0.5165, | |
| "step": 614 | |
| }, | |
| { | |
| "epoch": 1.381246490735542, | |
| "grad_norm": 0.22238287329673767, | |
| "learning_rate": 0.00010460191537592977, | |
| "loss": 0.5095, | |
| "step": 615 | |
| }, | |
| { | |
| "epoch": 1.3834924199887704, | |
| "grad_norm": 0.20342691242694855, | |
| "learning_rate": 0.00010440270722732714, | |
| "loss": 0.5141, | |
| "step": 616 | |
| }, | |
| { | |
| "epoch": 1.3857383492419988, | |
| "grad_norm": 0.22299018502235413, | |
| "learning_rate": 0.00010420333210370903, | |
| "loss": 0.5133, | |
| "step": 617 | |
| }, | |
| { | |
| "epoch": 1.3879842784952274, | |
| "grad_norm": 0.20717273652553558, | |
| "learning_rate": 0.00010400379136929557, | |
| "loss": 0.5143, | |
| "step": 618 | |
| }, | |
| { | |
| "epoch": 1.3902302077484558, | |
| "grad_norm": 0.20377473533153534, | |
| "learning_rate": 0.00010380408638944007, | |
| "loss": 0.4835, | |
| "step": 619 | |
| }, | |
| { | |
| "epoch": 1.3924761370016845, | |
| "grad_norm": 0.22891288995742798, | |
| "learning_rate": 0.00010360421853061966, | |
| "loss": 0.5122, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 1.394722066254913, | |
| "grad_norm": 0.19375132024288177, | |
| "learning_rate": 0.00010340418916042603, | |
| "loss": 0.5052, | |
| "step": 621 | |
| }, | |
| { | |
| "epoch": 1.3969679955081415, | |
| "grad_norm": 0.191814586520195, | |
| "learning_rate": 0.00010320399964755596, | |
| "loss": 0.4988, | |
| "step": 622 | |
| }, | |
| { | |
| "epoch": 1.39921392476137, | |
| "grad_norm": 0.1985396444797516, | |
| "learning_rate": 0.00010300365136180201, | |
| "loss": 0.5049, | |
| "step": 623 | |
| }, | |
| { | |
| "epoch": 1.4014598540145986, | |
| "grad_norm": 0.18780378997325897, | |
| "learning_rate": 0.0001028031456740432, | |
| "loss": 0.5002, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 1.403705783267827, | |
| "grad_norm": 0.21660645306110382, | |
| "learning_rate": 0.00010260248395623548, | |
| "loss": 0.5184, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 1.4059517125210557, | |
| "grad_norm": 0.19068920612335205, | |
| "learning_rate": 0.00010240166758140245, | |
| "loss": 0.5032, | |
| "step": 626 | |
| }, | |
| { | |
| "epoch": 1.408197641774284, | |
| "grad_norm": 0.2113179713487625, | |
| "learning_rate": 0.00010220069792362601, | |
| "loss": 0.5152, | |
| "step": 627 | |
| }, | |
| { | |
| "epoch": 1.4104435710275127, | |
| "grad_norm": 0.18784399330615997, | |
| "learning_rate": 0.00010199957635803684, | |
| "loss": 0.5261, | |
| "step": 628 | |
| }, | |
| { | |
| "epoch": 1.4126895002807411, | |
| "grad_norm": 0.1969737708568573, | |
| "learning_rate": 0.00010179830426080504, | |
| "loss": 0.5152, | |
| "step": 629 | |
| }, | |
| { | |
| "epoch": 1.4149354295339696, | |
| "grad_norm": 0.18799488246440887, | |
| "learning_rate": 0.00010159688300913076, | |
| "loss": 0.5111, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 1.4171813587871982, | |
| "grad_norm": 0.18792767822742462, | |
| "learning_rate": 0.0001013953139812347, | |
| "loss": 0.5092, | |
| "step": 631 | |
| }, | |
| { | |
| "epoch": 1.4194272880404268, | |
| "grad_norm": 0.21675904095172882, | |
| "learning_rate": 0.00010119359855634876, | |
| "loss": 0.5076, | |
| "step": 632 | |
| }, | |
| { | |
| "epoch": 1.4216732172936553, | |
| "grad_norm": 0.19109146296977997, | |
| "learning_rate": 0.00010099173811470652, | |
| "loss": 0.507, | |
| "step": 633 | |
| }, | |
| { | |
| "epoch": 1.4239191465468837, | |
| "grad_norm": 0.1930873841047287, | |
| "learning_rate": 0.00010078973403753383, | |
| "loss": 0.5195, | |
| "step": 634 | |
| }, | |
| { | |
| "epoch": 1.4261650758001123, | |
| "grad_norm": 0.18737006187438965, | |
| "learning_rate": 0.00010058758770703938, | |
| "loss": 0.5233, | |
| "step": 635 | |
| }, | |
| { | |
| "epoch": 1.428411005053341, | |
| "grad_norm": 0.1958773285150528, | |
| "learning_rate": 0.00010038530050640522, | |
| "loss": 0.5031, | |
| "step": 636 | |
| }, | |
| { | |
| "epoch": 1.4306569343065694, | |
| "grad_norm": 0.18015055358409882, | |
| "learning_rate": 0.00010018287381977732, | |
| "loss": 0.5138, | |
| "step": 637 | |
| }, | |
| { | |
| "epoch": 1.4329028635597978, | |
| "grad_norm": 0.18713940680027008, | |
| "learning_rate": 9.998030903225603e-05, | |
| "loss": 0.5084, | |
| "step": 638 | |
| }, | |
| { | |
| "epoch": 1.4351487928130264, | |
| "grad_norm": 0.20459598302841187, | |
| "learning_rate": 9.977760752988671e-05, | |
| "loss": 0.5409, | |
| "step": 639 | |
| }, | |
| { | |
| "epoch": 1.4373947220662548, | |
| "grad_norm": 0.17716822028160095, | |
| "learning_rate": 9.957477069965018e-05, | |
| "loss": 0.509, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 1.4396406513194835, | |
| "grad_norm": 0.1981070339679718, | |
| "learning_rate": 9.93717999294532e-05, | |
| "loss": 0.4953, | |
| "step": 641 | |
| }, | |
| { | |
| "epoch": 1.441886580572712, | |
| "grad_norm": 0.19121180474758148, | |
| "learning_rate": 9.916869660811906e-05, | |
| "loss": 0.5109, | |
| "step": 642 | |
| }, | |
| { | |
| "epoch": 1.4441325098259405, | |
| "grad_norm": 0.20929452776908875, | |
| "learning_rate": 9.896546212537793e-05, | |
| "loss": 0.517, | |
| "step": 643 | |
| }, | |
| { | |
| "epoch": 1.446378439079169, | |
| "grad_norm": 0.19593368470668793, | |
| "learning_rate": 9.87620978718576e-05, | |
| "loss": 0.5071, | |
| "step": 644 | |
| }, | |
| { | |
| "epoch": 1.4486243683323976, | |
| "grad_norm": 0.21035808324813843, | |
| "learning_rate": 9.855860523907372e-05, | |
| "loss": 0.5198, | |
| "step": 645 | |
| }, | |
| { | |
| "epoch": 1.450870297585626, | |
| "grad_norm": 0.19853971898555756, | |
| "learning_rate": 9.835498561942036e-05, | |
| "loss": 0.5437, | |
| "step": 646 | |
| }, | |
| { | |
| "epoch": 1.4531162268388544, | |
| "grad_norm": 0.1949443370103836, | |
| "learning_rate": 9.815124040616056e-05, | |
| "loss": 0.5076, | |
| "step": 647 | |
| }, | |
| { | |
| "epoch": 1.455362156092083, | |
| "grad_norm": 0.20280544459819794, | |
| "learning_rate": 9.794737099341664e-05, | |
| "loss": 0.5093, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 1.4576080853453117, | |
| "grad_norm": 0.21078361570835114, | |
| "learning_rate": 9.774337877616083e-05, | |
| "loss": 0.5081, | |
| "step": 649 | |
| }, | |
| { | |
| "epoch": 1.4598540145985401, | |
| "grad_norm": 0.1961338371038437, | |
| "learning_rate": 9.753926515020567e-05, | |
| "loss": 0.5096, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 1.4620999438517686, | |
| "grad_norm": 0.19009891152381897, | |
| "learning_rate": 9.733503151219433e-05, | |
| "loss": 0.4999, | |
| "step": 651 | |
| }, | |
| { | |
| "epoch": 1.4643458731049972, | |
| "grad_norm": 0.18627040088176727, | |
| "learning_rate": 9.713067925959126e-05, | |
| "loss": 0.5056, | |
| "step": 652 | |
| }, | |
| { | |
| "epoch": 1.4665918023582258, | |
| "grad_norm": 0.1938895285129547, | |
| "learning_rate": 9.692620979067245e-05, | |
| "loss": 0.5137, | |
| "step": 653 | |
| }, | |
| { | |
| "epoch": 1.4688377316114543, | |
| "grad_norm": 0.2050761729478836, | |
| "learning_rate": 9.672162450451602e-05, | |
| "loss": 0.5051, | |
| "step": 654 | |
| }, | |
| { | |
| "epoch": 1.4710836608646827, | |
| "grad_norm": 0.19880592823028564, | |
| "learning_rate": 9.651692480099251e-05, | |
| "loss": 0.5055, | |
| "step": 655 | |
| }, | |
| { | |
| "epoch": 1.4733295901179113, | |
| "grad_norm": 0.18447960913181305, | |
| "learning_rate": 9.631211208075534e-05, | |
| "loss": 0.5296, | |
| "step": 656 | |
| }, | |
| { | |
| "epoch": 1.4755755193711397, | |
| "grad_norm": 0.19004195928573608, | |
| "learning_rate": 9.610718774523137e-05, | |
| "loss": 0.5258, | |
| "step": 657 | |
| }, | |
| { | |
| "epoch": 1.4778214486243684, | |
| "grad_norm": 0.19954320788383484, | |
| "learning_rate": 9.590215319661097e-05, | |
| "loss": 0.5011, | |
| "step": 658 | |
| }, | |
| { | |
| "epoch": 1.4800673778775968, | |
| "grad_norm": 0.17005719244480133, | |
| "learning_rate": 9.569700983783885e-05, | |
| "loss": 0.5062, | |
| "step": 659 | |
| }, | |
| { | |
| "epoch": 1.4823133071308254, | |
| "grad_norm": 0.21068550646305084, | |
| "learning_rate": 9.549175907260415e-05, | |
| "loss": 0.5044, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 1.4845592363840538, | |
| "grad_norm": 0.18736523389816284, | |
| "learning_rate": 9.528640230533093e-05, | |
| "loss": 0.521, | |
| "step": 661 | |
| }, | |
| { | |
| "epoch": 1.4868051656372825, | |
| "grad_norm": 0.19477304816246033, | |
| "learning_rate": 9.508094094116863e-05, | |
| "loss": 0.5065, | |
| "step": 662 | |
| }, | |
| { | |
| "epoch": 1.489051094890511, | |
| "grad_norm": 0.20427975058555603, | |
| "learning_rate": 9.48753763859823e-05, | |
| "loss": 0.5208, | |
| "step": 663 | |
| }, | |
| { | |
| "epoch": 1.4912970241437395, | |
| "grad_norm": 0.20408067107200623, | |
| "learning_rate": 9.466971004634316e-05, | |
| "loss": 0.4917, | |
| "step": 664 | |
| }, | |
| { | |
| "epoch": 1.493542953396968, | |
| "grad_norm": 0.22063596546649933, | |
| "learning_rate": 9.446394332951885e-05, | |
| "loss": 0.5097, | |
| "step": 665 | |
| }, | |
| { | |
| "epoch": 1.4957888826501966, | |
| "grad_norm": 0.20878678560256958, | |
| "learning_rate": 9.425807764346383e-05, | |
| "loss": 0.505, | |
| "step": 666 | |
| }, | |
| { | |
| "epoch": 1.498034811903425, | |
| "grad_norm": 0.21228721737861633, | |
| "learning_rate": 9.405211439680975e-05, | |
| "loss": 0.5249, | |
| "step": 667 | |
| }, | |
| { | |
| "epoch": 1.5002807411566534, | |
| "grad_norm": 0.21478019654750824, | |
| "learning_rate": 9.384605499885586e-05, | |
| "loss": 0.516, | |
| "step": 668 | |
| }, | |
| { | |
| "epoch": 1.502526670409882, | |
| "grad_norm": 0.23727190494537354, | |
| "learning_rate": 9.363990085955929e-05, | |
| "loss": 0.5128, | |
| "step": 669 | |
| }, | |
| { | |
| "epoch": 1.5047725996631107, | |
| "grad_norm": 0.211452454328537, | |
| "learning_rate": 9.343365338952544e-05, | |
| "loss": 0.5141, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 1.5070185289163391, | |
| "grad_norm": 0.24813149869441986, | |
| "learning_rate": 9.322731399999829e-05, | |
| "loss": 0.5286, | |
| "step": 671 | |
| }, | |
| { | |
| "epoch": 1.5092644581695676, | |
| "grad_norm": 0.19929581880569458, | |
| "learning_rate": 9.302088410285084e-05, | |
| "loss": 0.5065, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 1.5115103874227962, | |
| "grad_norm": 0.23539748787879944, | |
| "learning_rate": 9.281436511057538e-05, | |
| "loss": 0.5045, | |
| "step": 673 | |
| }, | |
| { | |
| "epoch": 1.5137563166760248, | |
| "grad_norm": 0.18617475032806396, | |
| "learning_rate": 9.260775843627378e-05, | |
| "loss": 0.4943, | |
| "step": 674 | |
| }, | |
| { | |
| "epoch": 1.5160022459292533, | |
| "grad_norm": 0.22366289794445038, | |
| "learning_rate": 9.24010654936479e-05, | |
| "loss": 0.5136, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 1.5182481751824817, | |
| "grad_norm": 0.21610277891159058, | |
| "learning_rate": 9.219428769698991e-05, | |
| "loss": 0.4968, | |
| "step": 676 | |
| }, | |
| { | |
| "epoch": 1.5204941044357103, | |
| "grad_norm": 0.19368857145309448, | |
| "learning_rate": 9.198742646117254e-05, | |
| "loss": 0.5129, | |
| "step": 677 | |
| }, | |
| { | |
| "epoch": 1.522740033688939, | |
| "grad_norm": 0.20865383744239807, | |
| "learning_rate": 9.178048320163954e-05, | |
| "loss": 0.5136, | |
| "step": 678 | |
| }, | |
| { | |
| "epoch": 1.5249859629421674, | |
| "grad_norm": 0.18743731081485748, | |
| "learning_rate": 9.15734593343958e-05, | |
| "loss": 0.5149, | |
| "step": 679 | |
| }, | |
| { | |
| "epoch": 1.5272318921953958, | |
| "grad_norm": 0.22473086416721344, | |
| "learning_rate": 9.136635627599783e-05, | |
| "loss": 0.5155, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 1.5294778214486242, | |
| "grad_norm": 0.1838371306657791, | |
| "learning_rate": 9.115917544354398e-05, | |
| "loss": 0.5102, | |
| "step": 681 | |
| }, | |
| { | |
| "epoch": 1.5317237507018528, | |
| "grad_norm": 0.19203968346118927, | |
| "learning_rate": 9.095191825466481e-05, | |
| "loss": 0.5225, | |
| "step": 682 | |
| }, | |
| { | |
| "epoch": 1.5339696799550815, | |
| "grad_norm": 0.21374920010566711, | |
| "learning_rate": 9.074458612751329e-05, | |
| "loss": 0.5165, | |
| "step": 683 | |
| }, | |
| { | |
| "epoch": 1.53621560920831, | |
| "grad_norm": 0.19073887169361115, | |
| "learning_rate": 9.053718048075516e-05, | |
| "loss": 0.5082, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 1.5384615384615383, | |
| "grad_norm": 0.21084338426589966, | |
| "learning_rate": 9.032970273355926e-05, | |
| "loss": 0.4975, | |
| "step": 685 | |
| }, | |
| { | |
| "epoch": 1.540707467714767, | |
| "grad_norm": 0.20061564445495605, | |
| "learning_rate": 9.012215430558776e-05, | |
| "loss": 0.5048, | |
| "step": 686 | |
| }, | |
| { | |
| "epoch": 1.5429533969679956, | |
| "grad_norm": 0.17530708014965057, | |
| "learning_rate": 8.991453661698641e-05, | |
| "loss": 0.51, | |
| "step": 687 | |
| }, | |
| { | |
| "epoch": 1.545199326221224, | |
| "grad_norm": 0.2152005285024643, | |
| "learning_rate": 8.970685108837497e-05, | |
| "loss": 0.5224, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 1.5474452554744524, | |
| "grad_norm": 0.1882491558790207, | |
| "learning_rate": 8.949909914083732e-05, | |
| "loss": 0.5271, | |
| "step": 689 | |
| }, | |
| { | |
| "epoch": 1.549691184727681, | |
| "grad_norm": 0.21567484736442566, | |
| "learning_rate": 8.92912821959118e-05, | |
| "loss": 0.5156, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 1.5519371139809097, | |
| "grad_norm": 0.19783969223499298, | |
| "learning_rate": 8.908340167558154e-05, | |
| "loss": 0.4966, | |
| "step": 691 | |
| }, | |
| { | |
| "epoch": 1.5541830432341381, | |
| "grad_norm": 0.20946729183197021, | |
| "learning_rate": 8.88754590022647e-05, | |
| "loss": 0.4923, | |
| "step": 692 | |
| }, | |
| { | |
| "epoch": 1.5564289724873666, | |
| "grad_norm": 0.19118967652320862, | |
| "learning_rate": 8.866745559880464e-05, | |
| "loss": 0.5136, | |
| "step": 693 | |
| }, | |
| { | |
| "epoch": 1.5586749017405952, | |
| "grad_norm": 0.2122071534395218, | |
| "learning_rate": 8.845939288846032e-05, | |
| "loss": 0.5155, | |
| "step": 694 | |
| }, | |
| { | |
| "epoch": 1.5609208309938238, | |
| "grad_norm": 0.1733548641204834, | |
| "learning_rate": 8.825127229489653e-05, | |
| "loss": 0.4971, | |
| "step": 695 | |
| }, | |
| { | |
| "epoch": 1.5631667602470523, | |
| "grad_norm": 0.2194015234708786, | |
| "learning_rate": 8.804309524217408e-05, | |
| "loss": 0.4942, | |
| "step": 696 | |
| }, | |
| { | |
| "epoch": 1.5654126895002807, | |
| "grad_norm": 0.1795753836631775, | |
| "learning_rate": 8.783486315474008e-05, | |
| "loss": 0.5032, | |
| "step": 697 | |
| }, | |
| { | |
| "epoch": 1.5676586187535093, | |
| "grad_norm": 0.21514686942100525, | |
| "learning_rate": 8.762657745741831e-05, | |
| "loss": 0.5036, | |
| "step": 698 | |
| }, | |
| { | |
| "epoch": 1.5699045480067377, | |
| "grad_norm": 0.20286062359809875, | |
| "learning_rate": 8.741823957539926e-05, | |
| "loss": 0.5097, | |
| "step": 699 | |
| }, | |
| { | |
| "epoch": 1.5721504772599664, | |
| "grad_norm": 0.19607621431350708, | |
| "learning_rate": 8.720985093423053e-05, | |
| "loss": 0.498, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 1.5743964065131948, | |
| "grad_norm": 0.23368516564369202, | |
| "learning_rate": 8.700141295980711e-05, | |
| "loss": 0.529, | |
| "step": 701 | |
| }, | |
| { | |
| "epoch": 1.5766423357664232, | |
| "grad_norm": 0.21203581988811493, | |
| "learning_rate": 8.679292707836149e-05, | |
| "loss": 0.4959, | |
| "step": 702 | |
| }, | |
| { | |
| "epoch": 1.5788882650196518, | |
| "grad_norm": 0.26587924361228943, | |
| "learning_rate": 8.658439471645391e-05, | |
| "loss": 0.5201, | |
| "step": 703 | |
| }, | |
| { | |
| "epoch": 1.5811341942728805, | |
| "grad_norm": 0.1834084540605545, | |
| "learning_rate": 8.637581730096275e-05, | |
| "loss": 0.504, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 1.583380123526109, | |
| "grad_norm": 0.24840541183948517, | |
| "learning_rate": 8.616719625907463e-05, | |
| "loss": 0.5149, | |
| "step": 705 | |
| }, | |
| { | |
| "epoch": 1.5856260527793373, | |
| "grad_norm": 0.18650217354297638, | |
| "learning_rate": 8.595853301827469e-05, | |
| "loss": 0.4866, | |
| "step": 706 | |
| }, | |
| { | |
| "epoch": 1.587871982032566, | |
| "grad_norm": 0.21472761034965515, | |
| "learning_rate": 8.574982900633676e-05, | |
| "loss": 0.513, | |
| "step": 707 | |
| }, | |
| { | |
| "epoch": 1.5901179112857946, | |
| "grad_norm": 0.20243674516677856, | |
| "learning_rate": 8.554108565131373e-05, | |
| "loss": 0.5073, | |
| "step": 708 | |
| }, | |
| { | |
| "epoch": 1.592363840539023, | |
| "grad_norm": 0.18156473338603973, | |
| "learning_rate": 8.533230438152765e-05, | |
| "loss": 0.5117, | |
| "step": 709 | |
| }, | |
| { | |
| "epoch": 1.5946097697922514, | |
| "grad_norm": 0.18785932660102844, | |
| "learning_rate": 8.512348662555996e-05, | |
| "loss": 0.5184, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 1.59685569904548, | |
| "grad_norm": 0.19026771187782288, | |
| "learning_rate": 8.49146338122419e-05, | |
| "loss": 0.493, | |
| "step": 711 | |
| }, | |
| { | |
| "epoch": 1.5991016282987087, | |
| "grad_norm": 0.1765296906232834, | |
| "learning_rate": 8.47057473706444e-05, | |
| "loss": 0.4921, | |
| "step": 712 | |
| }, | |
| { | |
| "epoch": 1.6013475575519371, | |
| "grad_norm": 0.18513350188732147, | |
| "learning_rate": 8.449682873006862e-05, | |
| "loss": 0.5043, | |
| "step": 713 | |
| }, | |
| { | |
| "epoch": 1.6035934868051656, | |
| "grad_norm": 0.1919069141149521, | |
| "learning_rate": 8.4287879320036e-05, | |
| "loss": 0.4893, | |
| "step": 714 | |
| }, | |
| { | |
| "epoch": 1.6058394160583942, | |
| "grad_norm": 0.18348896503448486, | |
| "learning_rate": 8.40789005702785e-05, | |
| "loss": 0.5287, | |
| "step": 715 | |
| }, | |
| { | |
| "epoch": 1.6080853453116228, | |
| "grad_norm": 0.19792461395263672, | |
| "learning_rate": 8.386989391072892e-05, | |
| "loss": 0.518, | |
| "step": 716 | |
| }, | |
| { | |
| "epoch": 1.6103312745648513, | |
| "grad_norm": 0.2027343064546585, | |
| "learning_rate": 8.366086077151091e-05, | |
| "loss": 0.5109, | |
| "step": 717 | |
| }, | |
| { | |
| "epoch": 1.6125772038180797, | |
| "grad_norm": 0.2016996443271637, | |
| "learning_rate": 8.34518025829294e-05, | |
| "loss": 0.5169, | |
| "step": 718 | |
| }, | |
| { | |
| "epoch": 1.614823133071308, | |
| "grad_norm": 0.20013925433158875, | |
| "learning_rate": 8.324272077546064e-05, | |
| "loss": 0.4997, | |
| "step": 719 | |
| }, | |
| { | |
| "epoch": 1.6170690623245367, | |
| "grad_norm": 0.18940746784210205, | |
| "learning_rate": 8.30336167797426e-05, | |
| "loss": 0.4962, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 1.6193149915777654, | |
| "grad_norm": 0.20259737968444824, | |
| "learning_rate": 8.282449202656496e-05, | |
| "loss": 0.524, | |
| "step": 721 | |
| }, | |
| { | |
| "epoch": 1.6215609208309938, | |
| "grad_norm": 0.22202381491661072, | |
| "learning_rate": 8.261534794685952e-05, | |
| "loss": 0.4966, | |
| "step": 722 | |
| }, | |
| { | |
| "epoch": 1.6238068500842222, | |
| "grad_norm": 0.19881968200206757, | |
| "learning_rate": 8.240618597169029e-05, | |
| "loss": 0.5065, | |
| "step": 723 | |
| }, | |
| { | |
| "epoch": 1.6260527793374508, | |
| "grad_norm": 0.1963961273431778, | |
| "learning_rate": 8.219700753224371e-05, | |
| "loss": 0.5027, | |
| "step": 724 | |
| }, | |
| { | |
| "epoch": 1.6282987085906795, | |
| "grad_norm": 0.20289023220539093, | |
| "learning_rate": 8.198781405981888e-05, | |
| "loss": 0.5123, | |
| "step": 725 | |
| }, | |
| { | |
| "epoch": 1.630544637843908, | |
| "grad_norm": 0.20166555047035217, | |
| "learning_rate": 8.177860698581778e-05, | |
| "loss": 0.4844, | |
| "step": 726 | |
| }, | |
| { | |
| "epoch": 1.6327905670971363, | |
| "grad_norm": 0.21527273952960968, | |
| "learning_rate": 8.156938774173548e-05, | |
| "loss": 0.4884, | |
| "step": 727 | |
| }, | |
| { | |
| "epoch": 1.635036496350365, | |
| "grad_norm": 0.19657008349895477, | |
| "learning_rate": 8.136015775915025e-05, | |
| "loss": 0.5046, | |
| "step": 728 | |
| }, | |
| { | |
| "epoch": 1.6372824256035936, | |
| "grad_norm": 0.1984531283378601, | |
| "learning_rate": 8.11509184697139e-05, | |
| "loss": 0.5075, | |
| "step": 729 | |
| }, | |
| { | |
| "epoch": 1.639528354856822, | |
| "grad_norm": 0.18290367722511292, | |
| "learning_rate": 8.094167130514195e-05, | |
| "loss": 0.5094, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 1.6417742841100504, | |
| "grad_norm": 0.18201418220996857, | |
| "learning_rate": 8.073241769720371e-05, | |
| "loss": 0.4916, | |
| "step": 731 | |
| }, | |
| { | |
| "epoch": 1.644020213363279, | |
| "grad_norm": 0.17987395823001862, | |
| "learning_rate": 8.052315907771262e-05, | |
| "loss": 0.5107, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 1.6462661426165077, | |
| "grad_norm": 0.17415151000022888, | |
| "learning_rate": 8.031389687851647e-05, | |
| "loss": 0.4787, | |
| "step": 733 | |
| }, | |
| { | |
| "epoch": 1.6485120718697361, | |
| "grad_norm": 0.18529638648033142, | |
| "learning_rate": 8.010463253148746e-05, | |
| "loss": 0.4942, | |
| "step": 734 | |
| }, | |
| { | |
| "epoch": 1.6507580011229646, | |
| "grad_norm": 0.18021097779273987, | |
| "learning_rate": 7.989536746851255e-05, | |
| "loss": 0.5244, | |
| "step": 735 | |
| }, | |
| { | |
| "epoch": 1.6530039303761932, | |
| "grad_norm": 0.18884895741939545, | |
| "learning_rate": 7.968610312148354e-05, | |
| "loss": 0.5067, | |
| "step": 736 | |
| }, | |
| { | |
| "epoch": 1.6552498596294218, | |
| "grad_norm": 0.17446008324623108, | |
| "learning_rate": 7.94768409222874e-05, | |
| "loss": 0.4919, | |
| "step": 737 | |
| }, | |
| { | |
| "epoch": 1.6574957888826503, | |
| "grad_norm": 0.16754934191703796, | |
| "learning_rate": 7.926758230279634e-05, | |
| "loss": 0.504, | |
| "step": 738 | |
| }, | |
| { | |
| "epoch": 1.6597417181358787, | |
| "grad_norm": 0.17202447354793549, | |
| "learning_rate": 7.905832869485808e-05, | |
| "loss": 0.5118, | |
| "step": 739 | |
| }, | |
| { | |
| "epoch": 1.661987647389107, | |
| "grad_norm": 0.17612679302692413, | |
| "learning_rate": 7.88490815302861e-05, | |
| "loss": 0.4997, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 1.6642335766423357, | |
| "grad_norm": 0.1580231636762619, | |
| "learning_rate": 7.863984224084977e-05, | |
| "loss": 0.477, | |
| "step": 741 | |
| }, | |
| { | |
| "epoch": 1.6664795058955644, | |
| "grad_norm": 0.1829080730676651, | |
| "learning_rate": 7.843061225826455e-05, | |
| "loss": 0.5091, | |
| "step": 742 | |
| }, | |
| { | |
| "epoch": 1.6687254351487928, | |
| "grad_norm": 0.17909185588359833, | |
| "learning_rate": 7.822139301418226e-05, | |
| "loss": 0.5197, | |
| "step": 743 | |
| }, | |
| { | |
| "epoch": 1.6709713644020212, | |
| "grad_norm": 0.18631631135940552, | |
| "learning_rate": 7.801218594018115e-05, | |
| "loss": 0.5069, | |
| "step": 744 | |
| }, | |
| { | |
| "epoch": 1.6732172936552498, | |
| "grad_norm": 0.17326535284519196, | |
| "learning_rate": 7.78029924677563e-05, | |
| "loss": 0.5088, | |
| "step": 745 | |
| }, | |
| { | |
| "epoch": 1.6754632229084785, | |
| "grad_norm": 0.20143157243728638, | |
| "learning_rate": 7.759381402830973e-05, | |
| "loss": 0.528, | |
| "step": 746 | |
| }, | |
| { | |
| "epoch": 1.677709152161707, | |
| "grad_norm": 0.1783144623041153, | |
| "learning_rate": 7.738465205314048e-05, | |
| "loss": 0.4956, | |
| "step": 747 | |
| }, | |
| { | |
| "epoch": 1.6799550814149353, | |
| "grad_norm": 0.19444549083709717, | |
| "learning_rate": 7.717550797343506e-05, | |
| "loss": 0.4859, | |
| "step": 748 | |
| }, | |
| { | |
| "epoch": 1.682201010668164, | |
| "grad_norm": 0.18391017615795135, | |
| "learning_rate": 7.696638322025744e-05, | |
| "loss": 0.5036, | |
| "step": 749 | |
| }, | |
| { | |
| "epoch": 1.6844469399213926, | |
| "grad_norm": 0.2030087262392044, | |
| "learning_rate": 7.675727922453939e-05, | |
| "loss": 0.5032, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 1.686692869174621, | |
| "grad_norm": 0.17419691383838654, | |
| "learning_rate": 7.654819741707065e-05, | |
| "loss": 0.5055, | |
| "step": 751 | |
| }, | |
| { | |
| "epoch": 1.6889387984278494, | |
| "grad_norm": 0.1854201853275299, | |
| "learning_rate": 7.633913922848912e-05, | |
| "loss": 0.5, | |
| "step": 752 | |
| }, | |
| { | |
| "epoch": 1.691184727681078, | |
| "grad_norm": 0.19161422550678253, | |
| "learning_rate": 7.613010608927113e-05, | |
| "loss": 0.4888, | |
| "step": 753 | |
| }, | |
| { | |
| "epoch": 1.6934306569343067, | |
| "grad_norm": 0.1729954481124878, | |
| "learning_rate": 7.592109942972152e-05, | |
| "loss": 0.5028, | |
| "step": 754 | |
| }, | |
| { | |
| "epoch": 1.6956765861875351, | |
| "grad_norm": 0.19286830723285675, | |
| "learning_rate": 7.571212067996402e-05, | |
| "loss": 0.5133, | |
| "step": 755 | |
| }, | |
| { | |
| "epoch": 1.6979225154407636, | |
| "grad_norm": 0.17671585083007812, | |
| "learning_rate": 7.550317126993141e-05, | |
| "loss": 0.5035, | |
| "step": 756 | |
| }, | |
| { | |
| "epoch": 1.700168444693992, | |
| "grad_norm": 0.1909675896167755, | |
| "learning_rate": 7.529425262935561e-05, | |
| "loss": 0.5147, | |
| "step": 757 | |
| }, | |
| { | |
| "epoch": 1.7024143739472206, | |
| "grad_norm": 0.1676298975944519, | |
| "learning_rate": 7.508536618775814e-05, | |
| "loss": 0.488, | |
| "step": 758 | |
| }, | |
| { | |
| "epoch": 1.7046603032004493, | |
| "grad_norm": 0.1871660202741623, | |
| "learning_rate": 7.487651337444005e-05, | |
| "loss": 0.4986, | |
| "step": 759 | |
| }, | |
| { | |
| "epoch": 1.7069062324536777, | |
| "grad_norm": 0.17889705300331116, | |
| "learning_rate": 7.466769561847239e-05, | |
| "loss": 0.5103, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 1.709152161706906, | |
| "grad_norm": 0.18187767267227173, | |
| "learning_rate": 7.445891434868628e-05, | |
| "loss": 0.477, | |
| "step": 761 | |
| }, | |
| { | |
| "epoch": 1.7113980909601347, | |
| "grad_norm": 0.17818237841129303, | |
| "learning_rate": 7.425017099366326e-05, | |
| "loss": 0.5143, | |
| "step": 762 | |
| }, | |
| { | |
| "epoch": 1.7136440202133634, | |
| "grad_norm": 0.1854383796453476, | |
| "learning_rate": 7.404146698172536e-05, | |
| "loss": 0.5286, | |
| "step": 763 | |
| }, | |
| { | |
| "epoch": 1.7158899494665918, | |
| "grad_norm": 0.1802191585302353, | |
| "learning_rate": 7.383280374092538e-05, | |
| "loss": 0.493, | |
| "step": 764 | |
| }, | |
| { | |
| "epoch": 1.7181358787198202, | |
| "grad_norm": 0.17232070863246918, | |
| "learning_rate": 7.362418269903728e-05, | |
| "loss": 0.5124, | |
| "step": 765 | |
| }, | |
| { | |
| "epoch": 1.7203818079730488, | |
| "grad_norm": 0.2103428691625595, | |
| "learning_rate": 7.34156052835461e-05, | |
| "loss": 0.5372, | |
| "step": 766 | |
| }, | |
| { | |
| "epoch": 1.7226277372262775, | |
| "grad_norm": 0.1758391559123993, | |
| "learning_rate": 7.320707292163853e-05, | |
| "loss": 0.5019, | |
| "step": 767 | |
| }, | |
| { | |
| "epoch": 1.724873666479506, | |
| "grad_norm": 0.19223737716674805, | |
| "learning_rate": 7.299858704019291e-05, | |
| "loss": 0.4956, | |
| "step": 768 | |
| }, | |
| { | |
| "epoch": 1.7271195957327343, | |
| "grad_norm": 0.17237992584705353, | |
| "learning_rate": 7.279014906576949e-05, | |
| "loss": 0.4991, | |
| "step": 769 | |
| }, | |
| { | |
| "epoch": 1.729365524985963, | |
| "grad_norm": 0.17996814846992493, | |
| "learning_rate": 7.258176042460077e-05, | |
| "loss": 0.4882, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 1.7316114542391916, | |
| "grad_norm": 0.17651812732219696, | |
| "learning_rate": 7.237342254258173e-05, | |
| "loss": 0.5167, | |
| "step": 771 | |
| }, | |
| { | |
| "epoch": 1.73385738349242, | |
| "grad_norm": 0.19715122878551483, | |
| "learning_rate": 7.216513684525992e-05, | |
| "loss": 0.516, | |
| "step": 772 | |
| }, | |
| { | |
| "epoch": 1.7361033127456484, | |
| "grad_norm": 0.16534049808979034, | |
| "learning_rate": 7.195690475782596e-05, | |
| "loss": 0.5241, | |
| "step": 773 | |
| }, | |
| { | |
| "epoch": 1.738349241998877, | |
| "grad_norm": 0.20934666693210602, | |
| "learning_rate": 7.174872770510348e-05, | |
| "loss": 0.4848, | |
| "step": 774 | |
| }, | |
| { | |
| "epoch": 1.7405951712521057, | |
| "grad_norm": 0.17493613064289093, | |
| "learning_rate": 7.15406071115397e-05, | |
| "loss": 0.509, | |
| "step": 775 | |
| }, | |
| { | |
| "epoch": 1.7428411005053341, | |
| "grad_norm": 0.19224363565444946, | |
| "learning_rate": 7.133254440119538e-05, | |
| "loss": 0.5166, | |
| "step": 776 | |
| }, | |
| { | |
| "epoch": 1.7450870297585626, | |
| "grad_norm": 0.17673024535179138, | |
| "learning_rate": 7.11245409977353e-05, | |
| "loss": 0.4919, | |
| "step": 777 | |
| }, | |
| { | |
| "epoch": 1.747332959011791, | |
| "grad_norm": 0.17207755148410797, | |
| "learning_rate": 7.091659832441848e-05, | |
| "loss": 0.5325, | |
| "step": 778 | |
| }, | |
| { | |
| "epoch": 1.7495788882650196, | |
| "grad_norm": 0.17099009454250336, | |
| "learning_rate": 7.070871780408824e-05, | |
| "loss": 0.4918, | |
| "step": 779 | |
| }, | |
| { | |
| "epoch": 1.7518248175182483, | |
| "grad_norm": 0.16904598474502563, | |
| "learning_rate": 7.05009008591627e-05, | |
| "loss": 0.4883, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 1.7540707467714767, | |
| "grad_norm": 0.17518097162246704, | |
| "learning_rate": 7.029314891162504e-05, | |
| "loss": 0.5112, | |
| "step": 781 | |
| }, | |
| { | |
| "epoch": 1.756316676024705, | |
| "grad_norm": 0.1848541796207428, | |
| "learning_rate": 7.008546338301358e-05, | |
| "loss": 0.522, | |
| "step": 782 | |
| }, | |
| { | |
| "epoch": 1.7585626052779337, | |
| "grad_norm": 0.18024159967899323, | |
| "learning_rate": 6.987784569441228e-05, | |
| "loss": 0.5163, | |
| "step": 783 | |
| }, | |
| { | |
| "epoch": 1.7608085345311624, | |
| "grad_norm": 0.16730569303035736, | |
| "learning_rate": 6.967029726644075e-05, | |
| "loss": 0.4693, | |
| "step": 784 | |
| }, | |
| { | |
| "epoch": 1.7630544637843908, | |
| "grad_norm": 0.18763582408428192, | |
| "learning_rate": 6.946281951924487e-05, | |
| "loss": 0.5143, | |
| "step": 785 | |
| }, | |
| { | |
| "epoch": 1.7653003930376192, | |
| "grad_norm": 0.16916576027870178, | |
| "learning_rate": 6.925541387248674e-05, | |
| "loss": 0.5188, | |
| "step": 786 | |
| }, | |
| { | |
| "epoch": 1.7675463222908478, | |
| "grad_norm": 0.19620057940483093, | |
| "learning_rate": 6.904808174533521e-05, | |
| "loss": 0.5024, | |
| "step": 787 | |
| }, | |
| { | |
| "epoch": 1.7697922515440765, | |
| "grad_norm": 0.16816137731075287, | |
| "learning_rate": 6.884082455645606e-05, | |
| "loss": 0.4878, | |
| "step": 788 | |
| }, | |
| { | |
| "epoch": 1.772038180797305, | |
| "grad_norm": 0.1925499141216278, | |
| "learning_rate": 6.863364372400221e-05, | |
| "loss": 0.4922, | |
| "step": 789 | |
| }, | |
| { | |
| "epoch": 1.7742841100505333, | |
| "grad_norm": 0.15602745115756989, | |
| "learning_rate": 6.842654066560422e-05, | |
| "loss": 0.4888, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 1.776530039303762, | |
| "grad_norm": 0.17124199867248535, | |
| "learning_rate": 6.821951679836049e-05, | |
| "loss": 0.4795, | |
| "step": 791 | |
| }, | |
| { | |
| "epoch": 1.7787759685569906, | |
| "grad_norm": 0.17022277414798737, | |
| "learning_rate": 6.801257353882746e-05, | |
| "loss": 0.4966, | |
| "step": 792 | |
| }, | |
| { | |
| "epoch": 1.781021897810219, | |
| "grad_norm": 0.15725384652614594, | |
| "learning_rate": 6.78057123030101e-05, | |
| "loss": 0.4905, | |
| "step": 793 | |
| }, | |
| { | |
| "epoch": 1.7832678270634474, | |
| "grad_norm": 0.17000839114189148, | |
| "learning_rate": 6.759893450635213e-05, | |
| "loss": 0.498, | |
| "step": 794 | |
| }, | |
| { | |
| "epoch": 1.7855137563166759, | |
| "grad_norm": 0.15647220611572266, | |
| "learning_rate": 6.739224156372625e-05, | |
| "loss": 0.4948, | |
| "step": 795 | |
| }, | |
| { | |
| "epoch": 1.7877596855699045, | |
| "grad_norm": 0.17224030196666718, | |
| "learning_rate": 6.718563488942463e-05, | |
| "loss": 0.4995, | |
| "step": 796 | |
| }, | |
| { | |
| "epoch": 1.7900056148231331, | |
| "grad_norm": 0.17135286331176758, | |
| "learning_rate": 6.697911589714917e-05, | |
| "loss": 0.5028, | |
| "step": 797 | |
| }, | |
| { | |
| "epoch": 1.7922515440763616, | |
| "grad_norm": 0.1629776656627655, | |
| "learning_rate": 6.677268600000172e-05, | |
| "loss": 0.5004, | |
| "step": 798 | |
| }, | |
| { | |
| "epoch": 1.79449747332959, | |
| "grad_norm": 0.19575197994709015, | |
| "learning_rate": 6.656634661047461e-05, | |
| "loss": 0.5112, | |
| "step": 799 | |
| }, | |
| { | |
| "epoch": 1.7967434025828186, | |
| "grad_norm": 0.15462997555732727, | |
| "learning_rate": 6.636009914044074e-05, | |
| "loss": 0.5036, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 1.7989893318360473, | |
| "grad_norm": 0.19468659162521362, | |
| "learning_rate": 6.615394500114417e-05, | |
| "loss": 0.5062, | |
| "step": 801 | |
| }, | |
| { | |
| "epoch": 1.8012352610892757, | |
| "grad_norm": 0.15850648283958435, | |
| "learning_rate": 6.594788560319025e-05, | |
| "loss": 0.5103, | |
| "step": 802 | |
| }, | |
| { | |
| "epoch": 1.803481190342504, | |
| "grad_norm": 0.16901031136512756, | |
| "learning_rate": 6.574192235653619e-05, | |
| "loss": 0.4964, | |
| "step": 803 | |
| }, | |
| { | |
| "epoch": 1.8057271195957327, | |
| "grad_norm": 0.16941389441490173, | |
| "learning_rate": 6.553605667048119e-05, | |
| "loss": 0.4956, | |
| "step": 804 | |
| }, | |
| { | |
| "epoch": 1.8079730488489614, | |
| "grad_norm": 0.1633678376674652, | |
| "learning_rate": 6.533028995365687e-05, | |
| "loss": 0.4844, | |
| "step": 805 | |
| }, | |
| { | |
| "epoch": 1.8102189781021898, | |
| "grad_norm": 0.16450218856334686, | |
| "learning_rate": 6.51246236140177e-05, | |
| "loss": 0.5039, | |
| "step": 806 | |
| }, | |
| { | |
| "epoch": 1.8124649073554182, | |
| "grad_norm": 0.1649266928434372, | |
| "learning_rate": 6.49190590588314e-05, | |
| "loss": 0.5237, | |
| "step": 807 | |
| }, | |
| { | |
| "epoch": 1.8147108366086468, | |
| "grad_norm": 0.17138883471488953, | |
| "learning_rate": 6.471359769466907e-05, | |
| "loss": 0.5086, | |
| "step": 808 | |
| }, | |
| { | |
| "epoch": 1.8169567658618755, | |
| "grad_norm": 0.17378132045269012, | |
| "learning_rate": 6.450824092739589e-05, | |
| "loss": 0.5091, | |
| "step": 809 | |
| }, | |
| { | |
| "epoch": 1.819202695115104, | |
| "grad_norm": 0.17285092175006866, | |
| "learning_rate": 6.430299016216119e-05, | |
| "loss": 0.5055, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 1.8214486243683323, | |
| "grad_norm": 0.1718919575214386, | |
| "learning_rate": 6.409784680338905e-05, | |
| "loss": 0.4842, | |
| "step": 811 | |
| }, | |
| { | |
| "epoch": 1.823694553621561, | |
| "grad_norm": 0.16790670156478882, | |
| "learning_rate": 6.389281225476867e-05, | |
| "loss": 0.5004, | |
| "step": 812 | |
| }, | |
| { | |
| "epoch": 1.8259404828747896, | |
| "grad_norm": 0.1849760264158249, | |
| "learning_rate": 6.368788791924467e-05, | |
| "loss": 0.4939, | |
| "step": 813 | |
| }, | |
| { | |
| "epoch": 1.828186412128018, | |
| "grad_norm": 0.16113969683647156, | |
| "learning_rate": 6.348307519900753e-05, | |
| "loss": 0.5024, | |
| "step": 814 | |
| }, | |
| { | |
| "epoch": 1.8304323413812464, | |
| "grad_norm": 0.1709127277135849, | |
| "learning_rate": 6.3278375495484e-05, | |
| "loss": 0.4977, | |
| "step": 815 | |
| }, | |
| { | |
| "epoch": 1.8326782706344749, | |
| "grad_norm": 0.1758309006690979, | |
| "learning_rate": 6.307379020932758e-05, | |
| "loss": 0.4689, | |
| "step": 816 | |
| }, | |
| { | |
| "epoch": 1.8349241998877035, | |
| "grad_norm": 0.16264449059963226, | |
| "learning_rate": 6.286932074040876e-05, | |
| "loss": 0.4974, | |
| "step": 817 | |
| }, | |
| { | |
| "epoch": 1.8371701291409321, | |
| "grad_norm": 0.17811472713947296, | |
| "learning_rate": 6.266496848780567e-05, | |
| "loss": 0.4987, | |
| "step": 818 | |
| }, | |
| { | |
| "epoch": 1.8394160583941606, | |
| "grad_norm": 0.17399878799915314, | |
| "learning_rate": 6.246073484979436e-05, | |
| "loss": 0.4867, | |
| "step": 819 | |
| }, | |
| { | |
| "epoch": 1.841661987647389, | |
| "grad_norm": 0.17421691119670868, | |
| "learning_rate": 6.225662122383918e-05, | |
| "loss": 0.5162, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 1.8439079169006176, | |
| "grad_norm": 0.17920304834842682, | |
| "learning_rate": 6.205262900658339e-05, | |
| "loss": 0.5058, | |
| "step": 821 | |
| }, | |
| { | |
| "epoch": 1.8461538461538463, | |
| "grad_norm": 0.16607870161533356, | |
| "learning_rate": 6.184875959383947e-05, | |
| "loss": 0.5063, | |
| "step": 822 | |
| }, | |
| { | |
| "epoch": 1.8483997754070747, | |
| "grad_norm": 0.19281108677387238, | |
| "learning_rate": 6.164501438057965e-05, | |
| "loss": 0.4936, | |
| "step": 823 | |
| }, | |
| { | |
| "epoch": 1.850645704660303, | |
| "grad_norm": 0.16037489473819733, | |
| "learning_rate": 6.144139476092631e-05, | |
| "loss": 0.4949, | |
| "step": 824 | |
| }, | |
| { | |
| "epoch": 1.8528916339135317, | |
| "grad_norm": 0.19559049606323242, | |
| "learning_rate": 6.123790212814241e-05, | |
| "loss": 0.4981, | |
| "step": 825 | |
| }, | |
| { | |
| "epoch": 1.8551375631667604, | |
| "grad_norm": 0.15469707548618317, | |
| "learning_rate": 6.1034537874622085e-05, | |
| "loss": 0.5021, | |
| "step": 826 | |
| }, | |
| { | |
| "epoch": 1.8573834924199888, | |
| "grad_norm": 0.18738782405853271, | |
| "learning_rate": 6.0831303391880975e-05, | |
| "loss": 0.4846, | |
| "step": 827 | |
| }, | |
| { | |
| "epoch": 1.8596294216732172, | |
| "grad_norm": 0.16658605635166168, | |
| "learning_rate": 6.0628200070546796e-05, | |
| "loss": 0.4945, | |
| "step": 828 | |
| }, | |
| { | |
| "epoch": 1.8618753509264458, | |
| "grad_norm": 0.16776609420776367, | |
| "learning_rate": 6.042522930034984e-05, | |
| "loss": 0.4992, | |
| "step": 829 | |
| }, | |
| { | |
| "epoch": 1.8641212801796745, | |
| "grad_norm": 0.17124858498573303, | |
| "learning_rate": 6.022239247011331e-05, | |
| "loss": 0.4915, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 1.866367209432903, | |
| "grad_norm": 0.15521185100078583, | |
| "learning_rate": 6.001969096774399e-05, | |
| "loss": 0.5134, | |
| "step": 831 | |
| }, | |
| { | |
| "epoch": 1.8686131386861313, | |
| "grad_norm": 0.1691064089536667, | |
| "learning_rate": 5.981712618022272e-05, | |
| "loss": 0.5018, | |
| "step": 832 | |
| }, | |
| { | |
| "epoch": 1.87085906793936, | |
| "grad_norm": 0.15585891902446747, | |
| "learning_rate": 5.96146994935948e-05, | |
| "loss": 0.5071, | |
| "step": 833 | |
| }, | |
| { | |
| "epoch": 1.8731049971925884, | |
| "grad_norm": 0.1678674966096878, | |
| "learning_rate": 5.9412412292960656e-05, | |
| "loss": 0.5123, | |
| "step": 834 | |
| }, | |
| { | |
| "epoch": 1.875350926445817, | |
| "grad_norm": 0.15515373647212982, | |
| "learning_rate": 5.92102659624662e-05, | |
| "loss": 0.495, | |
| "step": 835 | |
| }, | |
| { | |
| "epoch": 1.8775968556990454, | |
| "grad_norm": 0.17066361010074615, | |
| "learning_rate": 5.900826188529351e-05, | |
| "loss": 0.4982, | |
| "step": 836 | |
| }, | |
| { | |
| "epoch": 1.8798427849522739, | |
| "grad_norm": 0.14784766733646393, | |
| "learning_rate": 5.880640144365124e-05, | |
| "loss": 0.492, | |
| "step": 837 | |
| }, | |
| { | |
| "epoch": 1.8820887142055025, | |
| "grad_norm": 0.1624741405248642, | |
| "learning_rate": 5.86046860187653e-05, | |
| "loss": 0.4985, | |
| "step": 838 | |
| }, | |
| { | |
| "epoch": 1.8843346434587311, | |
| "grad_norm": 0.14903901517391205, | |
| "learning_rate": 5.840311699086928e-05, | |
| "loss": 0.4887, | |
| "step": 839 | |
| }, | |
| { | |
| "epoch": 1.8865805727119596, | |
| "grad_norm": 0.16569632291793823, | |
| "learning_rate": 5.820169573919499e-05, | |
| "loss": 0.5031, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 1.888826501965188, | |
| "grad_norm": 0.15516996383666992, | |
| "learning_rate": 5.800042364196319e-05, | |
| "loss": 0.4974, | |
| "step": 841 | |
| }, | |
| { | |
| "epoch": 1.8910724312184166, | |
| "grad_norm": 0.1705656498670578, | |
| "learning_rate": 5.779930207637401e-05, | |
| "loss": 0.5064, | |
| "step": 842 | |
| }, | |
| { | |
| "epoch": 1.8933183604716453, | |
| "grad_norm": 0.16612909734249115, | |
| "learning_rate": 5.759833241859755e-05, | |
| "loss": 0.4928, | |
| "step": 843 | |
| }, | |
| { | |
| "epoch": 1.8955642897248737, | |
| "grad_norm": 0.15691480040550232, | |
| "learning_rate": 5.7397516043764564e-05, | |
| "loss": 0.4992, | |
| "step": 844 | |
| }, | |
| { | |
| "epoch": 1.897810218978102, | |
| "grad_norm": 0.15925776958465576, | |
| "learning_rate": 5.719685432595681e-05, | |
| "loss": 0.503, | |
| "step": 845 | |
| }, | |
| { | |
| "epoch": 1.9000561482313307, | |
| "grad_norm": 0.1793777197599411, | |
| "learning_rate": 5.6996348638198e-05, | |
| "loss": 0.5015, | |
| "step": 846 | |
| }, | |
| { | |
| "epoch": 1.9023020774845594, | |
| "grad_norm": 0.15224167704582214, | |
| "learning_rate": 5.6796000352444056e-05, | |
| "loss": 0.4791, | |
| "step": 847 | |
| }, | |
| { | |
| "epoch": 1.9045480067377878, | |
| "grad_norm": 0.17081177234649658, | |
| "learning_rate": 5.6595810839574e-05, | |
| "loss": 0.4925, | |
| "step": 848 | |
| }, | |
| { | |
| "epoch": 1.9067939359910162, | |
| "grad_norm": 0.1512937992811203, | |
| "learning_rate": 5.6395781469380354e-05, | |
| "loss": 0.4901, | |
| "step": 849 | |
| }, | |
| { | |
| "epoch": 1.9090398652442448, | |
| "grad_norm": 0.15645167231559753, | |
| "learning_rate": 5.619591361055998e-05, | |
| "loss": 0.5001, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 1.9112857944974735, | |
| "grad_norm": 0.17164252698421478, | |
| "learning_rate": 5.5996208630704445e-05, | |
| "loss": 0.4956, | |
| "step": 851 | |
| }, | |
| { | |
| "epoch": 1.913531723750702, | |
| "grad_norm": 0.15667004883289337, | |
| "learning_rate": 5.579666789629098e-05, | |
| "loss": 0.4906, | |
| "step": 852 | |
| }, | |
| { | |
| "epoch": 1.9157776530039303, | |
| "grad_norm": 0.16768649220466614, | |
| "learning_rate": 5.559729277267286e-05, | |
| "loss": 0.5099, | |
| "step": 853 | |
| }, | |
| { | |
| "epoch": 1.9180235822571587, | |
| "grad_norm": 0.16727498173713684, | |
| "learning_rate": 5.539808462407026e-05, | |
| "loss": 0.503, | |
| "step": 854 | |
| }, | |
| { | |
| "epoch": 1.9202695115103874, | |
| "grad_norm": 0.16755451261997223, | |
| "learning_rate": 5.519904481356076e-05, | |
| "loss": 0.5099, | |
| "step": 855 | |
| }, | |
| { | |
| "epoch": 1.922515440763616, | |
| "grad_norm": 0.16387148201465607, | |
| "learning_rate": 5.500017470307007e-05, | |
| "loss": 0.4957, | |
| "step": 856 | |
| }, | |
| { | |
| "epoch": 1.9247613700168444, | |
| "grad_norm": 0.15775729715824127, | |
| "learning_rate": 5.480147565336282e-05, | |
| "loss": 0.4976, | |
| "step": 857 | |
| }, | |
| { | |
| "epoch": 1.9270072992700729, | |
| "grad_norm": 0.1581815481185913, | |
| "learning_rate": 5.4602949024033116e-05, | |
| "loss": 0.4949, | |
| "step": 858 | |
| }, | |
| { | |
| "epoch": 1.9292532285233015, | |
| "grad_norm": 0.15002784132957458, | |
| "learning_rate": 5.4404596173495265e-05, | |
| "loss": 0.5099, | |
| "step": 859 | |
| }, | |
| { | |
| "epoch": 1.9314991577765301, | |
| "grad_norm": 0.15235161781311035, | |
| "learning_rate": 5.420641845897455e-05, | |
| "loss": 0.4809, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 1.9337450870297586, | |
| "grad_norm": 0.16005192697048187, | |
| "learning_rate": 5.4008417236497815e-05, | |
| "loss": 0.493, | |
| "step": 861 | |
| }, | |
| { | |
| "epoch": 1.935991016282987, | |
| "grad_norm": 0.15347884595394135, | |
| "learning_rate": 5.381059386088428e-05, | |
| "loss": 0.5071, | |
| "step": 862 | |
| }, | |
| { | |
| "epoch": 1.9382369455362156, | |
| "grad_norm": 0.15472036600112915, | |
| "learning_rate": 5.361294968573629e-05, | |
| "loss": 0.4924, | |
| "step": 863 | |
| }, | |
| { | |
| "epoch": 1.9404828747894443, | |
| "grad_norm": 0.17055299878120422, | |
| "learning_rate": 5.341548606343001e-05, | |
| "loss": 0.5057, | |
| "step": 864 | |
| }, | |
| { | |
| "epoch": 1.9427288040426727, | |
| "grad_norm": 0.15424910187721252, | |
| "learning_rate": 5.321820434510617e-05, | |
| "loss": 0.5041, | |
| "step": 865 | |
| }, | |
| { | |
| "epoch": 1.944974733295901, | |
| "grad_norm": 0.15976421535015106, | |
| "learning_rate": 5.302110588066075e-05, | |
| "loss": 0.4742, | |
| "step": 866 | |
| }, | |
| { | |
| "epoch": 1.9472206625491297, | |
| "grad_norm": 0.15673977136611938, | |
| "learning_rate": 5.282419201873593e-05, | |
| "loss": 0.49, | |
| "step": 867 | |
| }, | |
| { | |
| "epoch": 1.9494665918023584, | |
| "grad_norm": 0.14829935133457184, | |
| "learning_rate": 5.262746410671071e-05, | |
| "loss": 0.5017, | |
| "step": 868 | |
| }, | |
| { | |
| "epoch": 1.9517125210555868, | |
| "grad_norm": 0.14897191524505615, | |
| "learning_rate": 5.243092349069169e-05, | |
| "loss": 0.4803, | |
| "step": 869 | |
| }, | |
| { | |
| "epoch": 1.9539584503088152, | |
| "grad_norm": 0.15609802305698395, | |
| "learning_rate": 5.223457151550402e-05, | |
| "loss": 0.4961, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 1.9562043795620438, | |
| "grad_norm": 0.15764057636260986, | |
| "learning_rate": 5.203840952468191e-05, | |
| "loss": 0.5003, | |
| "step": 871 | |
| }, | |
| { | |
| "epoch": 1.9584503088152723, | |
| "grad_norm": 0.16121333837509155, | |
| "learning_rate": 5.184243886045971e-05, | |
| "loss": 0.5054, | |
| "step": 872 | |
| }, | |
| { | |
| "epoch": 1.960696238068501, | |
| "grad_norm": 0.15507447719573975, | |
| "learning_rate": 5.164666086376262e-05, | |
| "loss": 0.4954, | |
| "step": 873 | |
| }, | |
| { | |
| "epoch": 1.9629421673217293, | |
| "grad_norm": 0.16584189236164093, | |
| "learning_rate": 5.145107687419751e-05, | |
| "loss": 0.4924, | |
| "step": 874 | |
| }, | |
| { | |
| "epoch": 1.9651880965749577, | |
| "grad_norm": 0.15702944993972778, | |
| "learning_rate": 5.1255688230043766e-05, | |
| "loss": 0.5004, | |
| "step": 875 | |
| }, | |
| { | |
| "epoch": 1.9674340258281864, | |
| "grad_norm": 0.17031584680080414, | |
| "learning_rate": 5.106049626824405e-05, | |
| "loss": 0.5139, | |
| "step": 876 | |
| }, | |
| { | |
| "epoch": 1.969679955081415, | |
| "grad_norm": 0.16193878650665283, | |
| "learning_rate": 5.0865502324395345e-05, | |
| "loss": 0.4849, | |
| "step": 877 | |
| }, | |
| { | |
| "epoch": 1.9719258843346434, | |
| "grad_norm": 0.16150209307670593, | |
| "learning_rate": 5.067070773273962e-05, | |
| "loss": 0.4719, | |
| "step": 878 | |
| }, | |
| { | |
| "epoch": 1.9741718135878719, | |
| "grad_norm": 0.1520845890045166, | |
| "learning_rate": 5.047611382615481e-05, | |
| "loss": 0.4995, | |
| "step": 879 | |
| }, | |
| { | |
| "epoch": 1.9764177428411005, | |
| "grad_norm": 0.16827571392059326, | |
| "learning_rate": 5.0281721936145713e-05, | |
| "loss": 0.4908, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 1.9786636720943291, | |
| "grad_norm": 0.15770889818668365, | |
| "learning_rate": 5.008753339283471e-05, | |
| "loss": 0.5116, | |
| "step": 881 | |
| }, | |
| { | |
| "epoch": 1.9809096013475576, | |
| "grad_norm": 0.1623336225748062, | |
| "learning_rate": 4.98935495249529e-05, | |
| "loss": 0.492, | |
| "step": 882 | |
| }, | |
| { | |
| "epoch": 1.983155530600786, | |
| "grad_norm": 0.16279038786888123, | |
| "learning_rate": 4.9699771659830855e-05, | |
| "loss": 0.5021, | |
| "step": 883 | |
| }, | |
| { | |
| "epoch": 1.9854014598540146, | |
| "grad_norm": 0.16874343156814575, | |
| "learning_rate": 4.950620112338955e-05, | |
| "loss": 0.4876, | |
| "step": 884 | |
| }, | |
| { | |
| "epoch": 1.9876473891072433, | |
| "grad_norm": 0.15390436351299286, | |
| "learning_rate": 4.931283924013141e-05, | |
| "loss": 0.4879, | |
| "step": 885 | |
| }, | |
| { | |
| "epoch": 1.9898933183604717, | |
| "grad_norm": 0.17372553050518036, | |
| "learning_rate": 4.911968733313101e-05, | |
| "loss": 0.4876, | |
| "step": 886 | |
| }, | |
| { | |
| "epoch": 1.9921392476137, | |
| "grad_norm": 0.15854312479496002, | |
| "learning_rate": 4.892674672402631e-05, | |
| "loss": 0.5128, | |
| "step": 887 | |
| }, | |
| { | |
| "epoch": 1.9943851768669287, | |
| "grad_norm": 0.1635546237230301, | |
| "learning_rate": 4.873401873300934e-05, | |
| "loss": 0.4946, | |
| "step": 888 | |
| }, | |
| { | |
| "epoch": 1.9966311061201574, | |
| "grad_norm": 0.15970109403133392, | |
| "learning_rate": 4.8541504678817435e-05, | |
| "loss": 0.501, | |
| "step": 889 | |
| }, | |
| { | |
| "epoch": 1.9988770353733858, | |
| "grad_norm": 0.1637182980775833, | |
| "learning_rate": 4.834920587872397e-05, | |
| "loss": 0.4807, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 2.001122964626614, | |
| "grad_norm": 0.17495502531528473, | |
| "learning_rate": 4.815712364852945e-05, | |
| "loss": 0.4725, | |
| "step": 891 | |
| }, | |
| { | |
| "epoch": 2.0033688938798426, | |
| "grad_norm": 0.20412583649158478, | |
| "learning_rate": 4.7965259302552546e-05, | |
| "loss": 0.4545, | |
| "step": 892 | |
| }, | |
| { | |
| "epoch": 2.0056148231330715, | |
| "grad_norm": 0.1694943606853485, | |
| "learning_rate": 4.777361415362106e-05, | |
| "loss": 0.4561, | |
| "step": 893 | |
| }, | |
| { | |
| "epoch": 2.0078607523863, | |
| "grad_norm": 0.20532694458961487, | |
| "learning_rate": 4.75821895130629e-05, | |
| "loss": 0.4585, | |
| "step": 894 | |
| }, | |
| { | |
| "epoch": 2.0101066816395283, | |
| "grad_norm": 0.21771076321601868, | |
| "learning_rate": 4.739098669069723e-05, | |
| "loss": 0.4609, | |
| "step": 895 | |
| }, | |
| { | |
| "epoch": 2.0123526108927567, | |
| "grad_norm": 0.19157661497592926, | |
| "learning_rate": 4.7200006994825314e-05, | |
| "loss": 0.4533, | |
| "step": 896 | |
| }, | |
| { | |
| "epoch": 2.0145985401459856, | |
| "grad_norm": 0.1829356700181961, | |
| "learning_rate": 4.700925173222178e-05, | |
| "loss": 0.4401, | |
| "step": 897 | |
| }, | |
| { | |
| "epoch": 2.016844469399214, | |
| "grad_norm": 0.1815447062253952, | |
| "learning_rate": 4.681872220812551e-05, | |
| "loss": 0.4497, | |
| "step": 898 | |
| }, | |
| { | |
| "epoch": 2.0190903986524424, | |
| "grad_norm": 0.16822156310081482, | |
| "learning_rate": 4.662841972623084e-05, | |
| "loss": 0.4573, | |
| "step": 899 | |
| }, | |
| { | |
| "epoch": 2.021336327905671, | |
| "grad_norm": 0.18054281175136566, | |
| "learning_rate": 4.643834558867852e-05, | |
| "loss": 0.4589, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 2.0235822571588993, | |
| "grad_norm": 0.18319673836231232, | |
| "learning_rate": 4.6248501096046827e-05, | |
| "loss": 0.4376, | |
| "step": 901 | |
| }, | |
| { | |
| "epoch": 2.025828186412128, | |
| "grad_norm": 0.1645708829164505, | |
| "learning_rate": 4.605888754734278e-05, | |
| "loss": 0.4304, | |
| "step": 902 | |
| }, | |
| { | |
| "epoch": 2.0280741156653566, | |
| "grad_norm": 0.17893430590629578, | |
| "learning_rate": 4.586950623999314e-05, | |
| "loss": 0.4526, | |
| "step": 903 | |
| }, | |
| { | |
| "epoch": 2.030320044918585, | |
| "grad_norm": 0.17927826941013336, | |
| "learning_rate": 4.568035846983558e-05, | |
| "loss": 0.4616, | |
| "step": 904 | |
| }, | |
| { | |
| "epoch": 2.0325659741718134, | |
| "grad_norm": 0.1680602878332138, | |
| "learning_rate": 4.549144553110974e-05, | |
| "loss": 0.4611, | |
| "step": 905 | |
| }, | |
| { | |
| "epoch": 2.0348119034250423, | |
| "grad_norm": 0.1612085998058319, | |
| "learning_rate": 4.5302768716448434e-05, | |
| "loss": 0.4567, | |
| "step": 906 | |
| }, | |
| { | |
| "epoch": 2.0370578326782707, | |
| "grad_norm": 0.1724167913198471, | |
| "learning_rate": 4.5114329316868875e-05, | |
| "loss": 0.4666, | |
| "step": 907 | |
| }, | |
| { | |
| "epoch": 2.039303761931499, | |
| "grad_norm": 0.15838028490543365, | |
| "learning_rate": 4.492612862176371e-05, | |
| "loss": 0.4529, | |
| "step": 908 | |
| }, | |
| { | |
| "epoch": 2.0415496911847275, | |
| "grad_norm": 0.15649183094501495, | |
| "learning_rate": 4.473816791889228e-05, | |
| "loss": 0.4462, | |
| "step": 909 | |
| }, | |
| { | |
| "epoch": 2.0437956204379564, | |
| "grad_norm": 0.16123028099536896, | |
| "learning_rate": 4.455044849437182e-05, | |
| "loss": 0.4345, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 2.046041549691185, | |
| "grad_norm": 0.16162772476673126, | |
| "learning_rate": 4.436297163266853e-05, | |
| "loss": 0.4585, | |
| "step": 911 | |
| }, | |
| { | |
| "epoch": 2.048287478944413, | |
| "grad_norm": 0.1522200107574463, | |
| "learning_rate": 4.4175738616588894e-05, | |
| "loss": 0.4614, | |
| "step": 912 | |
| }, | |
| { | |
| "epoch": 2.0505334081976416, | |
| "grad_norm": 0.16501305997371674, | |
| "learning_rate": 4.398875072727097e-05, | |
| "loss": 0.4486, | |
| "step": 913 | |
| }, | |
| { | |
| "epoch": 2.0527793374508705, | |
| "grad_norm": 0.15927040576934814, | |
| "learning_rate": 4.380200924417548e-05, | |
| "loss": 0.4574, | |
| "step": 914 | |
| }, | |
| { | |
| "epoch": 2.055025266704099, | |
| "grad_norm": 0.1553938090801239, | |
| "learning_rate": 4.361551544507713e-05, | |
| "loss": 0.4446, | |
| "step": 915 | |
| }, | |
| { | |
| "epoch": 2.0572711959573273, | |
| "grad_norm": 0.16552136838436127, | |
| "learning_rate": 4.3429270606055895e-05, | |
| "loss": 0.4583, | |
| "step": 916 | |
| }, | |
| { | |
| "epoch": 2.0595171252105557, | |
| "grad_norm": 0.1564835011959076, | |
| "learning_rate": 4.3243276001488156e-05, | |
| "loss": 0.4476, | |
| "step": 917 | |
| }, | |
| { | |
| "epoch": 2.0617630544637846, | |
| "grad_norm": 0.1577308475971222, | |
| "learning_rate": 4.305753290403809e-05, | |
| "loss": 0.4632, | |
| "step": 918 | |
| }, | |
| { | |
| "epoch": 2.064008983717013, | |
| "grad_norm": 0.15984061360359192, | |
| "learning_rate": 4.2872042584649015e-05, | |
| "loss": 0.4624, | |
| "step": 919 | |
| }, | |
| { | |
| "epoch": 2.0662549129702414, | |
| "grad_norm": 0.16448809206485748, | |
| "learning_rate": 4.268680631253455e-05, | |
| "loss": 0.4436, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 2.06850084222347, | |
| "grad_norm": 0.16196516156196594, | |
| "learning_rate": 4.250182535517008e-05, | |
| "loss": 0.4375, | |
| "step": 921 | |
| }, | |
| { | |
| "epoch": 2.0707467714766983, | |
| "grad_norm": 0.15193282067775726, | |
| "learning_rate": 4.231710097828388e-05, | |
| "loss": 0.4287, | |
| "step": 922 | |
| }, | |
| { | |
| "epoch": 2.072992700729927, | |
| "grad_norm": 0.16018415987491608, | |
| "learning_rate": 4.2132634445848704e-05, | |
| "loss": 0.4543, | |
| "step": 923 | |
| }, | |
| { | |
| "epoch": 2.0752386299831556, | |
| "grad_norm": 0.16128796339035034, | |
| "learning_rate": 4.194842702007289e-05, | |
| "loss": 0.4621, | |
| "step": 924 | |
| }, | |
| { | |
| "epoch": 2.077484559236384, | |
| "grad_norm": 0.15342706441879272, | |
| "learning_rate": 4.176447996139196e-05, | |
| "loss": 0.4355, | |
| "step": 925 | |
| }, | |
| { | |
| "epoch": 2.0797304884896124, | |
| "grad_norm": 0.1577060967683792, | |
| "learning_rate": 4.1580794528459834e-05, | |
| "loss": 0.4521, | |
| "step": 926 | |
| }, | |
| { | |
| "epoch": 2.0819764177428413, | |
| "grad_norm": 0.16120131313800812, | |
| "learning_rate": 4.13973719781402e-05, | |
| "loss": 0.4501, | |
| "step": 927 | |
| }, | |
| { | |
| "epoch": 2.0842223469960697, | |
| "grad_norm": 0.16163085401058197, | |
| "learning_rate": 4.1214213565498086e-05, | |
| "loss": 0.4518, | |
| "step": 928 | |
| }, | |
| { | |
| "epoch": 2.086468276249298, | |
| "grad_norm": 0.1605272889137268, | |
| "learning_rate": 4.10313205437911e-05, | |
| "loss": 0.4334, | |
| "step": 929 | |
| }, | |
| { | |
| "epoch": 2.0887142055025265, | |
| "grad_norm": 0.16757291555404663, | |
| "learning_rate": 4.084869416446095e-05, | |
| "loss": 0.4579, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 2.0909601347557554, | |
| "grad_norm": 0.1572689265012741, | |
| "learning_rate": 4.0666335677124816e-05, | |
| "loss": 0.4462, | |
| "step": 931 | |
| }, | |
| { | |
| "epoch": 2.093206064008984, | |
| "grad_norm": 0.1841953992843628, | |
| "learning_rate": 4.048424632956681e-05, | |
| "loss": 0.4241, | |
| "step": 932 | |
| }, | |
| { | |
| "epoch": 2.095451993262212, | |
| "grad_norm": 0.1640552282333374, | |
| "learning_rate": 4.030242736772952e-05, | |
| "loss": 0.4495, | |
| "step": 933 | |
| }, | |
| { | |
| "epoch": 2.0976979225154406, | |
| "grad_norm": 0.15904076397418976, | |
| "learning_rate": 4.0120880035705416e-05, | |
| "loss": 0.4513, | |
| "step": 934 | |
| }, | |
| { | |
| "epoch": 2.0999438517686695, | |
| "grad_norm": 0.17605750262737274, | |
| "learning_rate": 3.9939605575728315e-05, | |
| "loss": 0.4444, | |
| "step": 935 | |
| }, | |
| { | |
| "epoch": 2.102189781021898, | |
| "grad_norm": 0.15149734914302826, | |
| "learning_rate": 3.975860522816497e-05, | |
| "loss": 0.4423, | |
| "step": 936 | |
| }, | |
| { | |
| "epoch": 2.1044357102751263, | |
| "grad_norm": 0.15931731462478638, | |
| "learning_rate": 3.957788023150647e-05, | |
| "loss": 0.4558, | |
| "step": 937 | |
| }, | |
| { | |
| "epoch": 2.1066816395283547, | |
| "grad_norm": 0.1513037383556366, | |
| "learning_rate": 3.939743182235978e-05, | |
| "loss": 0.4451, | |
| "step": 938 | |
| }, | |
| { | |
| "epoch": 2.108927568781583, | |
| "grad_norm": 0.1563446819782257, | |
| "learning_rate": 3.921726123543942e-05, | |
| "loss": 0.4438, | |
| "step": 939 | |
| }, | |
| { | |
| "epoch": 2.111173498034812, | |
| "grad_norm": 0.14871710538864136, | |
| "learning_rate": 3.9037369703558876e-05, | |
| "loss": 0.449, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 2.1134194272880404, | |
| "grad_norm": 0.14909642934799194, | |
| "learning_rate": 3.8857758457622246e-05, | |
| "loss": 0.4643, | |
| "step": 941 | |
| }, | |
| { | |
| "epoch": 2.115665356541269, | |
| "grad_norm": 0.15018634498119354, | |
| "learning_rate": 3.867842872661565e-05, | |
| "loss": 0.4483, | |
| "step": 942 | |
| }, | |
| { | |
| "epoch": 2.1179112857944973, | |
| "grad_norm": 0.16879071295261383, | |
| "learning_rate": 3.8499381737599124e-05, | |
| "loss": 0.4726, | |
| "step": 943 | |
| }, | |
| { | |
| "epoch": 2.120157215047726, | |
| "grad_norm": 0.1683071106672287, | |
| "learning_rate": 3.832061871569787e-05, | |
| "loss": 0.4499, | |
| "step": 944 | |
| }, | |
| { | |
| "epoch": 2.1224031443009546, | |
| "grad_norm": 0.15678516030311584, | |
| "learning_rate": 3.814214088409419e-05, | |
| "loss": 0.4484, | |
| "step": 945 | |
| }, | |
| { | |
| "epoch": 2.124649073554183, | |
| "grad_norm": 0.1773703545331955, | |
| "learning_rate": 3.7963949464018945e-05, | |
| "loss": 0.4605, | |
| "step": 946 | |
| }, | |
| { | |
| "epoch": 2.1268950028074114, | |
| "grad_norm": 0.1767614483833313, | |
| "learning_rate": 3.778604567474314e-05, | |
| "loss": 0.4574, | |
| "step": 947 | |
| }, | |
| { | |
| "epoch": 2.1291409320606403, | |
| "grad_norm": 0.15908308327198029, | |
| "learning_rate": 3.760843073356981e-05, | |
| "loss": 0.4357, | |
| "step": 948 | |
| }, | |
| { | |
| "epoch": 2.1313868613138687, | |
| "grad_norm": 0.1637633740901947, | |
| "learning_rate": 3.743110585582549e-05, | |
| "loss": 0.4566, | |
| "step": 949 | |
| }, | |
| { | |
| "epoch": 2.133632790567097, | |
| "grad_norm": 0.1657618135213852, | |
| "learning_rate": 3.725407225485191e-05, | |
| "loss": 0.4497, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 2.1358787198203255, | |
| "grad_norm": 0.15281249582767487, | |
| "learning_rate": 3.707733114199783e-05, | |
| "loss": 0.4494, | |
| "step": 951 | |
| }, | |
| { | |
| "epoch": 2.1381246490735544, | |
| "grad_norm": 0.16828225553035736, | |
| "learning_rate": 3.690088372661061e-05, | |
| "loss": 0.4412, | |
| "step": 952 | |
| }, | |
| { | |
| "epoch": 2.140370578326783, | |
| "grad_norm": 0.16215671598911285, | |
| "learning_rate": 3.672473121602801e-05, | |
| "loss": 0.449, | |
| "step": 953 | |
| }, | |
| { | |
| "epoch": 2.142616507580011, | |
| "grad_norm": 0.14198768138885498, | |
| "learning_rate": 3.654887481556993e-05, | |
| "loss": 0.4556, | |
| "step": 954 | |
| }, | |
| { | |
| "epoch": 2.1448624368332396, | |
| "grad_norm": 0.1703426092863083, | |
| "learning_rate": 3.6373315728530145e-05, | |
| "loss": 0.4456, | |
| "step": 955 | |
| }, | |
| { | |
| "epoch": 2.147108366086468, | |
| "grad_norm": 0.15878015756607056, | |
| "learning_rate": 3.6198055156168025e-05, | |
| "loss": 0.4593, | |
| "step": 956 | |
| }, | |
| { | |
| "epoch": 2.149354295339697, | |
| "grad_norm": 0.15779636800289154, | |
| "learning_rate": 3.602309429770034e-05, | |
| "loss": 0.4543, | |
| "step": 957 | |
| }, | |
| { | |
| "epoch": 2.1516002245929253, | |
| "grad_norm": 0.15963739156723022, | |
| "learning_rate": 3.584843435029316e-05, | |
| "loss": 0.4363, | |
| "step": 958 | |
| }, | |
| { | |
| "epoch": 2.1538461538461537, | |
| "grad_norm": 0.15662063658237457, | |
| "learning_rate": 3.567407650905353e-05, | |
| "loss": 0.458, | |
| "step": 959 | |
| }, | |
| { | |
| "epoch": 2.156092083099382, | |
| "grad_norm": 0.14531637728214264, | |
| "learning_rate": 3.5500021967021344e-05, | |
| "loss": 0.4474, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 2.158338012352611, | |
| "grad_norm": 0.15317556262016296, | |
| "learning_rate": 3.5326271915161205e-05, | |
| "loss": 0.439, | |
| "step": 961 | |
| }, | |
| { | |
| "epoch": 2.1605839416058394, | |
| "grad_norm": 0.15082910656929016, | |
| "learning_rate": 3.515282754235419e-05, | |
| "loss": 0.4497, | |
| "step": 962 | |
| }, | |
| { | |
| "epoch": 2.162829870859068, | |
| "grad_norm": 0.14299066364765167, | |
| "learning_rate": 3.4979690035389774e-05, | |
| "loss": 0.4468, | |
| "step": 963 | |
| }, | |
| { | |
| "epoch": 2.1650758001122963, | |
| "grad_norm": 0.1458815485239029, | |
| "learning_rate": 3.480686057895778e-05, | |
| "loss": 0.453, | |
| "step": 964 | |
| }, | |
| { | |
| "epoch": 2.167321729365525, | |
| "grad_norm": 0.1518121361732483, | |
| "learning_rate": 3.4634340355640136e-05, | |
| "loss": 0.4393, | |
| "step": 965 | |
| }, | |
| { | |
| "epoch": 2.1695676586187536, | |
| "grad_norm": 0.14630930125713348, | |
| "learning_rate": 3.446213054590291e-05, | |
| "loss": 0.4617, | |
| "step": 966 | |
| }, | |
| { | |
| "epoch": 2.171813587871982, | |
| "grad_norm": 0.15554536879062653, | |
| "learning_rate": 3.4290232328088136e-05, | |
| "loss": 0.4555, | |
| "step": 967 | |
| }, | |
| { | |
| "epoch": 2.1740595171252104, | |
| "grad_norm": 0.1420973539352417, | |
| "learning_rate": 3.4118646878405755e-05, | |
| "loss": 0.4575, | |
| "step": 968 | |
| }, | |
| { | |
| "epoch": 2.1763054463784393, | |
| "grad_norm": 0.15307992696762085, | |
| "learning_rate": 3.394737537092562e-05, | |
| "loss": 0.466, | |
| "step": 969 | |
| }, | |
| { | |
| "epoch": 2.1785513756316677, | |
| "grad_norm": 0.14762836694717407, | |
| "learning_rate": 3.377641897756947e-05, | |
| "loss": 0.4653, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 2.180797304884896, | |
| "grad_norm": 0.14197176694869995, | |
| "learning_rate": 3.360577886810286e-05, | |
| "loss": 0.4534, | |
| "step": 971 | |
| }, | |
| { | |
| "epoch": 2.1830432341381245, | |
| "grad_norm": 0.14574755728244781, | |
| "learning_rate": 3.343545621012721e-05, | |
| "loss": 0.4436, | |
| "step": 972 | |
| }, | |
| { | |
| "epoch": 2.1852891633913534, | |
| "grad_norm": 0.1501995027065277, | |
| "learning_rate": 3.326545216907171e-05, | |
| "loss": 0.4551, | |
| "step": 973 | |
| }, | |
| { | |
| "epoch": 2.187535092644582, | |
| "grad_norm": 0.15226097404956818, | |
| "learning_rate": 3.309576790818551e-05, | |
| "loss": 0.4458, | |
| "step": 974 | |
| }, | |
| { | |
| "epoch": 2.18978102189781, | |
| "grad_norm": 0.14684434235095978, | |
| "learning_rate": 3.292640458852958e-05, | |
| "loss": 0.4494, | |
| "step": 975 | |
| }, | |
| { | |
| "epoch": 2.1920269511510386, | |
| "grad_norm": 0.14523442089557648, | |
| "learning_rate": 3.275736336896893e-05, | |
| "loss": 0.4445, | |
| "step": 976 | |
| }, | |
| { | |
| "epoch": 2.1942728804042675, | |
| "grad_norm": 0.1518959403038025, | |
| "learning_rate": 3.25886454061646e-05, | |
| "loss": 0.4649, | |
| "step": 977 | |
| }, | |
| { | |
| "epoch": 2.196518809657496, | |
| "grad_norm": 0.1398971676826477, | |
| "learning_rate": 3.2420251854565704e-05, | |
| "loss": 0.4563, | |
| "step": 978 | |
| }, | |
| { | |
| "epoch": 2.1987647389107243, | |
| "grad_norm": 0.13926076889038086, | |
| "learning_rate": 3.22521838664016e-05, | |
| "loss": 0.4479, | |
| "step": 979 | |
| }, | |
| { | |
| "epoch": 2.2010106681639527, | |
| "grad_norm": 0.14644260704517365, | |
| "learning_rate": 3.2084442591674024e-05, | |
| "loss": 0.4349, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 2.203256597417181, | |
| "grad_norm": 0.14670224487781525, | |
| "learning_rate": 3.191702917814916e-05, | |
| "loss": 0.4532, | |
| "step": 981 | |
| }, | |
| { | |
| "epoch": 2.20550252667041, | |
| "grad_norm": 0.13720498979091644, | |
| "learning_rate": 3.174994477134978e-05, | |
| "loss": 0.431, | |
| "step": 982 | |
| }, | |
| { | |
| "epoch": 2.2077484559236384, | |
| "grad_norm": 0.13734634220600128, | |
| "learning_rate": 3.158319051454743e-05, | |
| "loss": 0.437, | |
| "step": 983 | |
| }, | |
| { | |
| "epoch": 2.209994385176867, | |
| "grad_norm": 0.14033032953739166, | |
| "learning_rate": 3.141676754875465e-05, | |
| "loss": 0.4487, | |
| "step": 984 | |
| }, | |
| { | |
| "epoch": 2.2122403144300953, | |
| "grad_norm": 0.1471083164215088, | |
| "learning_rate": 3.1250677012717135e-05, | |
| "loss": 0.4544, | |
| "step": 985 | |
| }, | |
| { | |
| "epoch": 2.214486243683324, | |
| "grad_norm": 0.13971002399921417, | |
| "learning_rate": 3.10849200429059e-05, | |
| "loss": 0.4535, | |
| "step": 986 | |
| }, | |
| { | |
| "epoch": 2.2167321729365526, | |
| "grad_norm": 0.1465609073638916, | |
| "learning_rate": 3.091949777350958e-05, | |
| "loss": 0.4482, | |
| "step": 987 | |
| }, | |
| { | |
| "epoch": 2.218978102189781, | |
| "grad_norm": 0.14760175347328186, | |
| "learning_rate": 3.075441133642659e-05, | |
| "loss": 0.4461, | |
| "step": 988 | |
| }, | |
| { | |
| "epoch": 2.2212240314430094, | |
| "grad_norm": 0.1456819474697113, | |
| "learning_rate": 3.05896618612574e-05, | |
| "loss": 0.4468, | |
| "step": 989 | |
| }, | |
| { | |
| "epoch": 2.2234699606962383, | |
| "grad_norm": 0.14734943211078644, | |
| "learning_rate": 3.0425250475296883e-05, | |
| "loss": 0.433, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 2.2257158899494667, | |
| "grad_norm": 0.13213606178760529, | |
| "learning_rate": 3.0261178303526536e-05, | |
| "loss": 0.4395, | |
| "step": 991 | |
| }, | |
| { | |
| "epoch": 2.227961819202695, | |
| "grad_norm": 0.14420166611671448, | |
| "learning_rate": 3.0097446468606785e-05, | |
| "loss": 0.4391, | |
| "step": 992 | |
| }, | |
| { | |
| "epoch": 2.2302077484559235, | |
| "grad_norm": 0.14115062355995178, | |
| "learning_rate": 2.9934056090869242e-05, | |
| "loss": 0.4371, | |
| "step": 993 | |
| }, | |
| { | |
| "epoch": 2.2324536777091524, | |
| "grad_norm": 0.14169073104858398, | |
| "learning_rate": 2.9771008288309224e-05, | |
| "loss": 0.4334, | |
| "step": 994 | |
| }, | |
| { | |
| "epoch": 2.234699606962381, | |
| "grad_norm": 0.14184604585170746, | |
| "learning_rate": 2.9608304176577872e-05, | |
| "loss": 0.4442, | |
| "step": 995 | |
| }, | |
| { | |
| "epoch": 2.236945536215609, | |
| "grad_norm": 0.14200329780578613, | |
| "learning_rate": 2.9445944868974688e-05, | |
| "loss": 0.465, | |
| "step": 996 | |
| }, | |
| { | |
| "epoch": 2.2391914654688376, | |
| "grad_norm": 0.14416737854480743, | |
| "learning_rate": 2.9283931476439886e-05, | |
| "loss": 0.4423, | |
| "step": 997 | |
| }, | |
| { | |
| "epoch": 2.241437394722066, | |
| "grad_norm": 0.14188611507415771, | |
| "learning_rate": 2.9122265107546677e-05, | |
| "loss": 0.4647, | |
| "step": 998 | |
| }, | |
| { | |
| "epoch": 2.243683323975295, | |
| "grad_norm": 0.14122439920902252, | |
| "learning_rate": 2.8960946868493843e-05, | |
| "loss": 0.4317, | |
| "step": 999 | |
| }, | |
| { | |
| "epoch": 2.2459292532285233, | |
| "grad_norm": 0.14019352197647095, | |
| "learning_rate": 2.87999778630981e-05, | |
| "loss": 0.4415, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 2.2481751824817517, | |
| "grad_norm": 0.1378793567419052, | |
| "learning_rate": 2.863935919278645e-05, | |
| "loss": 0.4537, | |
| "step": 1001 | |
| }, | |
| { | |
| "epoch": 2.25042111173498, | |
| "grad_norm": 0.14002038538455963, | |
| "learning_rate": 2.847909195658886e-05, | |
| "loss": 0.4427, | |
| "step": 1002 | |
| }, | |
| { | |
| "epoch": 2.252667040988209, | |
| "grad_norm": 0.1482112854719162, | |
| "learning_rate": 2.8319177251130495e-05, | |
| "loss": 0.4465, | |
| "step": 1003 | |
| }, | |
| { | |
| "epoch": 2.2549129702414374, | |
| "grad_norm": 0.1393243372440338, | |
| "learning_rate": 2.815961617062442e-05, | |
| "loss": 0.4405, | |
| "step": 1004 | |
| }, | |
| { | |
| "epoch": 2.257158899494666, | |
| "grad_norm": 0.14361439645290375, | |
| "learning_rate": 2.8000409806864007e-05, | |
| "loss": 0.4672, | |
| "step": 1005 | |
| }, | |
| { | |
| "epoch": 2.2594048287478943, | |
| "grad_norm": 0.13548092544078827, | |
| "learning_rate": 2.7841559249215503e-05, | |
| "loss": 0.4557, | |
| "step": 1006 | |
| }, | |
| { | |
| "epoch": 2.261650758001123, | |
| "grad_norm": 0.13999567925930023, | |
| "learning_rate": 2.768306558461051e-05, | |
| "loss": 0.4577, | |
| "step": 1007 | |
| }, | |
| { | |
| "epoch": 2.2638966872543516, | |
| "grad_norm": 0.14704839885234833, | |
| "learning_rate": 2.75249298975386e-05, | |
| "loss": 0.4556, | |
| "step": 1008 | |
| }, | |
| { | |
| "epoch": 2.26614261650758, | |
| "grad_norm": 0.1454869657754898, | |
| "learning_rate": 2.7367153270039934e-05, | |
| "loss": 0.4656, | |
| "step": 1009 | |
| }, | |
| { | |
| "epoch": 2.2683885457608084, | |
| "grad_norm": 0.14805535972118378, | |
| "learning_rate": 2.720973678169781e-05, | |
| "loss": 0.4463, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 2.2706344750140373, | |
| "grad_norm": 0.14422546327114105, | |
| "learning_rate": 2.705268150963125e-05, | |
| "loss": 0.4463, | |
| "step": 1011 | |
| }, | |
| { | |
| "epoch": 2.2728804042672657, | |
| "grad_norm": 0.14471085369586945, | |
| "learning_rate": 2.6895988528487724e-05, | |
| "loss": 0.4499, | |
| "step": 1012 | |
| }, | |
| { | |
| "epoch": 2.275126333520494, | |
| "grad_norm": 0.14727704226970673, | |
| "learning_rate": 2.6739658910435663e-05, | |
| "loss": 0.4498, | |
| "step": 1013 | |
| }, | |
| { | |
| "epoch": 2.2773722627737225, | |
| "grad_norm": 0.13678747415542603, | |
| "learning_rate": 2.6583693725157176e-05, | |
| "loss": 0.4396, | |
| "step": 1014 | |
| }, | |
| { | |
| "epoch": 2.279618192026951, | |
| "grad_norm": 0.14493557810783386, | |
| "learning_rate": 2.6428094039840827e-05, | |
| "loss": 0.4493, | |
| "step": 1015 | |
| }, | |
| { | |
| "epoch": 2.28186412128018, | |
| "grad_norm": 0.14464671909809113, | |
| "learning_rate": 2.6272860919174223e-05, | |
| "loss": 0.4586, | |
| "step": 1016 | |
| }, | |
| { | |
| "epoch": 2.284110050533408, | |
| "grad_norm": 0.13754825294017792, | |
| "learning_rate": 2.6117995425336774e-05, | |
| "loss": 0.4587, | |
| "step": 1017 | |
| }, | |
| { | |
| "epoch": 2.2863559797866366, | |
| "grad_norm": 0.14128117263317108, | |
| "learning_rate": 2.596349861799235e-05, | |
| "loss": 0.4578, | |
| "step": 1018 | |
| }, | |
| { | |
| "epoch": 2.2886019090398655, | |
| "grad_norm": 0.14357365667819977, | |
| "learning_rate": 2.5809371554282177e-05, | |
| "loss": 0.4492, | |
| "step": 1019 | |
| }, | |
| { | |
| "epoch": 2.290847838293094, | |
| "grad_norm": 0.1328091323375702, | |
| "learning_rate": 2.565561528881744e-05, | |
| "loss": 0.4526, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 2.2930937675463223, | |
| "grad_norm": 0.13385091722011566, | |
| "learning_rate": 2.5502230873672177e-05, | |
| "loss": 0.4692, | |
| "step": 1021 | |
| }, | |
| { | |
| "epoch": 2.2953396967995507, | |
| "grad_norm": 0.13780003786087036, | |
| "learning_rate": 2.5349219358376082e-05, | |
| "loss": 0.4652, | |
| "step": 1022 | |
| }, | |
| { | |
| "epoch": 2.297585626052779, | |
| "grad_norm": 0.1325894445180893, | |
| "learning_rate": 2.519658178990727e-05, | |
| "loss": 0.4384, | |
| "step": 1023 | |
| }, | |
| { | |
| "epoch": 2.299831555306008, | |
| "grad_norm": 0.13235574960708618, | |
| "learning_rate": 2.5044319212685066e-05, | |
| "loss": 0.454, | |
| "step": 1024 | |
| }, | |
| { | |
| "epoch": 2.3020774845592364, | |
| "grad_norm": 0.13442382216453552, | |
| "learning_rate": 2.4892432668563017e-05, | |
| "loss": 0.4449, | |
| "step": 1025 | |
| }, | |
| { | |
| "epoch": 2.304323413812465, | |
| "grad_norm": 0.1442955881357193, | |
| "learning_rate": 2.4740923196821653e-05, | |
| "loss": 0.4764, | |
| "step": 1026 | |
| }, | |
| { | |
| "epoch": 2.3065693430656933, | |
| "grad_norm": 0.13242414593696594, | |
| "learning_rate": 2.4589791834161324e-05, | |
| "loss": 0.44, | |
| "step": 1027 | |
| }, | |
| { | |
| "epoch": 2.308815272318922, | |
| "grad_norm": 0.1390787959098816, | |
| "learning_rate": 2.443903961469528e-05, | |
| "loss": 0.4671, | |
| "step": 1028 | |
| }, | |
| { | |
| "epoch": 2.3110612015721506, | |
| "grad_norm": 0.14238110184669495, | |
| "learning_rate": 2.4288667569942402e-05, | |
| "loss": 0.4375, | |
| "step": 1029 | |
| }, | |
| { | |
| "epoch": 2.313307130825379, | |
| "grad_norm": 0.14821192622184753, | |
| "learning_rate": 2.4138676728820274e-05, | |
| "loss": 0.4575, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 2.3155530600786074, | |
| "grad_norm": 0.1424325704574585, | |
| "learning_rate": 2.3989068117638114e-05, | |
| "loss": 0.4418, | |
| "step": 1031 | |
| }, | |
| { | |
| "epoch": 2.317798989331836, | |
| "grad_norm": 0.1394152194261551, | |
| "learning_rate": 2.383984276008975e-05, | |
| "loss": 0.4298, | |
| "step": 1032 | |
| }, | |
| { | |
| "epoch": 2.3200449185850647, | |
| "grad_norm": 0.1432042270898819, | |
| "learning_rate": 2.3691001677246552e-05, | |
| "loss": 0.4409, | |
| "step": 1033 | |
| }, | |
| { | |
| "epoch": 2.322290847838293, | |
| "grad_norm": 0.14173389971256256, | |
| "learning_rate": 2.354254588755051e-05, | |
| "loss": 0.4557, | |
| "step": 1034 | |
| }, | |
| { | |
| "epoch": 2.3245367770915215, | |
| "grad_norm": 0.1387631595134735, | |
| "learning_rate": 2.339447640680728e-05, | |
| "loss": 0.4562, | |
| "step": 1035 | |
| }, | |
| { | |
| "epoch": 2.3267827063447504, | |
| "grad_norm": 0.14601486921310425, | |
| "learning_rate": 2.3246794248179203e-05, | |
| "loss": 0.4496, | |
| "step": 1036 | |
| }, | |
| { | |
| "epoch": 2.329028635597979, | |
| "grad_norm": 0.13562379777431488, | |
| "learning_rate": 2.309950042217838e-05, | |
| "loss": 0.4385, | |
| "step": 1037 | |
| }, | |
| { | |
| "epoch": 2.331274564851207, | |
| "grad_norm": 0.14119566977024078, | |
| "learning_rate": 2.2952595936659757e-05, | |
| "loss": 0.4468, | |
| "step": 1038 | |
| }, | |
| { | |
| "epoch": 2.3335204941044356, | |
| "grad_norm": 0.13435381650924683, | |
| "learning_rate": 2.2806081796814193e-05, | |
| "loss": 0.4479, | |
| "step": 1039 | |
| }, | |
| { | |
| "epoch": 2.335766423357664, | |
| "grad_norm": 0.14311861991882324, | |
| "learning_rate": 2.2659959005161617e-05, | |
| "loss": 0.4466, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 2.338012352610893, | |
| "grad_norm": 0.13565625250339508, | |
| "learning_rate": 2.25142285615442e-05, | |
| "loss": 0.4656, | |
| "step": 1041 | |
| }, | |
| { | |
| "epoch": 2.3402582818641213, | |
| "grad_norm": 0.1413930356502533, | |
| "learning_rate": 2.2368891463119473e-05, | |
| "loss": 0.4426, | |
| "step": 1042 | |
| }, | |
| { | |
| "epoch": 2.3425042111173497, | |
| "grad_norm": 0.14812184870243073, | |
| "learning_rate": 2.222394870435352e-05, | |
| "loss": 0.4617, | |
| "step": 1043 | |
| }, | |
| { | |
| "epoch": 2.344750140370578, | |
| "grad_norm": 0.1381373107433319, | |
| "learning_rate": 2.2079401277014102e-05, | |
| "loss": 0.4506, | |
| "step": 1044 | |
| }, | |
| { | |
| "epoch": 2.346996069623807, | |
| "grad_norm": 0.1399037092924118, | |
| "learning_rate": 2.193525017016402e-05, | |
| "loss": 0.4427, | |
| "step": 1045 | |
| }, | |
| { | |
| "epoch": 2.3492419988770354, | |
| "grad_norm": 0.14365847408771515, | |
| "learning_rate": 2.1791496370154173e-05, | |
| "loss": 0.4575, | |
| "step": 1046 | |
| }, | |
| { | |
| "epoch": 2.351487928130264, | |
| "grad_norm": 0.13773076236248016, | |
| "learning_rate": 2.1648140860616974e-05, | |
| "loss": 0.4501, | |
| "step": 1047 | |
| }, | |
| { | |
| "epoch": 2.3537338573834923, | |
| "grad_norm": 0.13768814504146576, | |
| "learning_rate": 2.1505184622459517e-05, | |
| "loss": 0.4754, | |
| "step": 1048 | |
| }, | |
| { | |
| "epoch": 2.3559797866367207, | |
| "grad_norm": 0.13707469403743744, | |
| "learning_rate": 2.1362628633856836e-05, | |
| "loss": 0.4243, | |
| "step": 1049 | |
| }, | |
| { | |
| "epoch": 2.3582257158899496, | |
| "grad_norm": 0.1411537230014801, | |
| "learning_rate": 2.1220473870245347e-05, | |
| "loss": 0.463, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 2.360471645143178, | |
| "grad_norm": 0.1276266723871231, | |
| "learning_rate": 2.1078721304316064e-05, | |
| "loss": 0.4492, | |
| "step": 1051 | |
| }, | |
| { | |
| "epoch": 2.3627175743964064, | |
| "grad_norm": 0.13482601940631866, | |
| "learning_rate": 2.093737190600793e-05, | |
| "loss": 0.451, | |
| "step": 1052 | |
| }, | |
| { | |
| "epoch": 2.3649635036496353, | |
| "grad_norm": 0.13639169931411743, | |
| "learning_rate": 2.0796426642501305e-05, | |
| "loss": 0.4458, | |
| "step": 1053 | |
| }, | |
| { | |
| "epoch": 2.3672094329028637, | |
| "grad_norm": 0.128794863820076, | |
| "learning_rate": 2.065588647821116e-05, | |
| "loss": 0.452, | |
| "step": 1054 | |
| }, | |
| { | |
| "epoch": 2.369455362156092, | |
| "grad_norm": 0.13202716410160065, | |
| "learning_rate": 2.0515752374780664e-05, | |
| "loss": 0.4405, | |
| "step": 1055 | |
| }, | |
| { | |
| "epoch": 2.3717012914093205, | |
| "grad_norm": 0.15147733688354492, | |
| "learning_rate": 2.03760252910745e-05, | |
| "loss": 0.451, | |
| "step": 1056 | |
| }, | |
| { | |
| "epoch": 2.373947220662549, | |
| "grad_norm": 0.13587650656700134, | |
| "learning_rate": 2.023670618317235e-05, | |
| "loss": 0.4373, | |
| "step": 1057 | |
| }, | |
| { | |
| "epoch": 2.376193149915778, | |
| "grad_norm": 0.1358175277709961, | |
| "learning_rate": 2.009779600436228e-05, | |
| "loss": 0.4628, | |
| "step": 1058 | |
| }, | |
| { | |
| "epoch": 2.378439079169006, | |
| "grad_norm": 0.13308054208755493, | |
| "learning_rate": 1.995929570513427e-05, | |
| "loss": 0.4517, | |
| "step": 1059 | |
| }, | |
| { | |
| "epoch": 2.3806850084222346, | |
| "grad_norm": 0.14447179436683655, | |
| "learning_rate": 1.9821206233173756e-05, | |
| "loss": 0.464, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 2.382930937675463, | |
| "grad_norm": 0.1535249650478363, | |
| "learning_rate": 1.9683528533355077e-05, | |
| "loss": 0.4783, | |
| "step": 1061 | |
| }, | |
| { | |
| "epoch": 2.385176866928692, | |
| "grad_norm": 0.13172586262226105, | |
| "learning_rate": 1.9546263547735006e-05, | |
| "loss": 0.4451, | |
| "step": 1062 | |
| }, | |
| { | |
| "epoch": 2.3874227961819203, | |
| "grad_norm": 0.13454264402389526, | |
| "learning_rate": 1.9409412215546385e-05, | |
| "loss": 0.4326, | |
| "step": 1063 | |
| }, | |
| { | |
| "epoch": 2.3896687254351487, | |
| "grad_norm": 0.13548077642917633, | |
| "learning_rate": 1.9272975473191566e-05, | |
| "loss": 0.4725, | |
| "step": 1064 | |
| }, | |
| { | |
| "epoch": 2.391914654688377, | |
| "grad_norm": 0.1396332085132599, | |
| "learning_rate": 1.91369542542361e-05, | |
| "loss": 0.4433, | |
| "step": 1065 | |
| }, | |
| { | |
| "epoch": 2.394160583941606, | |
| "grad_norm": 0.13676691055297852, | |
| "learning_rate": 1.9001349489402374e-05, | |
| "loss": 0.4533, | |
| "step": 1066 | |
| }, | |
| { | |
| "epoch": 2.3964065131948344, | |
| "grad_norm": 0.138559028506279, | |
| "learning_rate": 1.886616210656314e-05, | |
| "loss": 0.4546, | |
| "step": 1067 | |
| }, | |
| { | |
| "epoch": 2.398652442448063, | |
| "grad_norm": 0.14537115395069122, | |
| "learning_rate": 1.873139303073529e-05, | |
| "loss": 0.4505, | |
| "step": 1068 | |
| }, | |
| { | |
| "epoch": 2.4008983717012913, | |
| "grad_norm": 0.14567793905735016, | |
| "learning_rate": 1.859704318407336e-05, | |
| "loss": 0.4494, | |
| "step": 1069 | |
| }, | |
| { | |
| "epoch": 2.40314430095452, | |
| "grad_norm": 0.16292881965637207, | |
| "learning_rate": 1.8463113485863423e-05, | |
| "loss": 0.4493, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 2.4053902302077486, | |
| "grad_norm": 0.1402868777513504, | |
| "learning_rate": 1.832960485251661e-05, | |
| "loss": 0.4546, | |
| "step": 1071 | |
| }, | |
| { | |
| "epoch": 2.407636159460977, | |
| "grad_norm": 0.13375958800315857, | |
| "learning_rate": 1.819651819756297e-05, | |
| "loss": 0.4469, | |
| "step": 1072 | |
| }, | |
| { | |
| "epoch": 2.4098820887142054, | |
| "grad_norm": 0.14132662117481232, | |
| "learning_rate": 1.80638544316452e-05, | |
| "loss": 0.4505, | |
| "step": 1073 | |
| }, | |
| { | |
| "epoch": 2.412128017967434, | |
| "grad_norm": 0.13755889236927032, | |
| "learning_rate": 1.7931614462512293e-05, | |
| "loss": 0.4704, | |
| "step": 1074 | |
| }, | |
| { | |
| "epoch": 2.4143739472206627, | |
| "grad_norm": 0.13184499740600586, | |
| "learning_rate": 1.7799799195013526e-05, | |
| "loss": 0.4369, | |
| "step": 1075 | |
| }, | |
| { | |
| "epoch": 2.416619876473891, | |
| "grad_norm": 0.13104869425296783, | |
| "learning_rate": 1.7668409531092097e-05, | |
| "loss": 0.4521, | |
| "step": 1076 | |
| }, | |
| { | |
| "epoch": 2.4188658057271195, | |
| "grad_norm": 0.135769784450531, | |
| "learning_rate": 1.7537446369779072e-05, | |
| "loss": 0.4674, | |
| "step": 1077 | |
| }, | |
| { | |
| "epoch": 2.421111734980348, | |
| "grad_norm": 0.13897131383419037, | |
| "learning_rate": 1.740691060718712e-05, | |
| "loss": 0.4401, | |
| "step": 1078 | |
| }, | |
| { | |
| "epoch": 2.423357664233577, | |
| "grad_norm": 0.12773634493350983, | |
| "learning_rate": 1.72768031365045e-05, | |
| "loss": 0.4339, | |
| "step": 1079 | |
| }, | |
| { | |
| "epoch": 2.425603593486805, | |
| "grad_norm": 0.13083034753799438, | |
| "learning_rate": 1.7147124847988834e-05, | |
| "loss": 0.451, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 2.4278495227400336, | |
| "grad_norm": 0.13339859247207642, | |
| "learning_rate": 1.7017876628961126e-05, | |
| "loss": 0.4495, | |
| "step": 1081 | |
| }, | |
| { | |
| "epoch": 2.430095451993262, | |
| "grad_norm": 0.13018065690994263, | |
| "learning_rate": 1.6889059363799623e-05, | |
| "loss": 0.4483, | |
| "step": 1082 | |
| }, | |
| { | |
| "epoch": 2.432341381246491, | |
| "grad_norm": 0.13034923374652863, | |
| "learning_rate": 1.67606739339338e-05, | |
| "loss": 0.4381, | |
| "step": 1083 | |
| }, | |
| { | |
| "epoch": 2.4345873104997193, | |
| "grad_norm": 0.1323402225971222, | |
| "learning_rate": 1.6632721217838258e-05, | |
| "loss": 0.4414, | |
| "step": 1084 | |
| }, | |
| { | |
| "epoch": 2.4368332397529477, | |
| "grad_norm": 0.13824905455112457, | |
| "learning_rate": 1.650520209102677e-05, | |
| "loss": 0.4469, | |
| "step": 1085 | |
| }, | |
| { | |
| "epoch": 2.439079169006176, | |
| "grad_norm": 0.12723715603351593, | |
| "learning_rate": 1.6378117426046332e-05, | |
| "loss": 0.4551, | |
| "step": 1086 | |
| }, | |
| { | |
| "epoch": 2.441325098259405, | |
| "grad_norm": 0.12957409024238586, | |
| "learning_rate": 1.6251468092471093e-05, | |
| "loss": 0.4435, | |
| "step": 1087 | |
| }, | |
| { | |
| "epoch": 2.4435710275126334, | |
| "grad_norm": 0.13387183845043182, | |
| "learning_rate": 1.612525495689651e-05, | |
| "loss": 0.4321, | |
| "step": 1088 | |
| }, | |
| { | |
| "epoch": 2.445816956765862, | |
| "grad_norm": 0.13002759218215942, | |
| "learning_rate": 1.5999478882933325e-05, | |
| "loss": 0.4461, | |
| "step": 1089 | |
| }, | |
| { | |
| "epoch": 2.4480628860190903, | |
| "grad_norm": 0.13771192729473114, | |
| "learning_rate": 1.5874140731201694e-05, | |
| "loss": 0.4337, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 2.4503088152723187, | |
| "grad_norm": 0.13762550055980682, | |
| "learning_rate": 1.574924135932529e-05, | |
| "loss": 0.4435, | |
| "step": 1091 | |
| }, | |
| { | |
| "epoch": 2.4525547445255476, | |
| "grad_norm": 0.13518671691417694, | |
| "learning_rate": 1.5624781621925462e-05, | |
| "loss": 0.4457, | |
| "step": 1092 | |
| }, | |
| { | |
| "epoch": 2.454800673778776, | |
| "grad_norm": 0.13244876265525818, | |
| "learning_rate": 1.5500762370615392e-05, | |
| "loss": 0.4466, | |
| "step": 1093 | |
| }, | |
| { | |
| "epoch": 2.4570466030320044, | |
| "grad_norm": 0.1363506317138672, | |
| "learning_rate": 1.5377184453994232e-05, | |
| "loss": 0.4397, | |
| "step": 1094 | |
| }, | |
| { | |
| "epoch": 2.4592925322852333, | |
| "grad_norm": 0.13642770051956177, | |
| "learning_rate": 1.5254048717641268e-05, | |
| "loss": 0.4525, | |
| "step": 1095 | |
| }, | |
| { | |
| "epoch": 2.4615384615384617, | |
| "grad_norm": 0.129640594124794, | |
| "learning_rate": 1.5131356004110234e-05, | |
| "loss": 0.4743, | |
| "step": 1096 | |
| }, | |
| { | |
| "epoch": 2.46378439079169, | |
| "grad_norm": 0.12901091575622559, | |
| "learning_rate": 1.500910715292343e-05, | |
| "loss": 0.4579, | |
| "step": 1097 | |
| }, | |
| { | |
| "epoch": 2.4660303200449185, | |
| "grad_norm": 0.1358920782804489, | |
| "learning_rate": 1.4887303000566103e-05, | |
| "loss": 0.4218, | |
| "step": 1098 | |
| }, | |
| { | |
| "epoch": 2.468276249298147, | |
| "grad_norm": 0.13251328468322754, | |
| "learning_rate": 1.4765944380480633e-05, | |
| "loss": 0.454, | |
| "step": 1099 | |
| }, | |
| { | |
| "epoch": 2.470522178551376, | |
| "grad_norm": 0.13257341086864471, | |
| "learning_rate": 1.464503212306081e-05, | |
| "loss": 0.4534, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 2.472768107804604, | |
| "grad_norm": 0.135364830493927, | |
| "learning_rate": 1.4524567055646261e-05, | |
| "loss": 0.4535, | |
| "step": 1101 | |
| }, | |
| { | |
| "epoch": 2.4750140370578326, | |
| "grad_norm": 0.13053563237190247, | |
| "learning_rate": 1.4404550002516709e-05, | |
| "loss": 0.469, | |
| "step": 1102 | |
| }, | |
| { | |
| "epoch": 2.477259966311061, | |
| "grad_norm": 0.12724533677101135, | |
| "learning_rate": 1.4284981784886314e-05, | |
| "loss": 0.4409, | |
| "step": 1103 | |
| }, | |
| { | |
| "epoch": 2.47950589556429, | |
| "grad_norm": 0.13512974977493286, | |
| "learning_rate": 1.4165863220898132e-05, | |
| "loss": 0.4644, | |
| "step": 1104 | |
| }, | |
| { | |
| "epoch": 2.4817518248175183, | |
| "grad_norm": 0.1417611837387085, | |
| "learning_rate": 1.404719512561843e-05, | |
| "loss": 0.4507, | |
| "step": 1105 | |
| }, | |
| { | |
| "epoch": 2.4839977540707467, | |
| "grad_norm": 0.13797731697559357, | |
| "learning_rate": 1.3928978311031194e-05, | |
| "loss": 0.4427, | |
| "step": 1106 | |
| }, | |
| { | |
| "epoch": 2.486243683323975, | |
| "grad_norm": 0.13513045012950897, | |
| "learning_rate": 1.3811213586032506e-05, | |
| "loss": 0.4495, | |
| "step": 1107 | |
| }, | |
| { | |
| "epoch": 2.4884896125772036, | |
| "grad_norm": 0.13863462209701538, | |
| "learning_rate": 1.369390175642507e-05, | |
| "loss": 0.4447, | |
| "step": 1108 | |
| }, | |
| { | |
| "epoch": 2.4907355418304324, | |
| "grad_norm": 0.13138817250728607, | |
| "learning_rate": 1.3577043624912602e-05, | |
| "loss": 0.4433, | |
| "step": 1109 | |
| }, | |
| { | |
| "epoch": 2.492981471083661, | |
| "grad_norm": 0.13766634464263916, | |
| "learning_rate": 1.3460639991094423e-05, | |
| "loss": 0.4569, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 2.4952274003368893, | |
| "grad_norm": 0.13439221680164337, | |
| "learning_rate": 1.3344691651459987e-05, | |
| "loss": 0.4527, | |
| "step": 1111 | |
| }, | |
| { | |
| "epoch": 2.497473329590118, | |
| "grad_norm": 0.12861117720603943, | |
| "learning_rate": 1.3229199399383395e-05, | |
| "loss": 0.4226, | |
| "step": 1112 | |
| }, | |
| { | |
| "epoch": 2.4997192588433466, | |
| "grad_norm": 0.135506734251976, | |
| "learning_rate": 1.3114164025117968e-05, | |
| "loss": 0.4355, | |
| "step": 1113 | |
| }, | |
| { | |
| "epoch": 2.501965188096575, | |
| "grad_norm": 0.13466140627861023, | |
| "learning_rate": 1.299958631579088e-05, | |
| "loss": 0.4613, | |
| "step": 1114 | |
| }, | |
| { | |
| "epoch": 2.5042111173498034, | |
| "grad_norm": 0.131247416138649, | |
| "learning_rate": 1.2885467055397691e-05, | |
| "loss": 0.4421, | |
| "step": 1115 | |
| }, | |
| { | |
| "epoch": 2.506457046603032, | |
| "grad_norm": 0.13447698950767517, | |
| "learning_rate": 1.2771807024797052e-05, | |
| "loss": 0.438, | |
| "step": 1116 | |
| }, | |
| { | |
| "epoch": 2.5087029758562607, | |
| "grad_norm": 0.14003418385982513, | |
| "learning_rate": 1.2658607001705359e-05, | |
| "loss": 0.4327, | |
| "step": 1117 | |
| }, | |
| { | |
| "epoch": 2.510948905109489, | |
| "grad_norm": 0.13097427785396576, | |
| "learning_rate": 1.254586776069143e-05, | |
| "loss": 0.4427, | |
| "step": 1118 | |
| }, | |
| { | |
| "epoch": 2.5131948343627175, | |
| "grad_norm": 0.1318497210741043, | |
| "learning_rate": 1.2433590073171175e-05, | |
| "loss": 0.4516, | |
| "step": 1119 | |
| }, | |
| { | |
| "epoch": 2.5154407636159464, | |
| "grad_norm": 0.12292584031820297, | |
| "learning_rate": 1.23217747074023e-05, | |
| "loss": 0.4355, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 2.517686692869175, | |
| "grad_norm": 0.12714707851409912, | |
| "learning_rate": 1.2210422428479122e-05, | |
| "loss": 0.4457, | |
| "step": 1121 | |
| }, | |
| { | |
| "epoch": 2.519932622122403, | |
| "grad_norm": 0.13449381291866302, | |
| "learning_rate": 1.2099533998327328e-05, | |
| "loss": 0.443, | |
| "step": 1122 | |
| }, | |
| { | |
| "epoch": 2.5221785513756316, | |
| "grad_norm": 0.1288016140460968, | |
| "learning_rate": 1.1989110175698629e-05, | |
| "loss": 0.4488, | |
| "step": 1123 | |
| }, | |
| { | |
| "epoch": 2.52442448062886, | |
| "grad_norm": 0.12953847646713257, | |
| "learning_rate": 1.1879151716165782e-05, | |
| "loss": 0.4327, | |
| "step": 1124 | |
| }, | |
| { | |
| "epoch": 2.5266704098820885, | |
| "grad_norm": 0.1303713619709015, | |
| "learning_rate": 1.1769659372117208e-05, | |
| "loss": 0.4452, | |
| "step": 1125 | |
| }, | |
| { | |
| "epoch": 2.5289163391353173, | |
| "grad_norm": 0.12560470402240753, | |
| "learning_rate": 1.1660633892752018e-05, | |
| "loss": 0.453, | |
| "step": 1126 | |
| }, | |
| { | |
| "epoch": 2.5311622683885457, | |
| "grad_norm": 0.1277565062046051, | |
| "learning_rate": 1.1552076024074767e-05, | |
| "loss": 0.4342, | |
| "step": 1127 | |
| }, | |
| { | |
| "epoch": 2.533408197641774, | |
| "grad_norm": 0.13078947365283966, | |
| "learning_rate": 1.1443986508890438e-05, | |
| "loss": 0.4529, | |
| "step": 1128 | |
| }, | |
| { | |
| "epoch": 2.535654126895003, | |
| "grad_norm": 0.13425932824611664, | |
| "learning_rate": 1.1336366086799262e-05, | |
| "loss": 0.4608, | |
| "step": 1129 | |
| }, | |
| { | |
| "epoch": 2.5379000561482314, | |
| "grad_norm": 0.12628474831581116, | |
| "learning_rate": 1.1229215494191724e-05, | |
| "loss": 0.4679, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 2.54014598540146, | |
| "grad_norm": 0.12629267573356628, | |
| "learning_rate": 1.112253546424352e-05, | |
| "loss": 0.4525, | |
| "step": 1131 | |
| }, | |
| { | |
| "epoch": 2.5423919146546883, | |
| "grad_norm": 0.1339423656463623, | |
| "learning_rate": 1.1016326726910554e-05, | |
| "loss": 0.4601, | |
| "step": 1132 | |
| }, | |
| { | |
| "epoch": 2.5446378439079167, | |
| "grad_norm": 0.12335141748189926, | |
| "learning_rate": 1.0910590008923871e-05, | |
| "loss": 0.444, | |
| "step": 1133 | |
| }, | |
| { | |
| "epoch": 2.5468837731611456, | |
| "grad_norm": 0.12865717709064484, | |
| "learning_rate": 1.0805326033784804e-05, | |
| "loss": 0.4384, | |
| "step": 1134 | |
| }, | |
| { | |
| "epoch": 2.549129702414374, | |
| "grad_norm": 0.13087087869644165, | |
| "learning_rate": 1.0700535521759874e-05, | |
| "loss": 0.4367, | |
| "step": 1135 | |
| }, | |
| { | |
| "epoch": 2.5513756316676024, | |
| "grad_norm": 0.12297067791223526, | |
| "learning_rate": 1.0596219189875963e-05, | |
| "loss": 0.4431, | |
| "step": 1136 | |
| }, | |
| { | |
| "epoch": 2.5536215609208313, | |
| "grad_norm": 0.13361698389053345, | |
| "learning_rate": 1.049237775191542e-05, | |
| "loss": 0.4345, | |
| "step": 1137 | |
| }, | |
| { | |
| "epoch": 2.5558674901740597, | |
| "grad_norm": 0.1307375282049179, | |
| "learning_rate": 1.0389011918411103e-05, | |
| "loss": 0.469, | |
| "step": 1138 | |
| }, | |
| { | |
| "epoch": 2.558113419427288, | |
| "grad_norm": 0.13051824271678925, | |
| "learning_rate": 1.0286122396641587e-05, | |
| "loss": 0.464, | |
| "step": 1139 | |
| }, | |
| { | |
| "epoch": 2.5603593486805165, | |
| "grad_norm": 0.13012929260730743, | |
| "learning_rate": 1.0183709890626301e-05, | |
| "loss": 0.4517, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 2.562605277933745, | |
| "grad_norm": 0.13006287813186646, | |
| "learning_rate": 1.0081775101120645e-05, | |
| "loss": 0.4565, | |
| "step": 1141 | |
| }, | |
| { | |
| "epoch": 2.5648512071869733, | |
| "grad_norm": 0.12601535022258759, | |
| "learning_rate": 9.980318725611294e-06, | |
| "loss": 0.4355, | |
| "step": 1142 | |
| }, | |
| { | |
| "epoch": 2.567097136440202, | |
| "grad_norm": 0.13367784023284912, | |
| "learning_rate": 9.879341458311394e-06, | |
| "loss": 0.459, | |
| "step": 1143 | |
| }, | |
| { | |
| "epoch": 2.5693430656934306, | |
| "grad_norm": 0.13120903074741364, | |
| "learning_rate": 9.778843990155784e-06, | |
| "loss": 0.4516, | |
| "step": 1144 | |
| }, | |
| { | |
| "epoch": 2.571588994946659, | |
| "grad_norm": 0.12156583368778229, | |
| "learning_rate": 9.67882700879632e-06, | |
| "loss": 0.4366, | |
| "step": 1145 | |
| }, | |
| { | |
| "epoch": 2.573834924199888, | |
| "grad_norm": 0.12496156245470047, | |
| "learning_rate": 9.57929119859708e-06, | |
| "loss": 0.4503, | |
| "step": 1146 | |
| }, | |
| { | |
| "epoch": 2.5760808534531163, | |
| "grad_norm": 0.1285991668701172, | |
| "learning_rate": 9.480237240629794e-06, | |
| "loss": 0.4546, | |
| "step": 1147 | |
| }, | |
| { | |
| "epoch": 2.5783267827063447, | |
| "grad_norm": 0.12715794146060944, | |
| "learning_rate": 9.381665812669074e-06, | |
| "loss": 0.4353, | |
| "step": 1148 | |
| }, | |
| { | |
| "epoch": 2.580572711959573, | |
| "grad_norm": 0.12791746854782104, | |
| "learning_rate": 9.283577589187884e-06, | |
| "loss": 0.4783, | |
| "step": 1149 | |
| }, | |
| { | |
| "epoch": 2.5828186412128016, | |
| "grad_norm": 0.12204549461603165, | |
| "learning_rate": 9.185973241352859e-06, | |
| "loss": 0.4475, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 2.5850645704660304, | |
| "grad_norm": 0.12769286334514618, | |
| "learning_rate": 9.088853437019688e-06, | |
| "loss": 0.44, | |
| "step": 1151 | |
| }, | |
| { | |
| "epoch": 2.587310499719259, | |
| "grad_norm": 0.12649452686309814, | |
| "learning_rate": 8.99221884072862e-06, | |
| "loss": 0.44, | |
| "step": 1152 | |
| }, | |
| { | |
| "epoch": 2.5895564289724873, | |
| "grad_norm": 0.12873002886772156, | |
| "learning_rate": 8.896070113699874e-06, | |
| "loss": 0.4356, | |
| "step": 1153 | |
| }, | |
| { | |
| "epoch": 2.591802358225716, | |
| "grad_norm": 0.12493567168712616, | |
| "learning_rate": 8.800407913829088e-06, | |
| "loss": 0.456, | |
| "step": 1154 | |
| }, | |
| { | |
| "epoch": 2.5940482874789446, | |
| "grad_norm": 0.12773042917251587, | |
| "learning_rate": 8.705232895682906e-06, | |
| "loss": 0.4502, | |
| "step": 1155 | |
| }, | |
| { | |
| "epoch": 2.596294216732173, | |
| "grad_norm": 0.1301664263010025, | |
| "learning_rate": 8.610545710494356e-06, | |
| "loss": 0.441, | |
| "step": 1156 | |
| }, | |
| { | |
| "epoch": 2.5985401459854014, | |
| "grad_norm": 0.136691614985466, | |
| "learning_rate": 8.516347006158567e-06, | |
| "loss": 0.4451, | |
| "step": 1157 | |
| }, | |
| { | |
| "epoch": 2.60078607523863, | |
| "grad_norm": 0.12582361698150635, | |
| "learning_rate": 8.422637427228193e-06, | |
| "loss": 0.4477, | |
| "step": 1158 | |
| }, | |
| { | |
| "epoch": 2.6030320044918582, | |
| "grad_norm": 0.12166401743888855, | |
| "learning_rate": 8.329417614909094e-06, | |
| "loss": 0.4402, | |
| "step": 1159 | |
| }, | |
| { | |
| "epoch": 2.605277933745087, | |
| "grad_norm": 0.12802627682685852, | |
| "learning_rate": 8.236688207055885e-06, | |
| "loss": 0.4545, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 2.6075238629983155, | |
| "grad_norm": 0.1304531693458557, | |
| "learning_rate": 8.144449838167579e-06, | |
| "loss": 0.4655, | |
| "step": 1161 | |
| }, | |
| { | |
| "epoch": 2.609769792251544, | |
| "grad_norm": 0.12477454543113708, | |
| "learning_rate": 8.052703139383315e-06, | |
| "loss": 0.4568, | |
| "step": 1162 | |
| }, | |
| { | |
| "epoch": 2.612015721504773, | |
| "grad_norm": 0.12605507671833038, | |
| "learning_rate": 7.96144873847796e-06, | |
| "loss": 0.4558, | |
| "step": 1163 | |
| }, | |
| { | |
| "epoch": 2.614261650758001, | |
| "grad_norm": 0.12706461548805237, | |
| "learning_rate": 7.870687259857858e-06, | |
| "loss": 0.4343, | |
| "step": 1164 | |
| }, | |
| { | |
| "epoch": 2.6165075800112296, | |
| "grad_norm": 0.12751144170761108, | |
| "learning_rate": 7.78041932455655e-06, | |
| "loss": 0.4554, | |
| "step": 1165 | |
| }, | |
| { | |
| "epoch": 2.618753509264458, | |
| "grad_norm": 0.12677204608917236, | |
| "learning_rate": 7.690645550230482e-06, | |
| "loss": 0.4587, | |
| "step": 1166 | |
| }, | |
| { | |
| "epoch": 2.6209994385176865, | |
| "grad_norm": 0.12588229775428772, | |
| "learning_rate": 7.6013665511548114e-06, | |
| "loss": 0.4358, | |
| "step": 1167 | |
| }, | |
| { | |
| "epoch": 2.6232453677709153, | |
| "grad_norm": 0.12063749879598618, | |
| "learning_rate": 7.512582938219259e-06, | |
| "loss": 0.4384, | |
| "step": 1168 | |
| }, | |
| { | |
| "epoch": 2.6254912970241437, | |
| "grad_norm": 0.12080162763595581, | |
| "learning_rate": 7.424295318923831e-06, | |
| "loss": 0.4542, | |
| "step": 1169 | |
| }, | |
| { | |
| "epoch": 2.627737226277372, | |
| "grad_norm": 0.12560433149337769, | |
| "learning_rate": 7.336504297374749e-06, | |
| "loss": 0.4493, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 2.629983155530601, | |
| "grad_norm": 9.130139350891113, | |
| "learning_rate": 7.249210474280208e-06, | |
| "loss": 0.4636, | |
| "step": 1171 | |
| }, | |
| { | |
| "epoch": 2.6322290847838294, | |
| "grad_norm": 0.12350396066904068, | |
| "learning_rate": 7.162414446946395e-06, | |
| "loss": 0.4543, | |
| "step": 1172 | |
| }, | |
| { | |
| "epoch": 2.634475014037058, | |
| "grad_norm": 0.12666672468185425, | |
| "learning_rate": 7.076116809273323e-06, | |
| "loss": 0.4633, | |
| "step": 1173 | |
| }, | |
| { | |
| "epoch": 2.6367209432902863, | |
| "grad_norm": 0.12505994737148285, | |
| "learning_rate": 6.990318151750757e-06, | |
| "loss": 0.4401, | |
| "step": 1174 | |
| }, | |
| { | |
| "epoch": 2.6389668725435147, | |
| "grad_norm": 0.1194506362080574, | |
| "learning_rate": 6.9050190614542565e-06, | |
| "loss": 0.4625, | |
| "step": 1175 | |
| }, | |
| { | |
| "epoch": 2.6412128017967436, | |
| "grad_norm": 0.12401262670755386, | |
| "learning_rate": 6.8202201220410255e-06, | |
| "loss": 0.4357, | |
| "step": 1176 | |
| }, | |
| { | |
| "epoch": 2.643458731049972, | |
| "grad_norm": 0.12455414235591888, | |
| "learning_rate": 6.73592191374607e-06, | |
| "loss": 0.4494, | |
| "step": 1177 | |
| }, | |
| { | |
| "epoch": 2.6457046603032004, | |
| "grad_norm": 0.12066637724637985, | |
| "learning_rate": 6.652125013378108e-06, | |
| "loss": 0.4565, | |
| "step": 1178 | |
| }, | |
| { | |
| "epoch": 2.647950589556429, | |
| "grad_norm": 0.12697719037532806, | |
| "learning_rate": 6.5688299943157e-06, | |
| "loss": 0.4434, | |
| "step": 1179 | |
| }, | |
| { | |
| "epoch": 2.6501965188096577, | |
| "grad_norm": 0.12216756492853165, | |
| "learning_rate": 6.486037426503276e-06, | |
| "loss": 0.4461, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 2.652442448062886, | |
| "grad_norm": 0.12145403027534485, | |
| "learning_rate": 6.403747876447232e-06, | |
| "loss": 0.4506, | |
| "step": 1181 | |
| }, | |
| { | |
| "epoch": 2.6546883773161145, | |
| "grad_norm": 0.11756281554698944, | |
| "learning_rate": 6.321961907212109e-06, | |
| "loss": 0.463, | |
| "step": 1182 | |
| }, | |
| { | |
| "epoch": 2.656934306569343, | |
| "grad_norm": 0.12291593104600906, | |
| "learning_rate": 6.240680078416699e-06, | |
| "loss": 0.4538, | |
| "step": 1183 | |
| }, | |
| { | |
| "epoch": 2.6591802358225713, | |
| "grad_norm": 0.12477383762598038, | |
| "learning_rate": 6.15990294623023e-06, | |
| "loss": 0.456, | |
| "step": 1184 | |
| }, | |
| { | |
| "epoch": 2.6614261650758, | |
| "grad_norm": 0.12275049090385437, | |
| "learning_rate": 6.079631063368547e-06, | |
| "loss": 0.4443, | |
| "step": 1185 | |
| }, | |
| { | |
| "epoch": 2.6636720943290286, | |
| "grad_norm": 0.12498319894075394, | |
| "learning_rate": 5.999864979090326e-06, | |
| "loss": 0.4487, | |
| "step": 1186 | |
| }, | |
| { | |
| "epoch": 2.665918023582257, | |
| "grad_norm": 0.11939443647861481, | |
| "learning_rate": 5.92060523919332e-06, | |
| "loss": 0.4285, | |
| "step": 1187 | |
| }, | |
| { | |
| "epoch": 2.668163952835486, | |
| "grad_norm": 0.12449135631322861, | |
| "learning_rate": 5.8418523860106665e-06, | |
| "loss": 0.4609, | |
| "step": 1188 | |
| }, | |
| { | |
| "epoch": 2.6704098820887143, | |
| "grad_norm": 0.12374921143054962, | |
| "learning_rate": 5.763606958407116e-06, | |
| "loss": 0.4441, | |
| "step": 1189 | |
| }, | |
| { | |
| "epoch": 2.6726558113419427, | |
| "grad_norm": 0.11954803764820099, | |
| "learning_rate": 5.6858694917754e-06, | |
| "loss": 0.4566, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 2.674901740595171, | |
| "grad_norm": 0.12245208770036697, | |
| "learning_rate": 5.6086405180324665e-06, | |
| "loss": 0.4519, | |
| "step": 1191 | |
| }, | |
| { | |
| "epoch": 2.6771476698483996, | |
| "grad_norm": 0.1250237375497818, | |
| "learning_rate": 5.531920565616e-06, | |
| "loss": 0.4364, | |
| "step": 1192 | |
| }, | |
| { | |
| "epoch": 2.6793935991016284, | |
| "grad_norm": 0.12335599958896637, | |
| "learning_rate": 5.455710159480649e-06, | |
| "loss": 0.4513, | |
| "step": 1193 | |
| }, | |
| { | |
| "epoch": 2.681639528354857, | |
| "grad_norm": 0.12619943916797638, | |
| "learning_rate": 5.380009821094536e-06, | |
| "loss": 0.4531, | |
| "step": 1194 | |
| }, | |
| { | |
| "epoch": 2.6838854576080853, | |
| "grad_norm": 0.1240544244647026, | |
| "learning_rate": 5.30482006843565e-06, | |
| "loss": 0.4396, | |
| "step": 1195 | |
| }, | |
| { | |
| "epoch": 2.686131386861314, | |
| "grad_norm": 0.12158697843551636, | |
| "learning_rate": 5.230141415988312e-06, | |
| "loss": 0.4426, | |
| "step": 1196 | |
| }, | |
| { | |
| "epoch": 2.6883773161145426, | |
| "grad_norm": 0.12433162331581116, | |
| "learning_rate": 5.155974374739634e-06, | |
| "loss": 0.447, | |
| "step": 1197 | |
| }, | |
| { | |
| "epoch": 2.690623245367771, | |
| "grad_norm": 0.12310656160116196, | |
| "learning_rate": 5.082319452176068e-06, | |
| "loss": 0.4359, | |
| "step": 1198 | |
| }, | |
| { | |
| "epoch": 2.6928691746209994, | |
| "grad_norm": 0.11813896149396896, | |
| "learning_rate": 5.009177152279865e-06, | |
| "loss": 0.4538, | |
| "step": 1199 | |
| }, | |
| { | |
| "epoch": 2.695115103874228, | |
| "grad_norm": 0.12028888612985611, | |
| "learning_rate": 4.936547975525692e-06, | |
| "loss": 0.4334, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 2.6973610331274562, | |
| "grad_norm": 0.1224963515996933, | |
| "learning_rate": 4.864432418877192e-06, | |
| "loss": 0.4454, | |
| "step": 1201 | |
| }, | |
| { | |
| "epoch": 2.699606962380685, | |
| "grad_norm": 0.12296409159898758, | |
| "learning_rate": 4.792830975783531e-06, | |
| "loss": 0.4439, | |
| "step": 1202 | |
| }, | |
| { | |
| "epoch": 2.7018528916339135, | |
| "grad_norm": 0.11706443876028061, | |
| "learning_rate": 4.721744136176103e-06, | |
| "loss": 0.4288, | |
| "step": 1203 | |
| }, | |
| { | |
| "epoch": 2.704098820887142, | |
| "grad_norm": 0.12277070432901382, | |
| "learning_rate": 4.651172386465152e-06, | |
| "loss": 0.454, | |
| "step": 1204 | |
| }, | |
| { | |
| "epoch": 2.706344750140371, | |
| "grad_norm": 0.12013454735279083, | |
| "learning_rate": 4.581116209536358e-06, | |
| "loss": 0.4405, | |
| "step": 1205 | |
| }, | |
| { | |
| "epoch": 2.708590679393599, | |
| "grad_norm": 0.12198374420404434, | |
| "learning_rate": 4.511576084747696e-06, | |
| "loss": 0.4646, | |
| "step": 1206 | |
| }, | |
| { | |
| "epoch": 2.7108366086468276, | |
| "grad_norm": 0.11776817589998245, | |
| "learning_rate": 4.442552487925982e-06, | |
| "loss": 0.4494, | |
| "step": 1207 | |
| }, | |
| { | |
| "epoch": 2.713082537900056, | |
| "grad_norm": 0.12356902658939362, | |
| "learning_rate": 4.3740458913637605e-06, | |
| "loss": 0.4578, | |
| "step": 1208 | |
| }, | |
| { | |
| "epoch": 2.7153284671532845, | |
| "grad_norm": 0.11953306198120117, | |
| "learning_rate": 4.3060567638159775e-06, | |
| "loss": 0.4379, | |
| "step": 1209 | |
| }, | |
| { | |
| "epoch": 2.7175743964065133, | |
| "grad_norm": 0.12432871758937836, | |
| "learning_rate": 4.238585570496847e-06, | |
| "loss": 0.4441, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 2.7198203256597417, | |
| "grad_norm": 0.11917420476675034, | |
| "learning_rate": 4.171632773076581e-06, | |
| "loss": 0.4477, | |
| "step": 1211 | |
| }, | |
| { | |
| "epoch": 2.72206625491297, | |
| "grad_norm": 0.11728362739086151, | |
| "learning_rate": 4.105198829678285e-06, | |
| "loss": 0.4638, | |
| "step": 1212 | |
| }, | |
| { | |
| "epoch": 2.724312184166199, | |
| "grad_norm": 0.1192561611533165, | |
| "learning_rate": 4.039284194874862e-06, | |
| "loss": 0.427, | |
| "step": 1213 | |
| }, | |
| { | |
| "epoch": 2.7265581134194274, | |
| "grad_norm": 0.11842131614685059, | |
| "learning_rate": 3.973889319685809e-06, | |
| "loss": 0.4321, | |
| "step": 1214 | |
| }, | |
| { | |
| "epoch": 2.728804042672656, | |
| "grad_norm": 0.11767691373825073, | |
| "learning_rate": 3.909014651574197e-06, | |
| "loss": 0.4407, | |
| "step": 1215 | |
| }, | |
| { | |
| "epoch": 2.7310499719258843, | |
| "grad_norm": 0.1151251420378685, | |
| "learning_rate": 3.844660634443616e-06, | |
| "loss": 0.4472, | |
| "step": 1216 | |
| }, | |
| { | |
| "epoch": 2.7332959011791127, | |
| "grad_norm": 0.1207621842622757, | |
| "learning_rate": 3.7808277086350464e-06, | |
| "loss": 0.4326, | |
| "step": 1217 | |
| }, | |
| { | |
| "epoch": 2.735541830432341, | |
| "grad_norm": 0.11696569621562958, | |
| "learning_rate": 3.7175163109239855e-06, | |
| "loss": 0.4421, | |
| "step": 1218 | |
| }, | |
| { | |
| "epoch": 2.73778775968557, | |
| "grad_norm": 0.11953862756490707, | |
| "learning_rate": 3.6547268745173247e-06, | |
| "loss": 0.4382, | |
| "step": 1219 | |
| }, | |
| { | |
| "epoch": 2.7400336889387984, | |
| "grad_norm": 0.12477323412895203, | |
| "learning_rate": 3.5924598290504855e-06, | |
| "loss": 0.4477, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 2.742279618192027, | |
| "grad_norm": 0.11988485604524612, | |
| "learning_rate": 3.530715600584449e-06, | |
| "loss": 0.4432, | |
| "step": 1221 | |
| }, | |
| { | |
| "epoch": 2.7445255474452557, | |
| "grad_norm": 0.12258612364530563, | |
| "learning_rate": 3.469494611602775e-06, | |
| "loss": 0.4575, | |
| "step": 1222 | |
| }, | |
| { | |
| "epoch": 2.746771476698484, | |
| "grad_norm": 0.125362828373909, | |
| "learning_rate": 3.4087972810088287e-06, | |
| "loss": 0.4358, | |
| "step": 1223 | |
| }, | |
| { | |
| "epoch": 2.7490174059517125, | |
| "grad_norm": 0.11876025050878525, | |
| "learning_rate": 3.348624024122824e-06, | |
| "loss": 0.4377, | |
| "step": 1224 | |
| }, | |
| { | |
| "epoch": 2.751263335204941, | |
| "grad_norm": 0.1166381686925888, | |
| "learning_rate": 3.2889752526790165e-06, | |
| "loss": 0.4348, | |
| "step": 1225 | |
| }, | |
| { | |
| "epoch": 2.7535092644581693, | |
| "grad_norm": 0.1194562315940857, | |
| "learning_rate": 3.2298513748228787e-06, | |
| "loss": 0.4443, | |
| "step": 1226 | |
| }, | |
| { | |
| "epoch": 2.755755193711398, | |
| "grad_norm": 0.11869972944259644, | |
| "learning_rate": 3.1712527951083126e-06, | |
| "loss": 0.4479, | |
| "step": 1227 | |
| }, | |
| { | |
| "epoch": 2.7580011229646266, | |
| "grad_norm": 0.11969739198684692, | |
| "learning_rate": 3.1131799144948683e-06, | |
| "loss": 0.454, | |
| "step": 1228 | |
| }, | |
| { | |
| "epoch": 2.760247052217855, | |
| "grad_norm": 0.12087547779083252, | |
| "learning_rate": 3.0556331303450437e-06, | |
| "loss": 0.4341, | |
| "step": 1229 | |
| }, | |
| { | |
| "epoch": 2.762492981471084, | |
| "grad_norm": 0.12332521378993988, | |
| "learning_rate": 2.998612836421506e-06, | |
| "loss": 0.4513, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 2.7647389107243123, | |
| "grad_norm": 0.12205971032381058, | |
| "learning_rate": 2.9421194228844084e-06, | |
| "loss": 0.4501, | |
| "step": 1231 | |
| }, | |
| { | |
| "epoch": 2.7669848399775407, | |
| "grad_norm": 0.12263938784599304, | |
| "learning_rate": 2.88615327628877e-06, | |
| "loss": 0.4504, | |
| "step": 1232 | |
| }, | |
| { | |
| "epoch": 2.769230769230769, | |
| "grad_norm": 0.12588439881801605, | |
| "learning_rate": 2.830714779581776e-06, | |
| "loss": 0.4397, | |
| "step": 1233 | |
| }, | |
| { | |
| "epoch": 2.7714766984839976, | |
| "grad_norm": 0.12059302628040314, | |
| "learning_rate": 2.7758043121001834e-06, | |
| "loss": 0.4354, | |
| "step": 1234 | |
| }, | |
| { | |
| "epoch": 2.7737226277372264, | |
| "grad_norm": 0.11515524238348007, | |
| "learning_rate": 2.721422249567729e-06, | |
| "loss": 0.4495, | |
| "step": 1235 | |
| }, | |
| { | |
| "epoch": 2.775968556990455, | |
| "grad_norm": 0.11858617514371872, | |
| "learning_rate": 2.667568964092544e-06, | |
| "loss": 0.4572, | |
| "step": 1236 | |
| }, | |
| { | |
| "epoch": 2.7782144862436833, | |
| "grad_norm": 0.1132800504565239, | |
| "learning_rate": 2.6142448241646046e-06, | |
| "loss": 0.4492, | |
| "step": 1237 | |
| }, | |
| { | |
| "epoch": 2.7804604154969117, | |
| "grad_norm": 0.12041954696178436, | |
| "learning_rate": 2.561450194653219e-06, | |
| "loss": 0.444, | |
| "step": 1238 | |
| }, | |
| { | |
| "epoch": 2.7827063447501406, | |
| "grad_norm": 0.1182764321565628, | |
| "learning_rate": 2.509185436804549e-06, | |
| "loss": 0.4338, | |
| "step": 1239 | |
| }, | |
| { | |
| "epoch": 2.784952274003369, | |
| "grad_norm": 0.13408203423023224, | |
| "learning_rate": 2.457450908239114e-06, | |
| "loss": 0.4487, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 2.7871982032565974, | |
| "grad_norm": 0.12381100654602051, | |
| "learning_rate": 2.4062469629493412e-06, | |
| "loss": 0.4364, | |
| "step": 1241 | |
| }, | |
| { | |
| "epoch": 2.789444132509826, | |
| "grad_norm": 0.12140806019306183, | |
| "learning_rate": 2.3555739512971565e-06, | |
| "loss": 0.4296, | |
| "step": 1242 | |
| }, | |
| { | |
| "epoch": 2.7916900617630542, | |
| "grad_norm": 0.12192130833864212, | |
| "learning_rate": 2.3054322200115963e-06, | |
| "loss": 0.4537, | |
| "step": 1243 | |
| }, | |
| { | |
| "epoch": 2.793935991016283, | |
| "grad_norm": 0.11975108832120895, | |
| "learning_rate": 2.255822112186401e-06, | |
| "loss": 0.4735, | |
| "step": 1244 | |
| }, | |
| { | |
| "epoch": 2.7961819202695115, | |
| "grad_norm": 0.12235341221094131, | |
| "learning_rate": 2.2067439672777047e-06, | |
| "loss": 0.451, | |
| "step": 1245 | |
| }, | |
| { | |
| "epoch": 2.79842784952274, | |
| "grad_norm": 0.12043313682079315, | |
| "learning_rate": 2.158198121101691e-06, | |
| "loss": 0.4381, | |
| "step": 1246 | |
| }, | |
| { | |
| "epoch": 2.800673778775969, | |
| "grad_norm": 0.12066707760095596, | |
| "learning_rate": 2.1101849058322932e-06, | |
| "loss": 0.4435, | |
| "step": 1247 | |
| }, | |
| { | |
| "epoch": 2.802919708029197, | |
| "grad_norm": 0.1135956272482872, | |
| "learning_rate": 2.062704649998937e-06, | |
| "loss": 0.4406, | |
| "step": 1248 | |
| }, | |
| { | |
| "epoch": 2.8051656372824256, | |
| "grad_norm": 0.12277340143918991, | |
| "learning_rate": 2.0157576784843024e-06, | |
| "loss": 0.4661, | |
| "step": 1249 | |
| }, | |
| { | |
| "epoch": 2.807411566535654, | |
| "grad_norm": 0.11728162318468094, | |
| "learning_rate": 1.9693443125220346e-06, | |
| "loss": 0.4431, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 2.8096574957888825, | |
| "grad_norm": 0.11668264865875244, | |
| "learning_rate": 1.9234648696946354e-06, | |
| "loss": 0.4388, | |
| "step": 1251 | |
| }, | |
| { | |
| "epoch": 2.8119034250421113, | |
| "grad_norm": 0.11695986986160278, | |
| "learning_rate": 1.878119663931246e-06, | |
| "loss": 0.4494, | |
| "step": 1252 | |
| }, | |
| { | |
| "epoch": 2.8141493542953397, | |
| "grad_norm": 0.1159198209643364, | |
| "learning_rate": 1.833309005505477e-06, | |
| "loss": 0.4311, | |
| "step": 1253 | |
| }, | |
| { | |
| "epoch": 2.816395283548568, | |
| "grad_norm": 0.1172918975353241, | |
| "learning_rate": 1.7890332010333233e-06, | |
| "loss": 0.4621, | |
| "step": 1254 | |
| }, | |
| { | |
| "epoch": 2.8186412128017966, | |
| "grad_norm": 0.12139487266540527, | |
| "learning_rate": 1.7452925534710763e-06, | |
| "loss": 0.4393, | |
| "step": 1255 | |
| }, | |
| { | |
| "epoch": 2.8208871420550254, | |
| "grad_norm": 0.11837179213762283, | |
| "learning_rate": 1.7020873621131738e-06, | |
| "loss": 0.447, | |
| "step": 1256 | |
| }, | |
| { | |
| "epoch": 2.823133071308254, | |
| "grad_norm": 0.12008003145456314, | |
| "learning_rate": 1.6594179225902652e-06, | |
| "loss": 0.4516, | |
| "step": 1257 | |
| }, | |
| { | |
| "epoch": 2.8253790005614823, | |
| "grad_norm": 0.11927176266908646, | |
| "learning_rate": 1.617284526867078e-06, | |
| "loss": 0.4404, | |
| "step": 1258 | |
| }, | |
| { | |
| "epoch": 2.8276249298147107, | |
| "grad_norm": 0.11693605035543442, | |
| "learning_rate": 1.5756874632405095e-06, | |
| "loss": 0.4438, | |
| "step": 1259 | |
| }, | |
| { | |
| "epoch": 2.829870859067939, | |
| "grad_norm": 0.11941110342741013, | |
| "learning_rate": 1.534627016337593e-06, | |
| "loss": 0.4426, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 2.832116788321168, | |
| "grad_norm": 0.11750718951225281, | |
| "learning_rate": 1.494103467113588e-06, | |
| "loss": 0.4322, | |
| "step": 1261 | |
| }, | |
| { | |
| "epoch": 2.8343627175743964, | |
| "grad_norm": 0.116007000207901, | |
| "learning_rate": 1.4541170928500248e-06, | |
| "loss": 0.4621, | |
| "step": 1262 | |
| }, | |
| { | |
| "epoch": 2.836608646827625, | |
| "grad_norm": 0.11818964034318924, | |
| "learning_rate": 1.4146681671528418e-06, | |
| "loss": 0.4638, | |
| "step": 1263 | |
| }, | |
| { | |
| "epoch": 2.8388545760808537, | |
| "grad_norm": 0.11916031688451767, | |
| "learning_rate": 1.3757569599504917e-06, | |
| "loss": 0.4425, | |
| "step": 1264 | |
| }, | |
| { | |
| "epoch": 2.841100505334082, | |
| "grad_norm": 0.11497969180345535, | |
| "learning_rate": 1.3373837374920862e-06, | |
| "loss": 0.4425, | |
| "step": 1265 | |
| }, | |
| { | |
| "epoch": 2.8433464345873105, | |
| "grad_norm": 0.11767168343067169, | |
| "learning_rate": 1.2995487623456194e-06, | |
| "loss": 0.4532, | |
| "step": 1266 | |
| }, | |
| { | |
| "epoch": 2.845592363840539, | |
| "grad_norm": 0.115963876247406, | |
| "learning_rate": 1.2622522933961112e-06, | |
| "loss": 0.4344, | |
| "step": 1267 | |
| }, | |
| { | |
| "epoch": 2.8478382930937673, | |
| "grad_norm": 0.11714527010917664, | |
| "learning_rate": 1.225494585843876e-06, | |
| "loss": 0.4678, | |
| "step": 1268 | |
| }, | |
| { | |
| "epoch": 2.850084222346996, | |
| "grad_norm": 0.11749914288520813, | |
| "learning_rate": 1.1892758912027546e-06, | |
| "loss": 0.4445, | |
| "step": 1269 | |
| }, | |
| { | |
| "epoch": 2.8523301516002246, | |
| "grad_norm": 0.11820235848426819, | |
| "learning_rate": 1.1535964572984093e-06, | |
| "loss": 0.4659, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 2.854576080853453, | |
| "grad_norm": 0.11582965403795242, | |
| "learning_rate": 1.118456528266636e-06, | |
| "loss": 0.4441, | |
| "step": 1271 | |
| }, | |
| { | |
| "epoch": 2.856822010106682, | |
| "grad_norm": 0.11765659600496292, | |
| "learning_rate": 1.0838563445516503e-06, | |
| "loss": 0.4441, | |
| "step": 1272 | |
| }, | |
| { | |
| "epoch": 2.8590679393599103, | |
| "grad_norm": 0.11495634913444519, | |
| "learning_rate": 1.0497961429044979e-06, | |
| "loss": 0.4397, | |
| "step": 1273 | |
| }, | |
| { | |
| "epoch": 2.8613138686131387, | |
| "grad_norm": 0.13958555459976196, | |
| "learning_rate": 1.0162761563813927e-06, | |
| "loss": 0.4332, | |
| "step": 1274 | |
| }, | |
| { | |
| "epoch": 2.863559797866367, | |
| "grad_norm": 0.11957214772701263, | |
| "learning_rate": 9.832966143421551e-07, | |
| "loss": 0.4476, | |
| "step": 1275 | |
| }, | |
| { | |
| "epoch": 2.8658057271195956, | |
| "grad_norm": 0.12185267359018326, | |
| "learning_rate": 9.508577424486031e-07, | |
| "loss": 0.4571, | |
| "step": 1276 | |
| }, | |
| { | |
| "epoch": 2.868051656372824, | |
| "grad_norm": 0.1512320339679718, | |
| "learning_rate": 9.18959762663043e-07, | |
| "loss": 0.4322, | |
| "step": 1277 | |
| }, | |
| { | |
| "epoch": 2.870297585626053, | |
| "grad_norm": 0.11999038606882095, | |
| "learning_rate": 8.876028932467417e-07, | |
| "loss": 0.4399, | |
| "step": 1278 | |
| }, | |
| { | |
| "epoch": 2.8725435148792813, | |
| "grad_norm": 0.11745017766952515, | |
| "learning_rate": 8.567873487584077e-07, | |
| "loss": 0.444, | |
| "step": 1279 | |
| }, | |
| { | |
| "epoch": 2.8747894441325097, | |
| "grad_norm": 0.12399045377969742, | |
| "learning_rate": 8.265133400527881e-07, | |
| "loss": 0.4421, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 2.8770353733857386, | |
| "grad_norm": 0.11552898585796356, | |
| "learning_rate": 7.967810742791404e-07, | |
| "loss": 0.4369, | |
| "step": 1281 | |
| }, | |
| { | |
| "epoch": 2.879281302638967, | |
| "grad_norm": 0.1216784194111824, | |
| "learning_rate": 7.675907548798744e-07, | |
| "loss": 0.4597, | |
| "step": 1282 | |
| }, | |
| { | |
| "epoch": 2.8815272318921954, | |
| "grad_norm": 0.12029793858528137, | |
| "learning_rate": 7.389425815891394e-07, | |
| "loss": 0.4455, | |
| "step": 1283 | |
| }, | |
| { | |
| "epoch": 2.883773161145424, | |
| "grad_norm": 0.11763288825750351, | |
| "learning_rate": 7.108367504314651e-07, | |
| "loss": 0.4422, | |
| "step": 1284 | |
| }, | |
| { | |
| "epoch": 2.8860190903986522, | |
| "grad_norm": 0.11679881066083908, | |
| "learning_rate": 6.832734537204299e-07, | |
| "loss": 0.4525, | |
| "step": 1285 | |
| }, | |
| { | |
| "epoch": 2.888265019651881, | |
| "grad_norm": 0.11851628869771957, | |
| "learning_rate": 6.562528800572931e-07, | |
| "loss": 0.4435, | |
| "step": 1286 | |
| }, | |
| { | |
| "epoch": 2.8905109489051095, | |
| "grad_norm": 0.11480539292097092, | |
| "learning_rate": 6.297752143297864e-07, | |
| "loss": 0.4484, | |
| "step": 1287 | |
| }, | |
| { | |
| "epoch": 2.892756878158338, | |
| "grad_norm": 0.12258218973875046, | |
| "learning_rate": 6.03840637710782e-07, | |
| "loss": 0.4504, | |
| "step": 1288 | |
| }, | |
| { | |
| "epoch": 2.895002807411567, | |
| "grad_norm": 0.12160119414329529, | |
| "learning_rate": 5.784493276570669e-07, | |
| "loss": 0.4401, | |
| "step": 1289 | |
| }, | |
| { | |
| "epoch": 2.897248736664795, | |
| "grad_norm": 0.1183420866727829, | |
| "learning_rate": 5.536014579081617e-07, | |
| "loss": 0.4523, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 2.8994946659180236, | |
| "grad_norm": 0.1185230165719986, | |
| "learning_rate": 5.292971984850948e-07, | |
| "loss": 0.4497, | |
| "step": 1291 | |
| }, | |
| { | |
| "epoch": 2.901740595171252, | |
| "grad_norm": 0.11411769688129425, | |
| "learning_rate": 5.055367156892654e-07, | |
| "loss": 0.4436, | |
| "step": 1292 | |
| }, | |
| { | |
| "epoch": 2.9039865244244805, | |
| "grad_norm": 0.11810418963432312, | |
| "learning_rate": 4.823201721012538e-07, | |
| "loss": 0.4435, | |
| "step": 1293 | |
| }, | |
| { | |
| "epoch": 2.906232453677709, | |
| "grad_norm": 0.11871050298213959, | |
| "learning_rate": 4.5964772657980827e-07, | |
| "loss": 0.4512, | |
| "step": 1294 | |
| }, | |
| { | |
| "epoch": 2.9084783829309377, | |
| "grad_norm": 0.12631046772003174, | |
| "learning_rate": 4.375195342606464e-07, | |
| "loss": 0.4352, | |
| "step": 1295 | |
| }, | |
| { | |
| "epoch": 2.910724312184166, | |
| "grad_norm": 0.11332812160253525, | |
| "learning_rate": 4.159357465554603e-07, | |
| "loss": 0.4344, | |
| "step": 1296 | |
| }, | |
| { | |
| "epoch": 2.9129702414373946, | |
| "grad_norm": 0.11570383608341217, | |
| "learning_rate": 3.9489651115087734e-07, | |
| "loss": 0.4491, | |
| "step": 1297 | |
| }, | |
| { | |
| "epoch": 2.9152161706906234, | |
| "grad_norm": 0.1170554980635643, | |
| "learning_rate": 3.7440197200741214e-07, | |
| "loss": 0.4314, | |
| "step": 1298 | |
| }, | |
| { | |
| "epoch": 2.917462099943852, | |
| "grad_norm": 0.11701026558876038, | |
| "learning_rate": 3.544522693585428e-07, | |
| "loss": 0.449, | |
| "step": 1299 | |
| }, | |
| { | |
| "epoch": 2.9197080291970803, | |
| "grad_norm": 0.11610274761915207, | |
| "learning_rate": 3.3504753970968083e-07, | |
| "loss": 0.4493, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 2.9219539584503087, | |
| "grad_norm": 0.1187182143330574, | |
| "learning_rate": 3.1618791583729157e-07, | |
| "loss": 0.4714, | |
| "step": 1301 | |
| }, | |
| { | |
| "epoch": 2.924199887703537, | |
| "grad_norm": 0.11808615922927856, | |
| "learning_rate": 2.97873526787944e-07, | |
| "loss": 0.4494, | |
| "step": 1302 | |
| }, | |
| { | |
| "epoch": 2.926445816956766, | |
| "grad_norm": 0.11943615227937698, | |
| "learning_rate": 2.801044978774758e-07, | |
| "loss": 0.444, | |
| "step": 1303 | |
| }, | |
| { | |
| "epoch": 2.9286917462099944, | |
| "grad_norm": 0.11159630864858627, | |
| "learning_rate": 2.6288095069009647e-07, | |
| "loss": 0.4365, | |
| "step": 1304 | |
| }, | |
| { | |
| "epoch": 2.930937675463223, | |
| "grad_norm": 0.11759793758392334, | |
| "learning_rate": 2.4620300307756975e-07, | |
| "loss": 0.4449, | |
| "step": 1305 | |
| }, | |
| { | |
| "epoch": 2.9331836047164517, | |
| "grad_norm": 0.11761987954378128, | |
| "learning_rate": 2.30070769158397e-07, | |
| "loss": 0.4392, | |
| "step": 1306 | |
| }, | |
| { | |
| "epoch": 2.93542953396968, | |
| "grad_norm": 0.11657937616109848, | |
| "learning_rate": 2.1448435931705315e-07, | |
| "loss": 0.4361, | |
| "step": 1307 | |
| }, | |
| { | |
| "epoch": 2.9376754632229085, | |
| "grad_norm": 0.11725448071956635, | |
| "learning_rate": 1.994438802032228e-07, | |
| "loss": 0.4267, | |
| "step": 1308 | |
| }, | |
| { | |
| "epoch": 2.939921392476137, | |
| "grad_norm": 0.12075719982385635, | |
| "learning_rate": 1.8494943473108095e-07, | |
| "loss": 0.4495, | |
| "step": 1309 | |
| }, | |
| { | |
| "epoch": 2.9421673217293653, | |
| "grad_norm": 0.11142679303884506, | |
| "learning_rate": 1.710011220785557e-07, | |
| "loss": 0.4275, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 2.944413250982594, | |
| "grad_norm": 0.1148485466837883, | |
| "learning_rate": 1.575990376866976e-07, | |
| "loss": 0.4362, | |
| "step": 1311 | |
| }, | |
| { | |
| "epoch": 2.9466591802358226, | |
| "grad_norm": 0.11960410326719284, | |
| "learning_rate": 1.4474327325897818e-07, | |
| "loss": 0.4507, | |
| "step": 1312 | |
| }, | |
| { | |
| "epoch": 2.948905109489051, | |
| "grad_norm": 0.11774080991744995, | |
| "learning_rate": 1.324339167607036e-07, | |
| "loss": 0.4571, | |
| "step": 1313 | |
| }, | |
| { | |
| "epoch": 2.9511510387422795, | |
| "grad_norm": 0.11617586016654968, | |
| "learning_rate": 1.2067105241839294e-07, | |
| "loss": 0.4501, | |
| "step": 1314 | |
| }, | |
| { | |
| "epoch": 2.9533969679955083, | |
| "grad_norm": 0.11817507445812225, | |
| "learning_rate": 1.0945476071918316e-07, | |
| "loss": 0.4471, | |
| "step": 1315 | |
| }, | |
| { | |
| "epoch": 2.9556428972487367, | |
| "grad_norm": 0.12056715786457062, | |
| "learning_rate": 9.878511841034056e-08, | |
| "loss": 0.4382, | |
| "step": 1316 | |
| }, | |
| { | |
| "epoch": 2.957888826501965, | |
| "grad_norm": 0.11966580897569656, | |
| "learning_rate": 8.866219849864799e-08, | |
| "loss": 0.4471, | |
| "step": 1317 | |
| }, | |
| { | |
| "epoch": 2.9601347557551936, | |
| "grad_norm": 0.11672661453485489, | |
| "learning_rate": 7.908607024999626e-08, | |
| "loss": 0.4636, | |
| "step": 1318 | |
| }, | |
| { | |
| "epoch": 2.962380685008422, | |
| "grad_norm": 0.12275572121143341, | |
| "learning_rate": 7.005679918882457e-08, | |
| "loss": 0.4388, | |
| "step": 1319 | |
| }, | |
| { | |
| "epoch": 2.964626614261651, | |
| "grad_norm": 0.12186376005411148, | |
| "learning_rate": 6.157444709773863e-08, | |
| "loss": 0.4538, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 2.9668725435148793, | |
| "grad_norm": 0.11584927141666412, | |
| "learning_rate": 5.3639072017057647e-08, | |
| "loss": 0.4464, | |
| "step": 1321 | |
| }, | |
| { | |
| "epoch": 2.9691184727681077, | |
| "grad_norm": 0.11662715673446655, | |
| "learning_rate": 4.625072824441468e-08, | |
| "loss": 0.439, | |
| "step": 1322 | |
| }, | |
| { | |
| "epoch": 2.9713644020213366, | |
| "grad_norm": 0.11534745246171951, | |
| "learning_rate": 3.940946633440135e-08, | |
| "loss": 0.4496, | |
| "step": 1323 | |
| }, | |
| { | |
| "epoch": 2.973610331274565, | |
| "grad_norm": 0.11853344738483429, | |
| "learning_rate": 3.3115333098212576e-08, | |
| "loss": 0.4498, | |
| "step": 1324 | |
| }, | |
| { | |
| "epoch": 2.9758562605277934, | |
| "grad_norm": 0.11632394790649414, | |
| "learning_rate": 2.7368371603326838e-08, | |
| "loss": 0.4311, | |
| "step": 1325 | |
| }, | |
| { | |
| "epoch": 2.978102189781022, | |
| "grad_norm": 0.11567545682191849, | |
| "learning_rate": 2.216862117319529e-08, | |
| "loss": 0.4427, | |
| "step": 1326 | |
| }, | |
| { | |
| "epoch": 2.9803481190342502, | |
| "grad_norm": 0.11598379909992218, | |
| "learning_rate": 1.7516117387010866e-08, | |
| "loss": 0.4452, | |
| "step": 1327 | |
| }, | |
| { | |
| "epoch": 2.982594048287479, | |
| "grad_norm": 0.12003415077924728, | |
| "learning_rate": 1.3410892079432914e-08, | |
| "loss": 0.4408, | |
| "step": 1328 | |
| }, | |
| { | |
| "epoch": 2.9848399775407075, | |
| "grad_norm": 0.11351985484361649, | |
| "learning_rate": 9.85297334037405e-09, | |
| "loss": 0.4529, | |
| "step": 1329 | |
| }, | |
| { | |
| "epoch": 2.987085906793936, | |
| "grad_norm": 0.11411769688129425, | |
| "learning_rate": 6.842385514831407e-09, | |
| "loss": 0.434, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 2.9893318360471643, | |
| "grad_norm": 0.12261338531970978, | |
| "learning_rate": 4.3791492026734604e-09, | |
| "loss": 0.4534, | |
| "step": 1331 | |
| }, | |
| { | |
| "epoch": 2.991577765300393, | |
| "grad_norm": 0.11999525874853134, | |
| "learning_rate": 2.463281258560102e-09, | |
| "loss": 0.448, | |
| "step": 1332 | |
| }, | |
| { | |
| "epoch": 2.9938236945536216, | |
| "grad_norm": 0.11168470978736877, | |
| "learning_rate": 1.094794791764997e-09, | |
| "loss": 0.4408, | |
| "step": 1333 | |
| }, | |
| { | |
| "epoch": 2.99606962380685, | |
| "grad_norm": 0.11737479269504547, | |
| "learning_rate": 2.736991661400623e-10, | |
| "loss": 0.4357, | |
| "step": 1334 | |
| }, | |
| { | |
| "epoch": 2.9983155530600785, | |
| "grad_norm": 0.11935008317232132, | |
| "learning_rate": 0.0, | |
| "loss": 0.4366, | |
| "step": 1335 | |
| }, | |
| { | |
| "epoch": 2.9983155530600785, | |
| "step": 1335, | |
| "total_flos": 4.209303851158733e+19, | |
| "train_loss": 0.5228641195690141, | |
| "train_runtime": 86845.4923, | |
| "train_samples_per_second": 3.937, | |
| "train_steps_per_second": 0.015 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 1335, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 100, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 4.209303851158733e+19, | |
| "train_batch_size": 16, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |