Image-Text-to-Text
Transformers
Safetensors
qwen3_5
llama-factory
full
Generated from Trainer
conversational
Instructions to use furproxy/9b-118 with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use furproxy/9b-118 with Transformers:
# Use a pipeline as a high-level helper from transformers import pipeline pipe = pipeline("image-text-to-text", model="furproxy/9b-118") messages = [ { "role": "user", "content": [ {"type": "image", "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/p-blog/candy.JPG"}, {"type": "text", "text": "What animal is on the candy?"} ] }, ] pipe(text=messages)# Load model directly from transformers import AutoProcessor, AutoModelForImageTextToText processor = AutoProcessor.from_pretrained("furproxy/9b-118") model = AutoModelForImageTextToText.from_pretrained("furproxy/9b-118") messages = [ { "role": "user", "content": [ {"type": "image", "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/p-blog/candy.JPG"}, {"type": "text", "text": "What animal is on the candy?"} ] }, ] inputs = processor.apply_chat_template( messages, add_generation_prompt=True, tokenize=True, return_dict=True, return_tensors="pt", ).to(model.device) outputs = model.generate(**inputs, max_new_tokens=40) print(processor.decode(outputs[0][inputs["input_ids"].shape[-1]:])) - Notebooks
- Google Colab
- Kaggle
- Local Apps Settings
- vLLM
How to use furproxy/9b-118 with vLLM:
Install from pip and serve model
# Install vLLM from pip: pip install vllm # Start the vLLM server: vllm serve "furproxy/9b-118" # Call the server using curl (OpenAI-compatible API): curl -X POST "http://localhost:8000/v1/chat/completions" \ -H "Content-Type: application/json" \ --data '{ "model": "furproxy/9b-118", "messages": [ { "role": "user", "content": [ { "type": "text", "text": "Describe this image in one sentence." }, { "type": "image_url", "image_url": { "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg" } } ] } ] }'Use Docker
docker model run hf.co/furproxy/9b-118
- SGLang
How to use furproxy/9b-118 with SGLang:
Install from pip and serve model
# Install SGLang from pip: pip install sglang # Start the SGLang server: python3 -m sglang.launch_server \ --model-path "furproxy/9b-118" \ --host 0.0.0.0 \ --port 30000 # Call the server using curl (OpenAI-compatible API): curl -X POST "http://localhost:30000/v1/chat/completions" \ -H "Content-Type: application/json" \ --data '{ "model": "furproxy/9b-118", "messages": [ { "role": "user", "content": [ { "type": "text", "text": "Describe this image in one sentence." }, { "type": "image_url", "image_url": { "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg" } } ] } ] }'Use Docker images
docker run --gpus all \ --shm-size 32g \ -p 30000:30000 \ -v ~/.cache/huggingface:/root/.cache/huggingface \ --env "HF_TOKEN=<secret>" \ --ipc=host \ lmsysorg/sglang:latest \ python3 -m sglang.launch_server \ --model-path "furproxy/9b-118" \ --host 0.0.0.0 \ --port 30000 # Call the server using curl (OpenAI-compatible API): curl -X POST "http://localhost:30000/v1/chat/completions" \ -H "Content-Type: application/json" \ --data '{ "model": "furproxy/9b-118", "messages": [ { "role": "user", "content": [ { "type": "text", "text": "Describe this image in one sentence." }, { "type": "image_url", "image_url": { "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg" } } ] } ] }' - Docker Model Runner
How to use furproxy/9b-118 with Docker Model Runner:
docker model run hf.co/furproxy/9b-118
| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 4.0, | |
| "eval_steps": 500, | |
| "global_step": 3276, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.002442002442002442, | |
| "grad_norm": 2.5476107597351074, | |
| "learning_rate": 3.0487804878048784e-08, | |
| "loss": 1.9267934560775757, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.004884004884004884, | |
| "grad_norm": 2.350306749343872, | |
| "learning_rate": 9.146341463414634e-08, | |
| "loss": 2.0976288318634033, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.007326007326007326, | |
| "grad_norm": 22.22303009033203, | |
| "learning_rate": 1.5243902439024392e-07, | |
| "loss": 2.4150097370147705, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.009768009768009768, | |
| "grad_norm": 4.77632999420166, | |
| "learning_rate": 2.134146341463415e-07, | |
| "loss": 1.9595110416412354, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.01221001221001221, | |
| "grad_norm": 6.901440620422363, | |
| "learning_rate": 2.7439024390243906e-07, | |
| "loss": 2.162900447845459, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.014652014652014652, | |
| "grad_norm": 11.045926094055176, | |
| "learning_rate": 3.3536585365853663e-07, | |
| "loss": 1.9404582977294922, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.017094017094017096, | |
| "grad_norm": 2.2156028747558594, | |
| "learning_rate": 3.963414634146342e-07, | |
| "loss": 1.6843563318252563, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.019536019536019536, | |
| "grad_norm": 26.805221557617188, | |
| "learning_rate": 4.573170731707317e-07, | |
| "loss": 2.0623722076416016, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.02197802197802198, | |
| "grad_norm": 17.565683364868164, | |
| "learning_rate": 5.182926829268293e-07, | |
| "loss": 2.7407174110412598, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.02442002442002442, | |
| "grad_norm": 4.266391277313232, | |
| "learning_rate": 5.79268292682927e-07, | |
| "loss": 1.88368821144104, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.026862026862026864, | |
| "grad_norm": 8.910398483276367, | |
| "learning_rate": 6.402439024390244e-07, | |
| "loss": 2.084914445877075, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.029304029304029304, | |
| "grad_norm": 2.130563497543335, | |
| "learning_rate": 7.012195121951221e-07, | |
| "loss": 2.019660234451294, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.031746031746031744, | |
| "grad_norm": 6.1367716789245605, | |
| "learning_rate": 7.621951219512196e-07, | |
| "loss": 1.9338700771331787, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.03418803418803419, | |
| "grad_norm": 6.56151008605957, | |
| "learning_rate": 8.231707317073172e-07, | |
| "loss": 2.0060365200042725, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.03663003663003663, | |
| "grad_norm": 9.909710884094238, | |
| "learning_rate": 8.841463414634147e-07, | |
| "loss": 1.8657618761062622, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.03907203907203907, | |
| "grad_norm": 2.332340955734253, | |
| "learning_rate": 9.451219512195123e-07, | |
| "loss": 1.385891318321228, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.04151404151404151, | |
| "grad_norm": 2.9691996574401855, | |
| "learning_rate": 1.0060975609756098e-06, | |
| "loss": 1.872510552406311, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.04395604395604396, | |
| "grad_norm": 3.3227553367614746, | |
| "learning_rate": 1.0670731707317073e-06, | |
| "loss": 1.4552903175354004, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.0463980463980464, | |
| "grad_norm": 2.721830368041992, | |
| "learning_rate": 1.128048780487805e-06, | |
| "loss": 1.8594551086425781, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.04884004884004884, | |
| "grad_norm": 5.821812152862549, | |
| "learning_rate": 1.1890243902439024e-06, | |
| "loss": 1.697621464729309, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.05128205128205128, | |
| "grad_norm": 1.486113429069519, | |
| "learning_rate": 1.25e-06, | |
| "loss": 1.7849284410476685, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.05372405372405373, | |
| "grad_norm": 4.890027046203613, | |
| "learning_rate": 1.3109756097560978e-06, | |
| "loss": 1.7496923208236694, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.05616605616605617, | |
| "grad_norm": 1.7662242650985718, | |
| "learning_rate": 1.3719512195121952e-06, | |
| "loss": 1.6558294296264648, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.05860805860805861, | |
| "grad_norm": 19.32802963256836, | |
| "learning_rate": 1.4329268292682927e-06, | |
| "loss": 1.4527249336242676, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.06105006105006105, | |
| "grad_norm": 9.77262020111084, | |
| "learning_rate": 1.4939024390243904e-06, | |
| "loss": 1.3586843013763428, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.06349206349206349, | |
| "grad_norm": 6.361555576324463, | |
| "learning_rate": 1.5548780487804878e-06, | |
| "loss": 1.5780984163284302, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.06593406593406594, | |
| "grad_norm": 15.675647735595703, | |
| "learning_rate": 1.6158536585365855e-06, | |
| "loss": 1.285346508026123, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.06837606837606838, | |
| "grad_norm": 4.175439834594727, | |
| "learning_rate": 1.6768292682926832e-06, | |
| "loss": 1.5857115983963013, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.07081807081807082, | |
| "grad_norm": 1.736680030822754, | |
| "learning_rate": 1.7378048780487804e-06, | |
| "loss": 1.5757516622543335, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.07326007326007326, | |
| "grad_norm": 1.47886061668396, | |
| "learning_rate": 1.7987804878048781e-06, | |
| "loss": 1.6183691024780273, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.0757020757020757, | |
| "grad_norm": 2.919388771057129, | |
| "learning_rate": 1.8597560975609758e-06, | |
| "loss": 1.164100170135498, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.07814407814407814, | |
| "grad_norm": 1.240254282951355, | |
| "learning_rate": 1.9207317073170733e-06, | |
| "loss": 1.767830491065979, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.08058608058608059, | |
| "grad_norm": 8.434248924255371, | |
| "learning_rate": 1.981707317073171e-06, | |
| "loss": 1.3761873245239258, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.08302808302808302, | |
| "grad_norm": 1.5368638038635254, | |
| "learning_rate": 2.042682926829268e-06, | |
| "loss": 1.2026317119598389, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.08547008547008547, | |
| "grad_norm": 0.9749501347541809, | |
| "learning_rate": 2.103658536585366e-06, | |
| "loss": 1.2645400762557983, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.08791208791208792, | |
| "grad_norm": 1.7136712074279785, | |
| "learning_rate": 2.1646341463414635e-06, | |
| "loss": 1.5449546575546265, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.09035409035409035, | |
| "grad_norm": 3.341733455657959, | |
| "learning_rate": 2.225609756097561e-06, | |
| "loss": 1.5047639608383179, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.0927960927960928, | |
| "grad_norm": 5.009698390960693, | |
| "learning_rate": 2.286585365853659e-06, | |
| "loss": 1.4909131526947021, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.09523809523809523, | |
| "grad_norm": 1.631039023399353, | |
| "learning_rate": 2.3475609756097563e-06, | |
| "loss": 1.3529361486434937, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.09768009768009768, | |
| "grad_norm": 6.177618026733398, | |
| "learning_rate": 2.408536585365854e-06, | |
| "loss": 1.254205346107483, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.10012210012210013, | |
| "grad_norm": 5.524102210998535, | |
| "learning_rate": 2.4695121951219513e-06, | |
| "loss": 1.165070652961731, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.10256410256410256, | |
| "grad_norm": 2.264727830886841, | |
| "learning_rate": 2.530487804878049e-06, | |
| "loss": 1.1751306056976318, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.10500610500610501, | |
| "grad_norm": 1.5993300676345825, | |
| "learning_rate": 2.5914634146341466e-06, | |
| "loss": 1.352165699005127, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.10744810744810745, | |
| "grad_norm": 1.8832273483276367, | |
| "learning_rate": 2.652439024390244e-06, | |
| "loss": 1.5243136882781982, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.10989010989010989, | |
| "grad_norm": 0.7285981178283691, | |
| "learning_rate": 2.713414634146342e-06, | |
| "loss": 1.2205549478530884, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.11233211233211234, | |
| "grad_norm": 2.318856716156006, | |
| "learning_rate": 2.7743902439024394e-06, | |
| "loss": 1.6029253005981445, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.11477411477411477, | |
| "grad_norm": 7.182040691375732, | |
| "learning_rate": 2.8353658536585365e-06, | |
| "loss": 1.1754858493804932, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.11721611721611722, | |
| "grad_norm": 1.7051122188568115, | |
| "learning_rate": 2.8963414634146343e-06, | |
| "loss": 1.4834587574005127, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.11965811965811966, | |
| "grad_norm": 3.0200533866882324, | |
| "learning_rate": 2.957317073170732e-06, | |
| "loss": 1.4276564121246338, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.1221001221001221, | |
| "grad_norm": 1.8382214307785034, | |
| "learning_rate": 3.0182926829268293e-06, | |
| "loss": 1.1662065982818604, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.12454212454212454, | |
| "grad_norm": 2.296553611755371, | |
| "learning_rate": 3.079268292682927e-06, | |
| "loss": 1.3122981786727905, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.12698412698412698, | |
| "grad_norm": 3.1782186031341553, | |
| "learning_rate": 3.1402439024390246e-06, | |
| "loss": 1.0392099618911743, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.12942612942612944, | |
| "grad_norm": 1.1442056894302368, | |
| "learning_rate": 3.201219512195122e-06, | |
| "loss": 0.9646719694137573, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.13186813186813187, | |
| "grad_norm": 4.929725170135498, | |
| "learning_rate": 3.26219512195122e-06, | |
| "loss": 1.420979619026184, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.1343101343101343, | |
| "grad_norm": 2.712373971939087, | |
| "learning_rate": 3.3231707317073174e-06, | |
| "loss": 1.6603320837020874, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.13675213675213677, | |
| "grad_norm": 2.1611270904541016, | |
| "learning_rate": 3.3841463414634153e-06, | |
| "loss": 1.452590823173523, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.1391941391941392, | |
| "grad_norm": 1.7481805086135864, | |
| "learning_rate": 3.4451219512195124e-06, | |
| "loss": 1.2166002988815308, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.14163614163614163, | |
| "grad_norm": 1.9498414993286133, | |
| "learning_rate": 3.50609756097561e-06, | |
| "loss": 1.3791627883911133, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.14407814407814407, | |
| "grad_norm": 5.3667497634887695, | |
| "learning_rate": 3.5670731707317073e-06, | |
| "loss": 1.2551401853561401, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 0.14652014652014653, | |
| "grad_norm": 2.414433717727661, | |
| "learning_rate": 3.628048780487805e-06, | |
| "loss": 1.3578366041183472, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.14896214896214896, | |
| "grad_norm": 1.8076027631759644, | |
| "learning_rate": 3.6890243902439026e-06, | |
| "loss": 1.3795714378356934, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 0.1514041514041514, | |
| "grad_norm": 2.020355701446533, | |
| "learning_rate": 3.7500000000000005e-06, | |
| "loss": 1.3397819995880127, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.15384615384615385, | |
| "grad_norm": 1.3748884201049805, | |
| "learning_rate": 3.810975609756098e-06, | |
| "loss": 1.1987930536270142, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 0.1562881562881563, | |
| "grad_norm": 1.4875504970550537, | |
| "learning_rate": 3.8719512195121954e-06, | |
| "loss": 1.4347355365753174, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.15873015873015872, | |
| "grad_norm": 1.2580325603485107, | |
| "learning_rate": 3.932926829268293e-06, | |
| "loss": 1.2884924411773682, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.16117216117216118, | |
| "grad_norm": 4.422817707061768, | |
| "learning_rate": 3.99390243902439e-06, | |
| "loss": 1.0759848356246948, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 0.16361416361416362, | |
| "grad_norm": 2.910273790359497, | |
| "learning_rate": 4.054878048780488e-06, | |
| "loss": 1.1693415641784668, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 0.16605616605616605, | |
| "grad_norm": 2.8875091075897217, | |
| "learning_rate": 4.115853658536585e-06, | |
| "loss": 1.1773910522460938, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.1684981684981685, | |
| "grad_norm": 3.0497472286224365, | |
| "learning_rate": 4.176829268292683e-06, | |
| "loss": 1.1259866952896118, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 0.17094017094017094, | |
| "grad_norm": 2.7244997024536133, | |
| "learning_rate": 4.237804878048781e-06, | |
| "loss": 1.1096811294555664, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.17338217338217338, | |
| "grad_norm": 1.9807188510894775, | |
| "learning_rate": 4.298780487804878e-06, | |
| "loss": 1.374996304512024, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 0.17582417582417584, | |
| "grad_norm": 1.5548163652420044, | |
| "learning_rate": 4.359756097560976e-06, | |
| "loss": 1.116044521331787, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.17826617826617827, | |
| "grad_norm": 2.0115115642547607, | |
| "learning_rate": 4.420731707317074e-06, | |
| "loss": 0.9985978603363037, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 0.1807081807081807, | |
| "grad_norm": 1.5195460319519043, | |
| "learning_rate": 4.481707317073171e-06, | |
| "loss": 0.9752452373504639, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 0.18315018315018314, | |
| "grad_norm": 1.3534411191940308, | |
| "learning_rate": 4.542682926829269e-06, | |
| "loss": 1.3346309661865234, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.1855921855921856, | |
| "grad_norm": 2.0687193870544434, | |
| "learning_rate": 4.603658536585367e-06, | |
| "loss": 1.4687234163284302, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 0.18803418803418803, | |
| "grad_norm": 1.2396481037139893, | |
| "learning_rate": 4.664634146341464e-06, | |
| "loss": 1.3415579795837402, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 0.19047619047619047, | |
| "grad_norm": 2.4731335639953613, | |
| "learning_rate": 4.725609756097561e-06, | |
| "loss": 1.6359931230545044, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 0.19291819291819293, | |
| "grad_norm": 3.7982375621795654, | |
| "learning_rate": 4.786585365853659e-06, | |
| "loss": 1.0939006805419922, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 0.19536019536019536, | |
| "grad_norm": 1.8634134531021118, | |
| "learning_rate": 4.8475609756097565e-06, | |
| "loss": 0.9020692110061646, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.1978021978021978, | |
| "grad_norm": 1.9401910305023193, | |
| "learning_rate": 4.908536585365854e-06, | |
| "loss": 1.3406171798706055, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 0.20024420024420025, | |
| "grad_norm": 2.7686517238616943, | |
| "learning_rate": 4.9695121951219515e-06, | |
| "loss": 1.2336323261260986, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 0.2026862026862027, | |
| "grad_norm": 2.1715619564056396, | |
| "learning_rate": 4.999998853502653e-06, | |
| "loss": 1.2935197353363037, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 0.20512820512820512, | |
| "grad_norm": 1.8246636390686035, | |
| "learning_rate": 4.999989681530883e-06, | |
| "loss": 1.1559749841690063, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 0.20757020757020758, | |
| "grad_norm": 4.966519832611084, | |
| "learning_rate": 4.999971337624732e-06, | |
| "loss": 0.929039478302002, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.21001221001221002, | |
| "grad_norm": 1.679980993270874, | |
| "learning_rate": 4.999943821858978e-06, | |
| "loss": 1.0169018507003784, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 0.21245421245421245, | |
| "grad_norm": 2.0559370517730713, | |
| "learning_rate": 4.999907134345786e-06, | |
| "loss": 1.3057047128677368, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 0.2148962148962149, | |
| "grad_norm": 2.602260112762451, | |
| "learning_rate": 4.9998612752347116e-06, | |
| "loss": 1.2571014165878296, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 0.21733821733821734, | |
| "grad_norm": 3.518317222595215, | |
| "learning_rate": 4.999806244712696e-06, | |
| "loss": 1.3580776453018188, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 0.21978021978021978, | |
| "grad_norm": 1.0767415761947632, | |
| "learning_rate": 4.9997420430040665e-06, | |
| "loss": 0.9726645946502686, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.2222222222222222, | |
| "grad_norm": 4.324513912200928, | |
| "learning_rate": 4.9996686703705395e-06, | |
| "loss": 0.8844138383865356, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 0.22466422466422467, | |
| "grad_norm": 1.6926108598709106, | |
| "learning_rate": 4.999586127111211e-06, | |
| "loss": 1.2904834747314453, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 0.2271062271062271, | |
| "grad_norm": 3.4072794914245605, | |
| "learning_rate": 4.9994944135625655e-06, | |
| "loss": 1.288368582725525, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 0.22954822954822954, | |
| "grad_norm": 2.327322483062744, | |
| "learning_rate": 4.999393530098465e-06, | |
| "loss": 1.3512585163116455, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 0.231990231990232, | |
| "grad_norm": 1.8644685745239258, | |
| "learning_rate": 4.999283477130157e-06, | |
| "loss": 1.3694134950637817, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.23443223443223443, | |
| "grad_norm": 2.46710205078125, | |
| "learning_rate": 4.999164255106262e-06, | |
| "loss": 1.3137428760528564, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 0.23687423687423687, | |
| "grad_norm": 2.8349263668060303, | |
| "learning_rate": 4.999035864512782e-06, | |
| "loss": 1.308716058731079, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 0.23931623931623933, | |
| "grad_norm": 4.252539157867432, | |
| "learning_rate": 4.998898305873094e-06, | |
| "loss": 1.0035754442214966, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 0.24175824175824176, | |
| "grad_norm": 1.9181326627731323, | |
| "learning_rate": 4.9987515797479455e-06, | |
| "loss": 1.283682942390442, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 0.2442002442002442, | |
| "grad_norm": 2.797574996948242, | |
| "learning_rate": 4.998595686735457e-06, | |
| "loss": 1.3744878768920898, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.24664224664224665, | |
| "grad_norm": 2.4476912021636963, | |
| "learning_rate": 4.998430627471114e-06, | |
| "loss": 1.3049349784851074, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 0.2490842490842491, | |
| "grad_norm": 1.6749374866485596, | |
| "learning_rate": 4.998256402627771e-06, | |
| "loss": 0.9939874410629272, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 0.2515262515262515, | |
| "grad_norm": 1.9039818048477173, | |
| "learning_rate": 4.998073012915644e-06, | |
| "loss": 1.26462721824646, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 0.25396825396825395, | |
| "grad_norm": 1.5555559396743774, | |
| "learning_rate": 4.99788045908231e-06, | |
| "loss": 1.118224024772644, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 0.2564102564102564, | |
| "grad_norm": 12.163622856140137, | |
| "learning_rate": 4.9976787419126995e-06, | |
| "loss": 0.9382672905921936, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.2588522588522589, | |
| "grad_norm": 2.534181594848633, | |
| "learning_rate": 4.997467862229102e-06, | |
| "loss": 0.6328732967376709, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 0.2612942612942613, | |
| "grad_norm": 1.489608645439148, | |
| "learning_rate": 4.997247820891152e-06, | |
| "loss": 1.0992366075515747, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 0.26373626373626374, | |
| "grad_norm": 2.1970038414001465, | |
| "learning_rate": 4.997018618795836e-06, | |
| "loss": 1.2712618112564087, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 0.2661782661782662, | |
| "grad_norm": 1.4587446451187134, | |
| "learning_rate": 4.996780256877479e-06, | |
| "loss": 1.1741327047348022, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 0.2686202686202686, | |
| "grad_norm": 2.0022170543670654, | |
| "learning_rate": 4.996532736107749e-06, | |
| "loss": 1.3054232597351074, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.27106227106227104, | |
| "grad_norm": 1.731757402420044, | |
| "learning_rate": 4.996276057495648e-06, | |
| "loss": 0.934091329574585, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 0.27350427350427353, | |
| "grad_norm": 1.4423786401748657, | |
| "learning_rate": 4.996010222087509e-06, | |
| "loss": 0.9163894653320312, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 0.27594627594627597, | |
| "grad_norm": 1.7184131145477295, | |
| "learning_rate": 4.9957352309669935e-06, | |
| "loss": 1.3263689279556274, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 0.2783882783882784, | |
| "grad_norm": 2.437328338623047, | |
| "learning_rate": 4.9954510852550825e-06, | |
| "loss": 1.3698230981826782, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 0.28083028083028083, | |
| "grad_norm": 2.120469093322754, | |
| "learning_rate": 4.995157786110078e-06, | |
| "loss": 1.343611717224121, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.28327228327228327, | |
| "grad_norm": 6.02695369720459, | |
| "learning_rate": 4.9948553347275964e-06, | |
| "loss": 0.7583301663398743, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 0.2857142857142857, | |
| "grad_norm": 1.9317870140075684, | |
| "learning_rate": 4.994543732340559e-06, | |
| "loss": 1.0170681476593018, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 0.28815628815628813, | |
| "grad_norm": 1.3222551345825195, | |
| "learning_rate": 4.994222980219193e-06, | |
| "loss": 1.272110939025879, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 0.2905982905982906, | |
| "grad_norm": 1.7373944520950317, | |
| "learning_rate": 4.993893079671023e-06, | |
| "loss": 1.2445218563079834, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 0.29304029304029305, | |
| "grad_norm": 2.4315457344055176, | |
| "learning_rate": 4.993554032040867e-06, | |
| "loss": 1.1302506923675537, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.2954822954822955, | |
| "grad_norm": 3.029109477996826, | |
| "learning_rate": 4.993205838710829e-06, | |
| "loss": 0.9910866022109985, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 0.2979242979242979, | |
| "grad_norm": 1.9078646898269653, | |
| "learning_rate": 4.992848501100299e-06, | |
| "loss": 1.3285576105117798, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 0.30036630036630035, | |
| "grad_norm": 1.1271051168441772, | |
| "learning_rate": 4.992482020665938e-06, | |
| "loss": 0.7790983319282532, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 0.3028083028083028, | |
| "grad_norm": 2.9028432369232178, | |
| "learning_rate": 4.992106398901679e-06, | |
| "loss": 1.1949691772460938, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 0.3052503052503053, | |
| "grad_norm": 3.402926445007324, | |
| "learning_rate": 4.9917216373387205e-06, | |
| "loss": 0.9305516481399536, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.3076923076923077, | |
| "grad_norm": 4.722480773925781, | |
| "learning_rate": 4.991327737545517e-06, | |
| "loss": 1.0460638999938965, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 0.31013431013431014, | |
| "grad_norm": 2.7775771617889404, | |
| "learning_rate": 4.990924701127776e-06, | |
| "loss": 1.2800921201705933, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 0.3125763125763126, | |
| "grad_norm": 1.9031347036361694, | |
| "learning_rate": 4.990512529728448e-06, | |
| "loss": 1.2638157606124878, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 0.315018315018315, | |
| "grad_norm": 1.927398443222046, | |
| "learning_rate": 4.990091225027721e-06, | |
| "loss": 1.3112692832946777, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 0.31746031746031744, | |
| "grad_norm": 2.3837084770202637, | |
| "learning_rate": 4.9896607887430185e-06, | |
| "loss": 1.2674881219863892, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.3199023199023199, | |
| "grad_norm": 4.82175874710083, | |
| "learning_rate": 4.989221222628985e-06, | |
| "loss": 1.4771348237991333, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 0.32234432234432236, | |
| "grad_norm": 4.768642425537109, | |
| "learning_rate": 4.988772528477482e-06, | |
| "loss": 0.7117833495140076, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 0.3247863247863248, | |
| "grad_norm": 3.3639814853668213, | |
| "learning_rate": 4.988314708117581e-06, | |
| "loss": 1.0419560670852661, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 0.32722832722832723, | |
| "grad_norm": 4.912712574005127, | |
| "learning_rate": 4.987847763415557e-06, | |
| "loss": 1.3187146186828613, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 0.32967032967032966, | |
| "grad_norm": 2.553563117980957, | |
| "learning_rate": 4.9873716962748805e-06, | |
| "loss": 0.9921520352363586, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.3321123321123321, | |
| "grad_norm": 2.590106964111328, | |
| "learning_rate": 4.986886508636206e-06, | |
| "loss": 1.2800440788269043, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 0.33455433455433453, | |
| "grad_norm": 5.722552299499512, | |
| "learning_rate": 4.986392202477369e-06, | |
| "loss": 0.9619787335395813, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 0.336996336996337, | |
| "grad_norm": 2.612945556640625, | |
| "learning_rate": 4.985888779813377e-06, | |
| "loss": 1.0021531581878662, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 0.33943833943833945, | |
| "grad_norm": 3.078714370727539, | |
| "learning_rate": 4.985376242696399e-06, | |
| "loss": 1.3929091691970825, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 0.3418803418803419, | |
| "grad_norm": 1.928337812423706, | |
| "learning_rate": 4.984854593215759e-06, | |
| "loss": 1.2902088165283203, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.3443223443223443, | |
| "grad_norm": 1.6284582614898682, | |
| "learning_rate": 4.984323833497925e-06, | |
| "loss": 1.2728163003921509, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 0.34676434676434675, | |
| "grad_norm": 2.321744680404663, | |
| "learning_rate": 4.983783965706507e-06, | |
| "loss": 1.311239242553711, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 0.3492063492063492, | |
| "grad_norm": 1.7774631977081299, | |
| "learning_rate": 4.983234992042237e-06, | |
| "loss": 1.1027390956878662, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 0.3516483516483517, | |
| "grad_norm": 3.514158010482788, | |
| "learning_rate": 4.982676914742971e-06, | |
| "loss": 1.6526391506195068, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 0.3540903540903541, | |
| "grad_norm": 5.824040412902832, | |
| "learning_rate": 4.982109736083676e-06, | |
| "loss": 0.9344091415405273, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.35653235653235654, | |
| "grad_norm": 1.5543690919876099, | |
| "learning_rate": 4.981533458376416e-06, | |
| "loss": 1.292595386505127, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 0.358974358974359, | |
| "grad_norm": 2.081808090209961, | |
| "learning_rate": 4.980948083970351e-06, | |
| "loss": 1.0262247323989868, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 0.3614163614163614, | |
| "grad_norm": 2.1623454093933105, | |
| "learning_rate": 4.980353615251719e-06, | |
| "loss": 1.280896782875061, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 0.36385836385836384, | |
| "grad_norm": 9.417366027832031, | |
| "learning_rate": 4.9797500546438344e-06, | |
| "loss": 1.4011857509613037, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 0.3663003663003663, | |
| "grad_norm": 2.1483418941497803, | |
| "learning_rate": 4.979137404607072e-06, | |
| "loss": 1.243982195854187, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.36874236874236876, | |
| "grad_norm": 2.855179786682129, | |
| "learning_rate": 4.978515667638858e-06, | |
| "loss": 0.8995228409767151, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 0.3711843711843712, | |
| "grad_norm": 1.9090166091918945, | |
| "learning_rate": 4.9778848462736625e-06, | |
| "loss": 1.1892352104187012, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 0.37362637362637363, | |
| "grad_norm": 1.4595932960510254, | |
| "learning_rate": 4.977244943082987e-06, | |
| "loss": 1.3153109550476074, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 0.37606837606837606, | |
| "grad_norm": 2.5620715618133545, | |
| "learning_rate": 4.976595960675356e-06, | |
| "loss": 1.3017933368682861, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 0.3785103785103785, | |
| "grad_norm": 2.5225541591644287, | |
| "learning_rate": 4.975937901696302e-06, | |
| "loss": 1.3250616788864136, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.38095238095238093, | |
| "grad_norm": 4.8774895668029785, | |
| "learning_rate": 4.975270768828359e-06, | |
| "loss": 0.984774649143219, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 0.3833943833943834, | |
| "grad_norm": 1.8592923879623413, | |
| "learning_rate": 4.974594564791051e-06, | |
| "loss": 1.3683158159255981, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 0.38583638583638585, | |
| "grad_norm": 4.383054733276367, | |
| "learning_rate": 4.9739092923408784e-06, | |
| "loss": 0.6529649496078491, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 0.3882783882783883, | |
| "grad_norm": 3.972773790359497, | |
| "learning_rate": 4.97321495427131e-06, | |
| "loss": 0.9518109560012817, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 0.3907203907203907, | |
| "grad_norm": 5.475085735321045, | |
| "learning_rate": 4.972511553412768e-06, | |
| "loss": 1.334009051322937, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.39316239316239315, | |
| "grad_norm": 2.0150842666625977, | |
| "learning_rate": 4.971799092632619e-06, | |
| "loss": 1.344587802886963, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 0.3956043956043956, | |
| "grad_norm": 1.9884312152862549, | |
| "learning_rate": 4.971077574835165e-06, | |
| "loss": 1.3174562454223633, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 0.398046398046398, | |
| "grad_norm": 2.862060308456421, | |
| "learning_rate": 4.970347002961623e-06, | |
| "loss": 1.244167447090149, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 0.4004884004884005, | |
| "grad_norm": 1.4828734397888184, | |
| "learning_rate": 4.969607379990123e-06, | |
| "loss": 1.2446471452713013, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 0.40293040293040294, | |
| "grad_norm": 5.2736592292785645, | |
| "learning_rate": 4.968858708935686e-06, | |
| "loss": 0.8940474987030029, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.4053724053724054, | |
| "grad_norm": 3.302468776702881, | |
| "learning_rate": 4.968100992850223e-06, | |
| "loss": 0.6339259147644043, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 0.4078144078144078, | |
| "grad_norm": 2.196411371231079, | |
| "learning_rate": 4.967334234822514e-06, | |
| "loss": 1.0478650331497192, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 0.41025641025641024, | |
| "grad_norm": 1.72081458568573, | |
| "learning_rate": 4.966558437978196e-06, | |
| "loss": 1.349544882774353, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 0.4126984126984127, | |
| "grad_norm": 1.245092511177063, | |
| "learning_rate": 4.965773605479754e-06, | |
| "loss": 0.9362432956695557, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 0.41514041514041516, | |
| "grad_norm": 10.09897518157959, | |
| "learning_rate": 4.964979740526505e-06, | |
| "loss": 1.0755311250686646, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.4175824175824176, | |
| "grad_norm": 2.285883903503418, | |
| "learning_rate": 4.964176846354588e-06, | |
| "loss": 1.6347922086715698, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 0.42002442002442003, | |
| "grad_norm": 1.59197199344635, | |
| "learning_rate": 4.963364926236949e-06, | |
| "loss": 0.9156535863876343, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 0.42246642246642246, | |
| "grad_norm": 1.6245992183685303, | |
| "learning_rate": 4.962543983483325e-06, | |
| "loss": 1.11324143409729, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 0.4249084249084249, | |
| "grad_norm": 2.302391767501831, | |
| "learning_rate": 4.961714021440236e-06, | |
| "loss": 1.3008726835250854, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 0.42735042735042733, | |
| "grad_norm": 2.053579092025757, | |
| "learning_rate": 4.960875043490967e-06, | |
| "loss": 0.8544071316719055, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.4297924297924298, | |
| "grad_norm": 1.6109215021133423, | |
| "learning_rate": 4.960027053055557e-06, | |
| "loss": 1.0643997192382812, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 0.43223443223443225, | |
| "grad_norm": 1.7497365474700928, | |
| "learning_rate": 4.959170053590781e-06, | |
| "loss": 1.2529405355453491, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 0.4346764346764347, | |
| "grad_norm": 1.5827484130859375, | |
| "learning_rate": 4.958304048590143e-06, | |
| "loss": 0.8821004033088684, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 0.4371184371184371, | |
| "grad_norm": 3.8849446773529053, | |
| "learning_rate": 4.957429041583855e-06, | |
| "loss": 1.127004623413086, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 0.43956043956043955, | |
| "grad_norm": 1.9818916320800781, | |
| "learning_rate": 4.956545036138824e-06, | |
| "loss": 1.207819938659668, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.442002442002442, | |
| "grad_norm": 3.0806636810302734, | |
| "learning_rate": 4.9556520358586394e-06, | |
| "loss": 1.0458451509475708, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 0.4444444444444444, | |
| "grad_norm": 6.247749328613281, | |
| "learning_rate": 4.95475004438356e-06, | |
| "loss": 1.1090641021728516, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 0.4468864468864469, | |
| "grad_norm": 4.639119625091553, | |
| "learning_rate": 4.953839065390494e-06, | |
| "loss": 1.37210214138031, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 0.44932844932844934, | |
| "grad_norm": 3.0761399269104004, | |
| "learning_rate": 4.952919102592985e-06, | |
| "loss": 1.020755410194397, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 0.4517704517704518, | |
| "grad_norm": 0.9568601846694946, | |
| "learning_rate": 4.9519901597412036e-06, | |
| "loss": 1.0233187675476074, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.4542124542124542, | |
| "grad_norm": 1.6627336740493774, | |
| "learning_rate": 4.9510522406219215e-06, | |
| "loss": 1.2981936931610107, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 0.45665445665445664, | |
| "grad_norm": 1.6876623630523682, | |
| "learning_rate": 4.9501053490585055e-06, | |
| "loss": 0.8830539584159851, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 0.4590964590964591, | |
| "grad_norm": 2.635246515274048, | |
| "learning_rate": 4.9491494889108956e-06, | |
| "loss": 1.219455599784851, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 0.46153846153846156, | |
| "grad_norm": 2.5608506202697754, | |
| "learning_rate": 4.948184664075594e-06, | |
| "loss": 0.9302881956100464, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 0.463980463980464, | |
| "grad_norm": 2.0959465503692627, | |
| "learning_rate": 4.947210878485644e-06, | |
| "loss": 0.8517276048660278, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.46642246642246643, | |
| "grad_norm": 1.482036828994751, | |
| "learning_rate": 4.94622813611062e-06, | |
| "loss": 1.2661558389663696, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 0.46886446886446886, | |
| "grad_norm": 2.3324732780456543, | |
| "learning_rate": 4.945236440956604e-06, | |
| "loss": 1.0352469682693481, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 0.4713064713064713, | |
| "grad_norm": 5.657218933105469, | |
| "learning_rate": 4.944235797066177e-06, | |
| "loss": 1.2012758255004883, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 0.47374847374847373, | |
| "grad_norm": 3.256732225418091, | |
| "learning_rate": 4.943226208518398e-06, | |
| "loss": 1.5897492170333862, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 0.47619047619047616, | |
| "grad_norm": 1.6683677434921265, | |
| "learning_rate": 4.942207679428788e-06, | |
| "loss": 0.7892211079597473, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.47863247863247865, | |
| "grad_norm": 2.3011298179626465, | |
| "learning_rate": 4.941180213949314e-06, | |
| "loss": 0.8288873434066772, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 0.4810744810744811, | |
| "grad_norm": 1.1541416645050049, | |
| "learning_rate": 4.94014381626837e-06, | |
| "loss": 0.9236152172088623, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 0.4835164835164835, | |
| "grad_norm": 2.699540853500366, | |
| "learning_rate": 4.939098490610763e-06, | |
| "loss": 1.2205630540847778, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 0.48595848595848595, | |
| "grad_norm": 3.6751928329467773, | |
| "learning_rate": 4.938044241237695e-06, | |
| "loss": 1.2720117568969727, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 0.4884004884004884, | |
| "grad_norm": 2.8597030639648438, | |
| "learning_rate": 4.936981072446743e-06, | |
| "loss": 0.5283371210098267, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.4908424908424908, | |
| "grad_norm": 2.1727144718170166, | |
| "learning_rate": 4.935908988571845e-06, | |
| "loss": 1.2206032276153564, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 0.4932844932844933, | |
| "grad_norm": 1.26828932762146, | |
| "learning_rate": 4.934827993983279e-06, | |
| "loss": 1.3251525163650513, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 0.49572649572649574, | |
| "grad_norm": 2.455037832260132, | |
| "learning_rate": 4.933738093087651e-06, | |
| "loss": 0.6017684936523438, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 0.4981684981684982, | |
| "grad_norm": 2.95031476020813, | |
| "learning_rate": 4.932639290327866e-06, | |
| "loss": 0.8958187103271484, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 0.5006105006105006, | |
| "grad_norm": 1.4214322566986084, | |
| "learning_rate": 4.931531590183123e-06, | |
| "loss": 1.255342721939087, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.503052503052503, | |
| "grad_norm": 10.521769523620605, | |
| "learning_rate": 4.930414997168889e-06, | |
| "loss": 0.5480175614356995, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 0.5054945054945055, | |
| "grad_norm": 1.6570682525634766, | |
| "learning_rate": 4.929289515836882e-06, | |
| "loss": 1.3151097297668457, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 0.5079365079365079, | |
| "grad_norm": 2.6421968936920166, | |
| "learning_rate": 4.928155150775049e-06, | |
| "loss": 1.2698694467544556, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 0.5103785103785103, | |
| "grad_norm": 2.2661855220794678, | |
| "learning_rate": 4.927011906607559e-06, | |
| "loss": 1.1845803260803223, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 0.5128205128205128, | |
| "grad_norm": 0.9527170658111572, | |
| "learning_rate": 4.925859787994767e-06, | |
| "loss": 1.2397900819778442, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.5152625152625152, | |
| "grad_norm": 1.2722523212432861, | |
| "learning_rate": 4.924698799633212e-06, | |
| "loss": 1.2302662134170532, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 0.5177045177045178, | |
| "grad_norm": 1.8450767993927002, | |
| "learning_rate": 4.923528946255584e-06, | |
| "loss": 1.257878303527832, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 0.5201465201465202, | |
| "grad_norm": 2.1251304149627686, | |
| "learning_rate": 4.922350232630715e-06, | |
| "loss": 1.0593935251235962, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 0.5225885225885226, | |
| "grad_norm": 1.2513749599456787, | |
| "learning_rate": 4.9211626635635515e-06, | |
| "loss": 1.2507191896438599, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 0.525030525030525, | |
| "grad_norm": 8.465970039367676, | |
| "learning_rate": 4.919966243895142e-06, | |
| "loss": 0.8818293809890747, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.5274725274725275, | |
| "grad_norm": 6.935226917266846, | |
| "learning_rate": 4.918760978502611e-06, | |
| "loss": 0.5760735273361206, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 0.5299145299145299, | |
| "grad_norm": 11.597949028015137, | |
| "learning_rate": 4.917546872299143e-06, | |
| "loss": 1.2672209739685059, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 0.5323565323565324, | |
| "grad_norm": 1.8173397779464722, | |
| "learning_rate": 4.916323930233962e-06, | |
| "loss": 1.2190382480621338, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 0.5347985347985348, | |
| "grad_norm": 3.132521867752075, | |
| "learning_rate": 4.915092157292313e-06, | |
| "loss": 1.2443459033966064, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 0.5372405372405372, | |
| "grad_norm": 1.43805730342865, | |
| "learning_rate": 4.913851558495433e-06, | |
| "loss": 1.2091344594955444, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.5396825396825397, | |
| "grad_norm": 11.365583419799805, | |
| "learning_rate": 4.912602138900545e-06, | |
| "loss": 1.0195097923278809, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 0.5421245421245421, | |
| "grad_norm": 2.1645138263702393, | |
| "learning_rate": 4.911343903600823e-06, | |
| "loss": 0.8177242279052734, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 0.5445665445665445, | |
| "grad_norm": 1.9511176347732544, | |
| "learning_rate": 4.91007685772538e-06, | |
| "loss": 0.9824368357658386, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 0.5470085470085471, | |
| "grad_norm": 1.3720399141311646, | |
| "learning_rate": 4.908801006439247e-06, | |
| "loss": 1.08683443069458, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 0.5494505494505495, | |
| "grad_norm": 2.627140760421753, | |
| "learning_rate": 4.9075163549433455e-06, | |
| "loss": 0.979245126247406, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.5518925518925519, | |
| "grad_norm": 2.8625056743621826, | |
| "learning_rate": 4.906222908474474e-06, | |
| "loss": 0.7317221760749817, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 0.5543345543345544, | |
| "grad_norm": 3.2224857807159424, | |
| "learning_rate": 4.90492067230528e-06, | |
| "loss": 1.258061170578003, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 0.5567765567765568, | |
| "grad_norm": 2.8512461185455322, | |
| "learning_rate": 4.903609651744244e-06, | |
| "loss": 1.2263869047164917, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 0.5592185592185592, | |
| "grad_norm": 1.08021080493927, | |
| "learning_rate": 4.902289852135655e-06, | |
| "loss": 0.6804142594337463, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 0.5616605616605617, | |
| "grad_norm": 46.32835006713867, | |
| "learning_rate": 4.90096127885959e-06, | |
| "loss": 1.1406168937683105, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.5641025641025641, | |
| "grad_norm": 3.643751621246338, | |
| "learning_rate": 4.899623937331887e-06, | |
| "loss": 1.1659770011901855, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 0.5665445665445665, | |
| "grad_norm": 6.275250434875488, | |
| "learning_rate": 4.898277833004135e-06, | |
| "loss": 0.5430421829223633, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 0.568986568986569, | |
| "grad_norm": 1.6964603662490845, | |
| "learning_rate": 4.896922971363635e-06, | |
| "loss": 1.487717628479004, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 0.5714285714285714, | |
| "grad_norm": 1.5128223896026611, | |
| "learning_rate": 4.895559357933394e-06, | |
| "loss": 1.2990221977233887, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 0.5738705738705738, | |
| "grad_norm": 2.9639620780944824, | |
| "learning_rate": 4.89418699827209e-06, | |
| "loss": 1.001917839050293, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.5763125763125763, | |
| "grad_norm": 3.8676769733428955, | |
| "learning_rate": 4.892805897974059e-06, | |
| "loss": 1.2513344287872314, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 0.5787545787545788, | |
| "grad_norm": 3.1442391872406006, | |
| "learning_rate": 4.891416062669262e-06, | |
| "loss": 0.8551501631736755, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 0.5811965811965812, | |
| "grad_norm": 4.836908340454102, | |
| "learning_rate": 4.890017498023274e-06, | |
| "loss": 0.9901700615882874, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 0.5836385836385837, | |
| "grad_norm": 3.409428358078003, | |
| "learning_rate": 4.888610209737249e-06, | |
| "loss": 1.1505521535873413, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 0.5860805860805861, | |
| "grad_norm": 2.039818525314331, | |
| "learning_rate": 4.887194203547907e-06, | |
| "loss": 1.2868854999542236, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.5885225885225885, | |
| "grad_norm": 3.3642868995666504, | |
| "learning_rate": 4.885769485227503e-06, | |
| "loss": 0.5171108245849609, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 0.590964590964591, | |
| "grad_norm": 1.0177332162857056, | |
| "learning_rate": 4.8843360605838055e-06, | |
| "loss": 0.9433972239494324, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 0.5934065934065934, | |
| "grad_norm": 0.7848219275474548, | |
| "learning_rate": 4.882893935460078e-06, | |
| "loss": 1.0055443048477173, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 0.5958485958485958, | |
| "grad_norm": 1.8216912746429443, | |
| "learning_rate": 4.881443115735045e-06, | |
| "loss": 0.9295751452445984, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 0.5982905982905983, | |
| "grad_norm": 2.449176073074341, | |
| "learning_rate": 4.879983607322881e-06, | |
| "loss": 0.9871832132339478, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.6007326007326007, | |
| "grad_norm": 2.1226084232330322, | |
| "learning_rate": 4.878515416173174e-06, | |
| "loss": 0.7565707564353943, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 0.6031746031746031, | |
| "grad_norm": 1.5631353855133057, | |
| "learning_rate": 4.877038548270907e-06, | |
| "loss": 0.9493947625160217, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 0.6056166056166056, | |
| "grad_norm": 2.126840829849243, | |
| "learning_rate": 4.875553009636437e-06, | |
| "loss": 1.216259479522705, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 0.608058608058608, | |
| "grad_norm": 6.31650447845459, | |
| "learning_rate": 4.874058806325463e-06, | |
| "loss": 0.5695387125015259, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 0.6105006105006106, | |
| "grad_norm": 1.5655598640441895, | |
| "learning_rate": 4.872555944429006e-06, | |
| "loss": 0.8497368097305298, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.612942612942613, | |
| "grad_norm": 2.7936275005340576, | |
| "learning_rate": 4.871044430073383e-06, | |
| "loss": 1.2087408304214478, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 0.6153846153846154, | |
| "grad_norm": 5.054646015167236, | |
| "learning_rate": 4.869524269420183e-06, | |
| "loss": 1.2262006998062134, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 0.6178266178266179, | |
| "grad_norm": 2.619194507598877, | |
| "learning_rate": 4.8679954686662404e-06, | |
| "loss": 1.2392239570617676, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 0.6202686202686203, | |
| "grad_norm": 2.1149373054504395, | |
| "learning_rate": 4.866458034043611e-06, | |
| "loss": 1.2161999940872192, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 0.6227106227106227, | |
| "grad_norm": 3.2427122592926025, | |
| "learning_rate": 4.864911971819545e-06, | |
| "loss": 1.2096397876739502, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.6251526251526252, | |
| "grad_norm": 1.8958977460861206, | |
| "learning_rate": 4.863357288296463e-06, | |
| "loss": 1.0511081218719482, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 0.6275946275946276, | |
| "grad_norm": 2.3320584297180176, | |
| "learning_rate": 4.861793989811929e-06, | |
| "loss": 1.039177417755127, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 0.63003663003663, | |
| "grad_norm": 2.3830647468566895, | |
| "learning_rate": 4.860222082738628e-06, | |
| "loss": 0.9379343390464783, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 0.6324786324786325, | |
| "grad_norm": 1.4862414598464966, | |
| "learning_rate": 4.858641573484334e-06, | |
| "loss": 1.2305572032928467, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 0.6349206349206349, | |
| "grad_norm": 1.5390454530715942, | |
| "learning_rate": 4.8570524684918885e-06, | |
| "loss": 0.7816034555435181, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.6373626373626373, | |
| "grad_norm": 3.6071054935455322, | |
| "learning_rate": 4.855454774239174e-06, | |
| "loss": 0.9562470316886902, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 0.6398046398046398, | |
| "grad_norm": 19.057411193847656, | |
| "learning_rate": 4.8538484972390844e-06, | |
| "loss": 0.9935526847839355, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 0.6422466422466423, | |
| "grad_norm": 5.404201030731201, | |
| "learning_rate": 4.852233644039503e-06, | |
| "loss": 1.2573553323745728, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 0.6446886446886447, | |
| "grad_norm": 5.5111002922058105, | |
| "learning_rate": 4.8506102212232714e-06, | |
| "loss": 1.309897780418396, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 0.6471306471306472, | |
| "grad_norm": 12.94403076171875, | |
| "learning_rate": 4.848978235408165e-06, | |
| "loss": 1.0515775680541992, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.6495726495726496, | |
| "grad_norm": 2.2977471351623535, | |
| "learning_rate": 4.847337693246869e-06, | |
| "loss": 1.0648335218429565, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 0.652014652014652, | |
| "grad_norm": 4.799200057983398, | |
| "learning_rate": 4.845688601426942e-06, | |
| "loss": 1.5188199281692505, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 0.6544566544566545, | |
| "grad_norm": 1.9204214811325073, | |
| "learning_rate": 4.8440309666708006e-06, | |
| "loss": 0.8509761691093445, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 0.6568986568986569, | |
| "grad_norm": 3.197026014328003, | |
| "learning_rate": 4.842364795735681e-06, | |
| "loss": 1.237154483795166, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 0.6593406593406593, | |
| "grad_norm": 2.667442798614502, | |
| "learning_rate": 4.840690095413621e-06, | |
| "loss": 1.256026268005371, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.6617826617826618, | |
| "grad_norm": 1.8451228141784668, | |
| "learning_rate": 4.8390068725314235e-06, | |
| "loss": 0.9330289959907532, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 0.6642246642246642, | |
| "grad_norm": 2.3354480266571045, | |
| "learning_rate": 4.837315133950639e-06, | |
| "loss": 1.2343664169311523, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 0.6666666666666666, | |
| "grad_norm": 2.377455472946167, | |
| "learning_rate": 4.835614886567523e-06, | |
| "loss": 1.1341302394866943, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 0.6691086691086691, | |
| "grad_norm": 2.2880988121032715, | |
| "learning_rate": 4.833906137313027e-06, | |
| "loss": 1.2215226888656616, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 0.6715506715506715, | |
| "grad_norm": 1.8074506521224976, | |
| "learning_rate": 4.8321888931527526e-06, | |
| "loss": 1.1459529399871826, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.673992673992674, | |
| "grad_norm": 2.449573516845703, | |
| "learning_rate": 4.83046316108693e-06, | |
| "loss": 0.9938110709190369, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 0.6764346764346765, | |
| "grad_norm": 1.4324339628219604, | |
| "learning_rate": 4.828728948150395e-06, | |
| "loss": 1.018953800201416, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 0.6788766788766789, | |
| "grad_norm": 1.9855033159255981, | |
| "learning_rate": 4.826986261412551e-06, | |
| "loss": 0.7265840768814087, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 0.6813186813186813, | |
| "grad_norm": 5.827937126159668, | |
| "learning_rate": 4.825235107977347e-06, | |
| "loss": 1.159310221672058, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 0.6837606837606838, | |
| "grad_norm": 1.3800535202026367, | |
| "learning_rate": 4.82347549498324e-06, | |
| "loss": 1.1552388668060303, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.6862026862026862, | |
| "grad_norm": 6.40132999420166, | |
| "learning_rate": 4.821707429603181e-06, | |
| "loss": 0.9877975583076477, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 0.6886446886446886, | |
| "grad_norm": 5.481659889221191, | |
| "learning_rate": 4.8199309190445694e-06, | |
| "loss": 1.294710636138916, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 0.6910866910866911, | |
| "grad_norm": 1.7129062414169312, | |
| "learning_rate": 4.818145970549233e-06, | |
| "loss": 1.0880193710327148, | |
| "step": 566 | |
| }, | |
| { | |
| "epoch": 0.6935286935286935, | |
| "grad_norm": 2.8719332218170166, | |
| "learning_rate": 4.816352591393398e-06, | |
| "loss": 1.286997675895691, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 0.6959706959706959, | |
| "grad_norm": 3.2201790809631348, | |
| "learning_rate": 4.814550788887655e-06, | |
| "loss": 0.9447314143180847, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.6984126984126984, | |
| "grad_norm": 9.533697128295898, | |
| "learning_rate": 4.812740570376933e-06, | |
| "loss": 0.9670330286026001, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 0.7008547008547008, | |
| "grad_norm": 4.622603416442871, | |
| "learning_rate": 4.810921943240469e-06, | |
| "loss": 1.4118473529815674, | |
| "step": 574 | |
| }, | |
| { | |
| "epoch": 0.7032967032967034, | |
| "grad_norm": 1.7296289205551147, | |
| "learning_rate": 4.809094914891775e-06, | |
| "loss": 1.2039122581481934, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 0.7057387057387058, | |
| "grad_norm": 1.5504480600357056, | |
| "learning_rate": 4.807259492778613e-06, | |
| "loss": 1.1822270154953003, | |
| "step": 578 | |
| }, | |
| { | |
| "epoch": 0.7081807081807082, | |
| "grad_norm": 1.588944435119629, | |
| "learning_rate": 4.805415684382959e-06, | |
| "loss": 1.142565131187439, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.7106227106227107, | |
| "grad_norm": 1.771332859992981, | |
| "learning_rate": 4.803563497220976e-06, | |
| "loss": 1.1912704706192017, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 0.7130647130647131, | |
| "grad_norm": 5.767767429351807, | |
| "learning_rate": 4.8017029388429845e-06, | |
| "loss": 1.0446151494979858, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 0.7155067155067155, | |
| "grad_norm": 1.6248974800109863, | |
| "learning_rate": 4.799834016833425e-06, | |
| "loss": 1.287752628326416, | |
| "step": 586 | |
| }, | |
| { | |
| "epoch": 0.717948717948718, | |
| "grad_norm": 2.37131404876709, | |
| "learning_rate": 4.7979567388108376e-06, | |
| "loss": 1.061058759689331, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 0.7203907203907204, | |
| "grad_norm": 1.6095200777053833, | |
| "learning_rate": 4.796071112427821e-06, | |
| "loss": 0.9337313771247864, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.7228327228327228, | |
| "grad_norm": 1.546781063079834, | |
| "learning_rate": 4.794177145371006e-06, | |
| "loss": 0.8499547243118286, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 0.7252747252747253, | |
| "grad_norm": 1.973464846611023, | |
| "learning_rate": 4.792274845361025e-06, | |
| "loss": 1.199100375175476, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 0.7277167277167277, | |
| "grad_norm": 2.8694047927856445, | |
| "learning_rate": 4.790364220152477e-06, | |
| "loss": 0.9500537514686584, | |
| "step": 596 | |
| }, | |
| { | |
| "epoch": 0.7301587301587301, | |
| "grad_norm": 3.2333526611328125, | |
| "learning_rate": 4.788445277533902e-06, | |
| "loss": 0.9067592024803162, | |
| "step": 598 | |
| }, | |
| { | |
| "epoch": 0.7326007326007326, | |
| "grad_norm": 1.7088539600372314, | |
| "learning_rate": 4.786518025327742e-06, | |
| "loss": 1.1730542182922363, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.7350427350427351, | |
| "grad_norm": 3.451296091079712, | |
| "learning_rate": 4.7845824713903115e-06, | |
| "loss": 1.319393515586853, | |
| "step": 602 | |
| }, | |
| { | |
| "epoch": 0.7374847374847375, | |
| "grad_norm": 2.3576159477233887, | |
| "learning_rate": 4.782638623611771e-06, | |
| "loss": 1.1339298486709595, | |
| "step": 604 | |
| }, | |
| { | |
| "epoch": 0.73992673992674, | |
| "grad_norm": 4.726226329803467, | |
| "learning_rate": 4.780686489916086e-06, | |
| "loss": 1.4150636196136475, | |
| "step": 606 | |
| }, | |
| { | |
| "epoch": 0.7423687423687424, | |
| "grad_norm": 1.4049737453460693, | |
| "learning_rate": 4.778726078261001e-06, | |
| "loss": 1.265529751777649, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 0.7448107448107448, | |
| "grad_norm": 2.383326768875122, | |
| "learning_rate": 4.776757396638005e-06, | |
| "loss": 0.8798419237136841, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.7472527472527473, | |
| "grad_norm": 1.5629072189331055, | |
| "learning_rate": 4.774780453072298e-06, | |
| "loss": 1.2364379167556763, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 0.7496947496947497, | |
| "grad_norm": 1.7620823383331299, | |
| "learning_rate": 4.772795255622761e-06, | |
| "loss": 1.2224982976913452, | |
| "step": 614 | |
| }, | |
| { | |
| "epoch": 0.7521367521367521, | |
| "grad_norm": 1.2436916828155518, | |
| "learning_rate": 4.770801812381919e-06, | |
| "loss": 0.8438993096351624, | |
| "step": 616 | |
| }, | |
| { | |
| "epoch": 0.7545787545787546, | |
| "grad_norm": 1.9763593673706055, | |
| "learning_rate": 4.768800131475913e-06, | |
| "loss": 1.5313855409622192, | |
| "step": 618 | |
| }, | |
| { | |
| "epoch": 0.757020757020757, | |
| "grad_norm": 3.1992554664611816, | |
| "learning_rate": 4.7667902210644616e-06, | |
| "loss": 1.008560061454773, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.7594627594627594, | |
| "grad_norm": 7.259356498718262, | |
| "learning_rate": 4.764772089340833e-06, | |
| "loss": 0.9306063652038574, | |
| "step": 622 | |
| }, | |
| { | |
| "epoch": 0.7619047619047619, | |
| "grad_norm": 1.7845251560211182, | |
| "learning_rate": 4.762745744531808e-06, | |
| "loss": 1.2115577459335327, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 0.7643467643467643, | |
| "grad_norm": 1.6257309913635254, | |
| "learning_rate": 4.760711194897646e-06, | |
| "loss": 1.2677242755889893, | |
| "step": 626 | |
| }, | |
| { | |
| "epoch": 0.7667887667887668, | |
| "grad_norm": 3.1209113597869873, | |
| "learning_rate": 4.758668448732057e-06, | |
| "loss": 1.0252844095230103, | |
| "step": 628 | |
| }, | |
| { | |
| "epoch": 0.7692307692307693, | |
| "grad_norm": 1.4179201126098633, | |
| "learning_rate": 4.7566175143621575e-06, | |
| "loss": 1.2540860176086426, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.7716727716727717, | |
| "grad_norm": 1.1921404600143433, | |
| "learning_rate": 4.754558400148449e-06, | |
| "loss": 0.8723937273025513, | |
| "step": 632 | |
| }, | |
| { | |
| "epoch": 0.7741147741147741, | |
| "grad_norm": 2.1802115440368652, | |
| "learning_rate": 4.752491114484773e-06, | |
| "loss": 0.8961063623428345, | |
| "step": 634 | |
| }, | |
| { | |
| "epoch": 0.7765567765567766, | |
| "grad_norm": 1.9183303117752075, | |
| "learning_rate": 4.7504156657982835e-06, | |
| "loss": 1.226144790649414, | |
| "step": 636 | |
| }, | |
| { | |
| "epoch": 0.778998778998779, | |
| "grad_norm": 1.9927711486816406, | |
| "learning_rate": 4.74833206254941e-06, | |
| "loss": 1.2360224723815918, | |
| "step": 638 | |
| }, | |
| { | |
| "epoch": 0.7814407814407814, | |
| "grad_norm": 4.131889343261719, | |
| "learning_rate": 4.746240313231823e-06, | |
| "loss": 0.9043057560920715, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.7838827838827839, | |
| "grad_norm": 3.734161138534546, | |
| "learning_rate": 4.744140426372401e-06, | |
| "loss": 1.0058786869049072, | |
| "step": 642 | |
| }, | |
| { | |
| "epoch": 0.7863247863247863, | |
| "grad_norm": 1.5284754037857056, | |
| "learning_rate": 4.742032410531195e-06, | |
| "loss": 1.124707818031311, | |
| "step": 644 | |
| }, | |
| { | |
| "epoch": 0.7887667887667887, | |
| "grad_norm": 3.012943744659424, | |
| "learning_rate": 4.73991627430139e-06, | |
| "loss": 0.9144378304481506, | |
| "step": 646 | |
| }, | |
| { | |
| "epoch": 0.7912087912087912, | |
| "grad_norm": 2.2246816158294678, | |
| "learning_rate": 4.737792026309278e-06, | |
| "loss": 1.19635009765625, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 0.7936507936507936, | |
| "grad_norm": 1.9574096202850342, | |
| "learning_rate": 4.735659675214215e-06, | |
| "loss": 0.5257167220115662, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.796092796092796, | |
| "grad_norm": 2.1935298442840576, | |
| "learning_rate": 4.7335192297085895e-06, | |
| "loss": 0.7748251557350159, | |
| "step": 652 | |
| }, | |
| { | |
| "epoch": 0.7985347985347986, | |
| "grad_norm": 2.698744535446167, | |
| "learning_rate": 4.731370698517786e-06, | |
| "loss": 1.1536623239517212, | |
| "step": 654 | |
| }, | |
| { | |
| "epoch": 0.800976800976801, | |
| "grad_norm": 1.8622996807098389, | |
| "learning_rate": 4.729214090400149e-06, | |
| "loss": 1.200728178024292, | |
| "step": 656 | |
| }, | |
| { | |
| "epoch": 0.8034188034188035, | |
| "grad_norm": 9.128825187683105, | |
| "learning_rate": 4.727049414146952e-06, | |
| "loss": 0.4623393714427948, | |
| "step": 658 | |
| }, | |
| { | |
| "epoch": 0.8058608058608059, | |
| "grad_norm": 2.4344100952148438, | |
| "learning_rate": 4.724876678582352e-06, | |
| "loss": 1.0503042936325073, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.8083028083028083, | |
| "grad_norm": 1.744651198387146, | |
| "learning_rate": 4.722695892563363e-06, | |
| "loss": 1.1860074996948242, | |
| "step": 662 | |
| }, | |
| { | |
| "epoch": 0.8107448107448108, | |
| "grad_norm": 1.6168715953826904, | |
| "learning_rate": 4.720507064979816e-06, | |
| "loss": 1.2846834659576416, | |
| "step": 664 | |
| }, | |
| { | |
| "epoch": 0.8131868131868132, | |
| "grad_norm": 2.017427444458008, | |
| "learning_rate": 4.7183102047543205e-06, | |
| "loss": 0.8671167492866516, | |
| "step": 666 | |
| }, | |
| { | |
| "epoch": 0.8156288156288156, | |
| "grad_norm": 2.0237629413604736, | |
| "learning_rate": 4.716105320842234e-06, | |
| "loss": 1.0235426425933838, | |
| "step": 668 | |
| }, | |
| { | |
| "epoch": 0.818070818070818, | |
| "grad_norm": 2.6489999294281006, | |
| "learning_rate": 4.713892422231619e-06, | |
| "loss": 1.3756883144378662, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.8205128205128205, | |
| "grad_norm": 1.48908269405365, | |
| "learning_rate": 4.71167151794321e-06, | |
| "loss": 1.2407268285751343, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 0.8229548229548229, | |
| "grad_norm": 1.3047491312026978, | |
| "learning_rate": 4.709442617030379e-06, | |
| "loss": 0.9855388402938843, | |
| "step": 674 | |
| }, | |
| { | |
| "epoch": 0.8253968253968254, | |
| "grad_norm": 1.8140162229537964, | |
| "learning_rate": 4.707205728579091e-06, | |
| "loss": 0.882321298122406, | |
| "step": 676 | |
| }, | |
| { | |
| "epoch": 0.8278388278388278, | |
| "grad_norm": 13.20935344696045, | |
| "learning_rate": 4.704960861707875e-06, | |
| "loss": 1.2027504444122314, | |
| "step": 678 | |
| }, | |
| { | |
| "epoch": 0.8302808302808303, | |
| "grad_norm": 1.8309438228607178, | |
| "learning_rate": 4.702708025567784e-06, | |
| "loss": 1.2264920473098755, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.8327228327228328, | |
| "grad_norm": 5.225240707397461, | |
| "learning_rate": 4.700447229342353e-06, | |
| "loss": 1.1251945495605469, | |
| "step": 682 | |
| }, | |
| { | |
| "epoch": 0.8351648351648352, | |
| "grad_norm": 12.371641159057617, | |
| "learning_rate": 4.698178482247571e-06, | |
| "loss": 0.6810005307197571, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 0.8376068376068376, | |
| "grad_norm": 1.8889416456222534, | |
| "learning_rate": 4.695901793531834e-06, | |
| "loss": 1.325577974319458, | |
| "step": 686 | |
| }, | |
| { | |
| "epoch": 0.8400488400488401, | |
| "grad_norm": 2.4011776447296143, | |
| "learning_rate": 4.693617172475914e-06, | |
| "loss": 1.2832276821136475, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 0.8424908424908425, | |
| "grad_norm": 6.96901273727417, | |
| "learning_rate": 4.691324628392918e-06, | |
| "loss": 0.9534074664115906, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.8449328449328449, | |
| "grad_norm": 3.2731733322143555, | |
| "learning_rate": 4.68902417062825e-06, | |
| "loss": 1.2452714443206787, | |
| "step": 692 | |
| }, | |
| { | |
| "epoch": 0.8473748473748474, | |
| "grad_norm": 1.0289312601089478, | |
| "learning_rate": 4.686715808559575e-06, | |
| "loss": 0.9713150858879089, | |
| "step": 694 | |
| }, | |
| { | |
| "epoch": 0.8498168498168498, | |
| "grad_norm": 4.545189380645752, | |
| "learning_rate": 4.684399551596778e-06, | |
| "loss": 1.130218744277954, | |
| "step": 696 | |
| }, | |
| { | |
| "epoch": 0.8522588522588522, | |
| "grad_norm": 87.15223693847656, | |
| "learning_rate": 4.682075409181928e-06, | |
| "loss": 0.9914512634277344, | |
| "step": 698 | |
| }, | |
| { | |
| "epoch": 0.8547008547008547, | |
| "grad_norm": 1.9779729843139648, | |
| "learning_rate": 4.6797433907892385e-06, | |
| "loss": 1.0588513612747192, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.8571428571428571, | |
| "grad_norm": 1.3967251777648926, | |
| "learning_rate": 4.677403505925027e-06, | |
| "loss": 1.0360347032546997, | |
| "step": 702 | |
| }, | |
| { | |
| "epoch": 0.8595848595848596, | |
| "grad_norm": 2.3286092281341553, | |
| "learning_rate": 4.6750557641276805e-06, | |
| "loss": 0.9465680122375488, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 0.8620268620268621, | |
| "grad_norm": 1.6770824193954468, | |
| "learning_rate": 4.672700174967613e-06, | |
| "loss": 0.4001966118812561, | |
| "step": 706 | |
| }, | |
| { | |
| "epoch": 0.8644688644688645, | |
| "grad_norm": 1.5843091011047363, | |
| "learning_rate": 4.6703367480472304e-06, | |
| "loss": 1.1794531345367432, | |
| "step": 708 | |
| }, | |
| { | |
| "epoch": 0.8669108669108669, | |
| "grad_norm": 7.7685017585754395, | |
| "learning_rate": 4.667965493000883e-06, | |
| "loss": 0.8692483901977539, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.8693528693528694, | |
| "grad_norm": 7.2869977951049805, | |
| "learning_rate": 4.665586419494837e-06, | |
| "loss": 1.3455827236175537, | |
| "step": 712 | |
| }, | |
| { | |
| "epoch": 0.8717948717948718, | |
| "grad_norm": 2.4024009704589844, | |
| "learning_rate": 4.66319953722723e-06, | |
| "loss": 1.53640615940094, | |
| "step": 714 | |
| }, | |
| { | |
| "epoch": 0.8742368742368742, | |
| "grad_norm": 1.8035765886306763, | |
| "learning_rate": 4.660804855928029e-06, | |
| "loss": 1.400252103805542, | |
| "step": 716 | |
| }, | |
| { | |
| "epoch": 0.8766788766788767, | |
| "grad_norm": 2.3839004039764404, | |
| "learning_rate": 4.658402385358992e-06, | |
| "loss": 0.88499915599823, | |
| "step": 718 | |
| }, | |
| { | |
| "epoch": 0.8791208791208791, | |
| "grad_norm": 2.779142141342163, | |
| "learning_rate": 4.655992135313634e-06, | |
| "loss": 1.1850217580795288, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.8815628815628815, | |
| "grad_norm": 5.73760986328125, | |
| "learning_rate": 4.6535741156171796e-06, | |
| "loss": 0.8437918424606323, | |
| "step": 722 | |
| }, | |
| { | |
| "epoch": 0.884004884004884, | |
| "grad_norm": 2.219005584716797, | |
| "learning_rate": 4.651148336126527e-06, | |
| "loss": 1.2156010866165161, | |
| "step": 724 | |
| }, | |
| { | |
| "epoch": 0.8864468864468864, | |
| "grad_norm": 2.8996713161468506, | |
| "learning_rate": 4.6487148067302065e-06, | |
| "loss": 1.1615610122680664, | |
| "step": 726 | |
| }, | |
| { | |
| "epoch": 0.8888888888888888, | |
| "grad_norm": 1.5215322971343994, | |
| "learning_rate": 4.646273537348337e-06, | |
| "loss": 1.15150785446167, | |
| "step": 728 | |
| }, | |
| { | |
| "epoch": 0.8913308913308914, | |
| "grad_norm": 1.62069833278656, | |
| "learning_rate": 4.643824537932595e-06, | |
| "loss": 1.3772497177124023, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.8937728937728938, | |
| "grad_norm": 5.082972526550293, | |
| "learning_rate": 4.641367818466164e-06, | |
| "loss": 1.1609463691711426, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 0.8962148962148963, | |
| "grad_norm": 2.289997100830078, | |
| "learning_rate": 4.6389033889637e-06, | |
| "loss": 0.9794567227363586, | |
| "step": 734 | |
| }, | |
| { | |
| "epoch": 0.8986568986568987, | |
| "grad_norm": 1.610153317451477, | |
| "learning_rate": 4.636431259471284e-06, | |
| "loss": 0.7476667165756226, | |
| "step": 736 | |
| }, | |
| { | |
| "epoch": 0.9010989010989011, | |
| "grad_norm": 3.150763511657715, | |
| "learning_rate": 4.633951440066391e-06, | |
| "loss": 0.8690844774246216, | |
| "step": 738 | |
| }, | |
| { | |
| "epoch": 0.9035409035409036, | |
| "grad_norm": 4.614825248718262, | |
| "learning_rate": 4.631463940857841e-06, | |
| "loss": 1.0103671550750732, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.905982905982906, | |
| "grad_norm": 1.2555079460144043, | |
| "learning_rate": 4.6289687719857595e-06, | |
| "loss": 0.873469352722168, | |
| "step": 742 | |
| }, | |
| { | |
| "epoch": 0.9084249084249084, | |
| "grad_norm": 1.3857243061065674, | |
| "learning_rate": 4.626465943621538e-06, | |
| "loss": 0.8869038224220276, | |
| "step": 744 | |
| }, | |
| { | |
| "epoch": 0.9108669108669109, | |
| "grad_norm": 8.226287841796875, | |
| "learning_rate": 4.623955465967791e-06, | |
| "loss": 1.1199119091033936, | |
| "step": 746 | |
| }, | |
| { | |
| "epoch": 0.9133089133089133, | |
| "grad_norm": 2.548466920852661, | |
| "learning_rate": 4.621437349258316e-06, | |
| "loss": 0.8345762491226196, | |
| "step": 748 | |
| }, | |
| { | |
| "epoch": 0.9157509157509157, | |
| "grad_norm": 1.7079286575317383, | |
| "learning_rate": 4.618911603758047e-06, | |
| "loss": 1.3368088006973267, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.9181929181929182, | |
| "grad_norm": 1.5761820077896118, | |
| "learning_rate": 4.616378239763021e-06, | |
| "loss": 1.2190864086151123, | |
| "step": 752 | |
| }, | |
| { | |
| "epoch": 0.9206349206349206, | |
| "grad_norm": 1.6565887928009033, | |
| "learning_rate": 4.613837267600328e-06, | |
| "loss": 1.2295691967010498, | |
| "step": 754 | |
| }, | |
| { | |
| "epoch": 0.9230769230769231, | |
| "grad_norm": 1.4082704782485962, | |
| "learning_rate": 4.611288697628074e-06, | |
| "loss": 1.2072789669036865, | |
| "step": 756 | |
| }, | |
| { | |
| "epoch": 0.9255189255189256, | |
| "grad_norm": 2.3025150299072266, | |
| "learning_rate": 4.608732540235336e-06, | |
| "loss": 1.2459933757781982, | |
| "step": 758 | |
| }, | |
| { | |
| "epoch": 0.927960927960928, | |
| "grad_norm": 2.0221993923187256, | |
| "learning_rate": 4.60616880584212e-06, | |
| "loss": 0.7390088438987732, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.9304029304029304, | |
| "grad_norm": 3.3428783416748047, | |
| "learning_rate": 4.603597504899322e-06, | |
| "loss": 1.0999096632003784, | |
| "step": 762 | |
| }, | |
| { | |
| "epoch": 0.9328449328449329, | |
| "grad_norm": 1.6523158550262451, | |
| "learning_rate": 4.601018647888677e-06, | |
| "loss": 0.8729748129844666, | |
| "step": 764 | |
| }, | |
| { | |
| "epoch": 0.9352869352869353, | |
| "grad_norm": 62.29137420654297, | |
| "learning_rate": 4.598432245322729e-06, | |
| "loss": 0.5466877818107605, | |
| "step": 766 | |
| }, | |
| { | |
| "epoch": 0.9377289377289377, | |
| "grad_norm": 1.815721035003662, | |
| "learning_rate": 4.595838307744775e-06, | |
| "loss": 0.8582451939582825, | |
| "step": 768 | |
| }, | |
| { | |
| "epoch": 0.9401709401709402, | |
| "grad_norm": 1.5688364505767822, | |
| "learning_rate": 4.593236845728832e-06, | |
| "loss": 0.7072654962539673, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.9426129426129426, | |
| "grad_norm": 1.4338949918746948, | |
| "learning_rate": 4.590627869879586e-06, | |
| "loss": 1.053293228149414, | |
| "step": 772 | |
| }, | |
| { | |
| "epoch": 0.945054945054945, | |
| "grad_norm": 2.2348110675811768, | |
| "learning_rate": 4.588011390832357e-06, | |
| "loss": 1.2199137210845947, | |
| "step": 774 | |
| }, | |
| { | |
| "epoch": 0.9474969474969475, | |
| "grad_norm": 2.0182247161865234, | |
| "learning_rate": 4.585387419253048e-06, | |
| "loss": 1.1575353145599365, | |
| "step": 776 | |
| }, | |
| { | |
| "epoch": 0.9499389499389499, | |
| "grad_norm": 2.0367462635040283, | |
| "learning_rate": 4.582755965838105e-06, | |
| "loss": 0.842775821685791, | |
| "step": 778 | |
| }, | |
| { | |
| "epoch": 0.9523809523809523, | |
| "grad_norm": 1.4695345163345337, | |
| "learning_rate": 4.580117041314476e-06, | |
| "loss": 0.9546113610267639, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.9548229548229549, | |
| "grad_norm": 4.101283550262451, | |
| "learning_rate": 4.577470656439562e-06, | |
| "loss": 1.0789297819137573, | |
| "step": 782 | |
| }, | |
| { | |
| "epoch": 0.9572649572649573, | |
| "grad_norm": 1.6646188497543335, | |
| "learning_rate": 4.574816822001175e-06, | |
| "loss": 1.2004691362380981, | |
| "step": 784 | |
| }, | |
| { | |
| "epoch": 0.9597069597069597, | |
| "grad_norm": 1.4552080631256104, | |
| "learning_rate": 4.572155548817498e-06, | |
| "loss": 1.2365154027938843, | |
| "step": 786 | |
| }, | |
| { | |
| "epoch": 0.9621489621489622, | |
| "grad_norm": 2.4475789070129395, | |
| "learning_rate": 4.5694868477370325e-06, | |
| "loss": 1.330816388130188, | |
| "step": 788 | |
| }, | |
| { | |
| "epoch": 0.9645909645909646, | |
| "grad_norm": 2.0710349082946777, | |
| "learning_rate": 4.566810729638565e-06, | |
| "loss": 1.2555092573165894, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.967032967032967, | |
| "grad_norm": 1.5041587352752686, | |
| "learning_rate": 4.564127205431112e-06, | |
| "loss": 1.0104345083236694, | |
| "step": 792 | |
| }, | |
| { | |
| "epoch": 0.9694749694749695, | |
| "grad_norm": 1.5308382511138916, | |
| "learning_rate": 4.5614362860538855e-06, | |
| "loss": 1.2593212127685547, | |
| "step": 794 | |
| }, | |
| { | |
| "epoch": 0.9719169719169719, | |
| "grad_norm": 1.3870609998703003, | |
| "learning_rate": 4.558737982476238e-06, | |
| "loss": 1.1537375450134277, | |
| "step": 796 | |
| }, | |
| { | |
| "epoch": 0.9743589743589743, | |
| "grad_norm": 1.61140775680542, | |
| "learning_rate": 4.556032305697628e-06, | |
| "loss": 1.154402732849121, | |
| "step": 798 | |
| }, | |
| { | |
| "epoch": 0.9768009768009768, | |
| "grad_norm": 2.04144024848938, | |
| "learning_rate": 4.553319266747566e-06, | |
| "loss": 1.2140703201293945, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.9792429792429792, | |
| "grad_norm": 2.3697726726531982, | |
| "learning_rate": 4.550598876685578e-06, | |
| "loss": 0.895045280456543, | |
| "step": 802 | |
| }, | |
| { | |
| "epoch": 0.9816849816849816, | |
| "grad_norm": 1.659780502319336, | |
| "learning_rate": 4.547871146601154e-06, | |
| "loss": 1.1046396493911743, | |
| "step": 804 | |
| }, | |
| { | |
| "epoch": 0.9841269841269841, | |
| "grad_norm": 2.151529312133789, | |
| "learning_rate": 4.545136087613705e-06, | |
| "loss": 1.5022149085998535, | |
| "step": 806 | |
| }, | |
| { | |
| "epoch": 0.9865689865689866, | |
| "grad_norm": 2.602121353149414, | |
| "learning_rate": 4.5423937108725195e-06, | |
| "loss": 0.8793852925300598, | |
| "step": 808 | |
| }, | |
| { | |
| "epoch": 0.989010989010989, | |
| "grad_norm": 3.102078914642334, | |
| "learning_rate": 4.5396440275567135e-06, | |
| "loss": 1.256363034248352, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.9914529914529915, | |
| "grad_norm": 2.6925265789031982, | |
| "learning_rate": 4.536887048875191e-06, | |
| "loss": 1.2054369449615479, | |
| "step": 812 | |
| }, | |
| { | |
| "epoch": 0.9938949938949939, | |
| "grad_norm": 2.5501744747161865, | |
| "learning_rate": 4.5341227860665935e-06, | |
| "loss": 1.278929591178894, | |
| "step": 814 | |
| }, | |
| { | |
| "epoch": 0.9963369963369964, | |
| "grad_norm": 1.176234483718872, | |
| "learning_rate": 4.531351250399254e-06, | |
| "loss": 1.1633700132369995, | |
| "step": 816 | |
| }, | |
| { | |
| "epoch": 0.9987789987789988, | |
| "grad_norm": 1.4937130212783813, | |
| "learning_rate": 4.5285724531711575e-06, | |
| "loss": 1.0776214599609375, | |
| "step": 818 | |
| }, | |
| { | |
| "epoch": 1.0012210012210012, | |
| "grad_norm": 3.934837818145752, | |
| "learning_rate": 4.525786405709885e-06, | |
| "loss": 0.9735159873962402, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 1.0036630036630036, | |
| "grad_norm": 1.3186302185058594, | |
| "learning_rate": 4.5229931193725775e-06, | |
| "loss": 1.1400266885757446, | |
| "step": 822 | |
| }, | |
| { | |
| "epoch": 1.006105006105006, | |
| "grad_norm": 3.5487184524536133, | |
| "learning_rate": 4.520192605545879e-06, | |
| "loss": 0.522385835647583, | |
| "step": 824 | |
| }, | |
| { | |
| "epoch": 1.0085470085470085, | |
| "grad_norm": 1.5596842765808105, | |
| "learning_rate": 4.517384875645903e-06, | |
| "loss": 1.0808534622192383, | |
| "step": 826 | |
| }, | |
| { | |
| "epoch": 1.010989010989011, | |
| "grad_norm": 12.584797859191895, | |
| "learning_rate": 4.514569941118172e-06, | |
| "loss": 0.8573816418647766, | |
| "step": 828 | |
| }, | |
| { | |
| "epoch": 1.0134310134310134, | |
| "grad_norm": 2.6060709953308105, | |
| "learning_rate": 4.511747813437582e-06, | |
| "loss": 0.8253161907196045, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 1.0158730158730158, | |
| "grad_norm": 6.2511420249938965, | |
| "learning_rate": 4.50891850410835e-06, | |
| "loss": 0.8468748331069946, | |
| "step": 832 | |
| }, | |
| { | |
| "epoch": 1.0183150183150182, | |
| "grad_norm": 1.8716773986816406, | |
| "learning_rate": 4.506082024663969e-06, | |
| "loss": 0.833984375, | |
| "step": 834 | |
| }, | |
| { | |
| "epoch": 1.0207570207570207, | |
| "grad_norm": 2.3107144832611084, | |
| "learning_rate": 4.503238386667159e-06, | |
| "loss": 1.1121944189071655, | |
| "step": 836 | |
| }, | |
| { | |
| "epoch": 1.0231990231990231, | |
| "grad_norm": 1.7429516315460205, | |
| "learning_rate": 4.500387601709824e-06, | |
| "loss": 1.1830196380615234, | |
| "step": 838 | |
| }, | |
| { | |
| "epoch": 1.0256410256410255, | |
| "grad_norm": 2.6718502044677734, | |
| "learning_rate": 4.497529681413001e-06, | |
| "loss": 0.8847705125808716, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 1.028083028083028, | |
| "grad_norm": 3.5513010025024414, | |
| "learning_rate": 4.4946646374268105e-06, | |
| "loss": 0.9079216122627258, | |
| "step": 842 | |
| }, | |
| { | |
| "epoch": 1.0305250305250304, | |
| "grad_norm": 1.6211357116699219, | |
| "learning_rate": 4.491792481430419e-06, | |
| "loss": 1.1324057579040527, | |
| "step": 844 | |
| }, | |
| { | |
| "epoch": 1.032967032967033, | |
| "grad_norm": 4.688745975494385, | |
| "learning_rate": 4.488913225131977e-06, | |
| "loss": 0.8587746620178223, | |
| "step": 846 | |
| }, | |
| { | |
| "epoch": 1.0354090354090355, | |
| "grad_norm": 1.1522576808929443, | |
| "learning_rate": 4.4860268802685865e-06, | |
| "loss": 1.1451654434204102, | |
| "step": 848 | |
| }, | |
| { | |
| "epoch": 1.037851037851038, | |
| "grad_norm": 8.89565658569336, | |
| "learning_rate": 4.483133458606239e-06, | |
| "loss": 0.93172687292099, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 1.0402930402930404, | |
| "grad_norm": 2.5761866569519043, | |
| "learning_rate": 4.480232971939777e-06, | |
| "loss": 0.6277377605438232, | |
| "step": 852 | |
| }, | |
| { | |
| "epoch": 1.0427350427350428, | |
| "grad_norm": 1.6280884742736816, | |
| "learning_rate": 4.477325432092845e-06, | |
| "loss": 1.103888750076294, | |
| "step": 854 | |
| }, | |
| { | |
| "epoch": 1.0451770451770452, | |
| "grad_norm": 6.281750202178955, | |
| "learning_rate": 4.474410850917835e-06, | |
| "loss": 0.8592535257339478, | |
| "step": 856 | |
| }, | |
| { | |
| "epoch": 1.0476190476190477, | |
| "grad_norm": 2.389967203140259, | |
| "learning_rate": 4.471489240295845e-06, | |
| "loss": 1.1699893474578857, | |
| "step": 858 | |
| }, | |
| { | |
| "epoch": 1.05006105006105, | |
| "grad_norm": 1.8374966382980347, | |
| "learning_rate": 4.4685606121366295e-06, | |
| "loss": 0.7583469748497009, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 1.0525030525030525, | |
| "grad_norm": 1.7573800086975098, | |
| "learning_rate": 4.4656249783785465e-06, | |
| "loss": 1.051936149597168, | |
| "step": 862 | |
| }, | |
| { | |
| "epoch": 1.054945054945055, | |
| "grad_norm": 3.5590884685516357, | |
| "learning_rate": 4.462682350988513e-06, | |
| "loss": 1.1263583898544312, | |
| "step": 864 | |
| }, | |
| { | |
| "epoch": 1.0573870573870574, | |
| "grad_norm": 2.979887008666992, | |
| "learning_rate": 4.459732741961957e-06, | |
| "loss": 1.4335747957229614, | |
| "step": 866 | |
| }, | |
| { | |
| "epoch": 1.0598290598290598, | |
| "grad_norm": 2.712446689605713, | |
| "learning_rate": 4.456776163322761e-06, | |
| "loss": 0.39710327982902527, | |
| "step": 868 | |
| }, | |
| { | |
| "epoch": 1.0622710622710623, | |
| "grad_norm": 1.8534517288208008, | |
| "learning_rate": 4.453812627123227e-06, | |
| "loss": 0.9377206563949585, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 1.0647130647130647, | |
| "grad_norm": 1.9186785221099854, | |
| "learning_rate": 4.450842145444012e-06, | |
| "loss": 1.0142245292663574, | |
| "step": 872 | |
| }, | |
| { | |
| "epoch": 1.0671550671550671, | |
| "grad_norm": 1.7083848714828491, | |
| "learning_rate": 4.4478647303940905e-06, | |
| "loss": 0.7915772199630737, | |
| "step": 874 | |
| }, | |
| { | |
| "epoch": 1.0695970695970696, | |
| "grad_norm": 2.7380499839782715, | |
| "learning_rate": 4.4448803941106964e-06, | |
| "loss": 1.10654878616333, | |
| "step": 876 | |
| }, | |
| { | |
| "epoch": 1.072039072039072, | |
| "grad_norm": 2.324699878692627, | |
| "learning_rate": 4.44188914875928e-06, | |
| "loss": 1.0548545122146606, | |
| "step": 878 | |
| }, | |
| { | |
| "epoch": 1.0744810744810744, | |
| "grad_norm": 1.84871506690979, | |
| "learning_rate": 4.438891006533456e-06, | |
| "loss": 0.747241735458374, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 1.0769230769230769, | |
| "grad_norm": 1.8665426969528198, | |
| "learning_rate": 4.435885979654953e-06, | |
| "loss": 1.0984582901000977, | |
| "step": 882 | |
| }, | |
| { | |
| "epoch": 1.0793650793650793, | |
| "grad_norm": 3.5970113277435303, | |
| "learning_rate": 4.432874080373565e-06, | |
| "loss": 0.7559424638748169, | |
| "step": 884 | |
| }, | |
| { | |
| "epoch": 1.0818070818070817, | |
| "grad_norm": 8.55659294128418, | |
| "learning_rate": 4.4298553209671e-06, | |
| "loss": 0.6807610988616943, | |
| "step": 886 | |
| }, | |
| { | |
| "epoch": 1.0842490842490842, | |
| "grad_norm": 2.020759344100952, | |
| "learning_rate": 4.426829713741332e-06, | |
| "loss": 1.144335389137268, | |
| "step": 888 | |
| }, | |
| { | |
| "epoch": 1.0866910866910866, | |
| "grad_norm": 1.3028897047042847, | |
| "learning_rate": 4.4237972710299475e-06, | |
| "loss": 0.8821287751197815, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 1.089133089133089, | |
| "grad_norm": 4.502945899963379, | |
| "learning_rate": 4.420758005194502e-06, | |
| "loss": 1.0961552858352661, | |
| "step": 892 | |
| }, | |
| { | |
| "epoch": 1.0915750915750915, | |
| "grad_norm": 2.57550048828125, | |
| "learning_rate": 4.417711928624358e-06, | |
| "loss": 1.079803705215454, | |
| "step": 894 | |
| }, | |
| { | |
| "epoch": 1.0940170940170941, | |
| "grad_norm": 1.6765140295028687, | |
| "learning_rate": 4.41465905373665e-06, | |
| "loss": 1.0386958122253418, | |
| "step": 896 | |
| }, | |
| { | |
| "epoch": 1.0964590964590966, | |
| "grad_norm": 1.710028052330017, | |
| "learning_rate": 4.411599392976217e-06, | |
| "loss": 0.8400865793228149, | |
| "step": 898 | |
| }, | |
| { | |
| "epoch": 1.098901098901099, | |
| "grad_norm": 1.6493473052978516, | |
| "learning_rate": 4.408532958815566e-06, | |
| "loss": 0.7645131945610046, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 1.1013431013431014, | |
| "grad_norm": 5.944808006286621, | |
| "learning_rate": 4.405459763754814e-06, | |
| "loss": 0.5732899904251099, | |
| "step": 902 | |
| }, | |
| { | |
| "epoch": 1.1037851037851039, | |
| "grad_norm": 2.6218488216400146, | |
| "learning_rate": 4.402379820321636e-06, | |
| "loss": 0.6524146199226379, | |
| "step": 904 | |
| }, | |
| { | |
| "epoch": 1.1062271062271063, | |
| "grad_norm": 6.440639972686768, | |
| "learning_rate": 4.399293141071219e-06, | |
| "loss": 1.0054997205734253, | |
| "step": 906 | |
| }, | |
| { | |
| "epoch": 1.1086691086691087, | |
| "grad_norm": 2.3345463275909424, | |
| "learning_rate": 4.396199738586208e-06, | |
| "loss": 1.0879311561584473, | |
| "step": 908 | |
| }, | |
| { | |
| "epoch": 1.1111111111111112, | |
| "grad_norm": 1.7419912815093994, | |
| "learning_rate": 4.393099625476652e-06, | |
| "loss": 0.7915565371513367, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 1.1135531135531136, | |
| "grad_norm": 7.168088912963867, | |
| "learning_rate": 4.389992814379959e-06, | |
| "loss": 1.140365719795227, | |
| "step": 912 | |
| }, | |
| { | |
| "epoch": 1.115995115995116, | |
| "grad_norm": 4.943615913391113, | |
| "learning_rate": 4.386879317960839e-06, | |
| "loss": 0.7865337133407593, | |
| "step": 914 | |
| }, | |
| { | |
| "epoch": 1.1184371184371185, | |
| "grad_norm": 2.3716390132904053, | |
| "learning_rate": 4.383759148911254e-06, | |
| "loss": 0.8161624670028687, | |
| "step": 916 | |
| }, | |
| { | |
| "epoch": 1.120879120879121, | |
| "grad_norm": 1.9857978820800781, | |
| "learning_rate": 4.380632319950368e-06, | |
| "loss": 1.118779182434082, | |
| "step": 918 | |
| }, | |
| { | |
| "epoch": 1.1233211233211233, | |
| "grad_norm": 1.886316180229187, | |
| "learning_rate": 4.377498843824491e-06, | |
| "loss": 1.064637541770935, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 1.1257631257631258, | |
| "grad_norm": 1.9790380001068115, | |
| "learning_rate": 4.374358733307035e-06, | |
| "loss": 0.8831157684326172, | |
| "step": 922 | |
| }, | |
| { | |
| "epoch": 1.1282051282051282, | |
| "grad_norm": 2.9242825508117676, | |
| "learning_rate": 4.37121200119845e-06, | |
| "loss": 0.7275701761245728, | |
| "step": 924 | |
| }, | |
| { | |
| "epoch": 1.1306471306471306, | |
| "grad_norm": 3.6830453872680664, | |
| "learning_rate": 4.368058660326182e-06, | |
| "loss": 0.6424413323402405, | |
| "step": 926 | |
| }, | |
| { | |
| "epoch": 1.133089133089133, | |
| "grad_norm": 23.97179412841797, | |
| "learning_rate": 4.364898723544618e-06, | |
| "loss": 0.5658762454986572, | |
| "step": 928 | |
| }, | |
| { | |
| "epoch": 1.1355311355311355, | |
| "grad_norm": 1.438925862312317, | |
| "learning_rate": 4.361732203735032e-06, | |
| "loss": 1.0492331981658936, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 1.137973137973138, | |
| "grad_norm": 2.813020706176758, | |
| "learning_rate": 4.358559113805531e-06, | |
| "loss": 1.0911149978637695, | |
| "step": 932 | |
| }, | |
| { | |
| "epoch": 1.1404151404151404, | |
| "grad_norm": 3.2957863807678223, | |
| "learning_rate": 4.355379466691008e-06, | |
| "loss": 0.9345967769622803, | |
| "step": 934 | |
| }, | |
| { | |
| "epoch": 1.1428571428571428, | |
| "grad_norm": 1.7585479021072388, | |
| "learning_rate": 4.3521932753530856e-06, | |
| "loss": 0.9201436042785645, | |
| "step": 936 | |
| }, | |
| { | |
| "epoch": 1.1452991452991452, | |
| "grad_norm": 3.857144594192505, | |
| "learning_rate": 4.34900055278006e-06, | |
| "loss": 1.0130703449249268, | |
| "step": 938 | |
| }, | |
| { | |
| "epoch": 1.1477411477411477, | |
| "grad_norm": 1.5104448795318604, | |
| "learning_rate": 4.345801311986855e-06, | |
| "loss": 1.0904299020767212, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 1.15018315018315, | |
| "grad_norm": 1.7477518320083618, | |
| "learning_rate": 4.342595566014965e-06, | |
| "loss": 1.0437507629394531, | |
| "step": 942 | |
| }, | |
| { | |
| "epoch": 1.1526251526251525, | |
| "grad_norm": 2.9423069953918457, | |
| "learning_rate": 4.339383327932402e-06, | |
| "loss": 0.820652425289154, | |
| "step": 944 | |
| }, | |
| { | |
| "epoch": 1.155067155067155, | |
| "grad_norm": 1.767242670059204, | |
| "learning_rate": 4.33616461083364e-06, | |
| "loss": 0.788296103477478, | |
| "step": 946 | |
| }, | |
| { | |
| "epoch": 1.1575091575091574, | |
| "grad_norm": 10.629752159118652, | |
| "learning_rate": 4.33293942783957e-06, | |
| "loss": 0.6116584539413452, | |
| "step": 948 | |
| }, | |
| { | |
| "epoch": 1.1599511599511598, | |
| "grad_norm": 3.033034086227417, | |
| "learning_rate": 4.329707792097436e-06, | |
| "loss": 1.0888707637786865, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 1.1623931623931625, | |
| "grad_norm": 1.710669755935669, | |
| "learning_rate": 4.326469716780787e-06, | |
| "loss": 1.091694712638855, | |
| "step": 952 | |
| }, | |
| { | |
| "epoch": 1.164835164835165, | |
| "grad_norm": 3.053687572479248, | |
| "learning_rate": 4.323225215089425e-06, | |
| "loss": 0.9434468746185303, | |
| "step": 954 | |
| }, | |
| { | |
| "epoch": 1.1672771672771673, | |
| "grad_norm": 1.6665173768997192, | |
| "learning_rate": 4.319974300249346e-06, | |
| "loss": 0.8895185589790344, | |
| "step": 956 | |
| }, | |
| { | |
| "epoch": 1.1697191697191698, | |
| "grad_norm": 5.074321269989014, | |
| "learning_rate": 4.3167169855126885e-06, | |
| "loss": 0.7732067108154297, | |
| "step": 958 | |
| }, | |
| { | |
| "epoch": 1.1721611721611722, | |
| "grad_norm": 3.9608309268951416, | |
| "learning_rate": 4.313453284157683e-06, | |
| "loss": 1.162553071975708, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 1.1746031746031746, | |
| "grad_norm": 2.7939271926879883, | |
| "learning_rate": 4.310183209488592e-06, | |
| "loss": 0.9017969369888306, | |
| "step": 962 | |
| }, | |
| { | |
| "epoch": 1.177045177045177, | |
| "grad_norm": 2.613326072692871, | |
| "learning_rate": 4.306906774835658e-06, | |
| "loss": 0.7510517835617065, | |
| "step": 964 | |
| }, | |
| { | |
| "epoch": 1.1794871794871795, | |
| "grad_norm": 0.82303386926651, | |
| "learning_rate": 4.303623993555051e-06, | |
| "loss": 1.073706030845642, | |
| "step": 966 | |
| }, | |
| { | |
| "epoch": 1.181929181929182, | |
| "grad_norm": 4.074831962585449, | |
| "learning_rate": 4.300334879028813e-06, | |
| "loss": 1.135977029800415, | |
| "step": 968 | |
| }, | |
| { | |
| "epoch": 1.1843711843711844, | |
| "grad_norm": 3.945430040359497, | |
| "learning_rate": 4.2970394446648015e-06, | |
| "loss": 0.7781526446342468, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 1.1868131868131868, | |
| "grad_norm": 1.8157840967178345, | |
| "learning_rate": 4.293737703896636e-06, | |
| "loss": 1.197265625, | |
| "step": 972 | |
| }, | |
| { | |
| "epoch": 1.1892551892551892, | |
| "grad_norm": 2.2199547290802, | |
| "learning_rate": 4.290429670183648e-06, | |
| "loss": 0.8672367334365845, | |
| "step": 974 | |
| }, | |
| { | |
| "epoch": 1.1916971916971917, | |
| "grad_norm": 11.338154792785645, | |
| "learning_rate": 4.287115357010816e-06, | |
| "loss": 0.7362724542617798, | |
| "step": 976 | |
| }, | |
| { | |
| "epoch": 1.1941391941391941, | |
| "grad_norm": 1.7665647268295288, | |
| "learning_rate": 4.283794777888718e-06, | |
| "loss": 0.8837488293647766, | |
| "step": 978 | |
| }, | |
| { | |
| "epoch": 1.1965811965811965, | |
| "grad_norm": 1.333461880683899, | |
| "learning_rate": 4.280467946353478e-06, | |
| "loss": 1.094375491142273, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 1.199023199023199, | |
| "grad_norm": 1.3403749465942383, | |
| "learning_rate": 4.277134875966703e-06, | |
| "loss": 1.0798702239990234, | |
| "step": 982 | |
| }, | |
| { | |
| "epoch": 1.2014652014652014, | |
| "grad_norm": 1.700862169265747, | |
| "learning_rate": 4.273795580315437e-06, | |
| "loss": 1.195528507232666, | |
| "step": 984 | |
| }, | |
| { | |
| "epoch": 1.2039072039072038, | |
| "grad_norm": 8.78385066986084, | |
| "learning_rate": 4.270450073012095e-06, | |
| "loss": 0.7649343013763428, | |
| "step": 986 | |
| }, | |
| { | |
| "epoch": 1.2063492063492063, | |
| "grad_norm": 67.73321533203125, | |
| "learning_rate": 4.267098367694419e-06, | |
| "loss": 0.7331146001815796, | |
| "step": 988 | |
| }, | |
| { | |
| "epoch": 1.2087912087912087, | |
| "grad_norm": 3.189149856567383, | |
| "learning_rate": 4.263740478025412e-06, | |
| "loss": 0.8756888508796692, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 1.2112332112332111, | |
| "grad_norm": 4.3046770095825195, | |
| "learning_rate": 4.2603764176932925e-06, | |
| "loss": 1.1108595132827759, | |
| "step": 992 | |
| }, | |
| { | |
| "epoch": 1.2136752136752136, | |
| "grad_norm": 2.0171821117401123, | |
| "learning_rate": 4.257006200411429e-06, | |
| "loss": 1.0103721618652344, | |
| "step": 994 | |
| }, | |
| { | |
| "epoch": 1.2161172161172162, | |
| "grad_norm": 2.713459014892578, | |
| "learning_rate": 4.25362983991829e-06, | |
| "loss": 0.9784596562385559, | |
| "step": 996 | |
| }, | |
| { | |
| "epoch": 1.2185592185592187, | |
| "grad_norm": 1.9199753999710083, | |
| "learning_rate": 4.250247349977385e-06, | |
| "loss": 1.062201738357544, | |
| "step": 998 | |
| }, | |
| { | |
| "epoch": 1.221001221001221, | |
| "grad_norm": 11.171542167663574, | |
| "learning_rate": 4.246858744377212e-06, | |
| "loss": 0.744211733341217, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 1.2234432234432235, | |
| "grad_norm": 2.0410537719726562, | |
| "learning_rate": 4.243464036931198e-06, | |
| "loss": 1.0498521327972412, | |
| "step": 1002 | |
| }, | |
| { | |
| "epoch": 1.225885225885226, | |
| "grad_norm": 1.5847947597503662, | |
| "learning_rate": 4.240063241477643e-06, | |
| "loss": 1.1089041233062744, | |
| "step": 1004 | |
| }, | |
| { | |
| "epoch": 1.2283272283272284, | |
| "grad_norm": 3.658682346343994, | |
| "learning_rate": 4.2366563718796664e-06, | |
| "loss": 0.8046331405639648, | |
| "step": 1006 | |
| }, | |
| { | |
| "epoch": 1.2307692307692308, | |
| "grad_norm": 1.940625548362732, | |
| "learning_rate": 4.233243442025145e-06, | |
| "loss": 0.7440409064292908, | |
| "step": 1008 | |
| }, | |
| { | |
| "epoch": 1.2332112332112333, | |
| "grad_norm": 1.9868489503860474, | |
| "learning_rate": 4.229824465826665e-06, | |
| "loss": 1.144100308418274, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 1.2356532356532357, | |
| "grad_norm": 1.2745945453643799, | |
| "learning_rate": 4.226399457221454e-06, | |
| "loss": 0.6603936553001404, | |
| "step": 1012 | |
| }, | |
| { | |
| "epoch": 1.2380952380952381, | |
| "grad_norm": 1.5920745134353638, | |
| "learning_rate": 4.222968430171336e-06, | |
| "loss": 1.1303434371948242, | |
| "step": 1014 | |
| }, | |
| { | |
| "epoch": 1.2405372405372406, | |
| "grad_norm": 4.021664619445801, | |
| "learning_rate": 4.219531398662665e-06, | |
| "loss": 1.0450407266616821, | |
| "step": 1016 | |
| }, | |
| { | |
| "epoch": 1.242979242979243, | |
| "grad_norm": 1.6237807273864746, | |
| "learning_rate": 4.216088376706274e-06, | |
| "loss": 1.0899841785430908, | |
| "step": 1018 | |
| }, | |
| { | |
| "epoch": 1.2454212454212454, | |
| "grad_norm": 2.2023823261260986, | |
| "learning_rate": 4.212639378337413e-06, | |
| "loss": 0.7024634480476379, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 1.2478632478632479, | |
| "grad_norm": 8.069097518920898, | |
| "learning_rate": 4.209184417615697e-06, | |
| "loss": 0.9512033462524414, | |
| "step": 1022 | |
| }, | |
| { | |
| "epoch": 1.2503052503052503, | |
| "grad_norm": 1.6683331727981567, | |
| "learning_rate": 4.2057235086250455e-06, | |
| "loss": 1.052414059638977, | |
| "step": 1024 | |
| }, | |
| { | |
| "epoch": 1.2527472527472527, | |
| "grad_norm": 3.130899667739868, | |
| "learning_rate": 4.2022566654736255e-06, | |
| "loss": 1.0695925951004028, | |
| "step": 1026 | |
| }, | |
| { | |
| "epoch": 1.2551892551892552, | |
| "grad_norm": 1.8631014823913574, | |
| "learning_rate": 4.198783902293794e-06, | |
| "loss": 0.9780709147453308, | |
| "step": 1028 | |
| }, | |
| { | |
| "epoch": 1.2576312576312576, | |
| "grad_norm": 2.728553295135498, | |
| "learning_rate": 4.1953052332420415e-06, | |
| "loss": 0.9186390042304993, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 1.26007326007326, | |
| "grad_norm": 1.7069987058639526, | |
| "learning_rate": 4.191820672498931e-06, | |
| "loss": 1.138177514076233, | |
| "step": 1032 | |
| }, | |
| { | |
| "epoch": 1.2625152625152625, | |
| "grad_norm": 3.96309494972229, | |
| "learning_rate": 4.188330234269046e-06, | |
| "loss": 1.230303406715393, | |
| "step": 1034 | |
| }, | |
| { | |
| "epoch": 1.264957264957265, | |
| "grad_norm": 0.6388441920280457, | |
| "learning_rate": 4.184833932780927e-06, | |
| "loss": 0.7601897716522217, | |
| "step": 1036 | |
| }, | |
| { | |
| "epoch": 1.2673992673992673, | |
| "grad_norm": 2.074471950531006, | |
| "learning_rate": 4.181331782287015e-06, | |
| "loss": 0.6320565938949585, | |
| "step": 1038 | |
| }, | |
| { | |
| "epoch": 1.2698412698412698, | |
| "grad_norm": 1.3992935419082642, | |
| "learning_rate": 4.177823797063597e-06, | |
| "loss": 0.7402109503746033, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 1.2722832722832722, | |
| "grad_norm": 1.8529661893844604, | |
| "learning_rate": 4.174309991410742e-06, | |
| "loss": 1.1013227701187134, | |
| "step": 1042 | |
| }, | |
| { | |
| "epoch": 1.2747252747252746, | |
| "grad_norm": 1.740545392036438, | |
| "learning_rate": 4.1707903796522474e-06, | |
| "loss": 0.9940573573112488, | |
| "step": 1044 | |
| }, | |
| { | |
| "epoch": 1.277167277167277, | |
| "grad_norm": 3.5190329551696777, | |
| "learning_rate": 4.1672649761355785e-06, | |
| "loss": 1.0399502515792847, | |
| "step": 1046 | |
| }, | |
| { | |
| "epoch": 1.2796092796092795, | |
| "grad_norm": 3.40808367729187, | |
| "learning_rate": 4.163733795231808e-06, | |
| "loss": 0.8423551321029663, | |
| "step": 1048 | |
| }, | |
| { | |
| "epoch": 1.282051282051282, | |
| "grad_norm": 8.643896102905273, | |
| "learning_rate": 4.160196851335564e-06, | |
| "loss": 0.3857470452785492, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 1.2844932844932844, | |
| "grad_norm": 2.840670347213745, | |
| "learning_rate": 4.156654158864964e-06, | |
| "loss": 1.0681036710739136, | |
| "step": 1052 | |
| }, | |
| { | |
| "epoch": 1.2869352869352868, | |
| "grad_norm": 3.3994009494781494, | |
| "learning_rate": 4.15310573226156e-06, | |
| "loss": 0.86181640625, | |
| "step": 1054 | |
| }, | |
| { | |
| "epoch": 1.2893772893772895, | |
| "grad_norm": 5.254836559295654, | |
| "learning_rate": 4.149551585990277e-06, | |
| "loss": 0.7644107937812805, | |
| "step": 1056 | |
| }, | |
| { | |
| "epoch": 1.291819291819292, | |
| "grad_norm": 2.2039105892181396, | |
| "learning_rate": 4.1459917345393614e-06, | |
| "loss": 1.2520135641098022, | |
| "step": 1058 | |
| }, | |
| { | |
| "epoch": 1.2942612942612943, | |
| "grad_norm": 1.7039287090301514, | |
| "learning_rate": 4.142426192420308e-06, | |
| "loss": 1.0944513082504272, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 1.2967032967032968, | |
| "grad_norm": 4.587660789489746, | |
| "learning_rate": 4.138854974167818e-06, | |
| "loss": 0.6725199222564697, | |
| "step": 1062 | |
| }, | |
| { | |
| "epoch": 1.2991452991452992, | |
| "grad_norm": 1.5989353656768799, | |
| "learning_rate": 4.135278094339725e-06, | |
| "loss": 1.1340867280960083, | |
| "step": 1064 | |
| }, | |
| { | |
| "epoch": 1.3015873015873016, | |
| "grad_norm": 2.533905029296875, | |
| "learning_rate": 4.131695567516943e-06, | |
| "loss": 0.7726882100105286, | |
| "step": 1066 | |
| }, | |
| { | |
| "epoch": 1.304029304029304, | |
| "grad_norm": 1.1167593002319336, | |
| "learning_rate": 4.1281074083034065e-06, | |
| "loss": 1.0258402824401855, | |
| "step": 1068 | |
| }, | |
| { | |
| "epoch": 1.3064713064713065, | |
| "grad_norm": 8.628117561340332, | |
| "learning_rate": 4.12451363132601e-06, | |
| "loss": 0.8221207857131958, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 1.308913308913309, | |
| "grad_norm": 3.239126443862915, | |
| "learning_rate": 4.120914251234548e-06, | |
| "loss": 1.0316239595413208, | |
| "step": 1072 | |
| }, | |
| { | |
| "epoch": 1.3113553113553114, | |
| "grad_norm": 3.97194504737854, | |
| "learning_rate": 4.117309282701655e-06, | |
| "loss": 0.7956058382987976, | |
| "step": 1074 | |
| }, | |
| { | |
| "epoch": 1.3137973137973138, | |
| "grad_norm": 2.8797948360443115, | |
| "learning_rate": 4.1136987404227476e-06, | |
| "loss": 0.7710628509521484, | |
| "step": 1076 | |
| }, | |
| { | |
| "epoch": 1.3162393162393162, | |
| "grad_norm": 6.195582389831543, | |
| "learning_rate": 4.110082639115963e-06, | |
| "loss": 1.073829174041748, | |
| "step": 1078 | |
| }, | |
| { | |
| "epoch": 1.3186813186813187, | |
| "grad_norm": 1.3067351579666138, | |
| "learning_rate": 4.106460993522101e-06, | |
| "loss": 0.9566723108291626, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 1.321123321123321, | |
| "grad_norm": 2.2232918739318848, | |
| "learning_rate": 4.102833818404557e-06, | |
| "loss": 0.9678391218185425, | |
| "step": 1082 | |
| }, | |
| { | |
| "epoch": 1.3235653235653235, | |
| "grad_norm": 2.109621047973633, | |
| "learning_rate": 4.099201128549275e-06, | |
| "loss": 1.1640703678131104, | |
| "step": 1084 | |
| }, | |
| { | |
| "epoch": 1.326007326007326, | |
| "grad_norm": 4.480690956115723, | |
| "learning_rate": 4.095562938764672e-06, | |
| "loss": 1.0956099033355713, | |
| "step": 1086 | |
| }, | |
| { | |
| "epoch": 1.3284493284493284, | |
| "grad_norm": 2.0747313499450684, | |
| "learning_rate": 4.091919263881592e-06, | |
| "loss": 1.097609281539917, | |
| "step": 1088 | |
| }, | |
| { | |
| "epoch": 1.3308913308913308, | |
| "grad_norm": 2.344632387161255, | |
| "learning_rate": 4.088270118753232e-06, | |
| "loss": 0.7443391680717468, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 1.3333333333333333, | |
| "grad_norm": 32.47975540161133, | |
| "learning_rate": 4.084615518255092e-06, | |
| "loss": 1.0534281730651855, | |
| "step": 1092 | |
| }, | |
| { | |
| "epoch": 1.3357753357753357, | |
| "grad_norm": 1.4418542385101318, | |
| "learning_rate": 4.08095547728491e-06, | |
| "loss": 1.1028659343719482, | |
| "step": 1094 | |
| }, | |
| { | |
| "epoch": 1.3382173382173383, | |
| "grad_norm": 6.136029243469238, | |
| "learning_rate": 4.077290010762602e-06, | |
| "loss": 0.47979384660720825, | |
| "step": 1096 | |
| }, | |
| { | |
| "epoch": 1.3406593406593408, | |
| "grad_norm": 2.139401435852051, | |
| "learning_rate": 4.0736191336301986e-06, | |
| "loss": 1.1901733875274658, | |
| "step": 1098 | |
| }, | |
| { | |
| "epoch": 1.3431013431013432, | |
| "grad_norm": 1.571408987045288, | |
| "learning_rate": 4.06994286085179e-06, | |
| "loss": 1.075485348701477, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 1.3455433455433456, | |
| "grad_norm": 1.0710482597351074, | |
| "learning_rate": 4.066261207413458e-06, | |
| "loss": 1.0476422309875488, | |
| "step": 1102 | |
| }, | |
| { | |
| "epoch": 1.347985347985348, | |
| "grad_norm": 2.6131324768066406, | |
| "learning_rate": 4.06257418832322e-06, | |
| "loss": 0.8847273588180542, | |
| "step": 1104 | |
| }, | |
| { | |
| "epoch": 1.3504273504273505, | |
| "grad_norm": 1.8128620386123657, | |
| "learning_rate": 4.058881818610966e-06, | |
| "loss": 1.1783521175384521, | |
| "step": 1106 | |
| }, | |
| { | |
| "epoch": 1.352869352869353, | |
| "grad_norm": 34.26594924926758, | |
| "learning_rate": 4.055184113328397e-06, | |
| "loss": 0.9166494011878967, | |
| "step": 1108 | |
| }, | |
| { | |
| "epoch": 1.3553113553113554, | |
| "grad_norm": 1.9319859743118286, | |
| "learning_rate": 4.051481087548966e-06, | |
| "loss": 1.1042914390563965, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 1.3577533577533578, | |
| "grad_norm": 2.550018072128296, | |
| "learning_rate": 4.047772756367811e-06, | |
| "loss": 1.0983607769012451, | |
| "step": 1112 | |
| }, | |
| { | |
| "epoch": 1.3601953601953602, | |
| "grad_norm": 3.659637212753296, | |
| "learning_rate": 4.044059134901701e-06, | |
| "loss": 1.0594271421432495, | |
| "step": 1114 | |
| }, | |
| { | |
| "epoch": 1.3626373626373627, | |
| "grad_norm": 4.164947986602783, | |
| "learning_rate": 4.0403402382889676e-06, | |
| "loss": 0.4707038700580597, | |
| "step": 1116 | |
| }, | |
| { | |
| "epoch": 1.3650793650793651, | |
| "grad_norm": 1.7244220972061157, | |
| "learning_rate": 4.036616081689447e-06, | |
| "loss": 1.137607216835022, | |
| "step": 1118 | |
| }, | |
| { | |
| "epoch": 1.3675213675213675, | |
| "grad_norm": 1.9371610879898071, | |
| "learning_rate": 4.032886680284419e-06, | |
| "loss": 1.1212375164031982, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 1.36996336996337, | |
| "grad_norm": 2.010833263397217, | |
| "learning_rate": 4.029152049276541e-06, | |
| "loss": 1.0424951314926147, | |
| "step": 1122 | |
| }, | |
| { | |
| "epoch": 1.3724053724053724, | |
| "grad_norm": 1.6150962114334106, | |
| "learning_rate": 4.025412203889791e-06, | |
| "loss": 0.9809345602989197, | |
| "step": 1124 | |
| }, | |
| { | |
| "epoch": 1.3748473748473748, | |
| "grad_norm": 2.5580382347106934, | |
| "learning_rate": 4.0216671593694e-06, | |
| "loss": 1.2934308052062988, | |
| "step": 1126 | |
| }, | |
| { | |
| "epoch": 1.3772893772893773, | |
| "grad_norm": 2.092132806777954, | |
| "learning_rate": 4.017916930981797e-06, | |
| "loss": 1.0607208013534546, | |
| "step": 1128 | |
| }, | |
| { | |
| "epoch": 1.3797313797313797, | |
| "grad_norm": 2.038407802581787, | |
| "learning_rate": 4.014161534014538e-06, | |
| "loss": 0.8067485094070435, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 1.3821733821733821, | |
| "grad_norm": 1.479718804359436, | |
| "learning_rate": 4.010400983776253e-06, | |
| "loss": 0.7700361609458923, | |
| "step": 1132 | |
| }, | |
| { | |
| "epoch": 1.3846153846153846, | |
| "grad_norm": 3.232928514480591, | |
| "learning_rate": 4.006635295596575e-06, | |
| "loss": 0.4854944348335266, | |
| "step": 1134 | |
| }, | |
| { | |
| "epoch": 1.387057387057387, | |
| "grad_norm": 2.037388563156128, | |
| "learning_rate": 4.002864484826083e-06, | |
| "loss": 0.9804095029830933, | |
| "step": 1136 | |
| }, | |
| { | |
| "epoch": 1.3894993894993894, | |
| "grad_norm": 1.7072653770446777, | |
| "learning_rate": 3.99908856683624e-06, | |
| "loss": 1.1063387393951416, | |
| "step": 1138 | |
| }, | |
| { | |
| "epoch": 1.3919413919413919, | |
| "grad_norm": 4.661365509033203, | |
| "learning_rate": 3.995307557019326e-06, | |
| "loss": 0.8346843719482422, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 1.3943833943833943, | |
| "grad_norm": 2.608985662460327, | |
| "learning_rate": 3.991521470788377e-06, | |
| "loss": 0.9450017213821411, | |
| "step": 1142 | |
| }, | |
| { | |
| "epoch": 1.3968253968253967, | |
| "grad_norm": 2.2186226844787598, | |
| "learning_rate": 3.987730323577123e-06, | |
| "loss": 0.6135491728782654, | |
| "step": 1144 | |
| }, | |
| { | |
| "epoch": 1.3992673992673992, | |
| "grad_norm": 1.9363148212432861, | |
| "learning_rate": 3.983934130839927e-06, | |
| "loss": 1.068377137184143, | |
| "step": 1146 | |
| }, | |
| { | |
| "epoch": 1.4017094017094016, | |
| "grad_norm": 6.124155521392822, | |
| "learning_rate": 3.980132908051717e-06, | |
| "loss": 0.8843311667442322, | |
| "step": 1148 | |
| }, | |
| { | |
| "epoch": 1.404151404151404, | |
| "grad_norm": 1.894343376159668, | |
| "learning_rate": 3.976326670707927e-06, | |
| "loss": 0.7890317440032959, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 1.4065934065934065, | |
| "grad_norm": 1.4660074710845947, | |
| "learning_rate": 3.972515434324432e-06, | |
| "loss": 0.8038425445556641, | |
| "step": 1152 | |
| }, | |
| { | |
| "epoch": 1.409035409035409, | |
| "grad_norm": 1.7170904874801636, | |
| "learning_rate": 3.9686992144374854e-06, | |
| "loss": 0.9780741930007935, | |
| "step": 1154 | |
| }, | |
| { | |
| "epoch": 1.4114774114774113, | |
| "grad_norm": 6.812156677246094, | |
| "learning_rate": 3.964878026603656e-06, | |
| "loss": 0.7489140629768372, | |
| "step": 1156 | |
| }, | |
| { | |
| "epoch": 1.4139194139194138, | |
| "grad_norm": 3.0899953842163086, | |
| "learning_rate": 3.961051886399763e-06, | |
| "loss": 1.009106159210205, | |
| "step": 1158 | |
| }, | |
| { | |
| "epoch": 1.4163614163614164, | |
| "grad_norm": 1.569420576095581, | |
| "learning_rate": 3.9572208094228155e-06, | |
| "loss": 1.0201953649520874, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 1.4188034188034189, | |
| "grad_norm": 2.1486785411834717, | |
| "learning_rate": 3.9533848112899455e-06, | |
| "loss": 0.7411532402038574, | |
| "step": 1162 | |
| }, | |
| { | |
| "epoch": 1.4212454212454213, | |
| "grad_norm": 13.017099380493164, | |
| "learning_rate": 3.949543907638345e-06, | |
| "loss": 0.7296299934387207, | |
| "step": 1164 | |
| }, | |
| { | |
| "epoch": 1.4236874236874237, | |
| "grad_norm": 1.9764689207077026, | |
| "learning_rate": 3.945698114125207e-06, | |
| "loss": 1.1636407375335693, | |
| "step": 1166 | |
| }, | |
| { | |
| "epoch": 1.4261294261294262, | |
| "grad_norm": 0.6818609833717346, | |
| "learning_rate": 3.941847446427651e-06, | |
| "loss": 0.9746972322463989, | |
| "step": 1168 | |
| }, | |
| { | |
| "epoch": 1.4285714285714286, | |
| "grad_norm": 2.446106433868408, | |
| "learning_rate": 3.937991920242671e-06, | |
| "loss": 0.8085231184959412, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 1.431013431013431, | |
| "grad_norm": 2.190028190612793, | |
| "learning_rate": 3.934131551287067e-06, | |
| "loss": 1.1608608961105347, | |
| "step": 1172 | |
| }, | |
| { | |
| "epoch": 1.4334554334554335, | |
| "grad_norm": 1.8594470024108887, | |
| "learning_rate": 3.930266355297375e-06, | |
| "loss": 1.1073782444000244, | |
| "step": 1174 | |
| }, | |
| { | |
| "epoch": 1.435897435897436, | |
| "grad_norm": 3.316195487976074, | |
| "learning_rate": 3.926396348029814e-06, | |
| "loss": 1.1658706665039062, | |
| "step": 1176 | |
| }, | |
| { | |
| "epoch": 1.4383394383394383, | |
| "grad_norm": 2.6010489463806152, | |
| "learning_rate": 3.922521545260211e-06, | |
| "loss": 0.9183681011199951, | |
| "step": 1178 | |
| }, | |
| { | |
| "epoch": 1.4407814407814408, | |
| "grad_norm": 5.369879245758057, | |
| "learning_rate": 3.918641962783945e-06, | |
| "loss": 1.037269949913025, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 1.4432234432234432, | |
| "grad_norm": 3.0808987617492676, | |
| "learning_rate": 3.914757616415877e-06, | |
| "loss": 0.8047484755516052, | |
| "step": 1182 | |
| }, | |
| { | |
| "epoch": 1.4456654456654456, | |
| "grad_norm": 1.6899147033691406, | |
| "learning_rate": 3.910868521990289e-06, | |
| "loss": 1.117107629776001, | |
| "step": 1184 | |
| }, | |
| { | |
| "epoch": 1.448107448107448, | |
| "grad_norm": 1.6038181781768799, | |
| "learning_rate": 3.906974695360818e-06, | |
| "loss": 1.0371313095092773, | |
| "step": 1186 | |
| }, | |
| { | |
| "epoch": 1.4505494505494505, | |
| "grad_norm": 2.300448179244995, | |
| "learning_rate": 3.90307615240039e-06, | |
| "loss": 0.8785613179206848, | |
| "step": 1188 | |
| }, | |
| { | |
| "epoch": 1.452991452991453, | |
| "grad_norm": 1.9171602725982666, | |
| "learning_rate": 3.8991729090011585e-06, | |
| "loss": 1.0834622383117676, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 1.4554334554334554, | |
| "grad_norm": 2.6901988983154297, | |
| "learning_rate": 3.895264981074438e-06, | |
| "loss": 0.8501840829849243, | |
| "step": 1192 | |
| }, | |
| { | |
| "epoch": 1.4578754578754578, | |
| "grad_norm": 1.8914860486984253, | |
| "learning_rate": 3.891352384550639e-06, | |
| "loss": 0.8218003511428833, | |
| "step": 1194 | |
| }, | |
| { | |
| "epoch": 1.4603174603174602, | |
| "grad_norm": 2.6401541233062744, | |
| "learning_rate": 3.887435135379202e-06, | |
| "loss": 0.7749768495559692, | |
| "step": 1196 | |
| }, | |
| { | |
| "epoch": 1.462759462759463, | |
| "grad_norm": 3.5819826126098633, | |
| "learning_rate": 3.8835132495285344e-06, | |
| "loss": 0.9986313581466675, | |
| "step": 1198 | |
| }, | |
| { | |
| "epoch": 1.4652014652014653, | |
| "grad_norm": 2.515784502029419, | |
| "learning_rate": 3.879586742985945e-06, | |
| "loss": 1.154970645904541, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 1.4676434676434678, | |
| "grad_norm": 2.7575578689575195, | |
| "learning_rate": 3.875655631757579e-06, | |
| "loss": 1.0889326333999634, | |
| "step": 1202 | |
| }, | |
| { | |
| "epoch": 1.4700854700854702, | |
| "grad_norm": 1.673169493675232, | |
| "learning_rate": 3.871719931868352e-06, | |
| "loss": 1.109386920928955, | |
| "step": 1204 | |
| }, | |
| { | |
| "epoch": 1.4725274725274726, | |
| "grad_norm": 3.21140193939209, | |
| "learning_rate": 3.867779659361885e-06, | |
| "loss": 0.9718731641769409, | |
| "step": 1206 | |
| }, | |
| { | |
| "epoch": 1.474969474969475, | |
| "grad_norm": 2.298818588256836, | |
| "learning_rate": 3.863834830300437e-06, | |
| "loss": 0.8030334115028381, | |
| "step": 1208 | |
| }, | |
| { | |
| "epoch": 1.4774114774114775, | |
| "grad_norm": 3.9100306034088135, | |
| "learning_rate": 3.859885460764845e-06, | |
| "loss": 0.9156997203826904, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 1.47985347985348, | |
| "grad_norm": 1.3137868642807007, | |
| "learning_rate": 3.855931566854451e-06, | |
| "loss": 1.0059466361999512, | |
| "step": 1212 | |
| }, | |
| { | |
| "epoch": 1.4822954822954824, | |
| "grad_norm": 1.9000264406204224, | |
| "learning_rate": 3.851973164687046e-06, | |
| "loss": 1.1118829250335693, | |
| "step": 1214 | |
| }, | |
| { | |
| "epoch": 1.4847374847374848, | |
| "grad_norm": 1.584736943244934, | |
| "learning_rate": 3.848010270398792e-06, | |
| "loss": 1.0681581497192383, | |
| "step": 1216 | |
| }, | |
| { | |
| "epoch": 1.4871794871794872, | |
| "grad_norm": 1.8261507749557495, | |
| "learning_rate": 3.844042900144167e-06, | |
| "loss": 0.2508808970451355, | |
| "step": 1218 | |
| }, | |
| { | |
| "epoch": 1.4896214896214897, | |
| "grad_norm": 1.896042823791504, | |
| "learning_rate": 3.8400710700958945e-06, | |
| "loss": 0.6199178695678711, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 1.492063492063492, | |
| "grad_norm": 2.0678446292877197, | |
| "learning_rate": 3.836094796444875e-06, | |
| "loss": 1.0399789810180664, | |
| "step": 1222 | |
| }, | |
| { | |
| "epoch": 1.4945054945054945, | |
| "grad_norm": 6.400730133056641, | |
| "learning_rate": 3.832114095400129e-06, | |
| "loss": 0.8569754362106323, | |
| "step": 1224 | |
| }, | |
| { | |
| "epoch": 1.496947496947497, | |
| "grad_norm": 2.1547770500183105, | |
| "learning_rate": 3.8281289831887185e-06, | |
| "loss": 1.1074395179748535, | |
| "step": 1226 | |
| }, | |
| { | |
| "epoch": 1.4993894993894994, | |
| "grad_norm": 1.7979967594146729, | |
| "learning_rate": 3.824139476055692e-06, | |
| "loss": 0.36593061685562134, | |
| "step": 1228 | |
| }, | |
| { | |
| "epoch": 1.5018315018315018, | |
| "grad_norm": 35.071903228759766, | |
| "learning_rate": 3.820145590264012e-06, | |
| "loss": 0.8221673965454102, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 1.5042735042735043, | |
| "grad_norm": 0.9250247478485107, | |
| "learning_rate": 3.81614734209449e-06, | |
| "loss": 0.6617715954780579, | |
| "step": 1232 | |
| }, | |
| { | |
| "epoch": 1.5067155067155067, | |
| "grad_norm": 4.005329132080078, | |
| "learning_rate": 3.812144747845719e-06, | |
| "loss": 1.1474699974060059, | |
| "step": 1234 | |
| }, | |
| { | |
| "epoch": 1.5091575091575091, | |
| "grad_norm": 2.440639019012451, | |
| "learning_rate": 3.808137823834012e-06, | |
| "loss": 0.8988032937049866, | |
| "step": 1236 | |
| }, | |
| { | |
| "epoch": 1.5115995115995116, | |
| "grad_norm": 1.8108290433883667, | |
| "learning_rate": 3.80412658639333e-06, | |
| "loss": 0.8774833679199219, | |
| "step": 1238 | |
| }, | |
| { | |
| "epoch": 1.514041514041514, | |
| "grad_norm": 1.4303427934646606, | |
| "learning_rate": 3.800111051875217e-06, | |
| "loss": 1.0372514724731445, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 1.5164835164835164, | |
| "grad_norm": 1.5728963613510132, | |
| "learning_rate": 3.7960912366487353e-06, | |
| "loss": 1.0711747407913208, | |
| "step": 1242 | |
| }, | |
| { | |
| "epoch": 1.5189255189255189, | |
| "grad_norm": 9.220934867858887, | |
| "learning_rate": 3.7920671571003953e-06, | |
| "loss": 0.686614453792572, | |
| "step": 1244 | |
| }, | |
| { | |
| "epoch": 1.5213675213675213, | |
| "grad_norm": 1.5577303171157837, | |
| "learning_rate": 3.7880388296340924e-06, | |
| "loss": 0.7836710810661316, | |
| "step": 1246 | |
| }, | |
| { | |
| "epoch": 1.5238095238095237, | |
| "grad_norm": 1.9703376293182373, | |
| "learning_rate": 3.7840062706710362e-06, | |
| "loss": 0.8961681127548218, | |
| "step": 1248 | |
| }, | |
| { | |
| "epoch": 1.5262515262515262, | |
| "grad_norm": 2.641063690185547, | |
| "learning_rate": 3.7799694966496888e-06, | |
| "loss": 1.1727888584136963, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 1.5286935286935286, | |
| "grad_norm": 5.275555610656738, | |
| "learning_rate": 3.775928524025691e-06, | |
| "loss": 0.875237226486206, | |
| "step": 1252 | |
| }, | |
| { | |
| "epoch": 1.531135531135531, | |
| "grad_norm": 1.5248931646347046, | |
| "learning_rate": 3.771883369271803e-06, | |
| "loss": 1.040828824043274, | |
| "step": 1254 | |
| }, | |
| { | |
| "epoch": 1.5335775335775335, | |
| "grad_norm": 2.20690655708313, | |
| "learning_rate": 3.7678340488778302e-06, | |
| "loss": 1.1615933179855347, | |
| "step": 1256 | |
| }, | |
| { | |
| "epoch": 1.536019536019536, | |
| "grad_norm": 1.435325026512146, | |
| "learning_rate": 3.763780579350559e-06, | |
| "loss": 0.40704652667045593, | |
| "step": 1258 | |
| }, | |
| { | |
| "epoch": 1.5384615384615383, | |
| "grad_norm": 14.3430814743042, | |
| "learning_rate": 3.759722977213691e-06, | |
| "loss": 0.8075951337814331, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 1.5409035409035408, | |
| "grad_norm": 16.239559173583984, | |
| "learning_rate": 3.755661259007774e-06, | |
| "loss": 0.6135749816894531, | |
| "step": 1262 | |
| }, | |
| { | |
| "epoch": 1.5433455433455432, | |
| "grad_norm": 2.538618803024292, | |
| "learning_rate": 3.751595441290133e-06, | |
| "loss": 0.8490422964096069, | |
| "step": 1264 | |
| }, | |
| { | |
| "epoch": 1.5457875457875456, | |
| "grad_norm": 2.3163981437683105, | |
| "learning_rate": 3.7475255406348067e-06, | |
| "loss": 0.8143582940101624, | |
| "step": 1266 | |
| }, | |
| { | |
| "epoch": 1.5482295482295483, | |
| "grad_norm": 1.8422861099243164, | |
| "learning_rate": 3.7434515736324746e-06, | |
| "loss": 1.0519959926605225, | |
| "step": 1268 | |
| }, | |
| { | |
| "epoch": 1.5506715506715507, | |
| "grad_norm": 9.199726104736328, | |
| "learning_rate": 3.7393735568903955e-06, | |
| "loss": 0.4911290109157562, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 1.5531135531135531, | |
| "grad_norm": 2.1301679611206055, | |
| "learning_rate": 3.7352915070323366e-06, | |
| "loss": 1.189732313156128, | |
| "step": 1272 | |
| }, | |
| { | |
| "epoch": 1.5555555555555556, | |
| "grad_norm": 1.937249779701233, | |
| "learning_rate": 3.731205440698501e-06, | |
| "loss": 0.9045177102088928, | |
| "step": 1274 | |
| }, | |
| { | |
| "epoch": 1.557997557997558, | |
| "grad_norm": 2.8137459754943848, | |
| "learning_rate": 3.7271153745454726e-06, | |
| "loss": 1.390211582183838, | |
| "step": 1276 | |
| }, | |
| { | |
| "epoch": 1.5604395604395604, | |
| "grad_norm": 2.1598775386810303, | |
| "learning_rate": 3.723021325246132e-06, | |
| "loss": 0.737874448299408, | |
| "step": 1278 | |
| }, | |
| { | |
| "epoch": 1.5628815628815629, | |
| "grad_norm": 2.4186580181121826, | |
| "learning_rate": 3.7189233094896044e-06, | |
| "loss": 1.0836533308029175, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 1.5653235653235653, | |
| "grad_norm": 2.439676284790039, | |
| "learning_rate": 3.714821343981179e-06, | |
| "loss": 0.7069857120513916, | |
| "step": 1282 | |
| }, | |
| { | |
| "epoch": 1.5677655677655677, | |
| "grad_norm": 1.5403668880462646, | |
| "learning_rate": 3.7107154454422456e-06, | |
| "loss": 1.0703009366989136, | |
| "step": 1284 | |
| }, | |
| { | |
| "epoch": 1.5702075702075702, | |
| "grad_norm": 3.893155097961426, | |
| "learning_rate": 3.706605630610231e-06, | |
| "loss": 1.1834505796432495, | |
| "step": 1286 | |
| }, | |
| { | |
| "epoch": 1.5726495726495726, | |
| "grad_norm": 5.153315544128418, | |
| "learning_rate": 3.7024919162385232e-06, | |
| "loss": 0.5492372512817383, | |
| "step": 1288 | |
| }, | |
| { | |
| "epoch": 1.575091575091575, | |
| "grad_norm": 1.3920317888259888, | |
| "learning_rate": 3.6983743190964077e-06, | |
| "loss": 0.8411808013916016, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 1.5775335775335775, | |
| "grad_norm": 9.354891777038574, | |
| "learning_rate": 3.6942528559689965e-06, | |
| "loss": 0.36394214630126953, | |
| "step": 1292 | |
| }, | |
| { | |
| "epoch": 1.5799755799755801, | |
| "grad_norm": 2.3740155696868896, | |
| "learning_rate": 3.690127543657162e-06, | |
| "loss": 0.7142713069915771, | |
| "step": 1294 | |
| }, | |
| { | |
| "epoch": 1.5824175824175826, | |
| "grad_norm": 37.80674362182617, | |
| "learning_rate": 3.685998398977468e-06, | |
| "loss": 1.0909113883972168, | |
| "step": 1296 | |
| }, | |
| { | |
| "epoch": 1.584859584859585, | |
| "grad_norm": 1.855957269668579, | |
| "learning_rate": 3.6818654387620993e-06, | |
| "loss": 1.1598751544952393, | |
| "step": 1298 | |
| }, | |
| { | |
| "epoch": 1.5873015873015874, | |
| "grad_norm": 2.314946174621582, | |
| "learning_rate": 3.677728679858797e-06, | |
| "loss": 0.9421340823173523, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 1.5897435897435899, | |
| "grad_norm": 5.468100070953369, | |
| "learning_rate": 3.673588139130784e-06, | |
| "loss": 1.2048614025115967, | |
| "step": 1302 | |
| }, | |
| { | |
| "epoch": 1.5921855921855923, | |
| "grad_norm": 3.331906795501709, | |
| "learning_rate": 3.6694438334567024e-06, | |
| "loss": 1.1039568185806274, | |
| "step": 1304 | |
| }, | |
| { | |
| "epoch": 1.5946275946275947, | |
| "grad_norm": 1.5079933404922485, | |
| "learning_rate": 3.6652957797305387e-06, | |
| "loss": 0.6897386908531189, | |
| "step": 1306 | |
| }, | |
| { | |
| "epoch": 1.5970695970695972, | |
| "grad_norm": 2.3638577461242676, | |
| "learning_rate": 3.661143994861563e-06, | |
| "loss": 1.1327297687530518, | |
| "step": 1308 | |
| }, | |
| { | |
| "epoch": 1.5995115995115996, | |
| "grad_norm": 2.4536283016204834, | |
| "learning_rate": 3.6569884957742497e-06, | |
| "loss": 1.0871834754943848, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 1.601953601953602, | |
| "grad_norm": 1.548901915550232, | |
| "learning_rate": 3.652829299408217e-06, | |
| "loss": 1.0074199438095093, | |
| "step": 1312 | |
| }, | |
| { | |
| "epoch": 1.6043956043956045, | |
| "grad_norm": 1.3679847717285156, | |
| "learning_rate": 3.648666422718155e-06, | |
| "loss": 1.1029393672943115, | |
| "step": 1314 | |
| }, | |
| { | |
| "epoch": 1.606837606837607, | |
| "grad_norm": 2.071131706237793, | |
| "learning_rate": 3.644499882673756e-06, | |
| "loss": 1.1430408954620361, | |
| "step": 1316 | |
| }, | |
| { | |
| "epoch": 1.6092796092796093, | |
| "grad_norm": 2.4289538860321045, | |
| "learning_rate": 3.6403296962596442e-06, | |
| "loss": 1.0161014795303345, | |
| "step": 1318 | |
| }, | |
| { | |
| "epoch": 1.6117216117216118, | |
| "grad_norm": 1.8402098417282104, | |
| "learning_rate": 3.6361558804753088e-06, | |
| "loss": 1.2254347801208496, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 1.6141636141636142, | |
| "grad_norm": 1.484537124633789, | |
| "learning_rate": 3.631978452335036e-06, | |
| "loss": 1.116368293762207, | |
| "step": 1322 | |
| }, | |
| { | |
| "epoch": 1.6166056166056166, | |
| "grad_norm": 1.7078075408935547, | |
| "learning_rate": 3.6277974288678354e-06, | |
| "loss": 1.0890535116195679, | |
| "step": 1324 | |
| }, | |
| { | |
| "epoch": 1.619047619047619, | |
| "grad_norm": 4.279214382171631, | |
| "learning_rate": 3.6236128271173716e-06, | |
| "loss": 0.8000863790512085, | |
| "step": 1326 | |
| }, | |
| { | |
| "epoch": 1.6214896214896215, | |
| "grad_norm": 1.6943376064300537, | |
| "learning_rate": 3.6194246641418993e-06, | |
| "loss": 1.1035950183868408, | |
| "step": 1328 | |
| }, | |
| { | |
| "epoch": 1.623931623931624, | |
| "grad_norm": 3.024909257888794, | |
| "learning_rate": 3.6152329570141863e-06, | |
| "loss": 1.078392744064331, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 1.6263736263736264, | |
| "grad_norm": 4.725790977478027, | |
| "learning_rate": 3.611037722821452e-06, | |
| "loss": 0.8447167277336121, | |
| "step": 1332 | |
| }, | |
| { | |
| "epoch": 1.6288156288156288, | |
| "grad_norm": 1.9349464178085327, | |
| "learning_rate": 3.6068389786652915e-06, | |
| "loss": 1.1011463403701782, | |
| "step": 1334 | |
| }, | |
| { | |
| "epoch": 1.6312576312576312, | |
| "grad_norm": 1.9638590812683105, | |
| "learning_rate": 3.6026367416616054e-06, | |
| "loss": 0.7226951718330383, | |
| "step": 1336 | |
| }, | |
| { | |
| "epoch": 1.6336996336996337, | |
| "grad_norm": 3.807051420211792, | |
| "learning_rate": 3.598431028940539e-06, | |
| "loss": 1.0683143138885498, | |
| "step": 1338 | |
| }, | |
| { | |
| "epoch": 1.636141636141636, | |
| "grad_norm": 2.799273729324341, | |
| "learning_rate": 3.594221857646399e-06, | |
| "loss": 0.5557500720024109, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 1.6385836385836385, | |
| "grad_norm": 1.5128666162490845, | |
| "learning_rate": 3.5900092449375977e-06, | |
| "loss": 0.391013503074646, | |
| "step": 1342 | |
| }, | |
| { | |
| "epoch": 1.641025641025641, | |
| "grad_norm": 2.4419357776641846, | |
| "learning_rate": 3.5857932079865703e-06, | |
| "loss": 1.2627594470977783, | |
| "step": 1344 | |
| }, | |
| { | |
| "epoch": 1.6434676434676434, | |
| "grad_norm": 1.5012274980545044, | |
| "learning_rate": 3.5815737639797143e-06, | |
| "loss": 1.1198487281799316, | |
| "step": 1346 | |
| }, | |
| { | |
| "epoch": 1.6459096459096458, | |
| "grad_norm": 1.7359366416931152, | |
| "learning_rate": 3.5773509301173136e-06, | |
| "loss": 0.7089607119560242, | |
| "step": 1348 | |
| }, | |
| { | |
| "epoch": 1.6483516483516483, | |
| "grad_norm": 1.7854307889938354, | |
| "learning_rate": 3.573124723613473e-06, | |
| "loss": 0.7905706763267517, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 1.6507936507936507, | |
| "grad_norm": 2.4434316158294678, | |
| "learning_rate": 3.568895161696042e-06, | |
| "loss": 1.0632576942443848, | |
| "step": 1352 | |
| }, | |
| { | |
| "epoch": 1.6532356532356531, | |
| "grad_norm": 1.7432414293289185, | |
| "learning_rate": 3.5646622616065537e-06, | |
| "loss": 1.170975685119629, | |
| "step": 1354 | |
| }, | |
| { | |
| "epoch": 1.6556776556776556, | |
| "grad_norm": 1.8956907987594604, | |
| "learning_rate": 3.560426040600143e-06, | |
| "loss": 1.0797570943832397, | |
| "step": 1356 | |
| }, | |
| { | |
| "epoch": 1.658119658119658, | |
| "grad_norm": 1.6335842609405518, | |
| "learning_rate": 3.556186515945486e-06, | |
| "loss": 0.5945901870727539, | |
| "step": 1358 | |
| }, | |
| { | |
| "epoch": 1.6605616605616604, | |
| "grad_norm": 2.311692714691162, | |
| "learning_rate": 3.5519437049247257e-06, | |
| "loss": 0.8245255947113037, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 1.6630036630036629, | |
| "grad_norm": 2.2353930473327637, | |
| "learning_rate": 3.547697624833401e-06, | |
| "loss": 1.1110084056854248, | |
| "step": 1362 | |
| }, | |
| { | |
| "epoch": 1.6654456654456653, | |
| "grad_norm": 1.7413452863693237, | |
| "learning_rate": 3.543448292980376e-06, | |
| "loss": 1.1027268171310425, | |
| "step": 1364 | |
| }, | |
| { | |
| "epoch": 1.6678876678876677, | |
| "grad_norm": 1.9247740507125854, | |
| "learning_rate": 3.5391957266877724e-06, | |
| "loss": 1.0763671398162842, | |
| "step": 1366 | |
| }, | |
| { | |
| "epoch": 1.6703296703296702, | |
| "grad_norm": 1.415798544883728, | |
| "learning_rate": 3.534939943290896e-06, | |
| "loss": 1.0487414598464966, | |
| "step": 1368 | |
| }, | |
| { | |
| "epoch": 1.6727716727716728, | |
| "grad_norm": 2.411515235900879, | |
| "learning_rate": 3.530680960138166e-06, | |
| "loss": 1.142496109008789, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 1.6752136752136753, | |
| "grad_norm": 1.571021556854248, | |
| "learning_rate": 3.5264187945910465e-06, | |
| "loss": 0.6615177392959595, | |
| "step": 1372 | |
| }, | |
| { | |
| "epoch": 1.6776556776556777, | |
| "grad_norm": 1.4412907361984253, | |
| "learning_rate": 3.5221534640239745e-06, | |
| "loss": 0.29376649856567383, | |
| "step": 1374 | |
| }, | |
| { | |
| "epoch": 1.6800976800976801, | |
| "grad_norm": 6.718142509460449, | |
| "learning_rate": 3.5178849858242874e-06, | |
| "loss": 1.1929081678390503, | |
| "step": 1376 | |
| }, | |
| { | |
| "epoch": 1.6825396825396826, | |
| "grad_norm": 4.863142013549805, | |
| "learning_rate": 3.5136133773921553e-06, | |
| "loss": 1.202161192893982, | |
| "step": 1378 | |
| }, | |
| { | |
| "epoch": 1.684981684981685, | |
| "grad_norm": 0.7358537912368774, | |
| "learning_rate": 3.509338656140508e-06, | |
| "loss": 0.9144766330718994, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 1.6874236874236874, | |
| "grad_norm": 4.494753837585449, | |
| "learning_rate": 3.505060839494964e-06, | |
| "loss": 0.978439211845398, | |
| "step": 1382 | |
| }, | |
| { | |
| "epoch": 1.6898656898656899, | |
| "grad_norm": 1.7089729309082031, | |
| "learning_rate": 3.5007799448937617e-06, | |
| "loss": 1.1718627214431763, | |
| "step": 1384 | |
| }, | |
| { | |
| "epoch": 1.6923076923076923, | |
| "grad_norm": 1.796030879020691, | |
| "learning_rate": 3.496495989787683e-06, | |
| "loss": 1.0744086503982544, | |
| "step": 1386 | |
| }, | |
| { | |
| "epoch": 1.6947496947496947, | |
| "grad_norm": 1.5995069742202759, | |
| "learning_rate": 3.49220899163999e-06, | |
| "loss": 1.1244831085205078, | |
| "step": 1388 | |
| }, | |
| { | |
| "epoch": 1.6971916971916972, | |
| "grad_norm": 3.2209115028381348, | |
| "learning_rate": 3.4879189679263474e-06, | |
| "loss": 0.3722049295902252, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 1.6996336996336996, | |
| "grad_norm": 1.2462571859359741, | |
| "learning_rate": 3.4836259361347524e-06, | |
| "loss": 1.0250697135925293, | |
| "step": 1392 | |
| }, | |
| { | |
| "epoch": 1.702075702075702, | |
| "grad_norm": 2.99985408782959, | |
| "learning_rate": 3.479329913765467e-06, | |
| "loss": 0.946092426776886, | |
| "step": 1394 | |
| }, | |
| { | |
| "epoch": 1.7045177045177047, | |
| "grad_norm": 3.67580246925354, | |
| "learning_rate": 3.475030918330942e-06, | |
| "loss": 1.217712163925171, | |
| "step": 1396 | |
| }, | |
| { | |
| "epoch": 1.7069597069597071, | |
| "grad_norm": 3.188765525817871, | |
| "learning_rate": 3.4707289673557486e-06, | |
| "loss": 0.9007408022880554, | |
| "step": 1398 | |
| }, | |
| { | |
| "epoch": 1.7094017094017095, | |
| "grad_norm": 5.768331050872803, | |
| "learning_rate": 3.4664240783765064e-06, | |
| "loss": 0.4004557728767395, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 1.711843711843712, | |
| "grad_norm": 5.148880958557129, | |
| "learning_rate": 3.4621162689418104e-06, | |
| "loss": 0.9390780329704285, | |
| "step": 1402 | |
| }, | |
| { | |
| "epoch": 1.7142857142857144, | |
| "grad_norm": 1.9988371133804321, | |
| "learning_rate": 3.4578055566121617e-06, | |
| "loss": 1.065889596939087, | |
| "step": 1404 | |
| }, | |
| { | |
| "epoch": 1.7167277167277168, | |
| "grad_norm": 4.718473434448242, | |
| "learning_rate": 3.453491958959894e-06, | |
| "loss": 0.5322512984275818, | |
| "step": 1406 | |
| }, | |
| { | |
| "epoch": 1.7191697191697193, | |
| "grad_norm": 3.3976686000823975, | |
| "learning_rate": 3.449175493569103e-06, | |
| "loss": 1.1359853744506836, | |
| "step": 1408 | |
| }, | |
| { | |
| "epoch": 1.7216117216117217, | |
| "grad_norm": 6.322020530700684, | |
| "learning_rate": 3.4448561780355766e-06, | |
| "loss": 0.7464244961738586, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 1.7240537240537241, | |
| "grad_norm": 11.572935104370117, | |
| "learning_rate": 3.4405340299667183e-06, | |
| "loss": 0.8479959964752197, | |
| "step": 1412 | |
| }, | |
| { | |
| "epoch": 1.7264957264957266, | |
| "grad_norm": 1.7882882356643677, | |
| "learning_rate": 3.436209066981479e-06, | |
| "loss": 1.0817737579345703, | |
| "step": 1414 | |
| }, | |
| { | |
| "epoch": 1.728937728937729, | |
| "grad_norm": 5.552520275115967, | |
| "learning_rate": 3.4318813067102853e-06, | |
| "loss": 0.9852099418640137, | |
| "step": 1416 | |
| }, | |
| { | |
| "epoch": 1.7313797313797314, | |
| "grad_norm": 1.9042245149612427, | |
| "learning_rate": 3.4275507667949658e-06, | |
| "loss": 1.1091506481170654, | |
| "step": 1418 | |
| }, | |
| { | |
| "epoch": 1.7338217338217339, | |
| "grad_norm": 2.391268491744995, | |
| "learning_rate": 3.423217464888681e-06, | |
| "loss": 0.8407750725746155, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 1.7362637362637363, | |
| "grad_norm": 2.475590944290161, | |
| "learning_rate": 3.41888141865585e-06, | |
| "loss": 0.9131081104278564, | |
| "step": 1422 | |
| }, | |
| { | |
| "epoch": 1.7387057387057387, | |
| "grad_norm": 5.156746864318848, | |
| "learning_rate": 3.4145426457720787e-06, | |
| "loss": 0.7782116532325745, | |
| "step": 1424 | |
| }, | |
| { | |
| "epoch": 1.7411477411477412, | |
| "grad_norm": 7.184075355529785, | |
| "learning_rate": 3.4102011639240884e-06, | |
| "loss": 0.7344411611557007, | |
| "step": 1426 | |
| }, | |
| { | |
| "epoch": 1.7435897435897436, | |
| "grad_norm": 2.159703016281128, | |
| "learning_rate": 3.4058569908096436e-06, | |
| "loss": 1.132224202156067, | |
| "step": 1428 | |
| }, | |
| { | |
| "epoch": 1.746031746031746, | |
| "grad_norm": 1.8462954759597778, | |
| "learning_rate": 3.4015101441374776e-06, | |
| "loss": 1.173128366470337, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 1.7484737484737485, | |
| "grad_norm": 2.538024425506592, | |
| "learning_rate": 3.397160641627226e-06, | |
| "loss": 0.7561154961585999, | |
| "step": 1432 | |
| }, | |
| { | |
| "epoch": 1.750915750915751, | |
| "grad_norm": 1.7686879634857178, | |
| "learning_rate": 3.392808501009347e-06, | |
| "loss": 0.6580084562301636, | |
| "step": 1434 | |
| }, | |
| { | |
| "epoch": 1.7533577533577533, | |
| "grad_norm": 10.234268188476562, | |
| "learning_rate": 3.3884537400250554e-06, | |
| "loss": 0.6667467951774597, | |
| "step": 1436 | |
| }, | |
| { | |
| "epoch": 1.7557997557997558, | |
| "grad_norm": 1.436072826385498, | |
| "learning_rate": 3.384096376426247e-06, | |
| "loss": 0.5105250477790833, | |
| "step": 1438 | |
| }, | |
| { | |
| "epoch": 1.7582417582417582, | |
| "grad_norm": 1.6276432275772095, | |
| "learning_rate": 3.379736427975425e-06, | |
| "loss": 1.0976946353912354, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 1.7606837606837606, | |
| "grad_norm": 3.592867136001587, | |
| "learning_rate": 3.3753739124456343e-06, | |
| "loss": 0.8957812786102295, | |
| "step": 1442 | |
| }, | |
| { | |
| "epoch": 1.763125763125763, | |
| "grad_norm": 4.000123023986816, | |
| "learning_rate": 3.371008847620379e-06, | |
| "loss": 0.7372997403144836, | |
| "step": 1444 | |
| }, | |
| { | |
| "epoch": 1.7655677655677655, | |
| "grad_norm": 3.1201529502868652, | |
| "learning_rate": 3.366641251293559e-06, | |
| "loss": 1.102899193763733, | |
| "step": 1446 | |
| }, | |
| { | |
| "epoch": 1.768009768009768, | |
| "grad_norm": 1.768283486366272, | |
| "learning_rate": 3.3622711412693914e-06, | |
| "loss": 1.124794602394104, | |
| "step": 1448 | |
| }, | |
| { | |
| "epoch": 1.7704517704517704, | |
| "grad_norm": 2.403294801712036, | |
| "learning_rate": 3.3578985353623416e-06, | |
| "loss": 0.9902628660202026, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 1.7728937728937728, | |
| "grad_norm": 3.0186891555786133, | |
| "learning_rate": 3.3535234513970494e-06, | |
| "loss": 0.399064302444458, | |
| "step": 1452 | |
| }, | |
| { | |
| "epoch": 1.7753357753357752, | |
| "grad_norm": 1.5962026119232178, | |
| "learning_rate": 3.349145907208255e-06, | |
| "loss": 0.7983530163764954, | |
| "step": 1454 | |
| }, | |
| { | |
| "epoch": 1.7777777777777777, | |
| "grad_norm": 3.296353816986084, | |
| "learning_rate": 3.3447659206407285e-06, | |
| "loss": 0.5403007864952087, | |
| "step": 1456 | |
| }, | |
| { | |
| "epoch": 1.7802197802197801, | |
| "grad_norm": 1.5648705959320068, | |
| "learning_rate": 3.3403835095491967e-06, | |
| "loss": 1.0592517852783203, | |
| "step": 1458 | |
| }, | |
| { | |
| "epoch": 1.7826617826617825, | |
| "grad_norm": 3.352639675140381, | |
| "learning_rate": 3.3359986917982675e-06, | |
| "loss": 1.0402568578720093, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 1.785103785103785, | |
| "grad_norm": 3.2459142208099365, | |
| "learning_rate": 3.3316114852623617e-06, | |
| "loss": 0.9993575811386108, | |
| "step": 1462 | |
| }, | |
| { | |
| "epoch": 1.7875457875457874, | |
| "grad_norm": 2.1725311279296875, | |
| "learning_rate": 3.327221907825638e-06, | |
| "loss": 0.8232885599136353, | |
| "step": 1464 | |
| }, | |
| { | |
| "epoch": 1.7899877899877898, | |
| "grad_norm": 2.444363594055176, | |
| "learning_rate": 3.3228299773819165e-06, | |
| "loss": 0.8555684685707092, | |
| "step": 1466 | |
| }, | |
| { | |
| "epoch": 1.7924297924297923, | |
| "grad_norm": 4.547183990478516, | |
| "learning_rate": 3.318435711834615e-06, | |
| "loss": 0.8133440017700195, | |
| "step": 1468 | |
| }, | |
| { | |
| "epoch": 1.7948717948717947, | |
| "grad_norm": 3.024049758911133, | |
| "learning_rate": 3.3140391290966646e-06, | |
| "loss": 1.0311592817306519, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 1.7973137973137974, | |
| "grad_norm": 4.397846221923828, | |
| "learning_rate": 3.309640247090445e-06, | |
| "loss": 1.0561209917068481, | |
| "step": 1472 | |
| }, | |
| { | |
| "epoch": 1.7997557997557998, | |
| "grad_norm": 2.594501256942749, | |
| "learning_rate": 3.3052390837477087e-06, | |
| "loss": 0.6757609248161316, | |
| "step": 1474 | |
| }, | |
| { | |
| "epoch": 1.8021978021978022, | |
| "grad_norm": 2.992253541946411, | |
| "learning_rate": 3.300835657009507e-06, | |
| "loss": 0.7614642977714539, | |
| "step": 1476 | |
| }, | |
| { | |
| "epoch": 1.8046398046398047, | |
| "grad_norm": 5.074526786804199, | |
| "learning_rate": 3.2964299848261187e-06, | |
| "loss": 0.8146823048591614, | |
| "step": 1478 | |
| }, | |
| { | |
| "epoch": 1.807081807081807, | |
| "grad_norm": 3.6561779975891113, | |
| "learning_rate": 3.2920220851569746e-06, | |
| "loss": 0.4933128356933594, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 1.8095238095238095, | |
| "grad_norm": 5.129440784454346, | |
| "learning_rate": 3.2876119759705884e-06, | |
| "loss": 0.8365576267242432, | |
| "step": 1482 | |
| }, | |
| { | |
| "epoch": 1.811965811965812, | |
| "grad_norm": 1.3081094026565552, | |
| "learning_rate": 3.2831996752444774e-06, | |
| "loss": 1.174236536026001, | |
| "step": 1484 | |
| }, | |
| { | |
| "epoch": 1.8144078144078144, | |
| "grad_norm": 1.9769134521484375, | |
| "learning_rate": 3.2787852009650945e-06, | |
| "loss": 1.0928758382797241, | |
| "step": 1486 | |
| }, | |
| { | |
| "epoch": 1.8168498168498168, | |
| "grad_norm": 3.200984001159668, | |
| "learning_rate": 3.2743685711277533e-06, | |
| "loss": 0.7248603701591492, | |
| "step": 1488 | |
| }, | |
| { | |
| "epoch": 1.8192918192918193, | |
| "grad_norm": 1.9529130458831787, | |
| "learning_rate": 3.269949803736554e-06, | |
| "loss": 0.8898839950561523, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 1.8217338217338217, | |
| "grad_norm": 2.331352949142456, | |
| "learning_rate": 3.265528916804308e-06, | |
| "loss": 1.097998857498169, | |
| "step": 1492 | |
| }, | |
| { | |
| "epoch": 1.8241758241758241, | |
| "grad_norm": 7.352150917053223, | |
| "learning_rate": 3.261105928352472e-06, | |
| "loss": 0.7203211784362793, | |
| "step": 1494 | |
| }, | |
| { | |
| "epoch": 1.8266178266178266, | |
| "grad_norm": 8.535738945007324, | |
| "learning_rate": 3.2566808564110635e-06, | |
| "loss": 0.8137180209159851, | |
| "step": 1496 | |
| }, | |
| { | |
| "epoch": 1.8290598290598292, | |
| "grad_norm": 1.5943210124969482, | |
| "learning_rate": 3.252253719018599e-06, | |
| "loss": 1.4954842329025269, | |
| "step": 1498 | |
| }, | |
| { | |
| "epoch": 1.8315018315018317, | |
| "grad_norm": 2.4209067821502686, | |
| "learning_rate": 3.2478245342220094e-06, | |
| "loss": 1.2031804323196411, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 1.833943833943834, | |
| "grad_norm": 3.7259180545806885, | |
| "learning_rate": 3.243393320076575e-06, | |
| "loss": 0.8611478805541992, | |
| "step": 1502 | |
| }, | |
| { | |
| "epoch": 1.8363858363858365, | |
| "grad_norm": 0.5359264612197876, | |
| "learning_rate": 3.238960094645848e-06, | |
| "loss": 0.9046647548675537, | |
| "step": 1504 | |
| }, | |
| { | |
| "epoch": 1.838827838827839, | |
| "grad_norm": 2.4440624713897705, | |
| "learning_rate": 3.2345248760015777e-06, | |
| "loss": 0.7731856107711792, | |
| "step": 1506 | |
| }, | |
| { | |
| "epoch": 1.8412698412698414, | |
| "grad_norm": 1.7057727575302124, | |
| "learning_rate": 3.2300876822236427e-06, | |
| "loss": 0.8238407373428345, | |
| "step": 1508 | |
| }, | |
| { | |
| "epoch": 1.8437118437118438, | |
| "grad_norm": 2.0124754905700684, | |
| "learning_rate": 3.225648531399968e-06, | |
| "loss": 1.0737024545669556, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 1.8461538461538463, | |
| "grad_norm": 1.904160499572754, | |
| "learning_rate": 3.22120744162646e-06, | |
| "loss": 1.0689663887023926, | |
| "step": 1512 | |
| }, | |
| { | |
| "epoch": 1.8485958485958487, | |
| "grad_norm": 1.249457836151123, | |
| "learning_rate": 3.2167644310069276e-06, | |
| "loss": 1.0780993700027466, | |
| "step": 1514 | |
| }, | |
| { | |
| "epoch": 1.8510378510378511, | |
| "grad_norm": 3.9271388053894043, | |
| "learning_rate": 3.2123195176530104e-06, | |
| "loss": 0.833716094493866, | |
| "step": 1516 | |
| }, | |
| { | |
| "epoch": 1.8534798534798536, | |
| "grad_norm": 1.8167206048965454, | |
| "learning_rate": 3.207872719684104e-06, | |
| "loss": 1.1510157585144043, | |
| "step": 1518 | |
| }, | |
| { | |
| "epoch": 1.855921855921856, | |
| "grad_norm": 3.824442148208618, | |
| "learning_rate": 3.203424055227287e-06, | |
| "loss": 0.9223167896270752, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 1.8583638583638584, | |
| "grad_norm": 4.120997905731201, | |
| "learning_rate": 3.1989735424172456e-06, | |
| "loss": 0.9817994832992554, | |
| "step": 1522 | |
| }, | |
| { | |
| "epoch": 1.8608058608058609, | |
| "grad_norm": 2.414776563644409, | |
| "learning_rate": 3.1945211993962035e-06, | |
| "loss": 0.9063418507575989, | |
| "step": 1524 | |
| }, | |
| { | |
| "epoch": 1.8632478632478633, | |
| "grad_norm": 4.998463153839111, | |
| "learning_rate": 3.190067044313841e-06, | |
| "loss": 0.9489470720291138, | |
| "step": 1526 | |
| }, | |
| { | |
| "epoch": 1.8656898656898657, | |
| "grad_norm": 1.9804654121398926, | |
| "learning_rate": 3.185611095327227e-06, | |
| "loss": 0.7647035121917725, | |
| "step": 1528 | |
| }, | |
| { | |
| "epoch": 1.8681318681318682, | |
| "grad_norm": 1.3335086107254028, | |
| "learning_rate": 3.181153370600745e-06, | |
| "loss": 0.9383209943771362, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 1.8705738705738706, | |
| "grad_norm": 4.721079349517822, | |
| "learning_rate": 3.176693888306014e-06, | |
| "loss": 0.77753746509552, | |
| "step": 1532 | |
| }, | |
| { | |
| "epoch": 1.873015873015873, | |
| "grad_norm": 2.030644655227661, | |
| "learning_rate": 3.1722326666218213e-06, | |
| "loss": 0.8778524994850159, | |
| "step": 1534 | |
| }, | |
| { | |
| "epoch": 1.8754578754578755, | |
| "grad_norm": 1.5334826707839966, | |
| "learning_rate": 3.16776972373404e-06, | |
| "loss": 1.1086459159851074, | |
| "step": 1536 | |
| }, | |
| { | |
| "epoch": 1.877899877899878, | |
| "grad_norm": 1.6864469051361084, | |
| "learning_rate": 3.1633050778355624e-06, | |
| "loss": 1.0293059349060059, | |
| "step": 1538 | |
| }, | |
| { | |
| "epoch": 1.8803418803418803, | |
| "grad_norm": 2.2873408794403076, | |
| "learning_rate": 3.158838747126224e-06, | |
| "loss": 1.0864299535751343, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 1.8827838827838828, | |
| "grad_norm": 1.5731513500213623, | |
| "learning_rate": 3.1543707498127267e-06, | |
| "loss": 1.0680838823318481, | |
| "step": 1542 | |
| }, | |
| { | |
| "epoch": 1.8852258852258852, | |
| "grad_norm": 2.0635628700256348, | |
| "learning_rate": 3.1499011041085662e-06, | |
| "loss": 0.9070185422897339, | |
| "step": 1544 | |
| }, | |
| { | |
| "epoch": 1.8876678876678876, | |
| "grad_norm": 2.2307991981506348, | |
| "learning_rate": 3.145429828233959e-06, | |
| "loss": 1.060643196105957, | |
| "step": 1546 | |
| }, | |
| { | |
| "epoch": 1.89010989010989, | |
| "grad_norm": 3.084059476852417, | |
| "learning_rate": 3.1409569404157646e-06, | |
| "loss": 1.0800150632858276, | |
| "step": 1548 | |
| }, | |
| { | |
| "epoch": 1.8925518925518925, | |
| "grad_norm": 2.034463882446289, | |
| "learning_rate": 3.136482458887416e-06, | |
| "loss": 0.6771202087402344, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 1.894993894993895, | |
| "grad_norm": 2.416832447052002, | |
| "learning_rate": 3.132006401888841e-06, | |
| "loss": 1.1564983129501343, | |
| "step": 1552 | |
| }, | |
| { | |
| "epoch": 1.8974358974358974, | |
| "grad_norm": 2.9857418537139893, | |
| "learning_rate": 3.1275287876663905e-06, | |
| "loss": 0.8453341126441956, | |
| "step": 1554 | |
| }, | |
| { | |
| "epoch": 1.8998778998778998, | |
| "grad_norm": 1.9065909385681152, | |
| "learning_rate": 3.123049634472764e-06, | |
| "loss": 1.206203818321228, | |
| "step": 1556 | |
| }, | |
| { | |
| "epoch": 1.9023199023199022, | |
| "grad_norm": 1.7331615686416626, | |
| "learning_rate": 3.118568960566933e-06, | |
| "loss": 0.9110676050186157, | |
| "step": 1558 | |
| }, | |
| { | |
| "epoch": 1.9047619047619047, | |
| "grad_norm": 2.49706768989563, | |
| "learning_rate": 3.114086784214069e-06, | |
| "loss": 0.6509535908699036, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 1.907203907203907, | |
| "grad_norm": 1.9002443552017212, | |
| "learning_rate": 3.109603123685468e-06, | |
| "loss": 1.080418586730957, | |
| "step": 1562 | |
| }, | |
| { | |
| "epoch": 1.9096459096459095, | |
| "grad_norm": 3.7310116291046143, | |
| "learning_rate": 3.1051179972584756e-06, | |
| "loss": 0.7549952268600464, | |
| "step": 1564 | |
| }, | |
| { | |
| "epoch": 1.912087912087912, | |
| "grad_norm": 1.4353991746902466, | |
| "learning_rate": 3.1006314232164146e-06, | |
| "loss": 1.083061695098877, | |
| "step": 1566 | |
| }, | |
| { | |
| "epoch": 1.9145299145299144, | |
| "grad_norm": 2.5150792598724365, | |
| "learning_rate": 3.0961434198485067e-06, | |
| "loss": 0.9303537607192993, | |
| "step": 1568 | |
| }, | |
| { | |
| "epoch": 1.9169719169719168, | |
| "grad_norm": 1.2595463991165161, | |
| "learning_rate": 3.0916540054498028e-06, | |
| "loss": 0.7716434001922607, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 1.9194139194139193, | |
| "grad_norm": 1.386602759361267, | |
| "learning_rate": 3.087163198321103e-06, | |
| "loss": 1.1206477880477905, | |
| "step": 1572 | |
| }, | |
| { | |
| "epoch": 1.9218559218559217, | |
| "grad_norm": 2.0977489948272705, | |
| "learning_rate": 3.0826710167688866e-06, | |
| "loss": 0.7714415788650513, | |
| "step": 1574 | |
| }, | |
| { | |
| "epoch": 1.9242979242979243, | |
| "grad_norm": 3.282386302947998, | |
| "learning_rate": 3.0781774791052347e-06, | |
| "loss": 1.0669711828231812, | |
| "step": 1576 | |
| }, | |
| { | |
| "epoch": 1.9267399267399268, | |
| "grad_norm": 2.187236785888672, | |
| "learning_rate": 3.073682603647758e-06, | |
| "loss": 0.7885124683380127, | |
| "step": 1578 | |
| }, | |
| { | |
| "epoch": 1.9291819291819292, | |
| "grad_norm": 2.4865806102752686, | |
| "learning_rate": 3.0691864087195172e-06, | |
| "loss": 1.084753394126892, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 1.9316239316239316, | |
| "grad_norm": 3.804330348968506, | |
| "learning_rate": 3.064688912648957e-06, | |
| "loss": 0.3611922860145569, | |
| "step": 1582 | |
| }, | |
| { | |
| "epoch": 1.934065934065934, | |
| "grad_norm": 18.454357147216797, | |
| "learning_rate": 3.0601901337698213e-06, | |
| "loss": 0.5478751063346863, | |
| "step": 1584 | |
| }, | |
| { | |
| "epoch": 1.9365079365079365, | |
| "grad_norm": 9.308585166931152, | |
| "learning_rate": 3.055690090421085e-06, | |
| "loss": 0.6894688606262207, | |
| "step": 1586 | |
| }, | |
| { | |
| "epoch": 1.938949938949939, | |
| "grad_norm": 4.380536079406738, | |
| "learning_rate": 3.0511888009468792e-06, | |
| "loss": 1.172979474067688, | |
| "step": 1588 | |
| }, | |
| { | |
| "epoch": 1.9413919413919414, | |
| "grad_norm": 1.1702888011932373, | |
| "learning_rate": 3.0466862836964117e-06, | |
| "loss": 1.1025750637054443, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 1.9438339438339438, | |
| "grad_norm": 2.2686538696289062, | |
| "learning_rate": 3.0421825570238978e-06, | |
| "loss": 1.0041526556015015, | |
| "step": 1592 | |
| }, | |
| { | |
| "epoch": 1.9462759462759462, | |
| "grad_norm": 1.5547155141830444, | |
| "learning_rate": 3.037677639288481e-06, | |
| "loss": 0.7530244588851929, | |
| "step": 1594 | |
| }, | |
| { | |
| "epoch": 1.9487179487179487, | |
| "grad_norm": 1.6241923570632935, | |
| "learning_rate": 3.0331715488541626e-06, | |
| "loss": 0.6593371629714966, | |
| "step": 1596 | |
| }, | |
| { | |
| "epoch": 1.9511599511599511, | |
| "grad_norm": 1.3635199069976807, | |
| "learning_rate": 3.0286643040897203e-06, | |
| "loss": 0.7976773381233215, | |
| "step": 1598 | |
| }, | |
| { | |
| "epoch": 1.9536019536019538, | |
| "grad_norm": 1.5380146503448486, | |
| "learning_rate": 3.0241559233686424e-06, | |
| "loss": 0.8483846187591553, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 1.9560439560439562, | |
| "grad_norm": 1.5258017778396606, | |
| "learning_rate": 3.0196464250690434e-06, | |
| "loss": 1.0973600149154663, | |
| "step": 1602 | |
| }, | |
| { | |
| "epoch": 1.9584859584859586, | |
| "grad_norm": 5.223465442657471, | |
| "learning_rate": 3.0151358275735965e-06, | |
| "loss": 1.2270939350128174, | |
| "step": 1604 | |
| }, | |
| { | |
| "epoch": 1.960927960927961, | |
| "grad_norm": 4.014069080352783, | |
| "learning_rate": 3.0106241492694533e-06, | |
| "loss": 1.3512402772903442, | |
| "step": 1606 | |
| }, | |
| { | |
| "epoch": 1.9633699633699635, | |
| "grad_norm": 1.4490033388137817, | |
| "learning_rate": 3.0061114085481745e-06, | |
| "loss": 1.1516140699386597, | |
| "step": 1608 | |
| }, | |
| { | |
| "epoch": 1.965811965811966, | |
| "grad_norm": 1.19436776638031, | |
| "learning_rate": 3.0015976238056475e-06, | |
| "loss": 1.0787304639816284, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 1.9682539682539684, | |
| "grad_norm": 6.923144817352295, | |
| "learning_rate": 2.9970828134420198e-06, | |
| "loss": 0.9626544713973999, | |
| "step": 1612 | |
| }, | |
| { | |
| "epoch": 1.9706959706959708, | |
| "grad_norm": 1.686660885810852, | |
| "learning_rate": 2.992566995861616e-06, | |
| "loss": 1.1870635747909546, | |
| "step": 1614 | |
| }, | |
| { | |
| "epoch": 1.9731379731379732, | |
| "grad_norm": 2.969782829284668, | |
| "learning_rate": 2.988050189472869e-06, | |
| "loss": 0.9546635150909424, | |
| "step": 1616 | |
| }, | |
| { | |
| "epoch": 1.9755799755799757, | |
| "grad_norm": 2.04162335395813, | |
| "learning_rate": 2.983532412688242e-06, | |
| "loss": 1.0080379247665405, | |
| "step": 1618 | |
| }, | |
| { | |
| "epoch": 1.978021978021978, | |
| "grad_norm": 1.1154638528823853, | |
| "learning_rate": 2.979013683924154e-06, | |
| "loss": 1.1551849842071533, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 1.9804639804639805, | |
| "grad_norm": 1.3147307634353638, | |
| "learning_rate": 2.9744940216009037e-06, | |
| "loss": 0.8124474287033081, | |
| "step": 1622 | |
| }, | |
| { | |
| "epoch": 1.982905982905983, | |
| "grad_norm": 3.960902690887451, | |
| "learning_rate": 2.969973444142597e-06, | |
| "loss": 0.5901971459388733, | |
| "step": 1624 | |
| }, | |
| { | |
| "epoch": 1.9853479853479854, | |
| "grad_norm": 2.4836363792419434, | |
| "learning_rate": 2.965451969977069e-06, | |
| "loss": 0.8430943489074707, | |
| "step": 1626 | |
| }, | |
| { | |
| "epoch": 1.9877899877899878, | |
| "grad_norm": 5.949784278869629, | |
| "learning_rate": 2.9609296175358102e-06, | |
| "loss": 0.9984661340713501, | |
| "step": 1628 | |
| }, | |
| { | |
| "epoch": 1.9902319902319903, | |
| "grad_norm": 2.4892053604125977, | |
| "learning_rate": 2.9564064052538926e-06, | |
| "loss": 1.1695860624313354, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 1.9926739926739927, | |
| "grad_norm": 1.6151142120361328, | |
| "learning_rate": 2.951882351569892e-06, | |
| "loss": 1.063124179840088, | |
| "step": 1632 | |
| }, | |
| { | |
| "epoch": 1.9951159951159951, | |
| "grad_norm": 2.0610530376434326, | |
| "learning_rate": 2.9473574749258143e-06, | |
| "loss": 0.8075814247131348, | |
| "step": 1634 | |
| }, | |
| { | |
| "epoch": 1.9975579975579976, | |
| "grad_norm": 2.036194086074829, | |
| "learning_rate": 2.94283179376702e-06, | |
| "loss": 1.161010980606079, | |
| "step": 1636 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 1.6809015274047852, | |
| "learning_rate": 2.9383053265421514e-06, | |
| "loss": 1.0740622282028198, | |
| "step": 1638 | |
| }, | |
| { | |
| "epoch": 2.0024420024420024, | |
| "grad_norm": 7.186413288116455, | |
| "learning_rate": 2.9337780917030513e-06, | |
| "loss": 0.9597793221473694, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 2.004884004884005, | |
| "grad_norm": 2.5799577236175537, | |
| "learning_rate": 2.929250107704694e-06, | |
| "loss": 0.7062101364135742, | |
| "step": 1642 | |
| }, | |
| { | |
| "epoch": 2.0073260073260073, | |
| "grad_norm": 0.9430143237113953, | |
| "learning_rate": 2.924721393005109e-06, | |
| "loss": 0.9560756087303162, | |
| "step": 1644 | |
| }, | |
| { | |
| "epoch": 2.0097680097680097, | |
| "grad_norm": 2.4356815814971924, | |
| "learning_rate": 2.9201919660653e-06, | |
| "loss": 0.7125204801559448, | |
| "step": 1646 | |
| }, | |
| { | |
| "epoch": 2.012210012210012, | |
| "grad_norm": 2.3169310092926025, | |
| "learning_rate": 2.9156618453491786e-06, | |
| "loss": 0.8216168880462646, | |
| "step": 1648 | |
| }, | |
| { | |
| "epoch": 2.0146520146520146, | |
| "grad_norm": 11.127049446105957, | |
| "learning_rate": 2.911131049323483e-06, | |
| "loss": 0.8026351928710938, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 2.017094017094017, | |
| "grad_norm": 2.923428535461426, | |
| "learning_rate": 2.9065995964577028e-06, | |
| "loss": 0.7188471555709839, | |
| "step": 1652 | |
| }, | |
| { | |
| "epoch": 2.0195360195360195, | |
| "grad_norm": 4.269984722137451, | |
| "learning_rate": 2.902067505224008e-06, | |
| "loss": 1.2672061920166016, | |
| "step": 1654 | |
| }, | |
| { | |
| "epoch": 2.021978021978022, | |
| "grad_norm": 1.2916280031204224, | |
| "learning_rate": 2.897534794097167e-06, | |
| "loss": 0.5318281054496765, | |
| "step": 1656 | |
| }, | |
| { | |
| "epoch": 2.0244200244200243, | |
| "grad_norm": 2.5028984546661377, | |
| "learning_rate": 2.89300148155448e-06, | |
| "loss": 0.9953727126121521, | |
| "step": 1658 | |
| }, | |
| { | |
| "epoch": 2.0268620268620268, | |
| "grad_norm": 2.887450695037842, | |
| "learning_rate": 2.8884675860756946e-06, | |
| "loss": 0.9623196125030518, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 2.029304029304029, | |
| "grad_norm": 2.6880152225494385, | |
| "learning_rate": 2.883933126142937e-06, | |
| "loss": 1.0482466220855713, | |
| "step": 1662 | |
| }, | |
| { | |
| "epoch": 2.0317460317460316, | |
| "grad_norm": 1.8128950595855713, | |
| "learning_rate": 2.8793981202406335e-06, | |
| "loss": 0.4340633749961853, | |
| "step": 1664 | |
| }, | |
| { | |
| "epoch": 2.034188034188034, | |
| "grad_norm": 3.808696985244751, | |
| "learning_rate": 2.874862586855437e-06, | |
| "loss": 0.7226059436798096, | |
| "step": 1666 | |
| }, | |
| { | |
| "epoch": 2.0366300366300365, | |
| "grad_norm": 1.5693755149841309, | |
| "learning_rate": 2.870326544476148e-06, | |
| "loss": 1.0041981935501099, | |
| "step": 1668 | |
| }, | |
| { | |
| "epoch": 2.039072039072039, | |
| "grad_norm": 3.0417141914367676, | |
| "learning_rate": 2.8657900115936465e-06, | |
| "loss": 0.7336680889129639, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 2.0415140415140414, | |
| "grad_norm": 3.467229127883911, | |
| "learning_rate": 2.8612530067008067e-06, | |
| "loss": 0.9192556142807007, | |
| "step": 1672 | |
| }, | |
| { | |
| "epoch": 2.043956043956044, | |
| "grad_norm": 3.149291515350342, | |
| "learning_rate": 2.8567155482924315e-06, | |
| "loss": 0.9109829068183899, | |
| "step": 1674 | |
| }, | |
| { | |
| "epoch": 2.0463980463980462, | |
| "grad_norm": 1.5668519735336304, | |
| "learning_rate": 2.8521776548651692e-06, | |
| "loss": 0.6515228748321533, | |
| "step": 1676 | |
| }, | |
| { | |
| "epoch": 2.0488400488400487, | |
| "grad_norm": 3.5928568840026855, | |
| "learning_rate": 2.8476393449174426e-06, | |
| "loss": 1.0088976621627808, | |
| "step": 1678 | |
| }, | |
| { | |
| "epoch": 2.051282051282051, | |
| "grad_norm": 2.0251355171203613, | |
| "learning_rate": 2.843100636949374e-06, | |
| "loss": 1.004931092262268, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 2.0537240537240535, | |
| "grad_norm": 3.476871967315674, | |
| "learning_rate": 2.838561549462705e-06, | |
| "loss": 0.7845253348350525, | |
| "step": 1682 | |
| }, | |
| { | |
| "epoch": 2.056166056166056, | |
| "grad_norm": 8.491005897521973, | |
| "learning_rate": 2.8340221009607272e-06, | |
| "loss": 0.7041101455688477, | |
| "step": 1684 | |
| }, | |
| { | |
| "epoch": 2.0586080586080584, | |
| "grad_norm": 7.643034934997559, | |
| "learning_rate": 2.829482309948203e-06, | |
| "loss": 0.8947182297706604, | |
| "step": 1686 | |
| }, | |
| { | |
| "epoch": 2.061050061050061, | |
| "grad_norm": 2.488511323928833, | |
| "learning_rate": 2.824942194931289e-06, | |
| "loss": 0.9186074137687683, | |
| "step": 1688 | |
| }, | |
| { | |
| "epoch": 2.0634920634920633, | |
| "grad_norm": 10.357978820800781, | |
| "learning_rate": 2.820401774417466e-06, | |
| "loss": 0.7940126061439514, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 2.065934065934066, | |
| "grad_norm": 1.5219630002975464, | |
| "learning_rate": 2.815861066915458e-06, | |
| "loss": 0.7649714350700378, | |
| "step": 1692 | |
| }, | |
| { | |
| "epoch": 2.0683760683760686, | |
| "grad_norm": 1.8372576236724854, | |
| "learning_rate": 2.811320090935159e-06, | |
| "loss": 0.7867807149887085, | |
| "step": 1694 | |
| }, | |
| { | |
| "epoch": 2.070818070818071, | |
| "grad_norm": 2.9736199378967285, | |
| "learning_rate": 2.806778864987558e-06, | |
| "loss": 1.0023208856582642, | |
| "step": 1696 | |
| }, | |
| { | |
| "epoch": 2.0732600732600734, | |
| "grad_norm": 4.48581075668335, | |
| "learning_rate": 2.802237407584663e-06, | |
| "loss": 0.9700354337692261, | |
| "step": 1698 | |
| }, | |
| { | |
| "epoch": 2.075702075702076, | |
| "grad_norm": 2.3087658882141113, | |
| "learning_rate": 2.797695737239425e-06, | |
| "loss": 0.9603742361068726, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 2.0781440781440783, | |
| "grad_norm": 3.8156135082244873, | |
| "learning_rate": 2.7931538724656625e-06, | |
| "loss": 0.4553748667240143, | |
| "step": 1702 | |
| }, | |
| { | |
| "epoch": 2.0805860805860807, | |
| "grad_norm": 1.0407174825668335, | |
| "learning_rate": 2.788611831777989e-06, | |
| "loss": 0.5665370225906372, | |
| "step": 1704 | |
| }, | |
| { | |
| "epoch": 2.083028083028083, | |
| "grad_norm": 5.208148956298828, | |
| "learning_rate": 2.784069633691732e-06, | |
| "loss": 0.41125673055648804, | |
| "step": 1706 | |
| }, | |
| { | |
| "epoch": 2.0854700854700856, | |
| "grad_norm": 4.539152145385742, | |
| "learning_rate": 2.779527296722863e-06, | |
| "loss": 0.9381171464920044, | |
| "step": 1708 | |
| }, | |
| { | |
| "epoch": 2.087912087912088, | |
| "grad_norm": 2.998134136199951, | |
| "learning_rate": 2.774984839387918e-06, | |
| "loss": 0.7079961895942688, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 2.0903540903540905, | |
| "grad_norm": 3.755718231201172, | |
| "learning_rate": 2.7704422802039255e-06, | |
| "loss": 0.7328172922134399, | |
| "step": 1712 | |
| }, | |
| { | |
| "epoch": 2.092796092796093, | |
| "grad_norm": 1.5156927108764648, | |
| "learning_rate": 2.765899637688327e-06, | |
| "loss": 0.6182104349136353, | |
| "step": 1714 | |
| }, | |
| { | |
| "epoch": 2.0952380952380953, | |
| "grad_norm": 2.6270971298217773, | |
| "learning_rate": 2.7613569303589054e-06, | |
| "loss": 0.7295227646827698, | |
| "step": 1716 | |
| }, | |
| { | |
| "epoch": 2.0976800976800978, | |
| "grad_norm": 2.0563015937805176, | |
| "learning_rate": 2.756814176733707e-06, | |
| "loss": 0.9640318155288696, | |
| "step": 1718 | |
| }, | |
| { | |
| "epoch": 2.1001221001221, | |
| "grad_norm": 2.9778478145599365, | |
| "learning_rate": 2.752271395330967e-06, | |
| "loss": 0.9460858106613159, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 2.1025641025641026, | |
| "grad_norm": 2.579092025756836, | |
| "learning_rate": 2.7477286046690336e-06, | |
| "loss": 0.9912809133529663, | |
| "step": 1722 | |
| }, | |
| { | |
| "epoch": 2.105006105006105, | |
| "grad_norm": 2.132593870162964, | |
| "learning_rate": 2.743185823266294e-06, | |
| "loss": 0.657219648361206, | |
| "step": 1724 | |
| }, | |
| { | |
| "epoch": 2.1074481074481075, | |
| "grad_norm": 3.7171902656555176, | |
| "learning_rate": 2.7386430696410953e-06, | |
| "loss": 0.6395490765571594, | |
| "step": 1726 | |
| }, | |
| { | |
| "epoch": 2.10989010989011, | |
| "grad_norm": 1.617601752281189, | |
| "learning_rate": 2.7341003623116743e-06, | |
| "loss": 0.5296671986579895, | |
| "step": 1728 | |
| }, | |
| { | |
| "epoch": 2.1123321123321124, | |
| "grad_norm": 2.1062819957733154, | |
| "learning_rate": 2.729557719796076e-06, | |
| "loss": 0.8426005840301514, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 2.114774114774115, | |
| "grad_norm": 3.001302480697632, | |
| "learning_rate": 2.7250151606120826e-06, | |
| "loss": 0.565944254398346, | |
| "step": 1732 | |
| }, | |
| { | |
| "epoch": 2.1172161172161172, | |
| "grad_norm": 0.658115565776825, | |
| "learning_rate": 2.7204727032771376e-06, | |
| "loss": 0.3656719923019409, | |
| "step": 1734 | |
| }, | |
| { | |
| "epoch": 2.1196581196581197, | |
| "grad_norm": 2.848242998123169, | |
| "learning_rate": 2.7159303663082687e-06, | |
| "loss": 0.9933385252952576, | |
| "step": 1736 | |
| }, | |
| { | |
| "epoch": 2.122100122100122, | |
| "grad_norm": 4.3910417556762695, | |
| "learning_rate": 2.7113881682220123e-06, | |
| "loss": 0.9253290891647339, | |
| "step": 1738 | |
| }, | |
| { | |
| "epoch": 2.1245421245421245, | |
| "grad_norm": 1.5515904426574707, | |
| "learning_rate": 2.7068461275343382e-06, | |
| "loss": 0.8804880976676941, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 2.126984126984127, | |
| "grad_norm": 5.038269996643066, | |
| "learning_rate": 2.7023042627605754e-06, | |
| "loss": 1.0033385753631592, | |
| "step": 1742 | |
| }, | |
| { | |
| "epoch": 2.1294261294261294, | |
| "grad_norm": 2.507053852081299, | |
| "learning_rate": 2.6977625924153376e-06, | |
| "loss": 0.671730637550354, | |
| "step": 1744 | |
| }, | |
| { | |
| "epoch": 2.131868131868132, | |
| "grad_norm": 2.777392625808716, | |
| "learning_rate": 2.6932211350124425e-06, | |
| "loss": 1.001034140586853, | |
| "step": 1746 | |
| }, | |
| { | |
| "epoch": 2.1343101343101343, | |
| "grad_norm": 5.460464954376221, | |
| "learning_rate": 2.6886799090648417e-06, | |
| "loss": 0.38881126046180725, | |
| "step": 1748 | |
| }, | |
| { | |
| "epoch": 2.1367521367521367, | |
| "grad_norm": 6.266025543212891, | |
| "learning_rate": 2.684138933084543e-06, | |
| "loss": 1.1089563369750977, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 2.139194139194139, | |
| "grad_norm": 2.3073322772979736, | |
| "learning_rate": 2.6795982255825354e-06, | |
| "loss": 0.9409431219100952, | |
| "step": 1752 | |
| }, | |
| { | |
| "epoch": 2.1416361416361416, | |
| "grad_norm": 3.6696202754974365, | |
| "learning_rate": 2.6750578050687115e-06, | |
| "loss": 0.8869442939758301, | |
| "step": 1754 | |
| }, | |
| { | |
| "epoch": 2.144078144078144, | |
| "grad_norm": 4.61408805847168, | |
| "learning_rate": 2.6705176900517983e-06, | |
| "loss": 1.01822030544281, | |
| "step": 1756 | |
| }, | |
| { | |
| "epoch": 2.1465201465201464, | |
| "grad_norm": 2.669914484024048, | |
| "learning_rate": 2.665977899039274e-06, | |
| "loss": 0.48161619901657104, | |
| "step": 1758 | |
| }, | |
| { | |
| "epoch": 2.148962148962149, | |
| "grad_norm": 2.6554932594299316, | |
| "learning_rate": 2.661438450537296e-06, | |
| "loss": 0.8899593353271484, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 2.1514041514041513, | |
| "grad_norm": 1.00688636302948, | |
| "learning_rate": 2.656899363050628e-06, | |
| "loss": 0.6889787912368774, | |
| "step": 1762 | |
| }, | |
| { | |
| "epoch": 2.1538461538461537, | |
| "grad_norm": 5.138449192047119, | |
| "learning_rate": 2.6523606550825577e-06, | |
| "loss": 0.6849108934402466, | |
| "step": 1764 | |
| }, | |
| { | |
| "epoch": 2.156288156288156, | |
| "grad_norm": 1.4361852407455444, | |
| "learning_rate": 2.647822345134832e-06, | |
| "loss": 0.5109698176383972, | |
| "step": 1766 | |
| }, | |
| { | |
| "epoch": 2.1587301587301586, | |
| "grad_norm": 4.641076564788818, | |
| "learning_rate": 2.6432844517075696e-06, | |
| "loss": 0.7529181838035583, | |
| "step": 1768 | |
| }, | |
| { | |
| "epoch": 2.161172161172161, | |
| "grad_norm": 2.5101401805877686, | |
| "learning_rate": 2.638746993299194e-06, | |
| "loss": 0.6117711067199707, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 2.1636141636141635, | |
| "grad_norm": 1.5911965370178223, | |
| "learning_rate": 2.6342099884063542e-06, | |
| "loss": 0.9727715849876404, | |
| "step": 1772 | |
| }, | |
| { | |
| "epoch": 2.166056166056166, | |
| "grad_norm": 4.569766044616699, | |
| "learning_rate": 2.6296734555238517e-06, | |
| "loss": 0.8418964147567749, | |
| "step": 1774 | |
| }, | |
| { | |
| "epoch": 2.1684981684981683, | |
| "grad_norm": 2.1551764011383057, | |
| "learning_rate": 2.625137413144564e-06, | |
| "loss": 1.0541213750839233, | |
| "step": 1776 | |
| }, | |
| { | |
| "epoch": 2.1709401709401708, | |
| "grad_norm": 6.51698112487793, | |
| "learning_rate": 2.6206018797593672e-06, | |
| "loss": 0.6803760528564453, | |
| "step": 1778 | |
| }, | |
| { | |
| "epoch": 2.173382173382173, | |
| "grad_norm": 2.092607021331787, | |
| "learning_rate": 2.6160668738570638e-06, | |
| "loss": 0.9858105182647705, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 2.1758241758241756, | |
| "grad_norm": 8.542030334472656, | |
| "learning_rate": 2.6115324139243065e-06, | |
| "loss": 0.7755582332611084, | |
| "step": 1782 | |
| }, | |
| { | |
| "epoch": 2.178266178266178, | |
| "grad_norm": 2.45867919921875, | |
| "learning_rate": 2.606998518445521e-06, | |
| "loss": 0.9509971141815186, | |
| "step": 1784 | |
| }, | |
| { | |
| "epoch": 2.1807081807081805, | |
| "grad_norm": 5.667660236358643, | |
| "learning_rate": 2.6024652059028337e-06, | |
| "loss": 0.8328191041946411, | |
| "step": 1786 | |
| }, | |
| { | |
| "epoch": 2.183150183150183, | |
| "grad_norm": 15.772777557373047, | |
| "learning_rate": 2.5979324947759936e-06, | |
| "loss": 0.9569545388221741, | |
| "step": 1788 | |
| }, | |
| { | |
| "epoch": 2.185592185592186, | |
| "grad_norm": 4.427873134613037, | |
| "learning_rate": 2.5934004035422983e-06, | |
| "loss": 0.897070050239563, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 2.1880341880341883, | |
| "grad_norm": 4.582241535186768, | |
| "learning_rate": 2.5888689506765186e-06, | |
| "loss": 0.9291706681251526, | |
| "step": 1792 | |
| }, | |
| { | |
| "epoch": 2.1904761904761907, | |
| "grad_norm": 2.202183485031128, | |
| "learning_rate": 2.5843381546508217e-06, | |
| "loss": 0.545952320098877, | |
| "step": 1794 | |
| }, | |
| { | |
| "epoch": 2.192918192918193, | |
| "grad_norm": 1.6277117729187012, | |
| "learning_rate": 2.579808033934701e-06, | |
| "loss": 0.6887462735176086, | |
| "step": 1796 | |
| }, | |
| { | |
| "epoch": 2.1953601953601956, | |
| "grad_norm": 4.229698657989502, | |
| "learning_rate": 2.5752786069948925e-06, | |
| "loss": 0.8135143518447876, | |
| "step": 1798 | |
| }, | |
| { | |
| "epoch": 2.197802197802198, | |
| "grad_norm": 2.0216007232666016, | |
| "learning_rate": 2.5707498922953065e-06, | |
| "loss": 0.9676254391670227, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 2.2002442002442004, | |
| "grad_norm": 3.828848361968994, | |
| "learning_rate": 2.5662219082969502e-06, | |
| "loss": 0.9208850264549255, | |
| "step": 1802 | |
| }, | |
| { | |
| "epoch": 2.202686202686203, | |
| "grad_norm": 2.4354822635650635, | |
| "learning_rate": 2.561694673457849e-06, | |
| "loss": 0.6844379305839539, | |
| "step": 1804 | |
| }, | |
| { | |
| "epoch": 2.2051282051282053, | |
| "grad_norm": 1.510022521018982, | |
| "learning_rate": 2.55716820623298e-06, | |
| "loss": 0.938340961933136, | |
| "step": 1806 | |
| }, | |
| { | |
| "epoch": 2.2075702075702077, | |
| "grad_norm": 1.947124719619751, | |
| "learning_rate": 2.5526425250741864e-06, | |
| "loss": 0.9929482936859131, | |
| "step": 1808 | |
| }, | |
| { | |
| "epoch": 2.21001221001221, | |
| "grad_norm": 2.3316943645477295, | |
| "learning_rate": 2.548117648430109e-06, | |
| "loss": 0.7233268618583679, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 2.2124542124542126, | |
| "grad_norm": 2.0162341594696045, | |
| "learning_rate": 2.543593594746108e-06, | |
| "loss": 0.6767272353172302, | |
| "step": 1812 | |
| }, | |
| { | |
| "epoch": 2.214896214896215, | |
| "grad_norm": 0.9409213662147522, | |
| "learning_rate": 2.539070382464191e-06, | |
| "loss": 0.435127854347229, | |
| "step": 1814 | |
| }, | |
| { | |
| "epoch": 2.2173382173382175, | |
| "grad_norm": 1.5501841306686401, | |
| "learning_rate": 2.5345480300229313e-06, | |
| "loss": 0.9680942893028259, | |
| "step": 1816 | |
| }, | |
| { | |
| "epoch": 2.21978021978022, | |
| "grad_norm": 2.132582426071167, | |
| "learning_rate": 2.5300265558574034e-06, | |
| "loss": 0.890035092830658, | |
| "step": 1818 | |
| }, | |
| { | |
| "epoch": 2.2222222222222223, | |
| "grad_norm": 1.4618556499481201, | |
| "learning_rate": 2.525505978399097e-06, | |
| "loss": 0.8848022818565369, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 2.2246642246642248, | |
| "grad_norm": 1.61579167842865, | |
| "learning_rate": 2.5209863160758467e-06, | |
| "loss": 0.5495251417160034, | |
| "step": 1822 | |
| }, | |
| { | |
| "epoch": 2.227106227106227, | |
| "grad_norm": 2.571030378341675, | |
| "learning_rate": 2.5164675873117588e-06, | |
| "loss": 0.79774409532547, | |
| "step": 1824 | |
| }, | |
| { | |
| "epoch": 2.2295482295482296, | |
| "grad_norm": 3.0071425437927246, | |
| "learning_rate": 2.511949810527131e-06, | |
| "loss": 0.7262362241744995, | |
| "step": 1826 | |
| }, | |
| { | |
| "epoch": 2.231990231990232, | |
| "grad_norm": 5.134605884552002, | |
| "learning_rate": 2.507433004138385e-06, | |
| "loss": 0.6448302865028381, | |
| "step": 1828 | |
| }, | |
| { | |
| "epoch": 2.2344322344322345, | |
| "grad_norm": 7.841651916503906, | |
| "learning_rate": 2.5029171865579813e-06, | |
| "loss": 0.8010722398757935, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 2.236874236874237, | |
| "grad_norm": 2.221493721008301, | |
| "learning_rate": 2.4984023761943532e-06, | |
| "loss": 0.9125744104385376, | |
| "step": 1832 | |
| }, | |
| { | |
| "epoch": 2.2393162393162394, | |
| "grad_norm": 2.1063764095306396, | |
| "learning_rate": 2.493888591451826e-06, | |
| "loss": 0.9146173000335693, | |
| "step": 1834 | |
| }, | |
| { | |
| "epoch": 2.241758241758242, | |
| "grad_norm": 2.964050054550171, | |
| "learning_rate": 2.4893758507305465e-06, | |
| "loss": 1.0444574356079102, | |
| "step": 1836 | |
| }, | |
| { | |
| "epoch": 2.244200244200244, | |
| "grad_norm": 3.5007026195526123, | |
| "learning_rate": 2.4848641724264046e-06, | |
| "loss": 1.0267515182495117, | |
| "step": 1838 | |
| }, | |
| { | |
| "epoch": 2.2466422466422467, | |
| "grad_norm": 3.2701382637023926, | |
| "learning_rate": 2.4803535749309578e-06, | |
| "loss": 0.44911229610443115, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 2.249084249084249, | |
| "grad_norm": 3.629748821258545, | |
| "learning_rate": 2.4758440766313583e-06, | |
| "loss": 1.014188528060913, | |
| "step": 1842 | |
| }, | |
| { | |
| "epoch": 2.2515262515262515, | |
| "grad_norm": 2.0354208946228027, | |
| "learning_rate": 2.4713356959102804e-06, | |
| "loss": 1.1367512941360474, | |
| "step": 1844 | |
| }, | |
| { | |
| "epoch": 2.253968253968254, | |
| "grad_norm": 7.688552379608154, | |
| "learning_rate": 2.4668284511458385e-06, | |
| "loss": 0.641595721244812, | |
| "step": 1846 | |
| }, | |
| { | |
| "epoch": 2.2564102564102564, | |
| "grad_norm": 0.6762326955795288, | |
| "learning_rate": 2.4623223607115195e-06, | |
| "loss": 0.6372429132461548, | |
| "step": 1848 | |
| }, | |
| { | |
| "epoch": 2.258852258852259, | |
| "grad_norm": 8.27773666381836, | |
| "learning_rate": 2.457817442976103e-06, | |
| "loss": 0.8019550442695618, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 2.2612942612942613, | |
| "grad_norm": 4.783394813537598, | |
| "learning_rate": 2.453313716303589e-06, | |
| "loss": 0.896358072757721, | |
| "step": 1852 | |
| }, | |
| { | |
| "epoch": 2.2637362637362637, | |
| "grad_norm": 1.9131306409835815, | |
| "learning_rate": 2.4488111990531223e-06, | |
| "loss": 0.9868752360343933, | |
| "step": 1854 | |
| }, | |
| { | |
| "epoch": 2.266178266178266, | |
| "grad_norm": 1.9729207754135132, | |
| "learning_rate": 2.4443099095789147e-06, | |
| "loss": 1.0031776428222656, | |
| "step": 1856 | |
| }, | |
| { | |
| "epoch": 2.2686202686202686, | |
| "grad_norm": 1.891891360282898, | |
| "learning_rate": 2.4398098662301794e-06, | |
| "loss": 0.8341459631919861, | |
| "step": 1858 | |
| }, | |
| { | |
| "epoch": 2.271062271062271, | |
| "grad_norm": 4.215878486633301, | |
| "learning_rate": 2.435311087351044e-06, | |
| "loss": 1.061068058013916, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 2.2735042735042734, | |
| "grad_norm": 3.9982106685638428, | |
| "learning_rate": 2.430813591280483e-06, | |
| "loss": 0.6541799902915955, | |
| "step": 1862 | |
| }, | |
| { | |
| "epoch": 2.275946275946276, | |
| "grad_norm": 1.4903755187988281, | |
| "learning_rate": 2.426317396352243e-06, | |
| "loss": 0.6281458139419556, | |
| "step": 1864 | |
| }, | |
| { | |
| "epoch": 2.2783882783882783, | |
| "grad_norm": 2.3184974193573, | |
| "learning_rate": 2.421822520894766e-06, | |
| "loss": 0.9676836729049683, | |
| "step": 1866 | |
| }, | |
| { | |
| "epoch": 2.2808302808302807, | |
| "grad_norm": 6.018508434295654, | |
| "learning_rate": 2.4173289832311137e-06, | |
| "loss": 0.9747646450996399, | |
| "step": 1868 | |
| }, | |
| { | |
| "epoch": 2.283272283272283, | |
| "grad_norm": 3.8310694694519043, | |
| "learning_rate": 2.4128368016788973e-06, | |
| "loss": 0.9547437429428101, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 2.2857142857142856, | |
| "grad_norm": 2.1508607864379883, | |
| "learning_rate": 2.408345994550198e-06, | |
| "loss": 0.9991099834442139, | |
| "step": 1872 | |
| }, | |
| { | |
| "epoch": 2.288156288156288, | |
| "grad_norm": 11.147876739501953, | |
| "learning_rate": 2.403856580151494e-06, | |
| "loss": 0.2010164111852646, | |
| "step": 1874 | |
| }, | |
| { | |
| "epoch": 2.2905982905982905, | |
| "grad_norm": 1.8129445314407349, | |
| "learning_rate": 2.3993685767835866e-06, | |
| "loss": 0.9613729119300842, | |
| "step": 1876 | |
| }, | |
| { | |
| "epoch": 2.293040293040293, | |
| "grad_norm": 6.837078094482422, | |
| "learning_rate": 2.3948820027415247e-06, | |
| "loss": 0.6076623201370239, | |
| "step": 1878 | |
| }, | |
| { | |
| "epoch": 2.2954822954822953, | |
| "grad_norm": 10.136993408203125, | |
| "learning_rate": 2.390396876314533e-06, | |
| "loss": 0.819178581237793, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 2.2979242979242978, | |
| "grad_norm": 4.178940773010254, | |
| "learning_rate": 2.3859132157859323e-06, | |
| "loss": 0.963537335395813, | |
| "step": 1882 | |
| }, | |
| { | |
| "epoch": 2.3003663003663, | |
| "grad_norm": 3.2244484424591064, | |
| "learning_rate": 2.3814310394330683e-06, | |
| "loss": 0.6918718218803406, | |
| "step": 1884 | |
| }, | |
| { | |
| "epoch": 2.3028083028083026, | |
| "grad_norm": 2.9510533809661865, | |
| "learning_rate": 2.3769503655272375e-06, | |
| "loss": 1.1837718486785889, | |
| "step": 1886 | |
| }, | |
| { | |
| "epoch": 2.305250305250305, | |
| "grad_norm": 2.189448833465576, | |
| "learning_rate": 2.3724712123336098e-06, | |
| "loss": 0.953423798084259, | |
| "step": 1888 | |
| }, | |
| { | |
| "epoch": 2.3076923076923075, | |
| "grad_norm": 1.599765419960022, | |
| "learning_rate": 2.3679935981111594e-06, | |
| "loss": 0.9839805960655212, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 2.31013431013431, | |
| "grad_norm": 3.1475296020507812, | |
| "learning_rate": 2.363517541112585e-06, | |
| "loss": 0.9580415487289429, | |
| "step": 1892 | |
| }, | |
| { | |
| "epoch": 2.3125763125763124, | |
| "grad_norm": 4.1370744705200195, | |
| "learning_rate": 2.359043059584236e-06, | |
| "loss": 1.0927242040634155, | |
| "step": 1894 | |
| }, | |
| { | |
| "epoch": 2.315018315018315, | |
| "grad_norm": 2.406658411026001, | |
| "learning_rate": 2.354570171766042e-06, | |
| "loss": 0.9985021948814392, | |
| "step": 1896 | |
| }, | |
| { | |
| "epoch": 2.317460317460317, | |
| "grad_norm": 2.142878770828247, | |
| "learning_rate": 2.350098895891434e-06, | |
| "loss": 1.1066349744796753, | |
| "step": 1898 | |
| }, | |
| { | |
| "epoch": 2.3199023199023197, | |
| "grad_norm": 1.68159818649292, | |
| "learning_rate": 2.345629250187274e-06, | |
| "loss": 0.9075395464897156, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 2.3223443223443225, | |
| "grad_norm": 2.389622688293457, | |
| "learning_rate": 2.3411612528737765e-06, | |
| "loss": 1.001306414604187, | |
| "step": 1902 | |
| }, | |
| { | |
| "epoch": 2.324786324786325, | |
| "grad_norm": 17.086624145507812, | |
| "learning_rate": 2.3366949221644387e-06, | |
| "loss": 0.5735141038894653, | |
| "step": 1904 | |
| }, | |
| { | |
| "epoch": 2.3272283272283274, | |
| "grad_norm": 2.313629388809204, | |
| "learning_rate": 2.3322302762659616e-06, | |
| "loss": 0.45153266191482544, | |
| "step": 1906 | |
| }, | |
| { | |
| "epoch": 2.32967032967033, | |
| "grad_norm": 2.6861674785614014, | |
| "learning_rate": 2.3277673333781803e-06, | |
| "loss": 0.6503361463546753, | |
| "step": 1908 | |
| }, | |
| { | |
| "epoch": 2.3321123321123323, | |
| "grad_norm": 2.286579132080078, | |
| "learning_rate": 2.323306111693986e-06, | |
| "loss": 0.4222344160079956, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 2.3345543345543347, | |
| "grad_norm": 6.489071846008301, | |
| "learning_rate": 2.3188466293992555e-06, | |
| "loss": 0.816202700138092, | |
| "step": 1912 | |
| }, | |
| { | |
| "epoch": 2.336996336996337, | |
| "grad_norm": 1.8680096864700317, | |
| "learning_rate": 2.3143889046727735e-06, | |
| "loss": 0.865801990032196, | |
| "step": 1914 | |
| }, | |
| { | |
| "epoch": 2.3394383394383396, | |
| "grad_norm": 3.2620620727539062, | |
| "learning_rate": 2.3099329556861605e-06, | |
| "loss": 0.9299424290657043, | |
| "step": 1916 | |
| }, | |
| { | |
| "epoch": 2.341880341880342, | |
| "grad_norm": 2.1896071434020996, | |
| "learning_rate": 2.305478800603798e-06, | |
| "loss": 0.7136389017105103, | |
| "step": 1918 | |
| }, | |
| { | |
| "epoch": 2.3443223443223444, | |
| "grad_norm": 3.63539457321167, | |
| "learning_rate": 2.301026457582754e-06, | |
| "loss": 0.32393085956573486, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 2.346764346764347, | |
| "grad_norm": 1.8691846132278442, | |
| "learning_rate": 2.2965759447727136e-06, | |
| "loss": 0.7247822284698486, | |
| "step": 1922 | |
| }, | |
| { | |
| "epoch": 2.3492063492063493, | |
| "grad_norm": 3.239156723022461, | |
| "learning_rate": 2.2921272803158966e-06, | |
| "loss": 0.5720818638801575, | |
| "step": 1924 | |
| }, | |
| { | |
| "epoch": 2.3516483516483517, | |
| "grad_norm": 2.6364552974700928, | |
| "learning_rate": 2.2876804823469907e-06, | |
| "loss": 0.977821946144104, | |
| "step": 1926 | |
| }, | |
| { | |
| "epoch": 2.354090354090354, | |
| "grad_norm": 2.6305339336395264, | |
| "learning_rate": 2.2832355689930736e-06, | |
| "loss": 0.4369853138923645, | |
| "step": 1928 | |
| }, | |
| { | |
| "epoch": 2.3565323565323566, | |
| "grad_norm": 1.0626336336135864, | |
| "learning_rate": 2.2787925583735403e-06, | |
| "loss": 0.513285219669342, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 2.358974358974359, | |
| "grad_norm": 2.138998031616211, | |
| "learning_rate": 2.274351468600033e-06, | |
| "loss": 0.7080082297325134, | |
| "step": 1932 | |
| }, | |
| { | |
| "epoch": 2.3614163614163615, | |
| "grad_norm": 1.5141674280166626, | |
| "learning_rate": 2.2699123177763584e-06, | |
| "loss": 0.9225776195526123, | |
| "step": 1934 | |
| }, | |
| { | |
| "epoch": 2.363858363858364, | |
| "grad_norm": 1.8718771934509277, | |
| "learning_rate": 2.265475123998423e-06, | |
| "loss": 0.5893734693527222, | |
| "step": 1936 | |
| }, | |
| { | |
| "epoch": 2.3663003663003663, | |
| "grad_norm": 1.9005098342895508, | |
| "learning_rate": 2.2610399053541536e-06, | |
| "loss": 0.9091716408729553, | |
| "step": 1938 | |
| }, | |
| { | |
| "epoch": 2.3687423687423688, | |
| "grad_norm": 2.963907241821289, | |
| "learning_rate": 2.2566066799234255e-06, | |
| "loss": 0.7350085377693176, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 2.371184371184371, | |
| "grad_norm": 0.5085861682891846, | |
| "learning_rate": 2.252175465777991e-06, | |
| "loss": 0.7246252298355103, | |
| "step": 1942 | |
| }, | |
| { | |
| "epoch": 2.3736263736263736, | |
| "grad_norm": 3.5920464992523193, | |
| "learning_rate": 2.2477462809814023e-06, | |
| "loss": 0.8181778788566589, | |
| "step": 1944 | |
| }, | |
| { | |
| "epoch": 2.376068376068376, | |
| "grad_norm": 4.16288948059082, | |
| "learning_rate": 2.2433191435889368e-06, | |
| "loss": 0.2666274309158325, | |
| "step": 1946 | |
| }, | |
| { | |
| "epoch": 2.3785103785103785, | |
| "grad_norm": 1.2004927396774292, | |
| "learning_rate": 2.2388940716475292e-06, | |
| "loss": 0.6288062334060669, | |
| "step": 1948 | |
| }, | |
| { | |
| "epoch": 2.380952380952381, | |
| "grad_norm": 5.698200225830078, | |
| "learning_rate": 2.234471083195692e-06, | |
| "loss": 0.5255064964294434, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 2.3833943833943834, | |
| "grad_norm": 2.2315514087677, | |
| "learning_rate": 2.2300501962634474e-06, | |
| "loss": 0.5431297421455383, | |
| "step": 1952 | |
| }, | |
| { | |
| "epoch": 2.385836385836386, | |
| "grad_norm": 6.294608116149902, | |
| "learning_rate": 2.2256314288722474e-06, | |
| "loss": 0.7784007787704468, | |
| "step": 1954 | |
| }, | |
| { | |
| "epoch": 2.3882783882783882, | |
| "grad_norm": 2.0798985958099365, | |
| "learning_rate": 2.2212147990349062e-06, | |
| "loss": 1.0333225727081299, | |
| "step": 1956 | |
| }, | |
| { | |
| "epoch": 2.3907203907203907, | |
| "grad_norm": 1.6888413429260254, | |
| "learning_rate": 2.2168003247555238e-06, | |
| "loss": 0.7074629068374634, | |
| "step": 1958 | |
| }, | |
| { | |
| "epoch": 2.393162393162393, | |
| "grad_norm": 1.4450297355651855, | |
| "learning_rate": 2.2123880240294127e-06, | |
| "loss": 1.10811448097229, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 2.3956043956043955, | |
| "grad_norm": 2.086488723754883, | |
| "learning_rate": 2.2079779148430265e-06, | |
| "loss": 0.6509331464767456, | |
| "step": 1962 | |
| }, | |
| { | |
| "epoch": 2.398046398046398, | |
| "grad_norm": 1.7849246263504028, | |
| "learning_rate": 2.203570015173882e-06, | |
| "loss": 0.966160774230957, | |
| "step": 1964 | |
| }, | |
| { | |
| "epoch": 2.4004884004884004, | |
| "grad_norm": 3.1780035495758057, | |
| "learning_rate": 2.199164342990494e-06, | |
| "loss": 0.5994513034820557, | |
| "step": 1966 | |
| }, | |
| { | |
| "epoch": 2.402930402930403, | |
| "grad_norm": 2.7655558586120605, | |
| "learning_rate": 2.1947609162522924e-06, | |
| "loss": 0.6144997477531433, | |
| "step": 1968 | |
| }, | |
| { | |
| "epoch": 2.4053724053724053, | |
| "grad_norm": 9.948949813842773, | |
| "learning_rate": 2.190359752909556e-06, | |
| "loss": 0.4493882656097412, | |
| "step": 1970 | |
| }, | |
| { | |
| "epoch": 2.4078144078144077, | |
| "grad_norm": 3.8871443271636963, | |
| "learning_rate": 2.1859608709033357e-06, | |
| "loss": 0.22239239513874054, | |
| "step": 1972 | |
| }, | |
| { | |
| "epoch": 2.41025641025641, | |
| "grad_norm": 1.7906450033187866, | |
| "learning_rate": 2.1815642881653858e-06, | |
| "loss": 0.23173484206199646, | |
| "step": 1974 | |
| }, | |
| { | |
| "epoch": 2.4126984126984126, | |
| "grad_norm": 7.670548915863037, | |
| "learning_rate": 2.177170022618084e-06, | |
| "loss": 0.38976311683654785, | |
| "step": 1976 | |
| }, | |
| { | |
| "epoch": 2.415140415140415, | |
| "grad_norm": 2.966620922088623, | |
| "learning_rate": 2.1727780921743633e-06, | |
| "loss": 0.9863390922546387, | |
| "step": 1978 | |
| }, | |
| { | |
| "epoch": 2.4175824175824174, | |
| "grad_norm": 1.4998663663864136, | |
| "learning_rate": 2.1683885147376394e-06, | |
| "loss": 0.47463205456733704, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 2.42002442002442, | |
| "grad_norm": 2.3728787899017334, | |
| "learning_rate": 2.1640013082017332e-06, | |
| "loss": 1.125450849533081, | |
| "step": 1982 | |
| }, | |
| { | |
| "epoch": 2.4224664224664223, | |
| "grad_norm": 1.6976672410964966, | |
| "learning_rate": 2.1596164904508044e-06, | |
| "loss": 0.5219910740852356, | |
| "step": 1984 | |
| }, | |
| { | |
| "epoch": 2.4249084249084247, | |
| "grad_norm": 2.192134380340576, | |
| "learning_rate": 2.1552340793592718e-06, | |
| "loss": 1.040833830833435, | |
| "step": 1986 | |
| }, | |
| { | |
| "epoch": 2.427350427350427, | |
| "grad_norm": 2.2941408157348633, | |
| "learning_rate": 2.1508540927917458e-06, | |
| "loss": 0.9751767516136169, | |
| "step": 1988 | |
| }, | |
| { | |
| "epoch": 2.42979242979243, | |
| "grad_norm": 2.0631191730499268, | |
| "learning_rate": 2.1464765486029517e-06, | |
| "loss": 1.166698932647705, | |
| "step": 1990 | |
| }, | |
| { | |
| "epoch": 2.4322344322344325, | |
| "grad_norm": 9.183150291442871, | |
| "learning_rate": 2.1421014646376583e-06, | |
| "loss": 1.0005483627319336, | |
| "step": 1992 | |
| }, | |
| { | |
| "epoch": 2.434676434676435, | |
| "grad_norm": 2.409327268600464, | |
| "learning_rate": 2.137728858730609e-06, | |
| "loss": 0.9616595506668091, | |
| "step": 1994 | |
| }, | |
| { | |
| "epoch": 2.4371184371184373, | |
| "grad_norm": 1.7600177526474, | |
| "learning_rate": 2.133358748706442e-06, | |
| "loss": 0.7983999848365784, | |
| "step": 1996 | |
| }, | |
| { | |
| "epoch": 2.4395604395604398, | |
| "grad_norm": 1.8082541227340698, | |
| "learning_rate": 2.128991152379622e-06, | |
| "loss": 0.9734374284744263, | |
| "step": 1998 | |
| }, | |
| { | |
| "epoch": 2.442002442002442, | |
| "grad_norm": 1.7054754495620728, | |
| "learning_rate": 2.1246260875543672e-06, | |
| "loss": 0.6818905472755432, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 2.4444444444444446, | |
| "grad_norm": 3.318437099456787, | |
| "learning_rate": 2.1202635720245744e-06, | |
| "loss": 1.0553401708602905, | |
| "step": 2002 | |
| }, | |
| { | |
| "epoch": 2.446886446886447, | |
| "grad_norm": 1.892685055732727, | |
| "learning_rate": 2.115903623573754e-06, | |
| "loss": 0.637603759765625, | |
| "step": 2004 | |
| }, | |
| { | |
| "epoch": 2.4493284493284495, | |
| "grad_norm": 3.667452573776245, | |
| "learning_rate": 2.1115462599749453e-06, | |
| "loss": 0.6911687254905701, | |
| "step": 2006 | |
| }, | |
| { | |
| "epoch": 2.451770451770452, | |
| "grad_norm": 1.999451994895935, | |
| "learning_rate": 2.107191498990654e-06, | |
| "loss": 1.1354289054870605, | |
| "step": 2008 | |
| }, | |
| { | |
| "epoch": 2.4542124542124544, | |
| "grad_norm": 2.8429207801818848, | |
| "learning_rate": 2.1028393583727752e-06, | |
| "loss": 0.6011534929275513, | |
| "step": 2010 | |
| }, | |
| { | |
| "epoch": 2.456654456654457, | |
| "grad_norm": 3.9235146045684814, | |
| "learning_rate": 2.0984898558625227e-06, | |
| "loss": 0.6388018131256104, | |
| "step": 2012 | |
| }, | |
| { | |
| "epoch": 2.4590964590964592, | |
| "grad_norm": 2.5842745304107666, | |
| "learning_rate": 2.0941430091903576e-06, | |
| "loss": 1.0912564992904663, | |
| "step": 2014 | |
| }, | |
| { | |
| "epoch": 2.4615384615384617, | |
| "grad_norm": 2.1695728302001953, | |
| "learning_rate": 2.0897988360759127e-06, | |
| "loss": 0.90839684009552, | |
| "step": 2016 | |
| }, | |
| { | |
| "epoch": 2.463980463980464, | |
| "grad_norm": 1.3284540176391602, | |
| "learning_rate": 2.0854573542279216e-06, | |
| "loss": 1.0240721702575684, | |
| "step": 2018 | |
| }, | |
| { | |
| "epoch": 2.4664224664224665, | |
| "grad_norm": 5.811964511871338, | |
| "learning_rate": 2.081118581344151e-06, | |
| "loss": 0.7707440257072449, | |
| "step": 2020 | |
| }, | |
| { | |
| "epoch": 2.468864468864469, | |
| "grad_norm": 1.8133083581924438, | |
| "learning_rate": 2.0767825351113192e-06, | |
| "loss": 0.6514004468917847, | |
| "step": 2022 | |
| }, | |
| { | |
| "epoch": 2.4713064713064714, | |
| "grad_norm": 4.037316799163818, | |
| "learning_rate": 2.072449233205035e-06, | |
| "loss": 0.7341061234474182, | |
| "step": 2024 | |
| }, | |
| { | |
| "epoch": 2.473748473748474, | |
| "grad_norm": 1.9994157552719116, | |
| "learning_rate": 2.068118693289715e-06, | |
| "loss": 0.9125716090202332, | |
| "step": 2026 | |
| }, | |
| { | |
| "epoch": 2.4761904761904763, | |
| "grad_norm": 0.7720515727996826, | |
| "learning_rate": 2.0637909330185217e-06, | |
| "loss": 0.6419773101806641, | |
| "step": 2028 | |
| }, | |
| { | |
| "epoch": 2.4786324786324787, | |
| "grad_norm": 1.6481854915618896, | |
| "learning_rate": 2.0594659700332833e-06, | |
| "loss": 0.9903475046157837, | |
| "step": 2030 | |
| }, | |
| { | |
| "epoch": 2.481074481074481, | |
| "grad_norm": 2.603499174118042, | |
| "learning_rate": 2.055143821964424e-06, | |
| "loss": 1.1345065832138062, | |
| "step": 2032 | |
| }, | |
| { | |
| "epoch": 2.4835164835164836, | |
| "grad_norm": 2.5555107593536377, | |
| "learning_rate": 2.0508245064308968e-06, | |
| "loss": 0.5736313462257385, | |
| "step": 2034 | |
| }, | |
| { | |
| "epoch": 2.485958485958486, | |
| "grad_norm": 2.2995779514312744, | |
| "learning_rate": 2.046508041040107e-06, | |
| "loss": 1.004111409187317, | |
| "step": 2036 | |
| }, | |
| { | |
| "epoch": 2.4884004884004884, | |
| "grad_norm": 1.261184573173523, | |
| "learning_rate": 2.04219444338784e-06, | |
| "loss": 0.6451095342636108, | |
| "step": 2038 | |
| }, | |
| { | |
| "epoch": 2.490842490842491, | |
| "grad_norm": 9.273902893066406, | |
| "learning_rate": 2.0378837310581907e-06, | |
| "loss": 0.769629955291748, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 2.4932844932844933, | |
| "grad_norm": 5.710522174835205, | |
| "learning_rate": 2.0335759216234947e-06, | |
| "loss": 0.9529898166656494, | |
| "step": 2042 | |
| }, | |
| { | |
| "epoch": 2.4957264957264957, | |
| "grad_norm": 1.7338002920150757, | |
| "learning_rate": 2.0292710326442517e-06, | |
| "loss": 0.7281374931335449, | |
| "step": 2044 | |
| }, | |
| { | |
| "epoch": 2.498168498168498, | |
| "grad_norm": 7.91054630279541, | |
| "learning_rate": 2.0249690816690583e-06, | |
| "loss": 0.5946838855743408, | |
| "step": 2046 | |
| }, | |
| { | |
| "epoch": 2.5006105006105006, | |
| "grad_norm": 2.516921281814575, | |
| "learning_rate": 2.0206700862345334e-06, | |
| "loss": 0.719270646572113, | |
| "step": 2048 | |
| }, | |
| { | |
| "epoch": 2.503052503052503, | |
| "grad_norm": 9.922062873840332, | |
| "learning_rate": 2.016374063865248e-06, | |
| "loss": 0.8115828037261963, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 2.5054945054945055, | |
| "grad_norm": 2.6232059001922607, | |
| "learning_rate": 2.0120810320736537e-06, | |
| "loss": 1.1120948791503906, | |
| "step": 2052 | |
| }, | |
| { | |
| "epoch": 2.507936507936508, | |
| "grad_norm": 1.4087735414505005, | |
| "learning_rate": 2.00779100836001e-06, | |
| "loss": 0.7034242153167725, | |
| "step": 2054 | |
| }, | |
| { | |
| "epoch": 2.5103785103785103, | |
| "grad_norm": 1.9080172777175903, | |
| "learning_rate": 2.003504010212317e-06, | |
| "loss": 1.0267211198806763, | |
| "step": 2056 | |
| }, | |
| { | |
| "epoch": 2.5128205128205128, | |
| "grad_norm": 8.740047454833984, | |
| "learning_rate": 1.99922005510624e-06, | |
| "loss": 0.31465768814086914, | |
| "step": 2058 | |
| }, | |
| { | |
| "epoch": 2.515262515262515, | |
| "grad_norm": 3.2588388919830322, | |
| "learning_rate": 1.9949391605050365e-06, | |
| "loss": 0.2918320596218109, | |
| "step": 2060 | |
| }, | |
| { | |
| "epoch": 2.5177045177045176, | |
| "grad_norm": 6.833197116851807, | |
| "learning_rate": 1.990661343859493e-06, | |
| "loss": 0.7108557224273682, | |
| "step": 2062 | |
| }, | |
| { | |
| "epoch": 2.52014652014652, | |
| "grad_norm": 6.0173821449279785, | |
| "learning_rate": 1.986386622607845e-06, | |
| "loss": 0.8981122374534607, | |
| "step": 2064 | |
| }, | |
| { | |
| "epoch": 2.5225885225885225, | |
| "grad_norm": 1.5162910223007202, | |
| "learning_rate": 1.9821150141757133e-06, | |
| "loss": 0.6950556039810181, | |
| "step": 2066 | |
| }, | |
| { | |
| "epoch": 2.525030525030525, | |
| "grad_norm": 3.4228689670562744, | |
| "learning_rate": 1.977846535976026e-06, | |
| "loss": 0.7832509875297546, | |
| "step": 2068 | |
| }, | |
| { | |
| "epoch": 2.5274725274725274, | |
| "grad_norm": 2.2627415657043457, | |
| "learning_rate": 1.9735812054089542e-06, | |
| "loss": 1.0561403036117554, | |
| "step": 2070 | |
| }, | |
| { | |
| "epoch": 2.52991452991453, | |
| "grad_norm": 14.761664390563965, | |
| "learning_rate": 1.969319039861835e-06, | |
| "loss": 0.6642997860908508, | |
| "step": 2072 | |
| }, | |
| { | |
| "epoch": 2.5323565323565322, | |
| "grad_norm": 1.6336398124694824, | |
| "learning_rate": 1.965060056709105e-06, | |
| "loss": 1.0829975605010986, | |
| "step": 2074 | |
| }, | |
| { | |
| "epoch": 2.5347985347985347, | |
| "grad_norm": 2.2617239952087402, | |
| "learning_rate": 1.960804273312228e-06, | |
| "loss": 0.8906936645507812, | |
| "step": 2076 | |
| }, | |
| { | |
| "epoch": 2.537240537240537, | |
| "grad_norm": 7.154871463775635, | |
| "learning_rate": 1.9565517070196248e-06, | |
| "loss": 1.0117489099502563, | |
| "step": 2078 | |
| }, | |
| { | |
| "epoch": 2.5396825396825395, | |
| "grad_norm": 7.616284370422363, | |
| "learning_rate": 1.9523023751665997e-06, | |
| "loss": 0.6691079139709473, | |
| "step": 2080 | |
| }, | |
| { | |
| "epoch": 2.542124542124542, | |
| "grad_norm": 2.257676839828491, | |
| "learning_rate": 1.9480562950752745e-06, | |
| "loss": 0.9914268255233765, | |
| "step": 2082 | |
| }, | |
| { | |
| "epoch": 2.5445665445665444, | |
| "grad_norm": 1.5886762142181396, | |
| "learning_rate": 1.9438134840545147e-06, | |
| "loss": 1.0071735382080078, | |
| "step": 2084 | |
| }, | |
| { | |
| "epoch": 2.547008547008547, | |
| "grad_norm": 1.6075587272644043, | |
| "learning_rate": 1.939573959399858e-06, | |
| "loss": 0.9144080281257629, | |
| "step": 2086 | |
| }, | |
| { | |
| "epoch": 2.5494505494505493, | |
| "grad_norm": 1.8543524742126465, | |
| "learning_rate": 1.9353377383934475e-06, | |
| "loss": 0.912468433380127, | |
| "step": 2088 | |
| }, | |
| { | |
| "epoch": 2.5518925518925517, | |
| "grad_norm": 2.2457010746002197, | |
| "learning_rate": 1.931104838303958e-06, | |
| "loss": 0.7604387998580933, | |
| "step": 2090 | |
| }, | |
| { | |
| "epoch": 2.554334554334554, | |
| "grad_norm": 1.9843976497650146, | |
| "learning_rate": 1.9268752763865285e-06, | |
| "loss": 0.691798210144043, | |
| "step": 2092 | |
| }, | |
| { | |
| "epoch": 2.5567765567765566, | |
| "grad_norm": 1.6185815334320068, | |
| "learning_rate": 1.9226490698826876e-06, | |
| "loss": 0.7290869951248169, | |
| "step": 2094 | |
| }, | |
| { | |
| "epoch": 2.559218559218559, | |
| "grad_norm": 1.5648012161254883, | |
| "learning_rate": 1.918426236020286e-06, | |
| "loss": 0.8694143295288086, | |
| "step": 2096 | |
| }, | |
| { | |
| "epoch": 2.5616605616605614, | |
| "grad_norm": 0.736847460269928, | |
| "learning_rate": 1.91420679201343e-06, | |
| "loss": 0.023200487717986107, | |
| "step": 2098 | |
| }, | |
| { | |
| "epoch": 2.564102564102564, | |
| "grad_norm": 2.0540359020233154, | |
| "learning_rate": 1.9099907550624034e-06, | |
| "loss": 0.6316545009613037, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 2.5665445665445663, | |
| "grad_norm": 4.105884075164795, | |
| "learning_rate": 1.9057781423536015e-06, | |
| "loss": 0.9644788503646851, | |
| "step": 2102 | |
| }, | |
| { | |
| "epoch": 2.5689865689865687, | |
| "grad_norm": 6.087828159332275, | |
| "learning_rate": 1.9015689710594627e-06, | |
| "loss": 0.6429115533828735, | |
| "step": 2104 | |
| }, | |
| { | |
| "epoch": 2.571428571428571, | |
| "grad_norm": 7.242172718048096, | |
| "learning_rate": 1.897363258338395e-06, | |
| "loss": 0.8835878968238831, | |
| "step": 2106 | |
| }, | |
| { | |
| "epoch": 2.5738705738705736, | |
| "grad_norm": 3.802177906036377, | |
| "learning_rate": 1.8931610213347096e-06, | |
| "loss": 0.6208938360214233, | |
| "step": 2108 | |
| }, | |
| { | |
| "epoch": 2.576312576312576, | |
| "grad_norm": 4.764630317687988, | |
| "learning_rate": 1.888962277178548e-06, | |
| "loss": 0.5702378749847412, | |
| "step": 2110 | |
| }, | |
| { | |
| "epoch": 2.578754578754579, | |
| "grad_norm": 1.3646184206008911, | |
| "learning_rate": 1.884767042985814e-06, | |
| "loss": 1.0015933513641357, | |
| "step": 2112 | |
| }, | |
| { | |
| "epoch": 2.5811965811965814, | |
| "grad_norm": 7.507686614990234, | |
| "learning_rate": 1.880575335858102e-06, | |
| "loss": 0.297787070274353, | |
| "step": 2114 | |
| }, | |
| { | |
| "epoch": 2.583638583638584, | |
| "grad_norm": 1.5217941999435425, | |
| "learning_rate": 1.8763871728826282e-06, | |
| "loss": 0.7149800658226013, | |
| "step": 2116 | |
| }, | |
| { | |
| "epoch": 2.586080586080586, | |
| "grad_norm": 1.9740521907806396, | |
| "learning_rate": 1.8722025711321657e-06, | |
| "loss": 0.998376190662384, | |
| "step": 2118 | |
| }, | |
| { | |
| "epoch": 2.5885225885225887, | |
| "grad_norm": 1.2570465803146362, | |
| "learning_rate": 1.8680215476649643e-06, | |
| "loss": 0.665241539478302, | |
| "step": 2120 | |
| }, | |
| { | |
| "epoch": 2.590964590964591, | |
| "grad_norm": 4.761254787445068, | |
| "learning_rate": 1.8638441195246915e-06, | |
| "loss": 0.9342296719551086, | |
| "step": 2122 | |
| }, | |
| { | |
| "epoch": 2.5934065934065935, | |
| "grad_norm": 2.0453293323516846, | |
| "learning_rate": 1.8596703037403573e-06, | |
| "loss": 0.8592435121536255, | |
| "step": 2124 | |
| }, | |
| { | |
| "epoch": 2.595848595848596, | |
| "grad_norm": 1.4386849403381348, | |
| "learning_rate": 1.8555001173262449e-06, | |
| "loss": 0.4735715985298157, | |
| "step": 2126 | |
| }, | |
| { | |
| "epoch": 2.5982905982905984, | |
| "grad_norm": 1.8832699060440063, | |
| "learning_rate": 1.8513335772818452e-06, | |
| "loss": 0.9801812171936035, | |
| "step": 2128 | |
| }, | |
| { | |
| "epoch": 2.600732600732601, | |
| "grad_norm": 2.282724380493164, | |
| "learning_rate": 1.8471707005917833e-06, | |
| "loss": 0.6964608430862427, | |
| "step": 2130 | |
| }, | |
| { | |
| "epoch": 2.6031746031746033, | |
| "grad_norm": 1.7532885074615479, | |
| "learning_rate": 1.8430115042257518e-06, | |
| "loss": 0.5790331959724426, | |
| "step": 2132 | |
| }, | |
| { | |
| "epoch": 2.6056166056166057, | |
| "grad_norm": 2.6640255451202393, | |
| "learning_rate": 1.838856005138438e-06, | |
| "loss": 0.9573779106140137, | |
| "step": 2134 | |
| }, | |
| { | |
| "epoch": 2.608058608058608, | |
| "grad_norm": 3.7161571979522705, | |
| "learning_rate": 1.8347042202694616e-06, | |
| "loss": 0.6422839760780334, | |
| "step": 2136 | |
| }, | |
| { | |
| "epoch": 2.6105006105006106, | |
| "grad_norm": 3.1850647926330566, | |
| "learning_rate": 1.8305561665432987e-06, | |
| "loss": 0.7944685816764832, | |
| "step": 2138 | |
| }, | |
| { | |
| "epoch": 2.612942612942613, | |
| "grad_norm": 1.816838264465332, | |
| "learning_rate": 1.8264118608692166e-06, | |
| "loss": 0.6552348136901855, | |
| "step": 2140 | |
| }, | |
| { | |
| "epoch": 2.6153846153846154, | |
| "grad_norm": 2.1410024166107178, | |
| "learning_rate": 1.8222713201412034e-06, | |
| "loss": 0.9763152599334717, | |
| "step": 2142 | |
| }, | |
| { | |
| "epoch": 2.617826617826618, | |
| "grad_norm": 1.5893546342849731, | |
| "learning_rate": 1.818134561237901e-06, | |
| "loss": 0.9420812726020813, | |
| "step": 2144 | |
| }, | |
| { | |
| "epoch": 2.6202686202686203, | |
| "grad_norm": 8.81820297241211, | |
| "learning_rate": 1.814001601022533e-06, | |
| "loss": 0.9948893785476685, | |
| "step": 2146 | |
| }, | |
| { | |
| "epoch": 2.6227106227106227, | |
| "grad_norm": 1.6226983070373535, | |
| "learning_rate": 1.8098724563428383e-06, | |
| "loss": 0.6544241309165955, | |
| "step": 2148 | |
| }, | |
| { | |
| "epoch": 2.625152625152625, | |
| "grad_norm": 1.8730573654174805, | |
| "learning_rate": 1.8057471440310048e-06, | |
| "loss": 1.034470796585083, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 2.6275946275946276, | |
| "grad_norm": 1.7522205114364624, | |
| "learning_rate": 1.8016256809035932e-06, | |
| "loss": 1.0132882595062256, | |
| "step": 2152 | |
| }, | |
| { | |
| "epoch": 2.63003663003663, | |
| "grad_norm": 2.8230953216552734, | |
| "learning_rate": 1.7975080837614777e-06, | |
| "loss": 0.989703357219696, | |
| "step": 2154 | |
| }, | |
| { | |
| "epoch": 2.6324786324786325, | |
| "grad_norm": 2.739607334136963, | |
| "learning_rate": 1.79339436938977e-06, | |
| "loss": 0.9275919198989868, | |
| "step": 2156 | |
| }, | |
| { | |
| "epoch": 2.634920634920635, | |
| "grad_norm": 2.475738048553467, | |
| "learning_rate": 1.7892845545577547e-06, | |
| "loss": 0.7446354627609253, | |
| "step": 2158 | |
| }, | |
| { | |
| "epoch": 2.6373626373626373, | |
| "grad_norm": 3.7959513664245605, | |
| "learning_rate": 1.7851786560188223e-06, | |
| "loss": 0.5423752069473267, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 2.6398046398046398, | |
| "grad_norm": 2.074728488922119, | |
| "learning_rate": 1.7810766905103972e-06, | |
| "loss": 0.7950323820114136, | |
| "step": 2162 | |
| }, | |
| { | |
| "epoch": 2.642246642246642, | |
| "grad_norm": 1.902423620223999, | |
| "learning_rate": 1.776978674753868e-06, | |
| "loss": 0.48773831129074097, | |
| "step": 2164 | |
| }, | |
| { | |
| "epoch": 2.6446886446886446, | |
| "grad_norm": 1.4304314851760864, | |
| "learning_rate": 1.7728846254545285e-06, | |
| "loss": 0.9862061738967896, | |
| "step": 2166 | |
| }, | |
| { | |
| "epoch": 2.647130647130647, | |
| "grad_norm": 1.1472234725952148, | |
| "learning_rate": 1.7687945593014988e-06, | |
| "loss": 0.735059916973114, | |
| "step": 2168 | |
| }, | |
| { | |
| "epoch": 2.6495726495726495, | |
| "grad_norm": 1.9486029148101807, | |
| "learning_rate": 1.764708492967665e-06, | |
| "loss": 1.0259606838226318, | |
| "step": 2170 | |
| }, | |
| { | |
| "epoch": 2.652014652014652, | |
| "grad_norm": 1.9149264097213745, | |
| "learning_rate": 1.7606264431096048e-06, | |
| "loss": 1.0802158117294312, | |
| "step": 2172 | |
| }, | |
| { | |
| "epoch": 2.6544566544566544, | |
| "grad_norm": 6.957102298736572, | |
| "learning_rate": 1.7565484263675258e-06, | |
| "loss": 0.9875915050506592, | |
| "step": 2174 | |
| }, | |
| { | |
| "epoch": 2.656898656898657, | |
| "grad_norm": 2.418081283569336, | |
| "learning_rate": 1.7524744593651948e-06, | |
| "loss": 0.7961604595184326, | |
| "step": 2176 | |
| }, | |
| { | |
| "epoch": 2.659340659340659, | |
| "grad_norm": 2.5019962787628174, | |
| "learning_rate": 1.7484045587098681e-06, | |
| "loss": 1.029079556465149, | |
| "step": 2178 | |
| }, | |
| { | |
| "epoch": 2.6617826617826617, | |
| "grad_norm": 39.45793151855469, | |
| "learning_rate": 1.7443387409922266e-06, | |
| "loss": 1.0245277881622314, | |
| "step": 2180 | |
| }, | |
| { | |
| "epoch": 2.664224664224664, | |
| "grad_norm": 1.2770639657974243, | |
| "learning_rate": 1.740277022786309e-06, | |
| "loss": 1.0204907655715942, | |
| "step": 2182 | |
| }, | |
| { | |
| "epoch": 2.6666666666666665, | |
| "grad_norm": 2.364436149597168, | |
| "learning_rate": 1.7362194206494421e-06, | |
| "loss": 0.6930133700370789, | |
| "step": 2184 | |
| }, | |
| { | |
| "epoch": 2.669108669108669, | |
| "grad_norm": 2.3246958255767822, | |
| "learning_rate": 1.732165951122171e-06, | |
| "loss": 1.0231374502182007, | |
| "step": 2186 | |
| }, | |
| { | |
| "epoch": 2.6715506715506714, | |
| "grad_norm": 1.607748031616211, | |
| "learning_rate": 1.7281166307281972e-06, | |
| "loss": 1.094809651374817, | |
| "step": 2188 | |
| }, | |
| { | |
| "epoch": 2.6739926739926743, | |
| "grad_norm": 2.158128261566162, | |
| "learning_rate": 1.7240714759743084e-06, | |
| "loss": 1.021047830581665, | |
| "step": 2190 | |
| }, | |
| { | |
| "epoch": 2.6764346764346767, | |
| "grad_norm": 8.213944435119629, | |
| "learning_rate": 1.7200305033503123e-06, | |
| "loss": 0.9594013094902039, | |
| "step": 2192 | |
| }, | |
| { | |
| "epoch": 2.678876678876679, | |
| "grad_norm": 4.63560676574707, | |
| "learning_rate": 1.7159937293289639e-06, | |
| "loss": 0.3299452066421509, | |
| "step": 2194 | |
| }, | |
| { | |
| "epoch": 2.6813186813186816, | |
| "grad_norm": 2.0286736488342285, | |
| "learning_rate": 1.711961170365909e-06, | |
| "loss": 1.214423418045044, | |
| "step": 2196 | |
| }, | |
| { | |
| "epoch": 2.683760683760684, | |
| "grad_norm": 1.665736198425293, | |
| "learning_rate": 1.707932842899605e-06, | |
| "loss": 0.9360992908477783, | |
| "step": 2198 | |
| }, | |
| { | |
| "epoch": 2.6862026862026864, | |
| "grad_norm": 1.7861151695251465, | |
| "learning_rate": 1.7039087633512652e-06, | |
| "loss": 0.9141231179237366, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 2.688644688644689, | |
| "grad_norm": 4.079377174377441, | |
| "learning_rate": 1.6998889481247827e-06, | |
| "loss": 0.6146577596664429, | |
| "step": 2202 | |
| }, | |
| { | |
| "epoch": 2.6910866910866913, | |
| "grad_norm": 3.120830535888672, | |
| "learning_rate": 1.6958734136066708e-06, | |
| "loss": 0.7842304110527039, | |
| "step": 2204 | |
| }, | |
| { | |
| "epoch": 2.6935286935286937, | |
| "grad_norm": 9.338458061218262, | |
| "learning_rate": 1.6918621761659885e-06, | |
| "loss": 0.4128279983997345, | |
| "step": 2206 | |
| }, | |
| { | |
| "epoch": 2.695970695970696, | |
| "grad_norm": 5.846481800079346, | |
| "learning_rate": 1.6878552521542825e-06, | |
| "loss": 0.909477710723877, | |
| "step": 2208 | |
| }, | |
| { | |
| "epoch": 2.6984126984126986, | |
| "grad_norm": 2.9776010513305664, | |
| "learning_rate": 1.6838526579055108e-06, | |
| "loss": 0.6446021795272827, | |
| "step": 2210 | |
| }, | |
| { | |
| "epoch": 2.700854700854701, | |
| "grad_norm": 2.117492914199829, | |
| "learning_rate": 1.679854409735989e-06, | |
| "loss": 0.9117352962493896, | |
| "step": 2212 | |
| }, | |
| { | |
| "epoch": 2.7032967032967035, | |
| "grad_norm": 2.0278072357177734, | |
| "learning_rate": 1.6758605239443083e-06, | |
| "loss": 0.6256328225135803, | |
| "step": 2214 | |
| }, | |
| { | |
| "epoch": 2.705738705738706, | |
| "grad_norm": 5.5183563232421875, | |
| "learning_rate": 1.6718710168112824e-06, | |
| "loss": 0.5338436365127563, | |
| "step": 2216 | |
| }, | |
| { | |
| "epoch": 2.7081807081807083, | |
| "grad_norm": 3.22253155708313, | |
| "learning_rate": 1.6678859045998724e-06, | |
| "loss": 0.6465069651603699, | |
| "step": 2218 | |
| }, | |
| { | |
| "epoch": 2.7106227106227108, | |
| "grad_norm": 8.712440490722656, | |
| "learning_rate": 1.663905203555125e-06, | |
| "loss": 0.3656350374221802, | |
| "step": 2220 | |
| }, | |
| { | |
| "epoch": 2.713064713064713, | |
| "grad_norm": 2.39136004447937, | |
| "learning_rate": 1.6599289299041067e-06, | |
| "loss": 0.5852014422416687, | |
| "step": 2222 | |
| }, | |
| { | |
| "epoch": 2.7155067155067156, | |
| "grad_norm": 3.29854416847229, | |
| "learning_rate": 1.6559570998558339e-06, | |
| "loss": 0.7199364900588989, | |
| "step": 2224 | |
| }, | |
| { | |
| "epoch": 2.717948717948718, | |
| "grad_norm": 1.553189754486084, | |
| "learning_rate": 1.6519897296012089e-06, | |
| "loss": 0.7559410333633423, | |
| "step": 2226 | |
| }, | |
| { | |
| "epoch": 2.7203907203907205, | |
| "grad_norm": 5.676231384277344, | |
| "learning_rate": 1.648026835312954e-06, | |
| "loss": 0.7857324481010437, | |
| "step": 2228 | |
| }, | |
| { | |
| "epoch": 2.722832722832723, | |
| "grad_norm": 2.2479665279388428, | |
| "learning_rate": 1.644068433145548e-06, | |
| "loss": 0.9991781711578369, | |
| "step": 2230 | |
| }, | |
| { | |
| "epoch": 2.7252747252747254, | |
| "grad_norm": 19.45795249938965, | |
| "learning_rate": 1.640114539235156e-06, | |
| "loss": 0.6020703911781311, | |
| "step": 2232 | |
| }, | |
| { | |
| "epoch": 2.727716727716728, | |
| "grad_norm": 1.4817429780960083, | |
| "learning_rate": 1.6361651696995633e-06, | |
| "loss": 1.0305383205413818, | |
| "step": 2234 | |
| }, | |
| { | |
| "epoch": 2.7301587301587302, | |
| "grad_norm": 3.4105312824249268, | |
| "learning_rate": 1.6322203406381158e-06, | |
| "loss": 1.0053908824920654, | |
| "step": 2236 | |
| }, | |
| { | |
| "epoch": 2.7326007326007327, | |
| "grad_norm": 1.9684903621673584, | |
| "learning_rate": 1.6282800681316485e-06, | |
| "loss": 0.9223586320877075, | |
| "step": 2238 | |
| }, | |
| { | |
| "epoch": 2.735042735042735, | |
| "grad_norm": 3.927523374557495, | |
| "learning_rate": 1.6243443682424211e-06, | |
| "loss": 0.6888905167579651, | |
| "step": 2240 | |
| }, | |
| { | |
| "epoch": 2.7374847374847375, | |
| "grad_norm": 9.635194778442383, | |
| "learning_rate": 1.6204132570140551e-06, | |
| "loss": 0.9834311008453369, | |
| "step": 2242 | |
| }, | |
| { | |
| "epoch": 2.73992673992674, | |
| "grad_norm": 2.742316722869873, | |
| "learning_rate": 1.616486750471466e-06, | |
| "loss": 0.5603131055831909, | |
| "step": 2244 | |
| }, | |
| { | |
| "epoch": 2.7423687423687424, | |
| "grad_norm": 2.2433788776397705, | |
| "learning_rate": 1.6125648646207992e-06, | |
| "loss": 0.7219388484954834, | |
| "step": 2246 | |
| }, | |
| { | |
| "epoch": 2.744810744810745, | |
| "grad_norm": 3.132955312728882, | |
| "learning_rate": 1.608647615449362e-06, | |
| "loss": 0.8298469185829163, | |
| "step": 2248 | |
| }, | |
| { | |
| "epoch": 2.7472527472527473, | |
| "grad_norm": 2.522810697555542, | |
| "learning_rate": 1.604735018925563e-06, | |
| "loss": 0.9102773070335388, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 2.7496947496947497, | |
| "grad_norm": 2.429370164871216, | |
| "learning_rate": 1.6008270909988414e-06, | |
| "loss": 0.9825899600982666, | |
| "step": 2252 | |
| }, | |
| { | |
| "epoch": 2.752136752136752, | |
| "grad_norm": 1.3979560136795044, | |
| "learning_rate": 1.596923847599611e-06, | |
| "loss": 0.694176197052002, | |
| "step": 2254 | |
| }, | |
| { | |
| "epoch": 2.7545787545787546, | |
| "grad_norm": 3.7129030227661133, | |
| "learning_rate": 1.593025304639183e-06, | |
| "loss": 0.7678108811378479, | |
| "step": 2256 | |
| }, | |
| { | |
| "epoch": 2.757020757020757, | |
| "grad_norm": 58.61724090576172, | |
| "learning_rate": 1.5891314780097123e-06, | |
| "loss": 0.9679561853408813, | |
| "step": 2258 | |
| }, | |
| { | |
| "epoch": 2.7594627594627594, | |
| "grad_norm": 3.2823469638824463, | |
| "learning_rate": 1.585242383584124e-06, | |
| "loss": 1.0787243843078613, | |
| "step": 2260 | |
| }, | |
| { | |
| "epoch": 2.761904761904762, | |
| "grad_norm": 4.105648517608643, | |
| "learning_rate": 1.5813580372160558e-06, | |
| "loss": 1.0055099725723267, | |
| "step": 2262 | |
| }, | |
| { | |
| "epoch": 2.7643467643467643, | |
| "grad_norm": 1.8101457357406616, | |
| "learning_rate": 1.5774784547397898e-06, | |
| "loss": 0.9336439967155457, | |
| "step": 2264 | |
| }, | |
| { | |
| "epoch": 2.7667887667887667, | |
| "grad_norm": 3.130258798599243, | |
| "learning_rate": 1.5736036519701876e-06, | |
| "loss": 0.912263035774231, | |
| "step": 2266 | |
| }, | |
| { | |
| "epoch": 2.769230769230769, | |
| "grad_norm": 0.9224950075149536, | |
| "learning_rate": 1.5697336447026257e-06, | |
| "loss": 0.7292864918708801, | |
| "step": 2268 | |
| }, | |
| { | |
| "epoch": 2.7716727716727716, | |
| "grad_norm": 1.59903085231781, | |
| "learning_rate": 1.565868448712935e-06, | |
| "loss": 0.593657374382019, | |
| "step": 2270 | |
| }, | |
| { | |
| "epoch": 2.774114774114774, | |
| "grad_norm": 1.2354272603988647, | |
| "learning_rate": 1.562008079757329e-06, | |
| "loss": 0.2578456699848175, | |
| "step": 2272 | |
| }, | |
| { | |
| "epoch": 2.7765567765567765, | |
| "grad_norm": 1.8744750022888184, | |
| "learning_rate": 1.5581525535723502e-06, | |
| "loss": 1.0456628799438477, | |
| "step": 2274 | |
| }, | |
| { | |
| "epoch": 2.778998778998779, | |
| "grad_norm": 1.6361182928085327, | |
| "learning_rate": 1.5543018858747943e-06, | |
| "loss": 0.9015727043151855, | |
| "step": 2276 | |
| }, | |
| { | |
| "epoch": 2.7814407814407813, | |
| "grad_norm": 2.5508196353912354, | |
| "learning_rate": 1.550456092361655e-06, | |
| "loss": 0.5647008419036865, | |
| "step": 2278 | |
| }, | |
| { | |
| "epoch": 2.7838827838827838, | |
| "grad_norm": 5.802591323852539, | |
| "learning_rate": 1.546615188710055e-06, | |
| "loss": 0.341159850358963, | |
| "step": 2280 | |
| }, | |
| { | |
| "epoch": 2.786324786324786, | |
| "grad_norm": 2.65018630027771, | |
| "learning_rate": 1.5427791905771843e-06, | |
| "loss": 1.0216097831726074, | |
| "step": 2282 | |
| }, | |
| { | |
| "epoch": 2.7887667887667886, | |
| "grad_norm": 4.260854721069336, | |
| "learning_rate": 1.538948113600237e-06, | |
| "loss": 0.8784246444702148, | |
| "step": 2284 | |
| }, | |
| { | |
| "epoch": 2.791208791208791, | |
| "grad_norm": 3.1193864345550537, | |
| "learning_rate": 1.5351219733963453e-06, | |
| "loss": 0.9552139043807983, | |
| "step": 2286 | |
| }, | |
| { | |
| "epoch": 2.7936507936507935, | |
| "grad_norm": 1.618998646736145, | |
| "learning_rate": 1.5313007855625153e-06, | |
| "loss": 0.9732692241668701, | |
| "step": 2288 | |
| }, | |
| { | |
| "epoch": 2.796092796092796, | |
| "grad_norm": 6.491037368774414, | |
| "learning_rate": 1.5274845656755687e-06, | |
| "loss": 0.3624776303768158, | |
| "step": 2290 | |
| }, | |
| { | |
| "epoch": 2.7985347985347984, | |
| "grad_norm": 2.8097808361053467, | |
| "learning_rate": 1.5236733292920735e-06, | |
| "loss": 0.8098872303962708, | |
| "step": 2292 | |
| }, | |
| { | |
| "epoch": 2.800976800976801, | |
| "grad_norm": 2.353226900100708, | |
| "learning_rate": 1.5198670919482839e-06, | |
| "loss": 0.7608856558799744, | |
| "step": 2294 | |
| }, | |
| { | |
| "epoch": 2.8034188034188032, | |
| "grad_norm": 1.7207751274108887, | |
| "learning_rate": 1.5160658691600737e-06, | |
| "loss": 0.8960850834846497, | |
| "step": 2296 | |
| }, | |
| { | |
| "epoch": 2.8058608058608057, | |
| "grad_norm": 2.5294456481933594, | |
| "learning_rate": 1.5122696764228772e-06, | |
| "loss": 0.40981659293174744, | |
| "step": 2298 | |
| }, | |
| { | |
| "epoch": 2.808302808302808, | |
| "grad_norm": 2.1142632961273193, | |
| "learning_rate": 1.5084785292116244e-06, | |
| "loss": 0.6546359658241272, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 2.8107448107448105, | |
| "grad_norm": 2.852811574935913, | |
| "learning_rate": 1.5046924429806747e-06, | |
| "loss": 1.049178123474121, | |
| "step": 2302 | |
| }, | |
| { | |
| "epoch": 2.813186813186813, | |
| "grad_norm": 0.45758649706840515, | |
| "learning_rate": 1.50091143316376e-06, | |
| "loss": 0.5754284262657166, | |
| "step": 2304 | |
| }, | |
| { | |
| "epoch": 2.8156288156288154, | |
| "grad_norm": 1.5217318534851074, | |
| "learning_rate": 1.497135515173917e-06, | |
| "loss": 0.7435483336448669, | |
| "step": 2306 | |
| }, | |
| { | |
| "epoch": 2.818070818070818, | |
| "grad_norm": 2.425044298171997, | |
| "learning_rate": 1.4933647044034264e-06, | |
| "loss": 0.6329599618911743, | |
| "step": 2308 | |
| }, | |
| { | |
| "epoch": 2.8205128205128203, | |
| "grad_norm": 2.1778621673583984, | |
| "learning_rate": 1.489599016223748e-06, | |
| "loss": 1.040429949760437, | |
| "step": 2310 | |
| }, | |
| { | |
| "epoch": 2.8229548229548227, | |
| "grad_norm": 1.7928675413131714, | |
| "learning_rate": 1.485838465985463e-06, | |
| "loss": 0.6599953770637512, | |
| "step": 2312 | |
| }, | |
| { | |
| "epoch": 2.825396825396825, | |
| "grad_norm": 6.531580924987793, | |
| "learning_rate": 1.482083069018203e-06, | |
| "loss": 0.7039975523948669, | |
| "step": 2314 | |
| }, | |
| { | |
| "epoch": 2.8278388278388276, | |
| "grad_norm": 2.4885547161102295, | |
| "learning_rate": 1.4783328406306002e-06, | |
| "loss": 0.7224160432815552, | |
| "step": 2316 | |
| }, | |
| { | |
| "epoch": 2.8302808302808304, | |
| "grad_norm": 2.634704351425171, | |
| "learning_rate": 1.4745877961102096e-06, | |
| "loss": 1.0425044298171997, | |
| "step": 2318 | |
| }, | |
| { | |
| "epoch": 2.832722832722833, | |
| "grad_norm": 1.6846414804458618, | |
| "learning_rate": 1.4708479507234596e-06, | |
| "loss": 0.6160850524902344, | |
| "step": 2320 | |
| }, | |
| { | |
| "epoch": 2.8351648351648353, | |
| "grad_norm": 6.159206390380859, | |
| "learning_rate": 1.4671133197155817e-06, | |
| "loss": 0.6913861036300659, | |
| "step": 2322 | |
| }, | |
| { | |
| "epoch": 2.8376068376068377, | |
| "grad_norm": 0.6180989146232605, | |
| "learning_rate": 1.4633839183105531e-06, | |
| "loss": 0.19272488355636597, | |
| "step": 2324 | |
| }, | |
| { | |
| "epoch": 2.84004884004884, | |
| "grad_norm": 1.4327154159545898, | |
| "learning_rate": 1.4596597617110327e-06, | |
| "loss": 0.8577545285224915, | |
| "step": 2326 | |
| }, | |
| { | |
| "epoch": 2.8424908424908426, | |
| "grad_norm": 2.3003664016723633, | |
| "learning_rate": 1.4559408650982999e-06, | |
| "loss": 0.8021556735038757, | |
| "step": 2328 | |
| }, | |
| { | |
| "epoch": 2.844932844932845, | |
| "grad_norm": 1.9625604152679443, | |
| "learning_rate": 1.4522272436321893e-06, | |
| "loss": 0.8357652425765991, | |
| "step": 2330 | |
| }, | |
| { | |
| "epoch": 2.8473748473748475, | |
| "grad_norm": 1.8401294946670532, | |
| "learning_rate": 1.4485189124510355e-06, | |
| "loss": 1.0011165142059326, | |
| "step": 2332 | |
| }, | |
| { | |
| "epoch": 2.84981684981685, | |
| "grad_norm": 2.622108221054077, | |
| "learning_rate": 1.4448158866716028e-06, | |
| "loss": 0.15081661939620972, | |
| "step": 2334 | |
| }, | |
| { | |
| "epoch": 2.8522588522588523, | |
| "grad_norm": 2.198842763900757, | |
| "learning_rate": 1.441118181389035e-06, | |
| "loss": 1.0237456560134888, | |
| "step": 2336 | |
| }, | |
| { | |
| "epoch": 2.8547008547008548, | |
| "grad_norm": 1.9740854501724243, | |
| "learning_rate": 1.437425811676781e-06, | |
| "loss": 0.6290860176086426, | |
| "step": 2338 | |
| }, | |
| { | |
| "epoch": 2.857142857142857, | |
| "grad_norm": 1.7903653383255005, | |
| "learning_rate": 1.4337387925865435e-06, | |
| "loss": 1.0167012214660645, | |
| "step": 2340 | |
| }, | |
| { | |
| "epoch": 2.8595848595848596, | |
| "grad_norm": 1.402300477027893, | |
| "learning_rate": 1.430057139148211e-06, | |
| "loss": 1.0619688034057617, | |
| "step": 2342 | |
| }, | |
| { | |
| "epoch": 2.862026862026862, | |
| "grad_norm": 5.845098972320557, | |
| "learning_rate": 1.4263808663698015e-06, | |
| "loss": 0.3327184319496155, | |
| "step": 2344 | |
| }, | |
| { | |
| "epoch": 2.8644688644688645, | |
| "grad_norm": 3.894296169281006, | |
| "learning_rate": 1.4227099892373986e-06, | |
| "loss": 0.9415085911750793, | |
| "step": 2346 | |
| }, | |
| { | |
| "epoch": 2.866910866910867, | |
| "grad_norm": 0.5116417407989502, | |
| "learning_rate": 1.4190445227150907e-06, | |
| "loss": 0.5154658555984497, | |
| "step": 2348 | |
| }, | |
| { | |
| "epoch": 2.8693528693528694, | |
| "grad_norm": 1.3756489753723145, | |
| "learning_rate": 1.4153844817449087e-06, | |
| "loss": 0.6424716114997864, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 2.871794871794872, | |
| "grad_norm": 1.572013020515442, | |
| "learning_rate": 1.4117298812467687e-06, | |
| "loss": 0.7699521780014038, | |
| "step": 2352 | |
| }, | |
| { | |
| "epoch": 2.8742368742368742, | |
| "grad_norm": 51.821266174316406, | |
| "learning_rate": 1.4080807361184088e-06, | |
| "loss": 0.5482099652290344, | |
| "step": 2354 | |
| }, | |
| { | |
| "epoch": 2.8766788766788767, | |
| "grad_norm": 1.6499661207199097, | |
| "learning_rate": 1.4044370612353281e-06, | |
| "loss": 0.906887412071228, | |
| "step": 2356 | |
| }, | |
| { | |
| "epoch": 2.879120879120879, | |
| "grad_norm": 4.545080661773682, | |
| "learning_rate": 1.400798871450726e-06, | |
| "loss": 0.784338653087616, | |
| "step": 2358 | |
| }, | |
| { | |
| "epoch": 2.8815628815628815, | |
| "grad_norm": 2.03543758392334, | |
| "learning_rate": 1.397166181595443e-06, | |
| "loss": 0.5577901005744934, | |
| "step": 2360 | |
| }, | |
| { | |
| "epoch": 2.884004884004884, | |
| "grad_norm": 5.208932399749756, | |
| "learning_rate": 1.3935390064779008e-06, | |
| "loss": 0.7476451992988586, | |
| "step": 2362 | |
| }, | |
| { | |
| "epoch": 2.8864468864468864, | |
| "grad_norm": 2.228670835494995, | |
| "learning_rate": 1.3899173608840378e-06, | |
| "loss": 1.051893949508667, | |
| "step": 2364 | |
| }, | |
| { | |
| "epoch": 2.888888888888889, | |
| "grad_norm": 1.407289743423462, | |
| "learning_rate": 1.3863012595772531e-06, | |
| "loss": 1.076530933380127, | |
| "step": 2366 | |
| }, | |
| { | |
| "epoch": 2.8913308913308913, | |
| "grad_norm": 22.543790817260742, | |
| "learning_rate": 1.3826907172983456e-06, | |
| "loss": 0.846904993057251, | |
| "step": 2368 | |
| }, | |
| { | |
| "epoch": 2.8937728937728937, | |
| "grad_norm": 1.6555728912353516, | |
| "learning_rate": 1.3790857487654535e-06, | |
| "loss": 1.1604909896850586, | |
| "step": 2370 | |
| }, | |
| { | |
| "epoch": 2.896214896214896, | |
| "grad_norm": 2.013773202896118, | |
| "learning_rate": 1.3754863686739906e-06, | |
| "loss": 0.915320634841919, | |
| "step": 2372 | |
| }, | |
| { | |
| "epoch": 2.8986568986568986, | |
| "grad_norm": 2.000242233276367, | |
| "learning_rate": 1.3718925916965945e-06, | |
| "loss": 0.7186045050621033, | |
| "step": 2374 | |
| }, | |
| { | |
| "epoch": 2.901098901098901, | |
| "grad_norm": 1.6389049291610718, | |
| "learning_rate": 1.3683044324830573e-06, | |
| "loss": 0.9410088658332825, | |
| "step": 2376 | |
| }, | |
| { | |
| "epoch": 2.9035409035409034, | |
| "grad_norm": 2.346830129623413, | |
| "learning_rate": 1.3647219056602757e-06, | |
| "loss": 1.0101977586746216, | |
| "step": 2378 | |
| }, | |
| { | |
| "epoch": 2.905982905982906, | |
| "grad_norm": 1.5173600912094116, | |
| "learning_rate": 1.361145025832182e-06, | |
| "loss": 0.8229511976242065, | |
| "step": 2380 | |
| }, | |
| { | |
| "epoch": 2.9084249084249083, | |
| "grad_norm": 2.14473557472229, | |
| "learning_rate": 1.3575738075796923e-06, | |
| "loss": 0.9482402801513672, | |
| "step": 2382 | |
| }, | |
| { | |
| "epoch": 2.9108669108669107, | |
| "grad_norm": 1.9663830995559692, | |
| "learning_rate": 1.35400826546064e-06, | |
| "loss": 0.9494956135749817, | |
| "step": 2384 | |
| }, | |
| { | |
| "epoch": 2.913308913308913, | |
| "grad_norm": 5.7204909324646, | |
| "learning_rate": 1.350448414009723e-06, | |
| "loss": 1.0107911825180054, | |
| "step": 2386 | |
| }, | |
| { | |
| "epoch": 2.9157509157509156, | |
| "grad_norm": 2.0800321102142334, | |
| "learning_rate": 1.3468942677384408e-06, | |
| "loss": 0.8393886089324951, | |
| "step": 2388 | |
| }, | |
| { | |
| "epoch": 2.918192918192918, | |
| "grad_norm": 6.8317461013793945, | |
| "learning_rate": 1.343345841135037e-06, | |
| "loss": 0.46943965554237366, | |
| "step": 2390 | |
| }, | |
| { | |
| "epoch": 2.9206349206349205, | |
| "grad_norm": 3.705573081970215, | |
| "learning_rate": 1.3398031486644366e-06, | |
| "loss": 0.5753905177116394, | |
| "step": 2392 | |
| }, | |
| { | |
| "epoch": 2.9230769230769234, | |
| "grad_norm": 1.4765284061431885, | |
| "learning_rate": 1.3362662047681928e-06, | |
| "loss": 0.8073123097419739, | |
| "step": 2394 | |
| }, | |
| { | |
| "epoch": 2.925518925518926, | |
| "grad_norm": 8.290820121765137, | |
| "learning_rate": 1.3327350238644224e-06, | |
| "loss": 0.6432682871818542, | |
| "step": 2396 | |
| }, | |
| { | |
| "epoch": 2.927960927960928, | |
| "grad_norm": 33.93027877807617, | |
| "learning_rate": 1.3292096203477533e-06, | |
| "loss": 0.6455587148666382, | |
| "step": 2398 | |
| }, | |
| { | |
| "epoch": 2.9304029304029307, | |
| "grad_norm": 15.2852201461792, | |
| "learning_rate": 1.3256900085892584e-06, | |
| "loss": 0.5954673290252686, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 2.932844932844933, | |
| "grad_norm": 2.53251051902771, | |
| "learning_rate": 1.3221762029364043e-06, | |
| "loss": 0.656650960445404, | |
| "step": 2402 | |
| }, | |
| { | |
| "epoch": 2.9352869352869355, | |
| "grad_norm": 3.878647565841675, | |
| "learning_rate": 1.3186682177129862e-06, | |
| "loss": 0.3129318654537201, | |
| "step": 2404 | |
| }, | |
| { | |
| "epoch": 2.937728937728938, | |
| "grad_norm": 3.8784711360931396, | |
| "learning_rate": 1.3151660672190744e-06, | |
| "loss": 1.0069366693496704, | |
| "step": 2406 | |
| }, | |
| { | |
| "epoch": 2.9401709401709404, | |
| "grad_norm": 1.668820858001709, | |
| "learning_rate": 1.3116697657309547e-06, | |
| "loss": 0.7313091158866882, | |
| "step": 2408 | |
| }, | |
| { | |
| "epoch": 2.942612942612943, | |
| "grad_norm": 2.024500846862793, | |
| "learning_rate": 1.3081793275010699e-06, | |
| "loss": 0.6760754585266113, | |
| "step": 2410 | |
| }, | |
| { | |
| "epoch": 2.9450549450549453, | |
| "grad_norm": 1.469117522239685, | |
| "learning_rate": 1.3046947667579596e-06, | |
| "loss": 0.9695707559585571, | |
| "step": 2412 | |
| }, | |
| { | |
| "epoch": 2.9474969474969477, | |
| "grad_norm": 2.2661683559417725, | |
| "learning_rate": 1.301216097706206e-06, | |
| "loss": 1.0303492546081543, | |
| "step": 2414 | |
| }, | |
| { | |
| "epoch": 2.94993894993895, | |
| "grad_norm": 1.312300682067871, | |
| "learning_rate": 1.2977433345263752e-06, | |
| "loss": 0.9242293238639832, | |
| "step": 2416 | |
| }, | |
| { | |
| "epoch": 2.9523809523809526, | |
| "grad_norm": 2.9520680904388428, | |
| "learning_rate": 1.2942764913749544e-06, | |
| "loss": 0.6899678707122803, | |
| "step": 2418 | |
| }, | |
| { | |
| "epoch": 2.954822954822955, | |
| "grad_norm": 1.741718053817749, | |
| "learning_rate": 1.2908155823843033e-06, | |
| "loss": 0.9872897267341614, | |
| "step": 2420 | |
| }, | |
| { | |
| "epoch": 2.9572649572649574, | |
| "grad_norm": 1.809373378753662, | |
| "learning_rate": 1.2873606216625879e-06, | |
| "loss": 0.8448399305343628, | |
| "step": 2422 | |
| }, | |
| { | |
| "epoch": 2.95970695970696, | |
| "grad_norm": 2.35263991355896, | |
| "learning_rate": 1.2839116232937271e-06, | |
| "loss": 0.5212328433990479, | |
| "step": 2424 | |
| }, | |
| { | |
| "epoch": 2.9621489621489623, | |
| "grad_norm": 2.598365068435669, | |
| "learning_rate": 1.280468601337335e-06, | |
| "loss": 1.1081678867340088, | |
| "step": 2426 | |
| }, | |
| { | |
| "epoch": 2.9645909645909647, | |
| "grad_norm": 11.081291198730469, | |
| "learning_rate": 1.2770315698286643e-06, | |
| "loss": 0.5913952589035034, | |
| "step": 2428 | |
| }, | |
| { | |
| "epoch": 2.967032967032967, | |
| "grad_norm": 2.6343994140625, | |
| "learning_rate": 1.273600542778546e-06, | |
| "loss": 0.9255035519599915, | |
| "step": 2430 | |
| }, | |
| { | |
| "epoch": 2.9694749694749696, | |
| "grad_norm": 2.8472864627838135, | |
| "learning_rate": 1.2701755341733363e-06, | |
| "loss": 0.8645012378692627, | |
| "step": 2432 | |
| }, | |
| { | |
| "epoch": 2.971916971916972, | |
| "grad_norm": 1.3869469165802002, | |
| "learning_rate": 1.2667565579748552e-06, | |
| "loss": 0.9598724246025085, | |
| "step": 2434 | |
| }, | |
| { | |
| "epoch": 2.9743589743589745, | |
| "grad_norm": 1.7193245887756348, | |
| "learning_rate": 1.2633436281203353e-06, | |
| "loss": 0.5284073948860168, | |
| "step": 2436 | |
| }, | |
| { | |
| "epoch": 2.976800976800977, | |
| "grad_norm": 4.513336181640625, | |
| "learning_rate": 1.2599367585223573e-06, | |
| "loss": 0.5111241340637207, | |
| "step": 2438 | |
| }, | |
| { | |
| "epoch": 2.9792429792429793, | |
| "grad_norm": 3.2821226119995117, | |
| "learning_rate": 1.2565359630688029e-06, | |
| "loss": 0.9840971231460571, | |
| "step": 2440 | |
| }, | |
| { | |
| "epoch": 2.9816849816849818, | |
| "grad_norm": 1.6997895240783691, | |
| "learning_rate": 1.2531412556227883e-06, | |
| "loss": 1.0207282304763794, | |
| "step": 2442 | |
| }, | |
| { | |
| "epoch": 2.984126984126984, | |
| "grad_norm": 2.6019296646118164, | |
| "learning_rate": 1.2497526500226163e-06, | |
| "loss": 0.940024197101593, | |
| "step": 2444 | |
| }, | |
| { | |
| "epoch": 2.9865689865689866, | |
| "grad_norm": 2.273214101791382, | |
| "learning_rate": 1.246370160081711e-06, | |
| "loss": 0.9067605137825012, | |
| "step": 2446 | |
| }, | |
| { | |
| "epoch": 2.989010989010989, | |
| "grad_norm": 1.8762654066085815, | |
| "learning_rate": 1.2429937995885713e-06, | |
| "loss": 0.93479323387146, | |
| "step": 2448 | |
| }, | |
| { | |
| "epoch": 2.9914529914529915, | |
| "grad_norm": 5.84881067276001, | |
| "learning_rate": 1.2396235823067076e-06, | |
| "loss": 0.6413801312446594, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 2.993894993894994, | |
| "grad_norm": 1.7108796834945679, | |
| "learning_rate": 1.2362595219745882e-06, | |
| "loss": 1.0565381050109863, | |
| "step": 2452 | |
| }, | |
| { | |
| "epoch": 2.9963369963369964, | |
| "grad_norm": 1.5569299459457397, | |
| "learning_rate": 1.2329016323055822e-06, | |
| "loss": 0.9824570417404175, | |
| "step": 2454 | |
| }, | |
| { | |
| "epoch": 2.998778998778999, | |
| "grad_norm": 2.560699939727783, | |
| "learning_rate": 1.2295499269879063e-06, | |
| "loss": 0.5337162613868713, | |
| "step": 2456 | |
| }, | |
| { | |
| "epoch": 3.001221001221001, | |
| "grad_norm": 3.085305690765381, | |
| "learning_rate": 1.2262044196845638e-06, | |
| "loss": 0.6332882046699524, | |
| "step": 2458 | |
| }, | |
| { | |
| "epoch": 3.0036630036630036, | |
| "grad_norm": 3.3725106716156006, | |
| "learning_rate": 1.2228651240332972e-06, | |
| "loss": 0.62852543592453, | |
| "step": 2460 | |
| }, | |
| { | |
| "epoch": 3.006105006105006, | |
| "grad_norm": 1.6869263648986816, | |
| "learning_rate": 1.2195320536465225e-06, | |
| "loss": 1.0432286262512207, | |
| "step": 2462 | |
| }, | |
| { | |
| "epoch": 3.0085470085470085, | |
| "grad_norm": 1.3357821702957153, | |
| "learning_rate": 1.2162052221112828e-06, | |
| "loss": 0.962488055229187, | |
| "step": 2464 | |
| }, | |
| { | |
| "epoch": 3.010989010989011, | |
| "grad_norm": 4.632596492767334, | |
| "learning_rate": 1.2128846429891852e-06, | |
| "loss": 0.5416973233222961, | |
| "step": 2466 | |
| }, | |
| { | |
| "epoch": 3.0134310134310134, | |
| "grad_norm": 1.600440502166748, | |
| "learning_rate": 1.2095703298163526e-06, | |
| "loss": 0.8857253789901733, | |
| "step": 2468 | |
| }, | |
| { | |
| "epoch": 3.015873015873016, | |
| "grad_norm": 0.576468288898468, | |
| "learning_rate": 1.2062622961033632e-06, | |
| "loss": 0.2631528675556183, | |
| "step": 2470 | |
| }, | |
| { | |
| "epoch": 3.0183150183150182, | |
| "grad_norm": 5.257506370544434, | |
| "learning_rate": 1.2029605553351988e-06, | |
| "loss": 0.3512267470359802, | |
| "step": 2472 | |
| }, | |
| { | |
| "epoch": 3.0207570207570207, | |
| "grad_norm": 2.005457639694214, | |
| "learning_rate": 1.199665120971188e-06, | |
| "loss": 0.9261833429336548, | |
| "step": 2474 | |
| }, | |
| { | |
| "epoch": 3.023199023199023, | |
| "grad_norm": 5.405751705169678, | |
| "learning_rate": 1.1963760064449495e-06, | |
| "loss": 0.5271846652030945, | |
| "step": 2476 | |
| }, | |
| { | |
| "epoch": 3.0256410256410255, | |
| "grad_norm": 1.659690499305725, | |
| "learning_rate": 1.1930932251643438e-06, | |
| "loss": 0.6160858869552612, | |
| "step": 2478 | |
| }, | |
| { | |
| "epoch": 3.028083028083028, | |
| "grad_norm": 1.8383840322494507, | |
| "learning_rate": 1.189816790511409e-06, | |
| "loss": 0.8536359667778015, | |
| "step": 2480 | |
| }, | |
| { | |
| "epoch": 3.0305250305250304, | |
| "grad_norm": 2.1919424533843994, | |
| "learning_rate": 1.1865467158423179e-06, | |
| "loss": 0.9045109152793884, | |
| "step": 2482 | |
| }, | |
| { | |
| "epoch": 3.032967032967033, | |
| "grad_norm": 1.5028966665267944, | |
| "learning_rate": 1.1832830144873122e-06, | |
| "loss": 0.6014432907104492, | |
| "step": 2484 | |
| }, | |
| { | |
| "epoch": 3.0354090354090353, | |
| "grad_norm": 2.3299906253814697, | |
| "learning_rate": 1.1800256997506557e-06, | |
| "loss": 0.8661763072013855, | |
| "step": 2486 | |
| }, | |
| { | |
| "epoch": 3.0378510378510377, | |
| "grad_norm": 9.991959571838379, | |
| "learning_rate": 1.176774784910576e-06, | |
| "loss": 0.6161713600158691, | |
| "step": 2488 | |
| }, | |
| { | |
| "epoch": 3.04029304029304, | |
| "grad_norm": 2.847564697265625, | |
| "learning_rate": 1.1735302832192135e-06, | |
| "loss": 0.8722133636474609, | |
| "step": 2490 | |
| }, | |
| { | |
| "epoch": 3.0427350427350426, | |
| "grad_norm": 2.7239389419555664, | |
| "learning_rate": 1.1702922079025647e-06, | |
| "loss": 0.3192221522331238, | |
| "step": 2492 | |
| }, | |
| { | |
| "epoch": 3.045177045177045, | |
| "grad_norm": 0.7756720185279846, | |
| "learning_rate": 1.1670605721604307e-06, | |
| "loss": 0.2883589565753937, | |
| "step": 2494 | |
| }, | |
| { | |
| "epoch": 3.0476190476190474, | |
| "grad_norm": 4.5272135734558105, | |
| "learning_rate": 1.1638353891663602e-06, | |
| "loss": 0.6891329288482666, | |
| "step": 2496 | |
| }, | |
| { | |
| "epoch": 3.05006105006105, | |
| "grad_norm": 4.521149635314941, | |
| "learning_rate": 1.1606166720675999e-06, | |
| "loss": 0.45780226588249207, | |
| "step": 2498 | |
| }, | |
| { | |
| "epoch": 3.0525030525030523, | |
| "grad_norm": 0.8906940221786499, | |
| "learning_rate": 1.157404433985035e-06, | |
| "loss": 0.5027573704719543, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 3.0549450549450547, | |
| "grad_norm": 9.020967483520508, | |
| "learning_rate": 1.1541986880131455e-06, | |
| "loss": 0.4361349642276764, | |
| "step": 2502 | |
| }, | |
| { | |
| "epoch": 3.057387057387057, | |
| "grad_norm": 2.3300914764404297, | |
| "learning_rate": 1.1509994472199407e-06, | |
| "loss": 0.8963256478309631, | |
| "step": 2504 | |
| }, | |
| { | |
| "epoch": 3.0598290598290596, | |
| "grad_norm": 2.031867027282715, | |
| "learning_rate": 1.1478067246469158e-06, | |
| "loss": 0.4999798536300659, | |
| "step": 2506 | |
| }, | |
| { | |
| "epoch": 3.062271062271062, | |
| "grad_norm": 1.6989192962646484, | |
| "learning_rate": 1.1446205333089922e-06, | |
| "loss": 0.7561573386192322, | |
| "step": 2508 | |
| }, | |
| { | |
| "epoch": 3.064713064713065, | |
| "grad_norm": 4.123907089233398, | |
| "learning_rate": 1.1414408861944695e-06, | |
| "loss": 0.8584511876106262, | |
| "step": 2510 | |
| }, | |
| { | |
| "epoch": 3.0671550671550674, | |
| "grad_norm": 0.06403572857379913, | |
| "learning_rate": 1.1382677962649687e-06, | |
| "loss": 0.3911321461200714, | |
| "step": 2512 | |
| }, | |
| { | |
| "epoch": 3.06959706959707, | |
| "grad_norm": 4.289177894592285, | |
| "learning_rate": 1.1351012764553828e-06, | |
| "loss": 0.8152522444725037, | |
| "step": 2514 | |
| }, | |
| { | |
| "epoch": 3.0720390720390722, | |
| "grad_norm": 2.2127068042755127, | |
| "learning_rate": 1.1319413396738188e-06, | |
| "loss": 0.5816116333007812, | |
| "step": 2516 | |
| }, | |
| { | |
| "epoch": 3.0744810744810747, | |
| "grad_norm": 0.09444202482700348, | |
| "learning_rate": 1.128787998801552e-06, | |
| "loss": 0.20017878711223602, | |
| "step": 2518 | |
| }, | |
| { | |
| "epoch": 3.076923076923077, | |
| "grad_norm": 2.8026583194732666, | |
| "learning_rate": 1.1256412666929655e-06, | |
| "loss": 0.514468789100647, | |
| "step": 2520 | |
| }, | |
| { | |
| "epoch": 3.0793650793650795, | |
| "grad_norm": 2.7216711044311523, | |
| "learning_rate": 1.1225011561755093e-06, | |
| "loss": 0.6835171580314636, | |
| "step": 2522 | |
| }, | |
| { | |
| "epoch": 3.081807081807082, | |
| "grad_norm": 2.2049448490142822, | |
| "learning_rate": 1.1193676800496326e-06, | |
| "loss": 0.8667712211608887, | |
| "step": 2524 | |
| }, | |
| { | |
| "epoch": 3.0842490842490844, | |
| "grad_norm": 11.64513111114502, | |
| "learning_rate": 1.1162408510887469e-06, | |
| "loss": 0.5643727779388428, | |
| "step": 2526 | |
| }, | |
| { | |
| "epoch": 3.086691086691087, | |
| "grad_norm": 2.371492862701416, | |
| "learning_rate": 1.1131206820391618e-06, | |
| "loss": 0.5264307856559753, | |
| "step": 2528 | |
| }, | |
| { | |
| "epoch": 3.0891330891330893, | |
| "grad_norm": 8.419890403747559, | |
| "learning_rate": 1.1100071856200413e-06, | |
| "loss": 0.11923594772815704, | |
| "step": 2530 | |
| }, | |
| { | |
| "epoch": 3.0915750915750917, | |
| "grad_norm": 4.913514137268066, | |
| "learning_rate": 1.106900374523348e-06, | |
| "loss": 0.32799002528190613, | |
| "step": 2532 | |
| }, | |
| { | |
| "epoch": 3.094017094017094, | |
| "grad_norm": 2.237429141998291, | |
| "learning_rate": 1.1038002614137922e-06, | |
| "loss": 0.8726149797439575, | |
| "step": 2534 | |
| }, | |
| { | |
| "epoch": 3.0964590964590966, | |
| "grad_norm": 5.674133777618408, | |
| "learning_rate": 1.1007068589287814e-06, | |
| "loss": 0.635856568813324, | |
| "step": 2536 | |
| }, | |
| { | |
| "epoch": 3.098901098901099, | |
| "grad_norm": 24.948986053466797, | |
| "learning_rate": 1.0976201796783642e-06, | |
| "loss": 0.6740862131118774, | |
| "step": 2538 | |
| }, | |
| { | |
| "epoch": 3.1013431013431014, | |
| "grad_norm": 2.047201633453369, | |
| "learning_rate": 1.0945402362451871e-06, | |
| "loss": 0.9215976595878601, | |
| "step": 2540 | |
| }, | |
| { | |
| "epoch": 3.103785103785104, | |
| "grad_norm": 6.655368328094482, | |
| "learning_rate": 1.0914670411844338e-06, | |
| "loss": 0.559134304523468, | |
| "step": 2542 | |
| }, | |
| { | |
| "epoch": 3.1062271062271063, | |
| "grad_norm": 1.7727597951889038, | |
| "learning_rate": 1.0884006070237834e-06, | |
| "loss": 0.5720962285995483, | |
| "step": 2544 | |
| }, | |
| { | |
| "epoch": 3.1086691086691087, | |
| "grad_norm": 3.6166863441467285, | |
| "learning_rate": 1.0853409462633507e-06, | |
| "loss": 0.16919654607772827, | |
| "step": 2546 | |
| }, | |
| { | |
| "epoch": 3.111111111111111, | |
| "grad_norm": 2.884989023208618, | |
| "learning_rate": 1.0822880713756422e-06, | |
| "loss": 0.639471173286438, | |
| "step": 2548 | |
| }, | |
| { | |
| "epoch": 3.1135531135531136, | |
| "grad_norm": 1.7115203142166138, | |
| "learning_rate": 1.0792419948054994e-06, | |
| "loss": 0.6552147269248962, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 3.115995115995116, | |
| "grad_norm": 3.130906343460083, | |
| "learning_rate": 1.0762027289700527e-06, | |
| "loss": 0.2590104043483734, | |
| "step": 2552 | |
| }, | |
| { | |
| "epoch": 3.1184371184371185, | |
| "grad_norm": 2.566354751586914, | |
| "learning_rate": 1.0731702862586686e-06, | |
| "loss": 0.8442977666854858, | |
| "step": 2554 | |
| }, | |
| { | |
| "epoch": 3.120879120879121, | |
| "grad_norm": 2.08247709274292, | |
| "learning_rate": 1.070144679032901e-06, | |
| "loss": 0.37470126152038574, | |
| "step": 2556 | |
| }, | |
| { | |
| "epoch": 3.1233211233211233, | |
| "grad_norm": 0.45577648282051086, | |
| "learning_rate": 1.0671259196264355e-06, | |
| "loss": 0.4773566722869873, | |
| "step": 2558 | |
| }, | |
| { | |
| "epoch": 3.1257631257631258, | |
| "grad_norm": 2.147977590560913, | |
| "learning_rate": 1.064114020345048e-06, | |
| "loss": 0.847014844417572, | |
| "step": 2560 | |
| }, | |
| { | |
| "epoch": 3.128205128205128, | |
| "grad_norm": 6.595324516296387, | |
| "learning_rate": 1.0611089934665438e-06, | |
| "loss": 1.0399620532989502, | |
| "step": 2562 | |
| }, | |
| { | |
| "epoch": 3.1306471306471306, | |
| "grad_norm": 3.0821518898010254, | |
| "learning_rate": 1.0581108512407206e-06, | |
| "loss": 0.8594496250152588, | |
| "step": 2564 | |
| }, | |
| { | |
| "epoch": 3.133089133089133, | |
| "grad_norm": 6.90889310836792, | |
| "learning_rate": 1.055119605889304e-06, | |
| "loss": 0.6531029939651489, | |
| "step": 2566 | |
| }, | |
| { | |
| "epoch": 3.1355311355311355, | |
| "grad_norm": 5.536701679229736, | |
| "learning_rate": 1.0521352696059106e-06, | |
| "loss": 0.8756755590438843, | |
| "step": 2568 | |
| }, | |
| { | |
| "epoch": 3.137973137973138, | |
| "grad_norm": 7.192801475524902, | |
| "learning_rate": 1.0491578545559882e-06, | |
| "loss": 0.5930169820785522, | |
| "step": 2570 | |
| }, | |
| { | |
| "epoch": 3.1404151404151404, | |
| "grad_norm": 5.717547416687012, | |
| "learning_rate": 1.0461873728767735e-06, | |
| "loss": 0.6029551029205322, | |
| "step": 2572 | |
| }, | |
| { | |
| "epoch": 3.142857142857143, | |
| "grad_norm": 4.090261936187744, | |
| "learning_rate": 1.043223836677239e-06, | |
| "loss": 0.8777113556861877, | |
| "step": 2574 | |
| }, | |
| { | |
| "epoch": 3.1452991452991452, | |
| "grad_norm": 14.0100736618042, | |
| "learning_rate": 1.040267258038045e-06, | |
| "loss": 0.9692713022232056, | |
| "step": 2576 | |
| }, | |
| { | |
| "epoch": 3.1477411477411477, | |
| "grad_norm": 3.899435520172119, | |
| "learning_rate": 1.0373176490114874e-06, | |
| "loss": 0.8949326276779175, | |
| "step": 2578 | |
| }, | |
| { | |
| "epoch": 3.15018315018315, | |
| "grad_norm": 5.538814067840576, | |
| "learning_rate": 1.0343750216214546e-06, | |
| "loss": 0.8762179017066956, | |
| "step": 2580 | |
| }, | |
| { | |
| "epoch": 3.1526251526251525, | |
| "grad_norm": 7.619011402130127, | |
| "learning_rate": 1.0314393878633705e-06, | |
| "loss": 0.7504989504814148, | |
| "step": 2582 | |
| }, | |
| { | |
| "epoch": 3.155067155067155, | |
| "grad_norm": 3.597076416015625, | |
| "learning_rate": 1.0285107597041552e-06, | |
| "loss": 0.31154295802116394, | |
| "step": 2584 | |
| }, | |
| { | |
| "epoch": 3.1575091575091574, | |
| "grad_norm": 3.5088987350463867, | |
| "learning_rate": 1.0255891490821657e-06, | |
| "loss": 0.6339558362960815, | |
| "step": 2586 | |
| }, | |
| { | |
| "epoch": 3.15995115995116, | |
| "grad_norm": 5.022501468658447, | |
| "learning_rate": 1.0226745679071555e-06, | |
| "loss": 0.328271746635437, | |
| "step": 2588 | |
| }, | |
| { | |
| "epoch": 3.1623931623931623, | |
| "grad_norm": 4.233664035797119, | |
| "learning_rate": 1.0197670280602234e-06, | |
| "loss": 0.35303497314453125, | |
| "step": 2590 | |
| }, | |
| { | |
| "epoch": 3.1648351648351647, | |
| "grad_norm": 2.7248518466949463, | |
| "learning_rate": 1.016866541393762e-06, | |
| "loss": 0.8729944825172424, | |
| "step": 2592 | |
| }, | |
| { | |
| "epoch": 3.167277167277167, | |
| "grad_norm": 2.3809876441955566, | |
| "learning_rate": 1.0139731197314144e-06, | |
| "loss": 0.7970367074012756, | |
| "step": 2594 | |
| }, | |
| { | |
| "epoch": 3.1697191697191696, | |
| "grad_norm": 5.26347017288208, | |
| "learning_rate": 1.0110867748680229e-06, | |
| "loss": 0.6249693632125854, | |
| "step": 2596 | |
| }, | |
| { | |
| "epoch": 3.172161172161172, | |
| "grad_norm": 2.0786538124084473, | |
| "learning_rate": 1.0082075185695821e-06, | |
| "loss": 0.8957004547119141, | |
| "step": 2598 | |
| }, | |
| { | |
| "epoch": 3.1746031746031744, | |
| "grad_norm": 2.350102424621582, | |
| "learning_rate": 1.0053353625731898e-06, | |
| "loss": 0.773188591003418, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 3.177045177045177, | |
| "grad_norm": 2.2141921520233154, | |
| "learning_rate": 1.0024703185870009e-06, | |
| "loss": 0.8564462065696716, | |
| "step": 2602 | |
| }, | |
| { | |
| "epoch": 3.1794871794871793, | |
| "grad_norm": 1.9684959650039673, | |
| "learning_rate": 9.99612398290176e-07, | |
| "loss": 0.8819740414619446, | |
| "step": 2604 | |
| }, | |
| { | |
| "epoch": 3.1819291819291817, | |
| "grad_norm": 9.041963577270508, | |
| "learning_rate": 9.967616133328415e-07, | |
| "loss": 0.6753929257392883, | |
| "step": 2606 | |
| }, | |
| { | |
| "epoch": 3.1843711843711846, | |
| "grad_norm": 3.164386510848999, | |
| "learning_rate": 9.939179753360317e-07, | |
| "loss": 0.9383725523948669, | |
| "step": 2608 | |
| }, | |
| { | |
| "epoch": 3.186813186813187, | |
| "grad_norm": 1.6950500011444092, | |
| "learning_rate": 9.910814958916509e-07, | |
| "loss": 0.8148356676101685, | |
| "step": 2610 | |
| }, | |
| { | |
| "epoch": 3.1892551892551895, | |
| "grad_norm": 1.7868210077285767, | |
| "learning_rate": 9.882521865624188e-07, | |
| "loss": 0.8345255255699158, | |
| "step": 2612 | |
| }, | |
| { | |
| "epoch": 3.191697191697192, | |
| "grad_norm": 2.3038735389709473, | |
| "learning_rate": 9.854300588818285e-07, | |
| "loss": 0.5892983078956604, | |
| "step": 2614 | |
| }, | |
| { | |
| "epoch": 3.1941391941391943, | |
| "grad_norm": 3.3323488235473633, | |
| "learning_rate": 9.826151243540976e-07, | |
| "loss": 0.5326892137527466, | |
| "step": 2616 | |
| }, | |
| { | |
| "epoch": 3.1965811965811968, | |
| "grad_norm": 5.719020366668701, | |
| "learning_rate": 9.798073944541209e-07, | |
| "loss": 0.5761935114860535, | |
| "step": 2618 | |
| }, | |
| { | |
| "epoch": 3.199023199023199, | |
| "grad_norm": 4.963831424713135, | |
| "learning_rate": 9.77006880627423e-07, | |
| "loss": 0.35491544008255005, | |
| "step": 2620 | |
| }, | |
| { | |
| "epoch": 3.2014652014652016, | |
| "grad_norm": 1.7607579231262207, | |
| "learning_rate": 9.742135942901152e-07, | |
| "loss": 0.5363562703132629, | |
| "step": 2622 | |
| }, | |
| { | |
| "epoch": 3.203907203907204, | |
| "grad_norm": 15.481447219848633, | |
| "learning_rate": 9.714275468288426e-07, | |
| "loss": 0.43480369448661804, | |
| "step": 2624 | |
| }, | |
| { | |
| "epoch": 3.2063492063492065, | |
| "grad_norm": 2.512791633605957, | |
| "learning_rate": 9.68648749600746e-07, | |
| "loss": 0.9965137839317322, | |
| "step": 2626 | |
| }, | |
| { | |
| "epoch": 3.208791208791209, | |
| "grad_norm": 38.066707611083984, | |
| "learning_rate": 9.658772139334074e-07, | |
| "loss": 0.227127343416214, | |
| "step": 2628 | |
| }, | |
| { | |
| "epoch": 3.2112332112332114, | |
| "grad_norm": 2.2914047241210938, | |
| "learning_rate": 9.631129511248099e-07, | |
| "loss": 0.9076048135757446, | |
| "step": 2630 | |
| }, | |
| { | |
| "epoch": 3.213675213675214, | |
| "grad_norm": 3.0350992679595947, | |
| "learning_rate": 9.603559724432874e-07, | |
| "loss": 0.5686833262443542, | |
| "step": 2632 | |
| }, | |
| { | |
| "epoch": 3.2161172161172162, | |
| "grad_norm": 2.278398036956787, | |
| "learning_rate": 9.576062891274816e-07, | |
| "loss": 0.6908602714538574, | |
| "step": 2634 | |
| }, | |
| { | |
| "epoch": 3.2185592185592187, | |
| "grad_norm": 7.601328372955322, | |
| "learning_rate": 9.548639123862952e-07, | |
| "loss": 0.81014084815979, | |
| "step": 2636 | |
| }, | |
| { | |
| "epoch": 3.221001221001221, | |
| "grad_norm": 4.974308490753174, | |
| "learning_rate": 9.52128853398847e-07, | |
| "loss": 0.6480343341827393, | |
| "step": 2638 | |
| }, | |
| { | |
| "epoch": 3.2234432234432235, | |
| "grad_norm": 5.386457443237305, | |
| "learning_rate": 9.494011233144227e-07, | |
| "loss": 0.6495685577392578, | |
| "step": 2640 | |
| }, | |
| { | |
| "epoch": 3.225885225885226, | |
| "grad_norm": 3.7221124172210693, | |
| "learning_rate": 9.466807332524343e-07, | |
| "loss": 0.885014533996582, | |
| "step": 2642 | |
| }, | |
| { | |
| "epoch": 3.2283272283272284, | |
| "grad_norm": 2.109729051589966, | |
| "learning_rate": 9.439676943023732e-07, | |
| "loss": 0.8729287385940552, | |
| "step": 2644 | |
| }, | |
| { | |
| "epoch": 3.230769230769231, | |
| "grad_norm": 1.1997343301773071, | |
| "learning_rate": 9.412620175237621e-07, | |
| "loss": 0.913487434387207, | |
| "step": 2646 | |
| }, | |
| { | |
| "epoch": 3.2332112332112333, | |
| "grad_norm": 5.304439544677734, | |
| "learning_rate": 9.385637139461151e-07, | |
| "loss": 0.9510135650634766, | |
| "step": 2648 | |
| }, | |
| { | |
| "epoch": 3.2356532356532357, | |
| "grad_norm": 8.256377220153809, | |
| "learning_rate": 9.358727945688877e-07, | |
| "loss": 0.2964293956756592, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 3.238095238095238, | |
| "grad_norm": 2.2458744049072266, | |
| "learning_rate": 9.331892703614359e-07, | |
| "loss": 0.8582343459129333, | |
| "step": 2652 | |
| }, | |
| { | |
| "epoch": 3.2405372405372406, | |
| "grad_norm": 2.5474815368652344, | |
| "learning_rate": 9.305131522629679e-07, | |
| "loss": 1.0781978368759155, | |
| "step": 2654 | |
| }, | |
| { | |
| "epoch": 3.242979242979243, | |
| "grad_norm": 1.53843092918396, | |
| "learning_rate": 9.27844451182503e-07, | |
| "loss": 0.4822746217250824, | |
| "step": 2656 | |
| }, | |
| { | |
| "epoch": 3.2454212454212454, | |
| "grad_norm": 5.753013610839844, | |
| "learning_rate": 9.251831779988252e-07, | |
| "loss": 0.3543876111507416, | |
| "step": 2658 | |
| }, | |
| { | |
| "epoch": 3.247863247863248, | |
| "grad_norm": 6.03514289855957, | |
| "learning_rate": 9.22529343560439e-07, | |
| "loss": 0.5339376330375671, | |
| "step": 2660 | |
| }, | |
| { | |
| "epoch": 3.2503052503052503, | |
| "grad_norm": 3.018615245819092, | |
| "learning_rate": 9.19882958685524e-07, | |
| "loss": 1.2899425029754639, | |
| "step": 2662 | |
| }, | |
| { | |
| "epoch": 3.2527472527472527, | |
| "grad_norm": 4.987201690673828, | |
| "learning_rate": 9.172440341618951e-07, | |
| "loss": 0.6661590337753296, | |
| "step": 2664 | |
| }, | |
| { | |
| "epoch": 3.255189255189255, | |
| "grad_norm": 3.2230803966522217, | |
| "learning_rate": 9.146125807469525e-07, | |
| "loss": 0.6229037642478943, | |
| "step": 2666 | |
| }, | |
| { | |
| "epoch": 3.2576312576312576, | |
| "grad_norm": 5.4705071449279785, | |
| "learning_rate": 9.119886091676436e-07, | |
| "loss": 0.9204983711242676, | |
| "step": 2668 | |
| }, | |
| { | |
| "epoch": 3.26007326007326, | |
| "grad_norm": 1.7358949184417725, | |
| "learning_rate": 9.093721301204143e-07, | |
| "loss": 0.8217456340789795, | |
| "step": 2670 | |
| }, | |
| { | |
| "epoch": 3.2625152625152625, | |
| "grad_norm": 2.2710583209991455, | |
| "learning_rate": 9.067631542711692e-07, | |
| "loss": 0.5310102701187134, | |
| "step": 2672 | |
| }, | |
| { | |
| "epoch": 3.264957264957265, | |
| "grad_norm": 23.32669448852539, | |
| "learning_rate": 9.041616922552254e-07, | |
| "loss": 0.1262706220149994, | |
| "step": 2674 | |
| }, | |
| { | |
| "epoch": 3.2673992673992673, | |
| "grad_norm": 2.316551923751831, | |
| "learning_rate": 9.015677546772717e-07, | |
| "loss": 0.9631689190864563, | |
| "step": 2676 | |
| }, | |
| { | |
| "epoch": 3.2698412698412698, | |
| "grad_norm": 5.279893398284912, | |
| "learning_rate": 8.989813521113232e-07, | |
| "loss": 0.7836791276931763, | |
| "step": 2678 | |
| }, | |
| { | |
| "epoch": 3.272283272283272, | |
| "grad_norm": 0.7904373407363892, | |
| "learning_rate": 8.964024951006798e-07, | |
| "loss": 0.49453315138816833, | |
| "step": 2680 | |
| }, | |
| { | |
| "epoch": 3.2747252747252746, | |
| "grad_norm": 4.579488277435303, | |
| "learning_rate": 8.938311941578806e-07, | |
| "loss": 0.48266905546188354, | |
| "step": 2682 | |
| }, | |
| { | |
| "epoch": 3.277167277167277, | |
| "grad_norm": 2.3565316200256348, | |
| "learning_rate": 8.912674597646653e-07, | |
| "loss": 0.6459278464317322, | |
| "step": 2684 | |
| }, | |
| { | |
| "epoch": 3.2796092796092795, | |
| "grad_norm": 3.3402786254882812, | |
| "learning_rate": 8.887113023719262e-07, | |
| "loss": 1.0020655393600464, | |
| "step": 2686 | |
| }, | |
| { | |
| "epoch": 3.282051282051282, | |
| "grad_norm": 0.46372556686401367, | |
| "learning_rate": 8.861627323996724e-07, | |
| "loss": 0.08561723679304123, | |
| "step": 2688 | |
| }, | |
| { | |
| "epoch": 3.2844932844932844, | |
| "grad_norm": 2.682774782180786, | |
| "learning_rate": 8.836217602369799e-07, | |
| "loss": 1.0556048154830933, | |
| "step": 2690 | |
| }, | |
| { | |
| "epoch": 3.286935286935287, | |
| "grad_norm": 2.9882302284240723, | |
| "learning_rate": 8.810883962419542e-07, | |
| "loss": 0.9429636001586914, | |
| "step": 2692 | |
| }, | |
| { | |
| "epoch": 3.2893772893772892, | |
| "grad_norm": 0.21104033291339874, | |
| "learning_rate": 8.785626507416855e-07, | |
| "loss": 0.11109757423400879, | |
| "step": 2694 | |
| }, | |
| { | |
| "epoch": 3.2918192918192917, | |
| "grad_norm": 0.5581515431404114, | |
| "learning_rate": 8.760445340322096e-07, | |
| "loss": 0.17564286291599274, | |
| "step": 2696 | |
| }, | |
| { | |
| "epoch": 3.294261294261294, | |
| "grad_norm": 2.4714837074279785, | |
| "learning_rate": 8.735340563784625e-07, | |
| "loss": 0.6768051385879517, | |
| "step": 2698 | |
| }, | |
| { | |
| "epoch": 3.2967032967032965, | |
| "grad_norm": 2.1521193981170654, | |
| "learning_rate": 8.710312280142416e-07, | |
| "loss": 0.9193722605705261, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 3.299145299145299, | |
| "grad_norm": 13.212026596069336, | |
| "learning_rate": 8.685360591421598e-07, | |
| "loss": 0.9638568758964539, | |
| "step": 2702 | |
| }, | |
| { | |
| "epoch": 3.3015873015873014, | |
| "grad_norm": 2.658658266067505, | |
| "learning_rate": 8.660485599336094e-07, | |
| "loss": 0.8721650838851929, | |
| "step": 2704 | |
| }, | |
| { | |
| "epoch": 3.304029304029304, | |
| "grad_norm": 2.8447883129119873, | |
| "learning_rate": 8.635687405287171e-07, | |
| "loss": 0.7735913991928101, | |
| "step": 2706 | |
| }, | |
| { | |
| "epoch": 3.3064713064713063, | |
| "grad_norm": 1.3805103302001953, | |
| "learning_rate": 8.610966110363014e-07, | |
| "loss": 0.5056965351104736, | |
| "step": 2708 | |
| }, | |
| { | |
| "epoch": 3.3089133089133087, | |
| "grad_norm": 1.813744068145752, | |
| "learning_rate": 8.586321815338361e-07, | |
| "loss": 0.57419753074646, | |
| "step": 2710 | |
| }, | |
| { | |
| "epoch": 3.311355311355311, | |
| "grad_norm": 2.599118232727051, | |
| "learning_rate": 8.56175462067405e-07, | |
| "loss": 0.8707802295684814, | |
| "step": 2712 | |
| }, | |
| { | |
| "epoch": 3.3137973137973136, | |
| "grad_norm": 2.0446720123291016, | |
| "learning_rate": 8.537264626516634e-07, | |
| "loss": 0.5774456262588501, | |
| "step": 2714 | |
| }, | |
| { | |
| "epoch": 3.316239316239316, | |
| "grad_norm": 3.4925484657287598, | |
| "learning_rate": 8.512851932697947e-07, | |
| "loss": 0.9497953653335571, | |
| "step": 2716 | |
| }, | |
| { | |
| "epoch": 3.3186813186813184, | |
| "grad_norm": 3.462381601333618, | |
| "learning_rate": 8.488516638734731e-07, | |
| "loss": 0.8057655692100525, | |
| "step": 2718 | |
| }, | |
| { | |
| "epoch": 3.3211233211233213, | |
| "grad_norm": 1.8965480327606201, | |
| "learning_rate": 8.464258843828202e-07, | |
| "loss": 0.8699415326118469, | |
| "step": 2720 | |
| }, | |
| { | |
| "epoch": 3.3235653235653237, | |
| "grad_norm": 2.465651750564575, | |
| "learning_rate": 8.440078646863664e-07, | |
| "loss": 0.641089141368866, | |
| "step": 2722 | |
| }, | |
| { | |
| "epoch": 3.326007326007326, | |
| "grad_norm": 0.7584552764892578, | |
| "learning_rate": 8.415976146410084e-07, | |
| "loss": 0.09330576658248901, | |
| "step": 2724 | |
| }, | |
| { | |
| "epoch": 3.3284493284493286, | |
| "grad_norm": 2.273700714111328, | |
| "learning_rate": 8.391951440719725e-07, | |
| "loss": 0.5427566766738892, | |
| "step": 2726 | |
| }, | |
| { | |
| "epoch": 3.330891330891331, | |
| "grad_norm": 2.9195804595947266, | |
| "learning_rate": 8.368004627727699e-07, | |
| "loss": 0.8910986185073853, | |
| "step": 2728 | |
| }, | |
| { | |
| "epoch": 3.3333333333333335, | |
| "grad_norm": 4.608819007873535, | |
| "learning_rate": 8.344135805051629e-07, | |
| "loss": 0.7685779929161072, | |
| "step": 2730 | |
| }, | |
| { | |
| "epoch": 3.335775335775336, | |
| "grad_norm": 0.7064034938812256, | |
| "learning_rate": 8.320345069991175e-07, | |
| "loss": 0.5918761491775513, | |
| "step": 2732 | |
| }, | |
| { | |
| "epoch": 3.3382173382173383, | |
| "grad_norm": 3.630662202835083, | |
| "learning_rate": 8.296632519527711e-07, | |
| "loss": 0.6658087372779846, | |
| "step": 2734 | |
| }, | |
| { | |
| "epoch": 3.340659340659341, | |
| "grad_norm": 2.0108442306518555, | |
| "learning_rate": 8.272998250323872e-07, | |
| "loss": 0.7752918004989624, | |
| "step": 2736 | |
| }, | |
| { | |
| "epoch": 3.343101343101343, | |
| "grad_norm": 1.0330065488815308, | |
| "learning_rate": 8.249442358723204e-07, | |
| "loss": 0.5759359002113342, | |
| "step": 2738 | |
| }, | |
| { | |
| "epoch": 3.3455433455433456, | |
| "grad_norm": 3.048520803451538, | |
| "learning_rate": 8.225964940749737e-07, | |
| "loss": 0.5758652687072754, | |
| "step": 2740 | |
| }, | |
| { | |
| "epoch": 3.347985347985348, | |
| "grad_norm": 4.6908392906188965, | |
| "learning_rate": 8.202566092107628e-07, | |
| "loss": 0.8692240118980408, | |
| "step": 2742 | |
| }, | |
| { | |
| "epoch": 3.3504273504273505, | |
| "grad_norm": 11.141556739807129, | |
| "learning_rate": 8.179245908180724e-07, | |
| "loss": 0.5387795567512512, | |
| "step": 2744 | |
| }, | |
| { | |
| "epoch": 3.352869352869353, | |
| "grad_norm": 3.472233533859253, | |
| "learning_rate": 8.156004484032226e-07, | |
| "loss": 0.7473067045211792, | |
| "step": 2746 | |
| }, | |
| { | |
| "epoch": 3.3553113553113554, | |
| "grad_norm": 7.765378475189209, | |
| "learning_rate": 8.132841914404253e-07, | |
| "loss": 0.4999602437019348, | |
| "step": 2748 | |
| }, | |
| { | |
| "epoch": 3.357753357753358, | |
| "grad_norm": 9.492226600646973, | |
| "learning_rate": 8.109758293717505e-07, | |
| "loss": 0.36286643147468567, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 3.3601953601953602, | |
| "grad_norm": 0.9999972581863403, | |
| "learning_rate": 8.086753716070828e-07, | |
| "loss": 0.402780145406723, | |
| "step": 2752 | |
| }, | |
| { | |
| "epoch": 3.3626373626373627, | |
| "grad_norm": 2.837510108947754, | |
| "learning_rate": 8.063828275240873e-07, | |
| "loss": 0.4516952335834503, | |
| "step": 2754 | |
| }, | |
| { | |
| "epoch": 3.365079365079365, | |
| "grad_norm": 4.009491443634033, | |
| "learning_rate": 8.040982064681671e-07, | |
| "loss": 0.8290095925331116, | |
| "step": 2756 | |
| }, | |
| { | |
| "epoch": 3.3675213675213675, | |
| "grad_norm": 2.420555830001831, | |
| "learning_rate": 8.018215177524302e-07, | |
| "loss": 0.8783026337623596, | |
| "step": 2758 | |
| }, | |
| { | |
| "epoch": 3.36996336996337, | |
| "grad_norm": 4.7987284660339355, | |
| "learning_rate": 7.995527706576474e-07, | |
| "loss": 1.161372423171997, | |
| "step": 2760 | |
| }, | |
| { | |
| "epoch": 3.3724053724053724, | |
| "grad_norm": 2.0077738761901855, | |
| "learning_rate": 7.972919744322172e-07, | |
| "loss": 0.5153079032897949, | |
| "step": 2762 | |
| }, | |
| { | |
| "epoch": 3.374847374847375, | |
| "grad_norm": 2.043386459350586, | |
| "learning_rate": 7.950391382921253e-07, | |
| "loss": 0.8760576248168945, | |
| "step": 2764 | |
| }, | |
| { | |
| "epoch": 3.3772893772893773, | |
| "grad_norm": 2.278296709060669, | |
| "learning_rate": 7.927942714209094e-07, | |
| "loss": 0.47707459330558777, | |
| "step": 2766 | |
| }, | |
| { | |
| "epoch": 3.3797313797313797, | |
| "grad_norm": 3.5936970710754395, | |
| "learning_rate": 7.905573829696222e-07, | |
| "loss": 0.3957478404045105, | |
| "step": 2768 | |
| }, | |
| { | |
| "epoch": 3.382173382173382, | |
| "grad_norm": 7.540212154388428, | |
| "learning_rate": 7.883284820567905e-07, | |
| "loss": 0.5244758725166321, | |
| "step": 2770 | |
| }, | |
| { | |
| "epoch": 3.3846153846153846, | |
| "grad_norm": 18.458484649658203, | |
| "learning_rate": 7.861075777683822e-07, | |
| "loss": 0.8487293720245361, | |
| "step": 2772 | |
| }, | |
| { | |
| "epoch": 3.387057387057387, | |
| "grad_norm": 9.975359916687012, | |
| "learning_rate": 7.838946791577669e-07, | |
| "loss": 0.42381957173347473, | |
| "step": 2774 | |
| }, | |
| { | |
| "epoch": 3.3894993894993894, | |
| "grad_norm": 1.6827895641326904, | |
| "learning_rate": 7.816897952456802e-07, | |
| "loss": 0.8452630043029785, | |
| "step": 2776 | |
| }, | |
| { | |
| "epoch": 3.391941391941392, | |
| "grad_norm": 2.2170772552490234, | |
| "learning_rate": 7.794929350201849e-07, | |
| "loss": 0.7993656396865845, | |
| "step": 2778 | |
| }, | |
| { | |
| "epoch": 3.3943833943833943, | |
| "grad_norm": 0.4966033399105072, | |
| "learning_rate": 7.773041074366375e-07, | |
| "loss": 0.38123244047164917, | |
| "step": 2780 | |
| }, | |
| { | |
| "epoch": 3.3968253968253967, | |
| "grad_norm": 1.8956717252731323, | |
| "learning_rate": 7.751233214176485e-07, | |
| "loss": 0.4719703495502472, | |
| "step": 2782 | |
| }, | |
| { | |
| "epoch": 3.399267399267399, | |
| "grad_norm": 3.7957403659820557, | |
| "learning_rate": 7.729505858530489e-07, | |
| "loss": 0.21603846549987793, | |
| "step": 2784 | |
| }, | |
| { | |
| "epoch": 3.4017094017094016, | |
| "grad_norm": 3.2089285850524902, | |
| "learning_rate": 7.70785909599851e-07, | |
| "loss": 0.4859767258167267, | |
| "step": 2786 | |
| }, | |
| { | |
| "epoch": 3.404151404151404, | |
| "grad_norm": 2.2830421924591064, | |
| "learning_rate": 7.686293014822149e-07, | |
| "loss": 0.8922374248504639, | |
| "step": 2788 | |
| }, | |
| { | |
| "epoch": 3.4065934065934065, | |
| "grad_norm": 3.5256621837615967, | |
| "learning_rate": 7.664807702914107e-07, | |
| "loss": 0.8285965919494629, | |
| "step": 2790 | |
| }, | |
| { | |
| "epoch": 3.409035409035409, | |
| "grad_norm": 2.4469144344329834, | |
| "learning_rate": 7.643403247857853e-07, | |
| "loss": 0.4633885622024536, | |
| "step": 2792 | |
| }, | |
| { | |
| "epoch": 3.4114774114774113, | |
| "grad_norm": 3.2874977588653564, | |
| "learning_rate": 7.622079736907219e-07, | |
| "loss": 0.730563223361969, | |
| "step": 2794 | |
| }, | |
| { | |
| "epoch": 3.413919413919414, | |
| "grad_norm": 1.843967318534851, | |
| "learning_rate": 7.600837256986104e-07, | |
| "loss": 0.9653308391571045, | |
| "step": 2796 | |
| }, | |
| { | |
| "epoch": 3.416361416361416, | |
| "grad_norm": 6.217641830444336, | |
| "learning_rate": 7.57967589468806e-07, | |
| "loss": 0.47932058572769165, | |
| "step": 2798 | |
| }, | |
| { | |
| "epoch": 3.4188034188034186, | |
| "grad_norm": 0.45403721928596497, | |
| "learning_rate": 7.558595736275995e-07, | |
| "loss": 0.05683291330933571, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 3.421245421245421, | |
| "grad_norm": 2.4444427490234375, | |
| "learning_rate": 7.537596867681773e-07, | |
| "loss": 1.0308482646942139, | |
| "step": 2802 | |
| }, | |
| { | |
| "epoch": 3.4236874236874235, | |
| "grad_norm": 0.5573722124099731, | |
| "learning_rate": 7.516679374505911e-07, | |
| "loss": 0.6440561413764954, | |
| "step": 2804 | |
| }, | |
| { | |
| "epoch": 3.426129426129426, | |
| "grad_norm": 3.718284845352173, | |
| "learning_rate": 7.495843342017173e-07, | |
| "loss": 0.6178560853004456, | |
| "step": 2806 | |
| }, | |
| { | |
| "epoch": 3.4285714285714284, | |
| "grad_norm": 3.11525297164917, | |
| "learning_rate": 7.475088855152279e-07, | |
| "loss": 0.923469066619873, | |
| "step": 2808 | |
| }, | |
| { | |
| "epoch": 3.4310134310134313, | |
| "grad_norm": 2.810075521469116, | |
| "learning_rate": 7.454415998515516e-07, | |
| "loss": 0.7372915744781494, | |
| "step": 2810 | |
| }, | |
| { | |
| "epoch": 3.4334554334554337, | |
| "grad_norm": 2.502122640609741, | |
| "learning_rate": 7.433824856378425e-07, | |
| "loss": 0.14429078996181488, | |
| "step": 2812 | |
| }, | |
| { | |
| "epoch": 3.435897435897436, | |
| "grad_norm": 10.276256561279297, | |
| "learning_rate": 7.413315512679436e-07, | |
| "loss": 0.5484145283699036, | |
| "step": 2814 | |
| }, | |
| { | |
| "epoch": 3.4383394383394386, | |
| "grad_norm": 2.402463912963867, | |
| "learning_rate": 7.392888051023542e-07, | |
| "loss": 0.8286385536193848, | |
| "step": 2816 | |
| }, | |
| { | |
| "epoch": 3.440781440781441, | |
| "grad_norm": 1.590881586074829, | |
| "learning_rate": 7.37254255468193e-07, | |
| "loss": 0.9624377489089966, | |
| "step": 2818 | |
| }, | |
| { | |
| "epoch": 3.4432234432234434, | |
| "grad_norm": 2.1349987983703613, | |
| "learning_rate": 7.352279106591676e-07, | |
| "loss": 0.8825662732124329, | |
| "step": 2820 | |
| }, | |
| { | |
| "epoch": 3.445665445665446, | |
| "grad_norm": 3.0658047199249268, | |
| "learning_rate": 7.332097789355388e-07, | |
| "loss": 0.9127561450004578, | |
| "step": 2822 | |
| }, | |
| { | |
| "epoch": 3.4481074481074483, | |
| "grad_norm": 1.7639163732528687, | |
| "learning_rate": 7.31199868524088e-07, | |
| "loss": 0.8078799247741699, | |
| "step": 2824 | |
| }, | |
| { | |
| "epoch": 3.4505494505494507, | |
| "grad_norm": 1.9734654426574707, | |
| "learning_rate": 7.291981876180815e-07, | |
| "loss": 0.6381809115409851, | |
| "step": 2826 | |
| }, | |
| { | |
| "epoch": 3.452991452991453, | |
| "grad_norm": 2.2318012714385986, | |
| "learning_rate": 7.272047443772395e-07, | |
| "loss": 0.760457456111908, | |
| "step": 2828 | |
| }, | |
| { | |
| "epoch": 3.4554334554334556, | |
| "grad_norm": 9.063981056213379, | |
| "learning_rate": 7.252195469277024e-07, | |
| "loss": 0.6253539323806763, | |
| "step": 2830 | |
| }, | |
| { | |
| "epoch": 3.457875457875458, | |
| "grad_norm": 3.0912418365478516, | |
| "learning_rate": 7.232426033619955e-07, | |
| "loss": 0.4733204245567322, | |
| "step": 2832 | |
| }, | |
| { | |
| "epoch": 3.4603174603174605, | |
| "grad_norm": 1.568339228630066, | |
| "learning_rate": 7.212739217389991e-07, | |
| "loss": 0.9539817571640015, | |
| "step": 2834 | |
| }, | |
| { | |
| "epoch": 3.462759462759463, | |
| "grad_norm": 9.57923412322998, | |
| "learning_rate": 7.193135100839142e-07, | |
| "loss": 0.5720884799957275, | |
| "step": 2836 | |
| }, | |
| { | |
| "epoch": 3.4652014652014653, | |
| "grad_norm": 14.26650333404541, | |
| "learning_rate": 7.173613763882297e-07, | |
| "loss": 0.5722582936286926, | |
| "step": 2838 | |
| }, | |
| { | |
| "epoch": 3.4676434676434678, | |
| "grad_norm": 3.157581329345703, | |
| "learning_rate": 7.154175286096886e-07, | |
| "loss": 0.954519510269165, | |
| "step": 2840 | |
| }, | |
| { | |
| "epoch": 3.47008547008547, | |
| "grad_norm": 2.162440061569214, | |
| "learning_rate": 7.134819746722588e-07, | |
| "loss": 0.8875312805175781, | |
| "step": 2842 | |
| }, | |
| { | |
| "epoch": 3.4725274725274726, | |
| "grad_norm": 1.576352834701538, | |
| "learning_rate": 7.115547224660981e-07, | |
| "loss": 0.8703738451004028, | |
| "step": 2844 | |
| }, | |
| { | |
| "epoch": 3.474969474969475, | |
| "grad_norm": 2.352095127105713, | |
| "learning_rate": 7.096357798475231e-07, | |
| "loss": 0.8873903155326843, | |
| "step": 2846 | |
| }, | |
| { | |
| "epoch": 3.4774114774114775, | |
| "grad_norm": 2.0396454334259033, | |
| "learning_rate": 7.077251546389761e-07, | |
| "loss": 0.8595806360244751, | |
| "step": 2848 | |
| }, | |
| { | |
| "epoch": 3.47985347985348, | |
| "grad_norm": 2.4909889698028564, | |
| "learning_rate": 7.058228546289952e-07, | |
| "loss": 0.6372047662734985, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 3.4822954822954824, | |
| "grad_norm": 2.2574751377105713, | |
| "learning_rate": 7.039288875721798e-07, | |
| "loss": 0.8206950426101685, | |
| "step": 2852 | |
| }, | |
| { | |
| "epoch": 3.484737484737485, | |
| "grad_norm": 0.5610913634300232, | |
| "learning_rate": 7.020432611891629e-07, | |
| "loss": 0.1707066297531128, | |
| "step": 2854 | |
| }, | |
| { | |
| "epoch": 3.4871794871794872, | |
| "grad_norm": 8.053951263427734, | |
| "learning_rate": 7.001659831665748e-07, | |
| "loss": 0.6180318593978882, | |
| "step": 2856 | |
| }, | |
| { | |
| "epoch": 3.4896214896214897, | |
| "grad_norm": 8.793201446533203, | |
| "learning_rate": 6.982970611570168e-07, | |
| "loss": 0.29429712891578674, | |
| "step": 2858 | |
| }, | |
| { | |
| "epoch": 3.492063492063492, | |
| "grad_norm": 1.830889344215393, | |
| "learning_rate": 6.964365027790243e-07, | |
| "loss": 0.8592406511306763, | |
| "step": 2860 | |
| }, | |
| { | |
| "epoch": 3.4945054945054945, | |
| "grad_norm": 2.1449406147003174, | |
| "learning_rate": 6.945843156170423e-07, | |
| "loss": 0.9528040885925293, | |
| "step": 2862 | |
| }, | |
| { | |
| "epoch": 3.496947496947497, | |
| "grad_norm": 2.4805285930633545, | |
| "learning_rate": 6.927405072213878e-07, | |
| "loss": 0.467544287443161, | |
| "step": 2864 | |
| }, | |
| { | |
| "epoch": 3.4993894993894994, | |
| "grad_norm": 4.722518444061279, | |
| "learning_rate": 6.909050851082258e-07, | |
| "loss": 0.38818594813346863, | |
| "step": 2866 | |
| }, | |
| { | |
| "epoch": 3.501831501831502, | |
| "grad_norm": 2.0547142028808594, | |
| "learning_rate": 6.89078056759532e-07, | |
| "loss": 0.8755742311477661, | |
| "step": 2868 | |
| }, | |
| { | |
| "epoch": 3.5042735042735043, | |
| "grad_norm": 7.294073581695557, | |
| "learning_rate": 6.872594296230677e-07, | |
| "loss": 0.5849094986915588, | |
| "step": 2870 | |
| }, | |
| { | |
| "epoch": 3.5067155067155067, | |
| "grad_norm": 4.594062328338623, | |
| "learning_rate": 6.854492111123455e-07, | |
| "loss": 0.5189932584762573, | |
| "step": 2872 | |
| }, | |
| { | |
| "epoch": 3.509157509157509, | |
| "grad_norm": 3.3439576625823975, | |
| "learning_rate": 6.836474086066024e-07, | |
| "loss": 0.9283484220504761, | |
| "step": 2874 | |
| }, | |
| { | |
| "epoch": 3.5115995115995116, | |
| "grad_norm": 6.525171279907227, | |
| "learning_rate": 6.81854029450767e-07, | |
| "loss": 0.32967475056648254, | |
| "step": 2876 | |
| }, | |
| { | |
| "epoch": 3.514041514041514, | |
| "grad_norm": 1.570821762084961, | |
| "learning_rate": 6.800690809554313e-07, | |
| "loss": 0.9133099913597107, | |
| "step": 2878 | |
| }, | |
| { | |
| "epoch": 3.5164835164835164, | |
| "grad_norm": 2.403273582458496, | |
| "learning_rate": 6.782925703968195e-07, | |
| "loss": 0.5854375958442688, | |
| "step": 2880 | |
| }, | |
| { | |
| "epoch": 3.518925518925519, | |
| "grad_norm": 3.7819712162017822, | |
| "learning_rate": 6.765245050167599e-07, | |
| "loss": 0.6390686631202698, | |
| "step": 2882 | |
| }, | |
| { | |
| "epoch": 3.5213675213675213, | |
| "grad_norm": 2.2759008407592773, | |
| "learning_rate": 6.74764892022654e-07, | |
| "loss": 0.9842717051506042, | |
| "step": 2884 | |
| }, | |
| { | |
| "epoch": 3.5238095238095237, | |
| "grad_norm": 1.5493816137313843, | |
| "learning_rate": 6.730137385874491e-07, | |
| "loss": 0.9478884339332581, | |
| "step": 2886 | |
| }, | |
| { | |
| "epoch": 3.526251526251526, | |
| "grad_norm": 2.1049609184265137, | |
| "learning_rate": 6.712710518496049e-07, | |
| "loss": 0.777178168296814, | |
| "step": 2888 | |
| }, | |
| { | |
| "epoch": 3.5286935286935286, | |
| "grad_norm": 2.9918575286865234, | |
| "learning_rate": 6.695368389130699e-07, | |
| "loss": 0.8717899918556213, | |
| "step": 2890 | |
| }, | |
| { | |
| "epoch": 3.531135531135531, | |
| "grad_norm": 3.209395170211792, | |
| "learning_rate": 6.678111068472487e-07, | |
| "loss": 0.7953534722328186, | |
| "step": 2892 | |
| }, | |
| { | |
| "epoch": 3.5335775335775335, | |
| "grad_norm": 14.544081687927246, | |
| "learning_rate": 6.660938626869734e-07, | |
| "loss": 0.4765959680080414, | |
| "step": 2894 | |
| }, | |
| { | |
| "epoch": 3.536019536019536, | |
| "grad_norm": 51.49199295043945, | |
| "learning_rate": 6.643851134324767e-07, | |
| "loss": 0.7235844731330872, | |
| "step": 2896 | |
| }, | |
| { | |
| "epoch": 3.5384615384615383, | |
| "grad_norm": 4.060218811035156, | |
| "learning_rate": 6.626848660493623e-07, | |
| "loss": 0.804652750492096, | |
| "step": 2898 | |
| }, | |
| { | |
| "epoch": 3.5409035409035408, | |
| "grad_norm": 12.073833465576172, | |
| "learning_rate": 6.60993127468577e-07, | |
| "loss": 0.867784321308136, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 3.543345543345543, | |
| "grad_norm": 3.4921793937683105, | |
| "learning_rate": 6.593099045863802e-07, | |
| "loss": 0.13817808032035828, | |
| "step": 2902 | |
| }, | |
| { | |
| "epoch": 3.5457875457875456, | |
| "grad_norm": 1.4257546663284302, | |
| "learning_rate": 6.576352042643192e-07, | |
| "loss": 0.8409507274627686, | |
| "step": 2904 | |
| }, | |
| { | |
| "epoch": 3.548229548229548, | |
| "grad_norm": 4.283762454986572, | |
| "learning_rate": 6.559690333292e-07, | |
| "loss": 0.8512478470802307, | |
| "step": 2906 | |
| }, | |
| { | |
| "epoch": 3.5506715506715505, | |
| "grad_norm": 2.5699775218963623, | |
| "learning_rate": 6.543113985730579e-07, | |
| "loss": 1.0054024457931519, | |
| "step": 2908 | |
| }, | |
| { | |
| "epoch": 3.553113553113553, | |
| "grad_norm": 5.507492542266846, | |
| "learning_rate": 6.526623067531313e-07, | |
| "loss": 0.6415849328041077, | |
| "step": 2910 | |
| }, | |
| { | |
| "epoch": 3.5555555555555554, | |
| "grad_norm": 4.384498119354248, | |
| "learning_rate": 6.510217645918349e-07, | |
| "loss": 0.46229088306427, | |
| "step": 2912 | |
| }, | |
| { | |
| "epoch": 3.557997557997558, | |
| "grad_norm": 2.0857934951782227, | |
| "learning_rate": 6.493897787767291e-07, | |
| "loss": 0.5283727645874023, | |
| "step": 2914 | |
| }, | |
| { | |
| "epoch": 3.5604395604395602, | |
| "grad_norm": 1.9115166664123535, | |
| "learning_rate": 6.477663559604979e-07, | |
| "loss": 0.6623761653900146, | |
| "step": 2916 | |
| }, | |
| { | |
| "epoch": 3.5628815628815627, | |
| "grad_norm": 6.1141533851623535, | |
| "learning_rate": 6.461515027609163e-07, | |
| "loss": 0.6332585215568542, | |
| "step": 2918 | |
| }, | |
| { | |
| "epoch": 3.565323565323565, | |
| "grad_norm": 8.153079986572266, | |
| "learning_rate": 6.44545225760827e-07, | |
| "loss": 0.5882151126861572, | |
| "step": 2920 | |
| }, | |
| { | |
| "epoch": 3.5677655677655675, | |
| "grad_norm": 2.2321126461029053, | |
| "learning_rate": 6.429475315081122e-07, | |
| "loss": 0.8858240246772766, | |
| "step": 2922 | |
| }, | |
| { | |
| "epoch": 3.57020757020757, | |
| "grad_norm": 19.70038414001465, | |
| "learning_rate": 6.413584265156671e-07, | |
| "loss": 0.6081412434577942, | |
| "step": 2924 | |
| }, | |
| { | |
| "epoch": 3.5726495726495724, | |
| "grad_norm": 3.0893778800964355, | |
| "learning_rate": 6.397779172613722e-07, | |
| "loss": 0.454592227935791, | |
| "step": 2926 | |
| }, | |
| { | |
| "epoch": 3.575091575091575, | |
| "grad_norm": 6.8976240158081055, | |
| "learning_rate": 6.382060101880711e-07, | |
| "loss": 0.8145590424537659, | |
| "step": 2928 | |
| }, | |
| { | |
| "epoch": 3.5775335775335773, | |
| "grad_norm": 1.8353841304779053, | |
| "learning_rate": 6.366427117035377e-07, | |
| "loss": 0.8217576146125793, | |
| "step": 2930 | |
| }, | |
| { | |
| "epoch": 3.57997557997558, | |
| "grad_norm": 4.694766044616699, | |
| "learning_rate": 6.350880281804557e-07, | |
| "loss": 0.7602511644363403, | |
| "step": 2932 | |
| }, | |
| { | |
| "epoch": 3.5824175824175826, | |
| "grad_norm": 2.5171759128570557, | |
| "learning_rate": 6.335419659563896e-07, | |
| "loss": 0.7700616717338562, | |
| "step": 2934 | |
| }, | |
| { | |
| "epoch": 3.584859584859585, | |
| "grad_norm": 5.43289041519165, | |
| "learning_rate": 6.320045313337597e-07, | |
| "loss": 0.518511950969696, | |
| "step": 2936 | |
| }, | |
| { | |
| "epoch": 3.5873015873015874, | |
| "grad_norm": 0.7759566903114319, | |
| "learning_rate": 6.304757305798172e-07, | |
| "loss": 0.432235449552536, | |
| "step": 2938 | |
| }, | |
| { | |
| "epoch": 3.58974358974359, | |
| "grad_norm": 2.7056305408477783, | |
| "learning_rate": 6.289555699266174e-07, | |
| "loss": 0.5823948383331299, | |
| "step": 2940 | |
| }, | |
| { | |
| "epoch": 3.5921855921855923, | |
| "grad_norm": 10.587597846984863, | |
| "learning_rate": 6.274440555709947e-07, | |
| "loss": 0.9206511378288269, | |
| "step": 2942 | |
| }, | |
| { | |
| "epoch": 3.5946275946275947, | |
| "grad_norm": 1.4514787197113037, | |
| "learning_rate": 6.259411936745376e-07, | |
| "loss": 0.9449152946472168, | |
| "step": 2944 | |
| }, | |
| { | |
| "epoch": 3.597069597069597, | |
| "grad_norm": 2.0257363319396973, | |
| "learning_rate": 6.244469903635632e-07, | |
| "loss": 0.9899218678474426, | |
| "step": 2946 | |
| }, | |
| { | |
| "epoch": 3.5995115995115996, | |
| "grad_norm": 3.9623706340789795, | |
| "learning_rate": 6.229614517290932e-07, | |
| "loss": 0.48770591616630554, | |
| "step": 2948 | |
| }, | |
| { | |
| "epoch": 3.601953601953602, | |
| "grad_norm": 2.4973347187042236, | |
| "learning_rate": 6.21484583826827e-07, | |
| "loss": 0.5998440980911255, | |
| "step": 2950 | |
| }, | |
| { | |
| "epoch": 3.6043956043956045, | |
| "grad_norm": 4.926875114440918, | |
| "learning_rate": 6.200163926771196e-07, | |
| "loss": 0.28131791949272156, | |
| "step": 2952 | |
| }, | |
| { | |
| "epoch": 3.606837606837607, | |
| "grad_norm": 4.383153915405273, | |
| "learning_rate": 6.185568842649552e-07, | |
| "loss": 0.5602369904518127, | |
| "step": 2954 | |
| }, | |
| { | |
| "epoch": 3.6092796092796093, | |
| "grad_norm": 2.369140625, | |
| "learning_rate": 6.171060645399233e-07, | |
| "loss": 0.7010159492492676, | |
| "step": 2956 | |
| }, | |
| { | |
| "epoch": 3.6117216117216118, | |
| "grad_norm": 3.2974140644073486, | |
| "learning_rate": 6.15663939416195e-07, | |
| "loss": 0.7715173363685608, | |
| "step": 2958 | |
| }, | |
| { | |
| "epoch": 3.614163614163614, | |
| "grad_norm": 1.597782850265503, | |
| "learning_rate": 6.142305147724979e-07, | |
| "loss": 0.8990174531936646, | |
| "step": 2960 | |
| }, | |
| { | |
| "epoch": 3.6166056166056166, | |
| "grad_norm": 3.7081425189971924, | |
| "learning_rate": 6.128057964520934e-07, | |
| "loss": 0.5858969688415527, | |
| "step": 2962 | |
| }, | |
| { | |
| "epoch": 3.619047619047619, | |
| "grad_norm": 1.764650821685791, | |
| "learning_rate": 6.113897902627508e-07, | |
| "loss": 0.8998643159866333, | |
| "step": 2964 | |
| }, | |
| { | |
| "epoch": 3.6214896214896215, | |
| "grad_norm": 2.027956247329712, | |
| "learning_rate": 6.099825019767264e-07, | |
| "loss": 0.8704400658607483, | |
| "step": 2966 | |
| }, | |
| { | |
| "epoch": 3.623931623931624, | |
| "grad_norm": 2.2779312133789062, | |
| "learning_rate": 6.085839373307382e-07, | |
| "loss": 0.9620934724807739, | |
| "step": 2968 | |
| }, | |
| { | |
| "epoch": 3.6263736263736264, | |
| "grad_norm": 2.847346544265747, | |
| "learning_rate": 6.071941020259423e-07, | |
| "loss": 0.4650316834449768, | |
| "step": 2970 | |
| }, | |
| { | |
| "epoch": 3.628815628815629, | |
| "grad_norm": 4.082263469696045, | |
| "learning_rate": 6.058130017279103e-07, | |
| "loss": 0.4654577672481537, | |
| "step": 2972 | |
| }, | |
| { | |
| "epoch": 3.6312576312576312, | |
| "grad_norm": 4.675213813781738, | |
| "learning_rate": 6.044406420666072e-07, | |
| "loss": 0.5305784940719604, | |
| "step": 2974 | |
| }, | |
| { | |
| "epoch": 3.6336996336996337, | |
| "grad_norm": 4.327298164367676, | |
| "learning_rate": 6.030770286363656e-07, | |
| "loss": 0.8460584282875061, | |
| "step": 2976 | |
| }, | |
| { | |
| "epoch": 3.636141636141636, | |
| "grad_norm": 6.675053596496582, | |
| "learning_rate": 6.017221669958662e-07, | |
| "loss": 0.4189061224460602, | |
| "step": 2978 | |
| }, | |
| { | |
| "epoch": 3.6385836385836385, | |
| "grad_norm": 1.6335099935531616, | |
| "learning_rate": 6.003760626681127e-07, | |
| "loss": 0.956732988357544, | |
| "step": 2980 | |
| }, | |
| { | |
| "epoch": 3.641025641025641, | |
| "grad_norm": 6.5811381340026855, | |
| "learning_rate": 5.99038721140411e-07, | |
| "loss": 1.057121992111206, | |
| "step": 2982 | |
| }, | |
| { | |
| "epoch": 3.6434676434676434, | |
| "grad_norm": 1.5813173055648804, | |
| "learning_rate": 5.97710147864345e-07, | |
| "loss": 0.9400102496147156, | |
| "step": 2984 | |
| }, | |
| { | |
| "epoch": 3.645909645909646, | |
| "grad_norm": 3.3870911598205566, | |
| "learning_rate": 5.963903482557566e-07, | |
| "loss": 0.9326266050338745, | |
| "step": 2986 | |
| }, | |
| { | |
| "epoch": 3.6483516483516483, | |
| "grad_norm": 2.8349955081939697, | |
| "learning_rate": 5.950793276947205e-07, | |
| "loss": 0.9676442742347717, | |
| "step": 2988 | |
| }, | |
| { | |
| "epoch": 3.6507936507936507, | |
| "grad_norm": 31.81429100036621, | |
| "learning_rate": 5.937770915255269e-07, | |
| "loss": 0.9522081017494202, | |
| "step": 2990 | |
| }, | |
| { | |
| "epoch": 3.653235653235653, | |
| "grad_norm": 3.3921871185302734, | |
| "learning_rate": 5.924836450566549e-07, | |
| "loss": 0.5230456590652466, | |
| "step": 2992 | |
| }, | |
| { | |
| "epoch": 3.6556776556776556, | |
| "grad_norm": 2.2261812686920166, | |
| "learning_rate": 5.911989935607538e-07, | |
| "loss": 0.419090211391449, | |
| "step": 2994 | |
| }, | |
| { | |
| "epoch": 3.658119658119658, | |
| "grad_norm": 2.2666232585906982, | |
| "learning_rate": 5.899231422746202e-07, | |
| "loss": 0.9825529456138611, | |
| "step": 2996 | |
| }, | |
| { | |
| "epoch": 3.6605616605616604, | |
| "grad_norm": 1.18002188205719, | |
| "learning_rate": 5.886560963991778e-07, | |
| "loss": 0.45276400446891785, | |
| "step": 2998 | |
| }, | |
| { | |
| "epoch": 3.663003663003663, | |
| "grad_norm": 4.351987361907959, | |
| "learning_rate": 5.873978610994557e-07, | |
| "loss": 0.38837531208992004, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 3.6654456654456653, | |
| "grad_norm": 3.792799234390259, | |
| "learning_rate": 5.861484415045672e-07, | |
| "loss": 0.4969119429588318, | |
| "step": 3002 | |
| }, | |
| { | |
| "epoch": 3.6678876678876677, | |
| "grad_norm": 4.516859531402588, | |
| "learning_rate": 5.849078427076883e-07, | |
| "loss": 0.2892443835735321, | |
| "step": 3004 | |
| }, | |
| { | |
| "epoch": 3.67032967032967, | |
| "grad_norm": 1.7598987817764282, | |
| "learning_rate": 5.836760697660382e-07, | |
| "loss": 0.9143301844596863, | |
| "step": 3006 | |
| }, | |
| { | |
| "epoch": 3.672771672771673, | |
| "grad_norm": 7.990298748016357, | |
| "learning_rate": 5.82453127700858e-07, | |
| "loss": 0.6147029399871826, | |
| "step": 3008 | |
| }, | |
| { | |
| "epoch": 3.6752136752136755, | |
| "grad_norm": 0.5319908857345581, | |
| "learning_rate": 5.812390214973905e-07, | |
| "loss": 0.5243109464645386, | |
| "step": 3010 | |
| }, | |
| { | |
| "epoch": 3.677655677655678, | |
| "grad_norm": 2.6800284385681152, | |
| "learning_rate": 5.800337561048592e-07, | |
| "loss": 0.9062631726264954, | |
| "step": 3012 | |
| }, | |
| { | |
| "epoch": 3.6800976800976803, | |
| "grad_norm": 1.4025696516036987, | |
| "learning_rate": 5.788373364364487e-07, | |
| "loss": 0.9003893733024597, | |
| "step": 3014 | |
| }, | |
| { | |
| "epoch": 3.682539682539683, | |
| "grad_norm": 9.346170425415039, | |
| "learning_rate": 5.776497673692857e-07, | |
| "loss": 0.7075907588005066, | |
| "step": 3016 | |
| }, | |
| { | |
| "epoch": 3.684981684981685, | |
| "grad_norm": 2.3770735263824463, | |
| "learning_rate": 5.764710537444159e-07, | |
| "loss": 0.5896199941635132, | |
| "step": 3018 | |
| }, | |
| { | |
| "epoch": 3.6874236874236876, | |
| "grad_norm": 3.0938150882720947, | |
| "learning_rate": 5.753012003667885e-07, | |
| "loss": 0.6084612011909485, | |
| "step": 3020 | |
| }, | |
| { | |
| "epoch": 3.68986568986569, | |
| "grad_norm": 1.9582159519195557, | |
| "learning_rate": 5.741402120052328e-07, | |
| "loss": 0.5125177502632141, | |
| "step": 3022 | |
| }, | |
| { | |
| "epoch": 3.6923076923076925, | |
| "grad_norm": 2.1140964031219482, | |
| "learning_rate": 5.729880933924421e-07, | |
| "loss": 1.003217101097107, | |
| "step": 3024 | |
| }, | |
| { | |
| "epoch": 3.694749694749695, | |
| "grad_norm": 0.36588796973228455, | |
| "learning_rate": 5.718448492249509e-07, | |
| "loss": 0.5080230236053467, | |
| "step": 3026 | |
| }, | |
| { | |
| "epoch": 3.6971916971916974, | |
| "grad_norm": 3.183983087539673, | |
| "learning_rate": 5.707104841631195e-07, | |
| "loss": 0.7072214484214783, | |
| "step": 3028 | |
| }, | |
| { | |
| "epoch": 3.6996336996337, | |
| "grad_norm": 1.5387071371078491, | |
| "learning_rate": 5.695850028311112e-07, | |
| "loss": 0.9744673371315002, | |
| "step": 3030 | |
| }, | |
| { | |
| "epoch": 3.7020757020757022, | |
| "grad_norm": 2.1208925247192383, | |
| "learning_rate": 5.68468409816877e-07, | |
| "loss": 0.8400413990020752, | |
| "step": 3032 | |
| }, | |
| { | |
| "epoch": 3.7045177045177047, | |
| "grad_norm": 4.847201824188232, | |
| "learning_rate": 5.673607096721346e-07, | |
| "loss": 0.500311017036438, | |
| "step": 3034 | |
| }, | |
| { | |
| "epoch": 3.706959706959707, | |
| "grad_norm": 2.9996325969696045, | |
| "learning_rate": 5.662619069123503e-07, | |
| "loss": 0.5278769135475159, | |
| "step": 3036 | |
| }, | |
| { | |
| "epoch": 3.7094017094017095, | |
| "grad_norm": 2.098602771759033, | |
| "learning_rate": 5.651720060167208e-07, | |
| "loss": 0.5000000596046448, | |
| "step": 3038 | |
| }, | |
| { | |
| "epoch": 3.711843711843712, | |
| "grad_norm": 1.7179620265960693, | |
| "learning_rate": 5.640910114281555e-07, | |
| "loss": 0.9520195722579956, | |
| "step": 3040 | |
| }, | |
| { | |
| "epoch": 3.7142857142857144, | |
| "grad_norm": 2.3502564430236816, | |
| "learning_rate": 5.630189275532574e-07, | |
| "loss": 0.8327752947807312, | |
| "step": 3042 | |
| }, | |
| { | |
| "epoch": 3.716727716727717, | |
| "grad_norm": 2.5049052238464355, | |
| "learning_rate": 5.619557587623057e-07, | |
| "loss": 0.6436217427253723, | |
| "step": 3044 | |
| }, | |
| { | |
| "epoch": 3.7191697191697193, | |
| "grad_norm": 2.9425840377807617, | |
| "learning_rate": 5.609015093892374e-07, | |
| "loss": 0.9164323806762695, | |
| "step": 3046 | |
| }, | |
| { | |
| "epoch": 3.7216117216117217, | |
| "grad_norm": 3.1850688457489014, | |
| "learning_rate": 5.59856183731631e-07, | |
| "loss": 0.5315079689025879, | |
| "step": 3048 | |
| }, | |
| { | |
| "epoch": 3.724053724053724, | |
| "grad_norm": 2.6305289268493652, | |
| "learning_rate": 5.588197860506867e-07, | |
| "loss": 0.7617026567459106, | |
| "step": 3050 | |
| }, | |
| { | |
| "epoch": 3.7264957264957266, | |
| "grad_norm": 3.4540348052978516, | |
| "learning_rate": 5.577923205712124e-07, | |
| "loss": 1.017609715461731, | |
| "step": 3052 | |
| }, | |
| { | |
| "epoch": 3.728937728937729, | |
| "grad_norm": 7.902237415313721, | |
| "learning_rate": 5.567737914816022e-07, | |
| "loss": 0.5209454298019409, | |
| "step": 3054 | |
| }, | |
| { | |
| "epoch": 3.7313797313797314, | |
| "grad_norm": 1.829217791557312, | |
| "learning_rate": 5.557642029338236e-07, | |
| "loss": 0.9426127672195435, | |
| "step": 3056 | |
| }, | |
| { | |
| "epoch": 3.733821733821734, | |
| "grad_norm": 3.1745777130126953, | |
| "learning_rate": 5.547635590433968e-07, | |
| "loss": 0.6483992338180542, | |
| "step": 3058 | |
| }, | |
| { | |
| "epoch": 3.7362637362637363, | |
| "grad_norm": 10.875771522521973, | |
| "learning_rate": 5.53771863889381e-07, | |
| "loss": 0.46888014674186707, | |
| "step": 3060 | |
| }, | |
| { | |
| "epoch": 3.7387057387057387, | |
| "grad_norm": 2.8701348304748535, | |
| "learning_rate": 5.527891215143559e-07, | |
| "loss": 0.5719221830368042, | |
| "step": 3062 | |
| }, | |
| { | |
| "epoch": 3.741147741147741, | |
| "grad_norm": 1.0279072523117065, | |
| "learning_rate": 5.518153359244063e-07, | |
| "loss": 0.3847256898880005, | |
| "step": 3064 | |
| }, | |
| { | |
| "epoch": 3.7435897435897436, | |
| "grad_norm": 2.5575125217437744, | |
| "learning_rate": 5.508505110891045e-07, | |
| "loss": 0.5125806331634521, | |
| "step": 3066 | |
| }, | |
| { | |
| "epoch": 3.746031746031746, | |
| "grad_norm": 1.723737120628357, | |
| "learning_rate": 5.498946509414949e-07, | |
| "loss": 0.8170480132102966, | |
| "step": 3068 | |
| }, | |
| { | |
| "epoch": 3.7484737484737485, | |
| "grad_norm": 1.8103982210159302, | |
| "learning_rate": 5.489477593780787e-07, | |
| "loss": 1.0591984987258911, | |
| "step": 3070 | |
| }, | |
| { | |
| "epoch": 3.750915750915751, | |
| "grad_norm": 6.911821365356445, | |
| "learning_rate": 5.480098402587973e-07, | |
| "loss": 0.645149290561676, | |
| "step": 3072 | |
| }, | |
| { | |
| "epoch": 3.7533577533577533, | |
| "grad_norm": 0.26767414808273315, | |
| "learning_rate": 5.470808974070152e-07, | |
| "loss": 0.4036714732646942, | |
| "step": 3074 | |
| }, | |
| { | |
| "epoch": 3.755799755799756, | |
| "grad_norm": 4.02056884765625, | |
| "learning_rate": 5.461609346095067e-07, | |
| "loss": 0.8655245304107666, | |
| "step": 3076 | |
| }, | |
| { | |
| "epoch": 3.758241758241758, | |
| "grad_norm": 4.357627868652344, | |
| "learning_rate": 5.452499556164402e-07, | |
| "loss": 0.8845657110214233, | |
| "step": 3078 | |
| }, | |
| { | |
| "epoch": 3.7606837606837606, | |
| "grad_norm": 10.457714080810547, | |
| "learning_rate": 5.443479641413607e-07, | |
| "loss": 0.6024913191795349, | |
| "step": 3080 | |
| }, | |
| { | |
| "epoch": 3.763125763125763, | |
| "grad_norm": 2.315418243408203, | |
| "learning_rate": 5.434549638611768e-07, | |
| "loss": 0.9414732456207275, | |
| "step": 3082 | |
| }, | |
| { | |
| "epoch": 3.7655677655677655, | |
| "grad_norm": 1.8591489791870117, | |
| "learning_rate": 5.425709584161457e-07, | |
| "loss": 0.9516326785087585, | |
| "step": 3084 | |
| }, | |
| { | |
| "epoch": 3.768009768009768, | |
| "grad_norm": 1.980412244796753, | |
| "learning_rate": 5.416959514098571e-07, | |
| "loss": 0.9030287265777588, | |
| "step": 3086 | |
| }, | |
| { | |
| "epoch": 3.7704517704517704, | |
| "grad_norm": 0.8802301287651062, | |
| "learning_rate": 5.40829946409219e-07, | |
| "loss": 0.2058449685573578, | |
| "step": 3088 | |
| }, | |
| { | |
| "epoch": 3.772893772893773, | |
| "grad_norm": 3.767972230911255, | |
| "learning_rate": 5.399729469444438e-07, | |
| "loss": 0.8536104559898376, | |
| "step": 3090 | |
| }, | |
| { | |
| "epoch": 3.7753357753357752, | |
| "grad_norm": 2.7339487075805664, | |
| "learning_rate": 5.39124956509033e-07, | |
| "loss": 0.8664818406105042, | |
| "step": 3092 | |
| }, | |
| { | |
| "epoch": 3.7777777777777777, | |
| "grad_norm": 1.868648648262024, | |
| "learning_rate": 5.382859785597643e-07, | |
| "loss": 0.9490870237350464, | |
| "step": 3094 | |
| }, | |
| { | |
| "epoch": 3.78021978021978, | |
| "grad_norm": 3.2051358222961426, | |
| "learning_rate": 5.374560165166752e-07, | |
| "loss": 0.8471544981002808, | |
| "step": 3096 | |
| }, | |
| { | |
| "epoch": 3.7826617826617825, | |
| "grad_norm": 3.188377857208252, | |
| "learning_rate": 5.366350737630515e-07, | |
| "loss": 0.6783183217048645, | |
| "step": 3098 | |
| }, | |
| { | |
| "epoch": 3.785103785103785, | |
| "grad_norm": 12.093615531921387, | |
| "learning_rate": 5.358231536454119e-07, | |
| "loss": 0.8494789004325867, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 3.7875457875457874, | |
| "grad_norm": 2.5433156490325928, | |
| "learning_rate": 5.350202594734954e-07, | |
| "loss": 0.8256645202636719, | |
| "step": 3102 | |
| }, | |
| { | |
| "epoch": 3.78998778998779, | |
| "grad_norm": 6.241081237792969, | |
| "learning_rate": 5.34226394520247e-07, | |
| "loss": 0.8711805939674377, | |
| "step": 3104 | |
| }, | |
| { | |
| "epoch": 3.7924297924297923, | |
| "grad_norm": 2.3150527477264404, | |
| "learning_rate": 5.33441562021805e-07, | |
| "loss": 1.0078837871551514, | |
| "step": 3106 | |
| }, | |
| { | |
| "epoch": 3.7948717948717947, | |
| "grad_norm": 2.26035737991333, | |
| "learning_rate": 5.326657651774867e-07, | |
| "loss": 0.5672973394393921, | |
| "step": 3108 | |
| }, | |
| { | |
| "epoch": 3.797313797313797, | |
| "grad_norm": 3.3058907985687256, | |
| "learning_rate": 5.318990071497772e-07, | |
| "loss": 0.6369197368621826, | |
| "step": 3110 | |
| }, | |
| { | |
| "epoch": 3.7997557997557996, | |
| "grad_norm": 2.8003060817718506, | |
| "learning_rate": 5.311412910643145e-07, | |
| "loss": 0.5773022174835205, | |
| "step": 3112 | |
| }, | |
| { | |
| "epoch": 3.802197802197802, | |
| "grad_norm": 3.470675468444824, | |
| "learning_rate": 5.303926200098789e-07, | |
| "loss": 0.5989543199539185, | |
| "step": 3114 | |
| }, | |
| { | |
| "epoch": 3.8046398046398044, | |
| "grad_norm": 3.9955947399139404, | |
| "learning_rate": 5.296529970383777e-07, | |
| "loss": 0.44651395082473755, | |
| "step": 3116 | |
| }, | |
| { | |
| "epoch": 3.807081807081807, | |
| "grad_norm": 4.266364097595215, | |
| "learning_rate": 5.289224251648359e-07, | |
| "loss": 0.6023522019386292, | |
| "step": 3118 | |
| }, | |
| { | |
| "epoch": 3.8095238095238093, | |
| "grad_norm": 2.1567165851593018, | |
| "learning_rate": 5.282009073673812e-07, | |
| "loss": 0.9219540953636169, | |
| "step": 3120 | |
| }, | |
| { | |
| "epoch": 3.8119658119658117, | |
| "grad_norm": 4.827529430389404, | |
| "learning_rate": 5.27488446587233e-07, | |
| "loss": 0.5145304203033447, | |
| "step": 3122 | |
| }, | |
| { | |
| "epoch": 3.814407814407814, | |
| "grad_norm": 3.446068048477173, | |
| "learning_rate": 5.267850457286907e-07, | |
| "loss": 0.6707845330238342, | |
| "step": 3124 | |
| }, | |
| { | |
| "epoch": 3.8168498168498166, | |
| "grad_norm": 6.150956630706787, | |
| "learning_rate": 5.26090707659122e-07, | |
| "loss": 0.881208062171936, | |
| "step": 3126 | |
| }, | |
| { | |
| "epoch": 3.819291819291819, | |
| "grad_norm": 7.88019323348999, | |
| "learning_rate": 5.254054352089493e-07, | |
| "loss": 0.48564082384109497, | |
| "step": 3128 | |
| }, | |
| { | |
| "epoch": 3.8217338217338215, | |
| "grad_norm": 2.4069323539733887, | |
| "learning_rate": 5.247292311716413e-07, | |
| "loss": 0.8890138864517212, | |
| "step": 3130 | |
| }, | |
| { | |
| "epoch": 3.824175824175824, | |
| "grad_norm": 1.5671998262405396, | |
| "learning_rate": 5.240620983036986e-07, | |
| "loss": 0.5058675408363342, | |
| "step": 3132 | |
| }, | |
| { | |
| "epoch": 3.8266178266178263, | |
| "grad_norm": 6.1965227127075195, | |
| "learning_rate": 5.234040393246448e-07, | |
| "loss": 1.1437023878097534, | |
| "step": 3134 | |
| }, | |
| { | |
| "epoch": 3.8290598290598292, | |
| "grad_norm": 3.5453076362609863, | |
| "learning_rate": 5.227550569170133e-07, | |
| "loss": 1.039106845855713, | |
| "step": 3136 | |
| }, | |
| { | |
| "epoch": 3.8315018315018317, | |
| "grad_norm": 4.471746444702148, | |
| "learning_rate": 5.221151537263382e-07, | |
| "loss": 0.6547291278839111, | |
| "step": 3138 | |
| }, | |
| { | |
| "epoch": 3.833943833943834, | |
| "grad_norm": 3.5945651531219482, | |
| "learning_rate": 5.214843323611432e-07, | |
| "loss": 0.3847421407699585, | |
| "step": 3140 | |
| }, | |
| { | |
| "epoch": 3.8363858363858365, | |
| "grad_norm": 3.4551937580108643, | |
| "learning_rate": 5.208625953929289e-07, | |
| "loss": 0.7860216498374939, | |
| "step": 3142 | |
| }, | |
| { | |
| "epoch": 3.838827838827839, | |
| "grad_norm": 5.9122633934021, | |
| "learning_rate": 5.202499453561658e-07, | |
| "loss": 0.26646631956100464, | |
| "step": 3144 | |
| }, | |
| { | |
| "epoch": 3.8412698412698414, | |
| "grad_norm": 4.5092620849609375, | |
| "learning_rate": 5.196463847482812e-07, | |
| "loss": 0.5625693202018738, | |
| "step": 3146 | |
| }, | |
| { | |
| "epoch": 3.843711843711844, | |
| "grad_norm": 0.4482984244823456, | |
| "learning_rate": 5.1905191602965e-07, | |
| "loss": 0.12481559067964554, | |
| "step": 3148 | |
| }, | |
| { | |
| "epoch": 3.8461538461538463, | |
| "grad_norm": 5.856686115264893, | |
| "learning_rate": 5.184665416235841e-07, | |
| "loss": 0.5362542271614075, | |
| "step": 3150 | |
| }, | |
| { | |
| "epoch": 3.8485958485958487, | |
| "grad_norm": 4.156497001647949, | |
| "learning_rate": 5.178902639163247e-07, | |
| "loss": 0.7409583330154419, | |
| "step": 3152 | |
| }, | |
| { | |
| "epoch": 3.851037851037851, | |
| "grad_norm": 1.6845171451568604, | |
| "learning_rate": 5.17323085257029e-07, | |
| "loss": 0.5385940074920654, | |
| "step": 3154 | |
| }, | |
| { | |
| "epoch": 3.8534798534798536, | |
| "grad_norm": 1.5355862379074097, | |
| "learning_rate": 5.167650079577636e-07, | |
| "loss": 0.8247669339179993, | |
| "step": 3156 | |
| }, | |
| { | |
| "epoch": 3.855921855921856, | |
| "grad_norm": 4.407171249389648, | |
| "learning_rate": 5.162160342934939e-07, | |
| "loss": 0.8968489170074463, | |
| "step": 3158 | |
| }, | |
| { | |
| "epoch": 3.8583638583638584, | |
| "grad_norm": 8.075994491577148, | |
| "learning_rate": 5.15676166502075e-07, | |
| "loss": 0.09241821616888046, | |
| "step": 3160 | |
| }, | |
| { | |
| "epoch": 3.860805860805861, | |
| "grad_norm": 2.5929574966430664, | |
| "learning_rate": 5.151454067842417e-07, | |
| "loss": 0.4451131224632263, | |
| "step": 3162 | |
| }, | |
| { | |
| "epoch": 3.8632478632478633, | |
| "grad_norm": 1.8862788677215576, | |
| "learning_rate": 5.146237573036012e-07, | |
| "loss": 0.9212697148323059, | |
| "step": 3164 | |
| }, | |
| { | |
| "epoch": 3.8656898656898657, | |
| "grad_norm": 2.396461248397827, | |
| "learning_rate": 5.141112201866231e-07, | |
| "loss": 0.9008550047874451, | |
| "step": 3166 | |
| }, | |
| { | |
| "epoch": 3.868131868131868, | |
| "grad_norm": 2.7560782432556152, | |
| "learning_rate": 5.136077975226314e-07, | |
| "loss": 0.7847106456756592, | |
| "step": 3168 | |
| }, | |
| { | |
| "epoch": 3.8705738705738706, | |
| "grad_norm": 5.181787014007568, | |
| "learning_rate": 5.131134913637951e-07, | |
| "loss": 0.5696348547935486, | |
| "step": 3170 | |
| }, | |
| { | |
| "epoch": 3.873015873015873, | |
| "grad_norm": 8.310593605041504, | |
| "learning_rate": 5.126283037251208e-07, | |
| "loss": 0.5494756102561951, | |
| "step": 3172 | |
| }, | |
| { | |
| "epoch": 3.8754578754578755, | |
| "grad_norm": 2.406679391860962, | |
| "learning_rate": 5.121522365844436e-07, | |
| "loss": 0.5918058156967163, | |
| "step": 3174 | |
| }, | |
| { | |
| "epoch": 3.877899877899878, | |
| "grad_norm": 2.115579128265381, | |
| "learning_rate": 5.116852918824199e-07, | |
| "loss": 0.9309298396110535, | |
| "step": 3176 | |
| }, | |
| { | |
| "epoch": 3.8803418803418803, | |
| "grad_norm": 1.9531852006912231, | |
| "learning_rate": 5.112274715225194e-07, | |
| "loss": 0.858812153339386, | |
| "step": 3178 | |
| }, | |
| { | |
| "epoch": 3.8827838827838828, | |
| "grad_norm": 3.3092024326324463, | |
| "learning_rate": 5.107787773710157e-07, | |
| "loss": 0.8395816087722778, | |
| "step": 3180 | |
| }, | |
| { | |
| "epoch": 3.885225885225885, | |
| "grad_norm": 4.282203197479248, | |
| "learning_rate": 5.103392112569815e-07, | |
| "loss": 0.8726351261138916, | |
| "step": 3182 | |
| }, | |
| { | |
| "epoch": 3.8876678876678876, | |
| "grad_norm": 5.603507995605469, | |
| "learning_rate": 5.099087749722788e-07, | |
| "loss": 0.3810088336467743, | |
| "step": 3184 | |
| }, | |
| { | |
| "epoch": 3.89010989010989, | |
| "grad_norm": 3.650843858718872, | |
| "learning_rate": 5.094874702715529e-07, | |
| "loss": 0.9510683417320251, | |
| "step": 3186 | |
| }, | |
| { | |
| "epoch": 3.8925518925518925, | |
| "grad_norm": 2.743922472000122, | |
| "learning_rate": 5.090752988722245e-07, | |
| "loss": 0.40368887782096863, | |
| "step": 3188 | |
| }, | |
| { | |
| "epoch": 3.894993894993895, | |
| "grad_norm": 0.3727673292160034, | |
| "learning_rate": 5.086722624544829e-07, | |
| "loss": 0.420103520154953, | |
| "step": 3190 | |
| }, | |
| { | |
| "epoch": 3.8974358974358974, | |
| "grad_norm": 2.02138090133667, | |
| "learning_rate": 5.082783626612797e-07, | |
| "loss": 0.8819708824157715, | |
| "step": 3192 | |
| }, | |
| { | |
| "epoch": 3.8998778998779, | |
| "grad_norm": 3.9244892597198486, | |
| "learning_rate": 5.078936010983213e-07, | |
| "loss": 1.0119850635528564, | |
| "step": 3194 | |
| }, | |
| { | |
| "epoch": 3.9023199023199022, | |
| "grad_norm": 6.452670574188232, | |
| "learning_rate": 5.075179793340628e-07, | |
| "loss": 0.5983652472496033, | |
| "step": 3196 | |
| }, | |
| { | |
| "epoch": 3.9047619047619047, | |
| "grad_norm": 5.412775993347168, | |
| "learning_rate": 5.071514988997016e-07, | |
| "loss": 0.1550082117319107, | |
| "step": 3198 | |
| }, | |
| { | |
| "epoch": 3.907203907203907, | |
| "grad_norm": 3.0789589881896973, | |
| "learning_rate": 5.067941612891708e-07, | |
| "loss": 0.9240917563438416, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 3.9096459096459095, | |
| "grad_norm": 1.7385785579681396, | |
| "learning_rate": 5.06445967959134e-07, | |
| "loss": 0.6053808331489563, | |
| "step": 3202 | |
| }, | |
| { | |
| "epoch": 3.912087912087912, | |
| "grad_norm": 2.076815605163574, | |
| "learning_rate": 5.061069203289777e-07, | |
| "loss": 0.9977898001670837, | |
| "step": 3204 | |
| }, | |
| { | |
| "epoch": 3.9145299145299144, | |
| "grad_norm": 1.4593520164489746, | |
| "learning_rate": 5.057770197808077e-07, | |
| "loss": 0.9548913240432739, | |
| "step": 3206 | |
| }, | |
| { | |
| "epoch": 3.916971916971917, | |
| "grad_norm": 2.623448371887207, | |
| "learning_rate": 5.054562676594414e-07, | |
| "loss": 1.132678508758545, | |
| "step": 3208 | |
| }, | |
| { | |
| "epoch": 3.9194139194139193, | |
| "grad_norm": 1.8026434183120728, | |
| "learning_rate": 5.051446652724042e-07, | |
| "loss": 0.6159650087356567, | |
| "step": 3210 | |
| }, | |
| { | |
| "epoch": 3.9218559218559217, | |
| "grad_norm": 2.9582080841064453, | |
| "learning_rate": 5.048422138899222e-07, | |
| "loss": 0.23612847924232483, | |
| "step": 3212 | |
| }, | |
| { | |
| "epoch": 3.9242979242979246, | |
| "grad_norm": 1.8346482515335083, | |
| "learning_rate": 5.045489147449187e-07, | |
| "loss": 0.9001370668411255, | |
| "step": 3214 | |
| }, | |
| { | |
| "epoch": 3.926739926739927, | |
| "grad_norm": 4.2038726806640625, | |
| "learning_rate": 5.042647690330078e-07, | |
| "loss": 0.921493411064148, | |
| "step": 3216 | |
| }, | |
| { | |
| "epoch": 3.9291819291819294, | |
| "grad_norm": 6.33651065826416, | |
| "learning_rate": 5.039897779124914e-07, | |
| "loss": 0.6150534749031067, | |
| "step": 3218 | |
| }, | |
| { | |
| "epoch": 3.931623931623932, | |
| "grad_norm": 2.513700246810913, | |
| "learning_rate": 5.037239425043525e-07, | |
| "loss": 0.6679733991622925, | |
| "step": 3220 | |
| }, | |
| { | |
| "epoch": 3.9340659340659343, | |
| "grad_norm": 11.929234504699707, | |
| "learning_rate": 5.034672638922512e-07, | |
| "loss": 0.530619740486145, | |
| "step": 3222 | |
| }, | |
| { | |
| "epoch": 3.9365079365079367, | |
| "grad_norm": 3.113684892654419, | |
| "learning_rate": 5.032197431225214e-07, | |
| "loss": 0.8231785297393799, | |
| "step": 3224 | |
| }, | |
| { | |
| "epoch": 3.938949938949939, | |
| "grad_norm": 4.810062885284424, | |
| "learning_rate": 5.029813812041649e-07, | |
| "loss": 0.5280576944351196, | |
| "step": 3226 | |
| }, | |
| { | |
| "epoch": 3.9413919413919416, | |
| "grad_norm": 2.087477922439575, | |
| "learning_rate": 5.027521791088482e-07, | |
| "loss": 0.9266934394836426, | |
| "step": 3228 | |
| }, | |
| { | |
| "epoch": 3.943833943833944, | |
| "grad_norm": 4.400597095489502, | |
| "learning_rate": 5.025321377708989e-07, | |
| "loss": 0.5227733850479126, | |
| "step": 3230 | |
| }, | |
| { | |
| "epoch": 3.9462759462759465, | |
| "grad_norm": 3.1473488807678223, | |
| "learning_rate": 5.023212580873009e-07, | |
| "loss": 0.952559769153595, | |
| "step": 3232 | |
| }, | |
| { | |
| "epoch": 3.948717948717949, | |
| "grad_norm": 14.350162506103516, | |
| "learning_rate": 5.02119540917691e-07, | |
| "loss": 0.5347244143486023, | |
| "step": 3234 | |
| }, | |
| { | |
| "epoch": 3.9511599511599513, | |
| "grad_norm": 2.0704898834228516, | |
| "learning_rate": 5.01926987084356e-07, | |
| "loss": 0.9426727294921875, | |
| "step": 3236 | |
| }, | |
| { | |
| "epoch": 3.9536019536019538, | |
| "grad_norm": 3.468090057373047, | |
| "learning_rate": 5.017435973722293e-07, | |
| "loss": 0.7870326042175293, | |
| "step": 3238 | |
| }, | |
| { | |
| "epoch": 3.956043956043956, | |
| "grad_norm": 2.9406344890594482, | |
| "learning_rate": 5.015693725288866e-07, | |
| "loss": 0.4789937436580658, | |
| "step": 3240 | |
| }, | |
| { | |
| "epoch": 3.9584859584859586, | |
| "grad_norm": 15.776670455932617, | |
| "learning_rate": 5.014043132645438e-07, | |
| "loss": 0.6635629534721375, | |
| "step": 3242 | |
| }, | |
| { | |
| "epoch": 3.960927960927961, | |
| "grad_norm": 4.5655083656311035, | |
| "learning_rate": 5.012484202520545e-07, | |
| "loss": 0.9738138914108276, | |
| "step": 3244 | |
| }, | |
| { | |
| "epoch": 3.9633699633699635, | |
| "grad_norm": 2.4571170806884766, | |
| "learning_rate": 5.01101694126906e-07, | |
| "loss": 0.5079742670059204, | |
| "step": 3246 | |
| }, | |
| { | |
| "epoch": 3.965811965811966, | |
| "grad_norm": 1.870768666267395, | |
| "learning_rate": 5.009641354872178e-07, | |
| "loss": 0.9230693578720093, | |
| "step": 3248 | |
| }, | |
| { | |
| "epoch": 3.9682539682539684, | |
| "grad_norm": 2.229893445968628, | |
| "learning_rate": 5.008357448937387e-07, | |
| "loss": 0.6680663228034973, | |
| "step": 3250 | |
| }, | |
| { | |
| "epoch": 3.970695970695971, | |
| "grad_norm": 25.037006378173828, | |
| "learning_rate": 5.007165228698442e-07, | |
| "loss": 0.4087255597114563, | |
| "step": 3252 | |
| }, | |
| { | |
| "epoch": 3.9731379731379732, | |
| "grad_norm": 2.2511398792266846, | |
| "learning_rate": 5.006064699015351e-07, | |
| "loss": 0.8908025622367859, | |
| "step": 3254 | |
| }, | |
| { | |
| "epoch": 3.9755799755799757, | |
| "grad_norm": 4.969597339630127, | |
| "learning_rate": 5.005055864374352e-07, | |
| "loss": 0.8304935693740845, | |
| "step": 3256 | |
| }, | |
| { | |
| "epoch": 3.978021978021978, | |
| "grad_norm": 2.5601906776428223, | |
| "learning_rate": 5.004138728887892e-07, | |
| "loss": 0.40299245715141296, | |
| "step": 3258 | |
| }, | |
| { | |
| "epoch": 3.9804639804639805, | |
| "grad_norm": 2.45926570892334, | |
| "learning_rate": 5.003313296294612e-07, | |
| "loss": 0.5143805146217346, | |
| "step": 3260 | |
| }, | |
| { | |
| "epoch": 3.982905982905983, | |
| "grad_norm": 1.9476388692855835, | |
| "learning_rate": 5.002579569959336e-07, | |
| "loss": 0.5361751914024353, | |
| "step": 3262 | |
| }, | |
| { | |
| "epoch": 3.9853479853479854, | |
| "grad_norm": 4.383269786834717, | |
| "learning_rate": 5.001937552873049e-07, | |
| "loss": 0.4276546835899353, | |
| "step": 3264 | |
| }, | |
| { | |
| "epoch": 3.987789987789988, | |
| "grad_norm": 4.4012627601623535, | |
| "learning_rate": 5.001387247652891e-07, | |
| "loss": 0.8529163002967834, | |
| "step": 3266 | |
| }, | |
| { | |
| "epoch": 3.9902319902319903, | |
| "grad_norm": 1.4377570152282715, | |
| "learning_rate": 5.000928656542145e-07, | |
| "loss": 0.9019818902015686, | |
| "step": 3268 | |
| }, | |
| { | |
| "epoch": 3.9926739926739927, | |
| "grad_norm": 2.091071605682373, | |
| "learning_rate": 5.000561781410232e-07, | |
| "loss": 0.6819381713867188, | |
| "step": 3270 | |
| }, | |
| { | |
| "epoch": 3.995115995115995, | |
| "grad_norm": 1.7002183198928833, | |
| "learning_rate": 5.000286623752688e-07, | |
| "loss": 0.9077348113059998, | |
| "step": 3272 | |
| }, | |
| { | |
| "epoch": 3.9975579975579976, | |
| "grad_norm": 1.8634474277496338, | |
| "learning_rate": 5.000103184691177e-07, | |
| "loss": 0.8196188807487488, | |
| "step": 3274 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "grad_norm": 8.092415809631348, | |
| "learning_rate": 5.000011464973476e-07, | |
| "loss": 0.480937659740448, | |
| "step": 3276 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "step": 3276, | |
| "total_flos": 3.438047841308639e+18, | |
| "train_loss": 0.8972434957087567, | |
| "train_runtime": 10632.4216, | |
| "train_samples_per_second": 4.93, | |
| "train_steps_per_second": 0.308 | |
| } | |
| ], | |
| "logging_steps": 2, | |
| "max_steps": 3276, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 4, | |
| "save_steps": 99999, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": false, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 3.438047841308639e+18, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |