Instructions to use furproxy/27b-1-lora with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- PEFT
How to use furproxy/27b-1-lora with PEFT:
```python
from peft import PeftModel
from transformers import AutoModelForCausalLM

base_model = AutoModelForCausalLM.from_pretrained("/workspace/models/Qwen3.6-27B")
model = PeftModel.from_pretrained(base_model, "furproxy/27b-1-lora")
```
- Transformers
How to use furproxy/27b-1-lora with Transformers:
```python
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("text-generation", model="furproxy/27b-1-lora")
messages = [
    {"role": "user", "content": "Who are you?"},
]
pipe(messages)
```
```python
# Load model directly
from transformers import AutoModel

model = AutoModel.from_pretrained("furproxy/27b-1-lora", dtype="auto")
```
- Notebooks
- Google Colab
- Kaggle
- Local Apps Settings
- vLLM
How to use furproxy/27b-1-lora with vLLM:
Install from pip and serve model
```bash
# Install vLLM from pip:
pip install vllm

# Start the vLLM server:
vllm serve "furproxy/27b-1-lora"

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/chat/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "furproxy/27b-1-lora",
        "messages": [
            {
                "role": "user",
                "content": "What is the capital of France?"
            }
        ]
    }'
```
Use Docker
docker model run hf.co/furproxy/27b-1-lora
- SGLang
How to use furproxy/27b-1-lora with SGLang:
Install from pip and serve model
```bash
# Install SGLang from pip:
pip install sglang

# Start the SGLang server:
python3 -m sglang.launch_server \
    --model-path "furproxy/27b-1-lora" \
    --host 0.0.0.0 \
    --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "furproxy/27b-1-lora",
        "messages": [
            {
                "role": "user",
                "content": "What is the capital of France?"
            }
        ]
    }'
```
Use Docker images
```bash
docker run --gpus all \
    --shm-size 32g \
    -p 30000:30000 \
    -v ~/.cache/huggingface:/root/.cache/huggingface \
    --env "HF_TOKEN=<secret>" \
    --ipc=host \
    lmsysorg/sglang:latest \
    python3 -m sglang.launch_server \
        --model-path "furproxy/27b-1-lora" \
        --host 0.0.0.0 \
        --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "furproxy/27b-1-lora",
        "messages": [
            {
                "role": "user",
                "content": "What is the capital of France?"
            }
        ]
    }'
```
- Docker Model Runner
How to use furproxy/27b-1-lora with Docker Model Runner:
docker model run hf.co/furproxy/27b-1-lora
| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 3.0, | |
| "eval_steps": 500, | |
| "global_step": 1638, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.003663003663003663, | |
| "grad_norm": 2.875383138656616, | |
| "learning_rate": 2.0000000000000002e-07, | |
| "loss": 2.6876986026763916, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.007326007326007326, | |
| "grad_norm": 0.7562136650085449, | |
| "learning_rate": 6.000000000000001e-07, | |
| "loss": 1.6656783819198608, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.01098901098901099, | |
| "grad_norm": 0.39378538727760315, | |
| "learning_rate": 1.0000000000000002e-06, | |
| "loss": 1.8813486099243164, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.014652014652014652, | |
| "grad_norm": 0.1678856462240219, | |
| "learning_rate": 1.4000000000000001e-06, | |
| "loss": 2.073314666748047, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.018315018315018316, | |
| "grad_norm": 0.16630569100379944, | |
| "learning_rate": 1.8000000000000001e-06, | |
| "loss": 2.2603981494903564, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.02197802197802198, | |
| "grad_norm": 1.2313497066497803, | |
| "learning_rate": 2.2e-06, | |
| "loss": 2.0936238765716553, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.02564102564102564, | |
| "grad_norm": 0.6229072213172913, | |
| "learning_rate": 2.6e-06, | |
| "loss": 1.786569595336914, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.029304029304029304, | |
| "grad_norm": 0.21894435584545135, | |
| "learning_rate": 3e-06, | |
| "loss": 1.9302886724472046, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.03296703296703297, | |
| "grad_norm": 0.8146782517433167, | |
| "learning_rate": 3.4000000000000005e-06, | |
| "loss": 1.9158211946487427, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.03663003663003663, | |
| "grad_norm": 0.139973446726799, | |
| "learning_rate": 3.8000000000000005e-06, | |
| "loss": 1.801032543182373, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.040293040293040296, | |
| "grad_norm": 0.9421126842498779, | |
| "learning_rate": 4.2000000000000004e-06, | |
| "loss": 1.4370536804199219, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.04395604395604396, | |
| "grad_norm": 0.2687402665615082, | |
| "learning_rate": 4.600000000000001e-06, | |
| "loss": 1.680647611618042, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.047619047619047616, | |
| "grad_norm": 0.15750955045223236, | |
| "learning_rate": 5e-06, | |
| "loss": 1.6444307565689087, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.05128205128205128, | |
| "grad_norm": 0.3872029483318329, | |
| "learning_rate": 5.400000000000001e-06, | |
| "loss": 1.9668141603469849, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.054945054945054944, | |
| "grad_norm": 0.7753072381019592, | |
| "learning_rate": 5.8e-06, | |
| "loss": 1.345158576965332, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.05860805860805861, | |
| "grad_norm": 4.0286335945129395, | |
| "learning_rate": 6.200000000000001e-06, | |
| "loss": 1.2935595512390137, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.06227106227106227, | |
| "grad_norm": 0.11804597079753876, | |
| "learning_rate": 6.600000000000001e-06, | |
| "loss": 1.310804843902588, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.06593406593406594, | |
| "grad_norm": 0.3004632592201233, | |
| "learning_rate": 7e-06, | |
| "loss": 1.6871081590652466, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.0695970695970696, | |
| "grad_norm": 0.3273477554321289, | |
| "learning_rate": 7.4e-06, | |
| "loss": 1.4714224338531494, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.07326007326007326, | |
| "grad_norm": 0.38304704427719116, | |
| "learning_rate": 7.800000000000002e-06, | |
| "loss": 1.5327234268188477, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.07692307692307693, | |
| "grad_norm": 0.17035049200057983, | |
| "learning_rate": 8.2e-06, | |
| "loss": 1.2890535593032837, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.08058608058608059, | |
| "grad_norm": 0.18172013759613037, | |
| "learning_rate": 8.6e-06, | |
| "loss": 1.5969315767288208, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.08424908424908426, | |
| "grad_norm": 0.1807372272014618, | |
| "learning_rate": 9e-06, | |
| "loss": 1.6807602643966675, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.08791208791208792, | |
| "grad_norm": 0.2631019353866577, | |
| "learning_rate": 9.4e-06, | |
| "loss": 1.1396199464797974, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.09157509157509157, | |
| "grad_norm": 0.10772737115621567, | |
| "learning_rate": 9.800000000000001e-06, | |
| "loss": 1.576991319656372, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.09523809523809523, | |
| "grad_norm": 0.1471075862646103, | |
| "learning_rate": 9.999991193950434e-06, | |
| "loss": 0.8256194591522217, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.0989010989010989, | |
| "grad_norm": 2.466968059539795, | |
| "learning_rate": 9.999920745760685e-06, | |
| "loss": 1.1205874681472778, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.10256410256410256, | |
| "grad_norm": 0.4292626678943634, | |
| "learning_rate": 9.99977985048407e-06, | |
| "loss": 1.397847294807434, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.10622710622710622, | |
| "grad_norm": 0.11925622820854187, | |
| "learning_rate": 9.999568510326332e-06, | |
| "loss": 1.2620929479599, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.10989010989010989, | |
| "grad_norm": 0.08886076509952545, | |
| "learning_rate": 9.999286728596034e-06, | |
| "loss": 1.502614140510559, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.11355311355311355, | |
| "grad_norm": 0.31627047061920166, | |
| "learning_rate": 9.998934509704524e-06, | |
| "loss": 1.5195817947387695, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.11721611721611722, | |
| "grad_norm": 0.12464763224124908, | |
| "learning_rate": 9.998511859165853e-06, | |
| "loss": 1.4700745344161987, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.12087912087912088, | |
| "grad_norm": 0.2327689528465271, | |
| "learning_rate": 9.998018783596694e-06, | |
| "loss": 1.4286034107208252, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.12454212454212454, | |
| "grad_norm": 0.14609983563423157, | |
| "learning_rate": 9.997455290716233e-06, | |
| "loss": 1.3966401815414429, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.1282051282051282, | |
| "grad_norm": 0.4661511182785034, | |
| "learning_rate": 9.996821389346058e-06, | |
| "loss": 1.2351371049880981, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.13186813186813187, | |
| "grad_norm": 0.15036970376968384, | |
| "learning_rate": 9.99611708941001e-06, | |
| "loss": 1.5122816562652588, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.13553113553113552, | |
| "grad_norm": 0.1798364669084549, | |
| "learning_rate": 9.995342401934034e-06, | |
| "loss": 1.6327002048492432, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.1391941391941392, | |
| "grad_norm": 0.10958249866962433, | |
| "learning_rate": 9.994497339046004e-06, | |
| "loss": 1.124930739402771, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.14285714285714285, | |
| "grad_norm": 0.09284580498933792, | |
| "learning_rate": 9.993581913975538e-06, | |
| "loss": 1.516735553741455, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.14652014652014653, | |
| "grad_norm": 0.3077796399593353, | |
| "learning_rate": 9.99259614105378e-06, | |
| "loss": 1.620766043663025, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.15018315018315018, | |
| "grad_norm": 0.1259474903345108, | |
| "learning_rate": 9.99154003571319e-06, | |
| "loss": 1.2517590522766113, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.15384615384615385, | |
| "grad_norm": 0.10728312283754349, | |
| "learning_rate": 9.990413614487288e-06, | |
| "loss": 1.4343254566192627, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.1575091575091575, | |
| "grad_norm": 0.3186304569244385, | |
| "learning_rate": 9.989216895010406e-06, | |
| "loss": 1.5559083223342896, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.16117216117216118, | |
| "grad_norm": 0.47098618745803833, | |
| "learning_rate": 9.987949896017412e-06, | |
| "loss": 0.7234257459640503, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.16483516483516483, | |
| "grad_norm": 0.07171591371297836, | |
| "learning_rate": 9.986612637343402e-06, | |
| "loss": 0.920280396938324, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.1684981684981685, | |
| "grad_norm": 0.15488727390766144, | |
| "learning_rate": 9.985205139923408e-06, | |
| "loss": 1.374828577041626, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.17216117216117216, | |
| "grad_norm": 0.17309579253196716, | |
| "learning_rate": 9.983727425792066e-06, | |
| "loss": 1.40683114528656, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.17582417582417584, | |
| "grad_norm": 0.47913244366645813, | |
| "learning_rate": 9.982179518083255e-06, | |
| "loss": 1.296237826347351, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.1794871794871795, | |
| "grad_norm": 0.2404627501964569, | |
| "learning_rate": 9.980561441029761e-06, | |
| "loss": 1.3930805921554565, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.18315018315018314, | |
| "grad_norm": 0.273111492395401, | |
| "learning_rate": 9.978873219962874e-06, | |
| "loss": 1.218533992767334, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.18681318681318682, | |
| "grad_norm": 0.524663507938385, | |
| "learning_rate": 9.977114881312008e-06, | |
| "loss": 1.3098607063293457, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.19047619047619047, | |
| "grad_norm": 0.15053874254226685, | |
| "learning_rate": 9.975286452604275e-06, | |
| "loss": 1.0048173666000366, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.19413919413919414, | |
| "grad_norm": 0.3083300292491913, | |
| "learning_rate": 9.973387962464066e-06, | |
| "loss": 1.1155184507369995, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.1978021978021978, | |
| "grad_norm": 0.2675297260284424, | |
| "learning_rate": 9.971419440612591e-06, | |
| "loss": 1.3713957071304321, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.20146520146520147, | |
| "grad_norm": 0.12364854663610458, | |
| "learning_rate": 9.969380917867421e-06, | |
| "loss": 1.2191200256347656, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.20512820512820512, | |
| "grad_norm": 0.09928097575902939, | |
| "learning_rate": 9.967272426142007e-06, | |
| "loss": 1.275339961051941, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.2087912087912088, | |
| "grad_norm": 0.24085743725299835, | |
| "learning_rate": 9.965093998445174e-06, | |
| "loss": 0.6748014092445374, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.21245421245421245, | |
| "grad_norm": 0.3158385157585144, | |
| "learning_rate": 9.962845668880606e-06, | |
| "loss": 1.343500018119812, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.21611721611721613, | |
| "grad_norm": 0.18101683259010315, | |
| "learning_rate": 9.96052747264632e-06, | |
| "loss": 1.3496334552764893, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 0.21978021978021978, | |
| "grad_norm": 0.22877711057662964, | |
| "learning_rate": 9.9581394460341e-06, | |
| "loss": 1.399277925491333, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.22344322344322345, | |
| "grad_norm": 0.1900375783443451, | |
| "learning_rate": 9.955681626428944e-06, | |
| "loss": 1.351351261138916, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 0.2271062271062271, | |
| "grad_norm": 0.2790425419807434, | |
| "learning_rate": 9.95315405230847e-06, | |
| "loss": 1.0575695037841797, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.23076923076923078, | |
| "grad_norm": 0.1452687829732895, | |
| "learning_rate": 9.950556763242316e-06, | |
| "loss": 0.9282295107841492, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 0.23443223443223443, | |
| "grad_norm": 0.4646700322628021, | |
| "learning_rate": 9.947889799891517e-06, | |
| "loss": 1.2313032150268555, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.23809523809523808, | |
| "grad_norm": 0.2979331612586975, | |
| "learning_rate": 9.94515320400788e-06, | |
| "loss": 1.3399178981781006, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.24175824175824176, | |
| "grad_norm": 0.13976508378982544, | |
| "learning_rate": 9.942347018433312e-06, | |
| "loss": 1.439223289489746, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 0.2454212454212454, | |
| "grad_norm": 0.18163283169269562, | |
| "learning_rate": 9.939471287099167e-06, | |
| "loss": 1.3410747051239014, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 0.2490842490842491, | |
| "grad_norm": 0.62637859582901, | |
| "learning_rate": 9.936526055025547e-06, | |
| "loss": 1.3174734115600586, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.25274725274725274, | |
| "grad_norm": 0.33270207047462463, | |
| "learning_rate": 9.933511368320602e-06, | |
| "loss": 1.1196776628494263, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 0.2564102564102564, | |
| "grad_norm": 0.10809160023927689, | |
| "learning_rate": 9.930427274179808e-06, | |
| "loss": 1.3199713230133057, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.2600732600732601, | |
| "grad_norm": 0.31561923027038574, | |
| "learning_rate": 9.927273820885223e-06, | |
| "loss": 1.3330121040344238, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 0.26373626373626374, | |
| "grad_norm": 0.15577837824821472, | |
| "learning_rate": 9.924051057804742e-06, | |
| "loss": 1.2973798513412476, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.2673992673992674, | |
| "grad_norm": 0.2457117736339569, | |
| "learning_rate": 9.920759035391308e-06, | |
| "loss": 1.454565405845642, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 0.27106227106227104, | |
| "grad_norm": 0.8564723134040833, | |
| "learning_rate": 9.917397805182145e-06, | |
| "loss": 1.354149580001831, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 0.27472527472527475, | |
| "grad_norm": 0.11519961059093475, | |
| "learning_rate": 9.913967419797924e-06, | |
| "loss": 0.9727800488471985, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.2783882783882784, | |
| "grad_norm": 0.253650039434433, | |
| "learning_rate": 9.910467932941962e-06, | |
| "loss": 0.6865445375442505, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 0.28205128205128205, | |
| "grad_norm": 0.2849763333797455, | |
| "learning_rate": 9.90689939939937e-06, | |
| "loss": 1.126017451286316, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 0.2857142857142857, | |
| "grad_norm": 0.17028306424617767, | |
| "learning_rate": 9.903261875036192e-06, | |
| "loss": 1.3710747957229614, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 0.2893772893772894, | |
| "grad_norm": 0.13653086125850677, | |
| "learning_rate": 9.899555416798546e-06, | |
| "loss": 1.3111331462860107, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 0.29304029304029305, | |
| "grad_norm": 0.5475143790245056, | |
| "learning_rate": 9.895780082711717e-06, | |
| "loss": 0.9597386717796326, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.2967032967032967, | |
| "grad_norm": 0.3660220503807068, | |
| "learning_rate": 9.891935931879252e-06, | |
| "loss": 1.5575504302978516, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 0.30036630036630035, | |
| "grad_norm": 0.12948466837406158, | |
| "learning_rate": 9.888023024482041e-06, | |
| "loss": 1.0819988250732422, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 0.304029304029304, | |
| "grad_norm": 0.5461969375610352, | |
| "learning_rate": 9.884041421777369e-06, | |
| "loss": 1.4256272315979004, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 0.3076923076923077, | |
| "grad_norm": 0.16308943927288055, | |
| "learning_rate": 9.879991186097959e-06, | |
| "loss": 1.1550545692443848, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 0.31135531135531136, | |
| "grad_norm": 0.23025067150592804, | |
| "learning_rate": 9.875872380850992e-06, | |
| "loss": 1.2838108539581299, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.315018315018315, | |
| "grad_norm": 1.0842785835266113, | |
| "learning_rate": 9.871685070517124e-06, | |
| "loss": 1.027992606163025, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 0.31868131868131866, | |
| "grad_norm": 0.8840537071228027, | |
| "learning_rate": 9.86742932064947e-06, | |
| "loss": 0.8895283341407776, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 0.32234432234432236, | |
| "grad_norm": 0.4256756901741028, | |
| "learning_rate": 9.863105197872574e-06, | |
| "loss": 1.4210491180419922, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 0.326007326007326, | |
| "grad_norm": 0.32473987340927124, | |
| "learning_rate": 9.858712769881375e-06, | |
| "loss": 0.940653920173645, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 0.32967032967032966, | |
| "grad_norm": 0.1946435272693634, | |
| "learning_rate": 9.854252105440142e-06, | |
| "loss": 1.523209810256958, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.3333333333333333, | |
| "grad_norm": 0.12392517179250717, | |
| "learning_rate": 9.849723274381395e-06, | |
| "loss": 0.9991880059242249, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 0.336996336996337, | |
| "grad_norm": 0.18956027925014496, | |
| "learning_rate": 9.845126347604818e-06, | |
| "loss": 1.2698228359222412, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 0.34065934065934067, | |
| "grad_norm": 0.31590884923934937, | |
| "learning_rate": 9.840461397076147e-06, | |
| "loss": 1.3860504627227783, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 0.3443223443223443, | |
| "grad_norm": 0.11410943418741226, | |
| "learning_rate": 9.835728495826036e-06, | |
| "loss": 1.1887812614440918, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 0.34798534798534797, | |
| "grad_norm": 0.29438552260398865, | |
| "learning_rate": 9.830927717948929e-06, | |
| "loss": 1.294023036956787, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.3516483516483517, | |
| "grad_norm": 1.1163926124572754, | |
| "learning_rate": 9.826059138601883e-06, | |
| "loss": 1.124396800994873, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 0.3553113553113553, | |
| "grad_norm": 0.09197133034467697, | |
| "learning_rate": 9.821122834003407e-06, | |
| "loss": 1.27751624584198, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 0.358974358974359, | |
| "grad_norm": 0.23845773935317993, | |
| "learning_rate": 9.816118881432255e-06, | |
| "loss": 1.2824617624282837, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 0.3626373626373626, | |
| "grad_norm": 0.16290828585624695, | |
| "learning_rate": 9.811047359226224e-06, | |
| "loss": 0.8826183080673218, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 0.3663003663003663, | |
| "grad_norm": 0.24791596829891205, | |
| "learning_rate": 9.805908346780929e-06, | |
| "loss": 1.044391393661499, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.36996336996337, | |
| "grad_norm": 0.2740170955657959, | |
| "learning_rate": 9.80070192454855e-06, | |
| "loss": 1.4561749696731567, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 0.37362637362637363, | |
| "grad_norm": 0.33053258061408997, | |
| "learning_rate": 9.795428174036591e-06, | |
| "loss": 1.2278764247894287, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 0.3772893772893773, | |
| "grad_norm": 0.7583060264587402, | |
| "learning_rate": 9.790087177806584e-06, | |
| "loss": 0.7968496084213257, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 0.38095238095238093, | |
| "grad_norm": 0.2668805718421936, | |
| "learning_rate": 9.784679019472809e-06, | |
| "loss": 1.1589165925979614, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 0.38461538461538464, | |
| "grad_norm": 0.21432484686374664, | |
| "learning_rate": 9.779203783700972e-06, | |
| "loss": 1.4328304529190063, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.3882783882783883, | |
| "grad_norm": 0.191499263048172, | |
| "learning_rate": 9.773661556206903e-06, | |
| "loss": 1.0945113897323608, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 0.39194139194139194, | |
| "grad_norm": 0.15214745700359344, | |
| "learning_rate": 9.768052423755192e-06, | |
| "loss": 1.1581294536590576, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 0.3956043956043956, | |
| "grad_norm": 0.23848576843738556, | |
| "learning_rate": 9.762376474157839e-06, | |
| "loss": 1.2475342750549316, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 0.3992673992673993, | |
| "grad_norm": 0.2269514501094818, | |
| "learning_rate": 9.756633796272876e-06, | |
| "loss": 1.2841179370880127, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 0.40293040293040294, | |
| "grad_norm": 0.07938987016677856, | |
| "learning_rate": 9.750824480002982e-06, | |
| "loss": 0.623121976852417, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.4065934065934066, | |
| "grad_norm": 0.3509514331817627, | |
| "learning_rate": 9.744948616294074e-06, | |
| "loss": 1.364533543586731, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 0.41025641025641024, | |
| "grad_norm": 0.20469792187213898, | |
| "learning_rate": 9.739006297133878e-06, | |
| "loss": 1.0975794792175293, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 0.4139194139194139, | |
| "grad_norm": 0.2600097358226776, | |
| "learning_rate": 9.732997615550495e-06, | |
| "loss": 1.2632966041564941, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 0.4175824175824176, | |
| "grad_norm": 0.15840594470500946, | |
| "learning_rate": 9.726922665610935e-06, | |
| "loss": 1.3373838663101196, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 0.42124542124542125, | |
| "grad_norm": 0.43822696805000305, | |
| "learning_rate": 9.720781542419662e-06, | |
| "loss": 1.2531630992889404, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.4249084249084249, | |
| "grad_norm": 0.5942100286483765, | |
| "learning_rate": 9.714574342117086e-06, | |
| "loss": 1.0207842588424683, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 0.42857142857142855, | |
| "grad_norm": 0.23664861917495728, | |
| "learning_rate": 9.70830116187807e-06, | |
| "loss": 1.5113677978515625, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 0.43223443223443225, | |
| "grad_norm": 0.3284321427345276, | |
| "learning_rate": 9.701962099910407e-06, | |
| "loss": 1.0360337495803833, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 0.4358974358974359, | |
| "grad_norm": 0.2513348460197449, | |
| "learning_rate": 9.695557255453273e-06, | |
| "loss": 1.0973368883132935, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 0.43956043956043955, | |
| "grad_norm": 0.45316001772880554, | |
| "learning_rate": 9.68908672877569e-06, | |
| "loss": 0.9152914881706238, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.4432234432234432, | |
| "grad_norm": 0.2768547236919403, | |
| "learning_rate": 9.682550621174942e-06, | |
| "loss": 0.8826823830604553, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 0.4468864468864469, | |
| "grad_norm": 0.14853699505329132, | |
| "learning_rate": 9.675949034974992e-06, | |
| "loss": 0.5798932313919067, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 0.45054945054945056, | |
| "grad_norm": 0.1571403294801712, | |
| "learning_rate": 9.669282073524892e-06, | |
| "loss": 1.2800544500350952, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 0.4542124542124542, | |
| "grad_norm": 0.20789006352424622, | |
| "learning_rate": 9.662549841197148e-06, | |
| "loss": 0.893764853477478, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 0.45787545787545786, | |
| "grad_norm": 0.7506678104400635, | |
| "learning_rate": 9.655752443386092e-06, | |
| "loss": 1.2865655422210693, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.46153846153846156, | |
| "grad_norm": 0.39902183413505554, | |
| "learning_rate": 9.64888998650624e-06, | |
| "loss": 1.1993688344955444, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 0.4652014652014652, | |
| "grad_norm": 0.3465142846107483, | |
| "learning_rate": 9.641962577990614e-06, | |
| "loss": 0.9851580262184143, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 0.46886446886446886, | |
| "grad_norm": 0.18256494402885437, | |
| "learning_rate": 9.634970326289071e-06, | |
| "loss": 1.2847747802734375, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 0.4725274725274725, | |
| "grad_norm": 0.24586841464042664, | |
| "learning_rate": 9.627913340866597e-06, | |
| "loss": 1.3066174983978271, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 0.47619047619047616, | |
| "grad_norm": 0.11027955263853073, | |
| "learning_rate": 9.620791732201595e-06, | |
| "loss": 0.8039655685424805, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.47985347985347987, | |
| "grad_norm": 0.15749269723892212, | |
| "learning_rate": 9.613605611784158e-06, | |
| "loss": 1.1634037494659424, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 0.4835164835164835, | |
| "grad_norm": 0.23077067732810974, | |
| "learning_rate": 9.606355092114327e-06, | |
| "loss": 1.2528202533721924, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 0.48717948717948717, | |
| "grad_norm": 0.18674089014530182, | |
| "learning_rate": 9.599040286700317e-06, | |
| "loss": 1.5212275981903076, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 0.4908424908424908, | |
| "grad_norm": 0.4802699089050293, | |
| "learning_rate": 9.591661310056753e-06, | |
| "loss": 0.8288567662239075, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 0.4945054945054945, | |
| "grad_norm": 0.1448894888162613, | |
| "learning_rate": 9.58421827770287e-06, | |
| "loss": 1.2230876684188843, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.4981684981684982, | |
| "grad_norm": 0.19190412759780884, | |
| "learning_rate": 9.57671130616071e-06, | |
| "loss": 0.9024039506912231, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 0.5018315018315018, | |
| "grad_norm": 0.3073454797267914, | |
| "learning_rate": 9.569140512953296e-06, | |
| "loss": 1.2714391946792603, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 0.5054945054945055, | |
| "grad_norm": 0.3199959993362427, | |
| "learning_rate": 9.561506016602782e-06, | |
| "loss": 0.8202919363975525, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 0.5091575091575091, | |
| "grad_norm": 0.09401345998048782, | |
| "learning_rate": 9.553807936628617e-06, | |
| "loss": 0.8935064673423767, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 0.5128205128205128, | |
| "grad_norm": 0.21345993876457214, | |
| "learning_rate": 9.546046393545655e-06, | |
| "loss": 1.2741483449935913, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.5164835164835165, | |
| "grad_norm": 0.23345427215099335, | |
| "learning_rate": 9.538221508862284e-06, | |
| "loss": 1.2695109844207764, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 0.5201465201465202, | |
| "grad_norm": 0.16931022703647614, | |
| "learning_rate": 9.530333405078512e-06, | |
| "loss": 1.274514079093933, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 0.5238095238095238, | |
| "grad_norm": 0.33658501505851746, | |
| "learning_rate": 9.522382205684053e-06, | |
| "loss": 1.0144422054290771, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 0.5274725274725275, | |
| "grad_norm": 0.21759743988513947, | |
| "learning_rate": 9.514368035156398e-06, | |
| "loss": 1.2731945514678955, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 0.5311355311355311, | |
| "grad_norm": 0.17717669904232025, | |
| "learning_rate": 9.506291018958857e-06, | |
| "loss": 1.2374247312545776, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.5347985347985348, | |
| "grad_norm": 0.337706983089447, | |
| "learning_rate": 9.498151283538608e-06, | |
| "loss": 0.7559359669685364, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 0.5384615384615384, | |
| "grad_norm": 0.035663675516843796, | |
| "learning_rate": 9.489948956324706e-06, | |
| "loss": 0.9581714868545532, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 0.5421245421245421, | |
| "grad_norm": 0.12138810753822327, | |
| "learning_rate": 9.481684165726086e-06, | |
| "loss": 1.0345128774642944, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 0.5457875457875457, | |
| "grad_norm": 0.39733827114105225, | |
| "learning_rate": 9.473357041129572e-06, | |
| "loss": 1.3242045640945435, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 0.5494505494505495, | |
| "grad_norm": 0.16901174187660217, | |
| "learning_rate": 9.464967712897828e-06, | |
| "loss": 1.2276860475540161, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.5531135531135531, | |
| "grad_norm": 0.5484493374824524, | |
| "learning_rate": 9.456516312367328e-06, | |
| "loss": 1.2076282501220703, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 0.5567765567765568, | |
| "grad_norm": 0.17032906413078308, | |
| "learning_rate": 9.448002971846307e-06, | |
| "loss": 0.9942311644554138, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 0.5604395604395604, | |
| "grad_norm": 0.24507595598697662, | |
| "learning_rate": 9.439427824612673e-06, | |
| "loss": 0.9752069115638733, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 0.5641025641025641, | |
| "grad_norm": 0.40566012263298035, | |
| "learning_rate": 9.430791004911934e-06, | |
| "loss": 1.4564454555511475, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 0.5677655677655677, | |
| "grad_norm": 0.1568066030740738, | |
| "learning_rate": 9.42209264795509e-06, | |
| "loss": 1.0061030387878418, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.5714285714285714, | |
| "grad_norm": 0.16984346508979797, | |
| "learning_rate": 9.41333288991652e-06, | |
| "loss": 1.2216694355010986, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 0.575091575091575, | |
| "grad_norm": 0.09158849716186523, | |
| "learning_rate": 9.404511867931847e-06, | |
| "loss": 1.1522339582443237, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 0.5787545787545788, | |
| "grad_norm": 0.16296543180942535, | |
| "learning_rate": 9.39562972009579e-06, | |
| "loss": 1.293960452079773, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 0.5824175824175825, | |
| "grad_norm": 0.24195973575115204, | |
| "learning_rate": 9.386686585460011e-06, | |
| "loss": 1.1431677341461182, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 0.5860805860805861, | |
| "grad_norm": 0.1092909500002861, | |
| "learning_rate": 9.377682604030925e-06, | |
| "loss": 1.3567752838134766, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.5897435897435898, | |
| "grad_norm": 0.1672687828540802, | |
| "learning_rate": 9.368617916767517e-06, | |
| "loss": 1.5480321645736694, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 0.5934065934065934, | |
| "grad_norm": 0.18804782629013062, | |
| "learning_rate": 9.359492665579136e-06, | |
| "loss": 1.2884105443954468, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 0.5970695970695971, | |
| "grad_norm": 0.2078697383403778, | |
| "learning_rate": 9.350306993323265e-06, | |
| "loss": 1.3802863359451294, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 0.6007326007326007, | |
| "grad_norm": 0.16467250883579254, | |
| "learning_rate": 9.34106104380329e-06, | |
| "loss": 1.2509921789169312, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 0.6043956043956044, | |
| "grad_norm": 0.46313583850860596, | |
| "learning_rate": 9.331754961766257e-06, | |
| "loss": 1.140839695930481, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.608058608058608, | |
| "grad_norm": 0.14376887679100037, | |
| "learning_rate": 9.322388892900587e-06, | |
| "loss": 1.201643943786621, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 0.6117216117216118, | |
| "grad_norm": 0.1362253874540329, | |
| "learning_rate": 9.312962983833815e-06, | |
| "loss": 1.3028783798217773, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 0.6153846153846154, | |
| "grad_norm": 3.4290378093719482, | |
| "learning_rate": 9.303477382130278e-06, | |
| "loss": 0.973407506942749, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 0.6190476190476191, | |
| "grad_norm": 0.16140861809253693, | |
| "learning_rate": 9.293932236288816e-06, | |
| "loss": 1.2559469938278198, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 0.6227106227106227, | |
| "grad_norm": 0.1613743007183075, | |
| "learning_rate": 9.284327695740441e-06, | |
| "loss": 1.256553292274475, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.6263736263736264, | |
| "grad_norm": 0.34570202231407166, | |
| "learning_rate": 9.274663910846004e-06, | |
| "loss": 0.5801024436950684, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 0.63003663003663, | |
| "grad_norm": 0.28319358825683594, | |
| "learning_rate": 9.264941032893836e-06, | |
| "loss": 1.4648103713989258, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 0.6336996336996337, | |
| "grad_norm": 16.52604866027832, | |
| "learning_rate": 9.255159214097374e-06, | |
| "loss": 0.6978890895843506, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 0.6373626373626373, | |
| "grad_norm": 0.19958341121673584, | |
| "learning_rate": 9.245318607592795e-06, | |
| "loss": 1.1675150394439697, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 0.6410256410256411, | |
| "grad_norm": 0.24133825302124023, | |
| "learning_rate": 9.235419367436602e-06, | |
| "loss": 0.8993176221847534, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.6446886446886447, | |
| "grad_norm": 0.20524722337722778, | |
| "learning_rate": 9.225461648603223e-06, | |
| "loss": 0.9288710951805115, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 0.6483516483516484, | |
| "grad_norm": 0.391886830329895, | |
| "learning_rate": 9.215445606982573e-06, | |
| "loss": 0.9668469429016113, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 0.652014652014652, | |
| "grad_norm": 0.2540344297885895, | |
| "learning_rate": 9.205371399377628e-06, | |
| "loss": 1.1877306699752808, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 0.6556776556776557, | |
| "grad_norm": 0.21765393018722534, | |
| "learning_rate": 9.195239183501961e-06, | |
| "loss": 1.1672714948654175, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 0.6593406593406593, | |
| "grad_norm": 0.19967345893383026, | |
| "learning_rate": 9.185049117977276e-06, | |
| "loss": 0.7011613845825195, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.663003663003663, | |
| "grad_norm": 0.7372376322746277, | |
| "learning_rate": 9.17480136233092e-06, | |
| "loss": 0.9566145539283752, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 0.6666666666666666, | |
| "grad_norm": 0.20093770325183868, | |
| "learning_rate": 9.164496076993395e-06, | |
| "loss": 0.946535587310791, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 0.6703296703296703, | |
| "grad_norm": 0.2989659607410431, | |
| "learning_rate": 9.154133423295836e-06, | |
| "loss": 1.203826904296875, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 0.673992673992674, | |
| "grad_norm": 0.25106337666511536, | |
| "learning_rate": 9.143713563467495e-06, | |
| "loss": 1.0666961669921875, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 0.6776556776556777, | |
| "grad_norm": 0.11923722177743912, | |
| "learning_rate": 9.133236660633192e-06, | |
| "loss": 1.097327709197998, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.6813186813186813, | |
| "grad_norm": 0.3943967819213867, | |
| "learning_rate": 9.12270287881077e-06, | |
| "loss": 1.2011562585830688, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 0.684981684981685, | |
| "grad_norm": 0.1692187637090683, | |
| "learning_rate": 9.112112382908516e-06, | |
| "loss": 1.2239218950271606, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 0.6886446886446886, | |
| "grad_norm": 0.10792715102434158, | |
| "learning_rate": 9.101465338722596e-06, | |
| "loss": 0.9010005593299866, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 0.6923076923076923, | |
| "grad_norm": 0.1825140416622162, | |
| "learning_rate": 9.090761912934441e-06, | |
| "loss": 0.8389140367507935, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 0.6959706959706959, | |
| "grad_norm": 0.28178316354751587, | |
| "learning_rate": 9.080002273108155e-06, | |
| "loss": 1.0628230571746826, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.6996336996336996, | |
| "grad_norm": 0.11631765961647034, | |
| "learning_rate": 9.069186587687872e-06, | |
| "loss": 0.9880151152610779, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 0.7032967032967034, | |
| "grad_norm": 0.1353641152381897, | |
| "learning_rate": 9.058315025995142e-06, | |
| "loss": 1.2020447254180908, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 0.706959706959707, | |
| "grad_norm": 0.6966851949691772, | |
| "learning_rate": 9.047387758226261e-06, | |
| "loss": 1.1148114204406738, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 0.7106227106227107, | |
| "grad_norm": 0.08536599576473236, | |
| "learning_rate": 9.036404955449615e-06, | |
| "loss": 0.8987938165664673, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 0.7142857142857143, | |
| "grad_norm": 0.164885014295578, | |
| "learning_rate": 9.025366789603002e-06, | |
| "loss": 1.0990866422653198, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.717948717948718, | |
| "grad_norm": 0.1607430875301361, | |
| "learning_rate": 9.014273433490938e-06, | |
| "loss": 1.1975574493408203, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 0.7216117216117216, | |
| "grad_norm": 0.3020445704460144, | |
| "learning_rate": 9.003125060781951e-06, | |
| "loss": 1.1362345218658447, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 0.7252747252747253, | |
| "grad_norm": 0.23285327851772308, | |
| "learning_rate": 8.99192184600587e-06, | |
| "loss": 1.264463186264038, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 0.7289377289377289, | |
| "grad_norm": 0.1471405327320099, | |
| "learning_rate": 8.98066396455108e-06, | |
| "loss": 0.9352213740348816, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 0.7326007326007326, | |
| "grad_norm": 0.39496904611587524, | |
| "learning_rate": 8.969351592661787e-06, | |
| "loss": 0.8601157665252686, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.7362637362637363, | |
| "grad_norm": 0.10789740085601807, | |
| "learning_rate": 8.957984907435254e-06, | |
| "loss": 1.2675104141235352, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 0.73992673992674, | |
| "grad_norm": 0.13899658620357513, | |
| "learning_rate": 8.946564086819025e-06, | |
| "loss": 0.8569284081459045, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 0.7435897435897436, | |
| "grad_norm": 0.2528247833251953, | |
| "learning_rate": 8.935089309608152e-06, | |
| "loss": 1.0413234233856201, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 0.7472527472527473, | |
| "grad_norm": 0.14703120291233063, | |
| "learning_rate": 8.92356075544238e-06, | |
| "loss": 1.0387818813323975, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 0.7509157509157509, | |
| "grad_norm": 0.39029836654663086, | |
| "learning_rate": 8.911978604803346e-06, | |
| "loss": 0.8937767744064331, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.7545787545787546, | |
| "grad_norm": 0.23668618500232697, | |
| "learning_rate": 8.900343039011745e-06, | |
| "loss": 1.1923093795776367, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 0.7582417582417582, | |
| "grad_norm": 0.4645112454891205, | |
| "learning_rate": 8.888654240224503e-06, | |
| "loss": 1.0234112739562988, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 0.7619047619047619, | |
| "grad_norm": 0.35793423652648926, | |
| "learning_rate": 8.876912391431913e-06, | |
| "loss": 1.2955764532089233, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 0.7655677655677655, | |
| "grad_norm": 0.09080661088228226, | |
| "learning_rate": 8.86511767645478e-06, | |
| "loss": 0.6778948903083801, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 0.7692307692307693, | |
| "grad_norm": 0.5035378932952881, | |
| "learning_rate": 8.853270279941533e-06, | |
| "loss": 1.2608743906021118, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.7728937728937729, | |
| "grad_norm": 0.48378103971481323, | |
| "learning_rate": 8.841370387365344e-06, | |
| "loss": 1.015937328338623, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 0.7765567765567766, | |
| "grad_norm": 0.18112313747406006, | |
| "learning_rate": 8.829418185021221e-06, | |
| "loss": 0.5042012929916382, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 0.7802197802197802, | |
| "grad_norm": 0.2163010537624359, | |
| "learning_rate": 8.817413860023089e-06, | |
| "loss": 0.8268504738807678, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 0.7838827838827839, | |
| "grad_norm": 0.2586314082145691, | |
| "learning_rate": 8.805357600300863e-06, | |
| "loss": 1.0975161790847778, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 0.7875457875457875, | |
| "grad_norm": 0.32366475462913513, | |
| "learning_rate": 8.793249594597508e-06, | |
| "loss": 1.2304267883300781, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.7912087912087912, | |
| "grad_norm": 0.18010596930980682, | |
| "learning_rate": 8.781090032466079e-06, | |
| "loss": 1.3180345296859741, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 0.7948717948717948, | |
| "grad_norm": 0.15381111204624176, | |
| "learning_rate": 8.768879104266758e-06, | |
| "loss": 0.894809901714325, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 0.7985347985347986, | |
| "grad_norm": 0.3722904920578003, | |
| "learning_rate": 8.756617001163869e-06, | |
| "loss": 0.9750258326530457, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 0.8021978021978022, | |
| "grad_norm": 0.1223578080534935, | |
| "learning_rate": 8.744303915122895e-06, | |
| "loss": 0.8995143175125122, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 0.8058608058608059, | |
| "grad_norm": 0.17111073434352875, | |
| "learning_rate": 8.73194003890746e-06, | |
| "loss": 1.3294800519943237, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.8095238095238095, | |
| "grad_norm": 0.8301756978034973, | |
| "learning_rate": 8.719525566076322e-06, | |
| "loss": 1.2234307527542114, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 0.8131868131868132, | |
| "grad_norm": 0.26485782861709595, | |
| "learning_rate": 8.707060690980334e-06, | |
| "loss": 1.2229658365249634, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 0.8168498168498168, | |
| "grad_norm": 0.4809357225894928, | |
| "learning_rate": 8.69454560875941e-06, | |
| "loss": 1.2412751913070679, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 0.8205128205128205, | |
| "grad_norm": 0.19155052304267883, | |
| "learning_rate": 8.681980515339464e-06, | |
| "loss": 1.2396912574768066, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 0.8241758241758241, | |
| "grad_norm": 0.11245301365852356, | |
| "learning_rate": 8.669365607429344e-06, | |
| "loss": 1.467288851737976, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.8278388278388278, | |
| "grad_norm": 0.203065425157547, | |
| "learning_rate": 8.656701082517752e-06, | |
| "loss": 1.008663296699524, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 0.8315018315018315, | |
| "grad_norm": 0.0740152969956398, | |
| "learning_rate": 8.643987138870156e-06, | |
| "loss": 0.5013046860694885, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 0.8351648351648352, | |
| "grad_norm": 0.06140409782528877, | |
| "learning_rate": 8.631223975525683e-06, | |
| "loss": 0.9132590293884277, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 0.8388278388278388, | |
| "grad_norm": 0.7447589635848999, | |
| "learning_rate": 8.618411792293997e-06, | |
| "loss": 0.8399595618247986, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 0.8424908424908425, | |
| "grad_norm": 0.43025752902030945, | |
| "learning_rate": 8.605550789752191e-06, | |
| "loss": 1.0485363006591797, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.8461538461538461, | |
| "grad_norm": 0.1829681098461151, | |
| "learning_rate": 8.592641169241622e-06, | |
| "loss": 1.2453057765960693, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 0.8498168498168498, | |
| "grad_norm": 0.24352803826332092, | |
| "learning_rate": 8.579683132864769e-06, | |
| "loss": 1.193666696548462, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 0.8534798534798534, | |
| "grad_norm": 0.08289831876754761, | |
| "learning_rate": 8.56667688348208e-06, | |
| "loss": 1.2128486633300781, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 0.8571428571428571, | |
| "grad_norm": 0.13429510593414307, | |
| "learning_rate": 8.553622624708778e-06, | |
| "loss": 0.8921034932136536, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 0.8608058608058609, | |
| "grad_norm": 0.2065141648054123, | |
| "learning_rate": 8.540520560911688e-06, | |
| "loss": 0.9356565475463867, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.8644688644688645, | |
| "grad_norm": 0.8355104923248291, | |
| "learning_rate": 8.527370897206024e-06, | |
| "loss": 1.1900638341903687, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 0.8681318681318682, | |
| "grad_norm": 0.7807270884513855, | |
| "learning_rate": 8.514173839452194e-06, | |
| "loss": 0.9948893189430237, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 0.8717948717948718, | |
| "grad_norm": 0.13567706942558289, | |
| "learning_rate": 8.50092959425256e-06, | |
| "loss": 1.1165426969528198, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 0.8754578754578755, | |
| "grad_norm": 0.2913936376571655, | |
| "learning_rate": 8.487638368948221e-06, | |
| "loss": 1.0576797723770142, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 0.8791208791208791, | |
| "grad_norm": 0.502364993095398, | |
| "learning_rate": 8.47430037161575e-06, | |
| "loss": 1.0835438966751099, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.8827838827838828, | |
| "grad_norm": 0.7570469379425049, | |
| "learning_rate": 8.460915811063952e-06, | |
| "loss": 1.204832673072815, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 0.8864468864468864, | |
| "grad_norm": 0.20631951093673706, | |
| "learning_rate": 8.447484896830581e-06, | |
| "loss": 1.2826550006866455, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 0.8901098901098901, | |
| "grad_norm": 0.18968936800956726, | |
| "learning_rate": 8.43400783917907e-06, | |
| "loss": 0.9170786142349243, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 0.8937728937728938, | |
| "grad_norm": 0.357719749212265, | |
| "learning_rate": 8.420484849095233e-06, | |
| "loss": 1.1806507110595703, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 0.8974358974358975, | |
| "grad_norm": 0.10344009846448898, | |
| "learning_rate": 8.406916138283971e-06, | |
| "loss": 1.1227405071258545, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.9010989010989011, | |
| "grad_norm": 0.18624161183834076, | |
| "learning_rate": 8.393301919165947e-06, | |
| "loss": 1.067802906036377, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 0.9047619047619048, | |
| "grad_norm": 0.23537158966064453, | |
| "learning_rate": 8.379642404874261e-06, | |
| "loss": 0.5906503796577454, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 0.9084249084249084, | |
| "grad_norm": 0.28865331411361694, | |
| "learning_rate": 8.365937809251124e-06, | |
| "loss": 1.2992898225784302, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 0.9120879120879121, | |
| "grad_norm": 0.25028523802757263, | |
| "learning_rate": 8.352188346844501e-06, | |
| "loss": 1.1510648727416992, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 0.9157509157509157, | |
| "grad_norm": 0.16311851143836975, | |
| "learning_rate": 8.338394232904753e-06, | |
| "loss": 0.8221940398216248, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.9194139194139194, | |
| "grad_norm": 0.040686819702386856, | |
| "learning_rate": 8.324555683381276e-06, | |
| "loss": 0.8739909529685974, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 0.9230769230769231, | |
| "grad_norm": 0.22888082265853882, | |
| "learning_rate": 8.3106729149191e-06, | |
| "loss": 0.954814076423645, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 0.9267399267399268, | |
| "grad_norm": 0.1041514128446579, | |
| "learning_rate": 8.296746144855525e-06, | |
| "loss": 0.8583929538726807, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 0.9304029304029304, | |
| "grad_norm": 0.1827676147222519, | |
| "learning_rate": 8.282775591216691e-06, | |
| "loss": 1.1817222833633423, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 0.9340659340659341, | |
| "grad_norm": 0.21790483593940735, | |
| "learning_rate": 8.268761472714193e-06, | |
| "loss": 1.2396169900894165, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.9377289377289377, | |
| "grad_norm": 0.16166400909423828, | |
| "learning_rate": 8.254704008741629e-06, | |
| "loss": 1.1866990327835083, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 0.9413919413919414, | |
| "grad_norm": 0.1379297822713852, | |
| "learning_rate": 8.240603419371181e-06, | |
| "loss": 0.9622292518615723, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 0.945054945054945, | |
| "grad_norm": 0.12670904397964478, | |
| "learning_rate": 8.22645992535017e-06, | |
| "loss": 1.1956757307052612, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 0.9487179487179487, | |
| "grad_norm": 0.09928123652935028, | |
| "learning_rate": 8.2122737480976e-06, | |
| "loss": 1.2315433025360107, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 0.9523809523809523, | |
| "grad_norm": 0.3714980185031891, | |
| "learning_rate": 8.19804510970068e-06, | |
| "loss": 1.2325382232666016, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.9560439560439561, | |
| "grad_norm": 0.1642584353685379, | |
| "learning_rate": 8.183774232911362e-06, | |
| "loss": 0.8959419131278992, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 0.9597069597069597, | |
| "grad_norm": 0.30108144879341125, | |
| "learning_rate": 8.169461341142848e-06, | |
| "loss": 1.0453133583068848, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 0.9633699633699634, | |
| "grad_norm": 0.2879306972026825, | |
| "learning_rate": 8.155106658466094e-06, | |
| "loss": 0.9845118522644043, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 0.967032967032967, | |
| "grad_norm": 0.32583609223365784, | |
| "learning_rate": 8.140710409606289e-06, | |
| "loss": 0.657010018825531, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 0.9706959706959707, | |
| "grad_norm": 0.19444482028484344, | |
| "learning_rate": 8.126272819939364e-06, | |
| "loss": 0.9151591062545776, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.9743589743589743, | |
| "grad_norm": 1.4497026205062866, | |
| "learning_rate": 8.111794115488437e-06, | |
| "loss": 1.3719483613967896, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 0.978021978021978, | |
| "grad_norm": 0.6636638641357422, | |
| "learning_rate": 8.097274522920291e-06, | |
| "loss": 0.6158185601234436, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 0.9816849816849816, | |
| "grad_norm": 0.23120753467082977, | |
| "learning_rate": 8.082714269541814e-06, | |
| "loss": 0.8738659620285034, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 0.9853479853479854, | |
| "grad_norm": 0.21588478982448578, | |
| "learning_rate": 8.068113583296456e-06, | |
| "loss": 0.8082484602928162, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 0.989010989010989, | |
| "grad_norm": 0.37794405221939087, | |
| "learning_rate": 8.053472692760643e-06, | |
| "loss": 1.1449978351593018, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.9926739926739927, | |
| "grad_norm": 0.21002766489982605, | |
| "learning_rate": 8.038791827140208e-06, | |
| "loss": 1.1861510276794434, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 0.9963369963369964, | |
| "grad_norm": 0.2881741523742676, | |
| "learning_rate": 8.0240712162668e-06, | |
| "loss": 1.2074682712554932, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 0.14409518241882324, | |
| "learning_rate": 8.009311090594297e-06, | |
| "loss": 1.3737009763717651, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 1.0036630036630036, | |
| "grad_norm": 0.3364971876144409, | |
| "learning_rate": 7.994511681195175e-06, | |
| "loss": 1.010398268699646, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 1.0073260073260073, | |
| "grad_norm": 0.20106230676174164, | |
| "learning_rate": 7.97967321975691e-06, | |
| "loss": 1.2091031074523926, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 1.010989010989011, | |
| "grad_norm": 0.3865948021411896, | |
| "learning_rate": 7.964795938578347e-06, | |
| "loss": 0.7033045887947083, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 1.0146520146520146, | |
| "grad_norm": 0.1542910784482956, | |
| "learning_rate": 7.949880070566058e-06, | |
| "loss": 1.161207914352417, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 1.0183150183150182, | |
| "grad_norm": 0.17765195667743683, | |
| "learning_rate": 7.9349258492307e-06, | |
| "loss": 1.050184726715088, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 1.021978021978022, | |
| "grad_norm": 0.13882611691951752, | |
| "learning_rate": 7.91993350868336e-06, | |
| "loss": 1.2430739402770996, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 1.0256410256410255, | |
| "grad_norm": 0.16586051881313324, | |
| "learning_rate": 7.904903283631884e-06, | |
| "loss": 0.9440419673919678, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 1.0293040293040292, | |
| "grad_norm": 0.19672122597694397, | |
| "learning_rate": 7.88983540937721e-06, | |
| "loss": 1.1903315782546997, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 1.032967032967033, | |
| "grad_norm": 0.23967498540878296, | |
| "learning_rate": 7.87473012180968e-06, | |
| "loss": 1.1820085048675537, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 1.0366300366300367, | |
| "grad_norm": 0.17814430594444275, | |
| "learning_rate": 7.859587657405353e-06, | |
| "loss": 1.196739912033081, | |
| "step": 566 | |
| }, | |
| { | |
| "epoch": 1.0402930402930404, | |
| "grad_norm": 0.1445707082748413, | |
| "learning_rate": 7.84440825322229e-06, | |
| "loss": 1.084715723991394, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 1.043956043956044, | |
| "grad_norm": 0.18223969638347626, | |
| "learning_rate": 7.829192146896854e-06, | |
| "loss": 1.1364811658859253, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 1.0476190476190477, | |
| "grad_norm": 0.1871526837348938, | |
| "learning_rate": 7.813939576639993e-06, | |
| "loss": 1.2730778455734253, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 1.0512820512820513, | |
| "grad_norm": 0.48518720269203186, | |
| "learning_rate": 7.798650781233495e-06, | |
| "loss": 1.1072925329208374, | |
| "step": 574 | |
| }, | |
| { | |
| "epoch": 1.054945054945055, | |
| "grad_norm": 0.2393021285533905, | |
| "learning_rate": 7.783326000026266e-06, | |
| "loss": 1.2872074842453003, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 1.0586080586080586, | |
| "grad_norm": 0.17235060036182404, | |
| "learning_rate": 7.767965472930575e-06, | |
| "loss": 1.2461888790130615, | |
| "step": 578 | |
| }, | |
| { | |
| "epoch": 1.0622710622710623, | |
| "grad_norm": 0.501559853553772, | |
| "learning_rate": 7.752569440418297e-06, | |
| "loss": 1.2427866458892822, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 1.065934065934066, | |
| "grad_norm": 0.2508564889431, | |
| "learning_rate": 7.737138143517153e-06, | |
| "loss": 1.0125867128372192, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 1.0695970695970696, | |
| "grad_norm": 0.34929409623146057, | |
| "learning_rate": 7.721671823806934e-06, | |
| "loss": 1.1760741472244263, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 1.0732600732600732, | |
| "grad_norm": 0.11314375698566437, | |
| "learning_rate": 7.70617072341572e-06, | |
| "loss": 1.127938151359558, | |
| "step": 586 | |
| }, | |
| { | |
| "epoch": 1.0769230769230769, | |
| "grad_norm": 0.1307147890329361, | |
| "learning_rate": 7.690635085016087e-06, | |
| "loss": 0.8474472165107727, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 1.0805860805860805, | |
| "grad_norm": 0.19595587253570557, | |
| "learning_rate": 7.675065151821313e-06, | |
| "loss": 1.2290217876434326, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 1.0842490842490842, | |
| "grad_norm": 0.05816657096147537, | |
| "learning_rate": 7.659461167581564e-06, | |
| "loss": 1.065525770187378, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 1.0879120879120878, | |
| "grad_norm": 0.1757960468530655, | |
| "learning_rate": 7.643823376580087e-06, | |
| "loss": 1.0110828876495361, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 1.0915750915750915, | |
| "grad_norm": 0.4253121614456177, | |
| "learning_rate": 7.628152023629369e-06, | |
| "loss": 1.0302798748016357, | |
| "step": 596 | |
| }, | |
| { | |
| "epoch": 1.0952380952380953, | |
| "grad_norm": 0.17801064252853394, | |
| "learning_rate": 7.61244735406733e-06, | |
| "loss": 1.1227004528045654, | |
| "step": 598 | |
| }, | |
| { | |
| "epoch": 1.098901098901099, | |
| "grad_norm": 0.24273711442947388, | |
| "learning_rate": 7.596709613753457e-06, | |
| "loss": 1.1816527843475342, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 1.1025641025641026, | |
| "grad_norm": 1.4160579442977905, | |
| "learning_rate": 7.5809390490649685e-06, | |
| "loss": 0.8195367455482483, | |
| "step": 602 | |
| }, | |
| { | |
| "epoch": 1.1062271062271063, | |
| "grad_norm": 0.25498881936073303, | |
| "learning_rate": 7.565135906892954e-06, | |
| "loss": 0.9860736727714539, | |
| "step": 604 | |
| }, | |
| { | |
| "epoch": 1.10989010989011, | |
| "grad_norm": 0.2312251329421997, | |
| "learning_rate": 7.549300434638515e-06, | |
| "loss": 0.9298585057258606, | |
| "step": 606 | |
| }, | |
| { | |
| "epoch": 1.1135531135531136, | |
| "grad_norm": 0.40971049666404724, | |
| "learning_rate": 7.533432880208879e-06, | |
| "loss": 1.3407394886016846, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 1.1172161172161172, | |
| "grad_norm": 0.26371797919273376, | |
| "learning_rate": 7.517533492013527e-06, | |
| "loss": 0.7484307289123535, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 1.120879120879121, | |
| "grad_norm": 0.18549509346485138, | |
| "learning_rate": 7.501602518960308e-06, | |
| "loss": 1.2191801071166992, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 1.1245421245421245, | |
| "grad_norm": 0.2595532238483429, | |
| "learning_rate": 7.485640210451535e-06, | |
| "loss": 1.0103733539581299, | |
| "step": 614 | |
| }, | |
| { | |
| "epoch": 1.1282051282051282, | |
| "grad_norm": 0.1361607164144516, | |
| "learning_rate": 7.469646816380085e-06, | |
| "loss": 1.1822372674942017, | |
| "step": 616 | |
| }, | |
| { | |
| "epoch": 1.1318681318681318, | |
| "grad_norm": 0.36064237356185913, | |
| "learning_rate": 7.453622587125479e-06, | |
| "loss": 0.42253974080085754, | |
| "step": 618 | |
| }, | |
| { | |
| "epoch": 1.1355311355311355, | |
| "grad_norm": 0.25480979681015015, | |
| "learning_rate": 7.437567773549976e-06, | |
| "loss": 1.1068378686904907, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 1.1391941391941391, | |
| "grad_norm": 0.4690706133842468, | |
| "learning_rate": 7.421482626994635e-06, | |
| "loss": 0.7852658629417419, | |
| "step": 622 | |
| }, | |
| { | |
| "epoch": 1.1428571428571428, | |
| "grad_norm": 0.2576858401298523, | |
| "learning_rate": 7.405367399275384e-06, | |
| "loss": 1.1447125673294067, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 1.1465201465201464, | |
| "grad_norm": 0.1483878493309021, | |
| "learning_rate": 7.389222342679073e-06, | |
| "loss": 1.4121488332748413, | |
| "step": 626 | |
| }, | |
| { | |
| "epoch": 1.15018315018315, | |
| "grad_norm": 0.4962548017501831, | |
| "learning_rate": 7.373047709959537e-06, | |
| "loss": 0.4587477743625641, | |
| "step": 628 | |
| }, | |
| { | |
| "epoch": 1.1538461538461537, | |
| "grad_norm": 0.05175771191716194, | |
| "learning_rate": 7.356843754333626e-06, | |
| "loss": 1.1379830837249756, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 1.1575091575091574, | |
| "grad_norm": 0.8406626582145691, | |
| "learning_rate": 7.340610729477242e-06, | |
| "loss": 1.1201821565628052, | |
| "step": 632 | |
| }, | |
| { | |
| "epoch": 1.1611721611721613, | |
| "grad_norm": 0.25670814514160156, | |
| "learning_rate": 7.324348889521377e-06, | |
| "loss": 0.8717086315155029, | |
| "step": 634 | |
| }, | |
| { | |
| "epoch": 1.164835164835165, | |
| "grad_norm": 0.15090753138065338, | |
| "learning_rate": 7.308058489048125e-06, | |
| "loss": 1.0203039646148682, | |
| "step": 636 | |
| }, | |
| { | |
| "epoch": 1.1684981684981686, | |
| "grad_norm": 0.34527626633644104, | |
| "learning_rate": 7.291739783086701e-06, | |
| "loss": 1.2124344110488892, | |
| "step": 638 | |
| }, | |
| { | |
| "epoch": 1.1721611721611722, | |
| "grad_norm": 0.05906078591942787, | |
| "learning_rate": 7.275393027109451e-06, | |
| "loss": 0.761792778968811, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 1.1758241758241759, | |
| "grad_norm": 0.22481679916381836, | |
| "learning_rate": 7.259018477027842e-06, | |
| "loss": 1.1472866535186768, | |
| "step": 642 | |
| }, | |
| { | |
| "epoch": 1.1794871794871795, | |
| "grad_norm": 0.1917589157819748, | |
| "learning_rate": 7.242616389188472e-06, | |
| "loss": 1.1815375089645386, | |
| "step": 644 | |
| }, | |
| { | |
| "epoch": 1.1831501831501832, | |
| "grad_norm": 0.1334036886692047, | |
| "learning_rate": 7.226187020369039e-06, | |
| "loss": 0.7848197817802429, | |
| "step": 646 | |
| }, | |
| { | |
| "epoch": 1.1868131868131868, | |
| "grad_norm": 0.6945109367370605, | |
| "learning_rate": 7.209730627774333e-06, | |
| "loss": 0.93724524974823, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 1.1904761904761905, | |
| "grad_norm": 0.22903983294963837, | |
| "learning_rate": 7.193247469032209e-06, | |
| "loss": 1.1586498022079468, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 1.1941391941391941, | |
| "grad_norm": 0.1996290683746338, | |
| "learning_rate": 7.1767378021895464e-06, | |
| "loss": 0.6816765666007996, | |
| "step": 652 | |
| }, | |
| { | |
| "epoch": 1.1978021978021978, | |
| "grad_norm": 0.3227924108505249, | |
| "learning_rate": 7.160201885708219e-06, | |
| "loss": 1.3321443796157837, | |
| "step": 654 | |
| }, | |
| { | |
| "epoch": 1.2014652014652014, | |
| "grad_norm": 0.1688106805086136, | |
| "learning_rate": 7.143639978461038e-06, | |
| "loss": 0.8470932841300964, | |
| "step": 656 | |
| }, | |
| { | |
| "epoch": 1.205128205128205, | |
| "grad_norm": 1.263344645500183, | |
| "learning_rate": 7.127052339727708e-06, | |
| "loss": 0.9178895950317383, | |
| "step": 658 | |
| }, | |
| { | |
| "epoch": 1.2087912087912087, | |
| "grad_norm": 0.17517027258872986, | |
| "learning_rate": 7.110439229190762e-06, | |
| "loss": 1.1735132932662964, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 1.2124542124542124, | |
| "grad_norm": 0.36022061109542847, | |
| "learning_rate": 7.093800906931505e-06, | |
| "loss": 1.0736725330352783, | |
| "step": 662 | |
| }, | |
| { | |
| "epoch": 1.2161172161172162, | |
| "grad_norm": 0.14516492187976837, | |
| "learning_rate": 7.077137633425928e-06, | |
| "loss": 0.9138533473014832, | |
| "step": 664 | |
| }, | |
| { | |
| "epoch": 1.2197802197802199, | |
| "grad_norm": 0.18955856561660767, | |
| "learning_rate": 7.060449669540646e-06, | |
| "loss": 0.8576375842094421, | |
| "step": 666 | |
| }, | |
| { | |
| "epoch": 1.2234432234432235, | |
| "grad_norm": 1.3548957109451294, | |
| "learning_rate": 7.043737276528799e-06, | |
| "loss": 0.9260948896408081, | |
| "step": 668 | |
| }, | |
| { | |
| "epoch": 1.2271062271062272, | |
| "grad_norm": 0.4962684214115143, | |
| "learning_rate": 7.027000716025975e-06, | |
| "loss": 0.710183322429657, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 1.2307692307692308, | |
| "grad_norm": 0.1915455311536789, | |
| "learning_rate": 7.010240250046109e-06, | |
| "loss": 1.2020713090896606, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 1.2344322344322345, | |
| "grad_norm": 0.2438817024230957, | |
| "learning_rate": 6.9934561409773724e-06, | |
| "loss": 1.1766732931137085, | |
| "step": 674 | |
| }, | |
| { | |
| "epoch": 1.2380952380952381, | |
| "grad_norm": 0.1694273203611374, | |
| "learning_rate": 6.976648651578087e-06, | |
| "loss": 1.1996291875839233, | |
| "step": 676 | |
| }, | |
| { | |
| "epoch": 1.2417582417582418, | |
| "grad_norm": 0.16501298546791077, | |
| "learning_rate": 6.959818044972585e-06, | |
| "loss": 0.7851068377494812, | |
| "step": 678 | |
| }, | |
| { | |
| "epoch": 1.2454212454212454, | |
| "grad_norm": 0.13900168240070343, | |
| "learning_rate": 6.942964584647109e-06, | |
| "loss": 0.8421606421470642, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 1.249084249084249, | |
| "grad_norm": 0.9977712035179138, | |
| "learning_rate": 6.926088534445682e-06, | |
| "loss": 1.2277159690856934, | |
| "step": 682 | |
| }, | |
| { | |
| "epoch": 1.2527472527472527, | |
| "grad_norm": 0.13499097526073456, | |
| "learning_rate": 6.909190158565973e-06, | |
| "loss": 1.1799771785736084, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 1.2564102564102564, | |
| "grad_norm": 0.11800684034824371, | |
| "learning_rate": 6.892269721555161e-06, | |
| "loss": 0.8362367153167725, | |
| "step": 686 | |
| }, | |
| { | |
| "epoch": 1.26007326007326, | |
| "grad_norm": 0.23727478086948395, | |
| "learning_rate": 6.875327488305805e-06, | |
| "loss": 1.1368072032928467, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 1.2637362637362637, | |
| "grad_norm": 0.6073961853981018, | |
| "learning_rate": 6.858363724051678e-06, | |
| "loss": 1.1791174411773682, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 1.2673992673992673, | |
| "grad_norm": 0.23184753954410553, | |
| "learning_rate": 6.841378694363631e-06, | |
| "loss": 1.2035536766052246, | |
| "step": 692 | |
| }, | |
| { | |
| "epoch": 1.271062271062271, | |
| "grad_norm": 0.17120184004306793, | |
| "learning_rate": 6.824372665145424e-06, | |
| "loss": 0.9986141324043274, | |
| "step": 694 | |
| }, | |
| { | |
| "epoch": 1.2747252747252746, | |
| "grad_norm": 0.279720276594162, | |
| "learning_rate": 6.80734590262958e-06, | |
| "loss": 1.0483033657073975, | |
| "step": 696 | |
| }, | |
| { | |
| "epoch": 1.2783882783882783, | |
| "grad_norm": 0.16889701783657074, | |
| "learning_rate": 6.79029867337319e-06, | |
| "loss": 0.9515765905380249, | |
| "step": 698 | |
| }, | |
| { | |
| "epoch": 1.282051282051282, | |
| "grad_norm": 0.06983523815870285, | |
| "learning_rate": 6.773231244253766e-06, | |
| "loss": 0.6616621017456055, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 1.2857142857142856, | |
| "grad_norm": 0.26745325326919556, | |
| "learning_rate": 6.756143882465051e-06, | |
| "loss": 0.552936851978302, | |
| "step": 702 | |
| }, | |
| { | |
| "epoch": 1.2893772893772895, | |
| "grad_norm": 1.2814058065414429, | |
| "learning_rate": 6.739036855512835e-06, | |
| "loss": 1.3208832740783691, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 1.293040293040293, | |
| "grad_norm": 0.24780850112438202, | |
| "learning_rate": 6.721910431210771e-06, | |
| "loss": 1.0253862142562866, | |
| "step": 706 | |
| }, | |
| { | |
| "epoch": 1.2967032967032968, | |
| "grad_norm": 0.07103469967842102, | |
| "learning_rate": 6.704764877676181e-06, | |
| "loss": 0.9762220978736877, | |
| "step": 708 | |
| }, | |
| { | |
| "epoch": 1.3003663003663004, | |
| "grad_norm": 0.1995328813791275, | |
| "learning_rate": 6.687600463325859e-06, | |
| "loss": 0.5912091732025146, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 1.304029304029304, | |
| "grad_norm": 0.3986748456954956, | |
| "learning_rate": 6.670417456871871e-06, | |
| "loss": 0.9025965929031372, | |
| "step": 712 | |
| }, | |
| { | |
| "epoch": 1.3076923076923077, | |
| "grad_norm": 0.12167935073375702, | |
| "learning_rate": 6.653216127317338e-06, | |
| "loss": 1.2370021343231201, | |
| "step": 714 | |
| }, | |
| { | |
| "epoch": 1.3113553113553114, | |
| "grad_norm": 1.2277730703353882, | |
| "learning_rate": 6.635996743952242e-06, | |
| "loss": 1.1707024574279785, | |
| "step": 716 | |
| }, | |
| { | |
| "epoch": 1.315018315018315, | |
| "grad_norm": 0.1129893958568573, | |
| "learning_rate": 6.618759576349196e-06, | |
| "loss": 0.9717994928359985, | |
| "step": 718 | |
| }, | |
| { | |
| "epoch": 1.3186813186813187, | |
| "grad_norm": 0.11017405986785889, | |
| "learning_rate": 6.601504894359227e-06, | |
| "loss": 0.943675696849823, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 1.3223443223443223, | |
| "grad_norm": 0.1213424950838089, | |
| "learning_rate": 6.584232968107557e-06, | |
| "loss": 0.9619688987731934, | |
| "step": 722 | |
| }, | |
| { | |
| "epoch": 1.326007326007326, | |
| "grad_norm": 0.46485310792922974, | |
| "learning_rate": 6.566944067989366e-06, | |
| "loss": 1.2262362241744995, | |
| "step": 724 | |
| }, | |
| { | |
| "epoch": 1.3296703296703296, | |
| "grad_norm": 0.16346322000026703, | |
| "learning_rate": 6.549638464665566e-06, | |
| "loss": 1.1035256385803223, | |
| "step": 726 | |
| }, | |
| { | |
| "epoch": 1.3333333333333333, | |
| "grad_norm": 0.8575840592384338, | |
| "learning_rate": 6.532316429058562e-06, | |
| "loss": 1.1614726781845093, | |
| "step": 728 | |
| }, | |
| { | |
| "epoch": 1.3369963369963371, | |
| "grad_norm": 0.2909935712814331, | |
| "learning_rate": 6.514978232348003e-06, | |
| "loss": 0.765929639339447, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 1.3406593406593408, | |
| "grad_norm": 0.27958768606185913, | |
| "learning_rate": 6.497624145966549e-06, | |
| "loss": 0.8523128032684326, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 1.3443223443223444, | |
| "grad_norm": 0.05668744817376137, | |
| "learning_rate": 6.480254441595615e-06, | |
| "loss": 0.635466456413269, | |
| "step": 734 | |
| }, | |
| { | |
| "epoch": 1.347985347985348, | |
| "grad_norm": 0.243035688996315, | |
| "learning_rate": 6.462869391161116e-06, | |
| "loss": 1.1623685359954834, | |
| "step": 736 | |
| }, | |
| { | |
| "epoch": 1.3516483516483517, | |
| "grad_norm": 0.18739135563373566, | |
| "learning_rate": 6.445469266829214e-06, | |
| "loss": 0.7601761817932129, | |
| "step": 738 | |
| }, | |
| { | |
| "epoch": 1.3553113553113554, | |
| "grad_norm": 0.18719348311424255, | |
| "learning_rate": 6.428054341002058e-06, | |
| "loss": 1.0253567695617676, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 1.358974358974359, | |
| "grad_norm": 0.44151896238327026, | |
| "learning_rate": 6.41062488631351e-06, | |
| "loss": 0.6381222605705261, | |
| "step": 742 | |
| }, | |
| { | |
| "epoch": 1.3626373626373627, | |
| "grad_norm": 0.15360459685325623, | |
| "learning_rate": 6.393181175624893e-06, | |
| "loss": 1.2534339427947998, | |
| "step": 744 | |
| }, | |
| { | |
| "epoch": 1.3663003663003663, | |
| "grad_norm": 0.1375494748353958, | |
| "learning_rate": 6.375723482020702e-06, | |
| "loss": 1.0938211679458618, | |
| "step": 746 | |
| }, | |
| { | |
| "epoch": 1.36996336996337, | |
| "grad_norm": 0.2471369057893753, | |
| "learning_rate": 6.3582520788043465e-06, | |
| "loss": 1.0957386493682861, | |
| "step": 748 | |
| }, | |
| { | |
| "epoch": 1.3736263736263736, | |
| "grad_norm": 0.4160969853401184, | |
| "learning_rate": 6.340767239493851e-06, | |
| "loss": 0.8028813600540161, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 1.3772893772893773, | |
| "grad_norm": 0.1555003523826599, | |
| "learning_rate": 6.323269237817595e-06, | |
| "loss": 1.1485873460769653, | |
| "step": 752 | |
| }, | |
| { | |
| "epoch": 1.380952380952381, | |
| "grad_norm": 0.15514326095581055, | |
| "learning_rate": 6.3057583477100114e-06, | |
| "loss": 0.9761220812797546, | |
| "step": 754 | |
| }, | |
| { | |
| "epoch": 1.3846153846153846, | |
| "grad_norm": 0.2655651867389679, | |
| "learning_rate": 6.288234843307304e-06, | |
| "loss": 1.3567599058151245, | |
| "step": 756 | |
| }, | |
| { | |
| "epoch": 1.3882783882783882, | |
| "grad_norm": 0.22227928042411804, | |
| "learning_rate": 6.270698998943158e-06, | |
| "loss": 1.2285981178283691, | |
| "step": 758 | |
| }, | |
| { | |
| "epoch": 1.3919413919413919, | |
| "grad_norm": 0.08404932916164398, | |
| "learning_rate": 6.253151089144443e-06, | |
| "loss": 1.1724284887313843, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 1.3956043956043955, | |
| "grad_norm": 0.270246684551239, | |
| "learning_rate": 6.235591388626916e-06, | |
| "loss": 1.1640665531158447, | |
| "step": 762 | |
| }, | |
| { | |
| "epoch": 1.3992673992673992, | |
| "grad_norm": 0.22832772135734558, | |
| "learning_rate": 6.218020172290912e-06, | |
| "loss": 0.7001198530197144, | |
| "step": 764 | |
| }, | |
| { | |
| "epoch": 1.4029304029304028, | |
| "grad_norm": 0.12530791759490967, | |
| "learning_rate": 6.2004377152170595e-06, | |
| "loss": 0.9129507541656494, | |
| "step": 766 | |
| }, | |
| { | |
| "epoch": 1.4065934065934065, | |
| "grad_norm": 1.337783932685852, | |
| "learning_rate": 6.182844292661955e-06, | |
| "loss": 0.947498619556427, | |
| "step": 768 | |
| }, | |
| { | |
| "epoch": 1.4102564102564101, | |
| "grad_norm": 0.20773783326148987, | |
| "learning_rate": 6.165240180053864e-06, | |
| "loss": 1.2057294845581055, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 1.4139194139194138, | |
| "grad_norm": 0.21221360564231873, | |
| "learning_rate": 6.147625652988409e-06, | |
| "loss": 1.2334250211715698, | |
| "step": 772 | |
| }, | |
| { | |
| "epoch": 1.4175824175824177, | |
| "grad_norm": 0.2252884805202484, | |
| "learning_rate": 6.130000987224252e-06, | |
| "loss": 1.0532145500183105, | |
| "step": 774 | |
| }, | |
| { | |
| "epoch": 1.4212454212454213, | |
| "grad_norm": 0.1621122807264328, | |
| "learning_rate": 6.11236645867877e-06, | |
| "loss": 1.1797196865081787, | |
| "step": 776 | |
| }, | |
| { | |
| "epoch": 1.424908424908425, | |
| "grad_norm": 0.4637870192527771, | |
| "learning_rate": 6.09472234342376e-06, | |
| "loss": 0.8449276685714722, | |
| "step": 778 | |
| }, | |
| { | |
| "epoch": 1.4285714285714286, | |
| "grad_norm": 0.16811135411262512, | |
| "learning_rate": 6.077068917681085e-06, | |
| "loss": 1.383507490158081, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 1.4322344322344323, | |
| "grad_norm": 0.5571224093437195, | |
| "learning_rate": 6.059406457818372e-06, | |
| "loss": 1.2903873920440674, | |
| "step": 782 | |
| }, | |
| { | |
| "epoch": 1.435897435897436, | |
| "grad_norm": 0.19635361433029175, | |
| "learning_rate": 6.0417352403446815e-06, | |
| "loss": 1.178612232208252, | |
| "step": 784 | |
| }, | |
| { | |
| "epoch": 1.4395604395604396, | |
| "grad_norm": 0.5237776637077332, | |
| "learning_rate": 6.024055541906171e-06, | |
| "loss": 1.0071418285369873, | |
| "step": 786 | |
| }, | |
| { | |
| "epoch": 1.4432234432234432, | |
| "grad_norm": 0.22411468625068665, | |
| "learning_rate": 6.006367639281773e-06, | |
| "loss": 1.157625436782837, | |
| "step": 788 | |
| }, | |
| { | |
| "epoch": 1.4468864468864469, | |
| "grad_norm": 0.17982420325279236, | |
| "learning_rate": 5.988671809378851e-06, | |
| "loss": 0.7583225965499878, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 1.4505494505494505, | |
| "grad_norm": 0.16371974349021912, | |
| "learning_rate": 5.970968329228884e-06, | |
| "loss": 0.9400377869606018, | |
| "step": 792 | |
| }, | |
| { | |
| "epoch": 1.4542124542124542, | |
| "grad_norm": 0.27952075004577637, | |
| "learning_rate": 5.953257475983104e-06, | |
| "loss": 0.818259060382843, | |
| "step": 794 | |
| }, | |
| { | |
| "epoch": 1.4578754578754578, | |
| "grad_norm": 0.10578649491071701, | |
| "learning_rate": 5.935539526908178e-06, | |
| "loss": 1.1710015535354614, | |
| "step": 796 | |
| }, | |
| { | |
| "epoch": 1.4615384615384617, | |
| "grad_norm": 0.18775279819965363, | |
| "learning_rate": 5.917814759381857e-06, | |
| "loss": 0.811826765537262, | |
| "step": 798 | |
| }, | |
| { | |
| "epoch": 1.4652014652014653, | |
| "grad_norm": 0.6075829267501831, | |
| "learning_rate": 5.900083450888636e-06, | |
| "loss": 1.0531878471374512, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 1.468864468864469, | |
| "grad_norm": 0.47543665766716003, | |
| "learning_rate": 5.882345879015412e-06, | |
| "loss": 1.5944072008132935, | |
| "step": 802 | |
| }, | |
| { | |
| "epoch": 1.4725274725274726, | |
| "grad_norm": 0.19731402397155762, | |
| "learning_rate": 5.864602321447133e-06, | |
| "loss": 1.0520607233047485, | |
| "step": 804 | |
| }, | |
| { | |
| "epoch": 1.4761904761904763, | |
| "grad_norm": 0.7726924419403076, | |
| "learning_rate": 5.846853055962456e-06, | |
| "loss": 1.2063556909561157, | |
| "step": 806 | |
| }, | |
| { | |
| "epoch": 1.47985347985348, | |
| "grad_norm": 0.15811191499233246, | |
| "learning_rate": 5.829098360429397e-06, | |
| "loss": 1.1925911903381348, | |
| "step": 808 | |
| }, | |
| { | |
| "epoch": 1.4835164835164836, | |
| "grad_norm": 0.4315653145313263, | |
| "learning_rate": 5.811338512800983e-06, | |
| "loss": 1.0306977033615112, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 1.4871794871794872, | |
| "grad_norm": 0.16353972256183624, | |
| "learning_rate": 5.793573791110888e-06, | |
| "loss": 0.8834646940231323, | |
| "step": 812 | |
| }, | |
| { | |
| "epoch": 1.4908424908424909, | |
| "grad_norm": 0.13097569346427917, | |
| "learning_rate": 5.775804473469104e-06, | |
| "loss": 1.2225075960159302, | |
| "step": 814 | |
| }, | |
| { | |
| "epoch": 1.4945054945054945, | |
| "grad_norm": 0.16530992090702057, | |
| "learning_rate": 5.758030838057562e-06, | |
| "loss": 1.1620936393737793, | |
| "step": 816 | |
| }, | |
| { | |
| "epoch": 1.4981684981684982, | |
| "grad_norm": 0.06754113733768463, | |
| "learning_rate": 5.7402531631257975e-06, | |
| "loss": 0.43427881598472595, | |
| "step": 818 | |
| }, | |
| { | |
| "epoch": 1.5018315018315018, | |
| "grad_norm": 0.3140871226787567, | |
| "learning_rate": 5.722471726986577e-06, | |
| "loss": 1.1740379333496094, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 1.5054945054945055, | |
| "grad_norm": 1.0577014684677124, | |
| "learning_rate": 5.7046868080115554e-06, | |
| "loss": 0.9447726011276245, | |
| "step": 822 | |
| }, | |
| { | |
| "epoch": 1.5091575091575091, | |
| "grad_norm": 0.18314354121685028, | |
| "learning_rate": 5.686898684626909e-06, | |
| "loss": 0.8410064578056335, | |
| "step": 824 | |
| }, | |
| { | |
| "epoch": 1.5128205128205128, | |
| "grad_norm": 0.41597187519073486, | |
| "learning_rate": 5.6691076353089836e-06, | |
| "loss": 1.0812100172042847, | |
| "step": 826 | |
| }, | |
| { | |
| "epoch": 1.5164835164835164, | |
| "grad_norm": 0.6020085215568542, | |
| "learning_rate": 5.651313938579925e-06, | |
| "loss": 1.0242727994918823, | |
| "step": 828 | |
| }, | |
| { | |
| "epoch": 1.52014652014652, | |
| "grad_norm": 0.23912610113620758, | |
| "learning_rate": 5.633517873003329e-06, | |
| "loss": 1.0295336246490479, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 1.5238095238095237, | |
| "grad_norm": 0.18377065658569336, | |
| "learning_rate": 5.615719717179877e-06, | |
| "loss": 1.2712222337722778, | |
| "step": 832 | |
| }, | |
| { | |
| "epoch": 1.5274725274725274, | |
| "grad_norm": 0.41018202900886536, | |
| "learning_rate": 5.59791974974297e-06, | |
| "loss": 0.825635552406311, | |
| "step": 834 | |
| }, | |
| { | |
| "epoch": 1.531135531135531, | |
| "grad_norm": 0.17661726474761963, | |
| "learning_rate": 5.580118249354371e-06, | |
| "loss": 1.1780991554260254, | |
| "step": 836 | |
| }, | |
| { | |
| "epoch": 1.5347985347985347, | |
| "grad_norm": 0.2789783477783203, | |
| "learning_rate": 5.562315494699845e-06, | |
| "loss": 1.231903314590454, | |
| "step": 838 | |
| }, | |
| { | |
| "epoch": 1.5384615384615383, | |
| "grad_norm": 0.38799595832824707, | |
| "learning_rate": 5.544511764484788e-06, | |
| "loss": 1.0772476196289062, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 1.542124542124542, | |
| "grad_norm": 0.12171674519777298, | |
| "learning_rate": 5.526707337429871e-06, | |
| "loss": 0.896016001701355, | |
| "step": 842 | |
| }, | |
| { | |
| "epoch": 1.5457875457875456, | |
| "grad_norm": 2.428612232208252, | |
| "learning_rate": 5.508902492266676e-06, | |
| "loss": 1.067119836807251, | |
| "step": 844 | |
| }, | |
| { | |
| "epoch": 1.5494505494505495, | |
| "grad_norm": 0.12386277318000793, | |
| "learning_rate": 5.491097507733326e-06, | |
| "loss": 1.2031583786010742, | |
| "step": 846 | |
| }, | |
| { | |
| "epoch": 1.5531135531135531, | |
| "grad_norm": 0.6815460920333862, | |
| "learning_rate": 5.473292662570131e-06, | |
| "loss": 0.5977136492729187, | |
| "step": 848 | |
| }, | |
| { | |
| "epoch": 1.5567765567765568, | |
| "grad_norm": 0.2659470736980438, | |
| "learning_rate": 5.455488235515214e-06, | |
| "loss": 1.0548949241638184, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 1.5604395604395604, | |
| "grad_norm": 0.14139074087142944, | |
| "learning_rate": 5.4376845053001585e-06, | |
| "loss": 1.2025469541549683, | |
| "step": 852 | |
| }, | |
| { | |
| "epoch": 1.564102564102564, | |
| "grad_norm": 0.3239622414112091, | |
| "learning_rate": 5.41988175064563e-06, | |
| "loss": 0.8870663642883301, | |
| "step": 854 | |
| }, | |
| { | |
| "epoch": 1.5677655677655677, | |
| "grad_norm": 0.1715698391199112, | |
| "learning_rate": 5.402080250257031e-06, | |
| "loss": 0.8264884948730469, | |
| "step": 856 | |
| }, | |
| { | |
| "epoch": 1.5714285714285714, | |
| "grad_norm": 0.3756884038448334, | |
| "learning_rate": 5.384280282820126e-06, | |
| "loss": 0.974433422088623, | |
| "step": 858 | |
| }, | |
| { | |
| "epoch": 1.575091575091575, | |
| "grad_norm": 0.21274326741695404, | |
| "learning_rate": 5.3664821269966714e-06, | |
| "loss": 0.7866367101669312, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 1.578754578754579, | |
| "grad_norm": 0.16765311360359192, | |
| "learning_rate": 5.348686061420078e-06, | |
| "loss": 1.1772246360778809, | |
| "step": 862 | |
| }, | |
| { | |
| "epoch": 1.5824175824175826, | |
| "grad_norm": 0.31508204340934753, | |
| "learning_rate": 5.330892364691018e-06, | |
| "loss": 0.9374992251396179, | |
| "step": 864 | |
| }, | |
| { | |
| "epoch": 1.5860805860805862, | |
| "grad_norm": 0.19520021975040436, | |
| "learning_rate": 5.3131013153730916e-06, | |
| "loss": 1.2052057981491089, | |
| "step": 866 | |
| }, | |
| { | |
| "epoch": 1.5897435897435899, | |
| "grad_norm": 0.13115696609020233, | |
| "learning_rate": 5.295313191988447e-06, | |
| "loss": 1.2084887027740479, | |
| "step": 868 | |
| }, | |
| { | |
| "epoch": 1.5934065934065935, | |
| "grad_norm": 0.7018041014671326, | |
| "learning_rate": 5.277528273013425e-06, | |
| "loss": 0.609009325504303, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 1.5970695970695972, | |
| "grad_norm": 0.16342367231845856, | |
| "learning_rate": 5.259746836874203e-06, | |
| "loss": 1.2070071697235107, | |
| "step": 872 | |
| }, | |
| { | |
| "epoch": 1.6007326007326008, | |
| "grad_norm": 0.14944781363010406, | |
| "learning_rate": 5.2419691619424396e-06, | |
| "loss": 1.091475486755371, | |
| "step": 874 | |
| }, | |
| { | |
| "epoch": 1.6043956043956045, | |
| "grad_norm": 0.3255991041660309, | |
| "learning_rate": 5.224195526530897e-06, | |
| "loss": 0.8270645141601562, | |
| "step": 876 | |
| }, | |
| { | |
| "epoch": 1.6080586080586081, | |
| "grad_norm": 0.3492541015148163, | |
| "learning_rate": 5.206426208889113e-06, | |
| "loss": 1.054788589477539, | |
| "step": 878 | |
| }, | |
| { | |
| "epoch": 1.6117216117216118, | |
| "grad_norm": 0.1763620376586914, | |
| "learning_rate": 5.18866148719902e-06, | |
| "loss": 0.9270405769348145, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 1.6153846153846154, | |
| "grad_norm": 0.37243205308914185, | |
| "learning_rate": 5.170901639570605e-06, | |
| "loss": 1.163893222808838, | |
| "step": 882 | |
| }, | |
| { | |
| "epoch": 1.619047619047619, | |
| "grad_norm": 0.20206376910209656, | |
| "learning_rate": 5.153146944037545e-06, | |
| "loss": 0.978087842464447, | |
| "step": 884 | |
| }, | |
| { | |
| "epoch": 1.6227106227106227, | |
| "grad_norm": 0.1183437779545784, | |
| "learning_rate": 5.135397678552869e-06, | |
| "loss": 1.1707783937454224, | |
| "step": 886 | |
| }, | |
| { | |
| "epoch": 1.6263736263736264, | |
| "grad_norm": 0.19354073703289032, | |
| "learning_rate": 5.11765412098459e-06, | |
| "loss": 0.8640797138214111, | |
| "step": 888 | |
| }, | |
| { | |
| "epoch": 1.63003663003663, | |
| "grad_norm": 0.13133522868156433, | |
| "learning_rate": 5.099916549111365e-06, | |
| "loss": 1.2608891725540161, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 1.6336996336996337, | |
| "grad_norm": 0.12352308630943298, | |
| "learning_rate": 5.082185240618146e-06, | |
| "loss": 0.9242100715637207, | |
| "step": 892 | |
| }, | |
| { | |
| "epoch": 1.6373626373626373, | |
| "grad_norm": 0.20042523741722107, | |
| "learning_rate": 5.064460473091823e-06, | |
| "loss": 1.2130396366119385, | |
| "step": 894 | |
| }, | |
| { | |
| "epoch": 1.641025641025641, | |
| "grad_norm": 0.5343178510665894, | |
| "learning_rate": 5.046742524016899e-06, | |
| "loss": 0.7868685722351074, | |
| "step": 896 | |
| }, | |
| { | |
| "epoch": 1.6446886446886446, | |
| "grad_norm": 0.22285579144954681, | |
| "learning_rate": 5.029031670771119e-06, | |
| "loss": 1.1629694700241089, | |
| "step": 898 | |
| }, | |
| { | |
| "epoch": 1.6483516483516483, | |
| "grad_norm": 0.05358535423874855, | |
| "learning_rate": 5.0113281906211485e-06, | |
| "loss": 0.8543750643730164, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 1.652014652014652, | |
| "grad_norm": 0.51787930727005, | |
| "learning_rate": 4.99363236071823e-06, | |
| "loss": 0.9102002382278442, | |
| "step": 902 | |
| }, | |
| { | |
| "epoch": 1.6556776556776556, | |
| "grad_norm": 0.15900075435638428, | |
| "learning_rate": 4.975944458093831e-06, | |
| "loss": 1.042647361755371, | |
| "step": 904 | |
| }, | |
| { | |
| "epoch": 1.6593406593406592, | |
| "grad_norm": 0.7878313064575195, | |
| "learning_rate": 4.958264759655319e-06, | |
| "loss": 1.166403889656067, | |
| "step": 906 | |
| }, | |
| { | |
| "epoch": 1.6630036630036629, | |
| "grad_norm": 0.25159403681755066, | |
| "learning_rate": 4.940593542181629e-06, | |
| "loss": 0.7760780453681946, | |
| "step": 908 | |
| }, | |
| { | |
| "epoch": 1.6666666666666665, | |
| "grad_norm": 0.43353399634361267, | |
| "learning_rate": 4.922931082318917e-06, | |
| "loss": 1.2228432893753052, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 1.6703296703296702, | |
| "grad_norm": 0.13905330002307892, | |
| "learning_rate": 4.905277656576243e-06, | |
| "loss": 1.241356611251831, | |
| "step": 912 | |
| }, | |
| { | |
| "epoch": 1.673992673992674, | |
| "grad_norm": 0.14692410826683044, | |
| "learning_rate": 4.8876335413212305e-06, | |
| "loss": 1.1272119283676147, | |
| "step": 914 | |
| }, | |
| { | |
| "epoch": 1.6776556776556777, | |
| "grad_norm": 0.2749308943748474, | |
| "learning_rate": 4.86999901277575e-06, | |
| "loss": 1.240549921989441, | |
| "step": 916 | |
| }, | |
| { | |
| "epoch": 1.6813186813186813, | |
| "grad_norm": 0.2792946994304657, | |
| "learning_rate": 4.852374347011591e-06, | |
| "loss": 0.8525235056877136, | |
| "step": 918 | |
| }, | |
| { | |
| "epoch": 1.684981684981685, | |
| "grad_norm": 0.10312946885824203, | |
| "learning_rate": 4.834759819946137e-06, | |
| "loss": 1.2491165399551392, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 1.6886446886446886, | |
| "grad_norm": 0.18224607408046722, | |
| "learning_rate": 4.817155707338048e-06, | |
| "loss": 1.0514307022094727, | |
| "step": 922 | |
| }, | |
| { | |
| "epoch": 1.6923076923076923, | |
| "grad_norm": 0.1929437667131424, | |
| "learning_rate": 4.799562284782944e-06, | |
| "loss": 0.8574016690254211, | |
| "step": 924 | |
| }, | |
| { | |
| "epoch": 1.695970695970696, | |
| "grad_norm": 3.134303092956543, | |
| "learning_rate": 4.78197982770909e-06, | |
| "loss": 1.052782654762268, | |
| "step": 926 | |
| }, | |
| { | |
| "epoch": 1.6996336996336996, | |
| "grad_norm": 0.31613659858703613, | |
| "learning_rate": 4.7644086113730855e-06, | |
| "loss": 0.8344395756721497, | |
| "step": 928 | |
| }, | |
| { | |
| "epoch": 1.7032967032967035, | |
| "grad_norm": 0.03118388168513775, | |
| "learning_rate": 4.746848910855558e-06, | |
| "loss": 0.9363417625427246, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 1.7069597069597071, | |
| "grad_norm": 0.22982323169708252, | |
| "learning_rate": 4.729301001056842e-06, | |
| "loss": 0.959007978439331, | |
| "step": 932 | |
| }, | |
| { | |
| "epoch": 1.7106227106227108, | |
| "grad_norm": 0.20650729537010193, | |
| "learning_rate": 4.711765156692697e-06, | |
| "loss": 1.4183884859085083, | |
| "step": 934 | |
| }, | |
| { | |
| "epoch": 1.7142857142857144, | |
| "grad_norm": 0.29698464274406433, | |
| "learning_rate": 4.694241652289992e-06, | |
| "loss": 1.221863865852356, | |
| "step": 936 | |
| }, | |
| { | |
| "epoch": 1.717948717948718, | |
| "grad_norm": 0.23697128891944885, | |
| "learning_rate": 4.676730762182407e-06, | |
| "loss": 0.47039785981178284, | |
| "step": 938 | |
| }, | |
| { | |
| "epoch": 1.7216117216117217, | |
| "grad_norm": 0.16988730430603027, | |
| "learning_rate": 4.659232760506149e-06, | |
| "loss": 0.6852482557296753, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 1.7252747252747254, | |
| "grad_norm": 0.28183671832084656, | |
| "learning_rate": 4.641747921195657e-06, | |
| "loss": 1.152092456817627, | |
| "step": 942 | |
| }, | |
| { | |
| "epoch": 1.728937728937729, | |
| "grad_norm": 0.614911675453186, | |
| "learning_rate": 4.624276517979298e-06, | |
| "loss": 0.9434917569160461, | |
| "step": 944 | |
| }, | |
| { | |
| "epoch": 1.7326007326007327, | |
| "grad_norm": 0.19769613444805145, | |
| "learning_rate": 4.606818824375109e-06, | |
| "loss": 1.1814640760421753, | |
| "step": 946 | |
| }, | |
| { | |
| "epoch": 1.7362637362637363, | |
| "grad_norm": 0.2297975867986679, | |
| "learning_rate": 4.589375113686492e-06, | |
| "loss": 1.1364959478378296, | |
| "step": 948 | |
| }, | |
| { | |
| "epoch": 1.73992673992674, | |
| "grad_norm": 0.14440670609474182, | |
| "learning_rate": 4.571945658997944e-06, | |
| "loss": 1.0138092041015625, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 1.7435897435897436, | |
| "grad_norm": 0.0975588783621788, | |
| "learning_rate": 4.554530733170788e-06, | |
| "loss": 0.9809228181838989, | |
| "step": 952 | |
| }, | |
| { | |
| "epoch": 1.7472527472527473, | |
| "grad_norm": 0.14117704331874847, | |
| "learning_rate": 4.5371306088388856e-06, | |
| "loss": 1.239669680595398, | |
| "step": 954 | |
| }, | |
| { | |
| "epoch": 1.750915750915751, | |
| "grad_norm": 0.228751540184021, | |
| "learning_rate": 4.519745558404387e-06, | |
| "loss": 0.8224953413009644, | |
| "step": 956 | |
| }, | |
| { | |
| "epoch": 1.7545787545787546, | |
| "grad_norm": 0.7675461173057556, | |
| "learning_rate": 4.502375854033453e-06, | |
| "loss": 1.2402071952819824, | |
| "step": 958 | |
| }, | |
| { | |
| "epoch": 1.7582417582417582, | |
| "grad_norm": 0.11008962988853455, | |
| "learning_rate": 4.4850217676519995e-06, | |
| "loss": 0.5849726796150208, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 1.7619047619047619, | |
| "grad_norm": 0.3784470558166504, | |
| "learning_rate": 4.46768357094144e-06, | |
| "loss": 1.023523211479187, | |
| "step": 962 | |
| }, | |
| { | |
| "epoch": 1.7655677655677655, | |
| "grad_norm": 0.16590407490730286, | |
| "learning_rate": 4.4503615353344346e-06, | |
| "loss": 1.091076135635376, | |
| "step": 964 | |
| }, | |
| { | |
| "epoch": 1.7692307692307692, | |
| "grad_norm": 0.16390341520309448, | |
| "learning_rate": 4.433055932010635e-06, | |
| "loss": 1.2073513269424438, | |
| "step": 966 | |
| }, | |
| { | |
| "epoch": 1.7728937728937728, | |
| "grad_norm": 1.0820486545562744, | |
| "learning_rate": 4.4157670318924454e-06, | |
| "loss": 0.5969150066375732, | |
| "step": 968 | |
| }, | |
| { | |
| "epoch": 1.7765567765567765, | |
| "grad_norm": 0.16822820901870728, | |
| "learning_rate": 4.398495105640774e-06, | |
| "loss": 0.8644286394119263, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 1.7802197802197801, | |
| "grad_norm": 0.2925519645214081, | |
| "learning_rate": 4.381240423650805e-06, | |
| "loss": 0.9442048072814941, | |
| "step": 972 | |
| }, | |
| { | |
| "epoch": 1.7838827838827838, | |
| "grad_norm": 0.4801477789878845, | |
| "learning_rate": 4.364003256047758e-06, | |
| "loss": 1.279288649559021, | |
| "step": 974 | |
| }, | |
| { | |
| "epoch": 1.7875457875457874, | |
| "grad_norm": 0.1608477234840393, | |
| "learning_rate": 4.346783872682662e-06, | |
| "loss": 1.2263715267181396, | |
| "step": 976 | |
| }, | |
| { | |
| "epoch": 1.791208791208791, | |
| "grad_norm": 0.13645397126674652, | |
| "learning_rate": 4.329582543128131e-06, | |
| "loss": 0.9317041635513306, | |
| "step": 978 | |
| }, | |
| { | |
| "epoch": 1.7948717948717947, | |
| "grad_norm": 0.258089154958725, | |
| "learning_rate": 4.312399536674141e-06, | |
| "loss": 0.9728096723556519, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 1.7985347985347986, | |
| "grad_norm": 2.324678421020508, | |
| "learning_rate": 4.295235122323822e-06, | |
| "loss": 1.1650446653366089, | |
| "step": 982 | |
| }, | |
| { | |
| "epoch": 1.8021978021978022, | |
| "grad_norm": 0.2941892147064209, | |
| "learning_rate": 4.278089568789231e-06, | |
| "loss": 1.1338319778442383, | |
| "step": 984 | |
| }, | |
| { | |
| "epoch": 1.8058608058608059, | |
| "grad_norm": 0.08391306549310684, | |
| "learning_rate": 4.260963144487168e-06, | |
| "loss": 0.6776608824729919, | |
| "step": 986 | |
| }, | |
| { | |
| "epoch": 1.8095238095238095, | |
| "grad_norm": 0.4403177797794342, | |
| "learning_rate": 4.2438561175349505e-06, | |
| "loss": 0.8319576382637024, | |
| "step": 988 | |
| }, | |
| { | |
| "epoch": 1.8131868131868132, | |
| "grad_norm": 0.34780237078666687, | |
| "learning_rate": 4.2267687557462345e-06, | |
| "loss": 0.7826079726219177, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 1.8168498168498168, | |
| "grad_norm": 0.11686074733734131, | |
| "learning_rate": 4.209701326626812e-06, | |
| "loss": 0.795200526714325, | |
| "step": 992 | |
| }, | |
| { | |
| "epoch": 1.8205128205128205, | |
| "grad_norm": 0.12649790942668915, | |
| "learning_rate": 4.192654097370423e-06, | |
| "loss": 0.8667728900909424, | |
| "step": 994 | |
| }, | |
| { | |
| "epoch": 1.8241758241758241, | |
| "grad_norm": 0.5255754590034485, | |
| "learning_rate": 4.175627334854575e-06, | |
| "loss": 1.1568585634231567, | |
| "step": 996 | |
| }, | |
| { | |
| "epoch": 1.8278388278388278, | |
| "grad_norm": 0.10009193420410156, | |
| "learning_rate": 4.1586213056363724e-06, | |
| "loss": 0.8747377991676331, | |
| "step": 998 | |
| }, | |
| { | |
| "epoch": 1.8315018315018317, | |
| "grad_norm": 0.17984943091869354, | |
| "learning_rate": 4.141636275948324e-06, | |
| "loss": 1.1325833797454834, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 1.8351648351648353, | |
| "grad_norm": 0.17336614429950714, | |
| "learning_rate": 4.1246725116941964e-06, | |
| "loss": 1.166914463043213, | |
| "step": 1002 | |
| }, | |
| { | |
| "epoch": 1.838827838827839, | |
| "grad_norm": 0.20862817764282227, | |
| "learning_rate": 4.10773027844484e-06, | |
| "loss": 1.1623833179473877, | |
| "step": 1004 | |
| }, | |
| { | |
| "epoch": 1.8424908424908426, | |
| "grad_norm": 0.15748779475688934, | |
| "learning_rate": 4.090809841434029e-06, | |
| "loss": 1.164290428161621, | |
| "step": 1006 | |
| }, | |
| { | |
| "epoch": 1.8461538461538463, | |
| "grad_norm": 0.31245389580726624, | |
| "learning_rate": 4.073911465554319e-06, | |
| "loss": 0.8208089470863342, | |
| "step": 1008 | |
| }, | |
| { | |
| "epoch": 1.84981684981685, | |
| "grad_norm": 0.18492500483989716, | |
| "learning_rate": 4.057035415352892e-06, | |
| "loss": 1.1237512826919556, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 1.8534798534798536, | |
| "grad_norm": 0.14317144453525543, | |
| "learning_rate": 4.0401819550274165e-06, | |
| "loss": 0.7784026861190796, | |
| "step": 1012 | |
| }, | |
| { | |
| "epoch": 1.8571428571428572, | |
| "grad_norm": 0.1404157131910324, | |
| "learning_rate": 4.023351348421915e-06, | |
| "loss": 1.176824688911438, | |
| "step": 1014 | |
| }, | |
| { | |
| "epoch": 1.8608058608058609, | |
| "grad_norm": 0.14713755249977112, | |
| "learning_rate": 4.006543859022628e-06, | |
| "loss": 1.1646744012832642, | |
| "step": 1016 | |
| }, | |
| { | |
| "epoch": 1.8644688644688645, | |
| "grad_norm": 0.19819317758083344, | |
| "learning_rate": 3.989759749953893e-06, | |
| "loss": 1.256286382675171, | |
| "step": 1018 | |
| }, | |
| { | |
| "epoch": 1.8681318681318682, | |
| "grad_norm": 0.034005679190158844, | |
| "learning_rate": 3.972999283974026e-06, | |
| "loss": 0.9847078323364258, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 1.8717948717948718, | |
| "grad_norm": 0.17176663875579834, | |
| "learning_rate": 3.956262723471203e-06, | |
| "loss": 1.1373211145401, | |
| "step": 1022 | |
| }, | |
| { | |
| "epoch": 1.8754578754578755, | |
| "grad_norm": 0.3484453558921814, | |
| "learning_rate": 3.9395503304593565e-06, | |
| "loss": 0.5924882292747498, | |
| "step": 1024 | |
| }, | |
| { | |
| "epoch": 1.879120879120879, | |
| "grad_norm": 0.16725610196590424, | |
| "learning_rate": 3.922862366574074e-06, | |
| "loss": 1.1780312061309814, | |
| "step": 1026 | |
| }, | |
| { | |
| "epoch": 1.8827838827838828, | |
| "grad_norm": 0.1592869609594345, | |
| "learning_rate": 3.906199093068497e-06, | |
| "loss": 0.9455581903457642, | |
| "step": 1028 | |
| }, | |
| { | |
| "epoch": 1.8864468864468864, | |
| "grad_norm": 0.4676535129547119, | |
| "learning_rate": 3.889560770809239e-06, | |
| "loss": 1.1824193000793457, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 1.89010989010989, | |
| "grad_norm": 0.13471902906894684, | |
| "learning_rate": 3.872947660272295e-06, | |
| "loss": 0.9769763350486755, | |
| "step": 1032 | |
| }, | |
| { | |
| "epoch": 1.8937728937728937, | |
| "grad_norm": 0.24125701189041138, | |
| "learning_rate": 3.856360021538964e-06, | |
| "loss": 0.8109256029129028, | |
| "step": 1034 | |
| }, | |
| { | |
| "epoch": 1.8974358974358974, | |
| "grad_norm": 0.27469104528427124, | |
| "learning_rate": 3.8397981142917815e-06, | |
| "loss": 1.2156492471694946, | |
| "step": 1036 | |
| }, | |
| { | |
| "epoch": 1.901098901098901, | |
| "grad_norm": 0.16270951926708221, | |
| "learning_rate": 3.823262197810454e-06, | |
| "loss": 1.183699369430542, | |
| "step": 1038 | |
| }, | |
| { | |
| "epoch": 1.9047619047619047, | |
| "grad_norm": 0.37082114815711975, | |
| "learning_rate": 3.806752530967792e-06, | |
| "loss": 1.2584105730056763, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 1.9084249084249083, | |
| "grad_norm": 0.9490067362785339, | |
| "learning_rate": 3.790269372225668e-06, | |
| "loss": 0.6401211023330688, | |
| "step": 1042 | |
| }, | |
| { | |
| "epoch": 1.912087912087912, | |
| "grad_norm": 0.15817776322364807, | |
| "learning_rate": 3.773812979630964e-06, | |
| "loss": 0.9084805250167847, | |
| "step": 1044 | |
| }, | |
| { | |
| "epoch": 1.9157509157509156, | |
| "grad_norm": 0.1630954146385193, | |
| "learning_rate": 3.7573836108115303e-06, | |
| "loss": 1.1366910934448242, | |
| "step": 1046 | |
| }, | |
| { | |
| "epoch": 1.9194139194139193, | |
| "grad_norm": 0.3042643964290619, | |
| "learning_rate": 3.740981522972159e-06, | |
| "loss": 0.514860987663269, | |
| "step": 1048 | |
| }, | |
| { | |
| "epoch": 1.9230769230769231, | |
| "grad_norm": 0.25363633036613464, | |
| "learning_rate": 3.724606972890551e-06, | |
| "loss": 0.9003884792327881, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 1.9267399267399268, | |
| "grad_norm": 0.22043615579605103, | |
| "learning_rate": 3.7082602169132995e-06, | |
| "loss": 0.8399287462234497, | |
| "step": 1052 | |
| }, | |
| { | |
| "epoch": 1.9304029304029304, | |
| "grad_norm": 0.1377377063035965, | |
| "learning_rate": 3.6919415109518776e-06, | |
| "loss": 1.1453593969345093, | |
| "step": 1054 | |
| }, | |
| { | |
| "epoch": 1.934065934065934, | |
| "grad_norm": 0.22288501262664795, | |
| "learning_rate": 3.6756511104786254e-06, | |
| "loss": 0.770913302898407, | |
| "step": 1056 | |
| }, | |
| { | |
| "epoch": 1.9377289377289377, | |
| "grad_norm": 0.16052567958831787, | |
| "learning_rate": 3.6593892705227586e-06, | |
| "loss": 1.003678321838379, | |
| "step": 1058 | |
| }, | |
| { | |
| "epoch": 1.9413919413919414, | |
| "grad_norm": 0.3310210704803467, | |
| "learning_rate": 3.643156245666377e-06, | |
| "loss": 1.1094727516174316, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 1.945054945054945, | |
| "grad_norm": 0.18297162652015686, | |
| "learning_rate": 3.626952290040463e-06, | |
| "loss": 0.8664683103561401, | |
| "step": 1062 | |
| }, | |
| { | |
| "epoch": 1.9487179487179487, | |
| "grad_norm": 0.1476193070411682, | |
| "learning_rate": 3.6107776573209263e-06, | |
| "loss": 0.8188486099243164, | |
| "step": 1064 | |
| }, | |
| { | |
| "epoch": 1.9523809523809523, | |
| "grad_norm": 0.20653630793094635, | |
| "learning_rate": 3.59463260072462e-06, | |
| "loss": 1.1757713556289673, | |
| "step": 1066 | |
| }, | |
| { | |
| "epoch": 1.9560439560439562, | |
| "grad_norm": 0.12281164526939392, | |
| "learning_rate": 3.5785173730053667e-06, | |
| "loss": 1.2063580751419067, | |
| "step": 1068 | |
| }, | |
| { | |
| "epoch": 1.9597069597069599, | |
| "grad_norm": 0.07409633696079254, | |
| "learning_rate": 3.5624322264500246e-06, | |
| "loss": 0.7450681328773499, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 1.9633699633699635, | |
| "grad_norm": 0.14261014759540558, | |
| "learning_rate": 3.5463774128745232e-06, | |
| "loss": 0.881243884563446, | |
| "step": 1072 | |
| }, | |
| { | |
| "epoch": 1.9670329670329672, | |
| "grad_norm": 0.12503007054328918, | |
| "learning_rate": 3.530353183619918e-06, | |
| "loss": 1.161426067352295, | |
| "step": 1074 | |
| }, | |
| { | |
| "epoch": 1.9706959706959708, | |
| "grad_norm": 0.11768339574337006, | |
| "learning_rate": 3.514359789548466e-06, | |
| "loss": 1.1456844806671143, | |
| "step": 1076 | |
| }, | |
| { | |
| "epoch": 1.9743589743589745, | |
| "grad_norm": 0.23258298635482788, | |
| "learning_rate": 3.4983974810396927e-06, | |
| "loss": 1.0247056484222412, | |
| "step": 1078 | |
| }, | |
| { | |
| "epoch": 1.978021978021978, | |
| "grad_norm": 0.28044548630714417, | |
| "learning_rate": 3.4824665079864735e-06, | |
| "loss": 1.1190541982650757, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 1.9816849816849818, | |
| "grad_norm": 0.12241950631141663, | |
| "learning_rate": 3.466567119791123e-06, | |
| "loss": 1.126396656036377, | |
| "step": 1082 | |
| }, | |
| { | |
| "epoch": 1.9853479853479854, | |
| "grad_norm": 0.05192271247506142, | |
| "learning_rate": 3.4506995653614873e-06, | |
| "loss": 0.7499899864196777, | |
| "step": 1084 | |
| }, | |
| { | |
| "epoch": 1.989010989010989, | |
| "grad_norm": 0.5542802214622498, | |
| "learning_rate": 3.4348640931070463e-06, | |
| "loss": 0.981029748916626, | |
| "step": 1086 | |
| }, | |
| { | |
| "epoch": 1.9926739926739927, | |
| "grad_norm": 0.24883772432804108, | |
| "learning_rate": 3.4190609509350338e-06, | |
| "loss": 1.0121923685073853, | |
| "step": 1088 | |
| }, | |
| { | |
| "epoch": 1.9963369963369964, | |
| "grad_norm": 0.1047605574131012, | |
| "learning_rate": 3.403290386246544e-06, | |
| "loss": 0.9460771679878235, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 0.14438582956790924, | |
| "learning_rate": 3.3875526459326714e-06, | |
| "loss": 1.0866570472717285, | |
| "step": 1092 | |
| }, | |
| { | |
| "epoch": 2.0036630036630036, | |
| "grad_norm": 0.13476252555847168, | |
| "learning_rate": 3.3718479763706324e-06, | |
| "loss": 1.140030860900879, | |
| "step": 1094 | |
| }, | |
| { | |
| "epoch": 2.0073260073260073, | |
| "grad_norm": 0.39099064469337463, | |
| "learning_rate": 3.356176623419915e-06, | |
| "loss": 1.1750749349594116, | |
| "step": 1096 | |
| }, | |
| { | |
| "epoch": 2.010989010989011, | |
| "grad_norm": 0.15205055475234985, | |
| "learning_rate": 3.340538832418436e-06, | |
| "loss": 1.1374648809432983, | |
| "step": 1098 | |
| }, | |
| { | |
| "epoch": 2.0146520146520146, | |
| "grad_norm": 0.4141819179058075, | |
| "learning_rate": 3.3249348481786904e-06, | |
| "loss": 1.2270292043685913, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 2.0183150183150182, | |
| "grad_norm": 0.2034660130739212, | |
| "learning_rate": 3.3093649149839148e-06, | |
| "loss": 0.8838691711425781, | |
| "step": 1102 | |
| }, | |
| { | |
| "epoch": 2.021978021978022, | |
| "grad_norm": 0.12057554721832275, | |
| "learning_rate": 3.2938292765842817e-06, | |
| "loss": 1.1789038181304932, | |
| "step": 1104 | |
| }, | |
| { | |
| "epoch": 2.0256410256410255, | |
| "grad_norm": 0.2732870578765869, | |
| "learning_rate": 3.2783281761930673e-06, | |
| "loss": 0.8000632524490356, | |
| "step": 1106 | |
| }, | |
| { | |
| "epoch": 2.029304029304029, | |
| "grad_norm": 0.10481557995080948, | |
| "learning_rate": 3.262861856482849e-06, | |
| "loss": 1.2116031646728516, | |
| "step": 1108 | |
| }, | |
| { | |
| "epoch": 2.032967032967033, | |
| "grad_norm": 0.19622857868671417, | |
| "learning_rate": 3.247430559581706e-06, | |
| "loss": 0.9533130526542664, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 2.0366300366300365, | |
| "grad_norm": 0.15817232429981232, | |
| "learning_rate": 3.2320345270694263e-06, | |
| "loss": 0.6461672186851501, | |
| "step": 1112 | |
| }, | |
| { | |
| "epoch": 2.04029304029304, | |
| "grad_norm": 0.30624663829803467, | |
| "learning_rate": 3.216673999973734e-06, | |
| "loss": 0.893692672252655, | |
| "step": 1114 | |
| }, | |
| { | |
| "epoch": 2.043956043956044, | |
| "grad_norm": 0.20368322730064392, | |
| "learning_rate": 3.201349218766506e-06, | |
| "loss": 1.2045972347259521, | |
| "step": 1116 | |
| }, | |
| { | |
| "epoch": 2.0476190476190474, | |
| "grad_norm": 0.3576587736606598, | |
| "learning_rate": 3.186060423360009e-06, | |
| "loss": 1.1595624685287476, | |
| "step": 1118 | |
| }, | |
| { | |
| "epoch": 2.051282051282051, | |
| "grad_norm": 0.15144126117229462, | |
| "learning_rate": 3.170807853103146e-06, | |
| "loss": 0.8582723736763, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 2.0549450549450547, | |
| "grad_norm": 0.3102099895477295, | |
| "learning_rate": 3.155591746777713e-06, | |
| "loss": 1.3617991209030151, | |
| "step": 1122 | |
| }, | |
| { | |
| "epoch": 2.0586080586080584, | |
| "grad_norm": 0.13681809604167938, | |
| "learning_rate": 3.140412342594648e-06, | |
| "loss": 1.1718530654907227, | |
| "step": 1124 | |
| }, | |
| { | |
| "epoch": 2.062271062271062, | |
| "grad_norm": 0.14834214746952057, | |
| "learning_rate": 3.12526987819032e-06, | |
| "loss": 0.8169500827789307, | |
| "step": 1126 | |
| }, | |
| { | |
| "epoch": 2.065934065934066, | |
| "grad_norm": 0.3267018795013428, | |
| "learning_rate": 3.1101645906227924e-06, | |
| "loss": 1.1410131454467773, | |
| "step": 1128 | |
| }, | |
| { | |
| "epoch": 2.06959706959707, | |
| "grad_norm": 0.2117215096950531, | |
| "learning_rate": 3.0950967163681177e-06, | |
| "loss": 1.1394081115722656, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 2.0732600732600734, | |
| "grad_norm": 0.18249589204788208, | |
| "learning_rate": 3.08006649131664e-06, | |
| "loss": 1.1663340330123901, | |
| "step": 1132 | |
| }, | |
| { | |
| "epoch": 2.076923076923077, | |
| "grad_norm": 0.15602019429206848, | |
| "learning_rate": 3.0650741507693004e-06, | |
| "loss": 1.1466034650802612, | |
| "step": 1134 | |
| }, | |
| { | |
| "epoch": 2.0805860805860807, | |
| "grad_norm": 0.3648541271686554, | |
| "learning_rate": 3.0501199294339435e-06, | |
| "loss": 0.8573122620582581, | |
| "step": 1136 | |
| }, | |
| { | |
| "epoch": 2.0842490842490844, | |
| "grad_norm": 0.1585971862077713, | |
| "learning_rate": 3.0352040614216555e-06, | |
| "loss": 1.1506117582321167, | |
| "step": 1138 | |
| }, | |
| { | |
| "epoch": 2.087912087912088, | |
| "grad_norm": 0.17453494668006897, | |
| "learning_rate": 3.0203267802430915e-06, | |
| "loss": 1.0754824876785278, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 2.0915750915750917, | |
| "grad_norm": 0.1620694249868393, | |
| "learning_rate": 3.0054883188048266e-06, | |
| "loss": 1.1398316621780396, | |
| "step": 1142 | |
| }, | |
| { | |
| "epoch": 2.0952380952380953, | |
| "grad_norm": 0.06643152236938477, | |
| "learning_rate": 2.9906889094057062e-06, | |
| "loss": 0.4219062924385071, | |
| "step": 1144 | |
| }, | |
| { | |
| "epoch": 2.098901098901099, | |
| "grad_norm": 0.7500718235969543, | |
| "learning_rate": 2.9759287837332007e-06, | |
| "loss": 0.9941345453262329, | |
| "step": 1146 | |
| }, | |
| { | |
| "epoch": 2.1025641025641026, | |
| "grad_norm": 0.17045439779758453, | |
| "learning_rate": 2.961208172859794e-06, | |
| "loss": 0.84036785364151, | |
| "step": 1148 | |
| }, | |
| { | |
| "epoch": 2.1062271062271063, | |
| "grad_norm": 0.2622012197971344, | |
| "learning_rate": 2.946527307239359e-06, | |
| "loss": 0.8539763689041138, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 2.10989010989011, | |
| "grad_norm": 0.42088064551353455, | |
| "learning_rate": 2.9318864167035452e-06, | |
| "loss": 0.985520601272583, | |
| "step": 1152 | |
| }, | |
| { | |
| "epoch": 2.1135531135531136, | |
| "grad_norm": 0.3410366475582123, | |
| "learning_rate": 2.9172857304581857e-06, | |
| "loss": 0.900378942489624, | |
| "step": 1154 | |
| }, | |
| { | |
| "epoch": 2.1172161172161172, | |
| "grad_norm": 0.3229033052921295, | |
| "learning_rate": 2.902725477079711e-06, | |
| "loss": 1.1304961442947388, | |
| "step": 1156 | |
| }, | |
| { | |
| "epoch": 2.120879120879121, | |
| "grad_norm": 0.4168906807899475, | |
| "learning_rate": 2.8882058845115633e-06, | |
| "loss": 1.0916647911071777, | |
| "step": 1158 | |
| }, | |
| { | |
| "epoch": 2.1245421245421245, | |
| "grad_norm": 0.34625813364982605, | |
| "learning_rate": 2.873727180060637e-06, | |
| "loss": 0.909528374671936, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 2.128205128205128, | |
| "grad_norm": 0.2843841016292572, | |
| "learning_rate": 2.8592895903937124e-06, | |
| "loss": 0.8306626677513123, | |
| "step": 1162 | |
| }, | |
| { | |
| "epoch": 2.131868131868132, | |
| "grad_norm": 0.23222553730010986, | |
| "learning_rate": 2.8448933415339085e-06, | |
| "loss": 0.9491928815841675, | |
| "step": 1164 | |
| }, | |
| { | |
| "epoch": 2.1355311355311355, | |
| "grad_norm": 0.1373523473739624, | |
| "learning_rate": 2.8305386588571517e-06, | |
| "loss": 0.45827817916870117, | |
| "step": 1166 | |
| }, | |
| { | |
| "epoch": 2.139194139194139, | |
| "grad_norm": 0.1498524397611618, | |
| "learning_rate": 2.816225767088638e-06, | |
| "loss": 0.4394649267196655, | |
| "step": 1168 | |
| }, | |
| { | |
| "epoch": 2.142857142857143, | |
| "grad_norm": 0.203684002161026, | |
| "learning_rate": 2.801954890299322e-06, | |
| "loss": 0.9728699922561646, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 2.1465201465201464, | |
| "grad_norm": 0.4509437382221222, | |
| "learning_rate": 2.7877262519024027e-06, | |
| "loss": 1.19068443775177, | |
| "step": 1172 | |
| }, | |
| { | |
| "epoch": 2.15018315018315, | |
| "grad_norm": 0.37610286474227905, | |
| "learning_rate": 2.7735400746498302e-06, | |
| "loss": 1.306997299194336, | |
| "step": 1174 | |
| }, | |
| { | |
| "epoch": 2.1538461538461537, | |
| "grad_norm": 1.0810068845748901, | |
| "learning_rate": 2.7593965806288204e-06, | |
| "loss": 0.8269945979118347, | |
| "step": 1176 | |
| }, | |
| { | |
| "epoch": 2.1575091575091574, | |
| "grad_norm": 0.11437740176916122, | |
| "learning_rate": 2.7452959912583744e-06, | |
| "loss": 1.174338698387146, | |
| "step": 1178 | |
| }, | |
| { | |
| "epoch": 2.161172161172161, | |
| "grad_norm": 0.1378648579120636, | |
| "learning_rate": 2.7312385272858087e-06, | |
| "loss": 1.1485635042190552, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 2.1648351648351647, | |
| "grad_norm": 0.1592278927564621, | |
| "learning_rate": 2.7172244087833077e-06, | |
| "loss": 1.208397388458252, | |
| "step": 1182 | |
| }, | |
| { | |
| "epoch": 2.1684981684981683, | |
| "grad_norm": 0.1117565929889679, | |
| "learning_rate": 2.7032538551444776e-06, | |
| "loss": 1.175192952156067, | |
| "step": 1184 | |
| }, | |
| { | |
| "epoch": 2.172161172161172, | |
| "grad_norm": 0.12953658401966095, | |
| "learning_rate": 2.6893270850809024e-06, | |
| "loss": 0.3722214698791504, | |
| "step": 1186 | |
| }, | |
| { | |
| "epoch": 2.1758241758241756, | |
| "grad_norm": 0.27550461888313293, | |
| "learning_rate": 2.6754443166187267e-06, | |
| "loss": 1.2698341608047485, | |
| "step": 1188 | |
| }, | |
| { | |
| "epoch": 2.1794871794871793, | |
| "grad_norm": 0.3155595660209656, | |
| "learning_rate": 2.661605767095248e-06, | |
| "loss": 0.8203377723693848, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 2.183150183150183, | |
| "grad_norm": 0.17399396002292633, | |
| "learning_rate": 2.6478116531554997e-06, | |
| "loss": 1.01655912399292, | |
| "step": 1192 | |
| }, | |
| { | |
| "epoch": 2.186813186813187, | |
| "grad_norm": 0.1317910999059677, | |
| "learning_rate": 2.6340621907488777e-06, | |
| "loss": 0.8621305823326111, | |
| "step": 1194 | |
| }, | |
| { | |
| "epoch": 2.1904761904761907, | |
| "grad_norm": 0.2792171835899353, | |
| "learning_rate": 2.620357595125742e-06, | |
| "loss": 0.9206136465072632, | |
| "step": 1196 | |
| }, | |
| { | |
| "epoch": 2.1941391941391943, | |
| "grad_norm": 0.15142017602920532, | |
| "learning_rate": 2.6066980808340553e-06, | |
| "loss": 1.1463533639907837, | |
| "step": 1198 | |
| }, | |
| { | |
| "epoch": 2.197802197802198, | |
| "grad_norm": 0.2487816959619522, | |
| "learning_rate": 2.5930838617160304e-06, | |
| "loss": 0.8177496790885925, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 2.2014652014652016, | |
| "grad_norm": 0.176479309797287, | |
| "learning_rate": 2.579515150904767e-06, | |
| "loss": 1.2105001211166382, | |
| "step": 1202 | |
| }, | |
| { | |
| "epoch": 2.2051282051282053, | |
| "grad_norm": 0.2774566113948822, | |
| "learning_rate": 2.5659921608209325e-06, | |
| "loss": 1.165309190750122, | |
| "step": 1204 | |
| }, | |
| { | |
| "epoch": 2.208791208791209, | |
| "grad_norm": 0.18029530346393585, | |
| "learning_rate": 2.5525151031694214e-06, | |
| "loss": 0.5955395102500916, | |
| "step": 1206 | |
| }, | |
| { | |
| "epoch": 2.2124542124542126, | |
| "grad_norm": 0.44882774353027344, | |
| "learning_rate": 2.5390841889360483e-06, | |
| "loss": 0.7616056203842163, | |
| "step": 1208 | |
| }, | |
| { | |
| "epoch": 2.2161172161172162, | |
| "grad_norm": 1.0745935440063477, | |
| "learning_rate": 2.525699628384249e-06, | |
| "loss": 0.6935135722160339, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 2.21978021978022, | |
| "grad_norm": 0.17599527537822723, | |
| "learning_rate": 2.5123616310517797e-06, | |
| "loss": 1.1335649490356445, | |
| "step": 1212 | |
| }, | |
| { | |
| "epoch": 2.2234432234432235, | |
| "grad_norm": 0.38573309779167175, | |
| "learning_rate": 2.4990704057474405e-06, | |
| "loss": 0.8549797534942627, | |
| "step": 1214 | |
| }, | |
| { | |
| "epoch": 2.227106227106227, | |
| "grad_norm": 0.5317236185073853, | |
| "learning_rate": 2.485826160547807e-06, | |
| "loss": 0.9798864126205444, | |
| "step": 1216 | |
| }, | |
| { | |
| "epoch": 2.230769230769231, | |
| "grad_norm": 0.12072915583848953, | |
| "learning_rate": 2.4726291027939775e-06, | |
| "loss": 1.137038230895996, | |
| "step": 1218 | |
| }, | |
| { | |
| "epoch": 2.2344322344322345, | |
| "grad_norm": 0.13340038061141968, | |
| "learning_rate": 2.459479439088314e-06, | |
| "loss": 1.1505991220474243, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 2.238095238095238, | |
| "grad_norm": 0.19366510212421417, | |
| "learning_rate": 2.4463773752912232e-06, | |
| "loss": 1.1624219417572021, | |
| "step": 1222 | |
| }, | |
| { | |
| "epoch": 2.241758241758242, | |
| "grad_norm": 0.2833138108253479, | |
| "learning_rate": 2.4333231165179226e-06, | |
| "loss": 0.5617607831954956, | |
| "step": 1224 | |
| }, | |
| { | |
| "epoch": 2.2454212454212454, | |
| "grad_norm": 0.14608268439769745, | |
| "learning_rate": 2.420316867135232e-06, | |
| "loss": 1.1109657287597656, | |
| "step": 1226 | |
| }, | |
| { | |
| "epoch": 2.249084249084249, | |
| "grad_norm": 2.9962241649627686, | |
| "learning_rate": 2.407358830758381e-06, | |
| "loss": 0.6706120371818542, | |
| "step": 1228 | |
| }, | |
| { | |
| "epoch": 2.2527472527472527, | |
| "grad_norm": 0.044207386672496796, | |
| "learning_rate": 2.394449210247811e-06, | |
| "loss": 0.6224187016487122, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 2.2564102564102564, | |
| "grad_norm": 0.20471802353858948, | |
| "learning_rate": 2.381588207706003e-06, | |
| "loss": 0.6815849542617798, | |
| "step": 1232 | |
| }, | |
| { | |
| "epoch": 2.26007326007326, | |
| "grad_norm": 0.3602707087993622, | |
| "learning_rate": 2.3687760244743198e-06, | |
| "loss": 1.157220482826233, | |
| "step": 1234 | |
| }, | |
| { | |
| "epoch": 2.2637362637362637, | |
| "grad_norm": 0.8389260172843933, | |
| "learning_rate": 2.356012861129845e-06, | |
| "loss": 0.7905306220054626, | |
| "step": 1236 | |
| }, | |
| { | |
| "epoch": 2.2673992673992673, | |
| "grad_norm": 0.12152452766895294, | |
| "learning_rate": 2.3432989174822496e-06, | |
| "loss": 0.998111367225647, | |
| "step": 1238 | |
| }, | |
| { | |
| "epoch": 2.271062271062271, | |
| "grad_norm": 0.15299645066261292, | |
| "learning_rate": 2.330634392570658e-06, | |
| "loss": 0.9482631683349609, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 2.2747252747252746, | |
| "grad_norm": 0.22156605124473572, | |
| "learning_rate": 2.3180194846605367e-06, | |
| "loss": 0.9491860866546631, | |
| "step": 1242 | |
| }, | |
| { | |
| "epoch": 2.2783882783882783, | |
| "grad_norm": 0.1533634215593338, | |
| "learning_rate": 2.3054543912405896e-06, | |
| "loss": 1.1562466621398926, | |
| "step": 1244 | |
| }, | |
| { | |
| "epoch": 2.282051282051282, | |
| "grad_norm": 0.12872643768787384, | |
| "learning_rate": 2.2929393090196663e-06, | |
| "loss": 0.7593182921409607, | |
| "step": 1246 | |
| }, | |
| { | |
| "epoch": 2.2857142857142856, | |
| "grad_norm": 0.25250881910324097, | |
| "learning_rate": 2.2804744339236796e-06, | |
| "loss": 0.7431901097297668, | |
| "step": 1248 | |
| }, | |
| { | |
| "epoch": 2.2893772893772892, | |
| "grad_norm": 0.1763988882303238, | |
| "learning_rate": 2.268059961092541e-06, | |
| "loss": 1.127759575843811, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 2.293040293040293, | |
| "grad_norm": 0.2666459381580353, | |
| "learning_rate": 2.255696084877107e-06, | |
| "loss": 0.8839851021766663, | |
| "step": 1252 | |
| }, | |
| { | |
| "epoch": 2.2967032967032965, | |
| "grad_norm": 0.18553560972213745, | |
| "learning_rate": 2.2433829988361316e-06, | |
| "loss": 1.2005871534347534, | |
| "step": 1254 | |
| }, | |
| { | |
| "epoch": 2.3003663003663, | |
| "grad_norm": 0.20974372327327728, | |
| "learning_rate": 2.231120895733245e-06, | |
| "loss": 1.2160831689834595, | |
| "step": 1256 | |
| }, | |
| { | |
| "epoch": 2.304029304029304, | |
| "grad_norm": 0.27016669511795044, | |
| "learning_rate": 2.2189099675339233e-06, | |
| "loss": 0.8103601336479187, | |
| "step": 1258 | |
| }, | |
| { | |
| "epoch": 2.3076923076923075, | |
| "grad_norm": 0.2763507664203644, | |
| "learning_rate": 2.206750405402493e-06, | |
| "loss": 1.232648491859436, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 2.311355311355311, | |
| "grad_norm": 0.20276162028312683, | |
| "learning_rate": 2.194642399699138e-06, | |
| "loss": 1.0822112560272217, | |
| "step": 1262 | |
| }, | |
| { | |
| "epoch": 2.315018315018315, | |
| "grad_norm": 0.1820443570613861, | |
| "learning_rate": 2.1825861399769126e-06, | |
| "loss": 0.9380193948745728, | |
| "step": 1264 | |
| }, | |
| { | |
| "epoch": 2.3186813186813184, | |
| "grad_norm": 0.20645156502723694, | |
| "learning_rate": 2.17058181497878e-06, | |
| "loss": 0.8565780520439148, | |
| "step": 1266 | |
| }, | |
| { | |
| "epoch": 2.3223443223443225, | |
| "grad_norm": 0.30140256881713867, | |
| "learning_rate": 2.1586296126346566e-06, | |
| "loss": 0.8535648584365845, | |
| "step": 1268 | |
| }, | |
| { | |
| "epoch": 2.326007326007326, | |
| "grad_norm": 0.27577510476112366, | |
| "learning_rate": 2.1467297200584677e-06, | |
| "loss": 1.2173646688461304, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 2.32967032967033, | |
| "grad_norm": 0.1859835982322693, | |
| "learning_rate": 2.134882323545221e-06, | |
| "loss": 1.0475445985794067, | |
| "step": 1272 | |
| }, | |
| { | |
| "epoch": 2.3333333333333335, | |
| "grad_norm": 0.5028762817382812, | |
| "learning_rate": 2.123087608568088e-06, | |
| "loss": 0.7030253410339355, | |
| "step": 1274 | |
| }, | |
| { | |
| "epoch": 2.336996336996337, | |
| "grad_norm": 0.17414085566997528, | |
| "learning_rate": 2.1113457597754977e-06, | |
| "loss": 1.058994174003601, | |
| "step": 1276 | |
| }, | |
| { | |
| "epoch": 2.340659340659341, | |
| "grad_norm": 0.195421501994133, | |
| "learning_rate": 2.0996569609882555e-06, | |
| "loss": 0.8695497512817383, | |
| "step": 1278 | |
| }, | |
| { | |
| "epoch": 2.3443223443223444, | |
| "grad_norm": 0.1678563356399536, | |
| "learning_rate": 2.0880213951966564e-06, | |
| "loss": 0.7928240299224854, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 2.347985347985348, | |
| "grad_norm": 0.15970492362976074, | |
| "learning_rate": 2.076439244557622e-06, | |
| "loss": 0.6427817344665527, | |
| "step": 1282 | |
| }, | |
| { | |
| "epoch": 2.3516483516483517, | |
| "grad_norm": 0.15121600031852722, | |
| "learning_rate": 2.064910690391849e-06, | |
| "loss": 1.1278434991836548, | |
| "step": 1284 | |
| }, | |
| { | |
| "epoch": 2.3553113553113554, | |
| "grad_norm": 2.746044397354126, | |
| "learning_rate": 2.053435913180976e-06, | |
| "loss": 0.6882444024085999, | |
| "step": 1286 | |
| }, | |
| { | |
| "epoch": 2.358974358974359, | |
| "grad_norm": 0.14493419229984283, | |
| "learning_rate": 2.0420150925647476e-06, | |
| "loss": 0.9737670421600342, | |
| "step": 1288 | |
| }, | |
| { | |
| "epoch": 2.3626373626373627, | |
| "grad_norm": 0.1830594837665558, | |
| "learning_rate": 2.0306484073382144e-06, | |
| "loss": 0.9390268325805664, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 2.3663003663003663, | |
| "grad_norm": 0.17552392184734344, | |
| "learning_rate": 2.019336035448922e-06, | |
| "loss": 0.8130999207496643, | |
| "step": 1292 | |
| }, | |
| { | |
| "epoch": 2.36996336996337, | |
| "grad_norm": 0.4816751182079315, | |
| "learning_rate": 2.008078153994131e-06, | |
| "loss": 0.9279530644416809, | |
| "step": 1294 | |
| }, | |
| { | |
| "epoch": 2.3736263736263736, | |
| "grad_norm": 0.2578529119491577, | |
| "learning_rate": 1.99687493921805e-06, | |
| "loss": 1.3056340217590332, | |
| "step": 1296 | |
| }, | |
| { | |
| "epoch": 2.3772893772893773, | |
| "grad_norm": 0.24960176646709442, | |
| "learning_rate": 1.9857265665090637e-06, | |
| "loss": 1.138514757156372, | |
| "step": 1298 | |
| }, | |
| { | |
| "epoch": 2.380952380952381, | |
| "grad_norm": 0.20973335206508636, | |
| "learning_rate": 1.9746332103969994e-06, | |
| "loss": 1.196106195449829, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 2.3846153846153846, | |
| "grad_norm": 0.4483489990234375, | |
| "learning_rate": 1.9635950445503867e-06, | |
| "loss": 0.952997624874115, | |
| "step": 1302 | |
| }, | |
| { | |
| "epoch": 2.3882783882783882, | |
| "grad_norm": 0.9477534890174866, | |
| "learning_rate": 1.9526122417737396e-06, | |
| "loss": 0.5085421204566956, | |
| "step": 1304 | |
| }, | |
| { | |
| "epoch": 2.391941391941392, | |
| "grad_norm": 0.17980064451694489, | |
| "learning_rate": 1.941684974004857e-06, | |
| "loss": 0.9798279404640198, | |
| "step": 1306 | |
| }, | |
| { | |
| "epoch": 2.3956043956043955, | |
| "grad_norm": 0.21208752691745758, | |
| "learning_rate": 1.930813412312129e-06, | |
| "loss": 1.1446267366409302, | |
| "step": 1308 | |
| }, | |
| { | |
| "epoch": 2.399267399267399, | |
| "grad_norm": 0.14319059252738953, | |
| "learning_rate": 1.919997726891847e-06, | |
| "loss": 0.5433471202850342, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 2.402930402930403, | |
| "grad_norm": 0.25561878085136414, | |
| "learning_rate": 1.909238087065559e-06, | |
| "loss": 1.1503570079803467, | |
| "step": 1312 | |
| }, | |
| { | |
| "epoch": 2.4065934065934065, | |
| "grad_norm": 0.13398070633411407, | |
| "learning_rate": 1.8985346612774058e-06, | |
| "loss": 0.8720892667770386, | |
| "step": 1314 | |
| }, | |
| { | |
| "epoch": 2.41025641025641, | |
| "grad_norm": 0.1498894989490509, | |
| "learning_rate": 1.8878876170914862e-06, | |
| "loss": 1.14559006690979, | |
| "step": 1316 | |
| }, | |
| { | |
| "epoch": 2.413919413919414, | |
| "grad_norm": 0.3363962769508362, | |
| "learning_rate": 1.877297121189233e-06, | |
| "loss": 0.8333287239074707, | |
| "step": 1318 | |
| }, | |
| { | |
| "epoch": 2.4175824175824174, | |
| "grad_norm": 0.15025848150253296, | |
| "learning_rate": 1.8667633393668097e-06, | |
| "loss": 0.8138965368270874, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 2.421245421245421, | |
| "grad_norm": 0.21664276719093323, | |
| "learning_rate": 1.856286436532506e-06, | |
| "loss": 0.689363420009613, | |
| "step": 1322 | |
| }, | |
| { | |
| "epoch": 2.4249084249084247, | |
| "grad_norm": 1.3246759176254272, | |
| "learning_rate": 1.845866576704165e-06, | |
| "loss": 0.7871432900428772, | |
| "step": 1324 | |
| }, | |
| { | |
| "epoch": 2.4285714285714284, | |
| "grad_norm": 0.38431447744369507, | |
| "learning_rate": 1.8355039230066068e-06, | |
| "loss": 0.7976049184799194, | |
| "step": 1326 | |
| }, | |
| { | |
| "epoch": 2.4322344322344325, | |
| "grad_norm": 0.07272528856992722, | |
| "learning_rate": 1.8251986376690806e-06, | |
| "loss": 0.734397292137146, | |
| "step": 1328 | |
| }, | |
| { | |
| "epoch": 2.435897435897436, | |
| "grad_norm": 0.23010677099227905, | |
| "learning_rate": 1.8149508820227258e-06, | |
| "loss": 0.8264967799186707, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 2.4395604395604398, | |
| "grad_norm": 0.2325713038444519, | |
| "learning_rate": 1.8047608164980393e-06, | |
| "loss": 1.1099257469177246, | |
| "step": 1332 | |
| }, | |
| { | |
| "epoch": 2.4432234432234434, | |
| "grad_norm": 0.2141243815422058, | |
| "learning_rate": 1.7946286006223728e-06, | |
| "loss": 0.7992602586746216, | |
| "step": 1334 | |
| }, | |
| { | |
| "epoch": 2.446886446886447, | |
| "grad_norm": 0.3261476755142212, | |
| "learning_rate": 1.7845543930174288e-06, | |
| "loss": 0.7330154776573181, | |
| "step": 1336 | |
| }, | |
| { | |
| "epoch": 2.4505494505494507, | |
| "grad_norm": 0.19834889471530914, | |
| "learning_rate": 1.7745383513967784e-06, | |
| "loss": 1.0567998886108398, | |
| "step": 1338 | |
| }, | |
| { | |
| "epoch": 2.4542124542124544, | |
| "grad_norm": 0.13338837027549744, | |
| "learning_rate": 1.7645806325633975e-06, | |
| "loss": 0.9307959675788879, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 2.457875457875458, | |
| "grad_norm": 0.0941123366355896, | |
| "learning_rate": 1.7546813924072064e-06, | |
| "loss": 0.7225639820098877, | |
| "step": 1342 | |
| }, | |
| { | |
| "epoch": 2.4615384615384617, | |
| "grad_norm": 0.11015522480010986, | |
| "learning_rate": 1.7448407859026267e-06, | |
| "loss": 0.8351444602012634, | |
| "step": 1344 | |
| }, | |
| { | |
| "epoch": 2.4652014652014653, | |
| "grad_norm": 0.16074956953525543, | |
| "learning_rate": 1.7350589671061657e-06, | |
| "loss": 1.1353893280029297, | |
| "step": 1346 | |
| }, | |
| { | |
| "epoch": 2.468864468864469, | |
| "grad_norm": 0.21541282534599304, | |
| "learning_rate": 1.7253360891539963e-06, | |
| "loss": 1.1350133419036865, | |
| "step": 1348 | |
| }, | |
| { | |
| "epoch": 2.4725274725274726, | |
| "grad_norm": 0.18318095803260803, | |
| "learning_rate": 1.7156723042595602e-06, | |
| "loss": 0.7882329821586609, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 2.4761904761904763, | |
| "grad_norm": 0.20827817916870117, | |
| "learning_rate": 1.7060677637111863e-06, | |
| "loss": 0.9048058390617371, | |
| "step": 1352 | |
| }, | |
| { | |
| "epoch": 2.47985347985348, | |
| "grad_norm": 0.3399142622947693, | |
| "learning_rate": 1.6965226178697237e-06, | |
| "loss": 0.988274335861206, | |
| "step": 1354 | |
| }, | |
| { | |
| "epoch": 2.4835164835164836, | |
| "grad_norm": 0.03409822657704353, | |
| "learning_rate": 1.6870370161661852e-06, | |
| "loss": 0.9388930201530457, | |
| "step": 1356 | |
| }, | |
| { | |
| "epoch": 2.4871794871794872, | |
| "grad_norm": 0.11549941450357437, | |
| "learning_rate": 1.6776111070994129e-06, | |
| "loss": 1.1141780614852905, | |
| "step": 1358 | |
| }, | |
| { | |
| "epoch": 2.490842490842491, | |
| "grad_norm": 0.21529677510261536, | |
| "learning_rate": 1.6682450382337445e-06, | |
| "loss": 0.9177558422088623, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 2.4945054945054945, | |
| "grad_norm": 0.21112927794456482, | |
| "learning_rate": 1.65893895619671e-06, | |
| "loss": 0.8482896685600281, | |
| "step": 1362 | |
| }, | |
| { | |
| "epoch": 2.498168498168498, | |
| "grad_norm": 0.3684331476688385, | |
| "learning_rate": 1.6496930066767381e-06, | |
| "loss": 0.8899385333061218, | |
| "step": 1364 | |
| }, | |
| { | |
| "epoch": 2.501831501831502, | |
| "grad_norm": 0.5180490016937256, | |
| "learning_rate": 1.6405073344208652e-06, | |
| "loss": 1.1375821828842163, | |
| "step": 1366 | |
| }, | |
| { | |
| "epoch": 2.5054945054945055, | |
| "grad_norm": 0.14490839838981628, | |
| "learning_rate": 1.6313820832324833e-06, | |
| "loss": 0.8489875793457031, | |
| "step": 1368 | |
| }, | |
| { | |
| "epoch": 2.509157509157509, | |
| "grad_norm": 0.26114216446876526, | |
| "learning_rate": 1.6223173959690766e-06, | |
| "loss": 1.0175533294677734, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 2.5128205128205128, | |
| "grad_norm": 0.07394483685493469, | |
| "learning_rate": 1.6133134145399895e-06, | |
| "loss": 0.679277777671814, | |
| "step": 1372 | |
| }, | |
| { | |
| "epoch": 2.5164835164835164, | |
| "grad_norm": 0.22844818234443665, | |
| "learning_rate": 1.6043702799042097e-06, | |
| "loss": 0.8118609189987183, | |
| "step": 1374 | |
| }, | |
| { | |
| "epoch": 2.52014652014652, | |
| "grad_norm": 0.946811854839325, | |
| "learning_rate": 1.5954881320681541e-06, | |
| "loss": 0.9923216700553894, | |
| "step": 1376 | |
| }, | |
| { | |
| "epoch": 2.5238095238095237, | |
| "grad_norm": 0.46443161368370056, | |
| "learning_rate": 1.586667110083481e-06, | |
| "loss": 0.8106738924980164, | |
| "step": 1378 | |
| }, | |
| { | |
| "epoch": 2.5274725274725274, | |
| "grad_norm": 0.1713973730802536, | |
| "learning_rate": 1.5779073520449115e-06, | |
| "loss": 0.9600465893745422, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 2.531135531135531, | |
| "grad_norm": 0.023757750168442726, | |
| "learning_rate": 1.5692089950880671e-06, | |
| "loss": 0.9061873555183411, | |
| "step": 1382 | |
| }, | |
| { | |
| "epoch": 2.5347985347985347, | |
| "grad_norm": 0.13470512628555298, | |
| "learning_rate": 1.5605721753873273e-06, | |
| "loss": 0.8136062622070312, | |
| "step": 1384 | |
| }, | |
| { | |
| "epoch": 2.5384615384615383, | |
| "grad_norm": 0.6172438859939575, | |
| "learning_rate": 1.5519970281536947e-06, | |
| "loss": 1.1290100812911987, | |
| "step": 1386 | |
| }, | |
| { | |
| "epoch": 2.542124542124542, | |
| "grad_norm": 0.229129359126091, | |
| "learning_rate": 1.5434836876326723e-06, | |
| "loss": 0.7960153222084045, | |
| "step": 1388 | |
| }, | |
| { | |
| "epoch": 2.5457875457875456, | |
| "grad_norm": 0.23978465795516968, | |
| "learning_rate": 1.5350322871021738e-06, | |
| "loss": 0.8506826162338257, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 2.5494505494505493, | |
| "grad_norm": 0.4824867844581604, | |
| "learning_rate": 1.5266429588704294e-06, | |
| "loss": 1.025938868522644, | |
| "step": 1392 | |
| }, | |
| { | |
| "epoch": 2.553113553113553, | |
| "grad_norm": 0.18570579588413239, | |
| "learning_rate": 1.518315834273915e-06, | |
| "loss": 0.7308077216148376, | |
| "step": 1394 | |
| }, | |
| { | |
| "epoch": 2.5567765567765566, | |
| "grad_norm": 0.11341089010238647, | |
| "learning_rate": 1.510051043675297e-06, | |
| "loss": 0.37588629126548767, | |
| "step": 1396 | |
| }, | |
| { | |
| "epoch": 2.5604395604395602, | |
| "grad_norm": 0.19933566451072693, | |
| "learning_rate": 1.5018487164613931e-06, | |
| "loss": 1.1432240009307861, | |
| "step": 1398 | |
| }, | |
| { | |
| "epoch": 2.564102564102564, | |
| "grad_norm": 0.2133670151233673, | |
| "learning_rate": 1.4937089810411428e-06, | |
| "loss": 1.141809105873108, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 2.5677655677655675, | |
| "grad_norm": 0.3012371361255646, | |
| "learning_rate": 1.4856319648436034e-06, | |
| "loss": 0.9912227988243103, | |
| "step": 1402 | |
| }, | |
| { | |
| "epoch": 2.571428571428571, | |
| "grad_norm": 0.16756081581115723, | |
| "learning_rate": 1.4776177943159484e-06, | |
| "loss": 1.1359539031982422, | |
| "step": 1404 | |
| }, | |
| { | |
| "epoch": 2.575091575091575, | |
| "grad_norm": 0.9049347043037415, | |
| "learning_rate": 1.4696665949214889e-06, | |
| "loss": 0.5541988611221313, | |
| "step": 1406 | |
| }, | |
| { | |
| "epoch": 2.578754578754579, | |
| "grad_norm": 0.1514206826686859, | |
| "learning_rate": 1.4617784911377158e-06, | |
| "loss": 1.2034826278686523, | |
| "step": 1408 | |
| }, | |
| { | |
| "epoch": 2.5824175824175826, | |
| "grad_norm": 0.5452237129211426, | |
| "learning_rate": 1.4539536064543453e-06, | |
| "loss": 0.9588869214057922, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 2.586080586080586, | |
| "grad_norm": 0.43010222911834717, | |
| "learning_rate": 1.446192063371385e-06, | |
| "loss": 0.90684974193573, | |
| "step": 1412 | |
| }, | |
| { | |
| "epoch": 2.58974358974359, | |
| "grad_norm": 0.2458840310573578, | |
| "learning_rate": 1.4384939833972197e-06, | |
| "loss": 1.0172938108444214, | |
| "step": 1414 | |
| }, | |
| { | |
| "epoch": 2.5934065934065935, | |
| "grad_norm": 0.1467057466506958, | |
| "learning_rate": 1.4308594870467056e-06, | |
| "loss": 1.1102759838104248, | |
| "step": 1416 | |
| }, | |
| { | |
| "epoch": 2.597069597069597, | |
| "grad_norm": 0.6239453554153442, | |
| "learning_rate": 1.4232886938392893e-06, | |
| "loss": 0.8101827502250671, | |
| "step": 1418 | |
| }, | |
| { | |
| "epoch": 2.600732600732601, | |
| "grad_norm": 0.18740800023078918, | |
| "learning_rate": 1.4157817222971312e-06, | |
| "loss": 1.1065106391906738, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 2.6043956043956045, | |
| "grad_norm": 0.5177209377288818, | |
| "learning_rate": 1.4083386899432489e-06, | |
| "loss": 1.074950933456421, | |
| "step": 1422 | |
| }, | |
| { | |
| "epoch": 2.608058608058608, | |
| "grad_norm": 0.18076905608177185, | |
| "learning_rate": 1.4009597132996842e-06, | |
| "loss": 1.2177599668502808, | |
| "step": 1424 | |
| }, | |
| { | |
| "epoch": 2.6117216117216118, | |
| "grad_norm": 0.7345294952392578, | |
| "learning_rate": 1.393644907885674e-06, | |
| "loss": 1.3366779088974, | |
| "step": 1426 | |
| }, | |
| { | |
| "epoch": 2.6153846153846154, | |
| "grad_norm": 0.14318975806236267, | |
| "learning_rate": 1.3863943882158417e-06, | |
| "loss": 1.1753196716308594, | |
| "step": 1428 | |
| }, | |
| { | |
| "epoch": 2.619047619047619, | |
| "grad_norm": 0.1478182077407837, | |
| "learning_rate": 1.379208267798406e-06, | |
| "loss": 1.1063532829284668, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 2.6227106227106227, | |
| "grad_norm": 0.274620920419693, | |
| "learning_rate": 1.3720866591334045e-06, | |
| "loss": 1.0099287033081055, | |
| "step": 1432 | |
| }, | |
| { | |
| "epoch": 2.6263736263736264, | |
| "grad_norm": 0.365405797958374, | |
| "learning_rate": 1.3650296737109292e-06, | |
| "loss": 1.0578190088272095, | |
| "step": 1434 | |
| }, | |
| { | |
| "epoch": 2.63003663003663, | |
| "grad_norm": 0.21154209971427917, | |
| "learning_rate": 1.3580374220093868e-06, | |
| "loss": 1.346867322921753, | |
| "step": 1436 | |
| }, | |
| { | |
| "epoch": 2.6336996336996337, | |
| "grad_norm": 0.19875630736351013, | |
| "learning_rate": 1.3511100134937625e-06, | |
| "loss": 1.1731492280960083, | |
| "step": 1438 | |
| }, | |
| { | |
| "epoch": 2.6373626373626373, | |
| "grad_norm": 0.3672538101673126, | |
| "learning_rate": 1.3442475566139093e-06, | |
| "loss": 1.13294517993927, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 2.641025641025641, | |
| "grad_norm": 0.3166159689426422, | |
| "learning_rate": 1.3374501588028546e-06, | |
| "loss": 1.1464821100234985, | |
| "step": 1442 | |
| }, | |
| { | |
| "epoch": 2.6446886446886446, | |
| "grad_norm": 0.28593555092811584, | |
| "learning_rate": 1.3307179264751082e-06, | |
| "loss": 1.1436622142791748, | |
| "step": 1444 | |
| }, | |
| { | |
| "epoch": 2.6483516483516483, | |
| "grad_norm": 0.3862296938896179, | |
| "learning_rate": 1.3240509650250083e-06, | |
| "loss": 0.8166991472244263, | |
| "step": 1446 | |
| }, | |
| { | |
| "epoch": 2.652014652014652, | |
| "grad_norm": 0.17282630503177643, | |
| "learning_rate": 1.3174493788250605e-06, | |
| "loss": 0.8451816439628601, | |
| "step": 1448 | |
| }, | |
| { | |
| "epoch": 2.6556776556776556, | |
| "grad_norm": 0.22388476133346558, | |
| "learning_rate": 1.3109132712243117e-06, | |
| "loss": 1.1225379705429077, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 2.659340659340659, | |
| "grad_norm": 0.15126027166843414, | |
| "learning_rate": 1.3044427445467276e-06, | |
| "loss": 0.7850918769836426, | |
| "step": 1452 | |
| }, | |
| { | |
| "epoch": 2.663003663003663, | |
| "grad_norm": 0.05748463794589043, | |
| "learning_rate": 1.2980379000895946e-06, | |
| "loss": 0.7346314191818237, | |
| "step": 1454 | |
| }, | |
| { | |
| "epoch": 2.6666666666666665, | |
| "grad_norm": 0.4359929859638214, | |
| "learning_rate": 1.2916988381219303e-06, | |
| "loss": 1.1165975332260132, | |
| "step": 1456 | |
| }, | |
| { | |
| "epoch": 2.67032967032967, | |
| "grad_norm": 0.20032697916030884, | |
| "learning_rate": 1.2854256578829148e-06, | |
| "loss": 0.7857989072799683, | |
| "step": 1458 | |
| }, | |
| { | |
| "epoch": 2.6739926739926743, | |
| "grad_norm": 0.06527489423751831, | |
| "learning_rate": 1.2792184575803392e-06, | |
| "loss": 0.8251097798347473, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 2.677655677655678, | |
| "grad_norm": 0.28165748715400696, | |
| "learning_rate": 1.2730773343890662e-06, | |
| "loss": 0.8670933842658997, | |
| "step": 1462 | |
| }, | |
| { | |
| "epoch": 2.6813186813186816, | |
| "grad_norm": 0.3216964602470398, | |
| "learning_rate": 1.2670023844495071e-06, | |
| "loss": 1.1086490154266357, | |
| "step": 1464 | |
| }, | |
| { | |
| "epoch": 2.684981684981685, | |
| "grad_norm": 0.1745329648256302, | |
| "learning_rate": 1.2609937028661226e-06, | |
| "loss": 0.908940315246582, | |
| "step": 1466 | |
| }, | |
| { | |
| "epoch": 2.688644688644689, | |
| "grad_norm": 0.12318509072065353, | |
| "learning_rate": 1.2550513837059261e-06, | |
| "loss": 1.0815136432647705, | |
| "step": 1468 | |
| }, | |
| { | |
| "epoch": 2.6923076923076925, | |
| "grad_norm": 0.37340617179870605, | |
| "learning_rate": 1.2491755199970188e-06, | |
| "loss": 0.6923399567604065, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 2.695970695970696, | |
| "grad_norm": 0.20242176949977875, | |
| "learning_rate": 1.2433662037271263e-06, | |
| "loss": 0.8187569379806519, | |
| "step": 1472 | |
| }, | |
| { | |
| "epoch": 2.6996336996337, | |
| "grad_norm": 0.6501942873001099, | |
| "learning_rate": 1.2376235258421628e-06, | |
| "loss": 0.4654901623725891, | |
| "step": 1474 | |
| }, | |
| { | |
| "epoch": 2.7032967032967035, | |
| "grad_norm": 0.21495883166790009, | |
| "learning_rate": 1.2319475762448084e-06, | |
| "loss": 1.17780339717865, | |
| "step": 1476 | |
| }, | |
| { | |
| "epoch": 2.706959706959707, | |
| "grad_norm": 0.34030434489250183, | |
| "learning_rate": 1.2263384437930969e-06, | |
| "loss": 0.7136227488517761, | |
| "step": 1478 | |
| }, | |
| { | |
| "epoch": 2.7106227106227108, | |
| "grad_norm": 0.8260899782180786, | |
| "learning_rate": 1.2207962162990287e-06, | |
| "loss": 1.1193125247955322, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 2.7142857142857144, | |
| "grad_norm": 0.217088520526886, | |
| "learning_rate": 1.2153209805271943e-06, | |
| "loss": 1.132580280303955, | |
| "step": 1482 | |
| }, | |
| { | |
| "epoch": 2.717948717948718, | |
| "grad_norm": 0.6372915506362915, | |
| "learning_rate": 1.2099128221934164e-06, | |
| "loss": 1.0393377542495728, | |
| "step": 1484 | |
| }, | |
| { | |
| "epoch": 2.7216117216117217, | |
| "grad_norm": 0.14800269901752472, | |
| "learning_rate": 1.2045718259634083e-06, | |
| "loss": 1.1727163791656494, | |
| "step": 1486 | |
| }, | |
| { | |
| "epoch": 2.7252747252747254, | |
| "grad_norm": 0.1804278939962387, | |
| "learning_rate": 1.1992980754514497e-06, | |
| "loss": 1.1531107425689697, | |
| "step": 1488 | |
| }, | |
| { | |
| "epoch": 2.728937728937729, | |
| "grad_norm": 0.4734005331993103, | |
| "learning_rate": 1.1940916532190739e-06, | |
| "loss": 0.5333794951438904, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 2.7326007326007327, | |
| "grad_norm": 0.24780096113681793, | |
| "learning_rate": 1.1889526407737776e-06, | |
| "loss": 1.1573615074157715, | |
| "step": 1492 | |
| }, | |
| { | |
| "epoch": 2.7362637362637363, | |
| "grad_norm": 0.2443196028470993, | |
| "learning_rate": 1.1838811185677466e-06, | |
| "loss": 0.6827471256256104, | |
| "step": 1494 | |
| }, | |
| { | |
| "epoch": 2.73992673992674, | |
| "grad_norm": 0.5809857249259949, | |
| "learning_rate": 1.1788771659965935e-06, | |
| "loss": 1.2393468618392944, | |
| "step": 1496 | |
| }, | |
| { | |
| "epoch": 2.7435897435897436, | |
| "grad_norm": 0.4661528170108795, | |
| "learning_rate": 1.173940861398117e-06, | |
| "loss": 1.1121079921722412, | |
| "step": 1498 | |
| }, | |
| { | |
| "epoch": 2.7472527472527473, | |
| "grad_norm": 0.27154994010925293, | |
| "learning_rate": 1.1690722820510723e-06, | |
| "loss": 0.7914168834686279, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 2.750915750915751, | |
| "grad_norm": 0.18499144911766052, | |
| "learning_rate": 1.164271504173964e-06, | |
| "loss": 1.0800108909606934, | |
| "step": 1502 | |
| }, | |
| { | |
| "epoch": 2.7545787545787546, | |
| "grad_norm": 0.37535104155540466, | |
| "learning_rate": 1.159538602923855e-06, | |
| "loss": 1.1396592855453491, | |
| "step": 1504 | |
| }, | |
| { | |
| "epoch": 2.758241758241758, | |
| "grad_norm": 0.31983864307403564, | |
| "learning_rate": 1.1548736523951822e-06, | |
| "loss": 1.1717373132705688, | |
| "step": 1506 | |
| }, | |
| { | |
| "epoch": 2.761904761904762, | |
| "grad_norm": 0.04418055713176727, | |
| "learning_rate": 1.1502767256186053e-06, | |
| "loss": 0.9536030292510986, | |
| "step": 1508 | |
| }, | |
| { | |
| "epoch": 2.7655677655677655, | |
| "grad_norm": 0.13261856138706207, | |
| "learning_rate": 1.1457478945598591e-06, | |
| "loss": 1.0200964212417603, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 2.769230769230769, | |
| "grad_norm": 0.29484888911247253, | |
| "learning_rate": 1.1412872301186253e-06, | |
| "loss": 0.9747733473777771, | |
| "step": 1512 | |
| }, | |
| { | |
| "epoch": 2.772893772893773, | |
| "grad_norm": 0.1159660741686821, | |
| "learning_rate": 1.1368948021274269e-06, | |
| "loss": 1.116559624671936, | |
| "step": 1514 | |
| }, | |
| { | |
| "epoch": 2.7765567765567765, | |
| "grad_norm": 0.3250535726547241, | |
| "learning_rate": 1.1325706793505317e-06, | |
| "loss": 1.064975380897522, | |
| "step": 1516 | |
| }, | |
| { | |
| "epoch": 2.78021978021978, | |
| "grad_norm": 0.18949034810066223, | |
| "learning_rate": 1.1283149294828773e-06, | |
| "loss": 1.0048205852508545, | |
| "step": 1518 | |
| }, | |
| { | |
| "epoch": 2.7838827838827838, | |
| "grad_norm": 0.39678439497947693, | |
| "learning_rate": 1.1241276191490097e-06, | |
| "loss": 0.5427751541137695, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 2.7875457875457874, | |
| "grad_norm": 1.3462748527526855, | |
| "learning_rate": 1.120008813902044e-06, | |
| "loss": 0.7995284199714661, | |
| "step": 1522 | |
| }, | |
| { | |
| "epoch": 2.791208791208791, | |
| "grad_norm": 0.15732638537883759, | |
| "learning_rate": 1.1159585782226325e-06, | |
| "loss": 0.8446041345596313, | |
| "step": 1524 | |
| }, | |
| { | |
| "epoch": 2.7948717948717947, | |
| "grad_norm": 0.2313084453344345, | |
| "learning_rate": 1.1119769755179595e-06, | |
| "loss": 1.1773189306259155, | |
| "step": 1526 | |
| }, | |
| { | |
| "epoch": 2.7985347985347984, | |
| "grad_norm": 0.14309756457805634, | |
| "learning_rate": 1.1080640681207485e-06, | |
| "loss": 1.1459267139434814, | |
| "step": 1528 | |
| }, | |
| { | |
| "epoch": 2.802197802197802, | |
| "grad_norm": 0.1798963099718094, | |
| "learning_rate": 1.104219917288284e-06, | |
| "loss": 1.1224641799926758, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 2.8058608058608057, | |
| "grad_norm": 0.24613995850086212, | |
| "learning_rate": 1.100444583201454e-06, | |
| "loss": 1.016000509262085, | |
| "step": 1532 | |
| }, | |
| { | |
| "epoch": 2.8095238095238093, | |
| "grad_norm": 0.178895965218544, | |
| "learning_rate": 1.0967381249638085e-06, | |
| "loss": 0.7900265455245972, | |
| "step": 1534 | |
| }, | |
| { | |
| "epoch": 2.813186813186813, | |
| "grad_norm": 0.2273297756910324, | |
| "learning_rate": 1.0931006006006324e-06, | |
| "loss": 1.347412109375, | |
| "step": 1536 | |
| }, | |
| { | |
| "epoch": 2.8168498168498166, | |
| "grad_norm": 0.21277707815170288, | |
| "learning_rate": 1.089532067058039e-06, | |
| "loss": 0.9508707523345947, | |
| "step": 1538 | |
| }, | |
| { | |
| "epoch": 2.8205128205128203, | |
| "grad_norm": 0.19118960201740265, | |
| "learning_rate": 1.0860325802020772e-06, | |
| "loss": 0.8098848462104797, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 2.824175824175824, | |
| "grad_norm": 0.14161139726638794, | |
| "learning_rate": 1.0826021948178566e-06, | |
| "loss": 0.9036679863929749, | |
| "step": 1542 | |
| }, | |
| { | |
| "epoch": 2.8278388278388276, | |
| "grad_norm": 0.1456916779279709, | |
| "learning_rate": 1.0792409646086922e-06, | |
| "loss": 1.1096038818359375, | |
| "step": 1544 | |
| }, | |
| { | |
| "epoch": 2.8315018315018317, | |
| "grad_norm": 0.5517901182174683, | |
| "learning_rate": 1.0759489421952602e-06, | |
| "loss": 1.1584891080856323, | |
| "step": 1546 | |
| }, | |
| { | |
| "epoch": 2.8351648351648353, | |
| "grad_norm": 0.11882911622524261, | |
| "learning_rate": 1.0727261791147784e-06, | |
| "loss": 1.2254421710968018, | |
| "step": 1548 | |
| }, | |
| { | |
| "epoch": 2.838827838827839, | |
| "grad_norm": 0.23024114966392517, | |
| "learning_rate": 1.0695727258201938e-06, | |
| "loss": 0.8998859524726868, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 2.8424908424908426, | |
| "grad_norm": 0.07301481068134308, | |
| "learning_rate": 1.0664886316793988e-06, | |
| "loss": 0.6015828847885132, | |
| "step": 1552 | |
| }, | |
| { | |
| "epoch": 2.8461538461538463, | |
| "grad_norm": 0.12812356650829315, | |
| "learning_rate": 1.0634739449744534e-06, | |
| "loss": 1.156007170677185, | |
| "step": 1554 | |
| }, | |
| { | |
| "epoch": 2.84981684981685, | |
| "grad_norm": 0.1785007119178772, | |
| "learning_rate": 1.0605287129008337e-06, | |
| "loss": 1.0002185106277466, | |
| "step": 1556 | |
| }, | |
| { | |
| "epoch": 2.8534798534798536, | |
| "grad_norm": 0.5185611844062805, | |
| "learning_rate": 1.0576529815666892e-06, | |
| "loss": 1.142732858657837, | |
| "step": 1558 | |
| }, | |
| { | |
| "epoch": 2.857142857142857, | |
| "grad_norm": 0.2965824007987976, | |
| "learning_rate": 1.0548467959921217e-06, | |
| "loss": 0.6404973864555359, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 2.860805860805861, | |
| "grad_norm": 0.1833876669406891, | |
| "learning_rate": 1.0521102001084835e-06, | |
| "loss": 0.7605476975440979, | |
| "step": 1562 | |
| }, | |
| { | |
| "epoch": 2.8644688644688645, | |
| "grad_norm": 0.5239128470420837, | |
| "learning_rate": 1.0494432367576862e-06, | |
| "loss": 0.9357516169548035, | |
| "step": 1564 | |
| }, | |
| { | |
| "epoch": 2.868131868131868, | |
| "grad_norm": 0.3669067621231079, | |
| "learning_rate": 1.0468459476915317e-06, | |
| "loss": 0.7723519206047058, | |
| "step": 1566 | |
| }, | |
| { | |
| "epoch": 2.871794871794872, | |
| "grad_norm": 0.40440791845321655, | |
| "learning_rate": 1.044318373571057e-06, | |
| "loss": 0.735063374042511, | |
| "step": 1568 | |
| }, | |
| { | |
| "epoch": 2.8754578754578755, | |
| "grad_norm": 0.09417320787906647, | |
| "learning_rate": 1.0418605539659014e-06, | |
| "loss": 0.7597877979278564, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 2.879120879120879, | |
| "grad_norm": 0.40769991278648376, | |
| "learning_rate": 1.0394725273536817e-06, | |
| "loss": 0.8062982559204102, | |
| "step": 1572 | |
| }, | |
| { | |
| "epoch": 2.8827838827838828, | |
| "grad_norm": 0.2923339307308197, | |
| "learning_rate": 1.0371543311193944e-06, | |
| "loss": 1.0577229261398315, | |
| "step": 1574 | |
| }, | |
| { | |
| "epoch": 2.8864468864468864, | |
| "grad_norm": 0.3852575719356537, | |
| "learning_rate": 1.034906001554827e-06, | |
| "loss": 1.0765886306762695, | |
| "step": 1576 | |
| }, | |
| { | |
| "epoch": 2.89010989010989, | |
| "grad_norm": 0.12469828873872757, | |
| "learning_rate": 1.0327275738579934e-06, | |
| "loss": 0.9185457229614258, | |
| "step": 1578 | |
| }, | |
| { | |
| "epoch": 2.8937728937728937, | |
| "grad_norm": 0.3026789724826813, | |
| "learning_rate": 1.0306190821325792e-06, | |
| "loss": 1.2011407613754272, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 2.8974358974358974, | |
| "grad_norm": 0.2271515429019928, | |
| "learning_rate": 1.0285805593874105e-06, | |
| "loss": 0.8856844305992126, | |
| "step": 1582 | |
| }, | |
| { | |
| "epoch": 2.901098901098901, | |
| "grad_norm": 0.11647852510213852, | |
| "learning_rate": 1.026612037535935e-06, | |
| "loss": 0.8170561790466309, | |
| "step": 1584 | |
| }, | |
| { | |
| "epoch": 2.9047619047619047, | |
| "grad_norm": 0.12595300376415253, | |
| "learning_rate": 1.0247135473957253e-06, | |
| "loss": 0.8300210237503052, | |
| "step": 1586 | |
| }, | |
| { | |
| "epoch": 2.9084249084249083, | |
| "grad_norm": 0.21114195883274078, | |
| "learning_rate": 1.0228851186879932e-06, | |
| "loss": 1.1618390083312988, | |
| "step": 1588 | |
| }, | |
| { | |
| "epoch": 2.912087912087912, | |
| "grad_norm": 0.2155926376581192, | |
| "learning_rate": 1.0211267800371263e-06, | |
| "loss": 1.2564477920532227, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 2.9157509157509156, | |
| "grad_norm": 0.40559911727905273, | |
| "learning_rate": 1.01943855897024e-06, | |
| "loss": 0.628135085105896, | |
| "step": 1592 | |
| }, | |
| { | |
| "epoch": 2.9194139194139193, | |
| "grad_norm": 0.22793929278850555, | |
| "learning_rate": 1.0178204819167451e-06, | |
| "loss": 1.1933345794677734, | |
| "step": 1594 | |
| }, | |
| { | |
| "epoch": 2.9230769230769234, | |
| "grad_norm": 0.30155590176582336, | |
| "learning_rate": 1.0162725742079355e-06, | |
| "loss": 0.804075300693512, | |
| "step": 1596 | |
| }, | |
| { | |
| "epoch": 2.926739926739927, | |
| "grad_norm": 0.4184918999671936, | |
| "learning_rate": 1.0147948600765919e-06, | |
| "loss": 1.19660484790802, | |
| "step": 1598 | |
| }, | |
| { | |
| "epoch": 2.9304029304029307, | |
| "grad_norm": 0.1114964559674263, | |
| "learning_rate": 1.0133873626565994e-06, | |
| "loss": 0.8411705493927002, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 2.9340659340659343, | |
| "grad_norm": 0.19453909993171692, | |
| "learning_rate": 1.0120501039825902e-06, | |
| "loss": 1.1576671600341797, | |
| "step": 1602 | |
| }, | |
| { | |
| "epoch": 2.937728937728938, | |
| "grad_norm": 0.06379074603319168, | |
| "learning_rate": 1.0107831049895937e-06, | |
| "loss": 0.9160769581794739, | |
| "step": 1604 | |
| }, | |
| { | |
| "epoch": 2.9413919413919416, | |
| "grad_norm": 0.19216328859329224, | |
| "learning_rate": 1.009586385512713e-06, | |
| "loss": 1.1501901149749756, | |
| "step": 1606 | |
| }, | |
| { | |
| "epoch": 2.9450549450549453, | |
| "grad_norm": 1.06039297580719, | |
| "learning_rate": 1.0084599642868117e-06, | |
| "loss": 0.7293557524681091, | |
| "step": 1608 | |
| }, | |
| { | |
| "epoch": 2.948717948717949, | |
| "grad_norm": 0.30301105976104736, | |
| "learning_rate": 1.0074038589462206e-06, | |
| "loss": 1.062568187713623, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 2.9523809523809526, | |
| "grad_norm": 0.22648635506629944, | |
| "learning_rate": 1.0064180860244631e-06, | |
| "loss": 0.9736372828483582, | |
| "step": 1612 | |
| }, | |
| { | |
| "epoch": 2.956043956043956, | |
| "grad_norm": 0.6904452443122864, | |
| "learning_rate": 1.0055026609539963e-06, | |
| "loss": 0.7423111796379089, | |
| "step": 1614 | |
| }, | |
| { | |
| "epoch": 2.95970695970696, | |
| "grad_norm": 0.1181085854768753, | |
| "learning_rate": 1.004657598065967e-06, | |
| "loss": 0.8746036291122437, | |
| "step": 1616 | |
| }, | |
| { | |
| "epoch": 2.9633699633699635, | |
| "grad_norm": 0.25594648718833923, | |
| "learning_rate": 1.0038829105899911e-06, | |
| "loss": 1.4269702434539795, | |
| "step": 1618 | |
| }, | |
| { | |
| "epoch": 2.967032967032967, | |
| "grad_norm": 0.6465041041374207, | |
| "learning_rate": 1.0031786106539428e-06, | |
| "loss": 1.2628575563430786, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 2.970695970695971, | |
| "grad_norm": 0.17348702251911163, | |
| "learning_rate": 1.0025447092837677e-06, | |
| "loss": 0.964820921421051, | |
| "step": 1622 | |
| }, | |
| { | |
| "epoch": 2.9743589743589745, | |
| "grad_norm": 3.296407699584961, | |
| "learning_rate": 1.0019812164033077e-06, | |
| "loss": 0.7985995411872864, | |
| "step": 1624 | |
| }, | |
| { | |
| "epoch": 2.978021978021978, | |
| "grad_norm": 0.11664870381355286, | |
| "learning_rate": 1.0014881408341481e-06, | |
| "loss": 0.9173464775085449, | |
| "step": 1626 | |
| }, | |
| { | |
| "epoch": 2.9816849816849818, | |
| "grad_norm": 0.10260229557752609, | |
| "learning_rate": 1.0010654902954773e-06, | |
| "loss": 0.9848383069038391, | |
| "step": 1628 | |
| }, | |
| { | |
| "epoch": 2.9853479853479854, | |
| "grad_norm": 0.200631782412529, | |
| "learning_rate": 1.0007132714039676e-06, | |
| "loss": 1.4417872428894043, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 2.989010989010989, | |
| "grad_norm": 0.32539039850234985, | |
| "learning_rate": 1.0004314896736694e-06, | |
| "loss": 1.0627717971801758, | |
| "step": 1632 | |
| }, | |
| { | |
| "epoch": 2.9926739926739927, | |
| "grad_norm": 0.17502747476100922, | |
| "learning_rate": 1.0002201495159287e-06, | |
| "loss": 0.8705639839172363, | |
| "step": 1634 | |
| }, | |
| { | |
| "epoch": 2.9963369963369964, | |
| "grad_norm": 0.19851884245872498, | |
| "learning_rate": 1.0000792542393144e-06, | |
| "loss": 1.3285937309265137, | |
| "step": 1636 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "grad_norm": 0.10878675431013107, | |
| "learning_rate": 1.0000088060495672e-06, | |
| "loss": 1.0932306051254272, | |
| "step": 1638 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "step": 1638, | |
| "total_flos": 8.4482141520606e+18, | |
| "train_loss": 1.0599846049178943, | |
| "train_runtime": 55254.3839, | |
| "train_samples_per_second": 0.711, | |
| "train_steps_per_second": 0.03 | |
| } | |
| ], | |
| "logging_steps": 2, | |
| "max_steps": 1638, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 99999, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": false, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 8.4482141520606e+18, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |