Instructions to use furproxy/27b-7-lora with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- PEFT
How to use furproxy/27b-7-lora with PEFT:
```python
from peft import PeftModel
from transformers import AutoModelForCausalLM

base_model = AutoModelForCausalLM.from_pretrained("/workspace/models/Qwen3.6-27B")
model = PeftModel.from_pretrained(base_model, "furproxy/27b-7-lora")
```
- Transformers
How to use furproxy/27b-7-lora with Transformers:
```python
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("text-generation", model="furproxy/27b-7-lora")
messages = [
    {"role": "user", "content": "Who are you?"},
]
pipe(messages)

# Load model directly
from transformers import AutoModel

model = AutoModel.from_pretrained("furproxy/27b-7-lora", dtype="auto")
```
- Notebooks
- Google Colab
- Kaggle
- Local Apps Settings
- vLLM
How to use furproxy/27b-7-lora with vLLM:
Install from pip and serve model
```bash
# Install vLLM from pip:
pip install vllm

# Start the vLLM server:
vllm serve "furproxy/27b-7-lora"

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/chat/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "furproxy/27b-7-lora",
        "messages": [
            {
                "role": "user",
                "content": "What is the capital of France?"
            }
        ]
    }'
```
Use Docker
docker model run hf.co/furproxy/27b-7-lora
- SGLang
How to use furproxy/27b-7-lora with SGLang:
Install from pip and serve model
```bash
# Install SGLang from pip:
pip install sglang

# Start the SGLang server:
python3 -m sglang.launch_server \
    --model-path "furproxy/27b-7-lora" \
    --host 0.0.0.0 \
    --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "furproxy/27b-7-lora",
        "messages": [
            {
                "role": "user",
                "content": "What is the capital of France?"
            }
        ]
    }'
```
Use Docker images
```bash
docker run --gpus all \
    --shm-size 32g \
    -p 30000:30000 \
    -v ~/.cache/huggingface:/root/.cache/huggingface \
    --env "HF_TOKEN=<secret>" \
    --ipc=host \
    lmsysorg/sglang:latest \
    python3 -m sglang.launch_server \
        --model-path "furproxy/27b-7-lora" \
        --host 0.0.0.0 \
        --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "furproxy/27b-7-lora",
        "messages": [
            {
                "role": "user",
                "content": "What is the capital of France?"
            }
        ]
    }'
```
- Docker Model Runner
How to use furproxy/27b-7-lora with Docker Model Runner:
docker model run hf.co/furproxy/27b-7-lora
| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 3.0, | |
| "eval_steps": 500, | |
| "global_step": 1638, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.003663003663003663, | |
| "grad_norm": 2.7161898612976074, | |
| "learning_rate": 1.0000000000000002e-06, | |
| "loss": 2.6832668781280518, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.007326007326007326, | |
| "grad_norm": 1.0459221601486206, | |
| "learning_rate": 3e-06, | |
| "loss": 1.6646876335144043, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.01098901098901099, | |
| "grad_norm": 0.2796992063522339, | |
| "learning_rate": 5e-06, | |
| "loss": 1.8732850551605225, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.014652014652014652, | |
| "grad_norm": 0.3104994297027588, | |
| "learning_rate": 7.000000000000001e-06, | |
| "loss": 1.9880081415176392, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.018315018315018316, | |
| "grad_norm": 0.17194640636444092, | |
| "learning_rate": 9e-06, | |
| "loss": 2.0404136180877686, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.02197802197802198, | |
| "grad_norm": 0.6366355419158936, | |
| "learning_rate": 1.1000000000000001e-05, | |
| "loss": 1.7833327054977417, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.02564102564102564, | |
| "grad_norm": 0.19716234505176544, | |
| "learning_rate": 1.3000000000000001e-05, | |
| "loss": 1.6680744886398315, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.029304029304029304, | |
| "grad_norm": 0.3166082799434662, | |
| "learning_rate": 1.5e-05, | |
| "loss": 1.5191091299057007, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.03296703296703297, | |
| "grad_norm": 0.6405833959579468, | |
| "learning_rate": 1.7000000000000003e-05, | |
| "loss": 1.437489628791809, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.03663003663003663, | |
| "grad_norm": 0.10038357973098755, | |
| "learning_rate": 1.9e-05, | |
| "loss": 1.6290624141693115, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.040293040293040296, | |
| "grad_norm": 0.311852365732193, | |
| "learning_rate": 2.1e-05, | |
| "loss": 0.8650764226913452, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.04395604395604396, | |
| "grad_norm": 0.24845249950885773, | |
| "learning_rate": 2.3000000000000003e-05, | |
| "loss": 0.9759135842323303, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.047619047619047616, | |
| "grad_norm": 0.32957103848457336, | |
| "learning_rate": 2.5e-05, | |
| "loss": 1.30423903465271, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.05128205128205128, | |
| "grad_norm": 0.2035657912492752, | |
| "learning_rate": 2.7000000000000002e-05, | |
| "loss": 1.0214941501617432, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.054945054945054944, | |
| "grad_norm": 0.4259459972381592, | |
| "learning_rate": 2.9e-05, | |
| "loss": 1.1116687059402466, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.05860805860805861, | |
| "grad_norm": 0.295806884765625, | |
| "learning_rate": 3.1e-05, | |
| "loss": 1.0386732816696167, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.06227106227106227, | |
| "grad_norm": 0.05838385224342346, | |
| "learning_rate": 3.3e-05, | |
| "loss": 1.1950305700302124, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.06593406593406594, | |
| "grad_norm": 0.0978633388876915, | |
| "learning_rate": 3.5e-05, | |
| "loss": 1.5224722623825073, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.0695970695970696, | |
| "grad_norm": 0.3066957890987396, | |
| "learning_rate": 3.7e-05, | |
| "loss": 1.309075951576233, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.07326007326007326, | |
| "grad_norm": 0.160082146525383, | |
| "learning_rate": 3.9000000000000006e-05, | |
| "loss": 1.3190542459487915, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.07692307692307693, | |
| "grad_norm": 0.34093159437179565, | |
| "learning_rate": 4.1e-05, | |
| "loss": 0.985637903213501, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.08058608058608059, | |
| "grad_norm": 0.3740093410015106, | |
| "learning_rate": 4.3e-05, | |
| "loss": 1.4261013269424438, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.08424908424908426, | |
| "grad_norm": 0.2005496323108673, | |
| "learning_rate": 4.5e-05, | |
| "loss": 1.5266393423080444, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.08791208791208792, | |
| "grad_norm": 0.13309991359710693, | |
| "learning_rate": 4.7e-05, | |
| "loss": 0.9458646774291992, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.09157509157509157, | |
| "grad_norm": 0.15211951732635498, | |
| "learning_rate": 4.9e-05, | |
| "loss": 1.4107502698898315, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.09523809523809523, | |
| "grad_norm": 0.22298896312713623, | |
| "learning_rate": 4.9999955969752164e-05, | |
| "loss": 0.7173015475273132, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.0989010989010989, | |
| "grad_norm": 0.2911456227302551, | |
| "learning_rate": 4.999960372880343e-05, | |
| "loss": 0.8890910744667053, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.10256410256410256, | |
| "grad_norm": 0.9632299542427063, | |
| "learning_rate": 4.9998899252420356e-05, | |
| "loss": 1.2817891836166382, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.10622710622710622, | |
| "grad_norm": 0.5119697451591492, | |
| "learning_rate": 4.9997842551631656e-05, | |
| "loss": 1.1215670108795166, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.10989010989010989, | |
| "grad_norm": 0.1322525441646576, | |
| "learning_rate": 4.999643364298017e-05, | |
| "loss": 1.3118717670440674, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.11355311355311355, | |
| "grad_norm": 0.1301015466451645, | |
| "learning_rate": 4.9994672548522613e-05, | |
| "loss": 1.3311526775360107, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.11721611721611722, | |
| "grad_norm": 0.11371763795614243, | |
| "learning_rate": 4.999255929582926e-05, | |
| "loss": 1.3023815155029297, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.12087912087912088, | |
| "grad_norm": 0.14048679172992706, | |
| "learning_rate": 4.9990093917983465e-05, | |
| "loss": 1.2759833335876465, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.12454212454212454, | |
| "grad_norm": 0.23039552569389343, | |
| "learning_rate": 4.9987276453581165e-05, | |
| "loss": 1.2587000131607056, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.1282051282051282, | |
| "grad_norm": 0.4443025290966034, | |
| "learning_rate": 4.998410694673029e-05, | |
| "loss": 1.1879582405090332, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.13186813186813187, | |
| "grad_norm": 0.22435733675956726, | |
| "learning_rate": 4.998058544705005e-05, | |
| "loss": 1.3695639371871948, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.13553113553113552, | |
| "grad_norm": 0.22466352581977844, | |
| "learning_rate": 4.997671200967017e-05, | |
| "loss": 1.5553536415100098, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.1391941391941392, | |
| "grad_norm": 0.05474912002682686, | |
| "learning_rate": 4.997248669523002e-05, | |
| "loss": 1.0360654592514038, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.14285714285714285, | |
| "grad_norm": 0.17411059141159058, | |
| "learning_rate": 4.9967909569877686e-05, | |
| "loss": 1.3734983205795288, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.14652014652014653, | |
| "grad_norm": 0.22640125453472137, | |
| "learning_rate": 4.99629807052689e-05, | |
| "loss": 1.5512079000473022, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.15018315018315018, | |
| "grad_norm": 0.16909095644950867, | |
| "learning_rate": 4.995770017856595e-05, | |
| "loss": 1.1615397930145264, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.15384615384615385, | |
| "grad_norm": 0.14065229892730713, | |
| "learning_rate": 4.995206807243644e-05, | |
| "loss": 1.3165048360824585, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.1575091575091575, | |
| "grad_norm": 0.2557665705680847, | |
| "learning_rate": 4.994608447505203e-05, | |
| "loss": 1.456904411315918, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.16117216117216118, | |
| "grad_norm": 0.18825113773345947, | |
| "learning_rate": 4.993974948008705e-05, | |
| "loss": 0.7387548685073853, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.16483516483516483, | |
| "grad_norm": 0.09088045358657837, | |
| "learning_rate": 4.9933063186717006e-05, | |
| "loss": 0.8501173257827759, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.1684981684981685, | |
| "grad_norm": 0.31493327021598816, | |
| "learning_rate": 4.992602569961704e-05, | |
| "loss": 1.2714766263961792, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.17216117216117216, | |
| "grad_norm": 0.3955460786819458, | |
| "learning_rate": 4.991863712896033e-05, | |
| "loss": 1.29978609085083, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.17582417582417584, | |
| "grad_norm": 0.22290943562984467, | |
| "learning_rate": 4.991089759041628e-05, | |
| "loss": 1.1851716041564941, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.1794871794871795, | |
| "grad_norm": 0.1698511391878128, | |
| "learning_rate": 4.99028072051488e-05, | |
| "loss": 1.2791502475738525, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.18315018315018314, | |
| "grad_norm": 0.2091524451971054, | |
| "learning_rate": 4.989436609981437e-05, | |
| "loss": 1.143870234489441, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.18681318681318682, | |
| "grad_norm": 0.39920395612716675, | |
| "learning_rate": 4.988557440656004e-05, | |
| "loss": 1.2132782936096191, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.19047619047619047, | |
| "grad_norm": 0.23149904608726501, | |
| "learning_rate": 4.987643226302138e-05, | |
| "loss": 0.8638072609901428, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.19413919413919414, | |
| "grad_norm": 0.8971022367477417, | |
| "learning_rate": 4.9866939812320326e-05, | |
| "loss": 1.0543807744979858, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.1978021978021978, | |
| "grad_norm": 0.14404769241809845, | |
| "learning_rate": 4.9857097203062955e-05, | |
| "loss": 1.27614426612854, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.20146520146520147, | |
| "grad_norm": 0.20676514506340027, | |
| "learning_rate": 4.984690458933711e-05, | |
| "loss": 1.1264268159866333, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.20512820512820512, | |
| "grad_norm": 0.2630727291107178, | |
| "learning_rate": 4.983636213071004e-05, | |
| "loss": 1.1916111707687378, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.2087912087912088, | |
| "grad_norm": 0.3776465356349945, | |
| "learning_rate": 4.982546999222587e-05, | |
| "loss": 0.6021360158920288, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.21245421245421245, | |
| "grad_norm": 0.3124425709247589, | |
| "learning_rate": 4.981422834440303e-05, | |
| "loss": 1.2633193731307983, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.21611721611721613, | |
| "grad_norm": 0.21436505019664764, | |
| "learning_rate": 4.98026373632316e-05, | |
| "loss": 1.2517153024673462, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 0.21978021978021978, | |
| "grad_norm": 0.165570467710495, | |
| "learning_rate": 4.97906972301705e-05, | |
| "loss": 1.329060673713684, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.22344322344322345, | |
| "grad_norm": 0.1781710535287857, | |
| "learning_rate": 4.9778408132144715e-05, | |
| "loss": 1.2554113864898682, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 0.2271062271062271, | |
| "grad_norm": 0.4390529692173004, | |
| "learning_rate": 4.976577026154235e-05, | |
| "loss": 0.9952642321586609, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.23076923076923078, | |
| "grad_norm": 1.2797257900238037, | |
| "learning_rate": 4.9752783816211576e-05, | |
| "loss": 0.872045636177063, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 0.23443223443223443, | |
| "grad_norm": 0.3392629325389862, | |
| "learning_rate": 4.973944899945758e-05, | |
| "loss": 1.1586322784423828, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.23809523809523808, | |
| "grad_norm": 0.216371089220047, | |
| "learning_rate": 4.9725766020039395e-05, | |
| "loss": 1.2613385915756226, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.24175824175824176, | |
| "grad_norm": 0.44755294919013977, | |
| "learning_rate": 4.971173509216656e-05, | |
| "loss": 1.3555878400802612, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 0.2454212454212454, | |
| "grad_norm": 0.20577383041381836, | |
| "learning_rate": 4.969735643549583e-05, | |
| "loss": 1.2522915601730347, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 0.2490842490842491, | |
| "grad_norm": 0.33351951837539673, | |
| "learning_rate": 4.968263027512773e-05, | |
| "loss": 1.2353262901306152, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.25274725274725274, | |
| "grad_norm": 0.12553884088993073, | |
| "learning_rate": 4.966755684160301e-05, | |
| "loss": 1.057889699935913, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 0.2564102564102564, | |
| "grad_norm": 0.17903219163417816, | |
| "learning_rate": 4.9652136370899035e-05, | |
| "loss": 1.23538076877594, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.2600732600732601, | |
| "grad_norm": 0.3907378315925598, | |
| "learning_rate": 4.963636910442611e-05, | |
| "loss": 1.2505638599395752, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 0.26373626373626374, | |
| "grad_norm": 0.1998424232006073, | |
| "learning_rate": 4.96202552890237e-05, | |
| "loss": 1.2176811695098877, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.2673992673992674, | |
| "grad_norm": 0.4392535388469696, | |
| "learning_rate": 4.960379517695654e-05, | |
| "loss": 1.3697282075881958, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 0.27106227106227104, | |
| "grad_norm": 0.3896879255771637, | |
| "learning_rate": 4.958698902591072e-05, | |
| "loss": 1.2809630632400513, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 0.27472527472527475, | |
| "grad_norm": 0.3242920935153961, | |
| "learning_rate": 4.9569837098989626e-05, | |
| "loss": 0.9012686014175415, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.2783882783882784, | |
| "grad_norm": 0.13880665600299835, | |
| "learning_rate": 4.9552339664709807e-05, | |
| "loss": 0.6081559658050537, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 0.28205128205128205, | |
| "grad_norm": 0.27750471234321594, | |
| "learning_rate": 4.9534496996996845e-05, | |
| "loss": 1.0718085765838623, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 0.2857142857142857, | |
| "grad_norm": 0.4269710183143616, | |
| "learning_rate": 4.951630937518096e-05, | |
| "loss": 1.3045586347579956, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 0.2893772893772894, | |
| "grad_norm": 0.36114686727523804, | |
| "learning_rate": 4.949777708399273e-05, | |
| "loss": 1.24015212059021, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 0.29304029304029305, | |
| "grad_norm": 0.18885226547718048, | |
| "learning_rate": 4.947890041355858e-05, | |
| "loss": 0.9190669655799866, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.2967032967032967, | |
| "grad_norm": 0.42237764596939087, | |
| "learning_rate": 4.9459679659396257e-05, | |
| "loss": 1.4927023649215698, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 0.30036630036630035, | |
| "grad_norm": 0.1873409003019333, | |
| "learning_rate": 4.944011512241021e-05, | |
| "loss": 1.0130228996276855, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 0.304029304029304, | |
| "grad_norm": 0.290294885635376, | |
| "learning_rate": 4.942020710888684e-05, | |
| "loss": 1.3621708154678345, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 0.3076923076923077, | |
| "grad_norm": 0.7741953134536743, | |
| "learning_rate": 4.939995593048979e-05, | |
| "loss": 1.1007283926010132, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 0.31135531135531136, | |
| "grad_norm": 0.4494468867778778, | |
| "learning_rate": 4.937936190425495e-05, | |
| "loss": 1.2320328950881958, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.315018315018315, | |
| "grad_norm": 1.186848759651184, | |
| "learning_rate": 4.9358425352585616e-05, | |
| "loss": 1.0239619016647339, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 0.31868131868131866, | |
| "grad_norm": 0.1502193659543991, | |
| "learning_rate": 4.933714660324735e-05, | |
| "loss": 0.816228449344635, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 0.32234432234432236, | |
| "grad_norm": 0.24374300241470337, | |
| "learning_rate": 4.931552598936287e-05, | |
| "loss": 1.370795726776123, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 0.326007326007326, | |
| "grad_norm": 0.16081377863883972, | |
| "learning_rate": 4.929356384940688e-05, | |
| "loss": 0.8959931135177612, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 0.32967032967032966, | |
| "grad_norm": 0.21084611117839813, | |
| "learning_rate": 4.927126052720071e-05, | |
| "loss": 1.447354793548584, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.3333333333333333, | |
| "grad_norm": 0.13184507191181183, | |
| "learning_rate": 4.924861637190698e-05, | |
| "loss": 0.954731285572052, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 0.336996336996337, | |
| "grad_norm": 0.2652509808540344, | |
| "learning_rate": 4.922563173802409e-05, | |
| "loss": 1.2110737562179565, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 0.34065934065934067, | |
| "grad_norm": 0.34187304973602295, | |
| "learning_rate": 4.9202306985380734e-05, | |
| "loss": 1.2186378240585327, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 0.3443223443223443, | |
| "grad_norm": 0.08439934998750687, | |
| "learning_rate": 4.917864247913018e-05, | |
| "loss": 1.155535101890564, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 0.34798534798534797, | |
| "grad_norm": 0.22483469545841217, | |
| "learning_rate": 4.9154638589744646e-05, | |
| "loss": 1.2381874322891235, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.3516483516483517, | |
| "grad_norm": 10.98539924621582, | |
| "learning_rate": 4.913029569300942e-05, | |
| "loss": 1.0877535343170166, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 0.3553113553113553, | |
| "grad_norm": 0.09030856937170029, | |
| "learning_rate": 4.9105614170017034e-05, | |
| "loss": 1.2255364656448364, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 0.358974358974359, | |
| "grad_norm": 0.22610144317150116, | |
| "learning_rate": 4.908059440716127e-05, | |
| "loss": 1.2344918251037598, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 0.3626373626373626, | |
| "grad_norm": 0.6382534503936768, | |
| "learning_rate": 4.9055236796131115e-05, | |
| "loss": 0.8511998653411865, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 0.3663003663003663, | |
| "grad_norm": 0.22709640860557556, | |
| "learning_rate": 4.902954173390464e-05, | |
| "loss": 0.9911755323410034, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.36996336996337, | |
| "grad_norm": 0.2304632067680359, | |
| "learning_rate": 4.900350962274275e-05, | |
| "loss": 1.4108072519302368, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 0.37362637362637363, | |
| "grad_norm": 0.2119007110595703, | |
| "learning_rate": 4.897714087018296e-05, | |
| "loss": 1.1905288696289062, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 0.3772893772893773, | |
| "grad_norm": 0.2922574579715729, | |
| "learning_rate": 4.895043588903292e-05, | |
| "loss": 0.7706769704818726, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 0.38095238095238093, | |
| "grad_norm": 0.6245097517967224, | |
| "learning_rate": 4.892339509736404e-05, | |
| "loss": 1.1153967380523682, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 0.38461538461538464, | |
| "grad_norm": 0.36032259464263916, | |
| "learning_rate": 4.889601891850486e-05, | |
| "loss": 1.3866379261016846, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.3882783882783883, | |
| "grad_norm": 0.14774373173713684, | |
| "learning_rate": 4.886830778103452e-05, | |
| "loss": 1.0544565916061401, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 0.39194139194139194, | |
| "grad_norm": 0.2895050346851349, | |
| "learning_rate": 4.884026211877596e-05, | |
| "loss": 1.1082898378372192, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 0.3956043956043956, | |
| "grad_norm": 0.2137245386838913, | |
| "learning_rate": 4.881188237078919e-05, | |
| "loss": 1.2029824256896973, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 0.3992673992673993, | |
| "grad_norm": 0.3577767014503479, | |
| "learning_rate": 4.878316898136437e-05, | |
| "loss": 1.2338331937789917, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 0.40293040293040294, | |
| "grad_norm": 0.09472601860761642, | |
| "learning_rate": 4.875412240001491e-05, | |
| "loss": 0.6112377047538757, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.4065934065934066, | |
| "grad_norm": 0.1504858434200287, | |
| "learning_rate": 4.872474308147037e-05, | |
| "loss": 1.3192267417907715, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 0.41025641025641024, | |
| "grad_norm": 0.3618045449256897, | |
| "learning_rate": 4.869503148566939e-05, | |
| "loss": 1.0624542236328125, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 0.4139194139194139, | |
| "grad_norm": 0.13315747678279877, | |
| "learning_rate": 4.866498807775247e-05, | |
| "loss": 1.2139613628387451, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 0.4175824175824176, | |
| "grad_norm": 0.2177940309047699, | |
| "learning_rate": 4.8634613328054674e-05, | |
| "loss": 1.2820316553115845, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 0.42124542124542125, | |
| "grad_norm": 0.3730919063091278, | |
| "learning_rate": 4.8603907712098305e-05, | |
| "loss": 1.2036633491516113, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.4249084249084249, | |
| "grad_norm": 0.5930754542350769, | |
| "learning_rate": 4.8572871710585424e-05, | |
| "loss": 0.9775714874267578, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 0.42857142857142855, | |
| "grad_norm": 5.749536991119385, | |
| "learning_rate": 4.854150580939035e-05, | |
| "loss": 1.4643810987472534, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 0.43223443223443225, | |
| "grad_norm": 0.18177424371242523, | |
| "learning_rate": 4.850981049955203e-05, | |
| "loss": 0.99868243932724, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 0.4358974358974359, | |
| "grad_norm": 0.5519245266914368, | |
| "learning_rate": 4.847778627726636e-05, | |
| "loss": 1.051274299621582, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 0.43956043956043955, | |
| "grad_norm": 0.24456854164600372, | |
| "learning_rate": 4.844543364387844e-05, | |
| "loss": 0.8957317471504211, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.4432234432234432, | |
| "grad_norm": 0.32938405871391296, | |
| "learning_rate": 4.8412753105874703e-05, | |
| "loss": 0.8530710339546204, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 0.4468864468864469, | |
| "grad_norm": 0.08594862371683121, | |
| "learning_rate": 4.837974517487496e-05, | |
| "loss": 0.560033917427063, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 0.45054945054945056, | |
| "grad_norm": 0.12811991572380066, | |
| "learning_rate": 4.8346410367624465e-05, | |
| "loss": 1.2348781824111938, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 0.4542124542124542, | |
| "grad_norm": 0.21825870871543884, | |
| "learning_rate": 4.831274920598574e-05, | |
| "loss": 0.8636214733123779, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 0.45787545787545786, | |
| "grad_norm": 0.6492200493812561, | |
| "learning_rate": 4.8278762216930456e-05, | |
| "loss": 1.246092677116394, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.46153846153846156, | |
| "grad_norm": 0.16339761018753052, | |
| "learning_rate": 4.8244449932531195e-05, | |
| "loss": 1.1555366516113281, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 0.4652014652014652, | |
| "grad_norm": 0.12176702171564102, | |
| "learning_rate": 4.820981288995307e-05, | |
| "loss": 0.9462042450904846, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 0.46886446886446886, | |
| "grad_norm": 0.10259034484624863, | |
| "learning_rate": 4.8174851631445354e-05, | |
| "loss": 1.2327078580856323, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 0.4725274725274725, | |
| "grad_norm": 0.2983081638813019, | |
| "learning_rate": 4.8139566704332984e-05, | |
| "loss": 1.2545617818832397, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 0.47619047619047616, | |
| "grad_norm": 0.18848823010921478, | |
| "learning_rate": 4.810395866100797e-05, | |
| "loss": 0.7314871549606323, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.47985347985347987, | |
| "grad_norm": 1.1380740404129028, | |
| "learning_rate": 4.8068028058920795e-05, | |
| "loss": 1.1386513710021973, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 0.4835164835164835, | |
| "grad_norm": 0.19013704359531403, | |
| "learning_rate": 4.803177546057163e-05, | |
| "loss": 1.207440972328186, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 0.48717948717948717, | |
| "grad_norm": 0.49117130041122437, | |
| "learning_rate": 4.799520143350158e-05, | |
| "loss": 1.478100299835205, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 0.4908424908424908, | |
| "grad_norm": 0.32450738549232483, | |
| "learning_rate": 4.795830655028376e-05, | |
| "loss": 0.7695617079734802, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 0.4945054945054945, | |
| "grad_norm": 0.14151829481124878, | |
| "learning_rate": 4.792109138851435e-05, | |
| "loss": 1.180545449256897, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.4981684981684982, | |
| "grad_norm": 0.21694627404212952, | |
| "learning_rate": 4.7883556530803554e-05, | |
| "loss": 0.8736183643341064, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 0.5018315018315018, | |
| "grad_norm": 0.9315363764762878, | |
| "learning_rate": 4.7845702564766475e-05, | |
| "loss": 1.2287445068359375, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 0.5054945054945055, | |
| "grad_norm": 0.12385514378547668, | |
| "learning_rate": 4.7807530083013906e-05, | |
| "loss": 0.814042329788208, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 0.5091575091575091, | |
| "grad_norm": 0.10513313859701157, | |
| "learning_rate": 4.776903968314308e-05, | |
| "loss": 0.8786470890045166, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 0.5128205128205128, | |
| "grad_norm": 0.213555246591568, | |
| "learning_rate": 4.7730231967728275e-05, | |
| "loss": 1.2300586700439453, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.5164835164835165, | |
| "grad_norm": 0.20062805712223053, | |
| "learning_rate": 4.769110754431142e-05, | |
| "loss": 1.2230390310287476, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 0.5201465201465202, | |
| "grad_norm": 0.21544235944747925, | |
| "learning_rate": 4.765166702539256e-05, | |
| "loss": 1.2219314575195312, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 0.5238095238095238, | |
| "grad_norm": 0.22437822818756104, | |
| "learning_rate": 4.761191102842027e-05, | |
| "loss": 0.9741434454917908, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 0.5274725274725275, | |
| "grad_norm": 0.09989945590496063, | |
| "learning_rate": 4.757184017578198e-05, | |
| "loss": 1.2340394258499146, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 0.5311355311355311, | |
| "grad_norm": 0.14188872277736664, | |
| "learning_rate": 4.7531455094794284e-05, | |
| "loss": 1.197536587715149, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.5347985347985348, | |
| "grad_norm": 0.1335064321756363, | |
| "learning_rate": 4.7490756417693036e-05, | |
| "loss": 0.7367426753044128, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 0.5384615384615384, | |
| "grad_norm": 0.02857016585767269, | |
| "learning_rate": 4.7449744781623526e-05, | |
| "loss": 0.9376294016838074, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 0.5421245421245421, | |
| "grad_norm": 0.11979032307863235, | |
| "learning_rate": 4.740842082863043e-05, | |
| "loss": 1.0236124992370605, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 0.5457875457875457, | |
| "grad_norm": 0.19949960708618164, | |
| "learning_rate": 4.736678520564786e-05, | |
| "loss": 1.290779709815979, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 0.5494505494505495, | |
| "grad_norm": 0.12269338220357895, | |
| "learning_rate": 4.732483856448913e-05, | |
| "loss": 1.1912894248962402, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.5531135531135531, | |
| "grad_norm": 0.15550191700458527, | |
| "learning_rate": 4.7282581561836644e-05, | |
| "loss": 1.1734073162078857, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 0.5567765567765568, | |
| "grad_norm": 0.19052956998348236, | |
| "learning_rate": 4.724001485923153e-05, | |
| "loss": 0.9569897055625916, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 0.5604395604395604, | |
| "grad_norm": 0.3564753830432892, | |
| "learning_rate": 4.7197139123063366e-05, | |
| "loss": 0.9688905477523804, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 0.5641025641025641, | |
| "grad_norm": 0.25113749504089355, | |
| "learning_rate": 4.715395502455967e-05, | |
| "loss": 1.3545844554901123, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 0.5677655677655677, | |
| "grad_norm": 0.19413875043392181, | |
| "learning_rate": 4.711046323977545e-05, | |
| "loss": 0.9748039245605469, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.5714285714285714, | |
| "grad_norm": 0.20445436239242554, | |
| "learning_rate": 4.70666644495826e-05, | |
| "loss": 1.2018651962280273, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 0.575091575091575, | |
| "grad_norm": 0.1748535931110382, | |
| "learning_rate": 4.702255933965924e-05, | |
| "loss": 1.1204524040222168, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 0.5787545787545788, | |
| "grad_norm": 0.13978832960128784, | |
| "learning_rate": 4.697814860047895e-05, | |
| "loss": 1.273799180984497, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 0.5824175824175825, | |
| "grad_norm": 0.1604635864496231, | |
| "learning_rate": 4.6933432927300054e-05, | |
| "loss": 1.1062840223312378, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 0.5860805860805861, | |
| "grad_norm": 0.1707131415605545, | |
| "learning_rate": 4.6888413020154626e-05, | |
| "loss": 1.3164299726486206, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.5897435897435898, | |
| "grad_norm": 0.13679248094558716, | |
| "learning_rate": 4.6843089583837586e-05, | |
| "loss": 1.5054590702056885, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 0.5934065934065934, | |
| "grad_norm": 0.19371454417705536, | |
| "learning_rate": 4.6797463327895676e-05, | |
| "loss": 1.2403850555419922, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 0.5970695970695971, | |
| "grad_norm": 0.09500681608915329, | |
| "learning_rate": 4.6751534966616314e-05, | |
| "loss": 1.3421348333358765, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 0.6007326007326007, | |
| "grad_norm": 0.1618986278772354, | |
| "learning_rate": 4.670530521901645e-05, | |
| "loss": 1.2023552656173706, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 0.6043956043956044, | |
| "grad_norm": 0.13862641155719757, | |
| "learning_rate": 4.6658774808831284e-05, | |
| "loss": 1.1014868021011353, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.608058608058608, | |
| "grad_norm": 0.2911272644996643, | |
| "learning_rate": 4.6611944464502935e-05, | |
| "loss": 1.1684032678604126, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 0.6117216117216118, | |
| "grad_norm": 0.24178026616573334, | |
| "learning_rate": 4.6564814919169075e-05, | |
| "loss": 1.2577779293060303, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 0.6153846153846154, | |
| "grad_norm": 0.5293629169464111, | |
| "learning_rate": 4.651738691065139e-05, | |
| "loss": 0.8592604994773865, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 0.6190476190476191, | |
| "grad_norm": 0.09567166119813919, | |
| "learning_rate": 4.646966118144407e-05, | |
| "loss": 1.2142037153244019, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 0.6227106227106227, | |
| "grad_norm": 0.13777339458465576, | |
| "learning_rate": 4.642163847870221e-05, | |
| "loss": 1.207306981086731, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.6263736263736264, | |
| "grad_norm": 0.22208669781684875, | |
| "learning_rate": 4.637331955423002e-05, | |
| "loss": 0.5593523979187012, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 0.63003663003663, | |
| "grad_norm": 0.15060071647167206, | |
| "learning_rate": 4.6324705164469174e-05, | |
| "loss": 1.4146814346313477, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 0.6336996336996337, | |
| "grad_norm": 0.2521788775920868, | |
| "learning_rate": 4.6275796070486874e-05, | |
| "loss": 0.6819853782653809, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 0.6373626373626373, | |
| "grad_norm": 0.1835089921951294, | |
| "learning_rate": 4.622659303796397e-05, | |
| "loss": 1.135895013809204, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 0.6410256410256411, | |
| "grad_norm": 0.30718883872032166, | |
| "learning_rate": 4.6177096837183016e-05, | |
| "loss": 0.8732522130012512, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.6446886446886447, | |
| "grad_norm": 0.19664013385772705, | |
| "learning_rate": 4.612730824301611e-05, | |
| "loss": 0.9108962416648865, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 0.6483516483516484, | |
| "grad_norm": 0.24300748109817505, | |
| "learning_rate": 4.6077228034912865e-05, | |
| "loss": 0.944155216217041, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 0.652014652014652, | |
| "grad_norm": 0.25140050053596497, | |
| "learning_rate": 4.602685699688814e-05, | |
| "loss": 1.1503783464431763, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 0.6556776556776557, | |
| "grad_norm": 0.18550491333007812, | |
| "learning_rate": 4.5976195917509804e-05, | |
| "loss": 1.1416871547698975, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 0.6593406593406593, | |
| "grad_norm": 0.19337521493434906, | |
| "learning_rate": 4.592524558988638e-05, | |
| "loss": 0.6880902647972107, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.663003663003663, | |
| "grad_norm": 0.27510422468185425, | |
| "learning_rate": 4.58740068116546e-05, | |
| "loss": 0.9372468590736389, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 0.6666666666666666, | |
| "grad_norm": 0.14954572916030884, | |
| "learning_rate": 4.582248038496698e-05, | |
| "loss": 0.9180594682693481, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 0.6703296703296703, | |
| "grad_norm": 0.23267677426338196, | |
| "learning_rate": 4.577066711647918e-05, | |
| "loss": 1.1724467277526855, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 0.673992673992674, | |
| "grad_norm": 0.1276102066040039, | |
| "learning_rate": 4.571856781733748e-05, | |
| "loss": 1.0390164852142334, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 0.6776556776556777, | |
| "grad_norm": 0.1551157683134079, | |
| "learning_rate": 4.566618330316596e-05, | |
| "loss": 1.081437587738037, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.6813186813186813, | |
| "grad_norm": 0.3087083399295807, | |
| "learning_rate": 4.561351439405384e-05, | |
| "loss": 1.1742217540740967, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 0.684981684981685, | |
| "grad_norm": 0.2865961790084839, | |
| "learning_rate": 4.5560561914542576e-05, | |
| "loss": 1.1755157709121704, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 0.6886446886446886, | |
| "grad_norm": 0.20178958773612976, | |
| "learning_rate": 4.550732669361298e-05, | |
| "loss": 0.8584067225456238, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 0.6923076923076923, | |
| "grad_norm": 0.14505843818187714, | |
| "learning_rate": 4.54538095646722e-05, | |
| "loss": 0.8162437081336975, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 0.6959706959706959, | |
| "grad_norm": 0.35326001048088074, | |
| "learning_rate": 4.540001136554077e-05, | |
| "loss": 1.0263890027999878, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.6996336996336996, | |
| "grad_norm": 0.2113528698682785, | |
| "learning_rate": 4.534593293843936e-05, | |
| "loss": 0.9698024392127991, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 0.7032967032967034, | |
| "grad_norm": 0.13257572054862976, | |
| "learning_rate": 4.529157512997571e-05, | |
| "loss": 1.1605135202407837, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 0.706959706959707, | |
| "grad_norm": 0.17257475852966309, | |
| "learning_rate": 4.5236938791131305e-05, | |
| "loss": 1.0823811292648315, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 0.7106227106227107, | |
| "grad_norm": 0.2746966779232025, | |
| "learning_rate": 4.518202477724808e-05, | |
| "loss": 0.8808259963989258, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 0.7142857142857143, | |
| "grad_norm": 0.11813390254974365, | |
| "learning_rate": 4.5126833948015016e-05, | |
| "loss": 1.0819435119628906, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.717948717948718, | |
| "grad_norm": 0.2048182636499405, | |
| "learning_rate": 4.5071367167454687e-05, | |
| "loss": 1.1645246744155884, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 0.7216117216117216, | |
| "grad_norm": 2.1287009716033936, | |
| "learning_rate": 4.5015625303909755e-05, | |
| "loss": 1.1096913814544678, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 0.7252747252747253, | |
| "grad_norm": 0.11994423717260361, | |
| "learning_rate": 4.495960923002935e-05, | |
| "loss": 1.223901391029358, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 0.7289377289377289, | |
| "grad_norm": 0.15119600296020508, | |
| "learning_rate": 4.49033198227554e-05, | |
| "loss": 0.9063436388969421, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 0.7326007326007326, | |
| "grad_norm": 0.11098281294107437, | |
| "learning_rate": 4.4846757963308936e-05, | |
| "loss": 0.8366504907608032, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.7362637362637363, | |
| "grad_norm": 0.14989100396633148, | |
| "learning_rate": 4.478992453717626e-05, | |
| "loss": 1.228022813796997, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 0.73992673992674, | |
| "grad_norm": 0.2334737479686737, | |
| "learning_rate": 4.4732820434095123e-05, | |
| "loss": 0.8357920050621033, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 0.7435897435897436, | |
| "grad_norm": 0.3819234073162079, | |
| "learning_rate": 4.4675446548040754e-05, | |
| "loss": 1.0126510858535767, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 0.7472527472527473, | |
| "grad_norm": 0.19562920928001404, | |
| "learning_rate": 4.46178037772119e-05, | |
| "loss": 1.012734055519104, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 0.7509157509157509, | |
| "grad_norm": 0.304485559463501, | |
| "learning_rate": 4.4559893024016726e-05, | |
| "loss": 0.8644341230392456, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.7545787545787546, | |
| "grad_norm": 0.24467211961746216, | |
| "learning_rate": 4.450171519505873e-05, | |
| "loss": 1.152502179145813, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 0.7582417582417582, | |
| "grad_norm": 0.8111533522605896, | |
| "learning_rate": 4.4443271201122514e-05, | |
| "loss": 0.9966916441917419, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 0.7619047619047619, | |
| "grad_norm": 0.13030032813549042, | |
| "learning_rate": 4.4384561957159565e-05, | |
| "loss": 1.2616826295852661, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 0.7655677655677655, | |
| "grad_norm": 0.09772861003875732, | |
| "learning_rate": 4.43255883822739e-05, | |
| "loss": 0.6672307252883911, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 0.7692307692307693, | |
| "grad_norm": 0.16410967707633972, | |
| "learning_rate": 4.4266351399707664e-05, | |
| "loss": 1.214950680732727, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.7728937728937729, | |
| "grad_norm": 0.12338349223136902, | |
| "learning_rate": 4.420685193682672e-05, | |
| "loss": 0.9765850305557251, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 0.7765567765567766, | |
| "grad_norm": 0.5074856281280518, | |
| "learning_rate": 4.4147090925106104e-05, | |
| "loss": 0.4896080195903778, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 0.7802197802197802, | |
| "grad_norm": 0.2849983870983124, | |
| "learning_rate": 4.4087069300115444e-05, | |
| "loss": 0.7668413519859314, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 0.7838827838827839, | |
| "grad_norm": 0.36542126536369324, | |
| "learning_rate": 4.4026788001504314e-05, | |
| "loss": 1.045650839805603, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 0.7875457875457875, | |
| "grad_norm": 0.1004275232553482, | |
| "learning_rate": 4.396624797298754e-05, | |
| "loss": 1.1941821575164795, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.7912087912087912, | |
| "grad_norm": 0.1513642817735672, | |
| "learning_rate": 4.390545016233039e-05, | |
| "loss": 1.2807530164718628, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 0.7948717948717948, | |
| "grad_norm": 0.3031829595565796, | |
| "learning_rate": 4.3844395521333786e-05, | |
| "loss": 0.8745837807655334, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 0.7985347985347986, | |
| "grad_norm": 0.1763853281736374, | |
| "learning_rate": 4.378308500581934e-05, | |
| "loss": 0.9577867984771729, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 0.8021978021978022, | |
| "grad_norm": 0.21650274097919464, | |
| "learning_rate": 4.372151957561447e-05, | |
| "loss": 0.8710334300994873, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 0.8058608058608059, | |
| "grad_norm": 0.3157196640968323, | |
| "learning_rate": 4.36597001945373e-05, | |
| "loss": 1.2961158752441406, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.8095238095238095, | |
| "grad_norm": 0.11482734233140945, | |
| "learning_rate": 4.3597627830381606e-05, | |
| "loss": 1.1874325275421143, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 0.8131868131868132, | |
| "grad_norm": 0.15268991887569427, | |
| "learning_rate": 4.353530345490167e-05, | |
| "loss": 1.1880759000778198, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 0.8168498168498168, | |
| "grad_norm": 0.21278268098831177, | |
| "learning_rate": 4.347272804379705e-05, | |
| "loss": 1.206059455871582, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 0.8205128205128205, | |
| "grad_norm": 0.179881751537323, | |
| "learning_rate": 4.340990257669732e-05, | |
| "loss": 1.2088541984558105, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 0.8241758241758241, | |
| "grad_norm": 0.14933490753173828, | |
| "learning_rate": 4.334682803714672e-05, | |
| "loss": 1.2412981986999512, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.8278388278388278, | |
| "grad_norm": 0.1529897153377533, | |
| "learning_rate": 4.328350541258876e-05, | |
| "loss": 0.9919160008430481, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 0.8315018315018315, | |
| "grad_norm": 0.10920170694589615, | |
| "learning_rate": 4.321993569435078e-05, | |
| "loss": 0.49135756492614746, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 0.8351648351648352, | |
| "grad_norm": 0.4436364471912384, | |
| "learning_rate": 4.315611987762841e-05, | |
| "loss": 0.8858435750007629, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 0.8388278388278388, | |
| "grad_norm": 0.22913309931755066, | |
| "learning_rate": 4.309205896146999e-05, | |
| "loss": 0.8232947587966919, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 0.8424908424908425, | |
| "grad_norm": 0.2811645567417145, | |
| "learning_rate": 4.302775394876096e-05, | |
| "loss": 1.0056540966033936, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.8461538461538461, | |
| "grad_norm": 0.22459489107131958, | |
| "learning_rate": 4.29632058462081e-05, | |
| "loss": 1.2183864116668701, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 0.8498168498168498, | |
| "grad_norm": 0.12809278070926666, | |
| "learning_rate": 4.2898415664323844e-05, | |
| "loss": 1.1671696901321411, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 0.8534798534798534, | |
| "grad_norm": 0.03261662647128105, | |
| "learning_rate": 4.2833384417410395e-05, | |
| "loss": 1.187354564666748, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 0.8571428571428571, | |
| "grad_norm": 0.1845274269580841, | |
| "learning_rate": 4.276811312354389e-05, | |
| "loss": 0.8790689706802368, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 0.8608058608058609, | |
| "grad_norm": 0.6504915952682495, | |
| "learning_rate": 4.270260280455843e-05, | |
| "loss": 0.8886659145355225, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.8644688644688645, | |
| "grad_norm": 0.19828136265277863, | |
| "learning_rate": 4.263685448603012e-05, | |
| "loss": 1.1550533771514893, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 0.8681318681318682, | |
| "grad_norm": 0.4623855948448181, | |
| "learning_rate": 4.257086919726097e-05, | |
| "loss": 0.7794157862663269, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 0.8717948717948718, | |
| "grad_norm": 0.2206961214542389, | |
| "learning_rate": 4.25046479712628e-05, | |
| "loss": 1.087639331817627, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 0.8754578754578755, | |
| "grad_norm": 0.23512773215770721, | |
| "learning_rate": 4.2438191844741105e-05, | |
| "loss": 1.0371439456939697, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 0.8791208791208791, | |
| "grad_norm": 0.7811533808708191, | |
| "learning_rate": 4.2371501858078753e-05, | |
| "loss": 1.055543065071106, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.8827838827838828, | |
| "grad_norm": 0.22808873653411865, | |
| "learning_rate": 4.230457905531976e-05, | |
| "loss": 1.1782468557357788, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 0.8864468864468864, | |
| "grad_norm": 0.1753520965576172, | |
| "learning_rate": 4.22374244841529e-05, | |
| "loss": 1.244563341140747, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 0.8901098901098901, | |
| "grad_norm": 1.1950969696044922, | |
| "learning_rate": 4.217003919589535e-05, | |
| "loss": 0.8924474120140076, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 0.8937728937728938, | |
| "grad_norm": 0.42523816227912903, | |
| "learning_rate": 4.210242424547617e-05, | |
| "loss": 1.136575698852539, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 0.8974358974358975, | |
| "grad_norm": 0.2354390025138855, | |
| "learning_rate": 4.203458069141985e-05, | |
| "loss": 1.0925524234771729, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.9010989010989011, | |
| "grad_norm": 0.3065359592437744, | |
| "learning_rate": 4.196650959582973e-05, | |
| "loss": 1.031598687171936, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 0.9047619047619048, | |
| "grad_norm": 0.40230098366737366, | |
| "learning_rate": 4.1898212024371304e-05, | |
| "loss": 0.5824300646781921, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 0.9084249084249084, | |
| "grad_norm": 0.1176103800535202, | |
| "learning_rate": 4.1829689046255616e-05, | |
| "loss": 1.2608321905136108, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 0.9120879120879121, | |
| "grad_norm": 0.1900339424610138, | |
| "learning_rate": 4.1760941734222505e-05, | |
| "loss": 1.117556095123291, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 0.9157509157509157, | |
| "grad_norm": 0.13478179275989532, | |
| "learning_rate": 4.1691971164523764e-05, | |
| "loss": 0.7983730435371399, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.9194139194139194, | |
| "grad_norm": 0.11394549906253815, | |
| "learning_rate": 4.1622778416906375e-05, | |
| "loss": 0.8523120284080505, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 0.9230769230769231, | |
| "grad_norm": 0.15726390480995178, | |
| "learning_rate": 4.15533645745955e-05, | |
| "loss": 0.9240443110466003, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 0.9267399267399268, | |
| "grad_norm": 0.07845434546470642, | |
| "learning_rate": 4.148373072427762e-05, | |
| "loss": 0.8336247205734253, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 0.9304029304029304, | |
| "grad_norm": 0.1776849627494812, | |
| "learning_rate": 4.1413877956083456e-05, | |
| "loss": 1.1461174488067627, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 0.9340659340659341, | |
| "grad_norm": 0.21315138041973114, | |
| "learning_rate": 4.1343807363570964e-05, | |
| "loss": 1.2062344551086426, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.9377289377289377, | |
| "grad_norm": 0.1842324286699295, | |
| "learning_rate": 4.127352004370814e-05, | |
| "loss": 1.1556131839752197, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 0.9413919413919414, | |
| "grad_norm": 0.17686887085437775, | |
| "learning_rate": 4.12030170968559e-05, | |
| "loss": 0.9388005137443542, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 0.945054945054945, | |
| "grad_norm": 0.20578397810459137, | |
| "learning_rate": 4.113229962675085e-05, | |
| "loss": 1.1634795665740967, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 0.9487179487179487, | |
| "grad_norm": 0.19318969547748566, | |
| "learning_rate": 4.1061368740488e-05, | |
| "loss": 1.1986818313598633, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 0.9523809523809523, | |
| "grad_norm": 0.25902265310287476, | |
| "learning_rate": 4.09902255485034e-05, | |
| "loss": 1.1979715824127197, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.9560439560439561, | |
| "grad_norm": 0.3866836726665497, | |
| "learning_rate": 4.091887116455681e-05, | |
| "loss": 0.8659937381744385, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 0.9597069597069597, | |
| "grad_norm": 0.4504384994506836, | |
| "learning_rate": 4.084730670571424e-05, | |
| "loss": 1.0120433568954468, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 0.9633699633699634, | |
| "grad_norm": 0.2555679380893707, | |
| "learning_rate": 4.0775533292330464e-05, | |
| "loss": 0.9460458755493164, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 0.967032967032967, | |
| "grad_norm": 0.3483099639415741, | |
| "learning_rate": 4.070355204803145e-05, | |
| "loss": 0.6675710082054138, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 0.9706959706959707, | |
| "grad_norm": 0.09682053327560425, | |
| "learning_rate": 4.0631364099696815e-05, | |
| "loss": 0.90069580078125, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.9743589743589743, | |
| "grad_norm": 0.7320610880851746, | |
| "learning_rate": 4.055897057744219e-05, | |
| "loss": 1.3395118713378906, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 0.978021978021978, | |
| "grad_norm": 0.24255181849002838, | |
| "learning_rate": 4.048637261460145e-05, | |
| "loss": 0.6177163124084473, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 0.9816849816849816, | |
| "grad_norm": 0.42302218079566956, | |
| "learning_rate": 4.0413571347709074e-05, | |
| "loss": 0.8449323177337646, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 0.9853479853479854, | |
| "grad_norm": 0.1898687183856964, | |
| "learning_rate": 4.034056791648228e-05, | |
| "loss": 0.7976465225219727, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 0.989010989010989, | |
| "grad_norm": 1.4811292886734009, | |
| "learning_rate": 4.0267363463803216e-05, | |
| "loss": 1.1151212453842163, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.9926739926739927, | |
| "grad_norm": 0.20806598663330078, | |
| "learning_rate": 4.019395913570104e-05, | |
| "loss": 1.1612093448638916, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 0.9963369963369964, | |
| "grad_norm": 0.4448166787624359, | |
| "learning_rate": 4.0120356081334004e-05, | |
| "loss": 1.1680574417114258, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 0.2003720998764038, | |
| "learning_rate": 4.004655545297148e-05, | |
| "loss": 1.347452163696289, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 1.0036630036630036, | |
| "grad_norm": 0.5149394869804382, | |
| "learning_rate": 3.997255840597587e-05, | |
| "loss": 0.8998035788536072, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 1.0073260073260073, | |
| "grad_norm": 0.18844829499721527, | |
| "learning_rate": 3.9898366098784544e-05, | |
| "loss": 1.149839162826538, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 1.010989010989011, | |
| "grad_norm": 0.12711450457572937, | |
| "learning_rate": 3.9823979692891734e-05, | |
| "loss": 0.6266541481018066, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 1.0146520146520146, | |
| "grad_norm": 0.43699517846107483, | |
| "learning_rate": 3.974940035283029e-05, | |
| "loss": 1.0944384336471558, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 1.0183150183150182, | |
| "grad_norm": 0.4765096604824066, | |
| "learning_rate": 3.967462924615351e-05, | |
| "loss": 0.9994142055511475, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 1.021978021978022, | |
| "grad_norm": 0.1308145374059677, | |
| "learning_rate": 3.95996675434168e-05, | |
| "loss": 1.181485652923584, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 1.0256410256410255, | |
| "grad_norm": 0.17354629933834076, | |
| "learning_rate": 3.952451641815942e-05, | |
| "loss": 0.8960216641426086, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 1.0293040293040292, | |
| "grad_norm": 0.20918694138526917, | |
| "learning_rate": 3.944917704688605e-05, | |
| "loss": 1.130763053894043, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 1.032967032967033, | |
| "grad_norm": 1.6546434164047241, | |
| "learning_rate": 3.9373650609048404e-05, | |
| "loss": 1.109397530555725, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 1.0366300366300367, | |
| "grad_norm": 0.20021019876003265, | |
| "learning_rate": 3.929793828702676e-05, | |
| "loss": 0.9343792796134949, | |
| "step": 566 | |
| }, | |
| { | |
| "epoch": 1.0402930402930404, | |
| "grad_norm": 0.8677191734313965, | |
| "learning_rate": 3.9222041266111444e-05, | |
| "loss": 1.0045840740203857, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 1.043956043956044, | |
| "grad_norm": 0.3973585367202759, | |
| "learning_rate": 3.914596073448427e-05, | |
| "loss": 1.0684887170791626, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 1.0476190476190477, | |
| "grad_norm": 0.5770029425621033, | |
| "learning_rate": 3.906969788319996e-05, | |
| "loss": 1.2040116786956787, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 1.0512820512820513, | |
| "grad_norm": 0.4743853509426117, | |
| "learning_rate": 3.899325390616748e-05, | |
| "loss": 1.0301820039749146, | |
| "step": 574 | |
| }, | |
| { | |
| "epoch": 1.054945054945055, | |
| "grad_norm": 0.5908330678939819, | |
| "learning_rate": 3.891663000013133e-05, | |
| "loss": 1.180071473121643, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 1.0586080586080586, | |
| "grad_norm": 0.19290882349014282, | |
| "learning_rate": 3.8839827364652875e-05, | |
| "loss": 1.1906160116195679, | |
| "step": 578 | |
| }, | |
| { | |
| "epoch": 1.0622710622710623, | |
| "grad_norm": 0.9853556752204895, | |
| "learning_rate": 3.8762847202091486e-05, | |
| "loss": 1.186142086982727, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 1.065934065934066, | |
| "grad_norm": 0.2555069625377655, | |
| "learning_rate": 3.868569071758577e-05, | |
| "loss": 0.9499126076698303, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 1.0695970695970696, | |
| "grad_norm": 0.37080565094947815, | |
| "learning_rate": 3.860835911903467e-05, | |
| "loss": 1.1149709224700928, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 1.0732600732600732, | |
| "grad_norm": 0.15910615026950836, | |
| "learning_rate": 3.853085361707859e-05, | |
| "loss": 1.0009230375289917, | |
| "step": 586 | |
| }, | |
| { | |
| "epoch": 1.0769230769230769, | |
| "grad_norm": 0.24355213344097137, | |
| "learning_rate": 3.8453175425080426e-05, | |
| "loss": 0.7909836769104004, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 1.0805860805860805, | |
| "grad_norm": 0.2337721288204193, | |
| "learning_rate": 3.8375325759106563e-05, | |
| "loss": 1.1665717363357544, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 1.0842490842490842, | |
| "grad_norm": 0.16137680411338806, | |
| "learning_rate": 3.829730583790782e-05, | |
| "loss": 1.009416103363037, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 1.0879120879120878, | |
| "grad_norm": 0.19279745221138, | |
| "learning_rate": 3.821911688290043e-05, | |
| "loss": 0.9450397491455078, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 1.0915750915750915, | |
| "grad_norm": 0.17870314419269562, | |
| "learning_rate": 3.814076011814685e-05, | |
| "loss": 0.991208553314209, | |
| "step": 596 | |
| }, | |
| { | |
| "epoch": 1.0952380952380953, | |
| "grad_norm": 0.3691723346710205, | |
| "learning_rate": 3.806223677033664e-05, | |
| "loss": 1.0436757802963257, | |
| "step": 598 | |
| }, | |
| { | |
| "epoch": 1.098901098901099, | |
| "grad_norm": 0.2122497707605362, | |
| "learning_rate": 3.798354806876728e-05, | |
| "loss": 1.10894775390625, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 1.1025641025641026, | |
| "grad_norm": 0.22092920541763306, | |
| "learning_rate": 3.790469524532484e-05, | |
| "loss": 0.7955626845359802, | |
| "step": 602 | |
| }, | |
| { | |
| "epoch": 1.1062271062271063, | |
| "grad_norm": 0.6790117621421814, | |
| "learning_rate": 3.782567953446477e-05, | |
| "loss": 0.9074943661689758, | |
| "step": 604 | |
| }, | |
| { | |
| "epoch": 1.10989010989011, | |
| "grad_norm": 0.2356610894203186, | |
| "learning_rate": 3.774650217319257e-05, | |
| "loss": 0.8648009896278381, | |
| "step": 606 | |
| }, | |
| { | |
| "epoch": 1.1135531135531136, | |
| "grad_norm": 0.5769211649894714, | |
| "learning_rate": 3.766716440104439e-05, | |
| "loss": 1.1958070993423462, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 1.1172161172161172, | |
| "grad_norm": 0.2272127866744995, | |
| "learning_rate": 3.7587667460067635e-05, | |
| "loss": 0.7023400664329529, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 1.120879120879121, | |
| "grad_norm": 0.6303228735923767, | |
| "learning_rate": 3.750801259480154e-05, | |
| "loss": 1.1139551401138306, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 1.1245421245421245, | |
| "grad_norm": 0.22971026599407196, | |
| "learning_rate": 3.7428201052257675e-05, | |
| "loss": 0.9638775587081909, | |
| "step": 614 | |
| }, | |
| { | |
| "epoch": 1.1282051282051282, | |
| "grad_norm": 0.1906343698501587, | |
| "learning_rate": 3.7348234081900424e-05, | |
| "loss": 1.127274513244629, | |
| "step": 616 | |
| }, | |
| { | |
| "epoch": 1.1318681318681318, | |
| "grad_norm": 0.29741182923316956, | |
| "learning_rate": 3.726811293562739e-05, | |
| "loss": 0.36746326088905334, | |
| "step": 618 | |
| }, | |
| { | |
| "epoch": 1.1355311355311355, | |
| "grad_norm": 1.2210016250610352, | |
| "learning_rate": 3.718783886774988e-05, | |
| "loss": 1.0633288621902466, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 1.1391941391941391, | |
| "grad_norm": 1.7148534059524536, | |
| "learning_rate": 3.7107413134973174e-05, | |
| "loss": 0.7120411992073059, | |
| "step": 622 | |
| }, | |
| { | |
| "epoch": 1.1428571428571428, | |
| "grad_norm": 0.6514428853988647, | |
| "learning_rate": 3.702683699637692e-05, | |
| "loss": 1.0393530130386353, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 1.1465201465201464, | |
| "grad_norm": 0.6007410287857056, | |
| "learning_rate": 3.6946111713395365e-05, | |
| "loss": 1.3253600597381592, | |
| "step": 626 | |
| }, | |
| { | |
| "epoch": 1.15018315018315, | |
| "grad_norm": 0.6678552031517029, | |
| "learning_rate": 3.6865238549797686e-05, | |
| "loss": 0.4324287176132202, | |
| "step": 628 | |
| }, | |
| { | |
| "epoch": 1.1538461538461537, | |
| "grad_norm": 0.056091565638780594, | |
| "learning_rate": 3.6784218771668125e-05, | |
| "loss": 1.0922839641571045, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 1.1575091575091574, | |
| "grad_norm": 0.34849968552589417, | |
| "learning_rate": 3.670305364738621e-05, | |
| "loss": 1.0799121856689453, | |
| "step": 632 | |
| }, | |
| { | |
| "epoch": 1.1611721611721613, | |
| "grad_norm": 1.3243259191513062, | |
| "learning_rate": 3.662174444760688e-05, | |
| "loss": 0.8275938630104065, | |
| "step": 634 | |
| }, | |
| { | |
| "epoch": 1.164835164835165, | |
| "grad_norm": 1.356533169746399, | |
| "learning_rate": 3.6540292445240624e-05, | |
| "loss": 0.93868488073349, | |
| "step": 636 | |
| }, | |
| { | |
| "epoch": 1.1684981684981686, | |
| "grad_norm": 0.4635038673877716, | |
| "learning_rate": 3.6458698915433506e-05, | |
| "loss": 1.1719251871109009, | |
| "step": 638 | |
| }, | |
| { | |
| "epoch": 1.1721611721611722, | |
| "grad_norm": 0.04632457718253136, | |
| "learning_rate": 3.637696513554725e-05, | |
| "loss": 0.7194165587425232, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 1.1758241758241759, | |
| "grad_norm": 0.3217860162258148, | |
| "learning_rate": 3.629509238513921e-05, | |
| "loss": 1.0931247472763062, | |
| "step": 642 | |
| }, | |
| { | |
| "epoch": 1.1794871794871795, | |
| "grad_norm": 0.8376574516296387, | |
| "learning_rate": 3.621308194594236e-05, | |
| "loss": 0.9752073884010315, | |
| "step": 644 | |
| }, | |
| { | |
| "epoch": 1.1831501831501832, | |
| "grad_norm": 0.19979257881641388, | |
| "learning_rate": 3.6130935101845194e-05, | |
| "loss": 0.7485665678977966, | |
| "step": 646 | |
| }, | |
| { | |
| "epoch": 1.1868131868131868, | |
| "grad_norm": 0.2617126703262329, | |
| "learning_rate": 3.6048653138871666e-05, | |
| "loss": 0.8534201383590698, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 1.1904761904761905, | |
| "grad_norm": 1.090280532836914, | |
| "learning_rate": 3.596623734516104e-05, | |
| "loss": 1.1257884502410889, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 1.1941391941391941, | |
| "grad_norm": 6.1979570388793945, | |
| "learning_rate": 3.588368901094773e-05, | |
| "loss": 0.653273344039917, | |
| "step": 652 | |
| }, | |
| { | |
| "epoch": 1.1978021978021978, | |
| "grad_norm": 0.17688162624835968, | |
| "learning_rate": 3.5801009428541096e-05, | |
| "loss": 1.249631643295288, | |
| "step": 654 | |
| }, | |
| { | |
| "epoch": 1.2014652014652014, | |
| "grad_norm": 0.8640678524971008, | |
| "learning_rate": 3.571819989230519e-05, | |
| "loss": 0.8184079527854919, | |
| "step": 656 | |
| }, | |
| { | |
| "epoch": 1.205128205128205, | |
| "grad_norm": 0.6149291396141052, | |
| "learning_rate": 3.563526169863854e-05, | |
| "loss": 0.7101552486419678, | |
| "step": 658 | |
| }, | |
| { | |
| "epoch": 1.2087912087912087, | |
| "grad_norm": 0.7223177552223206, | |
| "learning_rate": 3.555219614595381e-05, | |
| "loss": 1.1504517793655396, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 1.2124542124542124, | |
| "grad_norm": 0.6356011629104614, | |
| "learning_rate": 3.546900453465752e-05, | |
| "loss": 0.970334529876709, | |
| "step": 662 | |
| }, | |
| { | |
| "epoch": 1.2161172161172162, | |
| "grad_norm": 0.8084374666213989, | |
| "learning_rate": 3.538568816712964e-05, | |
| "loss": 0.8572604060173035, | |
| "step": 664 | |
| }, | |
| { | |
| "epoch": 1.2197802197802199, | |
| "grad_norm": 0.2903348505496979, | |
| "learning_rate": 3.5302248347703224e-05, | |
| "loss": 0.7845436930656433, | |
| "step": 666 | |
| }, | |
| { | |
| "epoch": 1.2234432234432235, | |
| "grad_norm": 0.9992715716362, | |
| "learning_rate": 3.5218686382643994e-05, | |
| "loss": 0.8749545812606812, | |
| "step": 668 | |
| }, | |
| { | |
| "epoch": 1.2271062271062272, | |
| "grad_norm": 0.7143378257751465, | |
| "learning_rate": 3.513500358012988e-05, | |
| "loss": 0.6878855228424072, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 1.2307692307692308, | |
| "grad_norm": 0.30578601360321045, | |
| "learning_rate": 3.5051201250230545e-05, | |
| "loss": 1.168808937072754, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 1.2344322344322345, | |
| "grad_norm": 1.2355809211730957, | |
| "learning_rate": 3.4967280704886865e-05, | |
| "loss": 1.1536543369293213, | |
| "step": 674 | |
| }, | |
| { | |
| "epoch": 1.2380952380952381, | |
| "grad_norm": 0.3718186914920807, | |
| "learning_rate": 3.488324325789044e-05, | |
| "loss": 1.1648200750350952, | |
| "step": 676 | |
| }, | |
| { | |
| "epoch": 1.2417582417582418, | |
| "grad_norm": 0.4209219813346863, | |
| "learning_rate": 3.4799090224862924e-05, | |
| "loss": 0.7579060792922974, | |
| "step": 678 | |
| }, | |
| { | |
| "epoch": 1.2454212454212454, | |
| "grad_norm": 2.09708571434021, | |
| "learning_rate": 3.471482292323554e-05, | |
| "loss": 0.8136189579963684, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 1.249084249084249, | |
| "grad_norm": 0.4596081078052521, | |
| "learning_rate": 3.463044267222841e-05, | |
| "loss": 1.1541743278503418, | |
| "step": 682 | |
| }, | |
| { | |
| "epoch": 1.2527472527472527, | |
| "grad_norm": 0.2024964988231659, | |
| "learning_rate": 3.454595079282986e-05, | |
| "loss": 1.1373684406280518, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 1.2564102564102564, | |
| "grad_norm": 0.5888193845748901, | |
| "learning_rate": 3.4461348607775806e-05, | |
| "loss": 0.8096006512641907, | |
| "step": 686 | |
| }, | |
| { | |
| "epoch": 1.26007326007326, | |
| "grad_norm": 0.5350688099861145, | |
| "learning_rate": 3.437663744152902e-05, | |
| "loss": 1.081048607826233, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 1.2637362637362637, | |
| "grad_norm": 0.4051729738712311, | |
| "learning_rate": 3.429181862025839e-05, | |
| "loss": 1.0899769067764282, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 1.2673992673992673, | |
| "grad_norm": 1.3433797359466553, | |
| "learning_rate": 3.4206893471818155e-05, | |
| "loss": 1.1519224643707275, | |
| "step": 692 | |
| }, | |
| { | |
| "epoch": 1.271062271062271, | |
| "grad_norm": 0.751139223575592, | |
| "learning_rate": 3.4121863325727124e-05, | |
| "loss": 0.9729434251785278, | |
| "step": 694 | |
| }, | |
| { | |
| "epoch": 1.2747252747252746, | |
| "grad_norm": 0.7331501245498657, | |
| "learning_rate": 3.40367295131479e-05, | |
| "loss": 0.9491739869117737, | |
| "step": 696 | |
| }, | |
| { | |
| "epoch": 1.2783882783882783, | |
| "grad_norm": 0.39379310607910156, | |
| "learning_rate": 3.395149336686595e-05, | |
| "loss": 0.8585996627807617, | |
| "step": 698 | |
| }, | |
| { | |
| "epoch": 1.282051282051282, | |
| "grad_norm": 0.2216329425573349, | |
| "learning_rate": 3.386615622126883e-05, | |
| "loss": 0.6349502801895142, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 1.2857142857142856, | |
| "grad_norm": 1.3201651573181152, | |
| "learning_rate": 3.378071941232525e-05, | |
| "loss": 0.507042646408081, | |
| "step": 702 | |
| }, | |
| { | |
| "epoch": 1.2893772893772895, | |
| "grad_norm": 1.1609549522399902, | |
| "learning_rate": 3.369518427756417e-05, | |
| "loss": 1.2238701581954956, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 1.293040293040293, | |
| "grad_norm": 0.262668639421463, | |
| "learning_rate": 3.360955215605385e-05, | |
| "loss": 0.954353928565979, | |
| "step": 706 | |
| }, | |
| { | |
| "epoch": 1.2967032967032968, | |
| "grad_norm": 0.15986381471157074, | |
| "learning_rate": 3.35238243883809e-05, | |
| "loss": 0.9157785177230835, | |
| "step": 708 | |
| }, | |
| { | |
| "epoch": 1.3003663003663004, | |
| "grad_norm": 0.24788087606430054, | |
| "learning_rate": 3.34380023166293e-05, | |
| "loss": 0.523282527923584, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 1.304029304029304, | |
| "grad_norm": 0.5071739554405212, | |
| "learning_rate": 3.335208728435935e-05, | |
| "loss": 0.8822041749954224, | |
| "step": 712 | |
| }, | |
| { | |
| "epoch": 1.3076923076923077, | |
| "grad_norm": 0.21843966841697693, | |
| "learning_rate": 3.3266080636586685e-05, | |
| "loss": 1.1520413160324097, | |
| "step": 714 | |
| }, | |
| { | |
| "epoch": 1.3113553113553114, | |
| "grad_norm": 0.7570046186447144, | |
| "learning_rate": 3.317998371976121e-05, | |
| "loss": 1.1054189205169678, | |
| "step": 716 | |
| }, | |
| { | |
| "epoch": 1.315018315018315, | |
| "grad_norm": 0.24603700637817383, | |
| "learning_rate": 3.309379788174598e-05, | |
| "loss": 0.8677737712860107, | |
| "step": 718 | |
| }, | |
| { | |
| "epoch": 1.3186813186813187, | |
| "grad_norm": 0.6697846055030823, | |
| "learning_rate": 3.3007524471796136e-05, | |
| "loss": 0.8973780870437622, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 1.3223443223443223, | |
| "grad_norm": 0.24531511962413788, | |
| "learning_rate": 3.2921164840537784e-05, | |
| "loss": 0.8588492274284363, | |
| "step": 722 | |
| }, | |
| { | |
| "epoch": 1.326007326007326, | |
| "grad_norm": 0.1828172355890274, | |
| "learning_rate": 3.283472033994683e-05, | |
| "loss": 1.188812255859375, | |
| "step": 724 | |
| }, | |
| { | |
| "epoch": 1.3296703296703296, | |
| "grad_norm": 0.24933356046676636, | |
| "learning_rate": 3.274819232332783e-05, | |
| "loss": 1.0235859155654907, | |
| "step": 726 | |
| }, | |
| { | |
| "epoch": 1.3333333333333333, | |
| "grad_norm": 0.8426433801651001, | |
| "learning_rate": 3.2661582145292805e-05, | |
| "loss": 1.116140604019165, | |
| "step": 728 | |
| }, | |
| { | |
| "epoch": 1.3369963369963371, | |
| "grad_norm": 0.518878161907196, | |
| "learning_rate": 3.2574891161740014e-05, | |
| "loss": 0.6969371438026428, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 1.3406593406593408, | |
| "grad_norm": 0.3753526210784912, | |
| "learning_rate": 3.2488120729832745e-05, | |
| "loss": 0.7986868023872375, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 1.3443223443223444, | |
| "grad_norm": 0.09369145333766937, | |
| "learning_rate": 3.240127220797807e-05, | |
| "loss": 0.6143500804901123, | |
| "step": 734 | |
| }, | |
| { | |
| "epoch": 1.347985347985348, | |
| "grad_norm": 0.7680373787879944, | |
| "learning_rate": 3.231434695580558e-05, | |
| "loss": 1.102622628211975, | |
| "step": 736 | |
| }, | |
| { | |
| "epoch": 1.3516483516483517, | |
| "grad_norm": 0.2766784131526947, | |
| "learning_rate": 3.222734633414607e-05, | |
| "loss": 0.7411299347877502, | |
| "step": 738 | |
| }, | |
| { | |
| "epoch": 1.3553113553113554, | |
| "grad_norm": 0.4584338665008545, | |
| "learning_rate": 3.214027170501029e-05, | |
| "loss": 0.9950368404388428, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 1.358974358974359, | |
| "grad_norm": 0.800658106803894, | |
| "learning_rate": 3.205312443156755e-05, | |
| "loss": 0.5370650887489319, | |
| "step": 742 | |
| }, | |
| { | |
| "epoch": 1.3626373626373627, | |
| "grad_norm": 0.39628687500953674, | |
| "learning_rate": 3.196590587812446e-05, | |
| "loss": 1.2178653478622437, | |
| "step": 744 | |
| }, | |
| { | |
| "epoch": 1.3663003663003663, | |
| "grad_norm": 0.9103190302848816, | |
| "learning_rate": 3.1878617410103514e-05, | |
| "loss": 1.0132914781570435, | |
| "step": 746 | |
| }, | |
| { | |
| "epoch": 1.36996336996337, | |
| "grad_norm": 0.24214321374893188, | |
| "learning_rate": 3.1791260394021735e-05, | |
| "loss": 1.0330907106399536, | |
| "step": 748 | |
| }, | |
| { | |
| "epoch": 1.3736263736263736, | |
| "grad_norm": 0.22804208099842072, | |
| "learning_rate": 3.1703836197469257e-05, | |
| "loss": 0.7769557237625122, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 1.3772893772893773, | |
| "grad_norm": 0.31006965041160583, | |
| "learning_rate": 3.161634618908797e-05, | |
| "loss": 1.099147915840149, | |
| "step": 752 | |
| }, | |
| { | |
| "epoch": 1.380952380952381, | |
| "grad_norm": 0.2560300827026367, | |
| "learning_rate": 3.1528791738550054e-05, | |
| "loss": 0.9559687376022339, | |
| "step": 754 | |
| }, | |
| { | |
| "epoch": 1.3846153846153846, | |
| "grad_norm": 0.21459929645061493, | |
| "learning_rate": 3.1441174216536514e-05, | |
| "loss": 1.2862838506698608, | |
| "step": 756 | |
| }, | |
| { | |
| "epoch": 1.3882783882783882, | |
| "grad_norm": 0.6228247880935669, | |
| "learning_rate": 3.135349499471579e-05, | |
| "loss": 1.1889519691467285, | |
| "step": 758 | |
| }, | |
| { | |
| "epoch": 1.3919413919413919, | |
| "grad_norm": 0.15515995025634766, | |
| "learning_rate": 3.126575544572222e-05, | |
| "loss": 1.1298028230667114, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 1.3956043956043955, | |
| "grad_norm": 0.2352827489376068, | |
| "learning_rate": 3.117795694313458e-05, | |
| "loss": 1.1332722902297974, | |
| "step": 762 | |
| }, | |
| { | |
| "epoch": 1.3992673992673992, | |
| "grad_norm": 0.15803271532058716, | |
| "learning_rate": 3.109010086145456e-05, | |
| "loss": 0.689454197883606, | |
| "step": 764 | |
| }, | |
| { | |
| "epoch": 1.4029304029304028, | |
| "grad_norm": 0.2263651043176651, | |
| "learning_rate": 3.1002188576085295e-05, | |
| "loss": 0.8705043196678162, | |
| "step": 766 | |
| }, | |
| { | |
| "epoch": 1.4065934065934065, | |
| "grad_norm": 1.2859166860580444, | |
| "learning_rate": 3.091422146330977e-05, | |
| "loss": 0.8634616732597351, | |
| "step": 768 | |
| }, | |
| { | |
| "epoch": 1.4102564102564101, | |
| "grad_norm": 0.38691240549087524, | |
| "learning_rate": 3.082620090026932e-05, | |
| "loss": 1.1554944515228271, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 1.4139194139194138, | |
| "grad_norm": 0.21298235654830933, | |
| "learning_rate": 3.0738128264942046e-05, | |
| "loss": 1.1856485605239868, | |
| "step": 772 | |
| }, | |
| { | |
| "epoch": 1.4175824175824177, | |
| "grad_norm": 0.4227127432823181, | |
| "learning_rate": 3.0650004936121254e-05, | |
| "loss": 0.9900102615356445, | |
| "step": 774 | |
| }, | |
| { | |
| "epoch": 1.4212454212454213, | |
| "grad_norm": 0.272560715675354, | |
| "learning_rate": 3.0561832293393846e-05, | |
| "loss": 1.1388965845108032, | |
| "step": 776 | |
| }, | |
| { | |
| "epoch": 1.424908424908425, | |
| "grad_norm": 0.23176811635494232, | |
| "learning_rate": 3.04736117171188e-05, | |
| "loss": 0.8189452886581421, | |
| "step": 778 | |
| }, | |
| { | |
| "epoch": 1.4285714285714286, | |
| "grad_norm": 0.17611531913280487, | |
| "learning_rate": 3.0385344588405422e-05, | |
| "loss": 1.3413128852844238, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 1.4322344322344323, | |
| "grad_norm": 0.4652513861656189, | |
| "learning_rate": 3.029703228909186e-05, | |
| "loss": 1.1679465770721436, | |
| "step": 782 | |
| }, | |
| { | |
| "epoch": 1.435897435897436, | |
| "grad_norm": 0.16926081478595734, | |
| "learning_rate": 3.0208676201723406e-05, | |
| "loss": 1.134766697883606, | |
| "step": 784 | |
| }, | |
| { | |
| "epoch": 1.4395604395604396, | |
| "grad_norm": 3.288715362548828, | |
| "learning_rate": 3.0120277709530854e-05, | |
| "loss": 0.9238865971565247, | |
| "step": 786 | |
| }, | |
| { | |
| "epoch": 1.4432234432234432, | |
| "grad_norm": 0.3530486524105072, | |
| "learning_rate": 3.003183819640886e-05, | |
| "loss": 1.1074001789093018, | |
| "step": 788 | |
| }, | |
| { | |
| "epoch": 1.4468864468864469, | |
| "grad_norm": 0.0459970124065876, | |
| "learning_rate": 2.9943359046894254e-05, | |
| "loss": 0.6836336851119995, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 1.4505494505494505, | |
| "grad_norm": 0.23490065336227417, | |
| "learning_rate": 2.9854841646144423e-05, | |
| "loss": 0.9037283062934875, | |
| "step": 792 | |
| }, | |
| { | |
| "epoch": 1.4542124542124542, | |
| "grad_norm": 0.35676056146621704, | |
| "learning_rate": 2.9766287379915518e-05, | |
| "loss": 0.8027743101119995, | |
| "step": 794 | |
| }, | |
| { | |
| "epoch": 1.4578754578754578, | |
| "grad_norm": 0.17792175710201263, | |
| "learning_rate": 2.967769763454089e-05, | |
| "loss": 1.1282213926315308, | |
| "step": 796 | |
| }, | |
| { | |
| "epoch": 1.4615384615384617, | |
| "grad_norm": 0.28798913955688477, | |
| "learning_rate": 2.9589073796909282e-05, | |
| "loss": 0.7936130166053772, | |
| "step": 798 | |
| }, | |
| { | |
| "epoch": 1.4652014652014653, | |
| "grad_norm": 0.20934176445007324, | |
| "learning_rate": 2.950041725444318e-05, | |
| "loss": 0.9876341819763184, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 1.468864468864469, | |
| "grad_norm": 0.4038946032524109, | |
| "learning_rate": 2.941172939507706e-05, | |
| "loss": 1.5155441761016846, | |
| "step": 802 | |
| }, | |
| { | |
| "epoch": 1.4725274725274726, | |
| "grad_norm": 0.8771412968635559, | |
| "learning_rate": 2.932301160723566e-05, | |
| "loss": 1.0064780712127686, | |
| "step": 804 | |
| }, | |
| { | |
| "epoch": 1.4761904761904763, | |
| "grad_norm": 0.3849470317363739, | |
| "learning_rate": 2.923426527981228e-05, | |
| "loss": 1.171331524848938, | |
| "step": 806 | |
| }, | |
| { | |
| "epoch": 1.47985347985348, | |
| "grad_norm": 0.5106516480445862, | |
| "learning_rate": 2.9145491802146984e-05, | |
| "loss": 1.1589710712432861, | |
| "step": 808 | |
| }, | |
| { | |
| "epoch": 1.4835164835164836, | |
| "grad_norm": 0.2847084403038025, | |
| "learning_rate": 2.905669256400491e-05, | |
| "loss": 0.9889826774597168, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 1.4871794871794872, | |
| "grad_norm": 0.20060478150844574, | |
| "learning_rate": 2.896786895555444e-05, | |
| "loss": 0.8426548838615417, | |
| "step": 812 | |
| }, | |
| { | |
| "epoch": 1.4908424908424909, | |
| "grad_norm": 0.11503265798091888, | |
| "learning_rate": 2.887902236734552e-05, | |
| "loss": 1.1940970420837402, | |
| "step": 814 | |
| }, | |
| { | |
| "epoch": 1.4945054945054945, | |
| "grad_norm": 0.23822751641273499, | |
| "learning_rate": 2.879015419028781e-05, | |
| "loss": 1.1169782876968384, | |
| "step": 816 | |
| }, | |
| { | |
| "epoch": 1.4981684981684982, | |
| "grad_norm": 0.1774878203868866, | |
| "learning_rate": 2.8701265815628987e-05, | |
| "loss": 0.3862011432647705, | |
| "step": 818 | |
| }, | |
| { | |
| "epoch": 1.5018315018315018, | |
| "grad_norm": 0.2943243980407715, | |
| "learning_rate": 2.8612358634932884e-05, | |
| "loss": 1.1364233493804932, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 1.5054945054945055, | |
| "grad_norm": 0.34925273060798645, | |
| "learning_rate": 2.852343404005778e-05, | |
| "loss": 0.8967536687850952, | |
| "step": 822 | |
| }, | |
| { | |
| "epoch": 1.5091575091575091, | |
| "grad_norm": 0.2488994151353836, | |
| "learning_rate": 2.8434493423134544e-05, | |
| "loss": 0.8218085169792175, | |
| "step": 824 | |
| }, | |
| { | |
| "epoch": 1.5128205128205128, | |
| "grad_norm": 0.17446660995483398, | |
| "learning_rate": 2.8345538176544918e-05, | |
| "loss": 1.0801664590835571, | |
| "step": 826 | |
| }, | |
| { | |
| "epoch": 1.5164835164835164, | |
| "grad_norm": 0.9011160731315613, | |
| "learning_rate": 2.8256569692899627e-05, | |
| "loss": 0.9831532835960388, | |
| "step": 828 | |
| }, | |
| { | |
| "epoch": 1.52014652014652, | |
| "grad_norm": 0.2522432208061218, | |
| "learning_rate": 2.8167589365016646e-05, | |
| "loss": 0.984779953956604, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 1.5238095238095237, | |
| "grad_norm": 0.2282875031232834, | |
| "learning_rate": 2.8078598585899385e-05, | |
| "loss": 1.2356276512145996, | |
| "step": 832 | |
| }, | |
| { | |
| "epoch": 1.5274725274725274, | |
| "grad_norm": 0.45575666427612305, | |
| "learning_rate": 2.7989598748714846e-05, | |
| "loss": 0.7996046543121338, | |
| "step": 834 | |
| }, | |
| { | |
| "epoch": 1.531135531135531, | |
| "grad_norm": 0.31805214285850525, | |
| "learning_rate": 2.7900591246771855e-05, | |
| "loss": 1.1425288915634155, | |
| "step": 836 | |
| }, | |
| { | |
| "epoch": 1.5347985347985347, | |
| "grad_norm": 0.24859696626663208, | |
| "learning_rate": 2.7811577473499224e-05, | |
| "loss": 1.1804063320159912, | |
| "step": 838 | |
| }, | |
| { | |
| "epoch": 1.5384615384615383, | |
| "grad_norm": 0.2666679322719574, | |
| "learning_rate": 2.772255882242394e-05, | |
| "loss": 1.0411241054534912, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 1.542124542124542, | |
| "grad_norm": 0.20402321219444275, | |
| "learning_rate": 2.7633536687149353e-05, | |
| "loss": 0.8526805639266968, | |
| "step": 842 | |
| }, | |
| { | |
| "epoch": 1.5457875457875456, | |
| "grad_norm": 0.38646435737609863, | |
| "learning_rate": 2.7544512461333377e-05, | |
| "loss": 1.0257073640823364, | |
| "step": 844 | |
| }, | |
| { | |
| "epoch": 1.5494505494505495, | |
| "grad_norm": 0.19415532052516937, | |
| "learning_rate": 2.745548753866663e-05, | |
| "loss": 1.1687860488891602, | |
| "step": 846 | |
| }, | |
| { | |
| "epoch": 1.5531135531135531, | |
| "grad_norm": 0.2800712287425995, | |
| "learning_rate": 2.7366463312850655e-05, | |
| "loss": 0.5396187901496887, | |
| "step": 848 | |
| }, | |
| { | |
| "epoch": 1.5567765567765568, | |
| "grad_norm": 0.18754497170448303, | |
| "learning_rate": 2.727744117757607e-05, | |
| "loss": 1.02944016456604, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 1.5604395604395604, | |
| "grad_norm": 0.2201027274131775, | |
| "learning_rate": 2.7188422526500788e-05, | |
| "loss": 1.168210744857788, | |
| "step": 852 | |
| }, | |
| { | |
| "epoch": 1.564102564102564, | |
| "grad_norm": 0.3404369354248047, | |
| "learning_rate": 2.709940875322815e-05, | |
| "loss": 0.8437097072601318, | |
| "step": 854 | |
| }, | |
| { | |
| "epoch": 1.5677655677655677, | |
| "grad_norm": 0.35649099946022034, | |
| "learning_rate": 2.7010401251285156e-05, | |
| "loss": 0.8084161877632141, | |
| "step": 856 | |
| }, | |
| { | |
| "epoch": 1.5714285714285714, | |
| "grad_norm": 0.47654107213020325, | |
| "learning_rate": 2.6921401414100627e-05, | |
| "loss": 0.9324872493743896, | |
| "step": 858 | |
| }, | |
| { | |
| "epoch": 1.575091575091575, | |
| "grad_norm": 0.23765020072460175, | |
| "learning_rate": 2.6832410634983356e-05, | |
| "loss": 0.6993922591209412, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 1.578754578754579, | |
| "grad_norm": 0.24254556000232697, | |
| "learning_rate": 2.6743430307100388e-05, | |
| "loss": 1.1566822528839111, | |
| "step": 862 | |
| }, | |
| { | |
| "epoch": 1.5824175824175826, | |
| "grad_norm": 0.2840297520160675, | |
| "learning_rate": 2.665446182345509e-05, | |
| "loss": 0.8900972008705139, | |
| "step": 864 | |
| }, | |
| { | |
| "epoch": 1.5860805860805862, | |
| "grad_norm": 0.23388059437274933, | |
| "learning_rate": 2.6565506576865458e-05, | |
| "loss": 1.1673542261123657, | |
| "step": 866 | |
| }, | |
| { | |
| "epoch": 1.5897435897435899, | |
| "grad_norm": 0.5824553966522217, | |
| "learning_rate": 2.6476565959942233e-05, | |
| "loss": 1.1742522716522217, | |
| "step": 868 | |
| }, | |
| { | |
| "epoch": 1.5934065934065935, | |
| "grad_norm": 0.2521633803844452, | |
| "learning_rate": 2.6387641365067124e-05, | |
| "loss": 0.5782104730606079, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 1.5970695970695972, | |
| "grad_norm": 0.16043610870838165, | |
| "learning_rate": 2.6298734184371015e-05, | |
| "loss": 1.1673752069473267, | |
| "step": 872 | |
| }, | |
| { | |
| "epoch": 1.6007326007326008, | |
| "grad_norm": 0.1490897536277771, | |
| "learning_rate": 2.6209845809712195e-05, | |
| "loss": 1.0414141416549683, | |
| "step": 874 | |
| }, | |
| { | |
| "epoch": 1.6043956043956045, | |
| "grad_norm": 5.497232437133789, | |
| "learning_rate": 2.6120977632654485e-05, | |
| "loss": 0.7130216360092163, | |
| "step": 876 | |
| }, | |
| { | |
| "epoch": 1.6080586080586081, | |
| "grad_norm": 0.6886661648750305, | |
| "learning_rate": 2.6032131044445563e-05, | |
| "loss": 0.9459899663925171, | |
| "step": 878 | |
| }, | |
| { | |
| "epoch": 1.6117216117216118, | |
| "grad_norm": 0.09610695391893387, | |
| "learning_rate": 2.59433074359951e-05, | |
| "loss": 0.8761966228485107, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 1.6153846153846154, | |
| "grad_norm": 0.31204506754875183, | |
| "learning_rate": 2.5854508197853022e-05, | |
| "loss": 1.1188955307006836, | |
| "step": 882 | |
| }, | |
| { | |
| "epoch": 1.619047619047619, | |
| "grad_norm": 0.3378327190876007, | |
| "learning_rate": 2.5765734720187723e-05, | |
| "loss": 0.9112301468849182, | |
| "step": 884 | |
| }, | |
| { | |
| "epoch": 1.6227106227106227, | |
| "grad_norm": 0.22289712727069855, | |
| "learning_rate": 2.5676988392764345e-05, | |
| "loss": 1.1279692649841309, | |
| "step": 886 | |
| }, | |
| { | |
| "epoch": 1.6263736263736264, | |
| "grad_norm": 0.20399990677833557, | |
| "learning_rate": 2.5588270604922947e-05, | |
| "loss": 0.8507078886032104, | |
| "step": 888 | |
| }, | |
| { | |
| "epoch": 1.63003663003663, | |
| "grad_norm": 0.16419348120689392, | |
| "learning_rate": 2.5499582745556828e-05, | |
| "loss": 1.1664886474609375, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 1.6336996336996337, | |
| "grad_norm": 0.488471120595932, | |
| "learning_rate": 2.541092620309073e-05, | |
| "loss": 0.8955670595169067, | |
| "step": 892 | |
| }, | |
| { | |
| "epoch": 1.6373626373626373, | |
| "grad_norm": 0.2712729573249817, | |
| "learning_rate": 2.5322302365459116e-05, | |
| "loss": 1.1703094244003296, | |
| "step": 894 | |
| }, | |
| { | |
| "epoch": 1.641025641025641, | |
| "grad_norm": 0.2993127107620239, | |
| "learning_rate": 2.5233712620084494e-05, | |
| "loss": 0.7074750065803528, | |
| "step": 896 | |
| }, | |
| { | |
| "epoch": 1.6446886446886446, | |
| "grad_norm": 0.17085275053977966, | |
| "learning_rate": 2.5145158353855592e-05, | |
| "loss": 1.122510313987732, | |
| "step": 898 | |
| }, | |
| { | |
| "epoch": 1.6483516483516483, | |
| "grad_norm": 0.04999161139130592, | |
| "learning_rate": 2.505664095310574e-05, | |
| "loss": 0.7906731963157654, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 1.652014652014652, | |
| "grad_norm": 0.22062256932258606, | |
| "learning_rate": 2.496816180359115e-05, | |
| "loss": 0.8734868764877319, | |
| "step": 902 | |
| }, | |
| { | |
| "epoch": 1.6556776556776556, | |
| "grad_norm": 0.32644936442375183, | |
| "learning_rate": 2.4879722290469155e-05, | |
| "loss": 1.0106048583984375, | |
| "step": 904 | |
| }, | |
| { | |
| "epoch": 1.6593406593406592, | |
| "grad_norm": 0.33785051107406616, | |
| "learning_rate": 2.4791323798276593e-05, | |
| "loss": 1.1063401699066162, | |
| "step": 906 | |
| }, | |
| { | |
| "epoch": 1.6630036630036629, | |
| "grad_norm": 0.19222113490104675, | |
| "learning_rate": 2.4702967710908143e-05, | |
| "loss": 0.7271807789802551, | |
| "step": 908 | |
| }, | |
| { | |
| "epoch": 1.6666666666666665, | |
| "grad_norm": 0.34294626116752625, | |
| "learning_rate": 2.4614655411594583e-05, | |
| "loss": 1.180138349533081, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 1.6703296703296702, | |
| "grad_norm": 0.08716004341840744, | |
| "learning_rate": 2.452638828288121e-05, | |
| "loss": 1.2012932300567627, | |
| "step": 912 | |
| }, | |
| { | |
| "epoch": 1.673992673992674, | |
| "grad_norm": 0.14314715564250946, | |
| "learning_rate": 2.4438167706606152e-05, | |
| "loss": 1.0735292434692383, | |
| "step": 914 | |
| }, | |
| { | |
| "epoch": 1.6776556776556777, | |
| "grad_norm": 0.9384481906890869, | |
| "learning_rate": 2.434999506387875e-05, | |
| "loss": 1.1871505975723267, | |
| "step": 916 | |
| }, | |
| { | |
| "epoch": 1.6813186813186813, | |
| "grad_norm": 0.8262288570404053, | |
| "learning_rate": 2.4261871735057956e-05, | |
| "loss": 0.8213975429534912, | |
| "step": 918 | |
| }, | |
| { | |
| "epoch": 1.684981684981685, | |
| "grad_norm": 0.25928905606269836, | |
| "learning_rate": 2.417379909973069e-05, | |
| "loss": 1.1863298416137695, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 1.6886446886446886, | |
| "grad_norm": 0.33573785424232483, | |
| "learning_rate": 2.408577853669024e-05, | |
| "loss": 0.9798559546470642, | |
| "step": 922 | |
| }, | |
| { | |
| "epoch": 1.6923076923076923, | |
| "grad_norm": 0.254114031791687, | |
| "learning_rate": 2.3997811423914717e-05, | |
| "loss": 0.8279831409454346, | |
| "step": 924 | |
| }, | |
| { | |
| "epoch": 1.695970695970696, | |
| "grad_norm": 0.8429379463195801, | |
| "learning_rate": 2.390989913854545e-05, | |
| "loss": 0.9410126209259033, | |
| "step": 926 | |
| }, | |
| { | |
| "epoch": 1.6996336996336996, | |
| "grad_norm": 0.13982383906841278, | |
| "learning_rate": 2.382204305686543e-05, | |
| "loss": 0.7881975173950195, | |
| "step": 928 | |
| }, | |
| { | |
| "epoch": 1.7032967032967035, | |
| "grad_norm": 0.03225285932421684, | |
| "learning_rate": 2.373424455427779e-05, | |
| "loss": 0.8376743197441101, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 1.7069597069597071, | |
| "grad_norm": 0.28351178765296936, | |
| "learning_rate": 2.364650500528421e-05, | |
| "loss": 0.9067142605781555, | |
| "step": 932 | |
| }, | |
| { | |
| "epoch": 1.7106227106227108, | |
| "grad_norm": 0.1636010706424713, | |
| "learning_rate": 2.3558825783463484e-05, | |
| "loss": 1.3678433895111084, | |
| "step": 934 | |
| }, | |
| { | |
| "epoch": 1.7142857142857144, | |
| "grad_norm": 0.29480600357055664, | |
| "learning_rate": 2.3471208261449955e-05, | |
| "loss": 1.1705397367477417, | |
| "step": 936 | |
| }, | |
| { | |
| "epoch": 1.717948717948718, | |
| "grad_norm": 0.1829896867275238, | |
| "learning_rate": 2.3383653810912033e-05, | |
| "loss": 0.4487687945365906, | |
| "step": 938 | |
| }, | |
| { | |
| "epoch": 1.7216117216117217, | |
| "grad_norm": 0.11328794807195663, | |
| "learning_rate": 2.3296163802530745e-05, | |
| "loss": 0.4792923629283905, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 1.7252747252747254, | |
| "grad_norm": 0.2216004580259323, | |
| "learning_rate": 2.320873960597828e-05, | |
| "loss": 1.1090213060379028, | |
| "step": 942 | |
| }, | |
| { | |
| "epoch": 1.728937728937729, | |
| "grad_norm": 0.147038996219635, | |
| "learning_rate": 2.312138258989649e-05, | |
| "loss": 0.9103480577468872, | |
| "step": 944 | |
| }, | |
| { | |
| "epoch": 1.7326007326007327, | |
| "grad_norm": 0.13560520112514496, | |
| "learning_rate": 2.3034094121875543e-05, | |
| "loss": 1.1171597242355347, | |
| "step": 946 | |
| }, | |
| { | |
| "epoch": 1.7362637362637363, | |
| "grad_norm": 0.15320324897766113, | |
| "learning_rate": 2.2946875568432458e-05, | |
| "loss": 1.0631382465362549, | |
| "step": 948 | |
| }, | |
| { | |
| "epoch": 1.73992673992674, | |
| "grad_norm": 0.1420201063156128, | |
| "learning_rate": 2.2859728294989718e-05, | |
| "loss": 0.9614072442054749, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 1.7435897435897436, | |
| "grad_norm": 0.10232152044773102, | |
| "learning_rate": 2.277265366585394e-05, | |
| "loss": 0.9154943227767944, | |
| "step": 952 | |
| }, | |
| { | |
| "epoch": 1.7472527472527473, | |
| "grad_norm": 0.8972752690315247, | |
| "learning_rate": 2.268565304419443e-05, | |
| "loss": 1.170873761177063, | |
| "step": 954 | |
| }, | |
| { | |
| "epoch": 1.750915750915751, | |
| "grad_norm": 0.3512289226055145, | |
| "learning_rate": 2.2598727792021933e-05, | |
| "loss": 0.741244912147522, | |
| "step": 956 | |
| }, | |
| { | |
| "epoch": 1.7545787545787546, | |
| "grad_norm": 0.28857314586639404, | |
| "learning_rate": 2.2511879270167264e-05, | |
| "loss": 1.1468185186386108, | |
| "step": 958 | |
| }, | |
| { | |
| "epoch": 1.7582417582417582, | |
| "grad_norm": 0.7446520328521729, | |
| "learning_rate": 2.2425108838259995e-05, | |
| "loss": 0.46506467461586, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 1.7619047619047619, | |
| "grad_norm": 0.3139968514442444, | |
| "learning_rate": 2.23384178547072e-05, | |
| "loss": 0.9495673775672913, | |
| "step": 962 | |
| }, | |
| { | |
| "epoch": 1.7655677655677655, | |
| "grad_norm": 0.18298202753067017, | |
| "learning_rate": 2.225180767667217e-05, | |
| "loss": 1.0209523439407349, | |
| "step": 964 | |
| }, | |
| { | |
| "epoch": 1.7692307692307692, | |
| "grad_norm": 0.16875436902046204, | |
| "learning_rate": 2.2165279660053174e-05, | |
| "loss": 1.1537625789642334, | |
| "step": 966 | |
| }, | |
| { | |
| "epoch": 1.7728937728937728, | |
| "grad_norm": 0.19506409764289856, | |
| "learning_rate": 2.2078835159462225e-05, | |
| "loss": 0.5657550692558289, | |
| "step": 968 | |
| }, | |
| { | |
| "epoch": 1.7765567765567765, | |
| "grad_norm": 0.1412108838558197, | |
| "learning_rate": 2.1992475528203872e-05, | |
| "loss": 0.8089891672134399, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 1.7802197802197801, | |
| "grad_norm": 0.4082830250263214, | |
| "learning_rate": 2.1906202118254025e-05, | |
| "loss": 0.8698192834854126, | |
| "step": 972 | |
| }, | |
| { | |
| "epoch": 1.7838827838827838, | |
| "grad_norm": 0.3070997893810272, | |
| "learning_rate": 2.1820016280238792e-05, | |
| "loss": 1.1678433418273926, | |
| "step": 974 | |
| }, | |
| { | |
| "epoch": 1.7875457875457874, | |
| "grad_norm": 0.18406766653060913, | |
| "learning_rate": 2.1733919363413314e-05, | |
| "loss": 1.1347768306732178, | |
| "step": 976 | |
| }, | |
| { | |
| "epoch": 1.791208791208791, | |
| "grad_norm": 0.16203664243221283, | |
| "learning_rate": 2.1647912715640657e-05, | |
| "loss": 0.8943782448768616, | |
| "step": 978 | |
| }, | |
| { | |
| "epoch": 1.7948717948717947, | |
| "grad_norm": 0.17423562705516815, | |
| "learning_rate": 2.1561997683370705e-05, | |
| "loss": 0.9334428310394287, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 1.7985347985347986, | |
| "grad_norm": 0.15521575510501862, | |
| "learning_rate": 2.147617561161911e-05, | |
| "loss": 1.121093988418579, | |
| "step": 982 | |
| }, | |
| { | |
| "epoch": 1.8021978021978022, | |
| "grad_norm": 1.1999096870422363, | |
| "learning_rate": 2.1390447843946156e-05, | |
| "loss": 1.0399394035339355, | |
| "step": 984 | |
| }, | |
| { | |
| "epoch": 1.8058608058608059, | |
| "grad_norm": 0.236453577876091, | |
| "learning_rate": 2.1304815722435838e-05, | |
| "loss": 0.6336957812309265, | |
| "step": 986 | |
| }, | |
| { | |
| "epoch": 1.8095238095238095, | |
| "grad_norm": 1.987808108329773, | |
| "learning_rate": 2.121928058767475e-05, | |
| "loss": 0.8018144965171814, | |
| "step": 988 | |
| }, | |
| { | |
| "epoch": 1.8131868131868132, | |
| "grad_norm": 0.6906500458717346, | |
| "learning_rate": 2.113384377873117e-05, | |
| "loss": 0.7112327814102173, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 1.8168498168498168, | |
| "grad_norm": 0.5853157043457031, | |
| "learning_rate": 2.1048506633134058e-05, | |
| "loss": 0.770244300365448, | |
| "step": 992 | |
| }, | |
| { | |
| "epoch": 1.8205128205128205, | |
| "grad_norm": 0.22558943927288055, | |
| "learning_rate": 2.0963270486852116e-05, | |
| "loss": 0.8251454830169678, | |
| "step": 994 | |
| }, | |
| { | |
| "epoch": 1.8241758241758241, | |
| "grad_norm": 0.36891841888427734, | |
| "learning_rate": 2.0878136674272874e-05, | |
| "loss": 1.0850389003753662, | |
| "step": 996 | |
| }, | |
| { | |
| "epoch": 1.8278388278388278, | |
| "grad_norm": 0.21534068882465363, | |
| "learning_rate": 2.079310652818186e-05, | |
| "loss": 0.8296566605567932, | |
| "step": 998 | |
| }, | |
| { | |
| "epoch": 1.8315018315018317, | |
| "grad_norm": 0.20088708400726318, | |
| "learning_rate": 2.070818137974162e-05, | |
| "loss": 1.0995657444000244, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 1.8351648351648353, | |
| "grad_norm": 0.16254781186580658, | |
| "learning_rate": 2.0623362558470983e-05, | |
| "loss": 1.1204814910888672, | |
| "step": 1002 | |
| }, | |
| { | |
| "epoch": 1.838827838827839, | |
| "grad_norm": 0.16232743859291077, | |
| "learning_rate": 2.05386513922242e-05, | |
| "loss": 1.1130619049072266, | |
| "step": 1004 | |
| }, | |
| { | |
| "epoch": 1.8424908424908426, | |
| "grad_norm": 0.21432489156723022, | |
| "learning_rate": 2.0454049207170146e-05, | |
| "loss": 1.1204091310501099, | |
| "step": 1006 | |
| }, | |
| { | |
| "epoch": 1.8461538461538463, | |
| "grad_norm": 0.45065784454345703, | |
| "learning_rate": 2.0369557327771594e-05, | |
| "loss": 0.7804591655731201, | |
| "step": 1008 | |
| }, | |
| { | |
| "epoch": 1.84981684981685, | |
| "grad_norm": 0.2610171139240265, | |
| "learning_rate": 2.0285177076764462e-05, | |
| "loss": 1.076236367225647, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 1.8534798534798536, | |
| "grad_norm": 0.11059543490409851, | |
| "learning_rate": 2.0200909775137085e-05, | |
| "loss": 0.7410160899162292, | |
| "step": 1012 | |
| }, | |
| { | |
| "epoch": 1.8571428571428572, | |
| "grad_norm": 2.333650827407837, | |
| "learning_rate": 2.0116756742109577e-05, | |
| "loss": 1.1382379531860352, | |
| "step": 1014 | |
| }, | |
| { | |
| "epoch": 1.8608058608058609, | |
| "grad_norm": 0.16106364130973816, | |
| "learning_rate": 2.003271929511314e-05, | |
| "loss": 1.1225502490997314, | |
| "step": 1016 | |
| }, | |
| { | |
| "epoch": 1.8644688644688645, | |
| "grad_norm": 0.1435774266719818, | |
| "learning_rate": 1.9948798749769464e-05, | |
| "loss": 1.197827696800232, | |
| "step": 1018 | |
| }, | |
| { | |
| "epoch": 1.8681318681318682, | |
| "grad_norm": 0.04951045662164688, | |
| "learning_rate": 1.986499641987013e-05, | |
| "loss": 0.9368598461151123, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 1.8717948717948718, | |
| "grad_norm": 0.17583589255809784, | |
| "learning_rate": 1.9781313617356012e-05, | |
| "loss": 1.0920844078063965, | |
| "step": 1022 | |
| }, | |
| { | |
| "epoch": 1.8754578754578755, | |
| "grad_norm": 0.3376821279525757, | |
| "learning_rate": 1.9697751652296782e-05, | |
| "loss": 0.4992130398750305, | |
| "step": 1024 | |
| }, | |
| { | |
| "epoch": 1.879120879120879, | |
| "grad_norm": 0.1590043604373932, | |
| "learning_rate": 1.961431183287037e-05, | |
| "loss": 1.1315664052963257, | |
| "step": 1026 | |
| }, | |
| { | |
| "epoch": 1.8827838827838828, | |
| "grad_norm": 0.28124603629112244, | |
| "learning_rate": 1.9530995465342482e-05, | |
| "loss": 0.9077785611152649, | |
| "step": 1028 | |
| }, | |
| { | |
| "epoch": 1.8864468864468864, | |
| "grad_norm": 0.2012709081172943, | |
| "learning_rate": 1.9447803854046192e-05, | |
| "loss": 1.1241216659545898, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 1.89010989010989, | |
| "grad_norm": 0.2831279933452606, | |
| "learning_rate": 1.9364738301361473e-05, | |
| "loss": 0.9281163811683655, | |
| "step": 1032 | |
| }, | |
| { | |
| "epoch": 1.8937728937728937, | |
| "grad_norm": 0.45628663897514343, | |
| "learning_rate": 1.928180010769482e-05, | |
| "loss": 0.7836743593215942, | |
| "step": 1034 | |
| }, | |
| { | |
| "epoch": 1.8974358974358974, | |
| "grad_norm": 0.26489734649658203, | |
| "learning_rate": 1.919899057145891e-05, | |
| "loss": 1.1775709390640259, | |
| "step": 1036 | |
| }, | |
| { | |
| "epoch": 1.901098901098901, | |
| "grad_norm": 0.4411194920539856, | |
| "learning_rate": 1.911631098905227e-05, | |
| "loss": 1.1143101453781128, | |
| "step": 1038 | |
| }, | |
| { | |
| "epoch": 1.9047619047619047, | |
| "grad_norm": 0.39963364601135254, | |
| "learning_rate": 1.903376265483896e-05, | |
| "loss": 1.234065055847168, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 1.9084249084249083, | |
| "grad_norm": 0.2849297523498535, | |
| "learning_rate": 1.895134686112834e-05, | |
| "loss": 0.6098147034645081, | |
| "step": 1042 | |
| }, | |
| { | |
| "epoch": 1.912087912087912, | |
| "grad_norm": 0.17120879888534546, | |
| "learning_rate": 1.886906489815482e-05, | |
| "loss": 0.8268396258354187, | |
| "step": 1044 | |
| }, | |
| { | |
| "epoch": 1.9157509157509156, | |
| "grad_norm": 0.2931853234767914, | |
| "learning_rate": 1.878691805405765e-05, | |
| "loss": 1.0940194129943848, | |
| "step": 1046 | |
| }, | |
| { | |
| "epoch": 1.9194139194139193, | |
| "grad_norm": 0.16556760668754578, | |
| "learning_rate": 1.8704907614860797e-05, | |
| "loss": 0.3900573253631592, | |
| "step": 1048 | |
| }, | |
| { | |
| "epoch": 1.9230769230769231, | |
| "grad_norm": 0.30659088492393494, | |
| "learning_rate": 1.8623034864452753e-05, | |
| "loss": 0.841820478439331, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 1.9267399267399268, | |
| "grad_norm": 0.13327482342720032, | |
| "learning_rate": 1.8541301084566496e-05, | |
| "loss": 0.7997146248817444, | |
| "step": 1052 | |
| }, | |
| { | |
| "epoch": 1.9304029304029304, | |
| "grad_norm": 0.23069679737091064, | |
| "learning_rate": 1.8459707554759385e-05, | |
| "loss": 1.1094664335250854, | |
| "step": 1054 | |
| }, | |
| { | |
| "epoch": 1.934065934065934, | |
| "grad_norm": 0.08997397869825363, | |
| "learning_rate": 1.8378255552393126e-05, | |
| "loss": 0.737388551235199, | |
| "step": 1056 | |
| }, | |
| { | |
| "epoch": 1.9377289377289377, | |
| "grad_norm": 0.435207724571228, | |
| "learning_rate": 1.8296946352613792e-05, | |
| "loss": 0.9677636027336121, | |
| "step": 1058 | |
| }, | |
| { | |
| "epoch": 1.9413919413919414, | |
| "grad_norm": 0.2596050500869751, | |
| "learning_rate": 1.8215781228331884e-05, | |
| "loss": 1.0497726202011108, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 1.945054945054945, | |
| "grad_norm": 0.15544529259204865, | |
| "learning_rate": 1.8134761450202316e-05, | |
| "loss": 0.7944180369377136, | |
| "step": 1062 | |
| }, | |
| { | |
| "epoch": 1.9487179487179487, | |
| "grad_norm": 0.3575184643268585, | |
| "learning_rate": 1.805388828660463e-05, | |
| "loss": 0.7939121127128601, | |
| "step": 1064 | |
| }, | |
| { | |
| "epoch": 1.9523809523809523, | |
| "grad_norm": 0.38059937953948975, | |
| "learning_rate": 1.79731630036231e-05, | |
| "loss": 1.1412160396575928, | |
| "step": 1066 | |
| }, | |
| { | |
| "epoch": 1.9560439560439562, | |
| "grad_norm": 0.15067918598651886, | |
| "learning_rate": 1.7892586865026835e-05, | |
| "loss": 1.1604868173599243, | |
| "step": 1068 | |
| }, | |
| { | |
| "epoch": 1.9597069597069599, | |
| "grad_norm": 0.14763464033603668, | |
| "learning_rate": 1.7812161132250122e-05, | |
| "loss": 0.6316368579864502, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 1.9633699633699635, | |
| "grad_norm": 0.20061402022838593, | |
| "learning_rate": 1.7731887064372617e-05, | |
| "loss": 0.7977589964866638, | |
| "step": 1072 | |
| }, | |
| { | |
| "epoch": 1.9670329670329672, | |
| "grad_norm": 0.20839878916740417, | |
| "learning_rate": 1.7651765918099588e-05, | |
| "loss": 1.1316888332366943, | |
| "step": 1074 | |
| }, | |
| { | |
| "epoch": 1.9706959706959708, | |
| "grad_norm": 0.14487165212631226, | |
| "learning_rate": 1.757179894774233e-05, | |
| "loss": 1.0948164463043213, | |
| "step": 1076 | |
| }, | |
| { | |
| "epoch": 1.9743589743589745, | |
| "grad_norm": 1.0238404273986816, | |
| "learning_rate": 1.7491987405198464e-05, | |
| "loss": 0.9682241082191467, | |
| "step": 1078 | |
| }, | |
| { | |
| "epoch": 1.978021978021978, | |
| "grad_norm": 0.4279444217681885, | |
| "learning_rate": 1.7412332539932367e-05, | |
| "loss": 0.9370381832122803, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 1.9816849816849818, | |
| "grad_norm": 0.1554751694202423, | |
| "learning_rate": 1.7332835598955615e-05, | |
| "loss": 1.0854570865631104, | |
| "step": 1082 | |
| }, | |
| { | |
| "epoch": 1.9853479853479854, | |
| "grad_norm": 0.18901632726192474, | |
| "learning_rate": 1.7253497826807435e-05, | |
| "loss": 0.6803427934646606, | |
| "step": 1084 | |
| }, | |
| { | |
| "epoch": 1.989010989010989, | |
| "grad_norm": 1.4867212772369385, | |
| "learning_rate": 1.717432046553523e-05, | |
| "loss": 0.965499997138977, | |
| "step": 1086 | |
| }, | |
| { | |
| "epoch": 1.9926739926739927, | |
| "grad_norm": 0.3673637807369232, | |
| "learning_rate": 1.7095304754675168e-05, | |
| "loss": 0.9333543181419373, | |
| "step": 1088 | |
| }, | |
| { | |
| "epoch": 1.9963369963369964, | |
| "grad_norm": 0.16466295719146729, | |
| "learning_rate": 1.701645193123272e-05, | |
| "loss": 0.887560248374939, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 0.21007683873176575, | |
| "learning_rate": 1.6937763229663356e-05, | |
| "loss": 0.9977954626083374, | |
| "step": 1092 | |
| }, | |
| { | |
| "epoch": 2.0036630036630036, | |
| "grad_norm": 0.29877015948295593, | |
| "learning_rate": 1.685923988185316e-05, | |
| "loss": 0.9857615828514099, | |
| "step": 1094 | |
| }, | |
| { | |
| "epoch": 2.0073260073260073, | |
| "grad_norm": 0.17748361825942993, | |
| "learning_rate": 1.6780883117099575e-05, | |
| "loss": 1.0911893844604492, | |
| "step": 1096 | |
| }, | |
| { | |
| "epoch": 2.010989010989011, | |
| "grad_norm": 0.16629448533058167, | |
| "learning_rate": 1.6702694162092177e-05, | |
| "loss": 1.0311784744262695, | |
| "step": 1098 | |
| }, | |
| { | |
| "epoch": 2.0146520146520146, | |
| "grad_norm": 0.4006339907646179, | |
| "learning_rate": 1.6624674240893452e-05, | |
| "loss": 1.078372597694397, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 2.0183150183150182, | |
| "grad_norm": 0.3369395136833191, | |
| "learning_rate": 1.6546824574919572e-05, | |
| "loss": 0.7856264114379883, | |
| "step": 1102 | |
| }, | |
| { | |
| "epoch": 2.021978021978022, | |
| "grad_norm": 0.26995745301246643, | |
| "learning_rate": 1.6469146382921407e-05, | |
| "loss": 1.084755778312683, | |
| "step": 1104 | |
| }, | |
| { | |
| "epoch": 2.0256410256410255, | |
| "grad_norm": 0.26533597707748413, | |
| "learning_rate": 1.6391640880965338e-05, | |
| "loss": 0.7198016047477722, | |
| "step": 1106 | |
| }, | |
| { | |
| "epoch": 2.029304029304029, | |
| "grad_norm": 0.5049846172332764, | |
| "learning_rate": 1.6314309282414244e-05, | |
| "loss": 1.1550657749176025, | |
| "step": 1108 | |
| }, | |
| { | |
| "epoch": 2.032967032967033, | |
| "grad_norm": 0.2585044205188751, | |
| "learning_rate": 1.623715279790853e-05, | |
| "loss": 0.7946727275848389, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 2.0366300366300365, | |
| "grad_norm": 0.08100654929876328, | |
| "learning_rate": 1.616017263534713e-05, | |
| "loss": 0.5124549269676208, | |
| "step": 1112 | |
| }, | |
| { | |
| "epoch": 2.04029304029304, | |
| "grad_norm": 0.2333383411169052, | |
| "learning_rate": 1.608336999986867e-05, | |
| "loss": 0.7633625268936157, | |
| "step": 1114 | |
| }, | |
| { | |
| "epoch": 2.043956043956044, | |
| "grad_norm": 0.6371340751647949, | |
| "learning_rate": 1.600674609383253e-05, | |
| "loss": 1.0826982259750366, | |
| "step": 1116 | |
| }, | |
| { | |
| "epoch": 2.0476190476190474, | |
| "grad_norm": 0.38138705492019653, | |
| "learning_rate": 1.5930302116800044e-05, | |
| "loss": 1.0550010204315186, | |
| "step": 1118 | |
| }, | |
| { | |
| "epoch": 2.051282051282051, | |
| "grad_norm": 3.1294875144958496, | |
| "learning_rate": 1.585403926551573e-05, | |
| "loss": 0.7597935199737549, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 2.0549450549450547, | |
| "grad_norm": 5.722661018371582, | |
| "learning_rate": 1.5777958733888565e-05, | |
| "loss": 1.1912044286727905, | |
| "step": 1122 | |
| }, | |
| { | |
| "epoch": 2.0586080586080584, | |
| "grad_norm": 0.21310099959373474, | |
| "learning_rate": 1.570206171297324e-05, | |
| "loss": 1.0809831619262695, | |
| "step": 1124 | |
| }, | |
| { | |
| "epoch": 2.062271062271062, | |
| "grad_norm": 0.30974480509757996, | |
| "learning_rate": 1.56263493909516e-05, | |
| "loss": 0.6720230579376221, | |
| "step": 1126 | |
| }, | |
| { | |
| "epoch": 2.065934065934066, | |
| "grad_norm": 0.1537218540906906, | |
| "learning_rate": 1.555082295311396e-05, | |
| "loss": 1.0456634759902954, | |
| "step": 1128 | |
| }, | |
| { | |
| "epoch": 2.06959706959707, | |
| "grad_norm": 0.44281265139579773, | |
| "learning_rate": 1.5475483581840587e-05, | |
| "loss": 1.017748236656189, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 2.0732600732600734, | |
| "grad_norm": 0.24577392637729645, | |
| "learning_rate": 1.54003324565832e-05, | |
| "loss": 1.0750110149383545, | |
| "step": 1132 | |
| }, | |
| { | |
| "epoch": 2.076923076923077, | |
| "grad_norm": 0.22100472450256348, | |
| "learning_rate": 1.53253707538465e-05, | |
| "loss": 1.060890793800354, | |
| "step": 1134 | |
| }, | |
| { | |
| "epoch": 2.0805860805860807, | |
| "grad_norm": 0.9236302375793457, | |
| "learning_rate": 1.5250599647169716e-05, | |
| "loss": 0.6415885090827942, | |
| "step": 1136 | |
| }, | |
| { | |
| "epoch": 2.0842490842490844, | |
| "grad_norm": 0.20396125316619873, | |
| "learning_rate": 1.5176020307108276e-05, | |
| "loss": 1.0569545030593872, | |
| "step": 1138 | |
| }, | |
| { | |
| "epoch": 2.087912087912088, | |
| "grad_norm": 0.22348055243492126, | |
| "learning_rate": 1.5101633901215456e-05, | |
| "loss": 0.9237917065620422, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 2.0915750915750917, | |
| "grad_norm": 0.18689396977424622, | |
| "learning_rate": 1.5027441594024133e-05, | |
| "loss": 1.0551191568374634, | |
| "step": 1142 | |
| }, | |
| { | |
| "epoch": 2.0952380952380953, | |
| "grad_norm": 0.11935965716838837, | |
| "learning_rate": 1.4953444547028531e-05, | |
| "loss": 0.3609432280063629, | |
| "step": 1144 | |
| }, | |
| { | |
| "epoch": 2.098901098901099, | |
| "grad_norm": 0.8522807955741882, | |
| "learning_rate": 1.4879643918666003e-05, | |
| "loss": 0.7986314296722412, | |
| "step": 1146 | |
| }, | |
| { | |
| "epoch": 2.1025641025641026, | |
| "grad_norm": 0.35904762148857117, | |
| "learning_rate": 1.480604086429897e-05, | |
| "loss": 0.7680439352989197, | |
| "step": 1148 | |
| }, | |
| { | |
| "epoch": 2.1062271062271063, | |
| "grad_norm": 0.23995743691921234, | |
| "learning_rate": 1.4732636536196794e-05, | |
| "loss": 0.7488572597503662, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 2.10989010989011, | |
| "grad_norm": 0.24136975407600403, | |
| "learning_rate": 1.4659432083517726e-05, | |
| "loss": 0.9088020324707031, | |
| "step": 1152 | |
| }, | |
| { | |
| "epoch": 2.1135531135531136, | |
| "grad_norm": 0.25049832463264465, | |
| "learning_rate": 1.458642865229093e-05, | |
| "loss": 0.6150023937225342, | |
| "step": 1154 | |
| }, | |
| { | |
| "epoch": 2.1172161172161172, | |
| "grad_norm": 0.2590892016887665, | |
| "learning_rate": 1.4513627385398554e-05, | |
| "loss": 0.9922336935997009, | |
| "step": 1156 | |
| }, | |
| { | |
| "epoch": 2.120879120879121, | |
| "grad_norm": 0.6481472849845886, | |
| "learning_rate": 1.4441029422557817e-05, | |
| "loss": 0.9142146110534668, | |
| "step": 1158 | |
| }, | |
| { | |
| "epoch": 2.1245421245421245, | |
| "grad_norm": 0.29179754853248596, | |
| "learning_rate": 1.4368635900303184e-05, | |
| "loss": 0.802727997303009, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 2.128205128205128, | |
| "grad_norm": 0.5445400476455688, | |
| "learning_rate": 1.4296447951968562e-05, | |
| "loss": 0.6710273623466492, | |
| "step": 1162 | |
| }, | |
| { | |
| "epoch": 2.131868131868132, | |
| "grad_norm": 0.22561633586883545, | |
| "learning_rate": 1.4224466707669542e-05, | |
| "loss": 0.8315181136131287, | |
| "step": 1164 | |
| }, | |
| { | |
| "epoch": 2.1355311355311355, | |
| "grad_norm": 0.24327363073825836, | |
| "learning_rate": 1.4152693294285756e-05, | |
| "loss": 0.4054326117038727, | |
| "step": 1166 | |
| }, | |
| { | |
| "epoch": 2.139194139194139, | |
| "grad_norm": 0.6817838549613953, | |
| "learning_rate": 1.4081128835443188e-05, | |
| "loss": 0.4102446436882019, | |
| "step": 1168 | |
| }, | |
| { | |
| "epoch": 2.142857142857143, | |
| "grad_norm": 0.44050848484039307, | |
| "learning_rate": 1.400977445149661e-05, | |
| "loss": 0.8157304525375366, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 2.1465201465201464, | |
| "grad_norm": 0.5266426801681519, | |
| "learning_rate": 1.3938631259512013e-05, | |
| "loss": 1.0504196882247925, | |
| "step": 1172 | |
| }, | |
| { | |
| "epoch": 2.15018315018315, | |
| "grad_norm": 0.3633262515068054, | |
| "learning_rate": 1.3867700373249152e-05, | |
| "loss": 1.2162549495697021, | |
| "step": 1174 | |
| }, | |
| { | |
| "epoch": 2.1538461538461537, | |
| "grad_norm": 0.5713516473770142, | |
| "learning_rate": 1.37969829031441e-05, | |
| "loss": 0.6878563165664673, | |
| "step": 1176 | |
| }, | |
| { | |
| "epoch": 2.1575091575091574, | |
| "grad_norm": 0.18287743628025055, | |
| "learning_rate": 1.3726479956291872e-05, | |
| "loss": 1.0863420963287354, | |
| "step": 1178 | |
| }, | |
| { | |
| "epoch": 2.161172161172161, | |
| "grad_norm": 0.1208617314696312, | |
| "learning_rate": 1.3656192636429043e-05, | |
| "loss": 1.089928388595581, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 2.1648351648351647, | |
| "grad_norm": 0.32237380743026733, | |
| "learning_rate": 1.3586122043916538e-05, | |
| "loss": 1.0283252000808716, | |
| "step": 1182 | |
| }, | |
| { | |
| "epoch": 2.1684981684981683, | |
| "grad_norm": 0.18901434540748596, | |
| "learning_rate": 1.3516269275722387e-05, | |
| "loss": 1.0963468551635742, | |
| "step": 1184 | |
| }, | |
| { | |
| "epoch": 2.172161172161172, | |
| "grad_norm": 0.11865086108446121, | |
| "learning_rate": 1.344663542540451e-05, | |
| "loss": 0.1795816421508789, | |
| "step": 1186 | |
| }, | |
| { | |
| "epoch": 2.1758241758241756, | |
| "grad_norm": 0.35561323165893555, | |
| "learning_rate": 1.3377221583093632e-05, | |
| "loss": 1.1163209676742554, | |
| "step": 1188 | |
| }, | |
| { | |
| "epoch": 2.1794871794871793, | |
| "grad_norm": 0.39501580595970154, | |
| "learning_rate": 1.3308028835476238e-05, | |
| "loss": 0.669342041015625, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 2.183150183150183, | |
| "grad_norm": 0.6345373392105103, | |
| "learning_rate": 1.3239058265777499e-05, | |
| "loss": 0.9228946566581726, | |
| "step": 1192 | |
| }, | |
| { | |
| "epoch": 2.186813186813187, | |
| "grad_norm": 0.30641525983810425, | |
| "learning_rate": 1.3170310953744388e-05, | |
| "loss": 0.698255181312561, | |
| "step": 1194 | |
| }, | |
| { | |
| "epoch": 2.1904761904761907, | |
| "grad_norm": 0.21093255281448364, | |
| "learning_rate": 1.310178797562871e-05, | |
| "loss": 0.8535992503166199, | |
| "step": 1196 | |
| }, | |
| { | |
| "epoch": 2.1941391941391943, | |
| "grad_norm": 0.20513616502285004, | |
| "learning_rate": 1.3033490404170276e-05, | |
| "loss": 1.0712019205093384, | |
| "step": 1198 | |
| }, | |
| { | |
| "epoch": 2.197802197802198, | |
| "grad_norm": 0.4575969874858856, | |
| "learning_rate": 1.296541930858015e-05, | |
| "loss": 0.6536464691162109, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 2.2014652014652016, | |
| "grad_norm": 0.22849248349666595, | |
| "learning_rate": 1.2897575754523832e-05, | |
| "loss": 1.1299998760223389, | |
| "step": 1202 | |
| }, | |
| { | |
| "epoch": 2.2051282051282053, | |
| "grad_norm": 0.2219730168581009, | |
| "learning_rate": 1.2829960804104663e-05, | |
| "loss": 1.080318570137024, | |
| "step": 1204 | |
| }, | |
| { | |
| "epoch": 2.208791208791209, | |
| "grad_norm": 0.20927314460277557, | |
| "learning_rate": 1.2762575515847106e-05, | |
| "loss": 0.42392417788505554, | |
| "step": 1206 | |
| }, | |
| { | |
| "epoch": 2.2124542124542126, | |
| "grad_norm": 0.23490868508815765, | |
| "learning_rate": 1.2695420944680242e-05, | |
| "loss": 0.7045865654945374, | |
| "step": 1208 | |
| }, | |
| { | |
| "epoch": 2.2161172161172162, | |
| "grad_norm": 0.1882236897945404, | |
| "learning_rate": 1.2628498141921243e-05, | |
| "loss": 0.4839053452014923, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 2.21978021978022, | |
| "grad_norm": 0.22759205102920532, | |
| "learning_rate": 1.2561808155258897e-05, | |
| "loss": 0.9919928908348083, | |
| "step": 1212 | |
| }, | |
| { | |
| "epoch": 2.2234432234432235, | |
| "grad_norm": 0.22987626492977142, | |
| "learning_rate": 1.2495352028737201e-05, | |
| "loss": 0.7579973340034485, | |
| "step": 1214 | |
| }, | |
| { | |
| "epoch": 2.227106227106227, | |
| "grad_norm": 0.2778910994529724, | |
| "learning_rate": 1.2429130802739036e-05, | |
| "loss": 0.8012394905090332, | |
| "step": 1216 | |
| }, | |
| { | |
| "epoch": 2.230769230769231, | |
| "grad_norm": 0.17181673645973206, | |
| "learning_rate": 1.2363145513969887e-05, | |
| "loss": 1.0715208053588867, | |
| "step": 1218 | |
| }, | |
| { | |
| "epoch": 2.2344322344322345, | |
| "grad_norm": 0.25348830223083496, | |
| "learning_rate": 1.229739719544157e-05, | |
| "loss": 1.06959867477417, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 2.238095238095238, | |
| "grad_norm": 0.17098630964756012, | |
| "learning_rate": 1.2231886876456116e-05, | |
| "loss": 1.079147219657898, | |
| "step": 1222 | |
| }, | |
| { | |
| "epoch": 2.241758241758242, | |
| "grad_norm": 1.3435940742492676, | |
| "learning_rate": 1.2166615582589613e-05, | |
| "loss": 0.40509262681007385, | |
| "step": 1224 | |
| }, | |
| { | |
| "epoch": 2.2454212454212454, | |
| "grad_norm": 0.24130238592624664, | |
| "learning_rate": 1.210158433567616e-05, | |
| "loss": 1.0378178358078003, | |
| "step": 1226 | |
| }, | |
| { | |
| "epoch": 2.249084249084249, | |
| "grad_norm": 0.7028672695159912, | |
| "learning_rate": 1.2036794153791905e-05, | |
| "loss": 0.5614770650863647, | |
| "step": 1228 | |
| }, | |
| { | |
| "epoch": 2.2527472527472527, | |
| "grad_norm": 0.14420144259929657, | |
| "learning_rate": 1.1972246051239054e-05, | |
| "loss": 0.5607399344444275, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 2.2564102564102564, | |
| "grad_norm": 0.20063596963882446, | |
| "learning_rate": 1.1907941038530015e-05, | |
| "loss": 0.5380869507789612, | |
| "step": 1232 | |
| }, | |
| { | |
| "epoch": 2.26007326007326, | |
| "grad_norm": 0.3156454563140869, | |
| "learning_rate": 1.18438801223716e-05, | |
| "loss": 1.0096523761749268, | |
| "step": 1234 | |
| }, | |
| { | |
| "epoch": 2.2637362637362637, | |
| "grad_norm": 0.2204177975654602, | |
| "learning_rate": 1.1780064305649224e-05, | |
| "loss": 0.7427061796188354, | |
| "step": 1236 | |
| }, | |
| { | |
| "epoch": 2.2673992673992673, | |
| "grad_norm": 0.24353505671024323, | |
| "learning_rate": 1.1716494587411248e-05, | |
| "loss": 0.857605516910553, | |
| "step": 1238 | |
| }, | |
| { | |
| "epoch": 2.271062271062271, | |
| "grad_norm": 0.2163824439048767, | |
| "learning_rate": 1.1653171962853291e-05, | |
| "loss": 0.7742936015129089, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 2.2747252747252746, | |
| "grad_norm": 0.5982560515403748, | |
| "learning_rate": 1.1590097423302684e-05, | |
| "loss": 0.8472159504890442, | |
| "step": 1242 | |
| }, | |
| { | |
| "epoch": 2.2783882783882783, | |
| "grad_norm": 0.22811779379844666, | |
| "learning_rate": 1.1527271956202947e-05, | |
| "loss": 1.033808946609497, | |
| "step": 1244 | |
| }, | |
| { | |
| "epoch": 2.282051282051282, | |
| "grad_norm": 0.2341231256723404, | |
| "learning_rate": 1.1464696545098332e-05, | |
| "loss": 0.6939253807067871, | |
| "step": 1246 | |
| }, | |
| { | |
| "epoch": 2.2857142857142856, | |
| "grad_norm": 0.3489930033683777, | |
| "learning_rate": 1.1402372169618398e-05, | |
| "loss": 0.6756494641304016, | |
| "step": 1248 | |
| }, | |
| { | |
| "epoch": 2.2893772893772892, | |
| "grad_norm": 0.21145479381084442, | |
| "learning_rate": 1.1340299805462704e-05, | |
| "loss": 1.0505157709121704, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 2.293040293040293, | |
| "grad_norm": 0.31191250681877136, | |
| "learning_rate": 1.1278480424385534e-05, | |
| "loss": 0.7581150531768799, | |
| "step": 1252 | |
| }, | |
| { | |
| "epoch": 2.2967032967032965, | |
| "grad_norm": 0.1843535155057907, | |
| "learning_rate": 1.1216914994180659e-05, | |
| "loss": 1.10303795337677, | |
| "step": 1254 | |
| }, | |
| { | |
| "epoch": 2.3003663003663, | |
| "grad_norm": 0.4075881540775299, | |
| "learning_rate": 1.1155604478666223e-05, | |
| "loss": 1.1203564405441284, | |
| "step": 1256 | |
| }, | |
| { | |
| "epoch": 2.304029304029304, | |
| "grad_norm": 1.3023512363433838, | |
| "learning_rate": 1.1094549837669616e-05, | |
| "loss": 0.7518989443778992, | |
| "step": 1258 | |
| }, | |
| { | |
| "epoch": 2.3076923076923075, | |
| "grad_norm": 0.3842734396457672, | |
| "learning_rate": 1.1033752027012465e-05, | |
| "loss": 1.0648375749588013, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 2.311355311355311, | |
| "grad_norm": 0.9410233497619629, | |
| "learning_rate": 1.097321199849569e-05, | |
| "loss": 0.9340834617614746, | |
| "step": 1262 | |
| }, | |
| { | |
| "epoch": 2.315018315018315, | |
| "grad_norm": 0.2198442965745926, | |
| "learning_rate": 1.0912930699884563e-05, | |
| "loss": 0.83587646484375, | |
| "step": 1264 | |
| }, | |
| { | |
| "epoch": 2.3186813186813184, | |
| "grad_norm": 0.1930425763130188, | |
| "learning_rate": 1.08529090748939e-05, | |
| "loss": 0.7000831961631775, | |
| "step": 1266 | |
| }, | |
| { | |
| "epoch": 2.3223443223443225, | |
| "grad_norm": 0.6036158204078674, | |
| "learning_rate": 1.0793148063173284e-05, | |
| "loss": 0.7626188397407532, | |
| "step": 1268 | |
| }, | |
| { | |
| "epoch": 2.326007326007326, | |
| "grad_norm": 0.2609650492668152, | |
| "learning_rate": 1.073364860029234e-05, | |
| "loss": 1.1375476121902466, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 2.32967032967033, | |
| "grad_norm": 0.30271434783935547, | |
| "learning_rate": 1.0674411617726106e-05, | |
| "loss": 0.9527180194854736, | |
| "step": 1272 | |
| }, | |
| { | |
| "epoch": 2.3333333333333335, | |
| "grad_norm": 0.9938449859619141, | |
| "learning_rate": 1.0615438042840439e-05, | |
| "loss": 0.5007555484771729, | |
| "step": 1274 | |
| }, | |
| { | |
| "epoch": 2.336996336996337, | |
| "grad_norm": 0.821854293346405, | |
| "learning_rate": 1.0556728798877488e-05, | |
| "loss": 0.9555824398994446, | |
| "step": 1276 | |
| }, | |
| { | |
| "epoch": 2.340659340659341, | |
| "grad_norm": 0.24786677956581116, | |
| "learning_rate": 1.0498284804941277e-05, | |
| "loss": 0.7994169592857361, | |
| "step": 1278 | |
| }, | |
| { | |
| "epoch": 2.3443223443223444, | |
| "grad_norm": 0.3086180090904236, | |
| "learning_rate": 1.0440106975983283e-05, | |
| "loss": 0.7172934412956238, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 2.347985347985348, | |
| "grad_norm": 0.41890591382980347, | |
| "learning_rate": 1.0382196222788108e-05, | |
| "loss": 0.5843296051025391, | |
| "step": 1282 | |
| }, | |
| { | |
| "epoch": 2.3516483516483517, | |
| "grad_norm": 0.21397873759269714, | |
| "learning_rate": 1.0324553451959245e-05, | |
| "loss": 1.0417900085449219, | |
| "step": 1284 | |
| }, | |
| { | |
| "epoch": 2.3553113553113554, | |
| "grad_norm": 0.5100418329238892, | |
| "learning_rate": 1.0267179565904879e-05, | |
| "loss": 0.4865255355834961, | |
| "step": 1286 | |
| }, | |
| { | |
| "epoch": 2.358974358974359, | |
| "grad_norm": 0.6028478145599365, | |
| "learning_rate": 1.0210075462823738e-05, | |
| "loss": 0.8683855533599854, | |
| "step": 1288 | |
| }, | |
| { | |
| "epoch": 2.3626373626373627, | |
| "grad_norm": 0.18368980288505554, | |
| "learning_rate": 1.0153242036691071e-05, | |
| "loss": 0.8409366607666016, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 2.3663003663003663, | |
| "grad_norm": 0.13247643411159515, | |
| "learning_rate": 1.0096680177244609e-05, | |
| "loss": 0.7085995078086853, | |
| "step": 1292 | |
| }, | |
| { | |
| "epoch": 2.36996336996337, | |
| "grad_norm": 0.27349239587783813, | |
| "learning_rate": 1.0040390769970654e-05, | |
| "loss": 0.7937886714935303, | |
| "step": 1294 | |
| }, | |
| { | |
| "epoch": 2.3736263736263736, | |
| "grad_norm": 0.41278505325317383, | |
| "learning_rate": 9.98437469609025e-06, | |
| "loss": 1.1152591705322266, | |
| "step": 1296 | |
| }, | |
| { | |
| "epoch": 2.3772893772893773, | |
| "grad_norm": 0.4278549551963806, | |
| "learning_rate": 9.928632832545317e-06, | |
| "loss": 1.0227138996124268, | |
| "step": 1298 | |
| }, | |
| { | |
| "epoch": 2.380952380952381, | |
| "grad_norm": 0.21517953276634216, | |
| "learning_rate": 9.873166051984998e-06, | |
| "loss": 1.0946927070617676, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 2.3846153846153846, | |
| "grad_norm": 0.3315783143043518, | |
| "learning_rate": 9.817975222751931e-06, | |
| "loss": 0.7763844728469849, | |
| "step": 1302 | |
| }, | |
| { | |
| "epoch": 2.3882783882783882, | |
| "grad_norm": 0.44624730944633484, | |
| "learning_rate": 9.763061208868699e-06, | |
| "loss": 0.4395400285720825, | |
| "step": 1304 | |
| }, | |
| { | |
| "epoch": 2.391941391941392, | |
| "grad_norm": 0.20948028564453125, | |
| "learning_rate": 9.708424870024285e-06, | |
| "loss": 0.8480145335197449, | |
| "step": 1306 | |
| }, | |
| { | |
| "epoch": 2.3956043956043955, | |
| "grad_norm": 0.4898599088191986, | |
| "learning_rate": 9.654067061560645e-06, | |
| "loss": 1.0664393901824951, | |
| "step": 1308 | |
| }, | |
| { | |
| "epoch": 2.399267399267399, | |
| "grad_norm": 0.21064221858978271, | |
| "learning_rate": 9.599988634459236e-06, | |
| "loss": 0.474110871553421, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 2.402930402930403, | |
| "grad_norm": 0.3536030650138855, | |
| "learning_rate": 9.546190435327795e-06, | |
| "loss": 1.0670816898345947, | |
| "step": 1312 | |
| }, | |
| { | |
| "epoch": 2.4065934065934065, | |
| "grad_norm": 0.2895529866218567, | |
| "learning_rate": 9.492673306387029e-06, | |
| "loss": 0.7731264233589172, | |
| "step": 1314 | |
| }, | |
| { | |
| "epoch": 2.41025641025641, | |
| "grad_norm": 0.21338780224323273, | |
| "learning_rate": 9.43943808545743e-06, | |
| "loss": 1.0734295845031738, | |
| "step": 1316 | |
| }, | |
| { | |
| "epoch": 2.413919413919414, | |
| "grad_norm": 0.5540740489959717, | |
| "learning_rate": 9.386485605946164e-06, | |
| "loss": 0.7238420248031616, | |
| "step": 1318 | |
| }, | |
| { | |
| "epoch": 2.4175824175824174, | |
| "grad_norm": 0.1840064972639084, | |
| "learning_rate": 9.333816696834049e-06, | |
| "loss": 0.6843035221099854, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 2.421245421245421, | |
| "grad_norm": 0.1636444330215454, | |
| "learning_rate": 9.28143218266253e-06, | |
| "loss": 0.5108417272567749, | |
| "step": 1322 | |
| }, | |
| { | |
| "epoch": 2.4249084249084247, | |
| "grad_norm": 0.277536541223526, | |
| "learning_rate": 9.229332883520825e-06, | |
| "loss": 0.7295075058937073, | |
| "step": 1324 | |
| }, | |
| { | |
| "epoch": 2.4285714285714284, | |
| "grad_norm": 0.1264895349740982, | |
| "learning_rate": 9.177519615033034e-06, | |
| "loss": 0.7249910831451416, | |
| "step": 1326 | |
| }, | |
| { | |
| "epoch": 2.4322344322344325, | |
| "grad_norm": 0.19156897068023682, | |
| "learning_rate": 9.125993188345402e-06, | |
| "loss": 0.6318535208702087, | |
| "step": 1328 | |
| }, | |
| { | |
| "epoch": 2.435897435897436, | |
| "grad_norm": 3.024097204208374, | |
| "learning_rate": 9.074754410113628e-06, | |
| "loss": 0.7735837697982788, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 2.4395604395604398, | |
| "grad_norm": 0.14816512167453766, | |
| "learning_rate": 9.023804082490197e-06, | |
| "loss": 0.9631860256195068, | |
| "step": 1332 | |
| }, | |
| { | |
| "epoch": 2.4432234432234434, | |
| "grad_norm": 0.43298929929733276, | |
| "learning_rate": 8.973143003111863e-06, | |
| "loss": 0.6613461971282959, | |
| "step": 1334 | |
| }, | |
| { | |
| "epoch": 2.446886446886447, | |
| "grad_norm": 0.4770919382572174, | |
| "learning_rate": 8.922771965087144e-06, | |
| "loss": 0.5602841973304749, | |
| "step": 1336 | |
| }, | |
| { | |
| "epoch": 2.4505494505494507, | |
| "grad_norm": 0.7131723165512085, | |
| "learning_rate": 8.872691756983891e-06, | |
| "loss": 0.9735853672027588, | |
| "step": 1338 | |
| }, | |
| { | |
| "epoch": 2.4542124542124544, | |
| "grad_norm": 0.31672975420951843, | |
| "learning_rate": 8.822903162816986e-06, | |
| "loss": 0.7807232141494751, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 2.457875457875458, | |
| "grad_norm": 0.08743062615394592, | |
| "learning_rate": 8.773406962036031e-06, | |
| "loss": 0.5491883754730225, | |
| "step": 1342 | |
| }, | |
| { | |
| "epoch": 2.4615384615384617, | |
| "grad_norm": 0.16250762343406677, | |
| "learning_rate": 8.724203929513133e-06, | |
| "loss": 0.7840443253517151, | |
| "step": 1344 | |
| }, | |
| { | |
| "epoch": 2.4652014652014653, | |
| "grad_norm": 0.3087010383605957, | |
| "learning_rate": 8.675294835530828e-06, | |
| "loss": 1.0146785974502563, | |
| "step": 1346 | |
| }, | |
| { | |
| "epoch": 2.468864468864469, | |
| "grad_norm": 0.26062580943107605, | |
| "learning_rate": 8.626680445769981e-06, | |
| "loss": 1.0559192895889282, | |
| "step": 1348 | |
| }, | |
| { | |
| "epoch": 2.4725274725274726, | |
| "grad_norm": 0.25493213534355164, | |
| "learning_rate": 8.5783615212978e-06, | |
| "loss": 0.6265292167663574, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 2.4761904761904763, | |
| "grad_norm": 0.2113112509250641, | |
| "learning_rate": 8.530338818555931e-06, | |
| "loss": 0.711513340473175, | |
| "step": 1352 | |
| }, | |
| { | |
| "epoch": 2.47985347985348, | |
| "grad_norm": 0.5152426362037659, | |
| "learning_rate": 8.482613089348618e-06, | |
| "loss": 0.8448625802993774, | |
| "step": 1354 | |
| }, | |
| { | |
| "epoch": 2.4835164835164836, | |
| "grad_norm": 0.11193950474262238, | |
| "learning_rate": 8.435185080830927e-06, | |
| "loss": 0.8605793118476868, | |
| "step": 1356 | |
| }, | |
| { | |
| "epoch": 2.4871794871794872, | |
| "grad_norm": 0.18866127729415894, | |
| "learning_rate": 8.388055535497064e-06, | |
| "loss": 1.0280365943908691, | |
| "step": 1358 | |
| }, | |
| { | |
| "epoch": 2.490842490842491, | |
| "grad_norm": 0.4851335883140564, | |
| "learning_rate": 8.341225191168722e-06, | |
| "loss": 0.8356929421424866, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 2.4945054945054945, | |
| "grad_norm": 0.18892696499824524, | |
| "learning_rate": 8.29469478098355e-06, | |
| "loss": 0.7504462599754333, | |
| "step": 1362 | |
| }, | |
| { | |
| "epoch": 2.498168498168498, | |
| "grad_norm": 0.4648627042770386, | |
| "learning_rate": 8.24846503338369e-06, | |
| "loss": 0.7001456618309021, | |
| "step": 1364 | |
| }, | |
| { | |
| "epoch": 2.501831501831502, | |
| "grad_norm": 0.2734161615371704, | |
| "learning_rate": 8.202536672104326e-06, | |
| "loss": 1.046680212020874, | |
| "step": 1366 | |
| }, | |
| { | |
| "epoch": 2.5054945054945055, | |
| "grad_norm": 0.12593773007392883, | |
| "learning_rate": 8.156910416162417e-06, | |
| "loss": 0.6337849497795105, | |
| "step": 1368 | |
| }, | |
| { | |
| "epoch": 2.509157509157509, | |
| "grad_norm": 0.3318650722503662, | |
| "learning_rate": 8.111586979845383e-06, | |
| "loss": 0.8238204121589661, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 2.5128205128205128, | |
| "grad_norm": 0.1444663256406784, | |
| "learning_rate": 8.066567072699946e-06, | |
| "loss": 0.5835400819778442, | |
| "step": 1372 | |
| }, | |
| { | |
| "epoch": 2.5164835164835164, | |
| "grad_norm": 0.26651981472969055, | |
| "learning_rate": 8.021851399521048e-06, | |
| "loss": 0.6938576102256775, | |
| "step": 1374 | |
| }, | |
| { | |
| "epoch": 2.52014652014652, | |
| "grad_norm": 0.5255207419395447, | |
| "learning_rate": 7.97744066034077e-06, | |
| "loss": 0.9116069078445435, | |
| "step": 1376 | |
| }, | |
| { | |
| "epoch": 2.5238095238095237, | |
| "grad_norm": 0.20919840037822723, | |
| "learning_rate": 7.933335550417405e-06, | |
| "loss": 0.7189561724662781, | |
| "step": 1378 | |
| }, | |
| { | |
| "epoch": 2.5274725274725274, | |
| "grad_norm": 0.16163820028305054, | |
| "learning_rate": 7.889536760224557e-06, | |
| "loss": 0.7958462834358215, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 2.531135531135531, | |
| "grad_norm": 0.1546577364206314, | |
| "learning_rate": 7.846044975440334e-06, | |
| "loss": 0.7697736620903015, | |
| "step": 1382 | |
| }, | |
| { | |
| "epoch": 2.5347985347985347, | |
| "grad_norm": 0.1974683552980423, | |
| "learning_rate": 7.802860876936636e-06, | |
| "loss": 0.7680953741073608, | |
| "step": 1384 | |
| }, | |
| { | |
| "epoch": 2.5384615384615383, | |
| "grad_norm": 0.1976369023323059, | |
| "learning_rate": 7.759985140768474e-06, | |
| "loss": 1.0490553379058838, | |
| "step": 1386 | |
| }, | |
| { | |
| "epoch": 2.542124542124542, | |
| "grad_norm": 0.795116126537323, | |
| "learning_rate": 7.717418438163362e-06, | |
| "loss": 0.7246772050857544, | |
| "step": 1388 | |
| }, | |
| { | |
| "epoch": 2.5457875457875456, | |
| "grad_norm": 0.20595654845237732, | |
| "learning_rate": 7.675161435510869e-06, | |
| "loss": 0.7472343444824219, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 2.5494505494505493, | |
| "grad_norm": 0.3328467309474945, | |
| "learning_rate": 7.633214794352146e-06, | |
| "loss": 0.8970118761062622, | |
| "step": 1392 | |
| }, | |
| { | |
| "epoch": 2.553113553113553, | |
| "grad_norm": 0.2709287405014038, | |
| "learning_rate": 7.591579171369574e-06, | |
| "loss": 0.6892199516296387, | |
| "step": 1394 | |
| }, | |
| { | |
| "epoch": 2.5567765567765566, | |
| "grad_norm": 0.0892128273844719, | |
| "learning_rate": 7.5502552183764845e-06, | |
| "loss": 0.2729473412036896, | |
| "step": 1396 | |
| }, | |
| { | |
| "epoch": 2.5604395604395602, | |
| "grad_norm": 0.2575327455997467, | |
| "learning_rate": 7.5092435823069655e-06, | |
| "loss": 1.0643916130065918, | |
| "step": 1398 | |
| }, | |
| { | |
| "epoch": 2.564102564102564, | |
| "grad_norm": 0.42705675959587097, | |
| "learning_rate": 7.468544905205714e-06, | |
| "loss": 1.0583202838897705, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 2.5677655677655675, | |
| "grad_norm": 0.3415769040584564, | |
| "learning_rate": 7.428159824218017e-06, | |
| "loss": 0.7452787756919861, | |
| "step": 1402 | |
| }, | |
| { | |
| "epoch": 2.571428571428571, | |
| "grad_norm": 0.16148477792739868, | |
| "learning_rate": 7.388088971579742e-06, | |
| "loss": 1.0535763502120972, | |
| "step": 1404 | |
| }, | |
| { | |
| "epoch": 2.575091575091575, | |
| "grad_norm": 0.7299770712852478, | |
| "learning_rate": 7.348332974607445e-06, | |
| "loss": 0.47556331753730774, | |
| "step": 1406 | |
| }, | |
| { | |
| "epoch": 2.578754578754579, | |
| "grad_norm": 0.1957835853099823, | |
| "learning_rate": 7.308892455688579e-06, | |
| "loss": 1.0507322549819946, | |
| "step": 1408 | |
| }, | |
| { | |
| "epoch": 2.5824175824175826, | |
| "grad_norm": 0.8958855867385864, | |
| "learning_rate": 7.269768032271726e-06, | |
| "loss": 0.7786467671394348, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 2.586080586080586, | |
| "grad_norm": 0.5932020545005798, | |
| "learning_rate": 7.230960316856925e-06, | |
| "loss": 0.7563549876213074, | |
| "step": 1412 | |
| }, | |
| { | |
| "epoch": 2.58974358974359, | |
| "grad_norm": 1.3887028694152832, | |
| "learning_rate": 7.192469916986099e-06, | |
| "loss": 0.9135017395019531, | |
| "step": 1414 | |
| }, | |
| { | |
| "epoch": 2.5934065934065935, | |
| "grad_norm": 0.3214952051639557, | |
| "learning_rate": 7.154297435233528e-06, | |
| "loss": 1.0506607294082642, | |
| "step": 1416 | |
| }, | |
| { | |
| "epoch": 2.597069597069597, | |
| "grad_norm": 0.4495256841182709, | |
| "learning_rate": 7.116443469196446e-06, | |
| "loss": 0.6735981702804565, | |
| "step": 1418 | |
| }, | |
| { | |
| "epoch": 2.600732600732601, | |
| "grad_norm": 0.20636308193206787, | |
| "learning_rate": 7.078908611485656e-06, | |
| "loss": 1.0373022556304932, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 2.6043956043956045, | |
| "grad_norm": 0.753204345703125, | |
| "learning_rate": 7.041693449716244e-06, | |
| "loss": 0.9630070328712463, | |
| "step": 1422 | |
| }, | |
| { | |
| "epoch": 2.608058608058608, | |
| "grad_norm": 0.2278732806444168, | |
| "learning_rate": 7.00479856649842e-06, | |
| "loss": 1.1420621871948242, | |
| "step": 1424 | |
| }, | |
| { | |
| "epoch": 2.6117216117216118, | |
| "grad_norm": 0.28468847274780273, | |
| "learning_rate": 6.96822453942837e-06, | |
| "loss": 1.1209793090820312, | |
| "step": 1426 | |
| }, | |
| { | |
| "epoch": 2.6153846153846154, | |
| "grad_norm": 0.3516117036342621, | |
| "learning_rate": 6.931971941079208e-06, | |
| "loss": 1.1041990518569946, | |
| "step": 1428 | |
| }, | |
| { | |
| "epoch": 2.619047619047619, | |
| "grad_norm": 0.18154621124267578, | |
| "learning_rate": 6.896041338992029e-06, | |
| "loss": 1.0311168432235718, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 2.6227106227106227, | |
| "grad_norm": 0.5342852473258972, | |
| "learning_rate": 6.860433295667022e-06, | |
| "loss": 0.8854894042015076, | |
| "step": 1432 | |
| }, | |
| { | |
| "epoch": 2.6263736263736264, | |
| "grad_norm": 0.8895637392997742, | |
| "learning_rate": 6.825148368554646e-06, | |
| "loss": 0.8600127696990967, | |
| "step": 1434 | |
| }, | |
| { | |
| "epoch": 2.63003663003663, | |
| "grad_norm": 0.5177263021469116, | |
| "learning_rate": 6.790187110046933e-06, | |
| "loss": 1.2419568300247192, | |
| "step": 1436 | |
| }, | |
| { | |
| "epoch": 2.6336996336996337, | |
| "grad_norm": 0.1689794361591339, | |
| "learning_rate": 6.755550067468812e-06, | |
| "loss": 1.0835387706756592, | |
| "step": 1438 | |
| }, | |
| { | |
| "epoch": 2.6373626373626373, | |
| "grad_norm": 0.2473716288805008, | |
| "learning_rate": 6.721237783069546e-06, | |
| "loss": 1.0448336601257324, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 2.641025641025641, | |
| "grad_norm": 0.14038227498531342, | |
| "learning_rate": 6.687250794014273e-06, | |
| "loss": 1.0791858434677124, | |
| "step": 1442 | |
| }, | |
| { | |
| "epoch": 2.6446886446886446, | |
| "grad_norm": 0.30842867493629456, | |
| "learning_rate": 6.653589632375541e-06, | |
| "loss": 0.9658035635948181, | |
| "step": 1444 | |
| }, | |
| { | |
| "epoch": 2.6483516483516483, | |
| "grad_norm": 0.30728021264076233, | |
| "learning_rate": 6.6202548251250414e-06, | |
| "loss": 0.7608792185783386, | |
| "step": 1446 | |
| }, | |
| { | |
| "epoch": 2.652014652014652, | |
| "grad_norm": 0.19324815273284912, | |
| "learning_rate": 6.587246894125303e-06, | |
| "loss": 0.7707818150520325, | |
| "step": 1448 | |
| }, | |
| { | |
| "epoch": 2.6556776556776556, | |
| "grad_norm": 0.16655150055885315, | |
| "learning_rate": 6.554566356121558e-06, | |
| "loss": 1.038588285446167, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 2.659340659340659, | |
| "grad_norm": 0.10667542368173599, | |
| "learning_rate": 6.522213722733638e-06, | |
| "loss": 0.5798073410987854, | |
| "step": 1452 | |
| }, | |
| { | |
| "epoch": 2.663003663003663, | |
| "grad_norm": 0.23501524329185486, | |
| "learning_rate": 6.490189500447973e-06, | |
| "loss": 0.6129744648933411, | |
| "step": 1454 | |
| }, | |
| { | |
| "epoch": 2.6666666666666665, | |
| "grad_norm": 0.5228734612464905, | |
| "learning_rate": 6.4584941906096515e-06, | |
| "loss": 1.010016918182373, | |
| "step": 1456 | |
| }, | |
| { | |
| "epoch": 2.67032967032967, | |
| "grad_norm": 0.1614047735929489, | |
| "learning_rate": 6.427128289414573e-06, | |
| "loss": 0.7019752264022827, | |
| "step": 1458 | |
| }, | |
| { | |
| "epoch": 2.6739926739926743, | |
| "grad_norm": 0.5308529138565063, | |
| "learning_rate": 6.396092287901696e-06, | |
| "loss": 0.6532785296440125, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 2.677655677655678, | |
| "grad_norm": 0.22194698452949524, | |
| "learning_rate": 6.365386671945331e-06, | |
| "loss": 0.7371679544448853, | |
| "step": 1462 | |
| }, | |
| { | |
| "epoch": 2.6813186813186816, | |
| "grad_norm": 1.3231110572814941, | |
| "learning_rate": 6.335011922247535e-06, | |
| "loss": 0.9731379151344299, | |
| "step": 1464 | |
| }, | |
| { | |
| "epoch": 2.684981684981685, | |
| "grad_norm": 0.15819787979125977, | |
| "learning_rate": 6.304968514330613e-06, | |
| "loss": 0.8071764707565308, | |
| "step": 1466 | |
| }, | |
| { | |
| "epoch": 2.688644688644689, | |
| "grad_norm": 0.22951114177703857, | |
| "learning_rate": 6.275256918529631e-06, | |
| "loss": 0.95961993932724, | |
| "step": 1468 | |
| }, | |
| { | |
| "epoch": 2.6923076923076925, | |
| "grad_norm": 0.6898245215415955, | |
| "learning_rate": 6.245877599985094e-06, | |
| "loss": 0.5869124531745911, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 2.695970695970696, | |
| "grad_norm": 0.4145790934562683, | |
| "learning_rate": 6.216831018635631e-06, | |
| "loss": 0.7107551097869873, | |
| "step": 1472 | |
| }, | |
| { | |
| "epoch": 2.6996336996337, | |
| "grad_norm": 0.5302114486694336, | |
| "learning_rate": 6.188117629210814e-06, | |
| "loss": 0.4114135205745697, | |
| "step": 1474 | |
| }, | |
| { | |
| "epoch": 2.7032967032967035, | |
| "grad_norm": 0.254586398601532, | |
| "learning_rate": 6.159737881224042e-06, | |
| "loss": 1.0859794616699219, | |
| "step": 1476 | |
| }, | |
| { | |
| "epoch": 2.706959706959707, | |
| "grad_norm": 0.6441674828529358, | |
| "learning_rate": 6.131692218965484e-06, | |
| "loss": 0.5880909562110901, | |
| "step": 1478 | |
| }, | |
| { | |
| "epoch": 2.7106227106227108, | |
| "grad_norm": 0.1711389720439911, | |
| "learning_rate": 6.103981081495144e-06, | |
| "loss": 1.0421608686447144, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 2.7142857142857144, | |
| "grad_norm": 0.20255252718925476, | |
| "learning_rate": 6.076604902635971e-06, | |
| "loss": 1.0526020526885986, | |
| "step": 1482 | |
| }, | |
| { | |
| "epoch": 2.717948717948718, | |
| "grad_norm": 0.8872889280319214, | |
| "learning_rate": 6.049564110967082e-06, | |
| "loss": 0.9588233828544617, | |
| "step": 1484 | |
| }, | |
| { | |
| "epoch": 2.7216117216117217, | |
| "grad_norm": 0.2663383483886719, | |
| "learning_rate": 6.022859129817042e-06, | |
| "loss": 1.0862208604812622, | |
| "step": 1486 | |
| }, | |
| { | |
| "epoch": 2.7252747252747254, | |
| "grad_norm": 0.15567375719547272, | |
| "learning_rate": 5.996490377257248e-06, | |
| "loss": 1.091988444328308, | |
| "step": 1488 | |
| }, | |
| { | |
| "epoch": 2.728937728937729, | |
| "grad_norm": 0.3074291944503784, | |
| "learning_rate": 5.970458266095369e-06, | |
| "loss": 0.4964509606361389, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 2.7326007326007327, | |
| "grad_norm": 0.20127466320991516, | |
| "learning_rate": 5.944763203868888e-06, | |
| "loss": 1.0711864233016968, | |
| "step": 1492 | |
| }, | |
| { | |
| "epoch": 2.7362637362637363, | |
| "grad_norm": 0.22651104629039764, | |
| "learning_rate": 5.919405592838733e-06, | |
| "loss": 0.5613836050033569, | |
| "step": 1494 | |
| }, | |
| { | |
| "epoch": 2.73992673992674, | |
| "grad_norm": 0.20297077298164368, | |
| "learning_rate": 5.894385829982967e-06, | |
| "loss": 1.1413242816925049, | |
| "step": 1496 | |
| }, | |
| { | |
| "epoch": 2.7435897435897436, | |
| "grad_norm": 0.20519724488258362, | |
| "learning_rate": 5.869704306990585e-06, | |
| "loss": 1.0266319513320923, | |
| "step": 1498 | |
| }, | |
| { | |
| "epoch": 2.7472527472527473, | |
| "grad_norm": 3.1034557819366455, | |
| "learning_rate": 5.8453614102553605e-06, | |
| "loss": 0.6879111528396606, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 2.750915750915751, | |
| "grad_norm": 0.4873732030391693, | |
| "learning_rate": 5.821357520869821e-06, | |
| "loss": 0.9691627621650696, | |
| "step": 1502 | |
| }, | |
| { | |
| "epoch": 2.7545787545787546, | |
| "grad_norm": 0.13368584215641022, | |
| "learning_rate": 5.797693014619274e-06, | |
| "loss": 1.0586458444595337, | |
| "step": 1504 | |
| }, | |
| { | |
| "epoch": 2.758241758241758, | |
| "grad_norm": 0.18307749927043915, | |
| "learning_rate": 5.774368261975912e-06, | |
| "loss": 1.037876844406128, | |
| "step": 1506 | |
| }, | |
| { | |
| "epoch": 2.761904761904762, | |
| "grad_norm": 0.10759836435317993, | |
| "learning_rate": 5.751383628093026e-06, | |
| "loss": 0.8368395566940308, | |
| "step": 1508 | |
| }, | |
| { | |
| "epoch": 2.7655677655677655, | |
| "grad_norm": 0.2728974521160126, | |
| "learning_rate": 5.728739472799295e-06, | |
| "loss": 0.8790582418441772, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 2.769230769230769, | |
| "grad_norm": 0.2314990609884262, | |
| "learning_rate": 5.706436150593126e-06, | |
| "loss": 0.8743211627006531, | |
| "step": 1512 | |
| }, | |
| { | |
| "epoch": 2.772893772893773, | |
| "grad_norm": 0.15124575793743134, | |
| "learning_rate": 5.684474010637134e-06, | |
| "loss": 1.0424885749816895, | |
| "step": 1514 | |
| }, | |
| { | |
| "epoch": 2.7765567765567765, | |
| "grad_norm": 0.21101588010787964, | |
| "learning_rate": 5.662853396752659e-06, | |
| "loss": 0.943360447883606, | |
| "step": 1516 | |
| }, | |
| { | |
| "epoch": 2.78021978021978, | |
| "grad_norm": 0.10518907010555267, | |
| "learning_rate": 5.641574647414386e-06, | |
| "loss": 0.921418309211731, | |
| "step": 1518 | |
| }, | |
| { | |
| "epoch": 2.7838827838827838, | |
| "grad_norm": 0.6700722575187683, | |
| "learning_rate": 5.620638095745048e-06, | |
| "loss": 0.4822154641151428, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 2.7875457875457874, | |
| "grad_norm": 0.21197772026062012, | |
| "learning_rate": 5.600044069510221e-06, | |
| "loss": 0.708233118057251, | |
| "step": 1522 | |
| }, | |
| { | |
| "epoch": 2.791208791208791, | |
| "grad_norm": 0.5329016447067261, | |
| "learning_rate": 5.579792891113163e-06, | |
| "loss": 0.7894065976142883, | |
| "step": 1524 | |
| }, | |
| { | |
| "epoch": 2.7948717948717947, | |
| "grad_norm": 0.23669062554836273, | |
| "learning_rate": 5.5598848775897975e-06, | |
| "loss": 1.0895702838897705, | |
| "step": 1526 | |
| }, | |
| { | |
| "epoch": 2.7985347985347984, | |
| "grad_norm": 0.2975974977016449, | |
| "learning_rate": 5.540320340603742e-06, | |
| "loss": 1.0676382780075073, | |
| "step": 1528 | |
| }, | |
| { | |
| "epoch": 2.802197802197802, | |
| "grad_norm": 0.35699358582496643, | |
| "learning_rate": 5.52109958644142e-06, | |
| "loss": 1.047616958618164, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 2.8058608058608057, | |
| "grad_norm": 0.16987060010433197, | |
| "learning_rate": 5.50222291600727e-06, | |
| "loss": 0.9621225595474243, | |
| "step": 1532 | |
| }, | |
| { | |
| "epoch": 2.8095238095238093, | |
| "grad_norm": 0.34407544136047363, | |
| "learning_rate": 5.483690624819042e-06, | |
| "loss": 0.7081210613250732, | |
| "step": 1534 | |
| }, | |
| { | |
| "epoch": 2.813186813186813, | |
| "grad_norm": 0.1482367217540741, | |
| "learning_rate": 5.4655030030031616e-06, | |
| "loss": 1.1918277740478516, | |
| "step": 1536 | |
| }, | |
| { | |
| "epoch": 2.8168498168498166, | |
| "grad_norm": 0.34928014874458313, | |
| "learning_rate": 5.4476603352901945e-06, | |
| "loss": 0.8316318392753601, | |
| "step": 1538 | |
| }, | |
| { | |
| "epoch": 2.8205128205128203, | |
| "grad_norm": 0.3218369781970978, | |
| "learning_rate": 5.430162901010386e-06, | |
| "loss": 0.7342109084129333, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 2.824175824175824, | |
| "grad_norm": 0.21855826675891876, | |
| "learning_rate": 5.413010974089283e-06, | |
| "loss": 0.8212740421295166, | |
| "step": 1542 | |
| }, | |
| { | |
| "epoch": 2.8278388278388276, | |
| "grad_norm": 0.2800341248512268, | |
| "learning_rate": 5.39620482304346e-06, | |
| "loss": 1.0205451250076294, | |
| "step": 1544 | |
| }, | |
| { | |
| "epoch": 2.8315018315018317, | |
| "grad_norm": 0.21291717886924744, | |
| "learning_rate": 5.379744710976301e-06, | |
| "loss": 1.0645310878753662, | |
| "step": 1546 | |
| }, | |
| { | |
| "epoch": 2.8351648351648353, | |
| "grad_norm": 0.15496331453323364, | |
| "learning_rate": 5.363630895573892e-06, | |
| "loss": 1.1228570938110352, | |
| "step": 1548 | |
| }, | |
| { | |
| "epoch": 2.838827838827839, | |
| "grad_norm": 0.8466178178787231, | |
| "learning_rate": 5.347863629100969e-06, | |
| "loss": 0.737494945526123, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 2.8424908424908426, | |
| "grad_norm": 0.04996780306100845, | |
| "learning_rate": 5.332443158396993e-06, | |
| "loss": 0.5186063051223755, | |
| "step": 1552 | |
| }, | |
| { | |
| "epoch": 2.8461538461538463, | |
| "grad_norm": 0.34756842255592346, | |
| "learning_rate": 5.317369724872267e-06, | |
| "loss": 1.0735743045806885, | |
| "step": 1554 | |
| }, | |
| { | |
| "epoch": 2.84981684981685, | |
| "grad_norm": 0.25667431950569153, | |
| "learning_rate": 5.302643564504168e-06, | |
| "loss": 0.8242087364196777, | |
| "step": 1556 | |
| }, | |
| { | |
| "epoch": 2.8534798534798536, | |
| "grad_norm": 0.5141234397888184, | |
| "learning_rate": 5.288264907833445e-06, | |
| "loss": 0.9391310811042786, | |
| "step": 1558 | |
| }, | |
| { | |
| "epoch": 2.857142857142857, | |
| "grad_norm": 0.37790647149086, | |
| "learning_rate": 5.274233979960608e-06, | |
| "loss": 0.511182427406311, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 2.860805860805861, | |
| "grad_norm": 0.8588418960571289, | |
| "learning_rate": 5.260551000542418e-06, | |
| "loss": 0.6005702614784241, | |
| "step": 1562 | |
| }, | |
| { | |
| "epoch": 2.8644688644688645, | |
| "grad_norm": 0.20086157321929932, | |
| "learning_rate": 5.247216183788431e-06, | |
| "loss": 0.7859454154968262, | |
| "step": 1564 | |
| }, | |
| { | |
| "epoch": 2.868131868131868, | |
| "grad_norm": 0.5744203925132751, | |
| "learning_rate": 5.234229738457658e-06, | |
| "loss": 0.5249977111816406, | |
| "step": 1566 | |
| }, | |
| { | |
| "epoch": 2.871794871794872, | |
| "grad_norm": 0.588792085647583, | |
| "learning_rate": 5.221591867855286e-06, | |
| "loss": 0.677643895149231, | |
| "step": 1568 | |
| }, | |
| { | |
| "epoch": 2.8754578754578755, | |
| "grad_norm": 0.06420119106769562, | |
| "learning_rate": 5.209302769829507e-06, | |
| "loss": 0.5973821878433228, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 2.879120879120879, | |
| "grad_norm": 0.18718576431274414, | |
| "learning_rate": 5.197362636768409e-06, | |
| "loss": 0.613332986831665, | |
| "step": 1572 | |
| }, | |
| { | |
| "epoch": 2.8827838827838828, | |
| "grad_norm": 0.2197110801935196, | |
| "learning_rate": 5.185771655596972e-06, | |
| "loss": 0.9175146818161011, | |
| "step": 1574 | |
| }, | |
| { | |
| "epoch": 2.8864468864468864, | |
| "grad_norm": 5.933550834655762, | |
| "learning_rate": 5.174530007774135e-06, | |
| "loss": 0.8471065163612366, | |
| "step": 1576 | |
| }, | |
| { | |
| "epoch": 2.89010989010989, | |
| "grad_norm": 0.09670909494161606, | |
| "learning_rate": 5.1636378692899665e-06, | |
| "loss": 0.8234681487083435, | |
| "step": 1578 | |
| }, | |
| { | |
| "epoch": 2.8937728937728937, | |
| "grad_norm": 0.1601630300283432, | |
| "learning_rate": 5.153095410662896e-06, | |
| "loss": 1.1230218410491943, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 2.8974358974358974, | |
| "grad_norm": 2.9173190593719482, | |
| "learning_rate": 5.142902796937052e-06, | |
| "loss": 0.7799305319786072, | |
| "step": 1582 | |
| }, | |
| { | |
| "epoch": 2.901098901098901, | |
| "grad_norm": 0.03245487064123154, | |
| "learning_rate": 5.133060187679675e-06, | |
| "loss": 0.7026646733283997, | |
| "step": 1584 | |
| }, | |
| { | |
| "epoch": 2.9047619047619047, | |
| "grad_norm": 0.26367539167404175, | |
| "learning_rate": 5.1235677369786265e-06, | |
| "loss": 0.6960863471031189, | |
| "step": 1586 | |
| }, | |
| { | |
| "epoch": 2.9084249084249083, | |
| "grad_norm": 0.24362139403820038, | |
| "learning_rate": 5.1144255934399655e-06, | |
| "loss": 1.0824929475784302, | |
| "step": 1588 | |
| }, | |
| { | |
| "epoch": 2.912087912087912, | |
| "grad_norm": 0.23772361874580383, | |
| "learning_rate": 5.105633900185632e-06, | |
| "loss": 1.0874613523483276, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 2.9157509157509156, | |
| "grad_norm": 0.30294981598854065, | |
| "learning_rate": 5.0971927948512e-06, | |
| "loss": 0.4234909117221832, | |
| "step": 1592 | |
| }, | |
| { | |
| "epoch": 2.9194139194139193, | |
| "grad_norm": 0.24272647500038147, | |
| "learning_rate": 5.089102409583725e-06, | |
| "loss": 1.0570107698440552, | |
| "step": 1594 | |
| }, | |
| { | |
| "epoch": 2.9230769230769234, | |
| "grad_norm": 0.20444297790527344, | |
| "learning_rate": 5.081362871039677e-06, | |
| "loss": 0.6874979138374329, | |
| "step": 1596 | |
| }, | |
| { | |
| "epoch": 2.926739926739927, | |
| "grad_norm": 0.40901777148246765, | |
| "learning_rate": 5.073974300382959e-06, | |
| "loss": 1.0847806930541992, | |
| "step": 1598 | |
| }, | |
| { | |
| "epoch": 2.9304029304029307, | |
| "grad_norm": 0.06832870841026306, | |
| "learning_rate": 5.066936813282996e-06, | |
| "loss": 0.6706178784370422, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 2.9340659340659343, | |
| "grad_norm": 0.16809964179992676, | |
| "learning_rate": 5.060250519912951e-06, | |
| "loss": 1.0802940130233765, | |
| "step": 1602 | |
| }, | |
| { | |
| "epoch": 2.937728937728938, | |
| "grad_norm": 0.11709550023078918, | |
| "learning_rate": 5.053915524947969e-06, | |
| "loss": 0.7102103233337402, | |
| "step": 1604 | |
| }, | |
| { | |
| "epoch": 2.9413919413919416, | |
| "grad_norm": 0.18930549919605255, | |
| "learning_rate": 5.047931927563565e-06, | |
| "loss": 1.052394986152649, | |
| "step": 1606 | |
| }, | |
| { | |
| "epoch": 2.9450549450549453, | |
| "grad_norm": 0.17763479053974152, | |
| "learning_rate": 5.042299821434059e-06, | |
| "loss": 0.6530783772468567, | |
| "step": 1608 | |
| }, | |
| { | |
| "epoch": 2.948717948717949, | |
| "grad_norm": 0.35226595401763916, | |
| "learning_rate": 5.037019294731103e-06, | |
| "loss": 0.8992307186126709, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 2.9523809523809526, | |
| "grad_norm": 0.30645254254341125, | |
| "learning_rate": 5.032090430122316e-06, | |
| "loss": 0.7746174335479736, | |
| "step": 1612 | |
| }, | |
| { | |
| "epoch": 2.956043956043956, | |
| "grad_norm": 0.7316517233848572, | |
| "learning_rate": 5.0275133047699814e-06, | |
| "loss": 0.6159262657165527, | |
| "step": 1614 | |
| }, | |
| { | |
| "epoch": 2.95970695970696, | |
| "grad_norm": 0.20291663706302643, | |
| "learning_rate": 5.023287990329835e-06, | |
| "loss": 0.737842857837677, | |
| "step": 1616 | |
| }, | |
| { | |
| "epoch": 2.9633699633699635, | |
| "grad_norm": 0.3495129942893982, | |
| "learning_rate": 5.019414552949955e-06, | |
| "loss": 1.2598001956939697, | |
| "step": 1618 | |
| }, | |
| { | |
| "epoch": 2.967032967032967, | |
| "grad_norm": 0.15239816904067993, | |
| "learning_rate": 5.015893053269714e-06, | |
| "loss": 1.167555332183838, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 2.970695970695971, | |
| "grad_norm": 0.20208200812339783, | |
| "learning_rate": 5.012723546418838e-06, | |
| "loss": 0.8371485471725464, | |
| "step": 1622 | |
| }, | |
| { | |
| "epoch": 2.9743589743589745, | |
| "grad_norm": 0.15967847406864166, | |
| "learning_rate": 5.009906082016538e-06, | |
| "loss": 0.733574390411377, | |
| "step": 1624 | |
| }, | |
| { | |
| "epoch": 2.978021978021978, | |
| "grad_norm": 0.17362992465496063, | |
| "learning_rate": 5.007440704170741e-06, | |
| "loss": 0.7777770161628723, | |
| "step": 1626 | |
| }, | |
| { | |
| "epoch": 2.9816849816849818, | |
| "grad_norm": 0.15043112635612488, | |
| "learning_rate": 5.005327451477387e-06, | |
| "loss": 0.8784082531929016, | |
| "step": 1628 | |
| }, | |
| { | |
| "epoch": 2.9853479853479854, | |
| "grad_norm": 0.19129148125648499, | |
| "learning_rate": 5.003566357019837e-06, | |
| "loss": 1.2974438667297363, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 2.989010989010989, | |
| "grad_norm": 0.3431568741798401, | |
| "learning_rate": 5.002157448368347e-06, | |
| "loss": 0.9204556345939636, | |
| "step": 1632 | |
| }, | |
| { | |
| "epoch": 2.9926739926739927, | |
| "grad_norm": 0.16419149935245514, | |
| "learning_rate": 5.001100747579644e-06, | |
| "loss": 0.6911695003509521, | |
| "step": 1634 | |
| }, | |
| { | |
| "epoch": 2.9963369963369964, | |
| "grad_norm": 0.48860520124435425, | |
| "learning_rate": 5.000396271196573e-06, | |
| "loss": 1.1634691953659058, | |
| "step": 1636 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "grad_norm": 0.28968650102615356, | |
| "learning_rate": 5.000044030247836e-06, | |
| "loss": 1.0265119075775146, | |
| "step": 1638 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "step": 1638, | |
| "total_flos": 8.4482141520606e+18, | |
| "train_loss": 0.9791712072451618, | |
| "train_runtime": 55340.5169, | |
| "train_samples_per_second": 0.71, | |
| "train_steps_per_second": 0.03 | |
| } | |
| ], | |
| "logging_steps": 2, | |
| "max_steps": 1638, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 99999, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": false, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 8.4482141520606e+18, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |