Text Generation
Transformers
Safetensors
qwen2
llama-factory
full
Generated from Trainer
conversational
text-generation-inference
Instructions to use AgPerry/Qwen2.5-Coder-7B-Instruct-num06-accumulate_16 with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use AgPerry/Qwen2.5-Coder-7B-Instruct-num06-accumulate_16 with Transformers:
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("text-generation", model="AgPerry/Qwen2.5-Coder-7B-Instruct-num06-accumulate_16")
messages = [
    {"role": "user", "content": "Who are you?"},
]
pipe(messages)

# Load model directly
from transformers import AutoTokenizer, AutoModelForCausalLM

tokenizer = AutoTokenizer.from_pretrained("AgPerry/Qwen2.5-Coder-7B-Instruct-num06-accumulate_16")
model = AutoModelForCausalLM.from_pretrained("AgPerry/Qwen2.5-Coder-7B-Instruct-num06-accumulate_16")
messages = [
    {"role": "user", "content": "Who are you?"},
]
inputs = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt=True,
    tokenize=True,
    return_dict=True,
    return_tensors="pt",
).to(model.device)

outputs = model.generate(**inputs, max_new_tokens=40)
print(tokenizer.decode(outputs[0][inputs["input_ids"].shape[-1]:]))
- Notebooks
- Google Colab
- Kaggle
- Local Apps
- vLLM
How to use AgPerry/Qwen2.5-Coder-7B-Instruct-num06-accumulate_16 with vLLM:
Install from pip and serve model
# Install vLLM from pip:
pip install vllm
# Start the vLLM server:
vllm serve "AgPerry/Qwen2.5-Coder-7B-Instruct-num06-accumulate_16"
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/chat/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "AgPerry/Qwen2.5-Coder-7B-Instruct-num06-accumulate_16",
        "messages": [
            {
                "role": "user",
                "content": "What is the capital of France?"
            }
        ]
    }'
Use Docker
docker model run hf.co/AgPerry/Qwen2.5-Coder-7B-Instruct-num06-accumulate_16
- SGLang
How to use AgPerry/Qwen2.5-Coder-7B-Instruct-num06-accumulate_16 with SGLang:
Install from pip and serve model
# Install SGLang from pip:
pip install sglang
# Start the SGLang server:
python3 -m sglang.launch_server \
    --model-path "AgPerry/Qwen2.5-Coder-7B-Instruct-num06-accumulate_16" \
    --host 0.0.0.0 \
    --port 30000
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "AgPerry/Qwen2.5-Coder-7B-Instruct-num06-accumulate_16",
        "messages": [
            {
                "role": "user",
                "content": "What is the capital of France?"
            }
        ]
    }'
Use Docker images
docker run --gpus all \
    --shm-size 32g \
    -p 30000:30000 \
    -v ~/.cache/huggingface:/root/.cache/huggingface \
    --env "HF_TOKEN=<secret>" \
    --ipc=host \
    lmsysorg/sglang:latest \
    python3 -m sglang.launch_server \
        --model-path "AgPerry/Qwen2.5-Coder-7B-Instruct-num06-accumulate_16" \
        --host 0.0.0.0 \
        --port 30000
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "AgPerry/Qwen2.5-Coder-7B-Instruct-num06-accumulate_16",
        "messages": [
            {
                "role": "user",
                "content": "What is the capital of France?"
            }
        ]
    }'
- Docker Model Runner
How to use AgPerry/Qwen2.5-Coder-7B-Instruct-num06-accumulate_16 with Docker Model Runner:
docker model run hf.co/AgPerry/Qwen2.5-Coder-7B-Instruct-num06-accumulate_16
| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 1.0, | |
| "eval_steps": 500, | |
| "global_step": 3295, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0030350165029022344, | |
| "grad_norm": 2.2979175122091453, | |
| "learning_rate": 2.7272727272727274e-07, | |
| "loss": 0.789, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.006070033005804469, | |
| "grad_norm": 1.7852801501566888, | |
| "learning_rate": 5.757575757575758e-07, | |
| "loss": 0.7904, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.009105049508706704, | |
| "grad_norm": 1.1632800870309477, | |
| "learning_rate": 8.787878787878788e-07, | |
| "loss": 0.744, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.012140066011608937, | |
| "grad_norm": 0.8818693207084786, | |
| "learning_rate": 1.181818181818182e-06, | |
| "loss": 0.693, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.015175082514511173, | |
| "grad_norm": 0.657250412764622, | |
| "learning_rate": 1.484848484848485e-06, | |
| "loss": 0.6656, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.018210099017413408, | |
| "grad_norm": 0.5316521889428266, | |
| "learning_rate": 1.787878787878788e-06, | |
| "loss": 0.6313, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.021245115520315643, | |
| "grad_norm": 0.4575711828032985, | |
| "learning_rate": 2.090909090909091e-06, | |
| "loss": 0.6098, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.024280132023217875, | |
| "grad_norm": 0.4548384308100045, | |
| "learning_rate": 2.393939393939394e-06, | |
| "loss": 0.5885, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.02731514852612011, | |
| "grad_norm": 0.4458364948040082, | |
| "learning_rate": 2.6969696969696972e-06, | |
| "loss": 0.5916, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.030350165029022345, | |
| "grad_norm": 0.4794166169125783, | |
| "learning_rate": 3e-06, | |
| "loss": 0.5753, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.03338518153192458, | |
| "grad_norm": 0.4567719754046465, | |
| "learning_rate": 3.3030303030303033e-06, | |
| "loss": 0.5597, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.036420198034826816, | |
| "grad_norm": 0.4307419720442758, | |
| "learning_rate": 3.606060606060606e-06, | |
| "loss": 0.5684, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.03945521453772905, | |
| "grad_norm": 0.4458642819812865, | |
| "learning_rate": 3.90909090909091e-06, | |
| "loss": 0.5569, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.042490231040631286, | |
| "grad_norm": 0.45568817442378473, | |
| "learning_rate": 4.212121212121212e-06, | |
| "loss": 0.5551, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.045525247543533515, | |
| "grad_norm": 0.4950300651709512, | |
| "learning_rate": 4.5151515151515155e-06, | |
| "loss": 0.5565, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.04856026404643575, | |
| "grad_norm": 0.4441786811205893, | |
| "learning_rate": 4.818181818181819e-06, | |
| "loss": 0.5482, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.051595280549337985, | |
| "grad_norm": 0.4926559856436913, | |
| "learning_rate": 5.121212121212121e-06, | |
| "loss": 0.5482, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.05463029705224022, | |
| "grad_norm": 0.46505669754342027, | |
| "learning_rate": 5.424242424242425e-06, | |
| "loss": 0.5397, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.057665313555142456, | |
| "grad_norm": 0.4862932493508676, | |
| "learning_rate": 5.727272727272728e-06, | |
| "loss": 0.5379, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.06070033005804469, | |
| "grad_norm": 0.5633080158535704, | |
| "learning_rate": 6.030303030303031e-06, | |
| "loss": 0.5487, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.06373534656094693, | |
| "grad_norm": 0.5144964810224658, | |
| "learning_rate": 6.333333333333333e-06, | |
| "loss": 0.5354, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.06677036306384916, | |
| "grad_norm": 0.623858354223414, | |
| "learning_rate": 6.6363636363636375e-06, | |
| "loss": 0.5321, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.0698053795667514, | |
| "grad_norm": 0.5384794963785807, | |
| "learning_rate": 6.93939393939394e-06, | |
| "loss": 0.5286, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.07284039606965363, | |
| "grad_norm": 0.535110770579217, | |
| "learning_rate": 7.242424242424243e-06, | |
| "loss": 0.5277, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.07587541257255587, | |
| "grad_norm": 0.5036293665682129, | |
| "learning_rate": 7.545454545454546e-06, | |
| "loss": 0.5333, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.0789104290754581, | |
| "grad_norm": 0.47084801964860584, | |
| "learning_rate": 7.848484848484849e-06, | |
| "loss": 0.5272, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.08194544557836034, | |
| "grad_norm": 0.6020093296202623, | |
| "learning_rate": 8.151515151515152e-06, | |
| "loss": 0.5283, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.08498046208126257, | |
| "grad_norm": 0.5024737305172274, | |
| "learning_rate": 8.454545454545455e-06, | |
| "loss": 0.5212, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.08801547858416481, | |
| "grad_norm": 0.4994326809453112, | |
| "learning_rate": 8.757575757575759e-06, | |
| "loss": 0.5161, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.09105049508706703, | |
| "grad_norm": 0.5231140922773104, | |
| "learning_rate": 9.06060606060606e-06, | |
| "loss": 0.5281, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.09408551158996926, | |
| "grad_norm": 0.5098295511036418, | |
| "learning_rate": 9.363636363636365e-06, | |
| "loss": 0.5154, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.0971205280928715, | |
| "grad_norm": 0.5892467794961541, | |
| "learning_rate": 9.666666666666667e-06, | |
| "loss": 0.5218, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.10015554459577374, | |
| "grad_norm": 0.4951705896851936, | |
| "learning_rate": 9.96969696969697e-06, | |
| "loss": 0.5169, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.10319056109867597, | |
| "grad_norm": 0.6364271310354288, | |
| "learning_rate": 9.999772661973056e-06, | |
| "loss": 0.5133, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.1062255776015782, | |
| "grad_norm": 0.5028738085648048, | |
| "learning_rate": 9.99898682866784e-06, | |
| "loss": 0.52, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.10926059410448044, | |
| "grad_norm": 0.5217859360497503, | |
| "learning_rate": 9.997639781643002e-06, | |
| "loss": 0.5008, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.11229561060738268, | |
| "grad_norm": 0.5157314231456531, | |
| "learning_rate": 9.99573167212544e-06, | |
| "loss": 0.517, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.11533062711028491, | |
| "grad_norm": 0.524618846108783, | |
| "learning_rate": 9.993262714330009e-06, | |
| "loss": 0.5092, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.11836564361318715, | |
| "grad_norm": 0.5191412167795787, | |
| "learning_rate": 9.990233185435473e-06, | |
| "loss": 0.513, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.12140066011608938, | |
| "grad_norm": 0.5047990336080693, | |
| "learning_rate": 9.986643425553386e-06, | |
| "loss": 0.5129, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.12443567661899162, | |
| "grad_norm": 0.49678214945335275, | |
| "learning_rate": 9.98249383768991e-06, | |
| "loss": 0.5073, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.12747069312189385, | |
| "grad_norm": 0.5341967716757828, | |
| "learning_rate": 9.977784887700572e-06, | |
| "loss": 0.5088, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.13050570962479607, | |
| "grad_norm": 0.49917946146850173, | |
| "learning_rate": 9.972517104237961e-06, | |
| "loss": 0.4991, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.13354072612769832, | |
| "grad_norm": 0.49221501327780853, | |
| "learning_rate": 9.966691078692386e-06, | |
| "loss": 0.506, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.13657574263060054, | |
| "grad_norm": 0.5448393355182506, | |
| "learning_rate": 9.960307465125472e-06, | |
| "loss": 0.5025, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.1396107591335028, | |
| "grad_norm": 0.9165522381667764, | |
| "learning_rate": 9.953366980196746e-06, | |
| "loss": 0.4976, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.14264577563640501, | |
| "grad_norm": 0.5122649698502958, | |
| "learning_rate": 9.945870403083164e-06, | |
| "loss": 0.503, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.14568079213930726, | |
| "grad_norm": 0.48003844926471007, | |
| "learning_rate": 9.937818575391654e-06, | |
| "loss": 0.5044, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.14871580864220948, | |
| "grad_norm": 0.5083116253946615, | |
| "learning_rate": 9.929212401064616e-06, | |
| "loss": 0.505, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.15175082514511173, | |
| "grad_norm": 0.4997990195713387, | |
| "learning_rate": 9.920052846278455e-06, | |
| "loss": 0.4991, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.15478584164801396, | |
| "grad_norm": 0.4863244820808186, | |
| "learning_rate": 9.910340939335098e-06, | |
| "loss": 0.4889, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.1578208581509162, | |
| "grad_norm": 0.519835159370644, | |
| "learning_rate": 9.900077770546567e-06, | |
| "loss": 0.488, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.16085587465381843, | |
| "grad_norm": 0.5247955372337217, | |
| "learning_rate": 9.889264492112563e-06, | |
| "loss": 0.5025, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.16389089115672067, | |
| "grad_norm": 0.47839854894729905, | |
| "learning_rate": 9.877902317991116e-06, | |
| "loss": 0.4946, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.1669259076596229, | |
| "grad_norm": 0.4889470867795177, | |
| "learning_rate": 9.865992523762306e-06, | |
| "loss": 0.4989, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.16996092416252515, | |
| "grad_norm": 0.4753575663441252, | |
| "learning_rate": 9.853536446485048e-06, | |
| "loss": 0.503, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.17299594066542737, | |
| "grad_norm": 0.5422582939397118, | |
| "learning_rate": 9.840535484546996e-06, | |
| "loss": 0.4903, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.17603095716832962, | |
| "grad_norm": 0.4706321475205836, | |
| "learning_rate": 9.826991097507548e-06, | |
| "loss": 0.4958, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.17906597367123184, | |
| "grad_norm": 0.4839096225021675, | |
| "learning_rate": 9.812904805933989e-06, | |
| "loss": 0.4922, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.18210099017413406, | |
| "grad_norm": 0.4732593571246822, | |
| "learning_rate": 9.798278191230783e-06, | |
| "loss": 0.5004, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.1851360066770363, | |
| "grad_norm": 0.4505029818283895, | |
| "learning_rate": 9.78311289546204e-06, | |
| "loss": 0.4802, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.18817102317993853, | |
| "grad_norm": 0.5055436622019127, | |
| "learning_rate": 9.76741062116716e-06, | |
| "loss": 0.4945, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.19120603968284078, | |
| "grad_norm": 0.49572149753193984, | |
| "learning_rate": 9.751173131169705e-06, | |
| "loss": 0.4906, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.194241056185743, | |
| "grad_norm": 0.4448394645138839, | |
| "learning_rate": 9.73440224837949e-06, | |
| "loss": 0.496, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.19727607268864525, | |
| "grad_norm": 0.49316009769902025, | |
| "learning_rate": 9.717099855587935e-06, | |
| "loss": 0.486, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.20031108919154747, | |
| "grad_norm": 0.47797040662070445, | |
| "learning_rate": 9.699267895256695e-06, | |
| "loss": 0.4769, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.20334610569444972, | |
| "grad_norm": 0.5092952568059224, | |
| "learning_rate": 9.68090836929958e-06, | |
| "loss": 0.4918, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.20638112219735194, | |
| "grad_norm": 0.45202984420347087, | |
| "learning_rate": 9.662023338857822e-06, | |
| "loss": 0.485, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.2094161387002542, | |
| "grad_norm": 0.4983093319634588, | |
| "learning_rate": 9.642614924068667e-06, | |
| "loss": 0.4902, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.2124511552031564, | |
| "grad_norm": 0.492174959560037, | |
| "learning_rate": 9.622685303827366e-06, | |
| "loss": 0.4881, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.21548617170605866, | |
| "grad_norm": 0.47815379215317105, | |
| "learning_rate": 9.602236715542557e-06, | |
| "loss": 0.4848, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.21852118820896088, | |
| "grad_norm": 0.48589573091096694, | |
| "learning_rate": 9.581271454885077e-06, | |
| "loss": 0.4903, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.22155620471186313, | |
| "grad_norm": 0.46478604800112194, | |
| "learning_rate": 9.559791875530247e-06, | |
| "loss": 0.489, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.22459122121476535, | |
| "grad_norm": 0.5035008932669235, | |
| "learning_rate": 9.537800388893628e-06, | |
| "loss": 0.4864, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.2276262377176676, | |
| "grad_norm": 0.49713803635487736, | |
| "learning_rate": 9.515299463860301e-06, | |
| "loss": 0.4858, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.23066125422056982, | |
| "grad_norm": 0.48728829894131276, | |
| "learning_rate": 9.492291626507705e-06, | |
| "loss": 0.4874, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.23369627072347207, | |
| "grad_norm": 0.5654089866669252, | |
| "learning_rate": 9.468779459822034e-06, | |
| "loss": 0.4865, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.2367312872263743, | |
| "grad_norm": 0.456617843862989, | |
| "learning_rate": 9.444765603408273e-06, | |
| "loss": 0.4834, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.23976630372927651, | |
| "grad_norm": 0.7108327075954629, | |
| "learning_rate": 9.420252753193842e-06, | |
| "loss": 0.4725, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.24280132023217876, | |
| "grad_norm": 0.49021009503467355, | |
| "learning_rate": 9.395243661125948e-06, | |
| "loss": 0.4882, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.24583633673508098, | |
| "grad_norm": 0.48115706251944507, | |
| "learning_rate": 9.369741134862636e-06, | |
| "loss": 0.4752, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.24887135323798323, | |
| "grad_norm": 0.5131051402999688, | |
| "learning_rate": 9.343748037457585e-06, | |
| "loss": 0.4869, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.2519063697408855, | |
| "grad_norm": 0.4658252213252816, | |
| "learning_rate": 9.317267287038682e-06, | |
| "loss": 0.4884, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.2549413862437877, | |
| "grad_norm": 0.4751568661396783, | |
| "learning_rate": 9.290301856480425e-06, | |
| "loss": 0.4797, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.2579764027466899, | |
| "grad_norm": 0.47930138340752015, | |
| "learning_rate": 9.262854773070157e-06, | |
| "loss": 0.4869, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.26101141924959215, | |
| "grad_norm": 0.47733793569696137, | |
| "learning_rate": 9.234929118168228e-06, | |
| "loss": 0.4712, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.2640464357524944, | |
| "grad_norm": 0.4566798290905438, | |
| "learning_rate": 9.206528026862043e-06, | |
| "loss": 0.4765, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.26708145225539665, | |
| "grad_norm": 0.4466757545533174, | |
| "learning_rate": 9.177654687614112e-06, | |
| "loss": 0.4824, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.27011646875829887, | |
| "grad_norm": 0.47012964956941894, | |
| "learning_rate": 9.148312341904095e-06, | |
| "loss": 0.4768, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.2731514852612011, | |
| "grad_norm": 0.4650672010975063, | |
| "learning_rate": 9.118504283864891e-06, | |
| "loss": 0.4763, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.27618650176410336, | |
| "grad_norm": 0.4522875610929074, | |
| "learning_rate": 9.088233859912823e-06, | |
| "loss": 0.4774, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.2792215182670056, | |
| "grad_norm": 0.9218754990609884, | |
| "learning_rate": 9.057504468371954e-06, | |
| "loss": 0.4774, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.2822565347699078, | |
| "grad_norm": 0.4446080760553245, | |
| "learning_rate": 9.026319559092566e-06, | |
| "loss": 0.4822, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.28529155127281003, | |
| "grad_norm": 0.45974314278861356, | |
| "learning_rate": 8.994682633063868e-06, | |
| "loss": 0.4737, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.2883265677757123, | |
| "grad_norm": 0.46640835738246217, | |
| "learning_rate": 8.962597242020947e-06, | |
| "loss": 0.4772, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.2913615842786145, | |
| "grad_norm": 0.45954793881402833, | |
| "learning_rate": 8.930066988046042e-06, | |
| "loss": 0.4688, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.29439660078151675, | |
| "grad_norm": 0.5238237149613728, | |
| "learning_rate": 8.897095523164141e-06, | |
| "loss": 0.4742, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 0.29743161728441897, | |
| "grad_norm": 0.45541775923959604, | |
| "learning_rate": 8.863686548933001e-06, | |
| "loss": 0.4786, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 0.3004666337873212, | |
| "grad_norm": 0.46811147224647004, | |
| "learning_rate": 8.829843816027575e-06, | |
| "loss": 0.4706, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 0.30350165029022347, | |
| "grad_norm": 0.47480506009090023, | |
| "learning_rate": 8.795571123818948e-06, | |
| "loss": 0.4733, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.3065366667931257, | |
| "grad_norm": 0.46579935990440124, | |
| "learning_rate": 8.760872319947796e-06, | |
| "loss": 0.467, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 0.3095716832960279, | |
| "grad_norm": 0.4713650285222608, | |
| "learning_rate": 8.72575129989244e-06, | |
| "loss": 0.4714, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 0.31260669979893013, | |
| "grad_norm": 0.5172958915739383, | |
| "learning_rate": 8.690212006531498e-06, | |
| "loss": 0.4778, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 0.3156417163018324, | |
| "grad_norm": 0.4636306902344978, | |
| "learning_rate": 8.654258429701254e-06, | |
| "loss": 0.4766, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 0.31867673280473463, | |
| "grad_norm": 0.4630632755815923, | |
| "learning_rate": 8.617894605747728e-06, | |
| "loss": 0.471, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.32171174930763685, | |
| "grad_norm": 0.4726631111043291, | |
| "learning_rate": 8.581124617073531e-06, | |
| "loss": 0.4754, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 0.3247467658105391, | |
| "grad_norm": 0.449173460165581, | |
| "learning_rate": 8.543952591679565e-06, | |
| "loss": 0.4757, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 0.32778178231344135, | |
| "grad_norm": 0.4593115406956091, | |
| "learning_rate": 8.506382702701575e-06, | |
| "loss": 0.4682, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 0.33081679881634357, | |
| "grad_norm": 0.49121579493660045, | |
| "learning_rate": 8.468419167941658e-06, | |
| "loss": 0.4631, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 0.3338518153192458, | |
| "grad_norm": 0.4799330378295981, | |
| "learning_rate": 8.430066249394754e-06, | |
| "loss": 0.4786, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.336886831822148, | |
| "grad_norm": 0.46743240892192656, | |
| "learning_rate": 8.391328252770165e-06, | |
| "loss": 0.4648, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 0.3399218483250503, | |
| "grad_norm": 0.49238589329434995, | |
| "learning_rate": 8.352209527008164e-06, | |
| "loss": 0.4785, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 0.3429568648279525, | |
| "grad_norm": 0.463123088401655, | |
| "learning_rate": 8.31271446379178e-06, | |
| "loss": 0.4684, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 0.34599188133085473, | |
| "grad_norm": 0.45804967626699783, | |
| "learning_rate": 8.272847497053745e-06, | |
| "loss": 0.467, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 0.34902689783375695, | |
| "grad_norm": 0.49208327340861874, | |
| "learning_rate": 8.232613102478722e-06, | |
| "loss": 0.4734, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.35206191433665923, | |
| "grad_norm": 0.4834727766246894, | |
| "learning_rate": 8.192015797000849e-06, | |
| "loss": 0.4634, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 0.35509693083956145, | |
| "grad_norm": 0.45802821271745225, | |
| "learning_rate": 8.151060138296624e-06, | |
| "loss": 0.4769, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 0.3581319473424637, | |
| "grad_norm": 0.47069229829560316, | |
| "learning_rate": 8.10975072427326e-06, | |
| "loss": 0.4631, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 0.3611669638453659, | |
| "grad_norm": 0.4638785009109008, | |
| "learning_rate": 8.068092192552473e-06, | |
| "loss": 0.4621, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 0.3642019803482681, | |
| "grad_norm": 0.5267368501556567, | |
| "learning_rate": 8.026089219949856e-06, | |
| "loss": 0.4707, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.3672369968511704, | |
| "grad_norm": 0.4603778748004369, | |
| "learning_rate": 7.983746521949822e-06, | |
| "loss": 0.4691, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 0.3702720133540726, | |
| "grad_norm": 0.5044371212314646, | |
| "learning_rate": 7.941068852176233e-06, | |
| "loss": 0.4673, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 0.37330702985697484, | |
| "grad_norm": 0.5023326078920469, | |
| "learning_rate": 7.898061001858712e-06, | |
| "loss": 0.4652, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 0.37634204635987706, | |
| "grad_norm": 0.5465382069712866, | |
| "learning_rate": 7.854727799294768e-06, | |
| "loss": 0.4648, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 0.37937706286277934, | |
| "grad_norm": 0.46811731936405077, | |
| "learning_rate": 7.81107410930774e-06, | |
| "loss": 0.474, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.38241207936568156, | |
| "grad_norm": 0.4582110078248361, | |
| "learning_rate": 7.767104832700645e-06, | |
| "loss": 0.4557, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 0.3854470958685838, | |
| "grad_norm": 0.47816100008054285, | |
| "learning_rate": 7.72282490570599e-06, | |
| "loss": 0.4655, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 0.388482112371486, | |
| "grad_norm": 0.4587552509577914, | |
| "learning_rate": 7.678239299431594e-06, | |
| "loss": 0.4675, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 0.3915171288743883, | |
| "grad_norm": 0.48955107625039307, | |
| "learning_rate": 7.633353019302519e-06, | |
| "loss": 0.4628, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 0.3945521453772905, | |
| "grad_norm": 0.4697573855941327, | |
| "learning_rate": 7.58817110449912e-06, | |
| "loss": 0.4705, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.3975871618801927, | |
| "grad_norm": 0.4586083541834714, | |
| "learning_rate": 7.5426986273913275e-06, | |
| "loss": 0.4633, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 0.40062217838309494, | |
| "grad_norm": 0.4943303423222579, | |
| "learning_rate": 7.496940692969188e-06, | |
| "loss": 0.4664, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 0.4036571948859972, | |
| "grad_norm": 0.4472430460309261, | |
| "learning_rate": 7.450902438269761e-06, | |
| "loss": 0.466, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 0.40669221138889944, | |
| "grad_norm": 0.45151069440310115, | |
| "learning_rate": 7.404589031800395e-06, | |
| "loss": 0.466, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 0.40972722789180166, | |
| "grad_norm": 0.4931709679843594, | |
| "learning_rate": 7.358005672958488e-06, | |
| "loss": 0.4638, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 0.4127622443947039, | |
| "grad_norm": 0.4458955481322604, | |
| "learning_rate": 7.311157591447775e-06, | |
| "loss": 0.4574, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 0.41579726089760616, | |
| "grad_norm": 0.5037362623251065, | |
| "learning_rate": 7.264050046691211e-06, | |
| "loss": 0.4631, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 0.4188322774005084, | |
| "grad_norm": 0.5401075737360693, | |
| "learning_rate": 7.216688327240523e-06, | |
| "loss": 0.4672, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 0.4218672939034106, | |
| "grad_norm": 0.46823354166024683, | |
| "learning_rate": 7.16907775018248e-06, | |
| "loss": 0.4613, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 0.4249023104063128, | |
| "grad_norm": 0.4780876891839522, | |
| "learning_rate": 7.1212236605419795e-06, | |
| "loss": 0.4666, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.42793732690921504, | |
| "grad_norm": 0.5119425577262322, | |
| "learning_rate": 7.0731314306819725e-06, | |
| "loss": 0.454, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 0.4309723434121173, | |
| "grad_norm": 0.47436698182963066, | |
| "learning_rate": 7.024806459700344e-06, | |
| "loss": 0.4745, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 0.43400735991501954, | |
| "grad_norm": 0.47009240919947903, | |
| "learning_rate": 6.976254172823773e-06, | |
| "loss": 0.4578, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 0.43704237641792176, | |
| "grad_norm": 0.4505894481894143, | |
| "learning_rate": 6.92748002079867e-06, | |
| "loss": 0.4652, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 0.440077392920824, | |
| "grad_norm": 0.4739852546862808, | |
| "learning_rate": 6.878489479279248e-06, | |
| "loss": 0.4634, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 0.44311240942372626, | |
| "grad_norm": 0.43339777098222865, | |
| "learning_rate": 6.829288048212789e-06, | |
| "loss": 0.4583, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 0.4461474259266285, | |
| "grad_norm": 0.48444826933680063, | |
| "learning_rate": 6.779881251222198e-06, | |
| "loss": 0.4654, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 0.4491824424295307, | |
| "grad_norm": 0.43409574874106044, | |
| "learning_rate": 6.730274634985883e-06, | |
| "loss": 0.4671, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 0.4522174589324329, | |
| "grad_norm": 0.4532708590504662, | |
| "learning_rate": 6.6804737686150615e-06, | |
| "loss": 0.4698, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 0.4552524754353352, | |
| "grad_norm": 0.473135305256464, | |
| "learning_rate": 6.630484243028534e-06, | |
| "loss": 0.4737, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.4582874919382374, | |
| "grad_norm": 0.4685015310135646, | |
| "learning_rate": 6.580311670325029e-06, | |
| "loss": 0.4556, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 0.46132250844113964, | |
| "grad_norm": 0.46358839605143787, | |
| "learning_rate": 6.529961683153136e-06, | |
| "loss": 0.4604, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 0.46435752494404187, | |
| "grad_norm": 0.45388131225935224, | |
| "learning_rate": 6.479439934078983e-06, | |
| "loss": 0.4559, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 0.46739254144694414, | |
| "grad_norm": 0.435672967522485, | |
| "learning_rate": 6.428752094951621e-06, | |
| "loss": 0.4589, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 0.47042755794984636, | |
| "grad_norm": 0.4681448507359377, | |
| "learning_rate": 6.377903856266285e-06, | |
| "loss": 0.4656, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 0.4734625744527486, | |
| "grad_norm": 0.4866196593408997, | |
| "learning_rate": 6.326900926525552e-06, | |
| "loss": 0.4587, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 0.4764975909556508, | |
| "grad_norm": 0.5334135903943417, | |
| "learning_rate": 6.275749031598457e-06, | |
| "loss": 0.4596, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 0.47953260745855303, | |
| "grad_norm": 0.545053303736673, | |
| "learning_rate": 6.224453914077691e-06, | |
| "loss": 0.4599, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 0.4825676239614553, | |
| "grad_norm": 0.43049803862728125, | |
| "learning_rate": 6.173021332634899e-06, | |
| "loss": 0.4609, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 0.4856026404643575, | |
| "grad_norm": 0.4679235070374552, | |
| "learning_rate": 6.121457061374182e-06, | |
| "loss": 0.4659, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.48863765696725975, | |
| "grad_norm": 0.48596466698858914, | |
| "learning_rate": 6.06976688918386e-06, | |
| "loss": 0.4552, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 0.49167267347016197, | |
| "grad_norm": 0.4654168221541189, | |
| "learning_rate": 6.017956619086585e-06, | |
| "loss": 0.4652, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 0.49470768997306425, | |
| "grad_norm": 0.43813429165646467, | |
| "learning_rate": 5.966032067587862e-06, | |
| "loss": 0.4596, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 0.49774270647596647, | |
| "grad_norm": 0.6681280851162651, | |
| "learning_rate": 5.913999064023046e-06, | |
| "loss": 0.4572, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 0.5007777229788687, | |
| "grad_norm": 0.5204323449742513, | |
| "learning_rate": 5.861863449902926e-06, | |
| "loss": 0.4628, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 0.503812739481771, | |
| "grad_norm": 0.4296041756151093, | |
| "learning_rate": 5.80963107825791e-06, | |
| "loss": 0.4568, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 0.5068477559846731, | |
| "grad_norm": 0.4687304772565422, | |
| "learning_rate": 5.7573078129809386e-06, | |
| "loss": 0.4604, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 0.5098827724875754, | |
| "grad_norm": 0.4554317567297939, | |
| "learning_rate": 5.704899528169175e-06, | |
| "loss": 0.4698, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 0.5129177889904777, | |
| "grad_norm": 0.4550611945171018, | |
| "learning_rate": 5.652412107464532e-06, | |
| "loss": 0.4559, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 0.5159528054933799, | |
| "grad_norm": 0.4737844991440093, | |
| "learning_rate": 5.5998514433931636e-06, | |
| "loss": 0.4657, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.5189878219962821, | |
| "grad_norm": 0.44176755708702975, | |
| "learning_rate": 5.547223436703919e-06, | |
| "loss": 0.4555, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 0.5220228384991843, | |
| "grad_norm": 0.4602868804162559, | |
| "learning_rate": 5.494533995705904e-06, | |
| "loss": 0.4587, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 0.5250578550020866, | |
| "grad_norm": 0.4619636659852323, | |
| "learning_rate": 5.441789035605174e-06, | |
| "loss": 0.4605, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 0.5280928715049888, | |
| "grad_norm": 0.4918743050797947, | |
| "learning_rate": 5.3889944778406656e-06, | |
| "loss": 0.4601, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 0.531127888007891, | |
| "grad_norm": 0.5108035906034613, | |
| "learning_rate": 5.336156249419422e-06, | |
| "loss": 0.4583, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 0.5341629045107933, | |
| "grad_norm": 0.4644351963738344, | |
| "learning_rate": 5.283280282251192e-06, | |
| "loss": 0.451, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 0.5371979210136956, | |
| "grad_norm": 0.466505558488539, | |
| "learning_rate": 5.230372512482485e-06, | |
| "loss": 0.4569, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 0.5402329375165977, | |
| "grad_norm": 0.4395984838919295, | |
| "learning_rate": 5.177438879830148e-06, | |
| "loss": 0.4546, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 0.5432679540195, | |
| "grad_norm": 0.5081543080746838, | |
| "learning_rate": 5.1244853269145315e-06, | |
| "loss": 0.4522, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 0.5463029705224022, | |
| "grad_norm": 0.4739999458440851, | |
| "learning_rate": 5.0715177985923454e-06, | |
| "loss": 0.4575, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.5493379870253045, | |
| "grad_norm": 0.44986425269809655, | |
| "learning_rate": 5.0185422412892615e-06, | |
| "loss": 0.4504, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 0.5523730035282067, | |
| "grad_norm": 0.493838430904318, | |
| "learning_rate": 4.96556460233232e-06, | |
| "loss": 0.4565, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 0.5554080200311089, | |
| "grad_norm": 0.4597123498494703, | |
| "learning_rate": 4.912590829282269e-06, | |
| "loss": 0.4552, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 0.5584430365340112, | |
| "grad_norm": 0.45249866862931715, | |
| "learning_rate": 4.859626869265838e-06, | |
| "loss": 0.4646, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 0.5614780530369133, | |
| "grad_norm": 0.47693139933323253, | |
| "learning_rate": 4.806678668308102e-06, | |
| "loss": 0.4593, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 0.5645130695398156, | |
| "grad_norm": 0.4441745555089401, | |
| "learning_rate": 4.753752170664926e-06, | |
| "loss": 0.4518, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 0.5675480860427179, | |
| "grad_norm": 0.5407791277506199, | |
| "learning_rate": 4.700853318155655e-06, | |
| "loss": 0.4537, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 0.5705831025456201, | |
| "grad_norm": 0.43766170865780535, | |
| "learning_rate": 4.647988049496026e-06, | |
| "loss": 0.456, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 0.5736181190485223, | |
| "grad_norm": 0.4381830382862563, | |
| "learning_rate": 4.5951622996314785e-06, | |
| "loss": 0.4544, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 0.5766531355514246, | |
| "grad_norm": 0.5008063102656403, | |
| "learning_rate": 4.542381999070851e-06, | |
| "loss": 0.4576, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.5796881520543268, | |
| "grad_norm": 0.4449319997966233, | |
| "learning_rate": 4.489653073220593e-06, | |
| "loss": 0.4479, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 0.582723168557229, | |
| "grad_norm": 0.4325405045901816, | |
| "learning_rate": 4.43698144171955e-06, | |
| "loss": 0.4566, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 0.5857581850601312, | |
| "grad_norm": 0.4558072435464798, | |
| "learning_rate": 4.3843730177743835e-06, | |
| "loss": 0.4522, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 0.5887932015630335, | |
| "grad_norm": 0.46993480403343113, | |
| "learning_rate": 4.331833707495735e-06, | |
| "loss": 0.4497, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 0.5918282180659358, | |
| "grad_norm": 0.48201210672220995, | |
| "learning_rate": 4.279369409235159e-06, | |
| "loss": 0.4557, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 0.5948632345688379, | |
| "grad_norm": 0.48886046384215476, | |
| "learning_rate": 4.226986012922954e-06, | |
| "loss": 0.4527, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 0.5978982510717402, | |
| "grad_norm": 0.496378890478507, | |
| "learning_rate": 4.174689399406917e-06, | |
| "loss": 0.4474, | |
| "step": 1970 | |
| }, | |
| { | |
| "epoch": 0.6009332675746424, | |
| "grad_norm": 0.5586162522102414, | |
| "learning_rate": 4.122485439792139e-06, | |
| "loss": 0.4525, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 0.6039682840775447, | |
| "grad_norm": 0.446752242572371, | |
| "learning_rate": 4.070379994781865e-06, | |
| "loss": 0.446, | |
| "step": 1990 | |
| }, | |
| { | |
| "epoch": 0.6070033005804469, | |
| "grad_norm": 0.47706411203661847, | |
| "learning_rate": 4.018378914019556e-06, | |
| "loss": 0.4596, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.6100383170833491, | |
| "grad_norm": 0.538309740370942, | |
| "learning_rate": 3.966488035432169e-06, | |
| "loss": 0.4421, | |
| "step": 2010 | |
| }, | |
| { | |
| "epoch": 0.6130733335862514, | |
| "grad_norm": 0.5230632035555158, | |
| "learning_rate": 3.914713184574759e-06, | |
| "loss": 0.4569, | |
| "step": 2020 | |
| }, | |
| { | |
| "epoch": 0.6161083500891537, | |
| "grad_norm": 0.4338338834185748, | |
| "learning_rate": 3.863060173976466e-06, | |
| "loss": 0.4541, | |
| "step": 2030 | |
| }, | |
| { | |
| "epoch": 0.6191433665920558, | |
| "grad_norm": 0.5603785317621145, | |
| "learning_rate": 3.811534802487983e-06, | |
| "loss": 0.4551, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 0.6221783830949581, | |
| "grad_norm": 0.47642244684394447, | |
| "learning_rate": 3.7601428546305246e-06, | |
| "loss": 0.4523, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 0.6252133995978603, | |
| "grad_norm": 0.46992960142430185, | |
| "learning_rate": 3.7088900999464432e-06, | |
| "loss": 0.446, | |
| "step": 2060 | |
| }, | |
| { | |
| "epoch": 0.6282484161007625, | |
| "grad_norm": 0.4845504298042459, | |
| "learning_rate": 3.657782292351501e-06, | |
| "loss": 0.4566, | |
| "step": 2070 | |
| }, | |
| { | |
| "epoch": 0.6312834326036648, | |
| "grad_norm": 0.47017322226992764, | |
| "learning_rate": 3.6068251694888973e-06, | |
| "loss": 0.4508, | |
| "step": 2080 | |
| }, | |
| { | |
| "epoch": 0.634318449106567, | |
| "grad_norm": 0.5121234005197363, | |
| "learning_rate": 3.556024452085144e-06, | |
| "loss": 0.4431, | |
| "step": 2090 | |
| }, | |
| { | |
| "epoch": 0.6373534656094693, | |
| "grad_norm": 0.487255052511626, | |
| "learning_rate": 3.505385843307809e-06, | |
| "loss": 0.4473, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 0.6403884821123715, | |
| "grad_norm": 0.4564465966378128, | |
| "learning_rate": 3.4549150281252635e-06, | |
| "loss": 0.4472, | |
| "step": 2110 | |
| }, | |
| { | |
| "epoch": 0.6434234986152737, | |
| "grad_norm": 0.4806890404068221, | |
| "learning_rate": 3.404617672668441e-06, | |
| "loss": 0.4536, | |
| "step": 2120 | |
| }, | |
| { | |
| "epoch": 0.646458515118176, | |
| "grad_norm": 0.45235070380959885, | |
| "learning_rate": 3.354499423594737e-06, | |
| "loss": 0.4522, | |
| "step": 2130 | |
| }, | |
| { | |
| "epoch": 0.6494935316210781, | |
| "grad_norm": 0.45955408837797435, | |
| "learning_rate": 3.3045659074540797e-06, | |
| "loss": 0.4441, | |
| "step": 2140 | |
| }, | |
| { | |
| "epoch": 0.6525285481239804, | |
| "grad_norm": 0.4923322956992551, | |
| "learning_rate": 3.254822730057266e-06, | |
| "loss": 0.4551, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 0.6555635646268827, | |
| "grad_norm": 0.48682070386464155, | |
| "learning_rate": 3.205275475846614e-06, | |
| "loss": 0.4496, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 0.6585985811297849, | |
| "grad_norm": 0.47952708737796373, | |
| "learning_rate": 3.1559297072690376e-06, | |
| "loss": 0.4509, | |
| "step": 2170 | |
| }, | |
| { | |
| "epoch": 0.6616335976326871, | |
| "grad_norm": 0.4543123589314356, | |
| "learning_rate": 3.106790964151556e-06, | |
| "loss": 0.4469, | |
| "step": 2180 | |
| }, | |
| { | |
| "epoch": 0.6646686141355893, | |
| "grad_norm": 0.4820597535972369, | |
| "learning_rate": 3.0578647630793845e-06, | |
| "loss": 0.45, | |
| "step": 2190 | |
| }, | |
| { | |
| "epoch": 0.6677036306384916, | |
| "grad_norm": 0.47560501988050274, | |
| "learning_rate": 3.0091565967765903e-06, | |
| "loss": 0.4506, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 0.6707386471413939, | |
| "grad_norm": 0.4590471436665147, | |
| "learning_rate": 2.9606719334894673e-06, | |
| "loss": 0.4411, | |
| "step": 2210 | |
| }, | |
| { | |
| "epoch": 0.673773663644296, | |
| "grad_norm": 0.4871582819595622, | |
| "learning_rate": 2.9124162163726333e-06, | |
| "loss": 0.4581, | |
| "step": 2220 | |
| }, | |
| { | |
| "epoch": 0.6768086801471983, | |
| "grad_norm": 0.4621936812853514, | |
| "learning_rate": 2.864394862877945e-06, | |
| "loss": 0.4392, | |
| "step": 2230 | |
| }, | |
| { | |
| "epoch": 0.6798436966501006, | |
| "grad_norm": 0.5106734882471429, | |
| "learning_rate": 2.8166132641463174e-06, | |
| "loss": 0.4514, | |
| "step": 2240 | |
| }, | |
| { | |
| "epoch": 0.6828787131530027, | |
| "grad_norm": 0.4872901241863599, | |
| "learning_rate": 2.7690767844024757e-06, | |
| "loss": 0.456, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 0.685913729655905, | |
| "grad_norm": 0.5327159140172049, | |
| "learning_rate": 2.7217907603527425e-06, | |
| "loss": 0.4502, | |
| "step": 2260 | |
| }, | |
| { | |
| "epoch": 0.6889487461588072, | |
| "grad_norm": 0.46596161967531874, | |
| "learning_rate": 2.67476050058591e-06, | |
| "loss": 0.4368, | |
| "step": 2270 | |
| }, | |
| { | |
| "epoch": 0.6919837626617095, | |
| "grad_norm": 0.4756057088646994, | |
| "learning_rate": 2.627991284977265e-06, | |
| "loss": 0.4427, | |
| "step": 2280 | |
| }, | |
| { | |
| "epoch": 0.6950187791646117, | |
| "grad_norm": 0.457554867967829, | |
| "learning_rate": 2.5814883640958425e-06, | |
| "loss": 0.4492, | |
| "step": 2290 | |
| }, | |
| { | |
| "epoch": 0.6980537956675139, | |
| "grad_norm": 0.4914134735789525, | |
| "learning_rate": 2.535256958614972e-06, | |
| "loss": 0.4521, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 0.7010888121704162, | |
| "grad_norm": 0.4514454510170551, | |
| "learning_rate": 2.489302258726169e-06, | |
| "loss": 0.445, | |
| "step": 2310 | |
| }, | |
| { | |
| "epoch": 0.7041238286733185, | |
| "grad_norm": 0.5541195252480908, | |
| "learning_rate": 2.4436294235564616e-06, | |
| "loss": 0.4487, | |
| "step": 2320 | |
| }, | |
| { | |
| "epoch": 0.7071588451762206, | |
| "grad_norm": 0.4701102512963565, | |
| "learning_rate": 2.398243580589197e-06, | |
| "loss": 0.4467, | |
| "step": 2330 | |
| }, | |
| { | |
| "epoch": 0.7101938616791229, | |
| "grad_norm": 0.5033348530844566, | |
| "learning_rate": 2.353149825088401e-06, | |
| "loss": 0.4424, | |
| "step": 2340 | |
| }, | |
| { | |
| "epoch": 0.7132288781820251, | |
| "grad_norm": 0.4896392329694068, | |
| "learning_rate": 2.30835321952675e-06, | |
| "loss": 0.4492, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 0.7162638946849273, | |
| "grad_norm": 0.45357241678052895, | |
| "learning_rate": 2.263858793017247e-06, | |
| "loss": 0.4399, | |
| "step": 2360 | |
| }, | |
| { | |
| "epoch": 0.7192989111878296, | |
| "grad_norm": 0.47254537181545697, | |
| "learning_rate": 2.219671540748607e-06, | |
| "loss": 0.4486, | |
| "step": 2370 | |
| }, | |
| { | |
| "epoch": 0.7223339276907318, | |
| "grad_norm": 1.0513237554990393, | |
| "learning_rate": 2.1757964234244806e-06, | |
| "loss": 0.4516, | |
| "step": 2380 | |
| }, | |
| { | |
| "epoch": 0.7253689441936341, | |
| "grad_norm": 1.211615701259343, | |
| "learning_rate": 2.1322383667065328e-06, | |
| "loss": 0.4459, | |
| "step": 2390 | |
| }, | |
| { | |
| "epoch": 0.7284039606965362, | |
| "grad_norm": 0.49549117242754526, | |
| "learning_rate": 2.0890022606614658e-06, | |
| "loss": 0.4519, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.7314389771994385, | |
| "grad_norm": 0.441302787437587, | |
| "learning_rate": 2.0460929592120286e-06, | |
| "loss": 0.4421, | |
| "step": 2410 | |
| }, | |
| { | |
| "epoch": 0.7344739937023408, | |
| "grad_norm": 0.47459997274968196, | |
| "learning_rate": 2.0035152795920943e-06, | |
| "loss": 0.4474, | |
| "step": 2420 | |
| }, | |
| { | |
| "epoch": 0.737509010205243, | |
| "grad_norm": 0.5534394858546283, | |
| "learning_rate": 1.961274001805844e-06, | |
| "loss": 0.4506, | |
| "step": 2430 | |
| }, | |
| { | |
| "epoch": 0.7405440267081452, | |
| "grad_norm": 0.5940641703049439, | |
| "learning_rate": 1.9193738680911444e-06, | |
| "loss": 0.4435, | |
| "step": 2440 | |
| }, | |
| { | |
| "epoch": 0.7435790432110475, | |
| "grad_norm": 0.47749388420290173, | |
| "learning_rate": 1.8778195823871537e-06, | |
| "loss": 0.4473, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 0.7466140597139497, | |
| "grad_norm": 0.46649043860109973, | |
| "learning_rate": 1.836615809806232e-06, | |
| "loss": 0.441, | |
| "step": 2460 | |
| }, | |
| { | |
| "epoch": 0.749649076216852, | |
| "grad_norm": 0.47765577041447577, | |
| "learning_rate": 1.7957671761102142e-06, | |
| "loss": 0.4394, | |
| "step": 2470 | |
| }, | |
| { | |
| "epoch": 0.7526840927197541, | |
| "grad_norm": 0.4609425591065501, | |
| "learning_rate": 1.7552782671910845e-06, | |
| "loss": 0.4491, | |
| "step": 2480 | |
| }, | |
| { | |
| "epoch": 0.7557191092226564, | |
| "grad_norm": 0.4952386048466494, | |
| "learning_rate": 1.715153628556162e-06, | |
| "loss": 0.4429, | |
| "step": 2490 | |
| }, | |
| { | |
| "epoch": 0.7587541257255587, | |
| "grad_norm": 0.45327322111640855, | |
| "learning_rate": 1.6753977648177682e-06, | |
| "loss": 0.452, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.7617891422284608, | |
| "grad_norm": 0.560152662522095, | |
| "learning_rate": 1.6360151391875395e-06, | |
| "loss": 0.4482, | |
| "step": 2510 | |
| }, | |
| { | |
| "epoch": 0.7648241587313631, | |
| "grad_norm": 0.5588616468957155, | |
| "learning_rate": 1.5970101729753485e-06, | |
| "loss": 0.4411, | |
| "step": 2520 | |
| }, | |
| { | |
| "epoch": 0.7678591752342654, | |
| "grad_norm": 0.5012865331807312, | |
| "learning_rate": 1.5583872450929455e-06, | |
| "loss": 0.4466, | |
| "step": 2530 | |
| }, | |
| { | |
| "epoch": 0.7708941917371676, | |
| "grad_norm": 0.4458865832841834, | |
| "learning_rate": 1.5201506915623621e-06, | |
| "loss": 0.443, | |
| "step": 2540 | |
| }, | |
| { | |
| "epoch": 0.7739292082400698, | |
| "grad_norm": 0.5048770346419175, | |
| "learning_rate": 1.4823048050291211e-06, | |
| "loss": 0.452, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 0.776964224742972, | |
| "grad_norm": 0.46420339110163, | |
| "learning_rate": 1.4448538342803242e-06, | |
| "loss": 0.4405, | |
| "step": 2560 | |
| }, | |
| { | |
| "epoch": 0.7799992412458743, | |
| "grad_norm": 0.5148070470946807, | |
| "learning_rate": 1.407801983767656e-06, | |
| "loss": 0.4452, | |
| "step": 2570 | |
| }, | |
| { | |
| "epoch": 0.7830342577487766, | |
| "grad_norm": 0.4919925143595241, | |
| "learning_rate": 1.3711534131353738e-06, | |
| "loss": 0.4481, | |
| "step": 2580 | |
| }, | |
| { | |
| "epoch": 0.7860692742516787, | |
| "grad_norm": 0.8295554704623347, | |
| "learning_rate": 1.3349122367533135e-06, | |
| "loss": 0.4443, | |
| "step": 2590 | |
| }, | |
| { | |
| "epoch": 0.789104290754581, | |
| "grad_norm": 0.4715835221582969, | |
| "learning_rate": 1.2990825232550065e-06, | |
| "loss": 0.4441, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 0.7921393072574832, | |
| "grad_norm": 0.4888383711595245, | |
| "learning_rate": 1.2636682950808882e-06, | |
| "loss": 0.4414, | |
| "step": 2610 | |
| }, | |
| { | |
| "epoch": 0.7951743237603854, | |
| "grad_norm": 0.46367852635576223, | |
| "learning_rate": 1.228673528026741e-06, | |
| "loss": 0.443, | |
| "step": 2620 | |
| }, | |
| { | |
| "epoch": 0.7982093402632877, | |
| "grad_norm": 0.458931669471863, | |
| "learning_rate": 1.194102150797326e-06, | |
| "loss": 0.445, | |
| "step": 2630 | |
| }, | |
| { | |
| "epoch": 0.8012443567661899, | |
| "grad_norm": 0.5805342428508335, | |
| "learning_rate": 1.1599580445653496e-06, | |
| "loss": 0.4416, | |
| "step": 2640 | |
| }, | |
| { | |
| "epoch": 0.8042793732690922, | |
| "grad_norm": 0.47446300706665095, | |
| "learning_rate": 1.1262450425357175e-06, | |
| "loss": 0.4527, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 0.8073143897719944, | |
| "grad_norm": 0.4585780912736001, | |
| "learning_rate": 1.092966929515218e-06, | |
| "loss": 0.44, | |
| "step": 2660 | |
| }, | |
| { | |
| "epoch": 0.8103494062748966, | |
| "grad_norm": 0.48233598242441533, | |
| "learning_rate": 1.0601274414876067e-06, | |
| "loss": 0.4455, | |
| "step": 2670 | |
| }, | |
| { | |
| "epoch": 0.8133844227777989, | |
| "grad_norm": 0.46200584695129754, | |
| "learning_rate": 1.0277302651941894e-06, | |
| "loss": 0.4446, | |
| "step": 2680 | |
| }, | |
| { | |
| "epoch": 0.816419439280701, | |
| "grad_norm": 0.4874246650348082, | |
| "learning_rate": 9.95779037719926e-07, | |
| "loss": 0.4397, | |
| "step": 2690 | |
| }, | |
| { | |
| "epoch": 0.8194544557836033, | |
| "grad_norm": 0.4547919796548769, | |
| "learning_rate": 9.642773460851141e-07, | |
| "loss": 0.4473, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 0.8224894722865056, | |
| "grad_norm": 0.4972539970599672, | |
| "learning_rate": 9.332287268426881e-07, | |
| "loss": 0.4425, | |
| "step": 2710 | |
| }, | |
| { | |
| "epoch": 0.8255244887894078, | |
| "grad_norm": 0.43865232063488374, | |
| "learning_rate": 9.026366656811835e-07, | |
| "loss": 0.4401, | |
| "step": 2720 | |
| }, | |
| { | |
| "epoch": 0.82855950529231, | |
| "grad_norm": 1.1602238104916722, | |
| "learning_rate": 8.725045970334262e-07, | |
| "loss": 0.4504, | |
| "step": 2730 | |
| }, | |
| { | |
| "epoch": 0.8315945217952123, | |
| "grad_norm": 0.4663648778218256, | |
| "learning_rate": 8.428359036909455e-07, | |
| "loss": 0.4391, | |
| "step": 2740 | |
| }, | |
| { | |
| "epoch": 0.8346295382981145, | |
| "grad_norm": 0.48682417123785116, | |
| "learning_rate": 8.136339164242241e-07, | |
| "loss": 0.4467, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 0.8376645548010168, | |
| "grad_norm": 0.4673166286188927, | |
| "learning_rate": 7.849019136087477e-07, | |
| "loss": 0.4398, | |
| "step": 2760 | |
| }, | |
| { | |
| "epoch": 0.8406995713039189, | |
| "grad_norm": 0.4903812570342262, | |
| "learning_rate": 7.566431208569747e-07, | |
| "loss": 0.4413, | |
| "step": 2770 | |
| }, | |
| { | |
| "epoch": 0.8437345878068212, | |
| "grad_norm": 0.485844612436569, | |
| "learning_rate": 7.288607106561935e-07, | |
| "loss": 0.4451, | |
| "step": 2780 | |
| }, | |
| { | |
| "epoch": 0.8467696043097235, | |
| "grad_norm": 0.4585651698156433, | |
| "learning_rate": 7.015578020123804e-07, | |
| "loss": 0.439, | |
| "step": 2790 | |
| }, | |
| { | |
| "epoch": 0.8498046208126256, | |
| "grad_norm": 0.4839043585723541, | |
| "learning_rate": 6.747374601000229e-07, | |
| "loss": 0.4451, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 0.8528396373155279, | |
| "grad_norm": 0.45692832107739795, | |
| "learning_rate": 6.484026959180256e-07, | |
| "loss": 0.439, | |
| "step": 2810 | |
| }, | |
| { | |
| "epoch": 0.8558746538184301, | |
| "grad_norm": 0.8934971320404392, | |
| "learning_rate": 6.225564659516653e-07, | |
| "loss": 0.4427, | |
| "step": 2820 | |
| }, | |
| { | |
| "epoch": 0.8589096703213324, | |
| "grad_norm": 0.42447350665857003, | |
| "learning_rate": 5.972016718406832e-07, | |
| "loss": 0.445, | |
| "step": 2830 | |
| }, | |
| { | |
| "epoch": 0.8619446868242346, | |
| "grad_norm": 0.5221390912623963, | |
| "learning_rate": 5.723411600535378e-07, | |
| "loss": 0.4493, | |
| "step": 2840 | |
| }, | |
| { | |
| "epoch": 0.8649797033271368, | |
| "grad_norm": 0.470728180227603, | |
| "learning_rate": 5.4797772156783e-07, | |
| "loss": 0.4436, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 0.8680147198300391, | |
| "grad_norm": 0.47672096853939294, | |
| "learning_rate": 5.24114091556992e-07, | |
| "loss": 0.4405, | |
| "step": 2860 | |
| }, | |
| { | |
| "epoch": 0.8710497363329414, | |
| "grad_norm": 0.4546129075040933, | |
| "learning_rate": 5.00752949083202e-07, | |
| "loss": 0.4534, | |
| "step": 2870 | |
| }, | |
| { | |
| "epoch": 0.8740847528358435, | |
| "grad_norm": 0.463106848164873, | |
| "learning_rate": 4.778969167966346e-07, | |
| "loss": 0.444, | |
| "step": 2880 | |
| }, | |
| { | |
| "epoch": 0.8771197693387458, | |
| "grad_norm": 0.4491761462781034, | |
| "learning_rate": 4.5554856064101314e-07, | |
| "loss": 0.436, | |
| "step": 2890 | |
| }, | |
| { | |
| "epoch": 0.880154785841648, | |
| "grad_norm": 0.4675366494918407, | |
| "learning_rate": 4.337103895655581e-07, | |
| "loss": 0.4531, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 0.8831898023445502, | |
| "grad_norm": 0.44541163156936503, | |
| "learning_rate": 4.123848552433019e-07, | |
| "loss": 0.4375, | |
| "step": 2910 | |
| }, | |
| { | |
| "epoch": 0.8862248188474525, | |
| "grad_norm": 0.49240823007466994, | |
| "learning_rate": 3.9157435179586756e-07, | |
| "loss": 0.4374, | |
| "step": 2920 | |
| }, | |
| { | |
| "epoch": 0.8892598353503547, | |
| "grad_norm": 0.4733511579541881, | |
| "learning_rate": 3.712812155246759e-07, | |
| "loss": 0.4441, | |
| "step": 2930 | |
| }, | |
| { | |
| "epoch": 0.892294851853257, | |
| "grad_norm": 0.5930159408214484, | |
| "learning_rate": 3.5150772464867314e-07, | |
| "loss": 0.4441, | |
| "step": 2940 | |
| }, | |
| { | |
| "epoch": 0.8953298683561592, | |
| "grad_norm": 0.4915855850221476, | |
| "learning_rate": 3.322560990485535e-07, | |
| "loss": 0.4475, | |
| "step": 2950 | |
| }, | |
| { | |
| "epoch": 0.8983648848590614, | |
| "grad_norm": 0.4400578392570043, | |
| "learning_rate": 3.135285000175531e-07, | |
| "loss": 0.437, | |
| "step": 2960 | |
| }, | |
| { | |
| "epoch": 0.9013999013619637, | |
| "grad_norm": 1.0625536086809468, | |
| "learning_rate": 2.953270300188038e-07, | |
| "loss": 0.4461, | |
| "step": 2970 | |
| }, | |
| { | |
| "epoch": 0.9044349178648659, | |
| "grad_norm": 0.47898803434881615, | |
| "learning_rate": 2.776537324493045e-07, | |
| "loss": 0.4411, | |
| "step": 2980 | |
| }, | |
| { | |
| "epoch": 0.9074699343677681, | |
| "grad_norm": 0.48013757465246243, | |
| "learning_rate": 2.6051059141051713e-07, | |
| "loss": 0.4463, | |
| "step": 2990 | |
| }, | |
| { | |
| "epoch": 0.9105049508706704, | |
| "grad_norm": 0.46792544413322756, | |
| "learning_rate": 2.4389953148561574e-07, | |
| "loss": 0.4541, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.9135399673735726, | |
| "grad_norm": 0.47509286467451683, | |
| "learning_rate": 2.2782241752343004e-07, | |
| "loss": 0.4392, | |
| "step": 3010 | |
| }, | |
| { | |
| "epoch": 0.9165749838764748, | |
| "grad_norm": 0.5355760986925688, | |
| "learning_rate": 2.122810544290782e-07, | |
| "loss": 0.4459, | |
| "step": 3020 | |
| }, | |
| { | |
| "epoch": 0.919610000379377, | |
| "grad_norm": 0.4481951370049728, | |
| "learning_rate": 1.972771869613499e-07, | |
| "loss": 0.4408, | |
| "step": 3030 | |
| }, | |
| { | |
| "epoch": 0.9226450168822793, | |
| "grad_norm": 1.212596637597613, | |
| "learning_rate": 1.8281249953681633e-07, | |
| "loss": 0.4524, | |
| "step": 3040 | |
| }, | |
| { | |
| "epoch": 0.9256800333851816, | |
| "grad_norm": 0.4653062821594877, | |
| "learning_rate": 1.6888861604074158e-07, | |
| "loss": 0.4547, | |
| "step": 3050 | |
| }, | |
| { | |
| "epoch": 0.9287150498880837, | |
| "grad_norm": 0.4417864126009544, | |
| "learning_rate": 1.5550709964476606e-07, | |
| "loss": 0.4343, | |
| "step": 3060 | |
| }, | |
| { | |
| "epoch": 0.931750066390986, | |
| "grad_norm": 0.4636627611118036, | |
| "learning_rate": 1.4266945263142152e-07, | |
| "loss": 0.4442, | |
| "step": 3070 | |
| }, | |
| { | |
| "epoch": 0.9347850828938883, | |
| "grad_norm": 0.4750602636819015, | |
| "learning_rate": 1.3037711622547633e-07, | |
| "loss": 0.4402, | |
| "step": 3080 | |
| }, | |
| { | |
| "epoch": 0.9378200993967905, | |
| "grad_norm": 0.5641879257431062, | |
| "learning_rate": 1.1863147043213453e-07, | |
| "loss": 0.4463, | |
| "step": 3090 | |
| }, | |
| { | |
| "epoch": 0.9408551158996927, | |
| "grad_norm": 0.48298472160583994, | |
| "learning_rate": 1.0743383388210849e-07, | |
| "loss": 0.4509, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 0.9438901324025949, | |
| "grad_norm": 0.4709867848354663, | |
| "learning_rate": 9.678546368358299e-08, | |
| "loss": 0.4469, | |
| "step": 3110 | |
| }, | |
| { | |
| "epoch": 0.9469251489054972, | |
| "grad_norm": 0.45111687792423893, | |
| "learning_rate": 8.668755528108586e-08, | |
| "loss": 0.446, | |
| "step": 3120 | |
| }, | |
| { | |
| "epoch": 0.9499601654083994, | |
| "grad_norm": 0.4640080663242888, | |
| "learning_rate": 7.714124232127974e-08, | |
| "loss": 0.447, | |
| "step": 3130 | |
| }, | |
| { | |
| "epoch": 0.9529951819113016, | |
| "grad_norm": 0.4835533725431675, | |
| "learning_rate": 6.814759652569391e-08, | |
| "loss": 0.4471, | |
| "step": 3140 | |
| }, | |
| { | |
| "epoch": 0.9560301984142039, | |
| "grad_norm": 0.4775784576850005, | |
| "learning_rate": 5.970762757040339e-08, | |
| "loss": 0.4581, | |
| "step": 3150 | |
| }, | |
| { | |
| "epoch": 0.9590652149171061, | |
| "grad_norm": 0.4406150853474837, | |
| "learning_rate": 5.182228297268388e-08, | |
| "loss": 0.4377, | |
| "step": 3160 | |
| }, | |
| { | |
| "epoch": 0.9621002314200083, | |
| "grad_norm": 0.4875065749326578, | |
| "learning_rate": 4.449244798463037e-08, | |
| "loss": 0.4466, | |
| "step": 3170 | |
| }, | |
| { | |
| "epoch": 0.9651352479229106, | |
| "grad_norm": 0.5331920537623118, | |
| "learning_rate": 3.7718945493781523e-08, | |
| "loss": 0.4457, | |
| "step": 3180 | |
| }, | |
| { | |
| "epoch": 0.9681702644258128, | |
| "grad_norm": 0.4829553299763894, | |
| "learning_rate": 3.150253593073027e-08, | |
| "loss": 0.4373, | |
| "step": 3190 | |
| }, | |
| { | |
| "epoch": 0.971205280928715, | |
| "grad_norm": 0.4683361415791196, | |
| "learning_rate": 2.5843917183761002e-08, | |
| "loss": 0.4541, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 0.9742402974316173, | |
| "grad_norm": 0.4647148561695759, | |
| "learning_rate": 2.0743724520495e-08, | |
| "loss": 0.4372, | |
| "step": 3210 | |
| }, | |
| { | |
| "epoch": 0.9772753139345195, | |
| "grad_norm": 0.45464384300389377, | |
| "learning_rate": 1.6202530516574165e-08, | |
| "loss": 0.4495, | |
| "step": 3220 | |
| }, | |
| { | |
| "epoch": 0.9803103304374218, | |
| "grad_norm": 0.4823468219123769, | |
| "learning_rate": 1.222084499138243e-08, | |
| "loss": 0.4408, | |
| "step": 3230 | |
| }, | |
| { | |
| "epoch": 0.9833453469403239, | |
| "grad_norm": 0.4802790220017127, | |
| "learning_rate": 8.799114950806542e-09, | |
| "loss": 0.4419, | |
| "step": 3240 | |
| }, | |
| { | |
| "epoch": 0.9863803634432262, | |
| "grad_norm": 0.46706355617957335, | |
| "learning_rate": 5.9377245370551005e-09, | |
| "loss": 0.4453, | |
| "step": 3250 | |
| }, | |
| { | |
| "epoch": 0.9894153799461285, | |
| "grad_norm": 0.440626466158207, | |
| "learning_rate": 3.636994985534159e-09, | |
| "loss": 0.4345, | |
| "step": 3260 | |
| }, | |
| { | |
| "epoch": 0.9924503964490307, | |
| "grad_norm": 0.4555769223211312, | |
| "learning_rate": 1.8971845887794105e-09, | |
| "loss": 0.4468, | |
| "step": 3270 | |
| }, | |
| { | |
| "epoch": 0.9954854129519329, | |
| "grad_norm": 0.4398220412821882, | |
| "learning_rate": 7.184886674627134e-10, | |
| "loss": 0.4384, | |
| "step": 3280 | |
| }, | |
| { | |
| "epoch": 0.9985204294548352, | |
| "grad_norm": 0.45149244572807423, | |
| "learning_rate": 1.010395484624116e-10, | |
| "loss": 0.44, | |
| "step": 3290 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "step": 3295, | |
| "total_flos": 8.530187773318005e+18, | |
| "train_loss": 0.4728999632081421, | |
| "train_runtime": 62323.8697, | |
| "train_samples_per_second": 6.767, | |
| "train_steps_per_second": 0.053 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 3295, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 50000, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 8.530187773318005e+18, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |