Image-Text-to-Text
Transformers
Safetensors
qwen3_5
llama-factory
full
Generated from Trainer
conversational
Instructions to use furproxy/9b-70 with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use furproxy/9b-70 with Transformers:
# Use a pipeline as a high-level helper from transformers import pipeline pipe = pipeline("image-text-to-text", model="furproxy/9b-70") messages = [ { "role": "user", "content": [ {"type": "image", "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/p-blog/candy.JPG"}, {"type": "text", "text": "What animal is on the candy?"} ] }, ] pipe(text=messages)# Load model directly from transformers import AutoProcessor, AutoModelForImageTextToText processor = AutoProcessor.from_pretrained("furproxy/9b-70") model = AutoModelForImageTextToText.from_pretrained("furproxy/9b-70") messages = [ { "role": "user", "content": [ {"type": "image", "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/p-blog/candy.JPG"}, {"type": "text", "text": "What animal is on the candy?"} ] }, ] inputs = processor.apply_chat_template( messages, add_generation_prompt=True, tokenize=True, return_dict=True, return_tensors="pt", ).to(model.device) outputs = model.generate(**inputs, max_new_tokens=40) print(processor.decode(outputs[0][inputs["input_ids"].shape[-1]:])) - Notebooks
- Google Colab
- Kaggle
- Local Apps Settings
- vLLM
How to use furproxy/9b-70 with vLLM:
Install from pip and serve model
# Install vLLM from pip: pip install vllm # Start the vLLM server: vllm serve "furproxy/9b-70" # Call the server using curl (OpenAI-compatible API): curl -X POST "http://localhost:8000/v1/chat/completions" \ -H "Content-Type: application/json" \ --data '{ "model": "furproxy/9b-70", "messages": [ { "role": "user", "content": [ { "type": "text", "text": "Describe this image in one sentence." }, { "type": "image_url", "image_url": { "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg" } } ] } ] }'Use Docker
docker model run hf.co/furproxy/9b-70
- SGLang
How to use furproxy/9b-70 with SGLang:
Install from pip and serve model
# Install SGLang from pip: pip install sglang # Start the SGLang server: python3 -m sglang.launch_server \ --model-path "furproxy/9b-70" \ --host 0.0.0.0 \ --port 30000 # Call the server using curl (OpenAI-compatible API): curl -X POST "http://localhost:30000/v1/chat/completions" \ -H "Content-Type: application/json" \ --data '{ "model": "furproxy/9b-70", "messages": [ { "role": "user", "content": [ { "type": "text", "text": "Describe this image in one sentence." }, { "type": "image_url", "image_url": { "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg" } } ] } ] }'Use Docker images
docker run --gpus all \ --shm-size 32g \ -p 30000:30000 \ -v ~/.cache/huggingface:/root/.cache/huggingface \ --env "HF_TOKEN=<secret>" \ --ipc=host \ lmsysorg/sglang:latest \ python3 -m sglang.launch_server \ --model-path "furproxy/9b-70" \ --host 0.0.0.0 \ --port 30000 # Call the server using curl (OpenAI-compatible API): curl -X POST "http://localhost:30000/v1/chat/completions" \ -H "Content-Type: application/json" \ --data '{ "model": "furproxy/9b-70", "messages": [ { "role": "user", "content": [ { "type": "text", "text": "Describe this image in one sentence." }, { "type": "image_url", "image_url": { "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg" } } ] } ] }' - Docker Model Runner
How to use furproxy/9b-70 with Docker Model Runner:
docker model run hf.co/furproxy/9b-70
| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 4.0, | |
| "eval_steps": 500, | |
| "global_step": 1004, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.00796812749003984, | |
| "grad_norm": 0.8687085509300232, | |
| "learning_rate": 3.921568627450981e-07, | |
| "loss": 2.0886306762695312, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.01593625498007968, | |
| "grad_norm": 0.8256942629814148, | |
| "learning_rate": 1.1764705882352942e-06, | |
| "loss": 2.0069615840911865, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.02390438247011952, | |
| "grad_norm": 7.175477027893066, | |
| "learning_rate": 1.96078431372549e-06, | |
| "loss": 2.1241238117218018, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.03187250996015936, | |
| "grad_norm": 0.5740149617195129, | |
| "learning_rate": 2.7450980392156867e-06, | |
| "loss": 1.8192579746246338, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.0398406374501992, | |
| "grad_norm": 0.9182372093200684, | |
| "learning_rate": 3.529411764705883e-06, | |
| "loss": 1.282393455505371, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.04780876494023904, | |
| "grad_norm": 0.5668178796768188, | |
| "learning_rate": 4.313725490196079e-06, | |
| "loss": 1.586937665939331, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.055776892430278883, | |
| "grad_norm": 0.509973406791687, | |
| "learning_rate": 5.098039215686274e-06, | |
| "loss": 1.6420693397521973, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.06374501992031872, | |
| "grad_norm": 8.057994842529297, | |
| "learning_rate": 5.882352941176471e-06, | |
| "loss": 1.7643235921859741, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.07171314741035857, | |
| "grad_norm": 10.05883502960205, | |
| "learning_rate": 6.666666666666667e-06, | |
| "loss": 1.5502036809921265, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.0796812749003984, | |
| "grad_norm": 0.6553092002868652, | |
| "learning_rate": 7.450980392156863e-06, | |
| "loss": 1.4846529960632324, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.08764940239043825, | |
| "grad_norm": 0.5488642454147339, | |
| "learning_rate": 8.23529411764706e-06, | |
| "loss": 1.144375205039978, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.09561752988047809, | |
| "grad_norm": 0.6479690670967102, | |
| "learning_rate": 9.019607843137256e-06, | |
| "loss": 1.2942190170288086, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.10358565737051793, | |
| "grad_norm": 0.27360522747039795, | |
| "learning_rate": 9.803921568627451e-06, | |
| "loss": 1.4014755487442017, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.11155378486055777, | |
| "grad_norm": 0.4664228558540344, | |
| "learning_rate": 1.0588235294117648e-05, | |
| "loss": 1.6535426378250122, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.11952191235059761, | |
| "grad_norm": 0.8422274589538574, | |
| "learning_rate": 1.1372549019607844e-05, | |
| "loss": 1.3123046159744263, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.12749003984063745, | |
| "grad_norm": 0.6282227635383606, | |
| "learning_rate": 1.215686274509804e-05, | |
| "loss": 1.153808832168579, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.13545816733067728, | |
| "grad_norm": 1.4378248453140259, | |
| "learning_rate": 1.2941176470588238e-05, | |
| "loss": 1.2372275590896606, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.14342629482071714, | |
| "grad_norm": 0.4517601728439331, | |
| "learning_rate": 1.3725490196078432e-05, | |
| "loss": 1.3791842460632324, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.15139442231075698, | |
| "grad_norm": 0.9697667360305786, | |
| "learning_rate": 1.4509803921568629e-05, | |
| "loss": 1.1730167865753174, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.1593625498007968, | |
| "grad_norm": 0.3961893916130066, | |
| "learning_rate": 1.5294117647058822e-05, | |
| "loss": 1.292687177658081, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.16733067729083664, | |
| "grad_norm": 1.2038718461990356, | |
| "learning_rate": 1.607843137254902e-05, | |
| "loss": 0.8232792615890503, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.1752988047808765, | |
| "grad_norm": 0.4270932078361511, | |
| "learning_rate": 1.686274509803922e-05, | |
| "loss": 0.9939359426498413, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.18326693227091634, | |
| "grad_norm": 0.7167245149612427, | |
| "learning_rate": 1.7647058823529414e-05, | |
| "loss": 0.9492533802986145, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.19123505976095617, | |
| "grad_norm": 0.45268625020980835, | |
| "learning_rate": 1.843137254901961e-05, | |
| "loss": 1.1746896505355835, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.199203187250996, | |
| "grad_norm": 0.41129186749458313, | |
| "learning_rate": 1.9215686274509807e-05, | |
| "loss": 1.3339111804962158, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.20717131474103587, | |
| "grad_norm": 0.4729901850223541, | |
| "learning_rate": 2e-05, | |
| "loss": 1.2871543169021606, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.2151394422310757, | |
| "grad_norm": 1.602961778640747, | |
| "learning_rate": 1.9999804392782173e-05, | |
| "loss": 1.207393765449524, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.22310756972111553, | |
| "grad_norm": 0.46118712425231934, | |
| "learning_rate": 1.9999217579631398e-05, | |
| "loss": 1.5350557565689087, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.23107569721115537, | |
| "grad_norm": 0.8428758382797241, | |
| "learning_rate": 1.9998239586055426e-05, | |
| "loss": 1.3389298915863037, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.23904382470119523, | |
| "grad_norm": 0.30435454845428467, | |
| "learning_rate": 1.999687045456595e-05, | |
| "loss": 0.7883101105690002, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.24701195219123506, | |
| "grad_norm": 0.37775376439094543, | |
| "learning_rate": 1.9995110244676744e-05, | |
| "loss": 1.3454675674438477, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.2549800796812749, | |
| "grad_norm": 0.5100111365318298, | |
| "learning_rate": 1.999295903290109e-05, | |
| "loss": 1.1524405479431152, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.26294820717131473, | |
| "grad_norm": 0.3763675093650818, | |
| "learning_rate": 1.999041691274844e-05, | |
| "loss": 1.2753775119781494, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.27091633466135456, | |
| "grad_norm": 2.065988063812256, | |
| "learning_rate": 1.998748399472037e-05, | |
| "loss": 1.1413958072662354, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.2788844621513944, | |
| "grad_norm": 0.4919253885746002, | |
| "learning_rate": 1.9984160406305745e-05, | |
| "loss": 1.6083652973175049, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.2868525896414343, | |
| "grad_norm": 0.816326916217804, | |
| "learning_rate": 1.9980446291975217e-05, | |
| "loss": 0.9345599412918091, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.2948207171314741, | |
| "grad_norm": 0.6255055665969849, | |
| "learning_rate": 1.99763418131749e-05, | |
| "loss": 1.1615979671478271, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.30278884462151395, | |
| "grad_norm": 0.5406404733657837, | |
| "learning_rate": 1.9971847148319392e-05, | |
| "loss": 1.4970171451568604, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.3107569721115538, | |
| "grad_norm": 1.2664341926574707, | |
| "learning_rate": 1.9966962492784006e-05, | |
| "loss": 1.1129828691482544, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.3187250996015936, | |
| "grad_norm": 0.3932921290397644, | |
| "learning_rate": 1.996168805889627e-05, | |
| "loss": 0.9678430557250977, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.32669322709163345, | |
| "grad_norm": 0.7051851153373718, | |
| "learning_rate": 1.99560240759267e-05, | |
| "loss": 1.42896568775177, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.3346613545816733, | |
| "grad_norm": 0.461885541677475, | |
| "learning_rate": 1.9949970790078847e-05, | |
| "loss": 0.9989966154098511, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.3426294820717131, | |
| "grad_norm": 0.3382815718650818, | |
| "learning_rate": 1.9943528464478573e-05, | |
| "loss": 1.3530247211456299, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.350597609561753, | |
| "grad_norm": 0.6359050869941711, | |
| "learning_rate": 1.993669737916263e-05, | |
| "loss": 1.0272246599197388, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.35856573705179284, | |
| "grad_norm": 0.4157576560974121, | |
| "learning_rate": 1.9929477831066485e-05, | |
| "loss": 1.3488913774490356, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.3665338645418327, | |
| "grad_norm": 0.5085843205451965, | |
| "learning_rate": 1.992187013401141e-05, | |
| "loss": 1.1709015369415283, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.3745019920318725, | |
| "grad_norm": 0.3481963872909546, | |
| "learning_rate": 1.9913874618690837e-05, | |
| "loss": 1.2576426267623901, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.38247011952191234, | |
| "grad_norm": 0.5730408430099487, | |
| "learning_rate": 1.9905491632655996e-05, | |
| "loss": 1.142296314239502, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.3904382470119522, | |
| "grad_norm": 0.34298205375671387, | |
| "learning_rate": 1.989672154030078e-05, | |
| "loss": 1.2693374156951904, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.398406374501992, | |
| "grad_norm": 0.34943079948425293, | |
| "learning_rate": 1.988756472284595e-05, | |
| "loss": 0.903995931148529, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.4063745019920319, | |
| "grad_norm": 0.3412981927394867, | |
| "learning_rate": 1.9878021578322518e-05, | |
| "loss": 1.2715134620666504, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.41434262948207173, | |
| "grad_norm": 0.29975271224975586, | |
| "learning_rate": 1.9868092521554478e-05, | |
| "loss": 1.042019248008728, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.42231075697211157, | |
| "grad_norm": 2.0418882369995117, | |
| "learning_rate": 1.9857777984140746e-05, | |
| "loss": 0.7882804870605469, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.4302788844621514, | |
| "grad_norm": 0.5299073457717896, | |
| "learning_rate": 1.9847078414436438e-05, | |
| "loss": 1.0833579301834106, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.43824701195219123, | |
| "grad_norm": 0.335245817899704, | |
| "learning_rate": 1.983599427753334e-05, | |
| "loss": 1.2959026098251343, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.44621513944223107, | |
| "grad_norm": 0.5007705688476562, | |
| "learning_rate": 1.9824526055239712e-05, | |
| "loss": 0.92479407787323, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.4541832669322709, | |
| "grad_norm": 0.5642781257629395, | |
| "learning_rate": 1.981267424605935e-05, | |
| "loss": 1.1809154748916626, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.46215139442231074, | |
| "grad_norm": 0.32401934266090393, | |
| "learning_rate": 1.98004393651699e-05, | |
| "loss": 1.2920209169387817, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.4701195219123506, | |
| "grad_norm": 0.31211525201797485, | |
| "learning_rate": 1.9787821944400477e-05, | |
| "loss": 1.3557462692260742, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 0.47808764940239046, | |
| "grad_norm": 0.34074831008911133, | |
| "learning_rate": 1.9774822532208537e-05, | |
| "loss": 1.2343910932540894, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.4860557768924303, | |
| "grad_norm": 0.4543141722679138, | |
| "learning_rate": 1.9761441693656052e-05, | |
| "loss": 1.018692970275879, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 0.4940239043824701, | |
| "grad_norm": 0.7025930881500244, | |
| "learning_rate": 1.974768001038493e-05, | |
| "loss": 1.0649566650390625, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.50199203187251, | |
| "grad_norm": 0.3762131333351135, | |
| "learning_rate": 1.973353808059175e-05, | |
| "loss": 0.9526509642601013, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 0.5099601593625498, | |
| "grad_norm": 0.33326825499534607, | |
| "learning_rate": 1.9719016519001738e-05, | |
| "loss": 1.2880756855010986, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.5179282868525896, | |
| "grad_norm": 0.2846304774284363, | |
| "learning_rate": 1.9704115956842066e-05, | |
| "loss": 0.9497164487838745, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.5258964143426295, | |
| "grad_norm": 0.4433225095272064, | |
| "learning_rate": 1.9688837041814396e-05, | |
| "loss": 0.9790914058685303, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 0.5338645418326693, | |
| "grad_norm": 0.4545140564441681, | |
| "learning_rate": 1.967318043806675e-05, | |
| "loss": 0.9353604316711426, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 0.5418326693227091, | |
| "grad_norm": 0.3878089189529419, | |
| "learning_rate": 1.965714682616461e-05, | |
| "loss": 1.3441240787506104, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.549800796812749, | |
| "grad_norm": 2.243861436843872, | |
| "learning_rate": 1.9640736903061363e-05, | |
| "loss": 0.9743496775627136, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 0.5577689243027888, | |
| "grad_norm": 0.9255803227424622, | |
| "learning_rate": 1.9623951382067983e-05, | |
| "loss": 1.113278865814209, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.5657370517928287, | |
| "grad_norm": 0.2995326519012451, | |
| "learning_rate": 1.960679099282204e-05, | |
| "loss": 1.0455888509750366, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 0.5737051792828686, | |
| "grad_norm": 0.29048699140548706, | |
| "learning_rate": 1.9589256481255984e-05, | |
| "loss": 1.2361657619476318, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.5816733067729084, | |
| "grad_norm": 0.45288383960723877, | |
| "learning_rate": 1.9571348609564697e-05, | |
| "loss": 1.191515326499939, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 0.5896414342629482, | |
| "grad_norm": 0.4257696270942688, | |
| "learning_rate": 1.9553068156172404e-05, | |
| "loss": 1.0653226375579834, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 0.5976095617529881, | |
| "grad_norm": 0.8186884522438049, | |
| "learning_rate": 1.9534415915698793e-05, | |
| "loss": 0.9824154376983643, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.6055776892430279, | |
| "grad_norm": 0.33720093965530396, | |
| "learning_rate": 1.9515392698924504e-05, | |
| "loss": 0.9725387096405029, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 0.6135458167330677, | |
| "grad_norm": 0.9997106194496155, | |
| "learning_rate": 1.949599933275587e-05, | |
| "loss": 1.1494280099868774, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 0.6215139442231076, | |
| "grad_norm": 1.5014402866363525, | |
| "learning_rate": 1.9476236660188982e-05, | |
| "loss": 1.0726919174194336, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 0.6294820717131474, | |
| "grad_norm": 0.7345775365829468, | |
| "learning_rate": 1.9456105540273035e-05, | |
| "loss": 1.194985270500183, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 0.6374501992031872, | |
| "grad_norm": 0.3693063259124756, | |
| "learning_rate": 1.9435606848073003e-05, | |
| "loss": 1.0574865341186523, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.6454183266932271, | |
| "grad_norm": 0.16802608966827393, | |
| "learning_rate": 1.9414741474631586e-05, | |
| "loss": 0.7915345430374146, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 0.6533864541832669, | |
| "grad_norm": 0.24361276626586914, | |
| "learning_rate": 1.939351032693048e-05, | |
| "loss": 1.1572449207305908, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 0.6613545816733067, | |
| "grad_norm": 0.2987329065799713, | |
| "learning_rate": 1.9371914327850958e-05, | |
| "loss": 0.880090057849884, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 0.6693227091633466, | |
| "grad_norm": 0.757393479347229, | |
| "learning_rate": 1.934995441613376e-05, | |
| "loss": 1.2498260736465454, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 0.6772908366533864, | |
| "grad_norm": 1.6156872510910034, | |
| "learning_rate": 1.9327631546338263e-05, | |
| "loss": 1.065740704536438, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.6852589641434262, | |
| "grad_norm": 0.285990446805954, | |
| "learning_rate": 1.9304946688801014e-05, | |
| "loss": 0.9508867859840393, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 0.6932270916334662, | |
| "grad_norm": 0.2722166180610657, | |
| "learning_rate": 1.9281900829593544e-05, | |
| "loss": 1.246326208114624, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 0.701195219123506, | |
| "grad_norm": 0.6919268369674683, | |
| "learning_rate": 1.9258494970479494e-05, | |
| "loss": 0.8008362054824829, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 0.7091633466135459, | |
| "grad_norm": 0.7445635795593262, | |
| "learning_rate": 1.923473012887109e-05, | |
| "loss": 0.9828277826309204, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 0.7171314741035857, | |
| "grad_norm": 0.55351722240448, | |
| "learning_rate": 1.9210607337784885e-05, | |
| "loss": 1.158448576927185, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.7250996015936255, | |
| "grad_norm": 0.13257816433906555, | |
| "learning_rate": 1.9186127645796902e-05, | |
| "loss": 0.5361558198928833, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 0.7330677290836654, | |
| "grad_norm": 0.24445906281471252, | |
| "learning_rate": 1.916129211699701e-05, | |
| "loss": 1.3766955137252808, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 0.7410358565737052, | |
| "grad_norm": 0.4627974033355713, | |
| "learning_rate": 1.91361018309427e-05, | |
| "loss": 1.304077386856079, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 0.749003984063745, | |
| "grad_norm": 0.26658743619918823, | |
| "learning_rate": 1.911055788261214e-05, | |
| "loss": 1.151049256324768, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 0.7569721115537849, | |
| "grad_norm": 0.3607999086380005, | |
| "learning_rate": 1.9084661382356592e-05, | |
| "loss": 0.9221289157867432, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.7649402390438247, | |
| "grad_norm": 1.0667331218719482, | |
| "learning_rate": 1.9058413455852142e-05, | |
| "loss": 1.2540066242218018, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 0.7729083665338645, | |
| "grad_norm": 0.35192084312438965, | |
| "learning_rate": 1.9031815244050752e-05, | |
| "loss": 0.9460887908935547, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 0.7808764940239044, | |
| "grad_norm": 0.39151957631111145, | |
| "learning_rate": 1.9004867903130694e-05, | |
| "loss": 0.6812059283256531, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 0.7888446215139442, | |
| "grad_norm": 0.4794232249259949, | |
| "learning_rate": 1.897757260444628e-05, | |
| "loss": 0.7969710826873779, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 0.796812749003984, | |
| "grad_norm": 0.4055477976799011, | |
| "learning_rate": 1.8949930534476938e-05, | |
| "loss": 1.0776989459991455, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.8047808764940239, | |
| "grad_norm": 0.13282935321331024, | |
| "learning_rate": 1.892194289477565e-05, | |
| "loss": 1.0324515104293823, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 0.8127490039840638, | |
| "grad_norm": 0.31035351753234863, | |
| "learning_rate": 1.8893610901916707e-05, | |
| "loss": 1.5635279417037964, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 0.8207171314741036, | |
| "grad_norm": 0.34337109327316284, | |
| "learning_rate": 1.886493578744284e-05, | |
| "loss": 1.242823600769043, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 0.8286852589641435, | |
| "grad_norm": 0.1983502358198166, | |
| "learning_rate": 1.8835918797811693e-05, | |
| "loss": 0.6256985068321228, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 0.8366533864541833, | |
| "grad_norm": 0.34944215416908264, | |
| "learning_rate": 1.880656119434162e-05, | |
| "loss": 1.284050464630127, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.8446215139442231, | |
| "grad_norm": 0.22934027016162872, | |
| "learning_rate": 1.8776864253156875e-05, | |
| "loss": 1.2318283319473267, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 0.852589641434263, | |
| "grad_norm": 0.2371150404214859, | |
| "learning_rate": 1.874682926513213e-05, | |
| "loss": 0.9791166186332703, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 0.8605577689243028, | |
| "grad_norm": 1.4798812866210938, | |
| "learning_rate": 1.871645753583638e-05, | |
| "loss": 1.452410340309143, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 0.8685258964143426, | |
| "grad_norm": 0.7958971858024597, | |
| "learning_rate": 1.8685750385476166e-05, | |
| "loss": 0.916776716709137, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 0.8764940239043825, | |
| "grad_norm": 1.7680853605270386, | |
| "learning_rate": 1.8654709148838218e-05, | |
| "loss": 1.2467654943466187, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.8844621513944223, | |
| "grad_norm": 0.6561282873153687, | |
| "learning_rate": 1.8623335175231402e-05, | |
| "loss": 1.5111079216003418, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 0.8924302788844621, | |
| "grad_norm": 1.2204989194869995, | |
| "learning_rate": 1.85916298284281e-05, | |
| "loss": 0.7252726554870605, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 0.900398406374502, | |
| "grad_norm": 0.29036325216293335, | |
| "learning_rate": 1.8559594486604905e-05, | |
| "loss": 1.0382778644561768, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 0.9083665338645418, | |
| "grad_norm": 0.6182889938354492, | |
| "learning_rate": 1.8527230542282724e-05, | |
| "loss": 1.1135358810424805, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 0.9163346613545816, | |
| "grad_norm": 0.3456054925918579, | |
| "learning_rate": 1.849453940226625e-05, | |
| "loss": 1.1879663467407227, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.9243027888446215, | |
| "grad_norm": 0.5696732401847839, | |
| "learning_rate": 1.8461522487582803e-05, | |
| "loss": 1.3329076766967773, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 0.9322709163346613, | |
| "grad_norm": 0.38616132736206055, | |
| "learning_rate": 1.8428181233420565e-05, | |
| "loss": 0.8451637029647827, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 0.9402390438247012, | |
| "grad_norm": 0.25511351227760315, | |
| "learning_rate": 1.8394517089066197e-05, | |
| "loss": 0.8644623756408691, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 0.9482071713147411, | |
| "grad_norm": 0.2771114408969879, | |
| "learning_rate": 1.8360531517841832e-05, | |
| "loss": 0.5260741114616394, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 0.9561752988047809, | |
| "grad_norm": 1.012864589691162, | |
| "learning_rate": 1.8326225997041482e-05, | |
| "loss": 0.9196873903274536, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.9641434262948207, | |
| "grad_norm": 0.35297006368637085, | |
| "learning_rate": 1.8291602017866806e-05, | |
| "loss": 1.1390161514282227, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 0.9721115537848606, | |
| "grad_norm": 0.30612561106681824, | |
| "learning_rate": 1.8256661085362308e-05, | |
| "loss": 1.2218761444091797, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 0.9800796812749004, | |
| "grad_norm": 0.6584000587463379, | |
| "learning_rate": 1.8221404718349888e-05, | |
| "loss": 0.9740089774131775, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 0.9880478087649402, | |
| "grad_norm": 0.4018401801586151, | |
| "learning_rate": 1.8185834449362855e-05, | |
| "loss": 1.025275468826294, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 0.9960159362549801, | |
| "grad_norm": 1.4323194026947021, | |
| "learning_rate": 1.8149951824579283e-05, | |
| "loss": 1.0199095010757446, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 1.00398406374502, | |
| "grad_norm": 0.341948926448822, | |
| "learning_rate": 1.8113758403754823e-05, | |
| "loss": 0.9343042373657227, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 1.0119521912350598, | |
| "grad_norm": 0.38058942556381226, | |
| "learning_rate": 1.8077255760154883e-05, | |
| "loss": 0.7046458721160889, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 1.0199203187250996, | |
| "grad_norm": 0.5085623860359192, | |
| "learning_rate": 1.8040445480486254e-05, | |
| "loss": 1.0890356302261353, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 1.0278884462151394, | |
| "grad_norm": 0.4822426438331604, | |
| "learning_rate": 1.8003329164828133e-05, | |
| "loss": 1.0524687767028809, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 1.0358565737051793, | |
| "grad_norm": 0.38201844692230225, | |
| "learning_rate": 1.7965908426562582e-05, | |
| "loss": 0.9374608993530273, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 1.043824701195219, | |
| "grad_norm": 0.4644787311553955, | |
| "learning_rate": 1.7928184892304366e-05, | |
| "loss": 0.9212496876716614, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 1.051792828685259, | |
| "grad_norm": 0.721108078956604, | |
| "learning_rate": 1.789016020183029e-05, | |
| "loss": 1.007407546043396, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 1.0597609561752988, | |
| "grad_norm": 0.7453786730766296, | |
| "learning_rate": 1.7851836008007883e-05, | |
| "loss": 0.75822514295578, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 1.0677290836653386, | |
| "grad_norm": 1.0901387929916382, | |
| "learning_rate": 1.781321397672358e-05, | |
| "loss": 0.8778277039527893, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 1.0756972111553784, | |
| "grad_norm": 0.289546400308609, | |
| "learning_rate": 1.777429578681029e-05, | |
| "loss": 0.5031875371932983, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 1.0836653386454183, | |
| "grad_norm": 0.48452669382095337, | |
| "learning_rate": 1.7735083129974423e-05, | |
| "loss": 1.1954156160354614, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 1.091633466135458, | |
| "grad_norm": 0.5753158330917358, | |
| "learning_rate": 1.769557771072236e-05, | |
| "loss": 0.6099209189414978, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 1.099601593625498, | |
| "grad_norm": 0.4469282329082489, | |
| "learning_rate": 1.7655781246286345e-05, | |
| "loss": 0.8086526989936829, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 1.1075697211155378, | |
| "grad_norm": 0.8727370500564575, | |
| "learning_rate": 1.761569546654988e-05, | |
| "loss": 0.7694612741470337, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 1.1155378486055776, | |
| "grad_norm": 0.4654983580112457, | |
| "learning_rate": 1.7575322113972472e-05, | |
| "loss": 0.9192116260528564, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 1.1235059760956174, | |
| "grad_norm": 0.31477856636047363, | |
| "learning_rate": 1.753466294351395e-05, | |
| "loss": 1.049499750137329, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 1.1314741035856573, | |
| "grad_norm": 0.4698004126548767, | |
| "learning_rate": 1.7493719722558135e-05, | |
| "loss": 1.1166934967041016, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 1.139442231075697, | |
| "grad_norm": 0.20743758976459503, | |
| "learning_rate": 1.7452494230836034e-05, | |
| "loss": 0.3405384123325348, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 1.1474103585657371, | |
| "grad_norm": 0.21572747826576233, | |
| "learning_rate": 1.741098826034848e-05, | |
| "loss": 0.6110213994979858, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 1.155378486055777, | |
| "grad_norm": 0.9294064044952393, | |
| "learning_rate": 1.7369203615288227e-05, | |
| "loss": 0.86930912733078, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 1.1633466135458168, | |
| "grad_norm": 0.9067952036857605, | |
| "learning_rate": 1.7327142111961537e-05, | |
| "loss": 0.7051547169685364, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 1.1713147410358566, | |
| "grad_norm": 0.3776642382144928, | |
| "learning_rate": 1.7284805578709218e-05, | |
| "loss": 1.1381611824035645, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 1.1792828685258965, | |
| "grad_norm": 0.20979639887809753, | |
| "learning_rate": 1.724219585582716e-05, | |
| "loss": 1.2165122032165527, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 1.1872509960159363, | |
| "grad_norm": 0.3244563043117523, | |
| "learning_rate": 1.7199314795486325e-05, | |
| "loss": 1.2280006408691406, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 1.1952191235059761, | |
| "grad_norm": 0.7252885699272156, | |
| "learning_rate": 1.7156164261652247e-05, | |
| "loss": 0.6707150340080261, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 1.203187250996016, | |
| "grad_norm": 0.3757059574127197, | |
| "learning_rate": 1.711274613000401e-05, | |
| "loss": 0.8710638880729675, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 1.2111553784860558, | |
| "grad_norm": 0.30097636580467224, | |
| "learning_rate": 1.7069062287852717e-05, | |
| "loss": 0.5094844102859497, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 1.2191235059760956, | |
| "grad_norm": 0.48242101073265076, | |
| "learning_rate": 1.702511463405943e-05, | |
| "loss": 1.133294701576233, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 1.2270916334661355, | |
| "grad_norm": 0.7108588814735413, | |
| "learning_rate": 1.698090507895266e-05, | |
| "loss": 0.9296752214431763, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 1.2350597609561753, | |
| "grad_norm": 0.41015177965164185, | |
| "learning_rate": 1.693643554424532e-05, | |
| "loss": 1.068943738937378, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 1.2430278884462151, | |
| "grad_norm": 0.34054386615753174, | |
| "learning_rate": 1.6891707962951173e-05, | |
| "loss": 1.0081392526626587, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 1.250996015936255, | |
| "grad_norm": 2.2303950786590576, | |
| "learning_rate": 1.684672427930083e-05, | |
| "loss": 0.8653253316879272, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 1.2589641434262948, | |
| "grad_norm": 0.3943715989589691, | |
| "learning_rate": 1.680148644865722e-05, | |
| "loss": 0.7365275621414185, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 1.2669322709163346, | |
| "grad_norm": 0.33673974871635437, | |
| "learning_rate": 1.6755996437430622e-05, | |
| "loss": 0.6356416344642639, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 1.2749003984063745, | |
| "grad_norm": 0.9208086729049683, | |
| "learning_rate": 1.6710256222993137e-05, | |
| "loss": 0.5271879434585571, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 1.2828685258964143, | |
| "grad_norm": 0.45602428913116455, | |
| "learning_rate": 1.6664267793592797e-05, | |
| "loss": 0.9226617217063904, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 1.2908366533864541, | |
| "grad_norm": 0.5400494337081909, | |
| "learning_rate": 1.661803314826709e-05, | |
| "loss": 0.8759934902191162, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 1.298804780876494, | |
| "grad_norm": 1.4296598434448242, | |
| "learning_rate": 1.65715542967561e-05, | |
| "loss": 1.1430692672729492, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 1.3067729083665338, | |
| "grad_norm": 0.3259183168411255, | |
| "learning_rate": 1.652483325941511e-05, | |
| "loss": 0.7194448709487915, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 1.3147410358565736, | |
| "grad_norm": 0.40675562620162964, | |
| "learning_rate": 1.647787206712683e-05, | |
| "loss": 0.7846541404724121, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 1.3227091633466135, | |
| "grad_norm": 0.5686507821083069, | |
| "learning_rate": 1.6430672761213065e-05, | |
| "loss": 0.8795408010482788, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 1.3306772908366533, | |
| "grad_norm": 0.5391749739646912, | |
| "learning_rate": 1.6383237393346025e-05, | |
| "loss": 0.5901771187782288, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 1.3386454183266931, | |
| "grad_norm": 1.5842124223709106, | |
| "learning_rate": 1.633556802545911e-05, | |
| "loss": 0.8455764651298523, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 1.3466135458167332, | |
| "grad_norm": 0.6567497253417969, | |
| "learning_rate": 1.628766672965731e-05, | |
| "loss": 1.0685632228851318, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 1.354581673306773, | |
| "grad_norm": 0.28743740916252136, | |
| "learning_rate": 1.6239535588127107e-05, | |
| "loss": 1.0498569011688232, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 1.3625498007968129, | |
| "grad_norm": 0.24235428869724274, | |
| "learning_rate": 1.6191176693046e-05, | |
| "loss": 1.1290383338928223, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 1.3705179282868527, | |
| "grad_norm": 0.27160707116127014, | |
| "learning_rate": 1.6142592146491517e-05, | |
| "loss": 1.0615555047988892, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 1.3784860557768925, | |
| "grad_norm": 0.5951285362243652, | |
| "learning_rate": 1.6093784060349876e-05, | |
| "loss": 0.6895939111709595, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 1.3864541832669324, | |
| "grad_norm": 0.267936646938324, | |
| "learning_rate": 1.6044754556224178e-05, | |
| "loss": 1.1047388315200806, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 1.3944223107569722, | |
| "grad_norm": 0.2702598571777344, | |
| "learning_rate": 1.5995505765342176e-05, | |
| "loss": 0.6365596055984497, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 1.402390438247012, | |
| "grad_norm": 0.3268156051635742, | |
| "learning_rate": 1.594603982846364e-05, | |
| "loss": 1.1005016565322876, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 1.4103585657370519, | |
| "grad_norm": 0.49795475602149963, | |
| "learning_rate": 1.5896358895787304e-05, | |
| "loss": 1.080939769744873, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 1.4183266932270917, | |
| "grad_norm": 0.6326602101325989, | |
| "learning_rate": 1.5846465126857386e-05, | |
| "loss": 0.8446483612060547, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 1.4262948207171315, | |
| "grad_norm": 0.3423049747943878, | |
| "learning_rate": 1.579636069046975e-05, | |
| "loss": 1.112828016281128, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 1.4342629482071714, | |
| "grad_norm": 0.331617534160614, | |
| "learning_rate": 1.5746047764577586e-05, | |
| "loss": 0.556983470916748, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 1.4422310756972112, | |
| "grad_norm": 0.7952442169189453, | |
| "learning_rate": 1.5695528536196774e-05, | |
| "loss": 0.7050241827964783, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 1.450199203187251, | |
| "grad_norm": 0.49163997173309326, | |
| "learning_rate": 1.564480520131081e-05, | |
| "loss": 1.011673927307129, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 1.4581673306772909, | |
| "grad_norm": 0.5051209330558777, | |
| "learning_rate": 1.559387996477534e-05, | |
| "loss": 1.0447050333023071, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 1.4661354581673307, | |
| "grad_norm": 0.4506257474422455, | |
| "learning_rate": 1.5542755040222325e-05, | |
| "loss": 0.5882746577262878, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 1.4741035856573705, | |
| "grad_norm": 0.3848789930343628, | |
| "learning_rate": 1.5491432649963823e-05, | |
| "loss": 0.6615412831306458, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 1.4820717131474104, | |
| "grad_norm": 1.171698808670044, | |
| "learning_rate": 1.543991502489538e-05, | |
| "loss": 1.2864530086517334, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 1.4900398406374502, | |
| "grad_norm": 0.4419739544391632, | |
| "learning_rate": 1.5388204404399073e-05, | |
| "loss": 1.0378817319869995, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 1.49800796812749, | |
| "grad_norm": 0.23308596014976501, | |
| "learning_rate": 1.533630303624614e-05, | |
| "loss": 1.0560197830200195, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 1.5059760956175299, | |
| "grad_norm": 0.8023196458816528, | |
| "learning_rate": 1.5284213176499302e-05, | |
| "loss": 0.8570104241371155, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 1.5139442231075697, | |
| "grad_norm": 0.3250346779823303, | |
| "learning_rate": 1.5231937089414675e-05, | |
| "loss": 1.092508316040039, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 1.5219123505976095, | |
| "grad_norm": 0.25869283080101013, | |
| "learning_rate": 1.517947704734337e-05, | |
| "loss": 0.6813873648643494, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 1.5298804780876494, | |
| "grad_norm": 0.28996741771698, | |
| "learning_rate": 1.5126835330632677e-05, | |
| "loss": 0.6645854115486145, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 1.5378486055776892, | |
| "grad_norm": 1.5717236995697021, | |
| "learning_rate": 1.5074014227526997e-05, | |
| "loss": 0.9236294031143188, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 1.545816733067729, | |
| "grad_norm": 0.32109910249710083, | |
| "learning_rate": 1.502101603406833e-05, | |
| "loss": 1.0823489427566528, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 1.5537848605577689, | |
| "grad_norm": 0.7276486158370972, | |
| "learning_rate": 1.4967843053996494e-05, | |
| "loss": 0.5451988577842712, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 1.5617529880478087, | |
| "grad_norm": 0.3085494041442871, | |
| "learning_rate": 1.4914497598648973e-05, | |
| "loss": 0.5751717686653137, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 1.5697211155378485, | |
| "grad_norm": 1.1126803159713745, | |
| "learning_rate": 1.4860981986860464e-05, | |
| "loss": 1.058145523071289, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 1.5776892430278884, | |
| "grad_norm": 0.6459108591079712, | |
| "learning_rate": 1.4807298544862054e-05, | |
| "loss": 0.9958094358444214, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 1.5856573705179282, | |
| "grad_norm": 0.20127789676189423, | |
| "learning_rate": 1.4753449606180137e-05, | |
| "loss": 1.0852116346359253, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 1.593625498007968, | |
| "grad_norm": 0.3237609565258026, | |
| "learning_rate": 1.4699437511534947e-05, | |
| "loss": 0.6027621030807495, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 1.6015936254980079, | |
| "grad_norm": 0.4658999443054199, | |
| "learning_rate": 1.4645264608738832e-05, | |
| "loss": 1.0461393594741821, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 1.6095617529880477, | |
| "grad_norm": 0.22849686443805695, | |
| "learning_rate": 1.4590933252594193e-05, | |
| "loss": 1.1783668994903564, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 1.6175298804780875, | |
| "grad_norm": 0.464072585105896, | |
| "learning_rate": 1.4536445804791125e-05, | |
| "loss": 0.699569582939148, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 1.6254980079681274, | |
| "grad_norm": 0.18738897144794464, | |
| "learning_rate": 1.4481804633804746e-05, | |
| "loss": 0.5158436894416809, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 1.6334661354581672, | |
| "grad_norm": 0.4792879521846771, | |
| "learning_rate": 1.4427012114792269e-05, | |
| "loss": 0.7027358412742615, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 1.641434262948207, | |
| "grad_norm": 0.2847132980823517, | |
| "learning_rate": 1.4372070629489744e-05, | |
| "loss": 0.4589526355266571, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 1.6494023904382469, | |
| "grad_norm": 0.16233396530151367, | |
| "learning_rate": 1.4316982566108515e-05, | |
| "loss": 0.7499012351036072, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 1.6573705179282867, | |
| "grad_norm": 0.34753382205963135, | |
| "learning_rate": 1.4261750319231432e-05, | |
| "loss": 0.7557674050331116, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 1.6653386454183265, | |
| "grad_norm": 0.5319584608078003, | |
| "learning_rate": 1.4206376289708752e-05, | |
| "loss": 0.8566523194313049, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 1.6733067729083664, | |
| "grad_norm": 0.291802316904068, | |
| "learning_rate": 1.4150862884553776e-05, | |
| "loss": 1.1130759716033936, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 1.6812749003984062, | |
| "grad_norm": 0.19954712688922882, | |
| "learning_rate": 1.4095212516838216e-05, | |
| "loss": 0.9514334201812744, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 1.6892430278884463, | |
| "grad_norm": 0.3112265467643738, | |
| "learning_rate": 1.4039427605587326e-05, | |
| "loss": 0.5233280062675476, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 1.697211155378486, | |
| "grad_norm": 0.20384936034679413, | |
| "learning_rate": 1.398351057567472e-05, | |
| "loss": 1.1268967390060425, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 1.705179282868526, | |
| "grad_norm": 0.21102385222911835, | |
| "learning_rate": 1.392746385771699e-05, | |
| "loss": 1.063062071800232, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 1.7131474103585658, | |
| "grad_norm": 0.3036285936832428, | |
| "learning_rate": 1.3871289887968034e-05, | |
| "loss": 0.6780144572257996, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 1.7211155378486056, | |
| "grad_norm": 0.7043957114219666, | |
| "learning_rate": 1.3814991108213173e-05, | |
| "loss": 0.8546534776687622, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 1.7290836653386454, | |
| "grad_norm": 0.360525518655777, | |
| "learning_rate": 1.3758569965663011e-05, | |
| "loss": 0.6243395805358887, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 1.7370517928286853, | |
| "grad_norm": 0.8308858871459961, | |
| "learning_rate": 1.370202891284703e-05, | |
| "loss": 0.43398115038871765, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 1.745019920318725, | |
| "grad_norm": 0.3703770935535431, | |
| "learning_rate": 1.3645370407507017e-05, | |
| "loss": 0.738655149936676, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 1.752988047808765, | |
| "grad_norm": 2.820668935775757, | |
| "learning_rate": 1.3588596912490222e-05, | |
| "loss": 0.7261440753936768, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 1.7609561752988048, | |
| "grad_norm": 0.43590405583381653, | |
| "learning_rate": 1.3531710895642288e-05, | |
| "loss": 1.3262492418289185, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 1.7689243027888446, | |
| "grad_norm": 0.34120041131973267, | |
| "learning_rate": 1.3474714829699987e-05, | |
| "loss": 0.7070552706718445, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 1.7768924302788844, | |
| "grad_norm": 0.18316373229026794, | |
| "learning_rate": 1.341761119218374e-05, | |
| "loss": 1.004502773284912, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 1.7848605577689243, | |
| "grad_norm": 0.3774756193161011, | |
| "learning_rate": 1.3360402465289916e-05, | |
| "loss": 1.0533740520477295, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 1.792828685258964, | |
| "grad_norm": 0.21326151490211487, | |
| "learning_rate": 1.3303091135782942e-05, | |
| "loss": 0.8273346424102783, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 1.800796812749004, | |
| "grad_norm": 0.2267659306526184, | |
| "learning_rate": 1.3245679694887192e-05, | |
| "loss": 0.42468857765197754, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 1.8087649402390438, | |
| "grad_norm": 0.5060834288597107, | |
| "learning_rate": 1.318817063817872e-05, | |
| "loss": 0.8974787592887878, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 1.8167330677290838, | |
| "grad_norm": 0.48161670565605164, | |
| "learning_rate": 1.3130566465476773e-05, | |
| "loss": 0.6163372993469238, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 1.8247011952191237, | |
| "grad_norm": 0.2777350842952728, | |
| "learning_rate": 1.307286968073511e-05, | |
| "loss": 0.9941050410270691, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 1.8326693227091635, | |
| "grad_norm": 0.488695353269577, | |
| "learning_rate": 1.3015082791933182e-05, | |
| "loss": 0.7475647926330566, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 1.8406374501992033, | |
| "grad_norm": 0.24865223467350006, | |
| "learning_rate": 1.295720831096712e-05, | |
| "loss": 0.8023468255996704, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 1.8486055776892432, | |
| "grad_norm": 0.17197415232658386, | |
| "learning_rate": 1.2899248753540518e-05, | |
| "loss": 0.1325809210538864, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 1.856573705179283, | |
| "grad_norm": 1.5523875951766968, | |
| "learning_rate": 1.2841206639055108e-05, | |
| "loss": 1.1007118225097656, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 1.8645418326693228, | |
| "grad_norm": 0.5094614624977112, | |
| "learning_rate": 1.2783084490501229e-05, | |
| "loss": 0.514072060585022, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 1.8725099601593627, | |
| "grad_norm": 0.35147467255592346, | |
| "learning_rate": 1.2724884834348163e-05, | |
| "loss": 1.0952625274658203, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 1.8804780876494025, | |
| "grad_norm": 0.36751729249954224, | |
| "learning_rate": 1.266661020043432e-05, | |
| "loss": 0.9377644658088684, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 1.8884462151394423, | |
| "grad_norm": 0.4373418092727661, | |
| "learning_rate": 1.2608263121857244e-05, | |
| "loss": 0.9737136960029602, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 1.8964143426294822, | |
| "grad_norm": 0.19097857177257538, | |
| "learning_rate": 1.254984613486355e-05, | |
| "loss": 0.9451252222061157, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 1.904382470119522, | |
| "grad_norm": 0.42185327410697937, | |
| "learning_rate": 1.2491361778738631e-05, | |
| "loss": 0.7926995158195496, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 1.9123505976095618, | |
| "grad_norm": 0.5808318257331848, | |
| "learning_rate": 1.243281259569631e-05, | |
| "loss": 0.5798477530479431, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 1.9203187250996017, | |
| "grad_norm": 0.5500463247299194, | |
| "learning_rate": 1.2374201130768325e-05, | |
| "loss": 0.7669761180877686, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 1.9282868525896415, | |
| "grad_norm": 0.21460963785648346, | |
| "learning_rate": 1.2315529931693682e-05, | |
| "loss": 1.054688811302185, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 1.9362549800796813, | |
| "grad_norm": 0.2985857129096985, | |
| "learning_rate": 1.2256801548807948e-05, | |
| "loss": 0.9884510040283203, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 1.9442231075697212, | |
| "grad_norm": 0.22740538418293, | |
| "learning_rate": 1.2198018534932367e-05, | |
| "loss": 1.0773851871490479, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 1.952191235059761, | |
| "grad_norm": 0.1833743005990982, | |
| "learning_rate": 1.2139183445262892e-05, | |
| "loss": 1.0440764427185059, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 1.9601593625498008, | |
| "grad_norm": 0.26161497831344604, | |
| "learning_rate": 1.2080298837259124e-05, | |
| "loss": 0.7149063348770142, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 1.9681274900398407, | |
| "grad_norm": 2.253765821456909, | |
| "learning_rate": 1.2021367270533138e-05, | |
| "loss": 0.7989740967750549, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 1.9760956175298805, | |
| "grad_norm": 0.28019073605537415, | |
| "learning_rate": 1.1962391306738226e-05, | |
| "loss": 1.0134761333465576, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 1.9840637450199203, | |
| "grad_norm": 0.6458740234375, | |
| "learning_rate": 1.190337350945754e-05, | |
| "loss": 0.823985755443573, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 1.9920318725099602, | |
| "grad_norm": 0.298125684261322, | |
| "learning_rate": 1.1844316444092667e-05, | |
| "loss": 1.132564902305603, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 0.3069063127040863, | |
| "learning_rate": 1.1785222677752105e-05, | |
| "loss": 0.6213638782501221, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 2.00796812749004, | |
| "grad_norm": 0.2165108621120453, | |
| "learning_rate": 1.1726094779139685e-05, | |
| "loss": 0.5043126344680786, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 2.0159362549800797, | |
| "grad_norm": 0.4651063084602356, | |
| "learning_rate": 1.1666935318442905e-05, | |
| "loss": 0.44121354818344116, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 2.0239043824701195, | |
| "grad_norm": 0.08969016373157501, | |
| "learning_rate": 1.1607746867221208e-05, | |
| "loss": 0.4672836661338806, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 2.0318725099601593, | |
| "grad_norm": 0.38188546895980835, | |
| "learning_rate": 1.1548531998294217e-05, | |
| "loss": 0.7249612808227539, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 2.039840637450199, | |
| "grad_norm": 0.2776472866535187, | |
| "learning_rate": 1.148929328562987e-05, | |
| "loss": 0.6245005130767822, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 2.047808764940239, | |
| "grad_norm": 0.6827823519706726, | |
| "learning_rate": 1.1430033304232569e-05, | |
| "loss": 0.3463484048843384, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 2.055776892430279, | |
| "grad_norm": 0.4058661162853241, | |
| "learning_rate": 1.137075463003122e-05, | |
| "loss": 0.7836011052131653, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 2.0637450199203187, | |
| "grad_norm": 12.217242240905762, | |
| "learning_rate": 1.1311459839767278e-05, | |
| "loss": 0.4506489038467407, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 2.0717131474103585, | |
| "grad_norm": 3.7635204792022705, | |
| "learning_rate": 1.1252151510882731e-05, | |
| "loss": 0.6491232514381409, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 2.0796812749003983, | |
| "grad_norm": 0.39710062742233276, | |
| "learning_rate": 1.1192832221408068e-05, | |
| "loss": 0.8215765953063965, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 2.087649402390438, | |
| "grad_norm": 0.4940469563007355, | |
| "learning_rate": 1.1133504549850218e-05, | |
| "loss": 0.7698899507522583, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 2.095617529880478, | |
| "grad_norm": 0.536239504814148, | |
| "learning_rate": 1.1074171075080469e-05, | |
| "loss": 0.36201098561286926, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 2.103585657370518, | |
| "grad_norm": 0.8106605410575867, | |
| "learning_rate": 1.1014834376222351e-05, | |
| "loss": 0.5792776942253113, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 2.1115537848605577, | |
| "grad_norm": 0.15770025551319122, | |
| "learning_rate": 1.0955497032539557e-05, | |
| "loss": 0.44848746061325073, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 2.1195219123505975, | |
| "grad_norm": 0.24326692521572113, | |
| "learning_rate": 1.0896161623323795e-05, | |
| "loss": 0.1985977292060852, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 2.1274900398406373, | |
| "grad_norm": 1.5041277408599854, | |
| "learning_rate": 1.0836830727782692e-05, | |
| "loss": 0.612493634223938, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 2.135458167330677, | |
| "grad_norm": 1.0610326528549194, | |
| "learning_rate": 1.0777506924927667e-05, | |
| "loss": 0.6036497950553894, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 2.143426294820717, | |
| "grad_norm": 0.6076405644416809, | |
| "learning_rate": 1.0718192793461849e-05, | |
| "loss": 0.6230844855308533, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 2.151394422310757, | |
| "grad_norm": 0.36088430881500244, | |
| "learning_rate": 1.065889091166795e-05, | |
| "loss": 0.7613107562065125, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 2.1593625498007967, | |
| "grad_norm": 0.3054327964782715, | |
| "learning_rate": 1.059960385729621e-05, | |
| "loss": 0.6941779851913452, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 2.1673306772908365, | |
| "grad_norm": 0.24954797327518463, | |
| "learning_rate": 1.0540334207452363e-05, | |
| "loss": 0.47066619992256165, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 2.1752988047808763, | |
| "grad_norm": 0.2986539900302887, | |
| "learning_rate": 1.0481084538485589e-05, | |
| "loss": 0.5359241366386414, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 2.183266932270916, | |
| "grad_norm": 0.3956416845321655, | |
| "learning_rate": 1.0421857425876524e-05, | |
| "loss": 0.7813495397567749, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 2.191235059760956, | |
| "grad_norm": 0.3207624852657318, | |
| "learning_rate": 1.0362655444125334e-05, | |
| "loss": 0.8096965551376343, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 2.199203187250996, | |
| "grad_norm": 0.4274860620498657, | |
| "learning_rate": 1.0303481166639784e-05, | |
| "loss": 0.509590208530426, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 2.2071713147410357, | |
| "grad_norm": 0.5321890711784363, | |
| "learning_rate": 1.0244337165623378e-05, | |
| "loss": 0.453169047832489, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 2.2151394422310755, | |
| "grad_norm": 0.5689948797225952, | |
| "learning_rate": 1.0185226011963563e-05, | |
| "loss": 0.6031365990638733, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 2.2231075697211153, | |
| "grad_norm": 2.9450175762176514, | |
| "learning_rate": 1.0126150275119958e-05, | |
| "loss": 0.8096473813056946, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 2.231075697211155, | |
| "grad_norm": 0.4231691062450409, | |
| "learning_rate": 1.0067112523012686e-05, | |
| "loss": 0.7732781767845154, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 2.239043824701195, | |
| "grad_norm": 0.31910139322280884, | |
| "learning_rate": 1.0008115321910729e-05, | |
| "loss": 0.8720684051513672, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 2.247011952191235, | |
| "grad_norm": 6.204998016357422, | |
| "learning_rate": 9.949161236320388e-06, | |
| "loss": 0.5987430214881897, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 2.2549800796812747, | |
| "grad_norm": 0.29381808638572693, | |
| "learning_rate": 9.890252828873816e-06, | |
| "loss": 0.4853253960609436, | |
| "step": 566 | |
| }, | |
| { | |
| "epoch": 2.2629482071713145, | |
| "grad_norm": 0.2391190379858017, | |
| "learning_rate": 9.831392660217608e-06, | |
| "loss": 0.6356629729270935, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 2.2709163346613543, | |
| "grad_norm": 0.35715413093566895, | |
| "learning_rate": 9.772583288901502e-06, | |
| "loss": 0.7994184494018555, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 2.278884462151394, | |
| "grad_norm": 0.8083705306053162, | |
| "learning_rate": 9.713827271267169e-06, | |
| "loss": 0.5638254284858704, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 2.2868525896414345, | |
| "grad_norm": 0.25096696615219116, | |
| "learning_rate": 9.655127161337088e-06, | |
| "loss": 0.5889987349510193, | |
| "step": 574 | |
| }, | |
| { | |
| "epoch": 2.2948207171314743, | |
| "grad_norm": 0.2611815333366394, | |
| "learning_rate": 9.59648551070351e-06, | |
| "loss": 0.2555178701877594, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 2.302788844621514, | |
| "grad_norm": 0.5718734264373779, | |
| "learning_rate": 9.53790486841759e-06, | |
| "loss": 0.5340811014175415, | |
| "step": 578 | |
| }, | |
| { | |
| "epoch": 2.310756972111554, | |
| "grad_norm": 0.3825013339519501, | |
| "learning_rate": 9.479387780878531e-06, | |
| "loss": 0.5315930247306824, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 2.318725099601594, | |
| "grad_norm": 0.685142457485199, | |
| "learning_rate": 9.42093679172294e-06, | |
| "loss": 0.4931807219982147, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 2.3266932270916336, | |
| "grad_norm": 0.657268762588501, | |
| "learning_rate": 9.362554441714216e-06, | |
| "loss": 0.624793529510498, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 2.3346613545816735, | |
| "grad_norm": 0.36978161334991455, | |
| "learning_rate": 9.304243268632165e-06, | |
| "loss": 0.5345178842544556, | |
| "step": 586 | |
| }, | |
| { | |
| "epoch": 2.3426294820717133, | |
| "grad_norm": 0.6148081421852112, | |
| "learning_rate": 9.246005807162632e-06, | |
| "loss": 0.6727567911148071, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 2.350597609561753, | |
| "grad_norm": 0.299528568983078, | |
| "learning_rate": 9.187844588787354e-06, | |
| "loss": 0.7979969382286072, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 2.358565737051793, | |
| "grad_norm": 0.37552663683891296, | |
| "learning_rate": 9.12976214167391e-06, | |
| "loss": 0.8545841574668884, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 2.366533864541833, | |
| "grad_norm": 0.31660106778144836, | |
| "learning_rate": 9.071760990565832e-06, | |
| "loss": 0.5427640676498413, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 2.3745019920318726, | |
| "grad_norm": 0.2705290913581848, | |
| "learning_rate": 9.013843656672854e-06, | |
| "loss": 0.7104488015174866, | |
| "step": 596 | |
| }, | |
| { | |
| "epoch": 2.3824701195219125, | |
| "grad_norm": 0.27716466784477234, | |
| "learning_rate": 8.956012657561314e-06, | |
| "loss": 0.5637896060943604, | |
| "step": 598 | |
| }, | |
| { | |
| "epoch": 2.3904382470119523, | |
| "grad_norm": 2.9362845420837402, | |
| "learning_rate": 8.89827050704474e-06, | |
| "loss": 0.6183806657791138, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 2.398406374501992, | |
| "grad_norm": 0.2689732313156128, | |
| "learning_rate": 8.840619715074553e-06, | |
| "loss": 0.7946896553039551, | |
| "step": 602 | |
| }, | |
| { | |
| "epoch": 2.406374501992032, | |
| "grad_norm": 0.23491685092449188, | |
| "learning_rate": 8.78306278763098e-06, | |
| "loss": 0.4485781192779541, | |
| "step": 604 | |
| }, | |
| { | |
| "epoch": 2.414342629482072, | |
| "grad_norm": 0.9759865999221802, | |
| "learning_rate": 8.725602226614121e-06, | |
| "loss": 0.2319762110710144, | |
| "step": 606 | |
| }, | |
| { | |
| "epoch": 2.4223107569721116, | |
| "grad_norm": 0.15988869965076447, | |
| "learning_rate": 8.668240529735192e-06, | |
| "loss": 0.3493569493293762, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 2.4302788844621515, | |
| "grad_norm": 0.17206406593322754, | |
| "learning_rate": 8.610980190407958e-06, | |
| "loss": 0.6467002630233765, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 2.4382470119521913, | |
| "grad_norm": 0.25553181767463684, | |
| "learning_rate": 8.55382369764034e-06, | |
| "loss": 0.583063006401062, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 2.446215139442231, | |
| "grad_norm": 0.3950878381729126, | |
| "learning_rate": 8.496773535926242e-06, | |
| "loss": 0.46504709124565125, | |
| "step": 614 | |
| }, | |
| { | |
| "epoch": 2.454183266932271, | |
| "grad_norm": 0.26962918043136597, | |
| "learning_rate": 8.439832185137529e-06, | |
| "loss": 0.6895124912261963, | |
| "step": 616 | |
| }, | |
| { | |
| "epoch": 2.462151394422311, | |
| "grad_norm": 0.41879427433013916, | |
| "learning_rate": 8.383002120416241e-06, | |
| "loss": 0.5566327571868896, | |
| "step": 618 | |
| }, | |
| { | |
| "epoch": 2.4701195219123506, | |
| "grad_norm": 1.4798431396484375, | |
| "learning_rate": 8.326285812067023e-06, | |
| "loss": 0.6534566879272461, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 2.4780876494023905, | |
| "grad_norm": 0.307312548160553, | |
| "learning_rate": 8.269685725449708e-06, | |
| "loss": 0.6504800915718079, | |
| "step": 622 | |
| }, | |
| { | |
| "epoch": 2.4860557768924303, | |
| "grad_norm": 0.5427370071411133, | |
| "learning_rate": 8.213204320872176e-06, | |
| "loss": 0.6383635997772217, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 2.49402390438247, | |
| "grad_norm": 0.26320162415504456, | |
| "learning_rate": 8.156844053483412e-06, | |
| "loss": 0.4011181890964508, | |
| "step": 626 | |
| }, | |
| { | |
| "epoch": 2.50199203187251, | |
| "grad_norm": 0.37865692377090454, | |
| "learning_rate": 8.100607373166773e-06, | |
| "loss": 0.7987120151519775, | |
| "step": 628 | |
| }, | |
| { | |
| "epoch": 2.50996015936255, | |
| "grad_norm": 0.4195093512535095, | |
| "learning_rate": 8.044496724433493e-06, | |
| "loss": 0.2998882532119751, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 2.5179282868525896, | |
| "grad_norm": 0.9238357543945312, | |
| "learning_rate": 7.988514546316437e-06, | |
| "loss": 0.34079158306121826, | |
| "step": 632 | |
| }, | |
| { | |
| "epoch": 2.5258964143426295, | |
| "grad_norm": 0.27209165692329407, | |
| "learning_rate": 7.932663272264079e-06, | |
| "loss": 0.8152284622192383, | |
| "step": 634 | |
| }, | |
| { | |
| "epoch": 2.5338645418326693, | |
| "grad_norm": 0.3044971227645874, | |
| "learning_rate": 7.87694533003471e-06, | |
| "loss": 0.7516045570373535, | |
| "step": 636 | |
| }, | |
| { | |
| "epoch": 2.541832669322709, | |
| "grad_norm": 0.5729214549064636, | |
| "learning_rate": 7.82136314159092e-06, | |
| "loss": 0.8303281664848328, | |
| "step": 638 | |
| }, | |
| { | |
| "epoch": 2.549800796812749, | |
| "grad_norm": 0.19276340305805206, | |
| "learning_rate": 7.765919122994319e-06, | |
| "loss": 0.29311320185661316, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 2.557768924302789, | |
| "grad_norm": 0.4937683045864105, | |
| "learning_rate": 7.710615684300518e-06, | |
| "loss": 0.7855862379074097, | |
| "step": 642 | |
| }, | |
| { | |
| "epoch": 2.5657370517928286, | |
| "grad_norm": 0.2187207043170929, | |
| "learning_rate": 7.655455229454354e-06, | |
| "loss": 0.7239468097686768, | |
| "step": 644 | |
| }, | |
| { | |
| "epoch": 2.5737051792828685, | |
| "grad_norm": 1.5033259391784668, | |
| "learning_rate": 7.6004401561854e-06, | |
| "loss": 0.5276362299919128, | |
| "step": 646 | |
| }, | |
| { | |
| "epoch": 2.5816733067729083, | |
| "grad_norm": 0.33054134249687195, | |
| "learning_rate": 7.545572855903756e-06, | |
| "loss": 0.7969309687614441, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 2.589641434262948, | |
| "grad_norm": 0.3152036666870117, | |
| "learning_rate": 7.490855713596081e-06, | |
| "loss": 0.6026449203491211, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 2.597609561752988, | |
| "grad_norm": 0.33109569549560547, | |
| "learning_rate": 7.4362911077219155e-06, | |
| "loss": 0.5262701511383057, | |
| "step": 652 | |
| }, | |
| { | |
| "epoch": 2.605577689243028, | |
| "grad_norm": 0.29034096002578735, | |
| "learning_rate": 7.381881410110326e-06, | |
| "loss": 0.6021281480789185, | |
| "step": 654 | |
| }, | |
| { | |
| "epoch": 2.6135458167330676, | |
| "grad_norm": 0.25865310430526733, | |
| "learning_rate": 7.327628985856765e-06, | |
| "loss": 0.25670185685157776, | |
| "step": 656 | |
| }, | |
| { | |
| "epoch": 2.6215139442231075, | |
| "grad_norm": 0.39587166905403137, | |
| "learning_rate": 7.2735361932202965e-06, | |
| "loss": 0.47300997376441956, | |
| "step": 658 | |
| }, | |
| { | |
| "epoch": 2.6294820717131473, | |
| "grad_norm": 0.32333165407180786, | |
| "learning_rate": 7.219605383521067e-06, | |
| "loss": 0.6984432935714722, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 2.637450199203187, | |
| "grad_norm": 0.8259761929512024, | |
| "learning_rate": 7.165838901038107e-06, | |
| "loss": 0.5749056935310364, | |
| "step": 662 | |
| }, | |
| { | |
| "epoch": 2.645418326693227, | |
| "grad_norm": 1.1761703491210938, | |
| "learning_rate": 7.112239082907433e-06, | |
| "loss": 0.452744722366333, | |
| "step": 664 | |
| }, | |
| { | |
| "epoch": 2.653386454183267, | |
| "grad_norm": 0.3080389201641083, | |
| "learning_rate": 7.058808259020442e-06, | |
| "loss": 0.7340803146362305, | |
| "step": 666 | |
| }, | |
| { | |
| "epoch": 2.6613545816733066, | |
| "grad_norm": 0.13718551397323608, | |
| "learning_rate": 7.005548751922642e-06, | |
| "loss": 0.5631315112113953, | |
| "step": 668 | |
| }, | |
| { | |
| "epoch": 2.6693227091633465, | |
| "grad_norm": 0.48113957047462463, | |
| "learning_rate": 6.952462876712707e-06, | |
| "loss": 0.5454351305961609, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 2.6772908366533863, | |
| "grad_norm": 0.10773833841085434, | |
| "learning_rate": 6.899552940941829e-06, | |
| "loss": 0.27837103605270386, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 2.685258964143426, | |
| "grad_norm": 0.22576193511486053, | |
| "learning_rate": 6.846821244513411e-06, | |
| "loss": 0.8114858865737915, | |
| "step": 674 | |
| }, | |
| { | |
| "epoch": 2.6932270916334664, | |
| "grad_norm": 0.784417450428009, | |
| "learning_rate": 6.794270079583101e-06, | |
| "loss": 0.5748249292373657, | |
| "step": 676 | |
| }, | |
| { | |
| "epoch": 2.7011952191235062, | |
| "grad_norm": 0.3679204285144806, | |
| "learning_rate": 6.741901730459166e-06, | |
| "loss": 0.4891148507595062, | |
| "step": 678 | |
| }, | |
| { | |
| "epoch": 2.709163346613546, | |
| "grad_norm": 0.29442018270492554, | |
| "learning_rate": 6.68971847350317e-06, | |
| "loss": 0.8089858293533325, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 2.717131474103586, | |
| "grad_norm": 0.22440965473651886, | |
| "learning_rate": 6.6377225770310514e-06, | |
| "loss": 0.8312827348709106, | |
| "step": 682 | |
| }, | |
| { | |
| "epoch": 2.7250996015936257, | |
| "grad_norm": 0.8225514888763428, | |
| "learning_rate": 6.585916301214519e-06, | |
| "loss": 0.5896182656288147, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 2.7330677290836656, | |
| "grad_norm": 3.1597442626953125, | |
| "learning_rate": 6.534301897982774e-06, | |
| "loss": 0.3686487376689911, | |
| "step": 686 | |
| }, | |
| { | |
| "epoch": 2.7410358565737054, | |
| "grad_norm": 0.24462340772151947, | |
| "learning_rate": 6.482881610924674e-06, | |
| "loss": 0.8054239749908447, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 2.7490039840637452, | |
| "grad_norm": 1.4142591953277588, | |
| "learning_rate": 6.43165767519117e-06, | |
| "loss": 0.4302287697792053, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 2.756972111553785, | |
| "grad_norm": 0.3220599293708801, | |
| "learning_rate": 6.380632317398168e-06, | |
| "loss": 0.9143630862236023, | |
| "step": 692 | |
| }, | |
| { | |
| "epoch": 2.764940239043825, | |
| "grad_norm": 0.6856467723846436, | |
| "learning_rate": 6.3298077555297245e-06, | |
| "loss": 0.6507794857025146, | |
| "step": 694 | |
| }, | |
| { | |
| "epoch": 2.7729083665338647, | |
| "grad_norm": 0.4382452070713043, | |
| "learning_rate": 6.279186198841647e-06, | |
| "loss": 0.3969390094280243, | |
| "step": 696 | |
| }, | |
| { | |
| "epoch": 2.7808764940239046, | |
| "grad_norm": 0.2783852219581604, | |
| "learning_rate": 6.228769847765468e-06, | |
| "loss": 0.626663327217102, | |
| "step": 698 | |
| }, | |
| { | |
| "epoch": 2.7888446215139444, | |
| "grad_norm": 0.2537877857685089, | |
| "learning_rate": 6.178560893812774e-06, | |
| "loss": 0.8403069376945496, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 2.7968127490039842, | |
| "grad_norm": 0.1850716918706894, | |
| "learning_rate": 6.1285615194799695e-06, | |
| "loss": 0.48170754313468933, | |
| "step": 702 | |
| }, | |
| { | |
| "epoch": 2.804780876494024, | |
| "grad_norm": 0.4740298390388489, | |
| "learning_rate": 6.0787738981533825e-06, | |
| "loss": 0.8492451906204224, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 2.812749003984064, | |
| "grad_norm": 0.20938153564929962, | |
| "learning_rate": 6.0292001940148174e-06, | |
| "loss": 0.7743903398513794, | |
| "step": 706 | |
| }, | |
| { | |
| "epoch": 2.8207171314741037, | |
| "grad_norm": 0.3462710380554199, | |
| "learning_rate": 5.979842561947455e-06, | |
| "loss": 0.8912703990936279, | |
| "step": 708 | |
| }, | |
| { | |
| "epoch": 2.8286852589641436, | |
| "grad_norm": 0.2809637188911438, | |
| "learning_rate": 5.9307031474422074e-06, | |
| "loss": 0.43541547656059265, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 2.8366533864541834, | |
| "grad_norm": 0.20937590301036835, | |
| "learning_rate": 5.88178408650445e-06, | |
| "loss": 0.9104054570198059, | |
| "step": 712 | |
| }, | |
| { | |
| "epoch": 2.8446215139442232, | |
| "grad_norm": 0.2386903315782547, | |
| "learning_rate": 5.833087505561148e-06, | |
| "loss": 0.5239242315292358, | |
| "step": 714 | |
| }, | |
| { | |
| "epoch": 2.852589641434263, | |
| "grad_norm": 0.3977036476135254, | |
| "learning_rate": 5.784615521368468e-06, | |
| "loss": 0.7960683107376099, | |
| "step": 716 | |
| }, | |
| { | |
| "epoch": 2.860557768924303, | |
| "grad_norm": 0.9357836842536926, | |
| "learning_rate": 5.736370240919735e-06, | |
| "loss": 0.5691960453987122, | |
| "step": 718 | |
| }, | |
| { | |
| "epoch": 2.8685258964143427, | |
| "grad_norm": 0.204420268535614, | |
| "learning_rate": 5.688353761353862e-06, | |
| "loss": 0.49336379766464233, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 2.8764940239043826, | |
| "grad_norm": 0.2908172607421875, | |
| "learning_rate": 5.640568169864173e-06, | |
| "loss": 0.4811123311519623, | |
| "step": 722 | |
| }, | |
| { | |
| "epoch": 2.8844621513944224, | |
| "grad_norm": 0.5298336148262024, | |
| "learning_rate": 5.5930155436076875e-06, | |
| "loss": 0.565929114818573, | |
| "step": 724 | |
| }, | |
| { | |
| "epoch": 2.8924302788844622, | |
| "grad_norm": 0.1939915418624878, | |
| "learning_rate": 5.5456979496148385e-06, | |
| "loss": 0.7743662595748901, | |
| "step": 726 | |
| }, | |
| { | |
| "epoch": 2.900398406374502, | |
| "grad_norm": 0.2288486510515213, | |
| "learning_rate": 5.498617444699603e-06, | |
| "loss": 0.46328917145729065, | |
| "step": 728 | |
| }, | |
| { | |
| "epoch": 2.908366533864542, | |
| "grad_norm": 0.35470640659332275, | |
| "learning_rate": 5.451776075370114e-06, | |
| "loss": 0.7746002674102783, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 2.9163346613545817, | |
| "grad_norm": 0.39310505986213684, | |
| "learning_rate": 5.405175877739684e-06, | |
| "loss": 0.22616282105445862, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 2.9243027888446216, | |
| "grad_norm": 0.27109962701797485, | |
| "learning_rate": 5.3588188774383235e-06, | |
| "loss": 0.8417502641677856, | |
| "step": 734 | |
| }, | |
| { | |
| "epoch": 2.9322709163346614, | |
| "grad_norm": 1.1222072839736938, | |
| "learning_rate": 5.3127070895246604e-06, | |
| "loss": 0.3744705617427826, | |
| "step": 736 | |
| }, | |
| { | |
| "epoch": 2.9402390438247012, | |
| "grad_norm": 0.9205291867256165, | |
| "learning_rate": 5.266842518398369e-06, | |
| "loss": 0.5713416934013367, | |
| "step": 738 | |
| }, | |
| { | |
| "epoch": 2.948207171314741, | |
| "grad_norm": 0.26441994309425354, | |
| "learning_rate": 5.2212271577130495e-06, | |
| "loss": 0.848888635635376, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 2.956175298804781, | |
| "grad_norm": 0.9064684510231018, | |
| "learning_rate": 5.1758629902895375e-06, | |
| "loss": 0.25246933102607727, | |
| "step": 742 | |
| }, | |
| { | |
| "epoch": 2.9641434262948207, | |
| "grad_norm": 0.6127145886421204, | |
| "learning_rate": 5.1307519880297385e-06, | |
| "loss": 0.6468943953514099, | |
| "step": 744 | |
| }, | |
| { | |
| "epoch": 2.9721115537848606, | |
| "grad_norm": 0.4546615779399872, | |
| "learning_rate": 5.085896111830908e-06, | |
| "loss": 0.6896576881408691, | |
| "step": 746 | |
| }, | |
| { | |
| "epoch": 2.9800796812749004, | |
| "grad_norm": 0.5989493727684021, | |
| "learning_rate": 5.041297311500417e-06, | |
| "loss": 0.3880175054073334, | |
| "step": 748 | |
| }, | |
| { | |
| "epoch": 2.9880478087649402, | |
| "grad_norm": 0.24675695598125458, | |
| "learning_rate": 4.996957525670984e-06, | |
| "loss": 0.12411849945783615, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 2.99601593625498, | |
| "grad_norm": 0.26221850514411926, | |
| "learning_rate": 4.952878681716416e-06, | |
| "loss": 0.7962881922721863, | |
| "step": 752 | |
| }, | |
| { | |
| "epoch": 3.00398406374502, | |
| "grad_norm": 0.34225115180015564, | |
| "learning_rate": 4.9090626956678355e-06, | |
| "loss": 0.3844722807407379, | |
| "step": 754 | |
| }, | |
| { | |
| "epoch": 3.0119521912350598, | |
| "grad_norm": 0.3443976640701294, | |
| "learning_rate": 4.865511472130379e-06, | |
| "loss": 0.35608866810798645, | |
| "step": 756 | |
| }, | |
| { | |
| "epoch": 3.0199203187250996, | |
| "grad_norm": 0.2815234661102295, | |
| "learning_rate": 4.822226904200422e-06, | |
| "loss": 0.17388103902339935, | |
| "step": 758 | |
| }, | |
| { | |
| "epoch": 3.0278884462151394, | |
| "grad_norm": 0.29143962264060974, | |
| "learning_rate": 4.779210873383273e-06, | |
| "loss": 0.3388209342956543, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 3.0358565737051793, | |
| "grad_norm": 1.1175932884216309, | |
| "learning_rate": 4.7364652495114e-06, | |
| "loss": 0.3835744559764862, | |
| "step": 762 | |
| }, | |
| { | |
| "epoch": 3.043824701195219, | |
| "grad_norm": 0.3416759967803955, | |
| "learning_rate": 4.693991890663152e-06, | |
| "loss": 0.5161765813827515, | |
| "step": 764 | |
| }, | |
| { | |
| "epoch": 3.051792828685259, | |
| "grad_norm": 0.2444799840450287, | |
| "learning_rate": 4.651792643081986e-06, | |
| "loss": 0.17736996710300446, | |
| "step": 766 | |
| }, | |
| { | |
| "epoch": 3.0597609561752988, | |
| "grad_norm": 0.28991037607192993, | |
| "learning_rate": 4.609869341096217e-06, | |
| "loss": 0.37515631318092346, | |
| "step": 768 | |
| }, | |
| { | |
| "epoch": 3.0677290836653386, | |
| "grad_norm": 0.32158225774765015, | |
| "learning_rate": 4.568223807039282e-06, | |
| "loss": 0.3763376772403717, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 3.0756972111553784, | |
| "grad_norm": 0.34686094522476196, | |
| "learning_rate": 4.52685785117052e-06, | |
| "loss": 0.5709128975868225, | |
| "step": 772 | |
| }, | |
| { | |
| "epoch": 3.0836653386454183, | |
| "grad_norm": 0.7250041961669922, | |
| "learning_rate": 4.485773271596503e-06, | |
| "loss": 0.5019816160202026, | |
| "step": 774 | |
| }, | |
| { | |
| "epoch": 3.091633466135458, | |
| "grad_norm": 0.054395563900470734, | |
| "learning_rate": 4.444971854192848e-06, | |
| "loss": 0.3104590177536011, | |
| "step": 776 | |
| }, | |
| { | |
| "epoch": 3.099601593625498, | |
| "grad_norm": 0.2409711331129074, | |
| "learning_rate": 4.404455372526615e-06, | |
| "loss": 0.33854958415031433, | |
| "step": 778 | |
| }, | |
| { | |
| "epoch": 3.1075697211155378, | |
| "grad_norm": 0.2990105450153351, | |
| "learning_rate": 4.3642255877791876e-06, | |
| "loss": 0.5261071920394897, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 3.1155378486055776, | |
| "grad_norm": 0.09115161001682281, | |
| "learning_rate": 4.324284248669742e-06, | |
| "loss": 0.11824757605791092, | |
| "step": 782 | |
| }, | |
| { | |
| "epoch": 3.1235059760956174, | |
| "grad_norm": 0.2456287443637848, | |
| "learning_rate": 4.2846330913792136e-06, | |
| "loss": 0.6229012608528137, | |
| "step": 784 | |
| }, | |
| { | |
| "epoch": 3.1314741035856573, | |
| "grad_norm": 0.4126826524734497, | |
| "learning_rate": 4.245273839474835e-06, | |
| "loss": 0.41413745284080505, | |
| "step": 786 | |
| }, | |
| { | |
| "epoch": 3.139442231075697, | |
| "grad_norm": 0.7544516324996948, | |
| "learning_rate": 4.20620820383523e-06, | |
| "loss": 0.3049668073654175, | |
| "step": 788 | |
| }, | |
| { | |
| "epoch": 3.147410358565737, | |
| "grad_norm": 0.1292313188314438, | |
| "learning_rate": 4.167437882576009e-06, | |
| "loss": 0.2927045524120331, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 3.1553784860557768, | |
| "grad_norm": 0.5389977693557739, | |
| "learning_rate": 4.128964560975993e-06, | |
| "loss": 0.38354364037513733, | |
| "step": 792 | |
| }, | |
| { | |
| "epoch": 3.1633466135458166, | |
| "grad_norm": 0.05460236594080925, | |
| "learning_rate": 4.090789911403938e-06, | |
| "loss": 0.1887034922838211, | |
| "step": 794 | |
| }, | |
| { | |
| "epoch": 3.1713147410358564, | |
| "grad_norm": 0.4996011257171631, | |
| "learning_rate": 4.0529155932458444e-06, | |
| "loss": 0.33719515800476074, | |
| "step": 796 | |
| }, | |
| { | |
| "epoch": 3.1792828685258963, | |
| "grad_norm": 0.3487769365310669, | |
| "learning_rate": 4.015343252832824e-06, | |
| "loss": 0.3880740702152252, | |
| "step": 798 | |
| }, | |
| { | |
| "epoch": 3.187250996015936, | |
| "grad_norm": 0.3392852544784546, | |
| "learning_rate": 3.978074523369533e-06, | |
| "loss": 0.4826694428920746, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 3.195219123505976, | |
| "grad_norm": 0.20547737181186676, | |
| "learning_rate": 3.941111024863193e-06, | |
| "loss": 0.38525742292404175, | |
| "step": 802 | |
| }, | |
| { | |
| "epoch": 3.2031872509960158, | |
| "grad_norm": 0.3879428207874298, | |
| "learning_rate": 3.90445436405316e-06, | |
| "loss": 0.25935789942741394, | |
| "step": 804 | |
| }, | |
| { | |
| "epoch": 3.2111553784860556, | |
| "grad_norm": 0.6928282976150513, | |
| "learning_rate": 3.86810613434109e-06, | |
| "loss": 0.4072348177433014, | |
| "step": 806 | |
| }, | |
| { | |
| "epoch": 3.2191235059760954, | |
| "grad_norm": 0.4690568745136261, | |
| "learning_rate": 3.832067915721661e-06, | |
| "loss": 0.0453445203602314, | |
| "step": 808 | |
| }, | |
| { | |
| "epoch": 3.2270916334661353, | |
| "grad_norm": 0.24398498237133026, | |
| "learning_rate": 3.7963412747139204e-06, | |
| "loss": 0.3684697151184082, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 3.235059760956175, | |
| "grad_norm": 0.45747777819633484, | |
| "learning_rate": 3.7609277642931642e-06, | |
| "loss": 0.27333521842956543, | |
| "step": 812 | |
| }, | |
| { | |
| "epoch": 3.243027888446215, | |
| "grad_norm": 0.2253321260213852, | |
| "learning_rate": 3.725828923823447e-06, | |
| "loss": 0.45263350009918213, | |
| "step": 814 | |
| }, | |
| { | |
| "epoch": 3.2509960159362548, | |
| "grad_norm": 0.40673020482063293, | |
| "learning_rate": 3.6910462789906697e-06, | |
| "loss": 0.0962015688419342, | |
| "step": 816 | |
| }, | |
| { | |
| "epoch": 3.2589641434262946, | |
| "grad_norm": 0.4036368429660797, | |
| "learning_rate": 3.6565813417362476e-06, | |
| "loss": 0.5429524779319763, | |
| "step": 818 | |
| }, | |
| { | |
| "epoch": 3.2669322709163344, | |
| "grad_norm": 0.5956811904907227, | |
| "learning_rate": 3.622435610191398e-06, | |
| "loss": 0.20509321987628937, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 3.2749003984063743, | |
| "grad_norm": 1.8914116621017456, | |
| "learning_rate": 3.5886105686120267e-06, | |
| "loss": 0.20021887123584747, | |
| "step": 822 | |
| }, | |
| { | |
| "epoch": 3.2828685258964145, | |
| "grad_norm": 0.8176438212394714, | |
| "learning_rate": 3.5551076873141875e-06, | |
| "loss": 0.3322441577911377, | |
| "step": 824 | |
| }, | |
| { | |
| "epoch": 3.2908366533864544, | |
| "grad_norm": 0.24719004333019257, | |
| "learning_rate": 3.5219284226101992e-06, | |
| "loss": 0.521284818649292, | |
| "step": 826 | |
| }, | |
| { | |
| "epoch": 3.298804780876494, | |
| "grad_norm": 0.4595783054828644, | |
| "learning_rate": 3.4890742167453134e-06, | |
| "loss": 0.660916268825531, | |
| "step": 828 | |
| }, | |
| { | |
| "epoch": 3.306772908366534, | |
| "grad_norm": 0.32658329606056213, | |
| "learning_rate": 3.4565464978350447e-06, | |
| "loss": 0.4208177924156189, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 3.314741035856574, | |
| "grad_norm": 0.2831469178199768, | |
| "learning_rate": 3.4243466798030813e-06, | |
| "loss": 0.23003339767456055, | |
| "step": 832 | |
| }, | |
| { | |
| "epoch": 3.3227091633466137, | |
| "grad_norm": 1.099875569343567, | |
| "learning_rate": 3.3924761623198276e-06, | |
| "loss": 0.17236332595348358, | |
| "step": 834 | |
| }, | |
| { | |
| "epoch": 3.3306772908366535, | |
| "grad_norm": 0.6602292060852051, | |
| "learning_rate": 3.360936330741563e-06, | |
| "loss": 0.33426642417907715, | |
| "step": 836 | |
| }, | |
| { | |
| "epoch": 3.3386454183266934, | |
| "grad_norm": 0.5026458501815796, | |
| "learning_rate": 3.329728556050219e-06, | |
| "loss": 0.41616302728652954, | |
| "step": 838 | |
| }, | |
| { | |
| "epoch": 3.346613545816733, | |
| "grad_norm": 3.4541831016540527, | |
| "learning_rate": 3.2988541947937948e-06, | |
| "loss": 0.07192525267601013, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 3.354581673306773, | |
| "grad_norm": 0.48100268840789795, | |
| "learning_rate": 3.2683145890273803e-06, | |
| "loss": 0.2855004668235779, | |
| "step": 842 | |
| }, | |
| { | |
| "epoch": 3.362549800796813, | |
| "grad_norm": 0.5678504705429077, | |
| "learning_rate": 3.2381110662548244e-06, | |
| "loss": 0.43228018283843994, | |
| "step": 844 | |
| }, | |
| { | |
| "epoch": 3.3705179282868527, | |
| "grad_norm": 0.24944768846035004, | |
| "learning_rate": 3.208244939371029e-06, | |
| "loss": 0.5930905342102051, | |
| "step": 846 | |
| }, | |
| { | |
| "epoch": 3.3784860557768925, | |
| "grad_norm": 0.2541601359844208, | |
| "learning_rate": 3.178717506604878e-06, | |
| "loss": 0.5573995113372803, | |
| "step": 848 | |
| }, | |
| { | |
| "epoch": 3.3864541832669324, | |
| "grad_norm": 0.4352867901325226, | |
| "learning_rate": 3.1495300514628137e-06, | |
| "loss": 0.4143456220626831, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 3.394422310756972, | |
| "grad_norm": 0.27721530199050903, | |
| "learning_rate": 3.120683842673029e-06, | |
| "loss": 0.5134739875793457, | |
| "step": 852 | |
| }, | |
| { | |
| "epoch": 3.402390438247012, | |
| "grad_norm": 0.2205602526664734, | |
| "learning_rate": 3.092180134130341e-06, | |
| "loss": 0.3087632358074188, | |
| "step": 854 | |
| }, | |
| { | |
| "epoch": 3.410358565737052, | |
| "grad_norm": 0.3095087707042694, | |
| "learning_rate": 3.064020164841661e-06, | |
| "loss": 0.46302488446235657, | |
| "step": 856 | |
| }, | |
| { | |
| "epoch": 3.4183266932270917, | |
| "grad_norm": 0.3580918312072754, | |
| "learning_rate": 3.03620515887216e-06, | |
| "loss": 0.4601234793663025, | |
| "step": 858 | |
| }, | |
| { | |
| "epoch": 3.4262948207171315, | |
| "grad_norm": 0.4854097366333008, | |
| "learning_rate": 3.0087363252920392e-06, | |
| "loss": 0.5634459257125854, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 3.4342629482071714, | |
| "grad_norm": 0.24722632765769958, | |
| "learning_rate": 2.9816148581239945e-06, | |
| "loss": 0.3312654495239258, | |
| "step": 862 | |
| }, | |
| { | |
| "epoch": 3.442231075697211, | |
| "grad_norm": 0.8358548283576965, | |
| "learning_rate": 2.954841936291304e-06, | |
| "loss": 0.23872403800487518, | |
| "step": 864 | |
| }, | |
| { | |
| "epoch": 3.450199203187251, | |
| "grad_norm": 0.3561617136001587, | |
| "learning_rate": 2.9284187235665706e-06, | |
| "loss": 0.4579213261604309, | |
| "step": 866 | |
| }, | |
| { | |
| "epoch": 3.458167330677291, | |
| "grad_norm": 0.2745280861854553, | |
| "learning_rate": 2.902346368521161e-06, | |
| "loss": 0.38387465476989746, | |
| "step": 868 | |
| }, | |
| { | |
| "epoch": 3.4661354581673307, | |
| "grad_norm": 0.33510804176330566, | |
| "learning_rate": 2.8766260044752596e-06, | |
| "loss": 0.3727482557296753, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 3.4741035856573705, | |
| "grad_norm": 0.05946040898561478, | |
| "learning_rate": 2.851258749448613e-06, | |
| "loss": 0.336728572845459, | |
| "step": 872 | |
| }, | |
| { | |
| "epoch": 3.4820717131474104, | |
| "grad_norm": 0.08459708839654922, | |
| "learning_rate": 2.8262457061119275e-06, | |
| "loss": 0.31364670395851135, | |
| "step": 874 | |
| }, | |
| { | |
| "epoch": 3.49003984063745, | |
| "grad_norm": 0.24611854553222656, | |
| "learning_rate": 2.801587961738936e-06, | |
| "loss": 0.26714128255844116, | |
| "step": 876 | |
| }, | |
| { | |
| "epoch": 3.49800796812749, | |
| "grad_norm": 0.3433525860309601, | |
| "learning_rate": 2.7772865881591487e-06, | |
| "loss": 0.3233880400657654, | |
| "step": 878 | |
| }, | |
| { | |
| "epoch": 3.50597609561753, | |
| "grad_norm": 0.077357217669487, | |
| "learning_rate": 2.7533426417112463e-06, | |
| "loss": 0.1428777575492859, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 3.5139442231075697, | |
| "grad_norm": 0.23806530237197876, | |
| "learning_rate": 2.729757163197175e-06, | |
| "loss": 0.1798655092716217, | |
| "step": 882 | |
| }, | |
| { | |
| "epoch": 3.5219123505976095, | |
| "grad_norm": 0.41716670989990234, | |
| "learning_rate": 2.706531177836896e-06, | |
| "loss": 0.04974370449781418, | |
| "step": 884 | |
| }, | |
| { | |
| "epoch": 3.5298804780876494, | |
| "grad_norm": 0.7920284271240234, | |
| "learning_rate": 2.683665695223827e-06, | |
| "loss": 0.46149638295173645, | |
| "step": 886 | |
| }, | |
| { | |
| "epoch": 3.537848605577689, | |
| "grad_norm": 0.3813696801662445, | |
| "learning_rate": 2.661161709280954e-06, | |
| "loss": 0.47457778453826904, | |
| "step": 888 | |
| }, | |
| { | |
| "epoch": 3.545816733067729, | |
| "grad_norm": 0.4823150932788849, | |
| "learning_rate": 2.6390201982176284e-06, | |
| "loss": 0.2846560478210449, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 3.553784860557769, | |
| "grad_norm": 1.2098549604415894, | |
| "learning_rate": 2.617242124487048e-06, | |
| "loss": 0.3184957206249237, | |
| "step": 892 | |
| }, | |
| { | |
| "epoch": 3.5617529880478087, | |
| "grad_norm": 0.2593185305595398, | |
| "learning_rate": 2.595828434744411e-06, | |
| "loss": 0.7096160650253296, | |
| "step": 894 | |
| }, | |
| { | |
| "epoch": 3.5697211155378485, | |
| "grad_norm": 0.10369162261486053, | |
| "learning_rate": 2.574780059805779e-06, | |
| "loss": 0.10589734464883804, | |
| "step": 896 | |
| }, | |
| { | |
| "epoch": 3.5776892430278884, | |
| "grad_norm": 0.26918283104896545, | |
| "learning_rate": 2.5540979146076145e-06, | |
| "loss": 0.2939029335975647, | |
| "step": 898 | |
| }, | |
| { | |
| "epoch": 3.585657370517928, | |
| "grad_norm": 0.2926011383533478, | |
| "learning_rate": 2.5337828981669966e-06, | |
| "loss": 0.09356319904327393, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 3.593625498007968, | |
| "grad_norm": 1.341439962387085, | |
| "learning_rate": 2.513835893542561e-06, | |
| "loss": 0.09488323330879211, | |
| "step": 902 | |
| }, | |
| { | |
| "epoch": 3.601593625498008, | |
| "grad_norm": 0.47240838408470154, | |
| "learning_rate": 2.494257767796101e-06, | |
| "loss": 0.3548218905925751, | |
| "step": 904 | |
| }, | |
| { | |
| "epoch": 3.6095617529880477, | |
| "grad_norm": 0.49076366424560547, | |
| "learning_rate": 2.475049371954888e-06, | |
| "loss": 0.2988426685333252, | |
| "step": 906 | |
| }, | |
| { | |
| "epoch": 3.6175298804780875, | |
| "grad_norm": 0.2681836783885956, | |
| "learning_rate": 2.4562115409746662e-06, | |
| "loss": 0.5401598215103149, | |
| "step": 908 | |
| }, | |
| { | |
| "epoch": 3.6254980079681274, | |
| "grad_norm": 0.406819611787796, | |
| "learning_rate": 2.4377450937033753e-06, | |
| "loss": 0.4441235661506653, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 3.633466135458167, | |
| "grad_norm": 0.05496419966220856, | |
| "learning_rate": 2.419650832845541e-06, | |
| "loss": 0.019690319895744324, | |
| "step": 912 | |
| }, | |
| { | |
| "epoch": 3.641434262948207, | |
| "grad_norm": 0.29483741521835327, | |
| "learning_rate": 2.40192954492739e-06, | |
| "loss": 0.4060436189174652, | |
| "step": 914 | |
| }, | |
| { | |
| "epoch": 3.649402390438247, | |
| "grad_norm": 0.4651680290699005, | |
| "learning_rate": 2.3845820002626623e-06, | |
| "loss": 0.4076462984085083, | |
| "step": 916 | |
| }, | |
| { | |
| "epoch": 3.6573705179282867, | |
| "grad_norm": 2.9567272663116455, | |
| "learning_rate": 2.3676089529191234e-06, | |
| "loss": 0.047276318073272705, | |
| "step": 918 | |
| }, | |
| { | |
| "epoch": 3.6653386454183265, | |
| "grad_norm": 0.32289329171180725, | |
| "learning_rate": 2.3510111406857915e-06, | |
| "loss": 0.5000100135803223, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 3.6733067729083664, | |
| "grad_norm": 0.23058217763900757, | |
| "learning_rate": 2.3347892850408575e-06, | |
| "loss": 0.06835205107927322, | |
| "step": 922 | |
| }, | |
| { | |
| "epoch": 3.681274900398406, | |
| "grad_norm": 1.3678746223449707, | |
| "learning_rate": 2.3189440911203328e-06, | |
| "loss": 0.35598766803741455, | |
| "step": 924 | |
| }, | |
| { | |
| "epoch": 3.6892430278884465, | |
| "grad_norm": 0.30284255743026733, | |
| "learning_rate": 2.3034762476873973e-06, | |
| "loss": 0.13118457794189453, | |
| "step": 926 | |
| }, | |
| { | |
| "epoch": 3.6972111553784863, | |
| "grad_norm": 0.8098158836364746, | |
| "learning_rate": 2.2883864271024524e-06, | |
| "loss": 0.29144927859306335, | |
| "step": 928 | |
| }, | |
| { | |
| "epoch": 3.705179282868526, | |
| "grad_norm": 0.131113201379776, | |
| "learning_rate": 2.273675285293905e-06, | |
| "loss": 0.4039907455444336, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 3.713147410358566, | |
| "grad_norm": 0.34469276666641235, | |
| "learning_rate": 2.2593434617296474e-06, | |
| "loss": 0.5197895765304565, | |
| "step": 932 | |
| }, | |
| { | |
| "epoch": 3.721115537848606, | |
| "grad_norm": 0.2677764296531677, | |
| "learning_rate": 2.2453915793892665e-06, | |
| "loss": 0.23381216824054718, | |
| "step": 934 | |
| }, | |
| { | |
| "epoch": 3.7290836653386457, | |
| "grad_norm": 0.4345526099205017, | |
| "learning_rate": 2.231820244736958e-06, | |
| "loss": 0.4805620014667511, | |
| "step": 936 | |
| }, | |
| { | |
| "epoch": 3.7370517928286855, | |
| "grad_norm": 0.4274682402610779, | |
| "learning_rate": 2.2186300476951693e-06, | |
| "loss": 0.1564408391714096, | |
| "step": 938 | |
| }, | |
| { | |
| "epoch": 3.7450199203187253, | |
| "grad_norm": 1.1103200912475586, | |
| "learning_rate": 2.2058215616189578e-06, | |
| "loss": 0.47250640392303467, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 3.752988047808765, | |
| "grad_norm": 0.2901248037815094, | |
| "learning_rate": 2.1933953432710588e-06, | |
| "loss": 0.25581350922584534, | |
| "step": 942 | |
| }, | |
| { | |
| "epoch": 3.760956175298805, | |
| "grad_norm": 0.2037886381149292, | |
| "learning_rate": 2.181351932797695e-06, | |
| "loss": 0.3167116641998291, | |
| "step": 944 | |
| }, | |
| { | |
| "epoch": 3.768924302788845, | |
| "grad_norm": 0.36940625309944153, | |
| "learning_rate": 2.169691853705094e-06, | |
| "loss": 0.6154989004135132, | |
| "step": 946 | |
| }, | |
| { | |
| "epoch": 3.7768924302788847, | |
| "grad_norm": 0.3643878102302551, | |
| "learning_rate": 2.1584156128367303e-06, | |
| "loss": 0.3078484535217285, | |
| "step": 948 | |
| }, | |
| { | |
| "epoch": 3.7848605577689245, | |
| "grad_norm": 0.4483620822429657, | |
| "learning_rate": 2.147523700351291e-06, | |
| "loss": 0.5290223956108093, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 3.7928286852589643, | |
| "grad_norm": 0.5038286447525024, | |
| "learning_rate": 2.137016589701375e-06, | |
| "loss": 0.5246944427490234, | |
| "step": 952 | |
| }, | |
| { | |
| "epoch": 3.800796812749004, | |
| "grad_norm": 0.3047266900539398, | |
| "learning_rate": 2.126894737612914e-06, | |
| "loss": 0.29258349537849426, | |
| "step": 954 | |
| }, | |
| { | |
| "epoch": 3.808764940239044, | |
| "grad_norm": 0.3544226884841919, | |
| "learning_rate": 2.117158584065313e-06, | |
| "loss": 0.3819803297519684, | |
| "step": 956 | |
| }, | |
| { | |
| "epoch": 3.816733067729084, | |
| "grad_norm": 0.9558009505271912, | |
| "learning_rate": 2.107808552272329e-06, | |
| "loss": 0.25099891424179077, | |
| "step": 958 | |
| }, | |
| { | |
| "epoch": 3.8247011952191237, | |
| "grad_norm": 0.3349020779132843, | |
| "learning_rate": 2.0988450486636745e-06, | |
| "loss": 0.17486253380775452, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 3.8326693227091635, | |
| "grad_norm": 0.13526098430156708, | |
| "learning_rate": 2.090268462867351e-06, | |
| "loss": 0.31952333450317383, | |
| "step": 962 | |
| }, | |
| { | |
| "epoch": 3.8406374501992033, | |
| "grad_norm": 0.412202388048172, | |
| "learning_rate": 2.0820791676927093e-06, | |
| "loss": 0.6047512292861938, | |
| "step": 964 | |
| }, | |
| { | |
| "epoch": 3.848605577689243, | |
| "grad_norm": 0.3296831250190735, | |
| "learning_rate": 2.0742775191142496e-06, | |
| "loss": 0.16787327826023102, | |
| "step": 966 | |
| }, | |
| { | |
| "epoch": 3.856573705179283, | |
| "grad_norm": 0.28071603178977966, | |
| "learning_rate": 2.066863856256143e-06, | |
| "loss": 0.3841116726398468, | |
| "step": 968 | |
| }, | |
| { | |
| "epoch": 3.864541832669323, | |
| "grad_norm": 0.29851338267326355, | |
| "learning_rate": 2.0598385013774933e-06, | |
| "loss": 0.346584677696228, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 3.8725099601593627, | |
| "grad_norm": 0.03955311328172684, | |
| "learning_rate": 2.053201759858327e-06, | |
| "loss": 0.2920938730239868, | |
| "step": 972 | |
| }, | |
| { | |
| "epoch": 3.8804780876494025, | |
| "grad_norm": 0.051783155649900436, | |
| "learning_rate": 2.0469539201863224e-06, | |
| "loss": 0.2272939682006836, | |
| "step": 974 | |
| }, | |
| { | |
| "epoch": 3.8884462151394423, | |
| "grad_norm": 0.3298843204975128, | |
| "learning_rate": 2.0410952539442624e-06, | |
| "loss": 0.5179523229598999, | |
| "step": 976 | |
| }, | |
| { | |
| "epoch": 3.896414342629482, | |
| "grad_norm": 0.28545433282852173, | |
| "learning_rate": 2.0356260157982393e-06, | |
| "loss": 0.27209949493408203, | |
| "step": 978 | |
| }, | |
| { | |
| "epoch": 3.904382470119522, | |
| "grad_norm": 1.0891438722610474, | |
| "learning_rate": 2.030546443486575e-06, | |
| "loss": 0.24100713431835175, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 3.912350597609562, | |
| "grad_norm": 0.6865116357803345, | |
| "learning_rate": 2.0258567578094947e-06, | |
| "loss": 0.18800026178359985, | |
| "step": 982 | |
| }, | |
| { | |
| "epoch": 3.9203187250996017, | |
| "grad_norm": 0.24787557125091553, | |
| "learning_rate": 2.0215571626195242e-06, | |
| "loss": 0.5200425982475281, | |
| "step": 984 | |
| }, | |
| { | |
| "epoch": 3.9282868525896415, | |
| "grad_norm": 0.6841551065444946, | |
| "learning_rate": 2.0176478448126306e-06, | |
| "loss": 0.1371404379606247, | |
| "step": 986 | |
| }, | |
| { | |
| "epoch": 3.9362549800796813, | |
| "grad_norm": 1.6476190090179443, | |
| "learning_rate": 2.014128974320099e-06, | |
| "loss": 0.41096165776252747, | |
| "step": 988 | |
| }, | |
| { | |
| "epoch": 3.944223107569721, | |
| "grad_norm": 0.20494024455547333, | |
| "learning_rate": 2.011000704101143e-06, | |
| "loss": 0.30098965764045715, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 3.952191235059761, | |
| "grad_norm": 0.48796623945236206, | |
| "learning_rate": 2.00826317013626e-06, | |
| "loss": 0.5071312785148621, | |
| "step": 992 | |
| }, | |
| { | |
| "epoch": 3.960159362549801, | |
| "grad_norm": 0.33370301127433777, | |
| "learning_rate": 2.0059164914213175e-06, | |
| "loss": 0.13360963761806488, | |
| "step": 994 | |
| }, | |
| { | |
| "epoch": 3.9681274900398407, | |
| "grad_norm": 0.24733327329158783, | |
| "learning_rate": 2.00396076996238e-06, | |
| "loss": 0.41967302560806274, | |
| "step": 996 | |
| }, | |
| { | |
| "epoch": 3.9760956175298805, | |
| "grad_norm": 0.22083163261413574, | |
| "learning_rate": 2.0023960907712782e-06, | |
| "loss": 0.3552241027355194, | |
| "step": 998 | |
| }, | |
| { | |
| "epoch": 3.9840637450199203, | |
| "grad_norm": 0.4792640805244446, | |
| "learning_rate": 2.0012225218619097e-06, | |
| "loss": 0.2334318608045578, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 3.99203187250996, | |
| "grad_norm": 0.2573681175708771, | |
| "learning_rate": 2.0004401142472857e-06, | |
| "loss": 0.5154892802238464, | |
| "step": 1002 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "grad_norm": 0.1436181664466858, | |
| "learning_rate": 2.000048901937313e-06, | |
| "loss": 0.12288407981395721, | |
| "step": 1004 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "step": 1004, | |
| "total_flos": 4.038502240003031e+18, | |
| "train_loss": 0.7417308912200637, | |
| "train_runtime": 11316.072, | |
| "train_samples_per_second": 5.323, | |
| "train_steps_per_second": 0.089 | |
| } | |
| ], | |
| "logging_steps": 2, | |
| "max_steps": 1004, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 4, | |
| "save_steps": 99999, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": false, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 4.038502240003031e+18, | |
| "train_batch_size": 3, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |