Image-Text-to-Text
Transformers
Safetensors
qwen3_5
llama-factory
full
Generated from Trainer
conversational
Instructions to use furproxy/9b-44 with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use furproxy/9b-44 with Transformers:
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("image-text-to-text", model="furproxy/9b-44")
messages = [
    {
        "role": "user",
        "content": [
            {"type": "image", "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/p-blog/candy.JPG"},
            {"type": "text", "text": "What animal is on the candy?"}
        ]
    },
]
pipe(text=messages)

# Load model directly
from transformers import AutoProcessor, AutoModelForImageTextToText

processor = AutoProcessor.from_pretrained("furproxy/9b-44")
model = AutoModelForImageTextToText.from_pretrained("furproxy/9b-44")
messages = [
    {
        "role": "user",
        "content": [
            {"type": "image", "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/p-blog/candy.JPG"},
            {"type": "text", "text": "What animal is on the candy?"}
        ]
    },
]
inputs = processor.apply_chat_template(
    messages,
    add_generation_prompt=True,
    tokenize=True,
    return_dict=True,
    return_tensors="pt",
).to(model.device)

outputs = model.generate(**inputs, max_new_tokens=40)
print(processor.decode(outputs[0][inputs["input_ids"].shape[-1]:]))
- Notebooks
- Google Colab
- Kaggle
- Local Apps Settings
- vLLM
How to use furproxy/9b-44 with vLLM:
Install from pip and serve model
# Install vLLM from pip:
pip install vllm

# Start the vLLM server:
vllm serve "furproxy/9b-44"

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/chat/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "furproxy/9b-44",
        "messages": [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": "Describe this image in one sentence."
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
                        }
                    }
                ]
            }
        ]
    }'
Use Docker
docker model run hf.co/furproxy/9b-44
- SGLang
How to use furproxy/9b-44 with SGLang:
Install from pip and serve model
# Install SGLang from pip:
pip install sglang

# Start the SGLang server:
python3 -m sglang.launch_server \
    --model-path "furproxy/9b-44" \
    --host 0.0.0.0 \
    --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "furproxy/9b-44",
        "messages": [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": "Describe this image in one sentence."
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
                        }
                    }
                ]
            }
        ]
    }'
Use Docker images
docker run --gpus all \
    --shm-size 32g \
    -p 30000:30000 \
    -v ~/.cache/huggingface:/root/.cache/huggingface \
    --env "HF_TOKEN=<secret>" \
    --ipc=host \
    lmsysorg/sglang:latest \
    python3 -m sglang.launch_server \
        --model-path "furproxy/9b-44" \
        --host 0.0.0.0 \
        --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "furproxy/9b-44",
        "messages": [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": "Describe this image in one sentence."
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
                        }
                    }
                ]
            }
        ]
    }'
- Docker Model Runner
How to use furproxy/9b-44 with Docker Model Runner:
docker model run hf.co/furproxy/9b-44
| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 4.0, | |
| "eval_steps": 500, | |
| "global_step": 532, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.015037593984962405, | |
| "grad_norm": 0.8192565441131592, | |
| "learning_rate": 1.111111111111111e-06, | |
| "loss": 1.936692237854004, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.03007518796992481, | |
| "grad_norm": 0.7943888306617737, | |
| "learning_rate": 3.3333333333333333e-06, | |
| "loss": 2.246655225753784, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.045112781954887216, | |
| "grad_norm": 0.5120864510536194, | |
| "learning_rate": 5.555555555555555e-06, | |
| "loss": 2.0245468616485596, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.06015037593984962, | |
| "grad_norm": 0.3112846314907074, | |
| "learning_rate": 7.777777777777777e-06, | |
| "loss": 1.894791603088379, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.07518796992481203, | |
| "grad_norm": 0.7659847140312195, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 1.8956142663955688, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.09022556390977443, | |
| "grad_norm": 1.0534412860870361, | |
| "learning_rate": 1.2222222222222222e-05, | |
| "loss": 2.20853590965271, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.10526315789473684, | |
| "grad_norm": 0.2340962290763855, | |
| "learning_rate": 1.4444444444444444e-05, | |
| "loss": 1.858487606048584, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.12030075187969924, | |
| "grad_norm": 1.1463453769683838, | |
| "learning_rate": 1.6666666666666667e-05, | |
| "loss": 2.3683369159698486, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.13533834586466165, | |
| "grad_norm": 1.6465355157852173, | |
| "learning_rate": 1.888888888888889e-05, | |
| "loss": 2.907562017440796, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.15037593984962405, | |
| "grad_norm": 0.883372962474823, | |
| "learning_rate": 2.111111111111111e-05, | |
| "loss": 1.6304150819778442, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.16541353383458646, | |
| "grad_norm": 0.7589054703712463, | |
| "learning_rate": 2.3333333333333336e-05, | |
| "loss": 1.670052170753479, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.18045112781954886, | |
| "grad_norm": 0.5909481048583984, | |
| "learning_rate": 2.5555555555555557e-05, | |
| "loss": 1.6498050689697266, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.19548872180451127, | |
| "grad_norm": 1.89938223361969, | |
| "learning_rate": 2.777777777777778e-05, | |
| "loss": 1.5983651876449585, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.21052631578947367, | |
| "grad_norm": 0.8610926270484924, | |
| "learning_rate": 3e-05, | |
| "loss": 1.25473952293396, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.22556390977443608, | |
| "grad_norm": 1.2555012702941895, | |
| "learning_rate": 2.999111925794138e-05, | |
| "loss": 1.228848934173584, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.24060150375939848, | |
| "grad_norm": 1.1694159507751465, | |
| "learning_rate": 2.996448940315055e-05, | |
| "loss": 1.39642333984375, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.2556390977443609, | |
| "grad_norm": 0.687818169593811, | |
| "learning_rate": 2.9920147532548513e-05, | |
| "loss": 1.2702100276947021, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.2706766917293233, | |
| "grad_norm": 0.33861589431762695, | |
| "learning_rate": 2.9858155416914135e-05, | |
| "loss": 1.326142430305481, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.2857142857142857, | |
| "grad_norm": 2.371178388595581, | |
| "learning_rate": 2.9778599414833865e-05, | |
| "loss": 1.4221186637878418, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.3007518796992481, | |
| "grad_norm": 0.7431687712669373, | |
| "learning_rate": 2.9681590352399252e-05, | |
| "loss": 1.0404337644577026, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.3157894736842105, | |
| "grad_norm": 0.42008474469184875, | |
| "learning_rate": 2.956726336881985e-05, | |
| "loss": 1.2850358486175537, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.3308270676691729, | |
| "grad_norm": 0.24258685111999512, | |
| "learning_rate": 2.9435777728166477e-05, | |
| "loss": 0.9888750314712524, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.3458646616541353, | |
| "grad_norm": 0.19806115329265594, | |
| "learning_rate": 2.928731659750722e-05, | |
| "loss": 1.389718770980835, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.3609022556390977, | |
| "grad_norm": 0.5233106017112732, | |
| "learning_rate": 2.912208679174516e-05, | |
| "loss": 1.0381746292114258, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.37593984962406013, | |
| "grad_norm": 0.3707257807254791, | |
| "learning_rate": 2.8940318485513296e-05, | |
| "loss": 1.0249922275543213, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.39097744360902253, | |
| "grad_norm": 0.29312264919281006, | |
| "learning_rate": 2.8742264892528024e-05, | |
| "loss": 0.9782328009605408, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.40601503759398494, | |
| "grad_norm": 0.34875017404556274, | |
| "learning_rate": 2.8528201912847877e-05, | |
| "loss": 1.0573807954788208, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.42105263157894735, | |
| "grad_norm": 0.14175978302955627, | |
| "learning_rate": 2.829842774852883e-05, | |
| "loss": 1.005712628364563, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.43609022556390975, | |
| "grad_norm": 0.3452085852622986, | |
| "learning_rate": 2.805326248821166e-05, | |
| "loss": 0.9127753376960754, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.45112781954887216, | |
| "grad_norm": 0.27766284346580505, | |
| "learning_rate": 2.7793047661220094e-05, | |
| "loss": 1.0905134677886963, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.46616541353383456, | |
| "grad_norm": 0.4942661225795746, | |
| "learning_rate": 2.751814576179072e-05, | |
| "loss": 0.8560956120491028, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.48120300751879697, | |
| "grad_norm": 0.225164532661438, | |
| "learning_rate": 2.722893974409769e-05, | |
| "loss": 1.1211824417114258, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.49624060150375937, | |
| "grad_norm": 0.5361406803131104, | |
| "learning_rate": 2.6925832488775517e-05, | |
| "loss": 1.101810336112976, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.5112781954887218, | |
| "grad_norm": 0.1772085279226303, | |
| "learning_rate": 2.660924624168312e-05, | |
| "loss": 1.2826346158981323, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.5263157894736842, | |
| "grad_norm": 0.7415258288383484, | |
| "learning_rate": 2.627962202569103e-05, | |
| "loss": 1.0522770881652832, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.5413533834586466, | |
| "grad_norm": 0.14701074361801147, | |
| "learning_rate": 2.593741902631119e-05, | |
| "loss": 0.7640881538391113, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.556390977443609, | |
| "grad_norm": 0.2193162888288498, | |
| "learning_rate": 2.558311395202502e-05, | |
| "loss": 0.8525770306587219, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.5714285714285714, | |
| "grad_norm": 0.38675349950790405, | |
| "learning_rate": 2.5217200370201126e-05, | |
| "loss": 1.0316098928451538, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.5864661654135338, | |
| "grad_norm": 0.15427573025226593, | |
| "learning_rate": 2.4840188019527494e-05, | |
| "loss": 1.2194627523422241, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.6015037593984962, | |
| "grad_norm": 1.0054173469543457, | |
| "learning_rate": 2.445260209991616e-05, | |
| "loss": 0.6321249008178711, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.6165413533834586, | |
| "grad_norm": 0.3735978603363037, | |
| "learning_rate": 2.4054982540869497e-05, | |
| "loss": 1.1763536930084229, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.631578947368421, | |
| "grad_norm": 0.1597137153148651, | |
| "learning_rate": 2.3647883249327334e-05, | |
| "loss": 1.271316409111023, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.6466165413533834, | |
| "grad_norm": 0.20840084552764893, | |
| "learning_rate": 2.3231871338042668e-05, | |
| "loss": 0.9115048050880432, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.6616541353383458, | |
| "grad_norm": 0.11203482747077942, | |
| "learning_rate": 2.280752633556098e-05, | |
| "loss": 1.0730034112930298, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.6766917293233082, | |
| "grad_norm": 0.2053004801273346, | |
| "learning_rate": 2.2375439378903597e-05, | |
| "loss": 1.1552447080612183, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.6917293233082706, | |
| "grad_norm": 0.1656394600868225, | |
| "learning_rate": 2.1936212390079758e-05, | |
| "loss": 1.035262107849121, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.706766917293233, | |
| "grad_norm": 0.18118023872375488, | |
| "learning_rate": 2.1490457237574638e-05, | |
| "loss": 0.961626410484314, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.7218045112781954, | |
| "grad_norm": 0.3013463020324707, | |
| "learning_rate": 2.103879488398128e-05, | |
| "loss": 1.3001712560653687, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.7368421052631579, | |
| "grad_norm": 0.21438409388065338, | |
| "learning_rate": 2.058185452096397e-05, | |
| "loss": 1.1097919940948486, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.7518796992481203, | |
| "grad_norm": 0.4920536279678345, | |
| "learning_rate": 2.0120272692758044e-05, | |
| "loss": 0.6178168058395386, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.7669172932330827, | |
| "grad_norm": 0.2382662147283554, | |
| "learning_rate": 1.965469240942704e-05, | |
| "loss": 1.3048324584960938, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.7819548872180451, | |
| "grad_norm": 0.18292629718780518, | |
| "learning_rate": 1.918576225111276e-05, | |
| "loss": 0.9727452397346497, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.7969924812030075, | |
| "grad_norm": 0.35222965478897095, | |
| "learning_rate": 1.8714135464525706e-05, | |
| "loss": 0.6771279573440552, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.8120300751879699, | |
| "grad_norm": 0.32725006341934204, | |
| "learning_rate": 1.824046905293483e-05, | |
| "loss": 0.9753497242927551, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.8270676691729323, | |
| "grad_norm": 0.40115198493003845, | |
| "learning_rate": 1.7765422860924167e-05, | |
| "loss": 0.8510618209838867, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.8421052631578947, | |
| "grad_norm": 0.26472556591033936, | |
| "learning_rate": 1.7289658655191308e-05, | |
| "loss": 0.7452026605606079, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.8571428571428571, | |
| "grad_norm": 0.6424808502197266, | |
| "learning_rate": 1.6813839202668314e-05, | |
| "loss": 0.9618666768074036, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.8721804511278195, | |
| "grad_norm": 0.2608170509338379, | |
| "learning_rate": 1.6338627347249194e-05, | |
| "loss": 1.3540914058685303, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.8872180451127819, | |
| "grad_norm": 0.16167515516281128, | |
| "learning_rate": 1.5864685086410205e-05, | |
| "loss": 1.023390293121338, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 0.9022556390977443, | |
| "grad_norm": 0.36847585439682007, | |
| "learning_rate": 1.539267264900926e-05, | |
| "loss": 0.8938322067260742, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.9172932330827067, | |
| "grad_norm": 0.42921480536460876, | |
| "learning_rate": 1.4923247575549108e-05, | |
| "loss": 0.8056025505065918, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 0.9323308270676691, | |
| "grad_norm": 0.6019521355628967, | |
| "learning_rate": 1.4457063802185558e-05, | |
| "loss": 0.8153986930847168, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.9473684210526315, | |
| "grad_norm": 0.17627808451652527, | |
| "learning_rate": 1.3994770749756746e-05, | |
| "loss": 0.7411991953849792, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 0.9624060150375939, | |
| "grad_norm": 0.27609097957611084, | |
| "learning_rate": 1.3537012419102535e-05, | |
| "loss": 1.0465192794799805, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.9774436090225563, | |
| "grad_norm": 0.7103683948516846, | |
| "learning_rate": 1.3084426493934257e-05, | |
| "loss": 0.8794819712638855, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.9924812030075187, | |
| "grad_norm": 0.13337653875350952, | |
| "learning_rate": 1.2637643452504579e-05, | |
| "loss": 1.019758701324463, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 1.0075187969924813, | |
| "grad_norm": 0.15754307806491852, | |
| "learning_rate": 1.2197285689315004e-05, | |
| "loss": 0.7352499961853027, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 1.0225563909774436, | |
| "grad_norm": 0.1713666021823883, | |
| "learning_rate": 1.1763966648084505e-05, | |
| "loss": 0.8829557299613953, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 1.037593984962406, | |
| "grad_norm": 0.3357762098312378, | |
| "learning_rate": 1.1338289967187079e-05, | |
| "loss": 0.6713441610336304, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 1.0526315789473684, | |
| "grad_norm": 0.17261233925819397, | |
| "learning_rate": 1.0920848638748748e-05, | |
| "loss": 0.7187601327896118, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 1.0676691729323309, | |
| "grad_norm": 0.13400448858737946, | |
| "learning_rate": 1.0512224182575395e-05, | |
| "loss": 0.7740556597709656, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 1.0827067669172932, | |
| "grad_norm": 0.3450721502304077, | |
| "learning_rate": 1.0112985836062175e-05, | |
| "loss": 0.969446063041687, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 1.0977443609022557, | |
| "grad_norm": 0.2375430464744568, | |
| "learning_rate": 9.723689761213051e-06, | |
| "loss": 0.9913895130157471, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 1.112781954887218, | |
| "grad_norm": 0.15823714435100555, | |
| "learning_rate": 9.34487826987512e-06, | |
| "loss": 0.925875186920166, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 1.1278195488721805, | |
| "grad_norm": 0.23397411406040192, | |
| "learning_rate": 8.97707906826694e-06, | |
| "loss": 0.8095348477363586, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 1.1428571428571428, | |
| "grad_norm": 0.23221950232982635, | |
| "learning_rate": 8.620804521853441e-06, | |
| "loss": 0.9493626952171326, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 1.1578947368421053, | |
| "grad_norm": 0.1775089055299759, | |
| "learning_rate": 8.27655094159128e-06, | |
| "loss": 0.9873220920562744, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 1.1729323308270676, | |
| "grad_norm": 0.22347131371498108, | |
| "learning_rate": 7.944797892539146e-06, | |
| "loss": 0.9379909634590149, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 1.1879699248120301, | |
| "grad_norm": 0.17521372437477112, | |
| "learning_rate": 7.626007525795976e-06, | |
| "loss": 0.9363319277763367, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 1.2030075187969924, | |
| "grad_norm": 0.5944448113441467, | |
| "learning_rate": 7.320623934697899e-06, | |
| "loss": 0.5006011724472046, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 1.218045112781955, | |
| "grad_norm": 0.21691644191741943, | |
| "learning_rate": 7.029072536170642e-06, | |
| "loss": 0.877805233001709, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 1.2330827067669172, | |
| "grad_norm": 0.19128523766994476, | |
| "learning_rate": 6.751759478099246e-06, | |
| "loss": 1.0667612552642822, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 1.2481203007518797, | |
| "grad_norm": 0.32106316089630127, | |
| "learning_rate": 6.489071073540686e-06, | |
| "loss": 0.8215808868408203, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 1.263157894736842, | |
| "grad_norm": 0.1843944638967514, | |
| "learning_rate": 6.241373262567537e-06, | |
| "loss": 0.6570966243743896, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 1.2781954887218046, | |
| "grad_norm": 0.24442797899246216, | |
| "learning_rate": 6.009011102492393e-06, | |
| "loss": 0.7164343595504761, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 1.2932330827067668, | |
| "grad_norm": 0.17113906145095825, | |
| "learning_rate": 5.7923082871831375e-06, | |
| "loss": 0.8579428791999817, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 1.3082706766917294, | |
| "grad_norm": 0.24567635357379913, | |
| "learning_rate": 5.591566696138772e-06, | |
| "loss": 0.8993586301803589, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 1.3233082706766917, | |
| "grad_norm": 0.19532179832458496, | |
| "learning_rate": 5.407065973953888e-06, | |
| "loss": 0.6733898520469666, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 1.3383458646616542, | |
| "grad_norm": 0.3204295337200165, | |
| "learning_rate": 5.239063140757639e-06, | |
| "loss": 0.6425676345825195, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 1.3533834586466165, | |
| "grad_norm": 0.22146816551685333, | |
| "learning_rate": 5.0877922341699066e-06, | |
| "loss": 1.1642075777053833, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 1.368421052631579, | |
| "grad_norm": 0.2698806822299957, | |
| "learning_rate": 4.953463983273412e-06, | |
| "loss": 0.9253040552139282, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 1.3834586466165413, | |
| "grad_norm": 0.2668271064758301, | |
| "learning_rate": 4.836265515055985e-06, | |
| "loss": 0.7467199563980103, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 1.3984962406015038, | |
| "grad_norm": 0.17395268380641937, | |
| "learning_rate": 4.736360093731884e-06, | |
| "loss": 1.0783255100250244, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 1.413533834586466, | |
| "grad_norm": 0.20097728073596954, | |
| "learning_rate": 4.653886893305353e-06, | |
| "loss": 0.5329846143722534, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 1.4285714285714286, | |
| "grad_norm": 0.2769680917263031, | |
| "learning_rate": 4.588960803693209e-06, | |
| "loss": 0.8872597813606262, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 1.443609022556391, | |
| "grad_norm": 0.1900765597820282, | |
| "learning_rate": 4.5416722706765875e-06, | |
| "loss": 0.653458297252655, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 1.4586466165413534, | |
| "grad_norm": 0.299067884683609, | |
| "learning_rate": 4.512087169904754e-06, | |
| "loss": 0.7420106530189514, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 1.4736842105263157, | |
| "grad_norm": 0.19370242953300476, | |
| "learning_rate": 4.500246715126523e-06, | |
| "loss": 0.8862230181694031, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 1.4887218045112782, | |
| "grad_norm": 0.17850318551063538, | |
| "learning_rate": 4.506167400777152e-06, | |
| "loss": 0.8613809943199158, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 1.5037593984962405, | |
| "grad_norm": 0.17290696501731873, | |
| "learning_rate": 4.52984097900063e-06, | |
| "loss": 0.8784961104393005, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 1.518796992481203, | |
| "grad_norm": 0.3493019938468933, | |
| "learning_rate": 4.5712344711394154e-06, | |
| "loss": 1.2700152397155762, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 1.5338345864661656, | |
| "grad_norm": 0.42357122898101807, | |
| "learning_rate": 4.630290213675614e-06, | |
| "loss": 0.9580332636833191, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 1.5488721804511278, | |
| "grad_norm": 1.1557518243789673, | |
| "learning_rate": 4.706925938559573e-06, | |
| "loss": 0.7860268354415894, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 1.5639097744360901, | |
| "grad_norm": 0.28890499472618103, | |
| "learning_rate": 4.801034887814009e-06, | |
| "loss": 0.9093602895736694, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 1.5789473684210527, | |
| "grad_norm": 0.5986707210540771, | |
| "learning_rate": 4.912485962254024e-06, | |
| "loss": 0.8598864674568176, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 1.5939849624060152, | |
| "grad_norm": 0.17273662984371185, | |
| "learning_rate": 5.04112390411581e-06, | |
| "loss": 0.5818964242935181, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 1.6090225563909775, | |
| "grad_norm": 0.21565358340740204, | |
| "learning_rate": 5.186769513339663e-06, | |
| "loss": 0.884915292263031, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 1.6240601503759398, | |
| "grad_norm": 0.26930728554725647, | |
| "learning_rate": 5.349219897205977e-06, | |
| "loss": 0.9705126881599426, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 1.6390977443609023, | |
| "grad_norm": 0.1285410076379776, | |
| "learning_rate": 5.5282487529764855e-06, | |
| "loss": 0.7298458218574524, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 1.6541353383458648, | |
| "grad_norm": 0.21168453991413116, | |
| "learning_rate": 5.7236066831470105e-06, | |
| "loss": 0.7564178109169006, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 1.669172932330827, | |
| "grad_norm": 0.5188248753547668, | |
| "learning_rate": 5.935021542872539e-06, | |
| "loss": 0.8646745681762695, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 1.6842105263157894, | |
| "grad_norm": 0.24589960277080536, | |
| "learning_rate": 6.162198819080668e-06, | |
| "loss": 0.699385941028595, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 1.699248120300752, | |
| "grad_norm": 0.3245724141597748, | |
| "learning_rate": 6.404822040745263e-06, | |
| "loss": 1.0145379304885864, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 1.7142857142857144, | |
| "grad_norm": 0.24885950982570648, | |
| "learning_rate": 6.662553219748833e-06, | |
| "loss": 0.7830167412757874, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 1.7293233082706767, | |
| "grad_norm": 0.38476553559303284, | |
| "learning_rate": 6.935033321719419e-06, | |
| "loss": 0.9040583372116089, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 1.744360902255639, | |
| "grad_norm": 0.3446056544780731, | |
| "learning_rate": 7.2218827661861725e-06, | |
| "loss": 1.0128272771835327, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 1.7593984962406015, | |
| "grad_norm": 0.15947787463665009, | |
| "learning_rate": 7.522701955356779e-06, | |
| "loss": 0.9765535593032837, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 1.774436090225564, | |
| "grad_norm": 0.2729548513889313, | |
| "learning_rate": 7.837071830780217e-06, | |
| "loss": 1.0480151176452637, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 1.7894736842105263, | |
| "grad_norm": 0.3705751597881317, | |
| "learning_rate": 8.164554457119286e-06, | |
| "loss": 0.5091387033462524, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 1.8045112781954886, | |
| "grad_norm": 0.3041347563266754, | |
| "learning_rate": 8.504693632219755e-06, | |
| "loss": 0.9318640232086182, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 1.8195488721804511, | |
| "grad_norm": 0.3558288812637329, | |
| "learning_rate": 8.857015522626238e-06, | |
| "loss": 0.6968544721603394, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 1.8345864661654137, | |
| "grad_norm": 0.2972772717475891, | |
| "learning_rate": 9.221029323659478e-06, | |
| "loss": 1.047217607498169, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 1.849624060150376, | |
| "grad_norm": 0.1722293198108673, | |
| "learning_rate": 9.596227943135503e-06, | |
| "loss": 1.0219006538391113, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 1.8646616541353382, | |
| "grad_norm": 1.3821203708648682, | |
| "learning_rate": 9.982088707774262e-06, | |
| "loss": 0.9025890827178955, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 1.8796992481203008, | |
| "grad_norm": 0.15428081154823303, | |
| "learning_rate": 1.0378074091313615e-05, | |
| "loss": 1.1540802717208862, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 1.8947368421052633, | |
| "grad_norm": 0.16087524592876434, | |
| "learning_rate": 1.0783632463314283e-05, | |
| "loss": 0.9519784450531006, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 1.9097744360902256, | |
| "grad_norm": 0.1820048987865448, | |
| "learning_rate": 1.1198198857612926e-05, | |
| "loss": 1.1188257932662964, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 1.9248120300751879, | |
| "grad_norm": 0.8227013349533081, | |
| "learning_rate": 1.1621195759352438e-05, | |
| "loss": 0.8220981955528259, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 1.9398496240601504, | |
| "grad_norm": 0.39509618282318115, | |
| "learning_rate": 1.2052033909493471e-05, | |
| "loss": 0.5426740646362305, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 1.954887218045113, | |
| "grad_norm": 0.2340533286333084, | |
| "learning_rate": 1.2490113125686138e-05, | |
| "loss": 0.8964567184448242, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 1.9699248120300752, | |
| "grad_norm": 0.1862659901380539, | |
| "learning_rate": 1.2934823138358649e-05, | |
| "loss": 0.9239405393600464, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 1.9849624060150375, | |
| "grad_norm": 0.21919912099838257, | |
| "learning_rate": 1.338554444085792e-05, | |
| "loss": 0.8809694051742554, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 0.2186099886894226, | |
| "learning_rate": 1.3841649152458003e-05, | |
| "loss": 0.7980599403381348, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 2.0150375939849625, | |
| "grad_norm": 0.38081610202789307, | |
| "learning_rate": 1.430250189303413e-05, | |
| "loss": 0.463468998670578, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 2.030075187969925, | |
| "grad_norm": 0.2002028077840805, | |
| "learning_rate": 1.4767460668183795e-05, | |
| "loss": 0.6159178614616394, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 2.045112781954887, | |
| "grad_norm": 0.21730108559131622, | |
| "learning_rate": 1.523587776356188e-05, | |
| "loss": 0.46370548009872437, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 2.0601503759398496, | |
| "grad_norm": 1.0244005918502808, | |
| "learning_rate": 1.5707100647184093e-05, | |
| "loss": 0.897263765335083, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 2.075187969924812, | |
| "grad_norm": 0.1658545732498169, | |
| "learning_rate": 1.6180472878441575e-05, | |
| "loss": 0.7874804735183716, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 2.090225563909774, | |
| "grad_norm": 0.47336888313293457, | |
| "learning_rate": 1.6655335022560423e-05, | |
| "loss": 0.7593191266059875, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 2.1052631578947367, | |
| "grad_norm": 0.3983185589313507, | |
| "learning_rate": 1.7131025569232362e-05, | |
| "loss": 0.8093394637107849, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 2.1203007518796992, | |
| "grad_norm": 0.5871224999427795, | |
| "learning_rate": 1.7606881854136644e-05, | |
| "loss": 0.8642159104347229, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 2.1353383458646618, | |
| "grad_norm": 0.16395071148872375, | |
| "learning_rate": 1.8082240982069634e-05, | |
| "loss": 0.5777812004089355, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 2.1503759398496243, | |
| "grad_norm": 0.266190767288208, | |
| "learning_rate": 1.8556440750395985e-05, | |
| "loss": 0.8966842889785767, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 2.1654135338345863, | |
| "grad_norm": 0.26495200395584106, | |
| "learning_rate": 1.9028820571535015e-05, | |
| "loss": 1.0453461408615112, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 2.180451127819549, | |
| "grad_norm": 0.19011439383029938, | |
| "learning_rate": 1.949872239319729e-05, | |
| "loss": 1.0706809759140015, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 2.1954887218045114, | |
| "grad_norm": 0.25460541248321533, | |
| "learning_rate": 1.996549161508929e-05, | |
| "loss": 0.6951987743377686, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 2.2105263157894735, | |
| "grad_norm": 0.2636259198188782, | |
| "learning_rate": 2.042847800080939e-05, | |
| "loss": 0.8458771705627441, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 2.225563909774436, | |
| "grad_norm": 0.3756290674209595, | |
| "learning_rate": 2.0887036583664505e-05, | |
| "loss": 0.3105054199695587, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 2.2406015037593985, | |
| "grad_norm": 0.23921579122543335, | |
| "learning_rate": 2.1340528565145932e-05, | |
| "loss": 1.1170181035995483, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 2.255639097744361, | |
| "grad_norm": 0.2471323013305664, | |
| "learning_rate": 2.1788322204812397e-05, | |
| "loss": 0.9951118230819702, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 2.2706766917293235, | |
| "grad_norm": 0.2469598799943924, | |
| "learning_rate": 2.2229793700340833e-05, | |
| "loss": 1.0403016805648804, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 2.2857142857142856, | |
| "grad_norm": 0.34822267293930054, | |
| "learning_rate": 2.2664328056519028e-05, | |
| "loss": 0.7423543334007263, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 2.300751879699248, | |
| "grad_norm": 0.5992878079414368, | |
| "learning_rate": 2.3091319941969266e-05, | |
| "loss": 0.7819874286651611, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 2.3157894736842106, | |
| "grad_norm": 0.31358832120895386, | |
| "learning_rate": 2.3510174532409867e-05, | |
| "loss": 1.109780192375183, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 2.3308270676691727, | |
| "grad_norm": 0.656645655632019, | |
| "learning_rate": 2.392030833927959e-05, | |
| "loss": 0.4651540219783783, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 2.345864661654135, | |
| "grad_norm": 0.20808559656143188, | |
| "learning_rate": 2.4321150022570873e-05, | |
| "loss": 0.8532482385635376, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 2.3609022556390977, | |
| "grad_norm": 0.20093803107738495, | |
| "learning_rate": 2.471214118673929e-05, | |
| "loss": 0.568276584148407, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 2.3759398496240602, | |
| "grad_norm": 0.2839231491088867, | |
| "learning_rate": 2.509273715858074e-05, | |
| "loss": 0.9199910163879395, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 2.3909774436090228, | |
| "grad_norm": 0.19820740818977356, | |
| "learning_rate": 2.546240774599257e-05, | |
| "loss": 0.8895071744918823, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 2.406015037593985, | |
| "grad_norm": 0.26256436109542847, | |
| "learning_rate": 2.582063797656167e-05, | |
| "loss": 1.0534682273864746, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 2.4210526315789473, | |
| "grad_norm": 0.18550805747509003, | |
| "learning_rate": 2.6166928814950743e-05, | |
| "loss": 1.1147539615631104, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 2.43609022556391, | |
| "grad_norm": 0.509353518486023, | |
| "learning_rate": 2.6500797858083262e-05, | |
| "loss": 0.9222637414932251, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 2.451127819548872, | |
| "grad_norm": 0.18731118738651276, | |
| "learning_rate": 2.682178000715866e-05, | |
| "loss": 1.0500245094299316, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 2.4661654135338344, | |
| "grad_norm": 0.29536113142967224, | |
| "learning_rate": 2.712942811556184e-05, | |
| "loss": 0.8539433479309082, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 2.481203007518797, | |
| "grad_norm": 0.3995274007320404, | |
| "learning_rate": 2.7423313611764086e-05, | |
| "loss": 0.6855474710464478, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 2.4962406015037595, | |
| "grad_norm": 0.23789720237255096, | |
| "learning_rate": 2.77030270963479e-05, | |
| "loss": 1.0410560369491577, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 2.511278195488722, | |
| "grad_norm": 0.21356765925884247, | |
| "learning_rate": 2.796817891232397e-05, | |
| "loss": 0.6004407405853271, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 2.526315789473684, | |
| "grad_norm": 1.3617724180221558, | |
| "learning_rate": 2.8218399687945758e-05, | |
| "loss": 0.7526741027832031, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 2.5413533834586466, | |
| "grad_norm": 0.6006386876106262, | |
| "learning_rate": 2.8453340851265676e-05, | |
| "loss": 0.6869713664054871, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 2.556390977443609, | |
| "grad_norm": 0.3373733460903168, | |
| "learning_rate": 2.8672675115715806e-05, | |
| "loss": 1.0165461301803589, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 2.571428571428571, | |
| "grad_norm": 0.5051329731941223, | |
| "learning_rate": 2.887609693603699e-05, | |
| "loss": 0.9631428718566895, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 2.5864661654135337, | |
| "grad_norm": 0.3386491537094116, | |
| "learning_rate": 2.906332293392093e-05, | |
| "loss": 0.8245996236801147, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 2.601503759398496, | |
| "grad_norm": 0.21825748682022095, | |
| "learning_rate": 2.92340922927725e-05, | |
| "loss": 0.5915822386741638, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 2.6165413533834587, | |
| "grad_norm": 0.24130862951278687, | |
| "learning_rate": 2.9388167121042307e-05, | |
| "loss": 0.7320323586463928, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 2.6315789473684212, | |
| "grad_norm": 0.5413809418678284, | |
| "learning_rate": 2.952533278362327e-05, | |
| "loss": 0.8300567269325256, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 2.6466165413533833, | |
| "grad_norm": 0.25651729106903076, | |
| "learning_rate": 2.9645398200849713e-05, | |
| "loss": 0.7235583066940308, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 2.661654135338346, | |
| "grad_norm": 0.15655282139778137, | |
| "learning_rate": 2.9748196114682335e-05, | |
| "loss": 1.0085736513137817, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 2.6766917293233083, | |
| "grad_norm": 0.7222322225570679, | |
| "learning_rate": 2.983358332170829e-05, | |
| "loss": 0.7790261507034302, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 2.6917293233082704, | |
| "grad_norm": 0.21224772930145264, | |
| "learning_rate": 2.9901440872631778e-05, | |
| "loss": 0.42803671956062317, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 2.706766917293233, | |
| "grad_norm": 0.22365406155586243, | |
| "learning_rate": 2.9951674237977273e-05, | |
| "loss": 1.0629819631576538, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 2.7218045112781954, | |
| "grad_norm": 0.2804076373577118, | |
| "learning_rate": 2.998421343977452e-05, | |
| "loss": 0.550415575504303, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 2.736842105263158, | |
| "grad_norm": 0.2370826154947281, | |
| "learning_rate": 2.9999013149041885e-05, | |
| "loss": 0.721561074256897, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 2.7518796992481205, | |
| "grad_norm": 0.835011899471283, | |
| "learning_rate": 2.999605274893222e-05, | |
| "loss": 0.8219574689865112, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 2.7669172932330826, | |
| "grad_norm": 0.14573420584201813, | |
| "learning_rate": 2.9975336363453326e-05, | |
| "loss": 0.7218166589736938, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 2.781954887218045, | |
| "grad_norm": 3.228212356567383, | |
| "learning_rate": 2.993689285172299e-05, | |
| "loss": 0.8398270010948181, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 2.7969924812030076, | |
| "grad_norm": 0.5311354994773865, | |
| "learning_rate": 2.9880775767766535e-05, | |
| "loss": 0.8649424314498901, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 2.8120300751879697, | |
| "grad_norm": 0.44514158368110657, | |
| "learning_rate": 2.980706328591302e-05, | |
| "loss": 0.7094336152076721, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 2.827067669172932, | |
| "grad_norm": 0.41514015197753906, | |
| "learning_rate": 2.971585809189387e-05, | |
| "loss": 0.9906347393989563, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 2.8421052631578947, | |
| "grad_norm": 0.14243760704994202, | |
| "learning_rate": 2.9607287239795747e-05, | |
| "loss": 1.0890015363693237, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 2.857142857142857, | |
| "grad_norm": 0.4031289517879486, | |
| "learning_rate": 2.94815019750669e-05, | |
| "loss": 0.7638394832611084, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 2.8721804511278197, | |
| "grad_norm": 0.2597931921482086, | |
| "learning_rate": 2.933867752382353e-05, | |
| "loss": 0.9143038392066956, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 2.887218045112782, | |
| "grad_norm": 0.3925493061542511, | |
| "learning_rate": 2.917901284874975e-05, | |
| "loss": 0.9326249957084656, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 2.9022556390977443, | |
| "grad_norm": 0.31625744700431824, | |
| "learning_rate": 2.9002730371931074e-05, | |
| "loss": 0.6936108469963074, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 2.917293233082707, | |
| "grad_norm": 0.2891203463077545, | |
| "learning_rate": 2.881007566500768e-05, | |
| "loss": 0.9043726921081543, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 2.932330827067669, | |
| "grad_norm": 0.30182725191116333, | |
| "learning_rate": 2.8601317107078944e-05, | |
| "loss": 0.8188687562942505, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 2.9473684210526314, | |
| "grad_norm": 0.43725159764289856, | |
| "learning_rate": 2.8376745510835926e-05, | |
| "loss": 0.9015698432922363, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 2.962406015037594, | |
| "grad_norm": 0.39825642108917236, | |
| "learning_rate": 2.813667371744254e-05, | |
| "loss": 0.7247455716133118, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 2.9774436090225564, | |
| "grad_norm": 0.15052802860736847, | |
| "learning_rate": 2.7881436160729783e-05, | |
| "loss": 0.9713034629821777, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 2.992481203007519, | |
| "grad_norm": 0.4360320270061493, | |
| "learning_rate": 2.7611388401310196e-05, | |
| "loss": 0.7928329706192017, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 3.007518796992481, | |
| "grad_norm": 0.20822873711585999, | |
| "learning_rate": 2.7326906631261394e-05, | |
| "loss": 0.7827808856964111, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 3.0225563909774436, | |
| "grad_norm": 0.09618931263685226, | |
| "learning_rate": 2.7028387150068913e-05, | |
| "loss": 0.6030799150466919, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 3.037593984962406, | |
| "grad_norm": 0.3093872666358948, | |
| "learning_rate": 2.6716245812558134e-05, | |
| "loss": 0.7962419390678406, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 3.0526315789473686, | |
| "grad_norm": 0.3837755024433136, | |
| "learning_rate": 2.6390917449584653e-05, | |
| "loss": 0.6203740239143372, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 3.0676691729323307, | |
| "grad_norm": 0.14874151349067688, | |
| "learning_rate": 2.605285526228978e-05, | |
| "loss": 0.51124107837677, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 3.082706766917293, | |
| "grad_norm": 0.21628743410110474, | |
| "learning_rate": 2.570253019076529e-05, | |
| "loss": 0.7276190519332886, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 3.0977443609022557, | |
| "grad_norm": 0.284242182970047, | |
| "learning_rate": 2.5340430258006786e-05, | |
| "loss": 0.5125940442085266, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 3.112781954887218, | |
| "grad_norm": 0.25154373049736023, | |
| "learning_rate": 2.496705989006952e-05, | |
| "loss": 0.8815844058990479, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 3.1278195488721803, | |
| "grad_norm": 0.14138440787792206, | |
| "learning_rate": 2.4582939213373886e-05, | |
| "loss": 0.37600424885749817, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 3.142857142857143, | |
| "grad_norm": 0.3758879005908966, | |
| "learning_rate": 2.4188603330139344e-05, | |
| "loss": 0.6696433424949646, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 3.1578947368421053, | |
| "grad_norm": 0.16141177713871002, | |
| "learning_rate": 2.378460157295626e-05, | |
| "loss": 0.6787968277931213, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 3.172932330827068, | |
| "grad_norm": 0.29796668887138367, | |
| "learning_rate": 2.3371496739533913e-05, | |
| "loss": 0.5915691256523132, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 3.18796992481203, | |
| "grad_norm": 0.4774704575538635, | |
| "learning_rate": 2.294986430869094e-05, | |
| "loss": 0.733458399772644, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 3.2030075187969924, | |
| "grad_norm": 0.36931291222572327, | |
| "learning_rate": 2.252029163868019e-05, | |
| "loss": 0.6868959069252014, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 3.218045112781955, | |
| "grad_norm": 0.45511841773986816, | |
| "learning_rate": 2.208337714896483e-05, | |
| "loss": 0.569706380367279, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 3.2330827067669174, | |
| "grad_norm": 1.0468262434005737, | |
| "learning_rate": 2.1639729486585647e-05, | |
| "loss": 0.4343474209308624, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 3.2481203007518795, | |
| "grad_norm": 0.131326824426651, | |
| "learning_rate": 2.1189966678280585e-05, | |
| "loss": 0.4525618553161621, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 3.263157894736842, | |
| "grad_norm": 0.1383962780237198, | |
| "learning_rate": 2.0734715269537963e-05, | |
| "loss": 0.44801121950149536, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 3.2781954887218046, | |
| "grad_norm": 0.34697940945625305, | |
| "learning_rate": 2.0274609451782568e-05, | |
| "loss": 0.42984333634376526, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 3.293233082706767, | |
| "grad_norm": 0.10286783427000046, | |
| "learning_rate": 1.9810290178910406e-05, | |
| "loss": 0.4518528878688812, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 3.308270676691729, | |
| "grad_norm": 0.18340881168842316, | |
| "learning_rate": 1.934240427440311e-05, | |
| "loss": 0.9285587072372437, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 3.3233082706766917, | |
| "grad_norm": 0.18978752195835114, | |
| "learning_rate": 1.8871603530265477e-05, | |
| "loss": 0.39083921909332275, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 3.338345864661654, | |
| "grad_norm": 0.21235691010951996, | |
| "learning_rate": 1.8398543799041773e-05, | |
| "loss": 0.6497979760169983, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 3.3533834586466167, | |
| "grad_norm": 0.9397839903831482, | |
| "learning_rate": 1.792388408017536e-05, | |
| "loss": 0.5017030239105225, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 3.3684210526315788, | |
| "grad_norm": 0.23365262150764465, | |
| "learning_rate": 1.744828560198448e-05, | |
| "loss": 0.7379826903343201, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 3.3834586466165413, | |
| "grad_norm": 0.42739665508270264, | |
| "learning_rate": 1.697241090053319e-05, | |
| "loss": 0.7720116972923279, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 3.398496240601504, | |
| "grad_norm": 0.2359744757413864, | |
| "learning_rate": 1.6496922896680423e-05, | |
| "loss": 0.7877475619316101, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 3.4135338345864663, | |
| "grad_norm": 0.4221789240837097, | |
| "learning_rate": 1.6022483972593128e-05, | |
| "loss": 0.7371859550476074, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 3.4285714285714284, | |
| "grad_norm": 0.46123459935188293, | |
| "learning_rate": 1.5549755049009714e-05, | |
| "loss": 0.731837809085846, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 3.443609022556391, | |
| "grad_norm": 0.20335260033607483, | |
| "learning_rate": 1.5079394664539421e-05, | |
| "loss": 0.48273712396621704, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 3.4586466165413534, | |
| "grad_norm": 0.2316899299621582, | |
| "learning_rate": 1.4612058058280153e-05, | |
| "loss": 0.7381947636604309, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 3.473684210526316, | |
| "grad_norm": 0.3751467168331146, | |
| "learning_rate": 1.4148396257032674e-05, | |
| "loss": 0.769965648651123, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 3.488721804511278, | |
| "grad_norm": 0.5033459663391113, | |
| "learning_rate": 1.3689055168382717e-05, | |
| "loss": 0.6628371477127075, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 3.5037593984962405, | |
| "grad_norm": 0.30648085474967957, | |
| "learning_rate": 1.3234674680914651e-05, | |
| "loss": 0.7021836638450623, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 3.518796992481203, | |
| "grad_norm": 0.19549153745174408, | |
| "learning_rate": 1.2785887772809783e-05, | |
| "loss": 0.5976605415344238, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 3.5338345864661656, | |
| "grad_norm": 0.47317057847976685, | |
| "learning_rate": 1.2343319630071227e-05, | |
| "loss": 0.678418755531311, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 3.548872180451128, | |
| "grad_norm": 0.3564242720603943, | |
| "learning_rate": 1.1907586775603957e-05, | |
| "loss": 0.6626768708229065, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 3.56390977443609, | |
| "grad_norm": 0.33226093649864197, | |
| "learning_rate": 1.147929621036279e-05, | |
| "loss": 0.7116915583610535, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 3.5789473684210527, | |
| "grad_norm": 0.3433665931224823, | |
| "learning_rate": 1.1059044567765164e-05, | |
| "loss": 0.36730286478996277, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 3.593984962406015, | |
| "grad_norm": 0.17942893505096436, | |
| "learning_rate": 1.0647417282546353e-05, | |
| "loss": 0.3575655221939087, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 3.6090225563909772, | |
| "grad_norm": 0.14913895726203918, | |
| "learning_rate": 1.024498777521529e-05, | |
| "loss": 0.751462996006012, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 3.6240601503759398, | |
| "grad_norm": 0.6876167058944702, | |
| "learning_rate": 9.852316653246724e-06, | |
| "loss": 0.7515479922294617, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 3.6390977443609023, | |
| "grad_norm": 0.30825933814048767, | |
| "learning_rate": 9.469950930122665e-06, | |
| "loss": 0.6766018867492676, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 3.654135338345865, | |
| "grad_norm": 0.3747425675392151, | |
| "learning_rate": 9.098423263311226e-06, | |
| "loss": 0.3269270956516266, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 3.6691729323308273, | |
| "grad_norm": 0.19600830972194672, | |
| "learning_rate": 8.738251212244036e-06, | |
| "loss": 0.6345582008361816, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 3.6842105263157894, | |
| "grad_norm": 0.21161755919456482, | |
| "learning_rate": 8.389936517326165e-06, | |
| "loss": 0.8583235144615173, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 3.699248120300752, | |
| "grad_norm": 2.3246822357177734, | |
| "learning_rate": 8.053964400982803e-06, | |
| "loss": 0.7647910714149475, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 3.7142857142857144, | |
| "grad_norm": 0.3167845606803894, | |
| "learning_rate": 7.730802891716579e-06, | |
| "loss": 0.3876282870769501, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 3.7293233082706765, | |
| "grad_norm": 0.16611672937870026, | |
| "learning_rate": 7.420902172116848e-06, | |
| "loss": 0.8268077969551086, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 3.744360902255639, | |
| "grad_norm": 0.27863407135009766, | |
| "learning_rate": 7.124693951729393e-06, | |
| "loss": 0.9286668300628662, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 3.7593984962406015, | |
| "grad_norm": 0.37909525632858276, | |
| "learning_rate": 6.842590865660255e-06, | |
| "loss": 0.6480289101600647, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 3.774436090225564, | |
| "grad_norm": 0.2283431440591812, | |
| "learning_rate": 6.574985899751219e-06, | |
| "loss": 0.576987624168396, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 3.7894736842105265, | |
| "grad_norm": 0.6403146982192993, | |
| "learning_rate": 6.322251843127883e-06, | |
| "loss": 0.5665578842163086, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 3.8045112781954886, | |
| "grad_norm": 0.24255450069904327, | |
| "learning_rate": 6.0847407688830226e-06, | |
| "loss": 0.44220831990242004, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 3.819548872180451, | |
| "grad_norm": 0.17681249976158142, | |
| "learning_rate": 5.862783543618414e-06, | |
| "loss": 0.6706622242927551, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 3.8345864661654137, | |
| "grad_norm": 0.6901529431343079, | |
| "learning_rate": 5.65668936652867e-06, | |
| "loss": 0.45284244418144226, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 3.8496240601503757, | |
| "grad_norm": 0.2059166431427002, | |
| "learning_rate": 5.466745338668931e-06, | |
| "loss": 0.6849936246871948, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 3.8646616541353382, | |
| "grad_norm": 0.17384979128837585, | |
| "learning_rate": 5.293216063006581e-06, | |
| "loss": 0.6412226557731628, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 3.8796992481203008, | |
| "grad_norm": 1.0503121614456177, | |
| "learning_rate": 5.136343275814039e-06, | |
| "loss": 0.8608755469322205, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 3.8947368421052633, | |
| "grad_norm": 0.19333027303218842, | |
| "learning_rate": 4.9963455099162615e-06, | |
| "loss": 0.5098147392272949, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 3.909774436090226, | |
| "grad_norm": 0.14984972774982452, | |
| "learning_rate": 4.8734177902619205e-06, | |
| "loss": 0.7260234951972961, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 3.924812030075188, | |
| "grad_norm": 0.14300547540187836, | |
| "learning_rate": 4.7677313622423905e-06, | |
| "loss": 0.8742654919624329, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 3.9398496240601504, | |
| "grad_norm": 0.1549258679151535, | |
| "learning_rate": 4.6794334531371056e-06, | |
| "loss": 0.9179413318634033, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 3.954887218045113, | |
| "grad_norm": 3.7465577125549316, | |
| "learning_rate": 4.608647067017448e-06, | |
| "loss": 0.8616862297058105, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 3.969924812030075, | |
| "grad_norm": 0.1470378190279007, | |
| "learning_rate": 4.555470813395014e-06, | |
| "loss": 0.5497387647628784, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 3.9849624060150375, | |
| "grad_norm": 0.2949855327606201, | |
| "learning_rate": 4.519978769852865e-06, | |
| "loss": 0.42557334899902344, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "grad_norm": 0.2642301321029663, | |
| "learning_rate": 4.502220378851213e-06, | |
| "loss": 0.6198008060455322, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "step": 532, | |
| "total_flos": 3.873354436822696e+18, | |
| "train_loss": 0.8859393315431767, | |
| "train_runtime": 11385.0199, | |
| "train_samples_per_second": 5.607, | |
| "train_steps_per_second": 0.047 | |
| } | |
| ], | |
| "logging_steps": 2, | |
| "max_steps": 532, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 4, | |
| "save_steps": 300, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 3.873354436822696e+18, | |
| "train_batch_size": 3, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |