Image-Text-to-Text
Transformers
Safetensors
qwen3_5
llama-factory
full
Generated from Trainer
conversational
Instructions to use furproxy/9b-29 with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use furproxy/9b-29 with Transformers:
```python
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("image-text-to-text", model="furproxy/9b-29")
messages = [
    {
        "role": "user",
        "content": [
            {"type": "image", "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/p-blog/candy.JPG"},
            {"type": "text", "text": "What animal is on the candy?"}
        ]
    },
]
pipe(text=messages)
```
```python
# Load model directly
from transformers import AutoProcessor, AutoModelForImageTextToText

processor = AutoProcessor.from_pretrained("furproxy/9b-29")
model = AutoModelForImageTextToText.from_pretrained("furproxy/9b-29")
messages = [
    {
        "role": "user",
        "content": [
            {"type": "image", "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/p-blog/candy.JPG"},
            {"type": "text", "text": "What animal is on the candy?"}
        ]
    },
]
inputs = processor.apply_chat_template(
    messages,
    add_generation_prompt=True,
    tokenize=True,
    return_dict=True,
    return_tensors="pt",
).to(model.device)

outputs = model.generate(**inputs, max_new_tokens=40)
print(processor.decode(outputs[0][inputs["input_ids"].shape[-1]:]))
```
- Notebooks
- Google Colab
- Kaggle
- Local Apps Settings
- vLLM
How to use furproxy/9b-29 with vLLM:
Install from pip and serve model
```bash
# Install vLLM from pip:
pip install vllm

# Start the vLLM server:
vllm serve "furproxy/9b-29"

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/chat/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "furproxy/9b-29",
    "messages": [
      {
        "role": "user",
        "content": [
          {
            "type": "text",
            "text": "Describe this image in one sentence."
          },
          {
            "type": "image_url",
            "image_url": {
              "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
            }
          }
        ]
      }
    ]
  }'
```
Use Docker
docker model run hf.co/furproxy/9b-29
- SGLang
How to use furproxy/9b-29 with SGLang:
Install from pip and serve model
```bash
# Install SGLang from pip:
pip install sglang

# Start the SGLang server:
python3 -m sglang.launch_server \
  --model-path "furproxy/9b-29" \
  --host 0.0.0.0 \
  --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "furproxy/9b-29",
    "messages": [
      {
        "role": "user",
        "content": [
          {
            "type": "text",
            "text": "Describe this image in one sentence."
          },
          {
            "type": "image_url",
            "image_url": {
              "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
            }
          }
        ]
      }
    ]
  }'
```
Use Docker images
```bash
docker run --gpus all \
  --shm-size 32g \
  -p 30000:30000 \
  -v ~/.cache/huggingface:/root/.cache/huggingface \
  --env "HF_TOKEN=<secret>" \
  --ipc=host \
  lmsysorg/sglang:latest \
  python3 -m sglang.launch_server \
    --model-path "furproxy/9b-29" \
    --host 0.0.0.0 \
    --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "furproxy/9b-29",
    "messages": [
      {
        "role": "user",
        "content": [
          {
            "type": "text",
            "text": "Describe this image in one sentence."
          },
          {
            "type": "image_url",
            "image_url": {
              "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
            }
          }
        ]
      }
    ]
  }'
```
- Docker Model Runner
How to use furproxy/9b-29 with Docker Model Runner:
docker model run hf.co/furproxy/9b-29
| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 1.0, | |
| "eval_steps": 500, | |
| "global_step": 327, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0061162079510703364, | |
| "grad_norm": 0.8125, | |
| "learning_rate": 2.3529411764705885e-06, | |
| "loss": 1.96957528591156, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.012232415902140673, | |
| "grad_norm": 0.65234375, | |
| "learning_rate": 7.058823529411766e-06, | |
| "loss": 2.0347838401794434, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.01834862385321101, | |
| "grad_norm": 0.578125, | |
| "learning_rate": 1.1764705882352942e-05, | |
| "loss": 1.9940425157546997, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.024464831804281346, | |
| "grad_norm": 0.640625, | |
| "learning_rate": 1.647058823529412e-05, | |
| "loss": 2.06846284866333, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.03058103975535168, | |
| "grad_norm": 0.80859375, | |
| "learning_rate": 2.1176470588235296e-05, | |
| "loss": 1.9072656631469727, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.03669724770642202, | |
| "grad_norm": 0.62109375, | |
| "learning_rate": 2.5882352941176475e-05, | |
| "loss": 1.8469384908676147, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.04281345565749235, | |
| "grad_norm": 0.67578125, | |
| "learning_rate": 3.0588235294117644e-05, | |
| "loss": 1.822761058807373, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.04892966360856269, | |
| "grad_norm": 0.59765625, | |
| "learning_rate": 3.529411764705883e-05, | |
| "loss": 1.7572168111801147, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.05504587155963303, | |
| "grad_norm": 0.45703125, | |
| "learning_rate": 4e-05, | |
| "loss": 1.7028628587722778, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.06116207951070336, | |
| "grad_norm": 0.6796875, | |
| "learning_rate": 3.999630287622123e-05, | |
| "loss": 1.8212647438049316, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.0672782874617737, | |
| "grad_norm": 0.40234375, | |
| "learning_rate": 3.9985213023632064e-05, | |
| "loss": 1.6563327312469482, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.07339449541284404, | |
| "grad_norm": 0.4921875, | |
| "learning_rate": 3.996673499785002e-05, | |
| "loss": 1.6510090827941895, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.07951070336391437, | |
| "grad_norm": 0.7109375, | |
| "learning_rate": 3.99408763894916e-05, | |
| "loss": 1.5338315963745117, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.0856269113149847, | |
| "grad_norm": 0.404296875, | |
| "learning_rate": 3.9907647821054114e-05, | |
| "loss": 1.451381802558899, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.09174311926605505, | |
| "grad_norm": 0.30859375, | |
| "learning_rate": 3.986706294255207e-05, | |
| "loss": 1.5903527736663818, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.09785932721712538, | |
| "grad_norm": 0.5078125, | |
| "learning_rate": 3.981913842590985e-05, | |
| "loss": 1.5307936668395996, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.10397553516819572, | |
| "grad_norm": 0.314453125, | |
| "learning_rate": 3.9763893958113005e-05, | |
| "loss": 1.413094401359558, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.11009174311926606, | |
| "grad_norm": 0.625, | |
| "learning_rate": 3.970135223312103e-05, | |
| "loss": 1.4268063306808472, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.1162079510703364, | |
| "grad_norm": 0.291015625, | |
| "learning_rate": 3.96315389425449e-05, | |
| "loss": 1.4923566579818726, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.12232415902140673, | |
| "grad_norm": 0.298828125, | |
| "learning_rate": 3.9554482765093134e-05, | |
| "loss": 1.574055790901184, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.12844036697247707, | |
| "grad_norm": 0.474609375, | |
| "learning_rate": 3.947021535479083e-05, | |
| "loss": 1.415814995765686, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.1345565749235474, | |
| "grad_norm": 0.37109375, | |
| "learning_rate": 3.937877132797649e-05, | |
| "loss": 1.4422385692596436, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.14067278287461774, | |
| "grad_norm": 0.375, | |
| "learning_rate": 3.928018824908187e-05, | |
| "loss": 1.4337655305862427, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.14678899082568808, | |
| "grad_norm": 0.478515625, | |
| "learning_rate": 3.917450661520088e-05, | |
| "loss": 1.5849357843399048, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.1529051987767584, | |
| "grad_norm": 0.578125, | |
| "learning_rate": 3.906176983945371e-05, | |
| "loss": 1.5441187620162964, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.15902140672782875, | |
| "grad_norm": 0.310546875, | |
| "learning_rate": 3.894202423315306e-05, | |
| "loss": 1.4055734872817993, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.1651376146788991, | |
| "grad_norm": 0.36328125, | |
| "learning_rate": 3.8815318986779875e-05, | |
| "loss": 1.543579339981079, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.1712538226299694, | |
| "grad_norm": 0.328125, | |
| "learning_rate": 3.868170614977628e-05, | |
| "loss": 1.4346128702163696, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.17737003058103976, | |
| "grad_norm": 0.310546875, | |
| "learning_rate": 3.854124060916415e-05, | |
| "loss": 1.5318936109542847, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.1834862385321101, | |
| "grad_norm": 0.4140625, | |
| "learning_rate": 3.839398006699797e-05, | |
| "loss": 1.3749366998672485, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.18960244648318042, | |
| "grad_norm": 0.47265625, | |
| "learning_rate": 3.823998501666131e-05, | |
| "loss": 1.436793565750122, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.19571865443425077, | |
| "grad_norm": 0.40625, | |
| "learning_rate": 3.8079318718016666e-05, | |
| "loss": 1.3264566659927368, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.2018348623853211, | |
| "grad_norm": 1.34375, | |
| "learning_rate": 3.7912047171418815e-05, | |
| "loss": 1.4578938484191895, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.20795107033639143, | |
| "grad_norm": 0.490234375, | |
| "learning_rate": 3.773823909060248e-05, | |
| "loss": 1.3888816833496094, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.21406727828746178, | |
| "grad_norm": 0.33984375, | |
| "learning_rate": 3.755796587445528e-05, | |
| "loss": 1.4028871059417725, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.22018348623853212, | |
| "grad_norm": 0.287109375, | |
| "learning_rate": 3.7371301577687666e-05, | |
| "loss": 1.3480572700500488, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.22629969418960244, | |
| "grad_norm": 1.4453125, | |
| "learning_rate": 3.717832288041188e-05, | |
| "loss": 1.4603177309036255, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.2324159021406728, | |
| "grad_norm": 0.3984375, | |
| "learning_rate": 3.697910905664243e-05, | |
| "loss": 1.4285566806793213, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.23853211009174313, | |
| "grad_norm": 0.353515625, | |
| "learning_rate": 3.6773741941730975e-05, | |
| "loss": 1.462870478630066, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.24464831804281345, | |
| "grad_norm": 0.578125, | |
| "learning_rate": 3.656230589874905e-05, | |
| "loss": 1.3728106021881104, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.25076452599388377, | |
| "grad_norm": 0.35546875, | |
| "learning_rate": 3.6344887783832474e-05, | |
| "loss": 1.3789210319519043, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.25688073394495414, | |
| "grad_norm": 0.318359375, | |
| "learning_rate": 3.6121576910501517e-05, | |
| "loss": 1.438659429550171, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.26299694189602446, | |
| "grad_norm": 0.6796875, | |
| "learning_rate": 3.589246501297172e-05, | |
| "loss": 1.4267497062683105, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.2691131498470948, | |
| "grad_norm": 0.265625, | |
| "learning_rate": 3.565764620847024e-05, | |
| "loss": 1.297946572303772, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.27522935779816515, | |
| "grad_norm": 0.50390625, | |
| "learning_rate": 3.541721695857328e-05, | |
| "loss": 1.3422534465789795, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.28134556574923547, | |
| "grad_norm": 0.302734375, | |
| "learning_rate": 3.5171276029580485e-05, | |
| "loss": 1.3573137521743774, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.2874617737003058, | |
| "grad_norm": 0.58203125, | |
| "learning_rate": 3.4919924451942625e-05, | |
| "loss": 1.4575246572494507, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.29357798165137616, | |
| "grad_norm": 0.376953125, | |
| "learning_rate": 3.466326547875907e-05, | |
| "loss": 1.3683665990829468, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.2996941896024465, | |
| "grad_norm": 0.244140625, | |
| "learning_rate": 3.440140454336236e-05, | |
| "loss": 1.2624497413635254, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.3058103975535168, | |
| "grad_norm": 0.369140625, | |
| "learning_rate": 3.413444921600705e-05, | |
| "loss": 1.4250640869140625, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.3119266055045872, | |
| "grad_norm": 0.69921875, | |
| "learning_rate": 3.3862509159680775e-05, | |
| "loss": 1.4060899019241333, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.3180428134556575, | |
| "grad_norm": 0.416015625, | |
| "learning_rate": 3.3585696085055594e-05, | |
| "loss": 1.44656503200531, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.3241590214067278, | |
| "grad_norm": 0.63671875, | |
| "learning_rate": 3.330412370459823e-05, | |
| "loss": 1.5084904432296753, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.3302752293577982, | |
| "grad_norm": 0.408203125, | |
| "learning_rate": 3.301790768585793e-05, | |
| "loss": 1.4001638889312744, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.3363914373088685, | |
| "grad_norm": 0.337890625, | |
| "learning_rate": 3.272716560395123e-05, | |
| "loss": 1.4370101690292358, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.3425076452599388, | |
| "grad_norm": 0.61328125, | |
| "learning_rate": 3.243201689326306e-05, | |
| "loss": 1.4086581468582153, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.3486238532110092, | |
| "grad_norm": 0.48828125, | |
| "learning_rate": 3.213258279838416e-05, | |
| "loss": 1.389402151107788, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.3547400611620795, | |
| "grad_norm": 0.3515625, | |
| "learning_rate": 3.182898632430479e-05, | |
| "loss": 1.371553897857666, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.36085626911314983, | |
| "grad_norm": 0.392578125, | |
| "learning_rate": 3.1521352185885326e-05, | |
| "loss": 1.4507074356079102, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 0.3669724770642202, | |
| "grad_norm": 0.703125, | |
| "learning_rate": 3.120980675662449e-05, | |
| "loss": 1.3349779844284058, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.3730886850152905, | |
| "grad_norm": 0.357421875, | |
| "learning_rate": 3.0894478016746106e-05, | |
| "loss": 1.3503204584121704, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 0.37920489296636084, | |
| "grad_norm": 0.435546875, | |
| "learning_rate": 3.0575495500626015e-05, | |
| "loss": 1.4363230466842651, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.3853211009174312, | |
| "grad_norm": 0.25390625, | |
| "learning_rate": 3.025299024358036e-05, | |
| "loss": 1.3254035711288452, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 0.39143730886850153, | |
| "grad_norm": 0.357421875, | |
| "learning_rate": 2.9927094728037422e-05, | |
| "loss": 1.410750389099121, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.39755351681957185, | |
| "grad_norm": 0.4609375, | |
| "learning_rate": 2.9597942829114976e-05, | |
| "loss": 1.397456169128418, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.4036697247706422, | |
| "grad_norm": 0.5, | |
| "learning_rate": 2.926566975962551e-05, | |
| "loss": 1.3679834604263306, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 0.40978593272171254, | |
| "grad_norm": 0.380859375, | |
| "learning_rate": 2.8930412014531924e-05, | |
| "loss": 1.4191218614578247, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 0.41590214067278286, | |
| "grad_norm": 0.41796875, | |
| "learning_rate": 2.859230731487661e-05, | |
| "loss": 1.2690709829330444, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.42201834862385323, | |
| "grad_norm": 0.439453125, | |
| "learning_rate": 2.8251494551206767e-05, | |
| "loss": 1.3012070655822754, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 0.42813455657492355, | |
| "grad_norm": 0.3515625, | |
| "learning_rate": 2.7908113726519356e-05, | |
| "loss": 1.2863593101501465, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.43425076452599387, | |
| "grad_norm": 0.498046875, | |
| "learning_rate": 2.7562305898749054e-05, | |
| "loss": 1.442170262336731, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 0.44036697247706424, | |
| "grad_norm": 0.3984375, | |
| "learning_rate": 2.7214213122822864e-05, | |
| "loss": 1.3614513874053955, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.44648318042813456, | |
| "grad_norm": 0.28125, | |
| "learning_rate": 2.6863978392305118e-05, | |
| "loss": 1.2583218812942505, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 0.4525993883792049, | |
| "grad_norm": 0.349609375, | |
| "learning_rate": 2.651174558065697e-05, | |
| "loss": 1.4321554899215698, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 0.45871559633027525, | |
| "grad_norm": 0.341796875, | |
| "learning_rate": 2.6157659382134384e-05, | |
| "loss": 1.414106845855713, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.4648318042813456, | |
| "grad_norm": 0.57421875, | |
| "learning_rate": 2.5801865252348935e-05, | |
| "loss": 1.4366655349731445, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 0.4709480122324159, | |
| "grad_norm": 0.392578125, | |
| "learning_rate": 2.5444509348515912e-05, | |
| "loss": 1.3970258235931396, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 0.47706422018348627, | |
| "grad_norm": 3.078125, | |
| "learning_rate": 2.508573846941417e-05, | |
| "loss": 1.4000701904296875, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 0.4831804281345566, | |
| "grad_norm": 0.431640625, | |
| "learning_rate": 2.472569999508238e-05, | |
| "loss": 1.4267001152038574, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 0.4892966360856269, | |
| "grad_norm": 0.33203125, | |
| "learning_rate": 2.4364541826276638e-05, | |
| "loss": 1.4192265272140503, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.4954128440366973, | |
| "grad_norm": 0.283203125, | |
| "learning_rate": 2.4002412323714026e-05, | |
| "loss": 1.314334511756897, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 0.5015290519877675, | |
| "grad_norm": 0.56640625, | |
| "learning_rate": 2.363946024712732e-05, | |
| "loss": 1.355038046836853, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 0.5076452599388379, | |
| "grad_norm": 0.6484375, | |
| "learning_rate": 2.3275834694155716e-05, | |
| "loss": 1.2806464433670044, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 0.5137614678899083, | |
| "grad_norm": 0.6015625, | |
| "learning_rate": 2.2911685039096834e-05, | |
| "loss": 1.3307536840438843, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 0.5198776758409785, | |
| "grad_norm": 0.333984375, | |
| "learning_rate": 2.2547160871544973e-05, | |
| "loss": 1.3525010347366333, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.5259938837920489, | |
| "grad_norm": 0.404296875, | |
| "learning_rate": 2.2182411934941004e-05, | |
| "loss": 1.3771812915802002, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 0.5321100917431193, | |
| "grad_norm": 0.337890625, | |
| "learning_rate": 2.1817588065059008e-05, | |
| "loss": 1.342699408531189, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 0.5382262996941896, | |
| "grad_norm": 0.52734375, | |
| "learning_rate": 2.145283912845504e-05, | |
| "loss": 1.3631731271743774, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 0.5443425076452599, | |
| "grad_norm": 0.4375, | |
| "learning_rate": 2.1088314960903172e-05, | |
| "loss": 1.3468397855758667, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 0.5504587155963303, | |
| "grad_norm": 0.341796875, | |
| "learning_rate": 2.072416530584429e-05, | |
| "loss": 1.3079278469085693, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.5565749235474006, | |
| "grad_norm": 0.478515625, | |
| "learning_rate": 2.0360539752872688e-05, | |
| "loss": 1.3927726745605469, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 0.5626911314984709, | |
| "grad_norm": 0.71875, | |
| "learning_rate": 1.9997587676285976e-05, | |
| "loss": 1.3745498657226562, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 0.5688073394495413, | |
| "grad_norm": 0.259765625, | |
| "learning_rate": 1.9635458173723365e-05, | |
| "loss": 1.3934822082519531, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 0.5749235474006116, | |
| "grad_norm": 0.3203125, | |
| "learning_rate": 1.9274300004917625e-05, | |
| "loss": 1.4668513536453247, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 0.581039755351682, | |
| "grad_norm": 0.375, | |
| "learning_rate": 1.8914261530585842e-05, | |
| "loss": 1.2894922494888306, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.5871559633027523, | |
| "grad_norm": 0.322265625, | |
| "learning_rate": 1.855549065148409e-05, | |
| "loss": 1.3256540298461914, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 0.5932721712538226, | |
| "grad_norm": 0.92578125, | |
| "learning_rate": 1.8198134747651067e-05, | |
| "loss": 1.3802309036254883, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 0.599388379204893, | |
| "grad_norm": 0.33203125, | |
| "learning_rate": 1.7842340617865625e-05, | |
| "loss": 1.3032485246658325, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 0.6055045871559633, | |
| "grad_norm": 0.54296875, | |
| "learning_rate": 1.748825441934303e-05, | |
| "loss": 1.236058235168457, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 0.6116207951070336, | |
| "grad_norm": 0.376953125, | |
| "learning_rate": 1.713602160769489e-05, | |
| "loss": 1.4403380155563354, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.617737003058104, | |
| "grad_norm": 0.359375, | |
| "learning_rate": 1.6785786877177145e-05, | |
| "loss": 1.3596861362457275, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 0.6238532110091743, | |
| "grad_norm": 0.408203125, | |
| "learning_rate": 1.643769410125095e-05, | |
| "loss": 1.3777844905853271, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 0.6299694189602446, | |
| "grad_norm": 0.4609375, | |
| "learning_rate": 1.609188627348065e-05, | |
| "loss": 1.3549814224243164, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 0.636085626911315, | |
| "grad_norm": 0.60546875, | |
| "learning_rate": 1.574850544879324e-05, | |
| "loss": 1.2403396368026733, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 0.6422018348623854, | |
| "grad_norm": 0.421875, | |
| "learning_rate": 1.5407692685123392e-05, | |
| "loss": 1.3685152530670166, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.6483180428134556, | |
| "grad_norm": 0.298828125, | |
| "learning_rate": 1.5069587985468078e-05, | |
| "loss": 1.3501070737838745, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 0.654434250764526, | |
| "grad_norm": 0.625, | |
| "learning_rate": 1.4734330240374504e-05, | |
| "loss": 1.292812466621399, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 0.6605504587155964, | |
| "grad_norm": 0.3125, | |
| "learning_rate": 1.4402057170885026e-05, | |
| "loss": 1.287298560142517, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 0.6666666666666666, | |
| "grad_norm": 0.27734375, | |
| "learning_rate": 1.4072905271962585e-05, | |
| "loss": 1.3475459814071655, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 0.672782874617737, | |
| "grad_norm": 0.376953125, | |
| "learning_rate": 1.3747009756419657e-05, | |
| "loss": 1.2376188039779663, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.6788990825688074, | |
| "grad_norm": 0.47265625, | |
| "learning_rate": 1.3424504499373994e-05, | |
| "loss": 1.315477728843689, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 0.6850152905198776, | |
| "grad_norm": 0.365234375, | |
| "learning_rate": 1.31055219832539e-05, | |
| "loss": 1.364182949066162, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 0.691131498470948, | |
| "grad_norm": 0.326171875, | |
| "learning_rate": 1.2790193243375521e-05, | |
| "loss": 1.3197802305221558, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 0.6972477064220184, | |
| "grad_norm": 0.26171875, | |
| "learning_rate": 1.2478647814114683e-05, | |
| "loss": 1.2909021377563477, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 0.7033639143730887, | |
| "grad_norm": 0.3984375, | |
| "learning_rate": 1.2171013675695222e-05, | |
| "loss": 1.2902398109436035, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.709480122324159, | |
| "grad_norm": 0.8515625, | |
| "learning_rate": 1.1867417201615848e-05, | |
| "loss": 1.3986810445785522, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 0.7155963302752294, | |
| "grad_norm": 0.29296875, | |
| "learning_rate": 1.1567983106736946e-05, | |
| "loss": 1.2758734226226807, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 0.7217125382262997, | |
| "grad_norm": 0.37109375, | |
| "learning_rate": 1.1272834396048777e-05, | |
| "loss": 1.317265272140503, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 0.72782874617737, | |
| "grad_norm": 3.296875, | |
| "learning_rate": 1.0982092314142068e-05, | |
| "loss": 1.4039820432662964, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 0.7339449541284404, | |
| "grad_norm": 0.40234375, | |
| "learning_rate": 1.0695876295401771e-05, | |
| "loss": 1.330233097076416, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.7400611620795107, | |
| "grad_norm": 1.7890625, | |
| "learning_rate": 1.041430391494441e-05, | |
| "loss": 1.3207037448883057, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 0.746177370030581, | |
| "grad_norm": 0.2451171875, | |
| "learning_rate": 1.013749084031923e-05, | |
| "loss": 1.3986611366271973, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 0.7522935779816514, | |
| "grad_norm": 0.42578125, | |
| "learning_rate": 9.86555078399295e-06, | |
| "loss": 1.3146181106567383, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 0.7584097859327217, | |
| "grad_norm": 0.310546875, | |
| "learning_rate": 9.598595456637642e-06, | |
| "loss": 1.3460875749588013, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 0.764525993883792, | |
| "grad_norm": 0.267578125, | |
| "learning_rate": 9.336734521240934e-06, | |
| "loss": 1.2672019004821777, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.7706422018348624, | |
| "grad_norm": 0.291015625, | |
| "learning_rate": 9.080075548057383e-06, | |
| "loss": 1.3179923295974731, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 0.7767584097859327, | |
| "grad_norm": 0.703125, | |
| "learning_rate": 8.828723970419519e-06, | |
| "loss": 1.4186333417892456, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 0.7828746177370031, | |
| "grad_norm": 0.390625, | |
| "learning_rate": 8.582783041426728e-06, | |
| "loss": 1.2970129251480103, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 0.7889908256880734, | |
| "grad_norm": 0.21875, | |
| "learning_rate": 8.342353791529765e-06, | |
| "loss": 1.3273588418960571, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 0.7951070336391437, | |
| "grad_norm": 0.37109375, | |
| "learning_rate": 8.107534987028286e-06, | |
| "loss": 1.3187005519866943, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.8012232415902141, | |
| "grad_norm": 0.345703125, | |
| "learning_rate": 7.878423089498492e-06, | |
| "loss": 1.288523554801941, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 0.8073394495412844, | |
| "grad_norm": 0.6484375, | |
| "learning_rate": 7.655112216167533e-06, | |
| "loss": 1.3323516845703125, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 0.8134556574923547, | |
| "grad_norm": 0.345703125, | |
| "learning_rate": 7.437694101250949e-06, | |
| "loss": 1.3667104244232178, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 0.8195718654434251, | |
| "grad_norm": 0.294921875, | |
| "learning_rate": 7.226258058269031e-06, | |
| "loss": 1.354952096939087, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 0.8256880733944955, | |
| "grad_norm": 0.42578125, | |
| "learning_rate": 7.020890943357573e-06, | |
| "loss": 1.2679226398468018, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.8318042813455657, | |
| "grad_norm": 0.447265625, | |
| "learning_rate": 6.821677119588124e-06, | |
| "loss": 1.3207221031188965, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 0.8379204892966361, | |
| "grad_norm": 0.62109375, | |
| "learning_rate": 6.628698422312341e-06, | |
| "loss": 1.4085071086883545, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 0.8440366972477065, | |
| "grad_norm": 0.474609375, | |
| "learning_rate": 6.442034125544726e-06, | |
| "loss": 1.3547308444976807, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 0.8501529051987767, | |
| "grad_norm": 0.31640625, | |
| "learning_rate": 6.261760909397523e-06, | |
| "loss": 1.346556305885315, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 0.8562691131498471, | |
| "grad_norm": 0.46875, | |
| "learning_rate": 6.087952828581187e-06, | |
| "loss": 1.3009809255599976, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.8623853211009175, | |
| "grad_norm": 0.8671875, | |
| "learning_rate": 5.920681281983339e-06, | |
| "loss": 1.3994947671890259, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 0.8685015290519877, | |
| "grad_norm": 0.23046875, | |
| "learning_rate": 5.760014983338687e-06, | |
| "loss": 1.291093111038208, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 0.8746177370030581, | |
| "grad_norm": 0.83984375, | |
| "learning_rate": 5.606019933002025e-06, | |
| "loss": 1.2565807104110718, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 0.8807339449541285, | |
| "grad_norm": 0.326171875, | |
| "learning_rate": 5.458759390835851e-06, | |
| "loss": 1.2513775825500488, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 0.8868501529051988, | |
| "grad_norm": 0.49609375, | |
| "learning_rate": 5.318293850223726e-06, | |
| "loss": 1.3214612007141113, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.8929663608562691, | |
| "grad_norm": 0.5859375, | |
| "learning_rate": 5.184681013220134e-06, | |
| "loss": 1.4418059587478638, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 0.8990825688073395, | |
| "grad_norm": 0.29296875, | |
| "learning_rate": 5.057975766846941e-06, | |
| "loss": 1.3208688497543335, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 0.9051987767584098, | |
| "grad_norm": 0.66796875, | |
| "learning_rate": 4.938230160546292e-06, | |
| "loss": 1.4112577438354492, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 0.9113149847094801, | |
| "grad_norm": 0.322265625, | |
| "learning_rate": 4.825493384799122e-06, | |
| "loss": 1.356927514076233, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 0.9174311926605505, | |
| "grad_norm": 0.357421875, | |
| "learning_rate": 4.719811750918136e-06, | |
| "loss": 1.3079042434692383, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.9235474006116208, | |
| "grad_norm": 0.40234375, | |
| "learning_rate": 4.621228672023517e-06, | |
| "loss": 1.2854888439178467, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 0.9296636085626911, | |
| "grad_norm": 0.2890625, | |
| "learning_rate": 4.529784645209172e-06, | |
| "loss": 1.2977776527404785, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 0.9357798165137615, | |
| "grad_norm": 0.30859375, | |
| "learning_rate": 4.4455172349068696e-06, | |
| "loss": 1.2907675504684448, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 0.9418960244648318, | |
| "grad_norm": 0.53515625, | |
| "learning_rate": 4.3684610574551e-06, | |
| "loss": 1.2298390865325928, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 0.9480122324159022, | |
| "grad_norm": 0.33984375, | |
| "learning_rate": 4.298647766878974e-06, | |
| "loss": 1.3057259321212769, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.9541284403669725, | |
| "grad_norm": 0.3125, | |
| "learning_rate": 4.236106041887007e-06, | |
| "loss": 1.3313639163970947, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 0.9602446483180428, | |
| "grad_norm": 0.66015625, | |
| "learning_rate": 4.180861574090156e-06, | |
| "loss": 1.3252257108688354, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 0.9663608562691132, | |
| "grad_norm": 0.6171875, | |
| "learning_rate": 4.1329370574479296e-06, | |
| "loss": 1.258653163909912, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 0.9724770642201835, | |
| "grad_norm": 0.6328125, | |
| "learning_rate": 4.0923521789458876e-06, | |
| "loss": 1.3827545642852783, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 0.9785932721712538, | |
| "grad_norm": 1.2578125, | |
| "learning_rate": 4.059123610508407e-06, | |
| "loss": 1.2586948871612549, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.9847094801223242, | |
| "grad_norm": 0.333984375, | |
| "learning_rate": 4.033265002149985e-06, | |
| "loss": 1.1728885173797607, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 0.9908256880733946, | |
| "grad_norm": 0.37109375, | |
| "learning_rate": 4.014786976367939e-06, | |
| "loss": 1.230262041091919, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 0.9969418960244648, | |
| "grad_norm": 0.78125, | |
| "learning_rate": 4.00369712377877e-06, | |
| "loss": 1.3188743591308594, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "step": 327, | |
| "total_flos": 8.183535699912294e+17, | |
| "train_loss": 1.4031060467437138, | |
| "train_runtime": 3980.916, | |
| "train_samples_per_second": 2.629, | |
| "train_steps_per_second": 0.082 | |
| } | |
| ], | |
| "logging_steps": 2, | |
| "max_steps": 327, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 9999999, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 8.183535699912294e+17, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |