Image-Text-to-Text
Transformers
Safetensors
qwen3_5
llama-factory
full
Generated from Trainer
conversational
Instructions to use furproxy/9b-45 with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use furproxy/9b-45 with Transformers:
# Use a pipeline as a high-level helper from transformers import pipeline pipe = pipeline("image-text-to-text", model="furproxy/9b-45") messages = [ { "role": "user", "content": [ {"type": "image", "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/p-blog/candy.JPG"}, {"type": "text", "text": "What animal is on the candy?"} ] }, ] pipe(text=messages)# Load model directly from transformers import AutoProcessor, AutoModelForImageTextToText processor = AutoProcessor.from_pretrained("furproxy/9b-45") model = AutoModelForImageTextToText.from_pretrained("furproxy/9b-45") messages = [ { "role": "user", "content": [ {"type": "image", "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/p-blog/candy.JPG"}, {"type": "text", "text": "What animal is on the candy?"} ] }, ] inputs = processor.apply_chat_template( messages, add_generation_prompt=True, tokenize=True, return_dict=True, return_tensors="pt", ).to(model.device) outputs = model.generate(**inputs, max_new_tokens=40) print(processor.decode(outputs[0][inputs["input_ids"].shape[-1]:])) - Notebooks
- Google Colab
- Kaggle
- Local Apps Settings
- vLLM
How to use furproxy/9b-45 with vLLM:
Install from pip and serve model
# Install vLLM from pip: pip install vllm # Start the vLLM server: vllm serve "furproxy/9b-45" # Call the server using curl (OpenAI-compatible API): curl -X POST "http://localhost:8000/v1/chat/completions" \ -H "Content-Type: application/json" \ --data '{ "model": "furproxy/9b-45", "messages": [ { "role": "user", "content": [ { "type": "text", "text": "Describe this image in one sentence." }, { "type": "image_url", "image_url": { "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg" } } ] } ] }'Use Docker
docker model run hf.co/furproxy/9b-45
- SGLang
How to use furproxy/9b-45 with SGLang:
Install from pip and serve model
# Install SGLang from pip: pip install sglang # Start the SGLang server: python3 -m sglang.launch_server \ --model-path "furproxy/9b-45" \ --host 0.0.0.0 \ --port 30000 # Call the server using curl (OpenAI-compatible API): curl -X POST "http://localhost:30000/v1/chat/completions" \ -H "Content-Type: application/json" \ --data '{ "model": "furproxy/9b-45", "messages": [ { "role": "user", "content": [ { "type": "text", "text": "Describe this image in one sentence." }, { "type": "image_url", "image_url": { "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg" } } ] } ] }'Use Docker images
docker run --gpus all \ --shm-size 32g \ -p 30000:30000 \ -v ~/.cache/huggingface:/root/.cache/huggingface \ --env "HF_TOKEN=<secret>" \ --ipc=host \ lmsysorg/sglang:latest \ python3 -m sglang.launch_server \ --model-path "furproxy/9b-45" \ --host 0.0.0.0 \ --port 30000 # Call the server using curl (OpenAI-compatible API): curl -X POST "http://localhost:30000/v1/chat/completions" \ -H "Content-Type: application/json" \ --data '{ "model": "furproxy/9b-45", "messages": [ { "role": "user", "content": [ { "type": "text", "text": "Describe this image in one sentence." }, { "type": "image_url", "image_url": { "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg" } } ] } ] }' - Docker Model Runner
How to use furproxy/9b-45 with Docker Model Runner:
docker model run hf.co/furproxy/9b-45
| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 4.0, | |
| "eval_steps": 500, | |
| "global_step": 532, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.015037593984962405, | |
| "grad_norm": 0.4758685231208801, | |
| "learning_rate": 7.407407407407407e-07, | |
| "loss": 1.9719613790512085, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.03007518796992481, | |
| "grad_norm": 1.2873609066009521, | |
| "learning_rate": 2.222222222222222e-06, | |
| "loss": 2.2840397357940674, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.045112781954887216, | |
| "grad_norm": 0.4226410984992981, | |
| "learning_rate": 3.7037037037037037e-06, | |
| "loss": 2.0337564945220947, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.06015037593984962, | |
| "grad_norm": 0.20389820635318756, | |
| "learning_rate": 5.185185185185185e-06, | |
| "loss": 1.938320517539978, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.07518796992481203, | |
| "grad_norm": 1.1121097803115845, | |
| "learning_rate": 6.666666666666667e-06, | |
| "loss": 1.9521509408950806, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.09022556390977443, | |
| "grad_norm": 1.0452852249145508, | |
| "learning_rate": 8.148148148148148e-06, | |
| "loss": 2.3038036823272705, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.10526315789473684, | |
| "grad_norm": 0.17127464711666107, | |
| "learning_rate": 9.62962962962963e-06, | |
| "loss": 1.9031141996383667, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.12030075187969924, | |
| "grad_norm": 1.0378482341766357, | |
| "learning_rate": 1.1111111111111113e-05, | |
| "loss": 2.545740842819214, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.13533834586466165, | |
| "grad_norm": 1.3770403861999512, | |
| "learning_rate": 1.2592592592592593e-05, | |
| "loss": 3.2497336864471436, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.15037593984962405, | |
| "grad_norm": 0.6125630140304565, | |
| "learning_rate": 1.4074074074074075e-05, | |
| "loss": 1.7776570320129395, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.16541353383458646, | |
| "grad_norm": 0.6343645453453064, | |
| "learning_rate": 1.555555555555556e-05, | |
| "loss": 1.9716706275939941, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.18045112781954886, | |
| "grad_norm": 0.6397086977958679, | |
| "learning_rate": 1.7037037037037038e-05, | |
| "loss": 1.9381436109542847, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.19548872180451127, | |
| "grad_norm": 1.0100289583206177, | |
| "learning_rate": 1.851851851851852e-05, | |
| "loss": 1.9333921670913696, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.21052631578947367, | |
| "grad_norm": 0.6244422793388367, | |
| "learning_rate": 2e-05, | |
| "loss": 1.7134058475494385, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.22556390977443608, | |
| "grad_norm": 1.4532567262649536, | |
| "learning_rate": 1.9994079505294254e-05, | |
| "loss": 2.0251219272613525, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.24060150375939848, | |
| "grad_norm": 0.5191428661346436, | |
| "learning_rate": 1.9976326268767035e-05, | |
| "loss": 1.578896164894104, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.2556390977443609, | |
| "grad_norm": 0.2822546064853668, | |
| "learning_rate": 1.994676502169901e-05, | |
| "loss": 1.5274699926376343, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.2706766917293233, | |
| "grad_norm": 0.26378244161605835, | |
| "learning_rate": 1.9905436944609424e-05, | |
| "loss": 1.510546326637268, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.2857142857142857, | |
| "grad_norm": 2.3758437633514404, | |
| "learning_rate": 1.9852399609889242e-05, | |
| "loss": 2.110567331314087, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.3007518796992481, | |
| "grad_norm": 0.419758677482605, | |
| "learning_rate": 1.9787726901599502e-05, | |
| "loss": 1.1862285137176514, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.3157894736842105, | |
| "grad_norm": 0.24496647715568542, | |
| "learning_rate": 1.9711508912546566e-05, | |
| "loss": 1.440342903137207, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.3308270676691729, | |
| "grad_norm": 0.3002743721008301, | |
| "learning_rate": 1.9623851818777652e-05, | |
| "loss": 1.0962785482406616, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.3458646616541353, | |
| "grad_norm": 0.11880263686180115, | |
| "learning_rate": 1.9524877731671482e-05, | |
| "loss": 1.493391513824463, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.3609022556390977, | |
| "grad_norm": 1.7871476411819458, | |
| "learning_rate": 1.941472452783011e-05, | |
| "loss": 1.2433573007583618, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.37593984962406013, | |
| "grad_norm": 0.2921432852745056, | |
| "learning_rate": 1.9293545657008865e-05, | |
| "loss": 1.1390293836593628, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.39097744360902253, | |
| "grad_norm": 0.12673601508140564, | |
| "learning_rate": 1.9161509928352017e-05, | |
| "loss": 1.0903499126434326, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.40601503759398494, | |
| "grad_norm": 0.1648157387971878, | |
| "learning_rate": 1.901880127523192e-05, | |
| "loss": 1.1544872522354126, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.42105263157894735, | |
| "grad_norm": 0.08962542563676834, | |
| "learning_rate": 1.886561849901922e-05, | |
| "loss": 1.0822100639343262, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.43609022556390975, | |
| "grad_norm": 0.724937379360199, | |
| "learning_rate": 1.870217499214111e-05, | |
| "loss": 1.0416043996810913, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.45112781954887216, | |
| "grad_norm": 0.19841401278972626, | |
| "learning_rate": 1.8528698440813397e-05, | |
| "loss": 1.1815505027770996, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.46616541353383456, | |
| "grad_norm": 0.2616511285305023, | |
| "learning_rate": 1.8345430507860478e-05, | |
| "loss": 0.9047210812568665, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.48120300751879697, | |
| "grad_norm": 0.22353343665599823, | |
| "learning_rate": 1.8152626496065128e-05, | |
| "loss": 1.201892614364624, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.49624060150375937, | |
| "grad_norm": 0.6634237766265869, | |
| "learning_rate": 1.7950554992517014e-05, | |
| "loss": 1.1795772314071655, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.5112781954887218, | |
| "grad_norm": 0.13962750136852264, | |
| "learning_rate": 1.7739497494455412e-05, | |
| "loss": 1.3534270524978638, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.5263157894736842, | |
| "grad_norm": 0.665572464466095, | |
| "learning_rate": 1.7519748017127354e-05, | |
| "loss": 1.125345230102539, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.5413533834586466, | |
| "grad_norm": 0.179820716381073, | |
| "learning_rate": 1.729161268420746e-05, | |
| "loss": 0.8090606331825256, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.556390977443609, | |
| "grad_norm": 0.11726067215204239, | |
| "learning_rate": 1.7055409301350013e-05, | |
| "loss": 0.9213717579841614, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.5714285714285714, | |
| "grad_norm": 0.1627010852098465, | |
| "learning_rate": 1.681146691346742e-05, | |
| "loss": 1.0821505784988403, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.5864661654135338, | |
| "grad_norm": 0.1170893982052803, | |
| "learning_rate": 1.6560125346351663e-05, | |
| "loss": 1.300316333770752, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.6015037593984962, | |
| "grad_norm": 0.5334263443946838, | |
| "learning_rate": 1.6301734733277442e-05, | |
| "loss": 0.6484270691871643, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.6165413533834586, | |
| "grad_norm": 0.101778045296669, | |
| "learning_rate": 1.603665502724633e-05, | |
| "loss": 1.2577356100082397, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.631578947368421, | |
| "grad_norm": 0.1670590043067932, | |
| "learning_rate": 1.576525549955156e-05, | |
| "loss": 1.3543009757995605, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.6466165413533834, | |
| "grad_norm": 0.20087628066539764, | |
| "learning_rate": 1.548791422536178e-05, | |
| "loss": 0.9327285885810852, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.6616541353383458, | |
| "grad_norm": 0.23011328279972076, | |
| "learning_rate": 1.5205017557040656e-05, | |
| "loss": 1.1237722635269165, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.6766917293233082, | |
| "grad_norm": 0.159201517701149, | |
| "learning_rate": 1.4916959585935732e-05, | |
| "loss": 1.1964070796966553, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.6917293233082706, | |
| "grad_norm": 0.44444212317466736, | |
| "learning_rate": 1.4624141593386507e-05, | |
| "loss": 1.0308165550231934, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.706766917293233, | |
| "grad_norm": 0.22338902950286865, | |
| "learning_rate": 1.4326971491716427e-05, | |
| "loss": 0.9982426762580872, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.7218045112781954, | |
| "grad_norm": 0.2096806913614273, | |
| "learning_rate": 1.402586325598752e-05, | |
| "loss": 1.3940727710723877, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.7368421052631579, | |
| "grad_norm": 0.21466873586177826, | |
| "learning_rate": 1.3721236347309314e-05, | |
| "loss": 1.1801196336746216, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.7518796992481203, | |
| "grad_norm": 0.30590999126434326, | |
| "learning_rate": 1.3413515128505363e-05, | |
| "loss": 0.6430416703224182, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.7669172932330827, | |
| "grad_norm": 0.13401509821414948, | |
| "learning_rate": 1.3103128272951363e-05, | |
| "loss": 1.3783133029937744, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.7819548872180451, | |
| "grad_norm": 0.12506185472011566, | |
| "learning_rate": 1.2790508167408509e-05, | |
| "loss": 0.9889219403266907, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.7969924812030075, | |
| "grad_norm": 0.22044184803962708, | |
| "learning_rate": 1.2476090309683804e-05, | |
| "loss": 0.6871194243431091, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.8120300751879699, | |
| "grad_norm": 0.18642327189445496, | |
| "learning_rate": 1.2160312701956553e-05, | |
| "loss": 1.0068978071212769, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.8270676691729323, | |
| "grad_norm": 0.5397107601165771, | |
| "learning_rate": 1.1843615240616111e-05, | |
| "loss": 0.8988245725631714, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.8421052631578947, | |
| "grad_norm": 0.23860491812229156, | |
| "learning_rate": 1.1526439103460874e-05, | |
| "loss": 0.7688661813735962, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.8571428571428571, | |
| "grad_norm": 0.1502920240163803, | |
| "learning_rate": 1.120922613511221e-05, | |
| "loss": 1.0024021863937378, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.8721804511278195, | |
| "grad_norm": 0.16608496010303497, | |
| "learning_rate": 1.0892418231499461e-05, | |
| "loss": 1.4191375970840454, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.8872180451127819, | |
| "grad_norm": 0.17546993494033813, | |
| "learning_rate": 1.057645672427347e-05, | |
| "loss": 1.073761224746704, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 0.9022556390977443, | |
| "grad_norm": 5.0469841957092285, | |
| "learning_rate": 1.0261781766006174e-05, | |
| "loss": 0.9056495428085327, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.9172932330827067, | |
| "grad_norm": 0.36213457584381104, | |
| "learning_rate": 9.948831717032738e-06, | |
| "loss": 0.8672894835472107, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 0.9323308270676691, | |
| "grad_norm": 0.4218904972076416, | |
| "learning_rate": 9.638042534790373e-06, | |
| "loss": 0.827739417552948, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.9473684210526315, | |
| "grad_norm": 0.0983240008354187, | |
| "learning_rate": 9.329847166504497e-06, | |
| "loss": 0.7799423933029175, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 0.9624060150375939, | |
| "grad_norm": 0.14850644767284393, | |
| "learning_rate": 9.024674946068357e-06, | |
| "loss": 1.0653791427612305, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.9774436090225563, | |
| "grad_norm": 0.27504855394363403, | |
| "learning_rate": 8.722950995956172e-06, | |
| "loss": 0.9135006666183472, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.9924812030075187, | |
| "grad_norm": 0.27222368121147156, | |
| "learning_rate": 8.425095635003053e-06, | |
| "loss": 1.0402815341949463, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 1.0075187969924813, | |
| "grad_norm": 0.1408149003982544, | |
| "learning_rate": 8.13152379287667e-06, | |
| "loss": 0.7629735469818115, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 1.0225563909774436, | |
| "grad_norm": 0.15156933665275574, | |
| "learning_rate": 7.842644432056336e-06, | |
| "loss": 0.9513287544250488, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 1.037593984962406, | |
| "grad_norm": 0.9048015475273132, | |
| "learning_rate": 7.55885997812472e-06, | |
| "loss": 0.6946667432785034, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 1.0526315789473684, | |
| "grad_norm": 0.24624674022197723, | |
| "learning_rate": 7.280565759165833e-06, | |
| "loss": 0.7876754403114319, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 1.0676691729323309, | |
| "grad_norm": 0.09362401068210602, | |
| "learning_rate": 7.008149455050264e-06, | |
| "loss": 0.8406305909156799, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 1.0827067669172932, | |
| "grad_norm": 0.17641493678092957, | |
| "learning_rate": 6.741990557374784e-06, | |
| "loss": 1.0424906015396118, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 1.0977443609022557, | |
| "grad_norm": 0.12022742629051208, | |
| "learning_rate": 6.4824598408087015e-06, | |
| "loss": 1.0722497701644897, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 1.112781954887218, | |
| "grad_norm": 0.08780567348003387, | |
| "learning_rate": 6.229918846583414e-06, | |
| "loss": 1.0312786102294922, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 1.1278195488721805, | |
| "grad_norm": 0.3304075598716736, | |
| "learning_rate": 5.984719378844628e-06, | |
| "loss": 1.0075746774673462, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 1.1428571428571428, | |
| "grad_norm": 0.10900267213582993, | |
| "learning_rate": 5.7472030145689604e-06, | |
| "loss": 1.0427347421646118, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 1.1578947368421053, | |
| "grad_norm": 0.09693319350481033, | |
| "learning_rate": 5.51770062772752e-06, | |
| "loss": 1.0927042961120605, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 1.1729323308270676, | |
| "grad_norm": 0.12883000075817108, | |
| "learning_rate": 5.296531928359431e-06, | |
| "loss": 0.9705473780632019, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 1.1879699248120301, | |
| "grad_norm": 0.11416517943143845, | |
| "learning_rate": 5.084005017197318e-06, | |
| "loss": 1.0172467231750488, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 1.2030075187969924, | |
| "grad_norm": 0.10797861963510513, | |
| "learning_rate": 4.8804159564652665e-06, | |
| "loss": 0.5409541726112366, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 1.218045112781955, | |
| "grad_norm": 0.24861538410186768, | |
| "learning_rate": 4.686048357447095e-06, | |
| "loss": 0.9430153965950012, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 1.2330827067669172, | |
| "grad_norm": 0.14524255692958832, | |
| "learning_rate": 4.501172985399498e-06, | |
| "loss": 1.178081750869751, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 1.2481203007518797, | |
| "grad_norm": 0.2940176725387573, | |
| "learning_rate": 4.326047382360457e-06, | |
| "loss": 0.8844167590141296, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 1.263157894736842, | |
| "grad_norm": 0.1306038349866867, | |
| "learning_rate": 4.160915508378359e-06, | |
| "loss": 0.7063813209533691, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 1.2781954887218046, | |
| "grad_norm": 0.15677547454833984, | |
| "learning_rate": 4.006007401661596e-06, | |
| "loss": 0.7762787938117981, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 1.2932330827067668, | |
| "grad_norm": 0.14480236172676086, | |
| "learning_rate": 3.861538858122092e-06, | |
| "loss": 0.937335193157196, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 1.3082706766917294, | |
| "grad_norm": 0.15142542123794556, | |
| "learning_rate": 3.727711130759182e-06, | |
| "loss": 0.9747655987739563, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 1.3233082706766917, | |
| "grad_norm": 0.18726477026939392, | |
| "learning_rate": 3.6047106493025923e-06, | |
| "loss": 0.7746855020523071, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 1.3383458646616542, | |
| "grad_norm": 2.189209461212158, | |
| "learning_rate": 3.492708760505093e-06, | |
| "loss": 0.6926825642585754, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 1.3533834586466165, | |
| "grad_norm": 0.12323262542486191, | |
| "learning_rate": 3.3918614894466045e-06, | |
| "loss": 1.2502151727676392, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 1.368421052631579, | |
| "grad_norm": 0.1458672434091568, | |
| "learning_rate": 3.3023093221822746e-06, | |
| "loss": 0.9960780143737793, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 1.3834586466165413, | |
| "grad_norm": 0.1072113886475563, | |
| "learning_rate": 3.224177010037323e-06, | |
| "loss": 0.8326720595359802, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 1.3984962406015038, | |
| "grad_norm": 0.09054487943649292, | |
| "learning_rate": 3.1575733958212563e-06, | |
| "loss": 1.1920455694198608, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 1.413533834586466, | |
| "grad_norm": 0.13850678503513336, | |
| "learning_rate": 3.1025912622035687e-06, | |
| "loss": 0.5979896783828735, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 1.4285714285714286, | |
| "grad_norm": 0.2317809760570526, | |
| "learning_rate": 3.0593072024621396e-06, | |
| "loss": 0.969947099685669, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 1.443609022556391, | |
| "grad_norm": 0.29049012064933777, | |
| "learning_rate": 3.0277815137843917e-06, | |
| "loss": 0.7010709643363953, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 1.4586466165413534, | |
| "grad_norm": 0.17280922830104828, | |
| "learning_rate": 3.008058113269836e-06, | |
| "loss": 0.7660905718803406, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 1.4736842105263157, | |
| "grad_norm": 0.12446308135986328, | |
| "learning_rate": 3.0001644767510154e-06, | |
| "loss": 0.9958880543708801, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 1.4887218045112782, | |
| "grad_norm": 0.1736506223678589, | |
| "learning_rate": 3.0041116005181016e-06, | |
| "loss": 0.9368724226951599, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 1.5037593984962405, | |
| "grad_norm": 0.15870751440525055, | |
| "learning_rate": 3.0198939860004202e-06, | |
| "loss": 0.9826479554176331, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 1.518796992481203, | |
| "grad_norm": 0.15492001175880432, | |
| "learning_rate": 3.0474896474262772e-06, | |
| "loss": 1.3954254388809204, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 1.5338345864661656, | |
| "grad_norm": 0.1334741860628128, | |
| "learning_rate": 3.08686014245041e-06, | |
| "loss": 1.0462982654571533, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 1.5488721804511278, | |
| "grad_norm": 0.26873454451560974, | |
| "learning_rate": 3.1379506257063825e-06, | |
| "loss": 0.8277729153633118, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 1.5639097744360901, | |
| "grad_norm": 0.16168057918548584, | |
| "learning_rate": 3.20068992520934e-06, | |
| "loss": 0.985795259475708, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 1.5789473684210527, | |
| "grad_norm": 0.1431337147951126, | |
| "learning_rate": 3.274990641502683e-06, | |
| "loss": 0.908364474773407, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 1.5939849624060152, | |
| "grad_norm": 0.2742830514907837, | |
| "learning_rate": 3.3607492694105405e-06, | |
| "loss": 0.6370347142219543, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 1.6090225563909775, | |
| "grad_norm": 0.14003214240074158, | |
| "learning_rate": 3.457846342226442e-06, | |
| "loss": 0.9584491848945618, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 1.6240601503759398, | |
| "grad_norm": 0.29483214020729065, | |
| "learning_rate": 3.5661465981373183e-06, | |
| "loss": 1.0774601697921753, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 1.6390977443609023, | |
| "grad_norm": 0.18227490782737732, | |
| "learning_rate": 3.6854991686509906e-06, | |
| "loss": 0.7987947463989258, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 1.6541353383458648, | |
| "grad_norm": 0.20908379554748535, | |
| "learning_rate": 3.815737788764674e-06, | |
| "loss": 0.8292507529258728, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 1.669172932330827, | |
| "grad_norm": 0.23883360624313354, | |
| "learning_rate": 3.956681028581693e-06, | |
| "loss": 0.9586336612701416, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 1.6842105263157894, | |
| "grad_norm": 0.3358386158943176, | |
| "learning_rate": 4.108132546053779e-06, | |
| "loss": 0.8150299191474915, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 1.699248120300752, | |
| "grad_norm": 0.298909068107605, | |
| "learning_rate": 4.269881360496842e-06, | |
| "loss": 1.0946331024169922, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 1.7142857142857144, | |
| "grad_norm": 0.24479779601097107, | |
| "learning_rate": 4.441702146499222e-06, | |
| "loss": 0.9061790108680725, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 1.7293233082706767, | |
| "grad_norm": 0.2154252976179123, | |
| "learning_rate": 4.623355547812946e-06, | |
| "loss": 1.0011441707611084, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 1.744360902255639, | |
| "grad_norm": 0.1481187492609024, | |
| "learning_rate": 4.814588510790782e-06, | |
| "loss": 1.1533119678497314, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 1.7593984962406015, | |
| "grad_norm": 0.10200405865907669, | |
| "learning_rate": 5.01513463690452e-06, | |
| "loss": 1.1073795557022095, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 1.774436090225564, | |
| "grad_norm": 0.19221612811088562, | |
| "learning_rate": 5.224714553853478e-06, | |
| "loss": 1.148139476776123, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 1.7894736842105263, | |
| "grad_norm": 0.1456657350063324, | |
| "learning_rate": 5.443036304746191e-06, | |
| "loss": 0.6271846294403076, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 1.8045112781954886, | |
| "grad_norm": 0.3602541983127594, | |
| "learning_rate": 5.66979575481317e-06, | |
| "loss": 1.0237879753112793, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 1.8195488721804511, | |
| "grad_norm": 0.14290253818035126, | |
| "learning_rate": 5.904677015084159e-06, | |
| "loss": 0.7585715055465698, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 1.8345864661654137, | |
| "grad_norm": 0.15334200859069824, | |
| "learning_rate": 6.147352882439652e-06, | |
| "loss": 1.1932705640792847, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 1.849624060150376, | |
| "grad_norm": 0.09177304059267044, | |
| "learning_rate": 6.397485295423669e-06, | |
| "loss": 1.1568275690078735, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 1.8646616541353382, | |
| "grad_norm": 0.08284302055835724, | |
| "learning_rate": 6.6547258051828426e-06, | |
| "loss": 0.9782670736312866, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 1.8796992481203008, | |
| "grad_norm": 0.12089983373880386, | |
| "learning_rate": 6.918716060875743e-06, | |
| "loss": 1.2261128425598145, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 1.8947368421052633, | |
| "grad_norm": 0.1200575977563858, | |
| "learning_rate": 7.1890883088761885e-06, | |
| "loss": 1.0725328922271729, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 1.9097744360902256, | |
| "grad_norm": 0.2572805881500244, | |
| "learning_rate": 7.4654659050752845e-06, | |
| "loss": 1.2271314859390259, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 1.9248120300751879, | |
| "grad_norm": 0.2713569104671478, | |
| "learning_rate": 7.747463839568292e-06, | |
| "loss": 0.8813698291778564, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 1.9398496240601504, | |
| "grad_norm": 0.5769055485725403, | |
| "learning_rate": 8.034689272995649e-06, | |
| "loss": 0.5630529522895813, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 1.954887218045113, | |
| "grad_norm": 0.13255445659160614, | |
| "learning_rate": 8.32674208379076e-06, | |
| "loss": 1.0115076303482056, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 1.9699248120300752, | |
| "grad_norm": 0.15804584324359894, | |
| "learning_rate": 8.623215425572433e-06, | |
| "loss": 0.9677861332893372, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 1.9849624060150375, | |
| "grad_norm": 0.09761196374893188, | |
| "learning_rate": 8.923696293905279e-06, | |
| "loss": 0.9606061577796936, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 0.24307098984718323, | |
| "learning_rate": 9.227766101638668e-06, | |
| "loss": 0.8462581038475037, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 2.0150375939849625, | |
| "grad_norm": 0.15874817967414856, | |
| "learning_rate": 9.535001262022755e-06, | |
| "loss": 0.5168899893760681, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 2.030075187969925, | |
| "grad_norm": 0.09884876012802124, | |
| "learning_rate": 9.844973778789198e-06, | |
| "loss": 0.725158154964447, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 2.045112781954887, | |
| "grad_norm": 0.11747175455093384, | |
| "learning_rate": 1.0157251842374587e-05, | |
| "loss": 0.5184513330459595, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 2.0601503759398496, | |
| "grad_norm": 0.17868469655513763, | |
| "learning_rate": 1.0471400431456062e-05, | |
| "loss": 1.0087833404541016, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 2.075187969924812, | |
| "grad_norm": 0.12505443394184113, | |
| "learning_rate": 1.078698191896105e-05, | |
| "loss": 0.9181981682777405, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 2.090225563909774, | |
| "grad_norm": 0.45129311084747314, | |
| "learning_rate": 1.1103556681706949e-05, | |
| "loss": 0.867568850517273, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 2.1052631578947367, | |
| "grad_norm": 0.38053497672080994, | |
| "learning_rate": 1.1420683712821577e-05, | |
| "loss": 0.8937767744064331, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 2.1203007518796992, | |
| "grad_norm": 0.2623603045940399, | |
| "learning_rate": 1.1737921236091097e-05, | |
| "loss": 1.0325863361358643, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 2.1353383458646618, | |
| "grad_norm": 0.13423830270767212, | |
| "learning_rate": 1.2054827321379756e-05, | |
| "loss": 0.6633681654930115, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 2.1503759398496243, | |
| "grad_norm": 0.20472559332847595, | |
| "learning_rate": 1.237096050026399e-05, | |
| "loss": 0.998541533946991, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 2.1654135338345863, | |
| "grad_norm": 0.28264734148979187, | |
| "learning_rate": 1.2685880381023343e-05, | |
| "loss": 1.1486636400222778, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 2.180451127819549, | |
| "grad_norm": 0.11129236966371536, | |
| "learning_rate": 1.2999148262131527e-05, | |
| "loss": 1.1767593622207642, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 2.1954887218045114, | |
| "grad_norm": 0.17632298171520233, | |
| "learning_rate": 1.3310327743392861e-05, | |
| "loss": 0.8024111986160278, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 2.2105263157894735, | |
| "grad_norm": 0.1285456418991089, | |
| "learning_rate": 1.3618985333872927e-05, | |
| "loss": 0.9202947616577148, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 2.225563909774436, | |
| "grad_norm": 0.2532704770565033, | |
| "learning_rate": 1.3924691055776338e-05, | |
| "loss": 0.3760508596897125, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 2.2406015037593985, | |
| "grad_norm": 0.18166831135749817, | |
| "learning_rate": 1.4227019043430623e-05, | |
| "loss": 1.2139897346496582, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 2.255639097744361, | |
| "grad_norm": 0.31214964389801025, | |
| "learning_rate": 1.4525548136541599e-05, | |
| "loss": 1.1467747688293457, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 2.2706766917293235, | |
| "grad_norm": 0.2208021879196167, | |
| "learning_rate": 1.4819862466893891e-05, | |
| "loss": 1.152048110961914, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 2.2857142857142856, | |
| "grad_norm": 0.28248584270477295, | |
| "learning_rate": 1.5109552037679353e-05, | |
| "loss": 0.8999449610710144, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 2.300751879699248, | |
| "grad_norm": 0.17761224508285522, | |
| "learning_rate": 1.539421329464618e-05, | |
| "loss": 0.8530235290527344, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 2.3157894736842106, | |
| "grad_norm": 0.12931574881076813, | |
| "learning_rate": 1.5673449688273246e-05, | |
| "loss": 1.1707788705825806, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 2.3308270676691727, | |
| "grad_norm": 0.45592963695526123, | |
| "learning_rate": 1.5946872226186394e-05, | |
| "loss": 0.543804407119751, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 2.345864661654135, | |
| "grad_norm": 0.13944678008556366, | |
| "learning_rate": 1.621410001504725e-05, | |
| "loss": 0.9259825944900513, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 2.3609022556390977, | |
| "grad_norm": 0.6154989004135132, | |
| "learning_rate": 1.6474760791159528e-05, | |
| "loss": 0.6412667036056519, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 2.3759398496240602, | |
| "grad_norm": 0.19801302254199982, | |
| "learning_rate": 1.672849143905383e-05, | |
| "loss": 1.0553992986679077, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 2.3909774436090228, | |
| "grad_norm": 0.16842691600322723, | |
| "learning_rate": 1.697493849732838e-05, | |
| "loss": 0.9918773174285889, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 2.406015037593985, | |
| "grad_norm": 0.17690639197826385, | |
| "learning_rate": 1.7213758651041114e-05, | |
| "loss": 1.183601975440979, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 2.4210526315789473, | |
| "grad_norm": 0.1753586232662201, | |
| "learning_rate": 1.7444619209967163e-05, | |
| "loss": 1.2674732208251953, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 2.43609022556391, | |
| "grad_norm": 0.1980062872171402, | |
| "learning_rate": 1.766719857205551e-05, | |
| "loss": 0.9618882536888123, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 2.451127819548872, | |
| "grad_norm": 0.14773985743522644, | |
| "learning_rate": 1.7881186671439107e-05, | |
| "loss": 1.1368170976638794, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 2.4661654135338344, | |
| "grad_norm": 0.3491007685661316, | |
| "learning_rate": 1.8086285410374563e-05, | |
| "loss": 0.9204502701759338, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 2.481203007518797, | |
| "grad_norm": 0.41794508695602417, | |
| "learning_rate": 1.8282209074509393e-05, | |
| "loss": 0.7325559258460999, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 2.4962406015037595, | |
| "grad_norm": 0.1503211408853531, | |
| "learning_rate": 1.8468684730898602e-05, | |
| "loss": 1.1228058338165283, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 2.511278195488722, | |
| "grad_norm": 0.29104840755462646, | |
| "learning_rate": 1.864545260821598e-05, | |
| "loss": 0.6551952958106995, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 2.526315789473684, | |
| "grad_norm": 0.17387054860591888, | |
| "learning_rate": 1.8812266458630505e-05, | |
| "loss": 0.7751361727714539, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 2.5413533834586466, | |
| "grad_norm": 0.7814794778823853, | |
| "learning_rate": 1.8968893900843786e-05, | |
| "loss": 0.7552028298377991, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 2.556390977443609, | |
| "grad_norm": 0.15030670166015625, | |
| "learning_rate": 1.9115116743810537e-05, | |
| "loss": 1.1345906257629395, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 2.571428571428571, | |
| "grad_norm": 0.2455630898475647, | |
| "learning_rate": 1.9250731290691326e-05, | |
| "loss": 1.0457109212875366, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 2.5864661654135337, | |
| "grad_norm": 0.17742854356765747, | |
| "learning_rate": 1.9375548622613952e-05, | |
| "loss": 0.8526303768157959, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 2.601503759398496, | |
| "grad_norm": 0.35299667716026306, | |
| "learning_rate": 1.9489394861848333e-05, | |
| "loss": 0.6485392451286316, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 2.6165413533834587, | |
| "grad_norm": 0.24292704463005066, | |
| "learning_rate": 1.9592111414028205e-05, | |
| "loss": 0.8217464685440063, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 2.6315789473684212, | |
| "grad_norm": 0.11688588559627533, | |
| "learning_rate": 1.9683555189082178e-05, | |
| "loss": 0.9268519282341003, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 2.6466165413533833, | |
| "grad_norm": 0.8352806568145752, | |
| "learning_rate": 1.9763598800566478e-05, | |
| "loss": 0.7672562599182129, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 2.661654135338346, | |
| "grad_norm": 0.10471988469362259, | |
| "learning_rate": 1.983213074312156e-05, | |
| "loss": 1.0508689880371094, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 2.6766917293233083, | |
| "grad_norm": 0.9734071493148804, | |
| "learning_rate": 1.9889055547805527e-05, | |
| "loss": 0.8092398047447205, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 2.6917293233082704, | |
| "grad_norm": 0.22569192945957184, | |
| "learning_rate": 1.993429391508785e-05, | |
| "loss": 0.4492420554161072, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 2.706766917293233, | |
| "grad_norm": 0.19527536630630493, | |
| "learning_rate": 1.9967782825318183e-05, | |
| "loss": 1.1782996654510498, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 2.7218045112781954, | |
| "grad_norm": 0.4661547839641571, | |
| "learning_rate": 1.998947562651635e-05, | |
| "loss": 0.6154137253761292, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 2.736842105263158, | |
| "grad_norm": 0.2177896499633789, | |
| "learning_rate": 1.999934209936126e-05, | |
| "loss": 0.7517685890197754, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 2.7518796992481205, | |
| "grad_norm": 0.18744942545890808, | |
| "learning_rate": 1.9997368499288147e-05, | |
| "loss": 0.8893219232559204, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 2.7669172932330826, | |
| "grad_norm": 0.12195390462875366, | |
| "learning_rate": 1.9983557575635553e-05, | |
| "loss": 0.7814352512359619, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 2.781954887218045, | |
| "grad_norm": 0.20601901412010193, | |
| "learning_rate": 1.9957928567815327e-05, | |
| "loss": 0.8266798853874207, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 2.7969924812030076, | |
| "grad_norm": 0.24167893826961517, | |
| "learning_rate": 1.9920517178511025e-05, | |
| "loss": 0.9625144004821777, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 2.8120300751879697, | |
| "grad_norm": 0.20055215060710907, | |
| "learning_rate": 1.9871375523942014e-05, | |
| "loss": 0.7290459275245667, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 2.827067669172932, | |
| "grad_norm": 0.349552720785141, | |
| "learning_rate": 1.981057206126258e-05, | |
| "loss": 1.0641855001449585, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 2.8421052631578947, | |
| "grad_norm": 0.19066953659057617, | |
| "learning_rate": 1.9738191493197164e-05, | |
| "loss": 1.1699275970458984, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 2.857142857142857, | |
| "grad_norm": 0.4483891725540161, | |
| "learning_rate": 1.96543346500446e-05, | |
| "loss": 0.8162869811058044, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 2.8721804511278197, | |
| "grad_norm": 0.3727665841579437, | |
| "learning_rate": 1.9559118349215685e-05, | |
| "loss": 0.9681864380836487, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 2.887218045112782, | |
| "grad_norm": 0.8629833459854126, | |
| "learning_rate": 1.9452675232499833e-05, | |
| "loss": 0.9782270789146423, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 2.9022556390977443, | |
| "grad_norm": 0.40549924969673157, | |
| "learning_rate": 1.9335153581287384e-05, | |
| "loss": 0.7416080832481384, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 2.917293233082707, | |
| "grad_norm": 0.2615848481655121, | |
| "learning_rate": 1.920671711000512e-05, | |
| "loss": 0.9780222773551941, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 2.932330827067669, | |
| "grad_norm": 0.1308208554983139, | |
| "learning_rate": 1.906754473805263e-05, | |
| "loss": 0.8454363942146301, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 2.9473684210526314, | |
| "grad_norm": 0.19276592135429382, | |
| "learning_rate": 1.8917830340557287e-05, | |
| "loss": 0.974253237247467, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 2.962406015037594, | |
| "grad_norm": 0.40342700481414795, | |
| "learning_rate": 1.875778247829503e-05, | |
| "loss": 0.767073392868042, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 2.9774436090225564, | |
| "grad_norm": 0.28507354855537415, | |
| "learning_rate": 1.8587624107153192e-05, | |
| "loss": 1.0176156759262085, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 2.992481203007519, | |
| "grad_norm": 0.08241955190896988, | |
| "learning_rate": 1.840759226754013e-05, | |
| "loss": 0.8344214558601379, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 3.007518796992481, | |
| "grad_norm": 0.11373747140169144, | |
| "learning_rate": 1.8217937754174265e-05, | |
| "loss": 0.9761697053909302, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 3.0225563909774436, | |
| "grad_norm": 0.34416520595550537, | |
| "learning_rate": 1.8018924766712608e-05, | |
| "loss": 0.7501745223999023, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 3.037593984962406, | |
| "grad_norm": 0.7743138074874878, | |
| "learning_rate": 1.7810830541705425e-05, | |
| "loss": 0.9392253160476685, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 3.0526315789473686, | |
| "grad_norm": 0.2988702356815338, | |
| "learning_rate": 1.7593944966389767e-05, | |
| "loss": 0.6723920702934265, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 3.0676691729323307, | |
| "grad_norm": 0.1014496460556984, | |
| "learning_rate": 1.7368570174859855e-05, | |
| "loss": 0.5313879251480103, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 3.082706766917293, | |
| "grad_norm": 0.1569093018770218, | |
| "learning_rate": 1.713502012717686e-05, | |
| "loss": 0.8502162098884583, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 3.0977443609022557, | |
| "grad_norm": 0.4344463348388672, | |
| "learning_rate": 1.6893620172004526e-05, | |
| "loss": 0.6262786388397217, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 3.112781954887218, | |
| "grad_norm": 0.1802617460489273, | |
| "learning_rate": 1.664470659337968e-05, | |
| "loss": 0.9044028520584106, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 3.1278195488721803, | |
| "grad_norm": 0.09171602874994278, | |
| "learning_rate": 1.638862614224926e-05, | |
| "loss": 0.4721546471118927, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 3.142857142857143, | |
| "grad_norm": 0.21735042333602905, | |
| "learning_rate": 1.6125735553426228e-05, | |
| "loss": 0.7782120108604431, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 3.1578947368421053, | |
| "grad_norm": 0.10799870640039444, | |
| "learning_rate": 1.5856401048637505e-05, | |
| "loss": 0.8657242059707642, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 3.172932330827068, | |
| "grad_norm": 0.2299298644065857, | |
| "learning_rate": 1.5580997826355945e-05, | |
| "loss": 0.7349554896354675, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 3.18796992481203, | |
| "grad_norm": 0.1193917915225029, | |
| "learning_rate": 1.5299909539127293e-05, | |
| "loss": 0.8061299324035645, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 3.2030075187969924, | |
| "grad_norm": 0.313683420419693, | |
| "learning_rate": 1.5013527759120128e-05, | |
| "loss": 0.7771633267402649, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 3.218045112781955, | |
| "grad_norm": 0.28184592723846436, | |
| "learning_rate": 1.4722251432643222e-05, | |
| "loss": 0.7236590385437012, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 3.2330827067669174, | |
| "grad_norm": 0.2527337968349457, | |
| "learning_rate": 1.4426486324390432e-05, | |
| "loss": 0.5557109713554382, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 3.2481203007518795, | |
| "grad_norm": 0.14354459941387177, | |
| "learning_rate": 1.4126644452187057e-05, | |
| "loss": 0.5296133160591125, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 3.263157894736842, | |
| "grad_norm": 0.45321938395500183, | |
| "learning_rate": 1.382314351302531e-05, | |
| "loss": 0.5751173496246338, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 3.2781954887218046, | |
| "grad_norm": 0.5287338495254517, | |
| "learning_rate": 1.351640630118838e-05, | |
| "loss": 0.5346217751502991, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 3.293233082706767, | |
| "grad_norm": 0.07787490636110306, | |
| "learning_rate": 1.3206860119273606e-05, | |
| "loss": 0.5648679137229919, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 3.308270676691729, | |
| "grad_norm": 0.13844254612922668, | |
| "learning_rate": 1.2894936182935407e-05, | |
| "loss": 1.0638585090637207, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 3.3233082706766917, | |
| "grad_norm": 0.19562360644340515, | |
| "learning_rate": 1.2581069020176986e-05, | |
| "loss": 0.452185720205307, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 3.338345864661654, | |
| "grad_norm": 0.3099493980407715, | |
| "learning_rate": 1.2265695866027849e-05, | |
| "loss": 0.7373262047767639, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 3.3533834586466167, | |
| "grad_norm": 0.2395428866147995, | |
| "learning_rate": 1.1949256053450243e-05, | |
| "loss": 0.5868238806724548, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 3.3684210526315788, | |
| "grad_norm": 0.1869363635778427, | |
| "learning_rate": 1.1632190401322987e-05, | |
| "loss": 0.8645040392875671, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 3.3834586466165413, | |
| "grad_norm": 0.37590962648391724, | |
| "learning_rate": 1.1314940600355459e-05, | |
| "loss": 0.9512546062469482, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 3.398496240601504, | |
| "grad_norm": 0.2025744765996933, | |
| "learning_rate": 1.099794859778695e-05, | |
| "loss": 0.9128929376602173, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 3.4135338345864663, | |
| "grad_norm": 0.17627839744091034, | |
| "learning_rate": 1.0681655981728753e-05, | |
| "loss": 0.9643846750259399, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 3.4285714285714284, | |
| "grad_norm": 1.0564072132110596, | |
| "learning_rate": 1.0366503366006477e-05, | |
| "loss": 0.8578605055809021, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 3.443609022556391, | |
| "grad_norm": 0.3315916061401367, | |
| "learning_rate": 1.0052929776359614e-05, | |
| "loss": 0.5702735781669617, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 3.4586466165413534, | |
| "grad_norm": 0.1655089259147644, | |
| "learning_rate": 9.741372038853437e-06, | |
| "loss": 0.9215094447135925, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 3.473684210526316, | |
| "grad_norm": 0.26911604404449463, | |
| "learning_rate": 9.432264171355117e-06, | |
| "loss": 0.8250299096107483, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 3.488721804511278, | |
| "grad_norm": 0.22876925766468048, | |
| "learning_rate": 9.126036778921812e-06, | |
| "loss": 0.8027570843696594, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 3.5037593984962405, | |
| "grad_norm": 0.172451913356781, | |
| "learning_rate": 8.8231164539431e-06, | |
| "loss": 0.8936192989349365, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 3.518796992481203, | |
| "grad_norm": 0.2068740874528885, | |
| "learning_rate": 8.52392518187319e-06, | |
| "loss": 0.6892407536506653, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 3.5338345864661656, | |
| "grad_norm": 0.18001121282577515, | |
| "learning_rate": 8.228879753380818e-06, | |
| "loss": 0.7898232340812683, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 3.548872180451128, | |
| "grad_norm": 0.19327786564826965, | |
| "learning_rate": 7.938391183735972e-06, | |
| "loss": 0.7635357975959778, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 3.56390977443609, | |
| "grad_norm": 0.20077842473983765, | |
| "learning_rate": 7.65286414024186e-06, | |
| "loss": 0.8505274653434753, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 3.5789473684210527, | |
| "grad_norm": 0.19398260116577148, | |
| "learning_rate": 7.3726963785101094e-06, | |
| "loss": 0.38889795541763306, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 3.593984962406015, | |
| "grad_norm": 0.14551198482513428, | |
| "learning_rate": 7.098278188364236e-06, | |
| "loss": 0.365261048078537, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 3.6090225563909772, | |
| "grad_norm": 0.17524459958076477, | |
| "learning_rate": 6.829991850143528e-06, | |
| "loss": 0.9008171558380127, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 3.6240601503759398, | |
| "grad_norm": 0.3653651475906372, | |
| "learning_rate": 6.5682111021644835e-06, | |
| "loss": 0.900292158126831, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 3.6390977443609023, | |
| "grad_norm": 0.19287094473838806, | |
| "learning_rate": 6.313300620081777e-06, | |
| "loss": 0.7757176160812378, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 3.654135338345865, | |
| "grad_norm": 0.20128026604652405, | |
| "learning_rate": 6.065615508874151e-06, | |
| "loss": 0.3761276602745056, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 3.6691729323308273, | |
| "grad_norm": 0.1511046588420868, | |
| "learning_rate": 5.82550080816269e-06, | |
| "loss": 0.7676253318786621, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 3.6842105263157894, | |
| "grad_norm": 0.13384103775024414, | |
| "learning_rate": 5.593291011550776e-06, | |
| "loss": 0.9202339053153992, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 3.699248120300752, | |
| "grad_norm": 0.4986729025840759, | |
| "learning_rate": 5.3693096006552025e-06, | |
| "loss": 0.970383882522583, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 3.7142857142857144, | |
| "grad_norm": 0.5382466316223145, | |
| "learning_rate": 5.15386859447772e-06, | |
| "loss": 0.48905929923057556, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 3.7293233082706765, | |
| "grad_norm": 0.14699164032936096, | |
| "learning_rate": 4.947268114744566e-06, | |
| "loss": 0.9869436025619507, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 3.744360902255639, | |
| "grad_norm": 0.17605887353420258, | |
| "learning_rate": 4.749795967819596e-06, | |
| "loss": 1.0997637510299683, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 3.7593984962406015, | |
| "grad_norm": 0.24531900882720947, | |
| "learning_rate": 4.561727243773504e-06, | |
| "loss": 0.9353739023208618, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 3.774436090225564, | |
| "grad_norm": 0.23771578073501587, | |
| "learning_rate": 4.383323933167479e-06, | |
| "loss": 0.7048070430755615, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 3.7894736842105265, | |
| "grad_norm": 0.1848079115152359, | |
| "learning_rate": 4.214834562085255e-06, | |
| "loss": 0.6421667337417603, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 3.8045112781954886, | |
| "grad_norm": 1.46511971950531, | |
| "learning_rate": 4.056493845922015e-06, | |
| "loss": 0.5478267669677734, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 3.819548872180451, | |
| "grad_norm": 0.1415521800518036, | |
| "learning_rate": 3.908522362412276e-06, | |
| "loss": 0.8992588520050049, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 3.8345864661654137, | |
| "grad_norm": 0.6491537690162659, | |
| "learning_rate": 3.7711262443524467e-06, | |
| "loss": 0.591274082660675, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 3.8496240601503757, | |
| "grad_norm": 0.2289588451385498, | |
| "learning_rate": 3.6444968924459545e-06, | |
| "loss": 0.771586000919342, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 3.8646616541353382, | |
| "grad_norm": 0.1354115605354309, | |
| "learning_rate": 3.5288107086710545e-06, | |
| "loss": 0.7341738939285278, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 3.8796992481203008, | |
| "grad_norm": 0.1334567666053772, | |
| "learning_rate": 3.4242288505426928e-06, | |
| "loss": 1.0383778810501099, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 3.8947368421052633, | |
| "grad_norm": 0.17545144259929657, | |
| "learning_rate": 3.3308970066108413e-06, | |
| "loss": 0.5621640682220459, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 3.909774436090226, | |
| "grad_norm": 0.09879021346569061, | |
| "learning_rate": 3.2489451935079473e-06, | |
| "loss": 0.9413120150566101, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 3.924812030075188, | |
| "grad_norm": 0.2358972579240799, | |
| "learning_rate": 3.1784875748282608e-06, | |
| "loss": 1.0549259185791016, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 3.9398496240601504, | |
| "grad_norm": 0.1280510276556015, | |
| "learning_rate": 3.119622302091404e-06, | |
| "loss": 1.0559005737304688, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 3.954887218045113, | |
| "grad_norm": 0.24243058264255524, | |
| "learning_rate": 3.072431378011632e-06, | |
| "loss": 0.9401967525482178, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 3.969924812030075, | |
| "grad_norm": 0.1816936582326889, | |
| "learning_rate": 3.0369805422633432e-06, | |
| "loss": 0.6218282580375671, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 3.9849624060150375, | |
| "grad_norm": 0.4231378734111786, | |
| "learning_rate": 3.0133191799019103e-06, | |
| "loss": 0.5278748869895935, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "grad_norm": 0.3513203263282776, | |
| "learning_rate": 3.001480252567475e-06, | |
| "loss": 0.6851533651351929, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "step": 532, | |
| "total_flos": 3.800554784105169e+18, | |
| "train_loss": 0.9838281438538903, | |
| "train_runtime": 15715.7329, | |
| "train_samples_per_second": 4.062, | |
| "train_steps_per_second": 0.034 | |
| } | |
| ], | |
| "logging_steps": 2, | |
| "max_steps": 532, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 4, | |
| "save_steps": 260, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 3.800554784105169e+18, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |