Instructions to use StevenHH2000/Fine-R1-7B-Stage1 with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use StevenHH2000/Fine-R1-7B-Stage1 with Transformers:
```python
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("image-text-to-text", model="StevenHH2000/Fine-R1-7B-Stage1")
messages = [
    {
        "role": "user",
        "content": [
            {"type": "image", "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/p-blog/candy.JPG"},
            {"type": "text", "text": "What animal is on the candy?"}
        ]
    },
]
pipe(text=messages)
```

```python
# Load model directly
from transformers import AutoProcessor, AutoModelForImageTextToText

processor = AutoProcessor.from_pretrained("StevenHH2000/Fine-R1-7B-Stage1")
model = AutoModelForImageTextToText.from_pretrained("StevenHH2000/Fine-R1-7B-Stage1")
messages = [
    {
        "role": "user",
        "content": [
            {"type": "image", "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/p-blog/candy.JPG"},
            {"type": "text", "text": "What animal is on the candy?"}
        ]
    },
]
inputs = processor.apply_chat_template(
    messages,
    add_generation_prompt=True,
    tokenize=True,
    return_dict=True,
    return_tensors="pt",
).to(model.device)

outputs = model.generate(**inputs, max_new_tokens=40)
print(processor.decode(outputs[0][inputs["input_ids"].shape[-1]:]))
```
- Notebooks
- Google Colab
- Kaggle
- Local Apps
- vLLM
How to use StevenHH2000/Fine-R1-7B-Stage1 with vLLM:
Install from pip and serve model
```bash
# Install vLLM from pip:
pip install vllm

# Start the vLLM server:
vllm serve "StevenHH2000/Fine-R1-7B-Stage1"

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/chat/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "StevenHH2000/Fine-R1-7B-Stage1",
        "messages": [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": "Describe this image in one sentence."
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
                        }
                    }
                ]
            }
        ]
    }'
```
Use Docker
docker model run hf.co/StevenHH2000/Fine-R1-7B-Stage1
- SGLang
How to use StevenHH2000/Fine-R1-7B-Stage1 with SGLang:
Install from pip and serve model
```bash
# Install SGLang from pip:
pip install sglang

# Start the SGLang server:
python3 -m sglang.launch_server \
    --model-path "StevenHH2000/Fine-R1-7B-Stage1" \
    --host 0.0.0.0 \
    --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "StevenHH2000/Fine-R1-7B-Stage1",
        "messages": [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": "Describe this image in one sentence."
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
                        }
                    }
                ]
            }
        ]
    }'
```
Use Docker images
```bash
docker run --gpus all \
    --shm-size 32g \
    -p 30000:30000 \
    -v ~/.cache/huggingface:/root/.cache/huggingface \
    --env "HF_TOKEN=<secret>" \
    --ipc=host \
    lmsysorg/sglang:latest \
    python3 -m sglang.launch_server \
        --model-path "StevenHH2000/Fine-R1-7B-Stage1" \
        --host 0.0.0.0 \
        --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "StevenHH2000/Fine-R1-7B-Stage1",
        "messages": [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": "Describe this image in one sentence."
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
                        }
                    }
                ]
            }
        ]
    }'
```
- Docker Model Runner
How to use StevenHH2000/Fine-R1-7B-Stage1 with Docker Model Runner:
docker model run hf.co/StevenHH2000/Fine-R1-7B-Stage1
| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 9.235294117647058, | |
| "eval_steps": 500, | |
| "global_step": 120, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0784313725490196, | |
| "grad_norm": 5.935066953271119, | |
| "learning_rate": 4.1666666666666667e-07, | |
| "loss": 1.1559, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.1568627450980392, | |
| "grad_norm": 5.931820940431562, | |
| "learning_rate": 8.333333333333333e-07, | |
| "loss": 1.1107, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.23529411764705882, | |
| "grad_norm": 6.143905320771078, | |
| "learning_rate": 1.25e-06, | |
| "loss": 1.1384, | |
| "step": 3 | |
| }, | |
| { | |
| "epoch": 0.3137254901960784, | |
| "grad_norm": 6.065274861977263, | |
| "learning_rate": 1.6666666666666667e-06, | |
| "loss": 1.1421, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.39215686274509803, | |
| "grad_norm": 5.8625107951536535, | |
| "learning_rate": 2.0833333333333334e-06, | |
| "loss": 1.1439, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.47058823529411764, | |
| "grad_norm": 5.067374092992189, | |
| "learning_rate": 2.5e-06, | |
| "loss": 1.0618, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.5490196078431373, | |
| "grad_norm": 4.755801552471063, | |
| "learning_rate": 2.916666666666667e-06, | |
| "loss": 1.0401, | |
| "step": 7 | |
| }, | |
| { | |
| "epoch": 0.6274509803921569, | |
| "grad_norm": 3.3129565953149323, | |
| "learning_rate": 3.3333333333333333e-06, | |
| "loss": 0.9898, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.7058823529411765, | |
| "grad_norm": 3.3912326629630547, | |
| "learning_rate": 3.7500000000000005e-06, | |
| "loss": 1.011, | |
| "step": 9 | |
| }, | |
| { | |
| "epoch": 0.7843137254901961, | |
| "grad_norm": 3.2048784731280806, | |
| "learning_rate": 4.166666666666667e-06, | |
| "loss": 0.9935, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.8627450980392157, | |
| "grad_norm": 3.752806588180809, | |
| "learning_rate": 4.583333333333333e-06, | |
| "loss": 0.9037, | |
| "step": 11 | |
| }, | |
| { | |
| "epoch": 0.9411764705882353, | |
| "grad_norm": 3.7034973959501185, | |
| "learning_rate": 5e-06, | |
| "loss": 0.9095, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 3.7034973959501185, | |
| "learning_rate": 4.998942375205502e-06, | |
| "loss": 0.6715, | |
| "step": 13 | |
| }, | |
| { | |
| "epoch": 1.0784313725490196, | |
| "grad_norm": 3.3436999577667104, | |
| "learning_rate": 4.995770395678171e-06, | |
| "loss": 0.8186, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 1.156862745098039, | |
| "grad_norm": 2.662791286273683, | |
| "learning_rate": 4.990486745229364e-06, | |
| "loss": 0.7876, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 1.2352941176470589, | |
| "grad_norm": 3.1462660546154915, | |
| "learning_rate": 4.983095894354858e-06, | |
| "loss": 0.7498, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 1.3137254901960784, | |
| "grad_norm": 2.806483473961631, | |
| "learning_rate": 4.973604096452361e-06, | |
| "loss": 0.7377, | |
| "step": 17 | |
| }, | |
| { | |
| "epoch": 1.392156862745098, | |
| "grad_norm": 2.4168449532239933, | |
| "learning_rate": 4.962019382530521e-06, | |
| "loss": 0.6722, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 1.4705882352941178, | |
| "grad_norm": 2.1659663165693823, | |
| "learning_rate": 4.948351554413879e-06, | |
| "loss": 0.6785, | |
| "step": 19 | |
| }, | |
| { | |
| "epoch": 1.5490196078431373, | |
| "grad_norm": 1.9715057945884606, | |
| "learning_rate": 4.93261217644956e-06, | |
| "loss": 0.6873, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 1.6274509803921569, | |
| "grad_norm": 2.03020256390926, | |
| "learning_rate": 4.914814565722671e-06, | |
| "loss": 0.659, | |
| "step": 21 | |
| }, | |
| { | |
| "epoch": 1.7058823529411766, | |
| "grad_norm": 1.9753159977423889, | |
| "learning_rate": 4.894973780788722e-06, | |
| "loss": 0.6438, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 1.784313725490196, | |
| "grad_norm": 1.7231552446064127, | |
| "learning_rate": 4.873106608932585e-06, | |
| "loss": 0.6467, | |
| "step": 23 | |
| }, | |
| { | |
| "epoch": 1.8627450980392157, | |
| "grad_norm": 1.7271946867600179, | |
| "learning_rate": 4.849231551964771e-06, | |
| "loss": 0.6004, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 1.9411764705882353, | |
| "grad_norm": 1.908074354983798, | |
| "learning_rate": 4.823368810567056e-06, | |
| "loss": 0.6608, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 1.8986968468078707, | |
| "learning_rate": 4.7955402672006855e-06, | |
| "loss": 0.4767, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 2.0784313725490198, | |
| "grad_norm": 1.7821876091389497, | |
| "learning_rate": 4.765769467591626e-06, | |
| "loss": 0.5677, | |
| "step": 27 | |
| }, | |
| { | |
| "epoch": 2.156862745098039, | |
| "grad_norm": 1.5729961003705275, | |
| "learning_rate": 4.734081600808531e-06, | |
| "loss": 0.5875, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 2.235294117647059, | |
| "grad_norm": 1.6161908564695435, | |
| "learning_rate": 4.700503477950278e-06, | |
| "loss": 0.5528, | |
| "step": 29 | |
| }, | |
| { | |
| "epoch": 2.313725490196078, | |
| "grad_norm": 1.453253235686431, | |
| "learning_rate": 4.665063509461098e-06, | |
| "loss": 0.5471, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 2.392156862745098, | |
| "grad_norm": 1.5747903710062445, | |
| "learning_rate": 4.627791681092499e-06, | |
| "loss": 0.5058, | |
| "step": 31 | |
| }, | |
| { | |
| "epoch": 2.4705882352941178, | |
| "grad_norm": 1.4435044859613202, | |
| "learning_rate": 4.588719528532342e-06, | |
| "loss": 0.5226, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 2.549019607843137, | |
| "grad_norm": 1.39605748394361, | |
| "learning_rate": 4.54788011072248e-06, | |
| "loss": 0.5071, | |
| "step": 33 | |
| }, | |
| { | |
| "epoch": 2.627450980392157, | |
| "grad_norm": 1.5254258052298646, | |
| "learning_rate": 4.50530798188761e-06, | |
| "loss": 0.4998, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 2.7058823529411766, | |
| "grad_norm": 1.4261873477979348, | |
| "learning_rate": 4.46103916229894e-06, | |
| "loss": 0.5009, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 2.784313725490196, | |
| "grad_norm": 1.4137129832097197, | |
| "learning_rate": 4.415111107797445e-06, | |
| "loss": 0.479, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 2.8627450980392157, | |
| "grad_norm": 1.520647591268587, | |
| "learning_rate": 4.367562678102491e-06, | |
| "loss": 0.5122, | |
| "step": 37 | |
| }, | |
| { | |
| "epoch": 2.9411764705882355, | |
| "grad_norm": 1.437130314805203, | |
| "learning_rate": 4.318434103932622e-06, | |
| "loss": 0.4938, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "grad_norm": 1.381198514753958, | |
| "learning_rate": 4.267766952966369e-06, | |
| "loss": 0.3629, | |
| "step": 39 | |
| }, | |
| { | |
| "epoch": 3.0784313725490198, | |
| "grad_norm": 1.4414609712821322, | |
| "learning_rate": 4.215604094671835e-06, | |
| "loss": 0.4262, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 3.156862745098039, | |
| "grad_norm": 1.3346547206899726, | |
| "learning_rate": 4.161989664034844e-06, | |
| "loss": 0.4259, | |
| "step": 41 | |
| }, | |
| { | |
| "epoch": 3.235294117647059, | |
| "grad_norm": 1.2955870784243064, | |
| "learning_rate": 4.106969024216348e-06, | |
| "loss": 0.4009, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 3.313725490196078, | |
| "grad_norm": 1.381727916852796, | |
| "learning_rate": 4.0505887281706505e-06, | |
| "loss": 0.4082, | |
| "step": 43 | |
| }, | |
| { | |
| "epoch": 3.392156862745098, | |
| "grad_norm": 1.2215807897629705, | |
| "learning_rate": 3.992896479256966e-06, | |
| "loss": 0.4012, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 3.4705882352941178, | |
| "grad_norm": 1.341754935028127, | |
| "learning_rate": 3.933941090877615e-06, | |
| "loss": 0.4003, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 3.549019607843137, | |
| "grad_norm": 1.3219873249674265, | |
| "learning_rate": 3.8737724451770155e-06, | |
| "loss": 0.3906, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 3.627450980392157, | |
| "grad_norm": 1.2735261344863207, | |
| "learning_rate": 3.8124414508364005e-06, | |
| "loss": 0.3956, | |
| "step": 47 | |
| }, | |
| { | |
| "epoch": 3.7058823529411766, | |
| "grad_norm": 1.408667555717726, | |
| "learning_rate": 3.7500000000000005e-06, | |
| "loss": 0.3841, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 3.784313725490196, | |
| "grad_norm": 1.3920186495457132, | |
| "learning_rate": 3.6865009243691015e-06, | |
| "loss": 0.3998, | |
| "step": 49 | |
| }, | |
| { | |
| "epoch": 3.8627450980392157, | |
| "grad_norm": 1.3668192562682828, | |
| "learning_rate": 3.621997950501156e-06, | |
| "loss": 0.3947, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 3.9411764705882355, | |
| "grad_norm": 1.3220345352979812, | |
| "learning_rate": 3.556545654351749e-06, | |
| "loss": 0.3898, | |
| "step": 51 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "grad_norm": 1.273119605790472, | |
| "learning_rate": 3.4901994150978926e-06, | |
| "loss": 0.2635, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 4.078431372549019, | |
| "grad_norm": 1.5507480548662818, | |
| "learning_rate": 3.4230153682817112e-06, | |
| "loss": 0.3232, | |
| "step": 53 | |
| }, | |
| { | |
| "epoch": 4.1568627450980395, | |
| "grad_norm": 1.2464576840916806, | |
| "learning_rate": 3.3550503583141726e-06, | |
| "loss": 0.2997, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 4.235294117647059, | |
| "grad_norm": 1.2140787104099877, | |
| "learning_rate": 3.2863618903790346e-06, | |
| "loss": 0.3061, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 4.313725490196078, | |
| "grad_norm": 1.2569518947420275, | |
| "learning_rate": 3.217008081777726e-06, | |
| "loss": 0.2995, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 4.392156862745098, | |
| "grad_norm": 1.2248006221848717, | |
| "learning_rate": 3.147047612756302e-06, | |
| "loss": 0.2973, | |
| "step": 57 | |
| }, | |
| { | |
| "epoch": 4.470588235294118, | |
| "grad_norm": 1.2928146585389553, | |
| "learning_rate": 3.0765396768561005e-06, | |
| "loss": 0.2987, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 4.549019607843137, | |
| "grad_norm": 1.3574488686929125, | |
| "learning_rate": 3.0055439308300954e-06, | |
| "loss": 0.3132, | |
| "step": 59 | |
| }, | |
| { | |
| "epoch": 4.627450980392156, | |
| "grad_norm": 1.2894955154605787, | |
| "learning_rate": 2.9341204441673267e-06, | |
| "loss": 0.2929, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 4.705882352941177, | |
| "grad_norm": 1.2326192638645608, | |
| "learning_rate": 2.862329648268117e-06, | |
| "loss": 0.2794, | |
| "step": 61 | |
| }, | |
| { | |
| "epoch": 4.784313725490196, | |
| "grad_norm": 1.3599625008219607, | |
| "learning_rate": 2.7902322853130758e-06, | |
| "loss": 0.2911, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 4.862745098039216, | |
| "grad_norm": 1.2507485894142631, | |
| "learning_rate": 2.717889356869146e-06, | |
| "loss": 0.2945, | |
| "step": 63 | |
| }, | |
| { | |
| "epoch": 4.9411764705882355, | |
| "grad_norm": 1.3120759092640928, | |
| "learning_rate": 2.6453620722761897e-06, | |
| "loss": 0.3093, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "grad_norm": 1.3120759092640928, | |
| "learning_rate": 2.572711796857779e-06, | |
| "loss": 0.2038, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 5.078431372549019, | |
| "grad_norm": 1.2133928459250458, | |
| "learning_rate": 2.5e-06, | |
| "loss": 0.2457, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 5.1568627450980395, | |
| "grad_norm": 1.1164096271756436, | |
| "learning_rate": 2.4272882031422216e-06, | |
| "loss": 0.2378, | |
| "step": 67 | |
| }, | |
| { | |
| "epoch": 5.235294117647059, | |
| "grad_norm": 1.1865964714246466, | |
| "learning_rate": 2.3546379277238107e-06, | |
| "loss": 0.2365, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 5.313725490196078, | |
| "grad_norm": 1.1133005502471431, | |
| "learning_rate": 2.2821106431308546e-06, | |
| "loss": 0.2273, | |
| "step": 69 | |
| }, | |
| { | |
| "epoch": 5.392156862745098, | |
| "grad_norm": 1.0726780522048756, | |
| "learning_rate": 2.2097677146869242e-06, | |
| "loss": 0.2251, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 5.470588235294118, | |
| "grad_norm": 1.1507923857528541, | |
| "learning_rate": 2.1376703517318835e-06, | |
| "loss": 0.2199, | |
| "step": 71 | |
| }, | |
| { | |
| "epoch": 5.549019607843137, | |
| "grad_norm": 1.1965895392459576, | |
| "learning_rate": 2.0658795558326745e-06, | |
| "loss": 0.2275, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 5.627450980392156, | |
| "grad_norm": 1.2081935303041835, | |
| "learning_rate": 1.994456069169906e-06, | |
| "loss": 0.2219, | |
| "step": 73 | |
| }, | |
| { | |
| "epoch": 5.705882352941177, | |
| "grad_norm": 1.2037511840449118, | |
| "learning_rate": 1.9234603231439e-06, | |
| "loss": 0.2204, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 5.784313725490196, | |
| "grad_norm": 1.1530325179032028, | |
| "learning_rate": 1.852952387243698e-06, | |
| "loss": 0.22, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 5.862745098039216, | |
| "grad_norm": 1.1780261182347391, | |
| "learning_rate": 1.7829919182222752e-06, | |
| "loss": 0.2199, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 5.9411764705882355, | |
| "grad_norm": 1.1895169708488194, | |
| "learning_rate": 1.7136381096209665e-06, | |
| "loss": 0.2037, | |
| "step": 77 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "grad_norm": 1.0854093120483197, | |
| "learning_rate": 1.6449496416858285e-06, | |
| "loss": 0.1583, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 6.078431372549019, | |
| "grad_norm": 1.0801354684612592, | |
| "learning_rate": 1.5769846317182894e-06, | |
| "loss": 0.1934, | |
| "step": 79 | |
| }, | |
| { | |
| "epoch": 6.1568627450980395, | |
| "grad_norm": 1.0439246440370717, | |
| "learning_rate": 1.509800584902108e-06, | |
| "loss": 0.1705, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 6.235294117647059, | |
| "grad_norm": 1.0284457474816655, | |
| "learning_rate": 1.443454345648252e-06, | |
| "loss": 0.1723, | |
| "step": 81 | |
| }, | |
| { | |
| "epoch": 6.313725490196078, | |
| "grad_norm": 0.9926653408766625, | |
| "learning_rate": 1.3780020494988447e-06, | |
| "loss": 0.1792, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 6.392156862745098, | |
| "grad_norm": 1.0138502371712474, | |
| "learning_rate": 1.313499075630899e-06, | |
| "loss": 0.1679, | |
| "step": 83 | |
| }, | |
| { | |
| "epoch": 6.470588235294118, | |
| "grad_norm": 1.0083965688320564, | |
| "learning_rate": 1.2500000000000007e-06, | |
| "loss": 0.169, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 6.549019607843137, | |
| "grad_norm": 1.0514389443911982, | |
| "learning_rate": 1.1875585491636e-06, | |
| "loss": 0.1807, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 6.627450980392156, | |
| "grad_norm": 1.0545495890932062, | |
| "learning_rate": 1.1262275548229852e-06, | |
| "loss": 0.1735, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 6.705882352941177, | |
| "grad_norm": 1.1020211072090924, | |
| "learning_rate": 1.0660589091223854e-06, | |
| "loss": 0.1728, | |
| "step": 87 | |
| }, | |
| { | |
| "epoch": 6.784313725490196, | |
| "grad_norm": 1.0983738677411488, | |
| "learning_rate": 1.0071035207430352e-06, | |
| "loss": 0.171, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 6.862745098039216, | |
| "grad_norm": 1.0848006154577043, | |
| "learning_rate": 9.494112718293503e-07, | |
| "loss": 0.1761, | |
| "step": 89 | |
| }, | |
| { | |
| "epoch": 6.9411764705882355, | |
| "grad_norm": 1.0504620709430657, | |
| "learning_rate": 8.930309757836517e-07, | |
| "loss": 0.1689, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "grad_norm": 1.0335128775784548, | |
| "learning_rate": 8.380103359651554e-07, | |
| "loss": 0.1193, | |
| "step": 91 | |
| }, | |
| { | |
| "epoch": 7.078431372549019, | |
| "grad_norm": 0.9668349947212325, | |
| "learning_rate": 7.843959053281663e-07, | |
| "loss": 0.1531, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 7.1568627450980395, | |
| "grad_norm": 0.9049193018844318, | |
| "learning_rate": 7.322330470336314e-07, | |
| "loss": 0.1449, | |
| "step": 93 | |
| }, | |
| { | |
| "epoch": 7.235294117647059, | |
| "grad_norm": 0.8855250299857752, | |
| "learning_rate": 6.815658960673782e-07, | |
| "loss": 0.1398, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 7.313725490196078, | |
| "grad_norm": 0.9608687848192914, | |
| "learning_rate": 6.324373218975105e-07, | |
| "loss": 0.1487, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 7.392156862745098, | |
| "grad_norm": 0.9692880229899594, | |
| "learning_rate": 5.848888922025553e-07, | |
| "loss": 0.1403, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 7.470588235294118, | |
| "grad_norm": 0.9035708627250062, | |
| "learning_rate": 5.389608377010608e-07, | |
| "loss": 0.1449, | |
| "step": 97 | |
| }, | |
| { | |
| "epoch": 7.549019607843137, | |
| "grad_norm": 0.8748928881891229, | |
| "learning_rate": 4.946920181123904e-07, | |
| "loss": 0.1451, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 7.627450980392156, | |
| "grad_norm": 0.8912371091414796, | |
| "learning_rate": 4.5211988927752026e-07, | |
| "loss": 0.134, | |
| "step": 99 | |
| }, | |
| { | |
| "epoch": 7.705882352941177, | |
| "grad_norm": 0.9454342145405766, | |
| "learning_rate": 4.1128047146765936e-07, | |
| "loss": 0.1497, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 7.784313725490196, | |
| "grad_norm": 0.9466608405460601, | |
| "learning_rate": 3.722083189075007e-07, | |
| "loss": 0.1523, | |
| "step": 101 | |
| }, | |
| { | |
| "epoch": 7.862745098039216, | |
| "grad_norm": 0.9716836299571869, | |
| "learning_rate": 3.3493649053890325e-07, | |
| "loss": 0.1406, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 7.9411764705882355, | |
| "grad_norm": 0.9517618408879835, | |
| "learning_rate": 2.9949652204972257e-07, | |
| "loss": 0.1414, | |
| "step": 103 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "grad_norm": 0.8961669761492526, | |
| "learning_rate": 2.6591839919146963e-07, | |
| "loss": 0.0988, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 8.07843137254902, | |
| "grad_norm": 0.8800809597012162, | |
| "learning_rate": 2.3423053240837518e-07, | |
| "loss": 0.1326, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 8.156862745098039, | |
| "grad_norm": 0.8678268581817502, | |
| "learning_rate": 2.044597327993153e-07, | |
| "loss": 0.1382, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 8.235294117647058, | |
| "grad_norm": 0.8749341283858125, | |
| "learning_rate": 1.7663118943294367e-07, | |
| "loss": 0.1394, | |
| "step": 107 | |
| }, | |
| { | |
| "epoch": 8.313725490196079, | |
| "grad_norm": 0.8593112391578863, | |
| "learning_rate": 1.507684480352292e-07, | |
| "loss": 0.1278, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 8.392156862745098, | |
| "grad_norm": 0.9129847126820709, | |
| "learning_rate": 1.2689339106741529e-07, | |
| "loss": 0.1391, | |
| "step": 109 | |
| }, | |
| { | |
| "epoch": 8.470588235294118, | |
| "grad_norm": 0.8656126791419273, | |
| "learning_rate": 1.0502621921127776e-07, | |
| "loss": 0.1311, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 8.549019607843137, | |
| "grad_norm": 0.8593751089585534, | |
| "learning_rate": 8.518543427732951e-08, | |
| "loss": 0.1296, | |
| "step": 111 | |
| }, | |
| { | |
| "epoch": 8.627450980392156, | |
| "grad_norm": 0.8879560191886899, | |
| "learning_rate": 6.738782355044048e-08, | |
| "loss": 0.1263, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 8.705882352941176, | |
| "grad_norm": 0.9206743688905853, | |
| "learning_rate": 5.164844558612131e-08, | |
| "loss": 0.1381, | |
| "step": 113 | |
| }, | |
| { | |
| "epoch": 8.784313725490197, | |
| "grad_norm": 0.8795400657114122, | |
| "learning_rate": 3.798061746947995e-08, | |
| "loss": 0.1282, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 8.862745098039216, | |
| "grad_norm": 0.8568365367984947, | |
| "learning_rate": 2.6395903547638825e-08, | |
| "loss": 0.1331, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 8.941176470588236, | |
| "grad_norm": 0.847262314471311, | |
| "learning_rate": 1.6904105645142443e-08, | |
| "loss": 0.1264, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "grad_norm": 0.847262314471311, | |
| "learning_rate": 9.513254770636138e-09, | |
| "loss": 0.0991, | |
| "step": 117 | |
| }, | |
| { | |
| "epoch": 9.07843137254902, | |
| "grad_norm": 0.8727190548675097, | |
| "learning_rate": 4.229604321829561e-09, | |
| "loss": 0.1293, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 9.156862745098039, | |
| "grad_norm": 0.816661788373, | |
| "learning_rate": 1.0576247944985018e-09, | |
| "loss": 0.1269, | |
| "step": 119 | |
| }, | |
| { | |
| "epoch": 9.235294117647058, | |
| "grad_norm": 0.885147564294731, | |
| "learning_rate": 0.0, | |
| "loss": 0.1341, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 9.235294117647058, | |
| "step": 120, | |
| "total_flos": 27074593947648.0, | |
| "train_loss": 0.388430199213326, | |
| "train_runtime": 2487.6221, | |
| "train_samples_per_second": 1.624, | |
| "train_steps_per_second": 0.048 | |
| } | |
| ], | |
| "logging_steps": 1.0, | |
| "max_steps": 120, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 10, | |
| "save_steps": 1000, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 27074593947648.0, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |