Text Generation
Transformers
Safetensors
qwen2
Generated from Trainer
open-r1
trl
sft
conversational
text-generation-inference
Instructions to use lucass01/Qwen2.5-1.5B-Open-R1-Distill with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use lucass01/Qwen2.5-1.5B-Open-R1-Distill with Transformers:
# Use a pipeline as a high-level helper from transformers import pipeline pipe = pipeline("text-generation", model="lucass01/Qwen2.5-1.5B-Open-R1-Distill") messages = [ {"role": "user", "content": "Who are you?"}, ] pipe(messages)# Load model directly from transformers import AutoTokenizer, AutoModelForCausalLM tokenizer = AutoTokenizer.from_pretrained("lucass01/Qwen2.5-1.5B-Open-R1-Distill") model = AutoModelForCausalLM.from_pretrained("lucass01/Qwen2.5-1.5B-Open-R1-Distill") messages = [ {"role": "user", "content": "Who are you?"}, ] inputs = tokenizer.apply_chat_template( messages, add_generation_prompt=True, tokenize=True, return_dict=True, return_tensors="pt", ).to(model.device) outputs = model.generate(**inputs, max_new_tokens=40) print(tokenizer.decode(outputs[0][inputs["input_ids"].shape[-1]:])) - Notebooks
- Google Colab
- Kaggle
- Local Apps
- vLLM
How to use lucass01/Qwen2.5-1.5B-Open-R1-Distill with vLLM:
Install from pip and serve model
# Install vLLM from pip: pip install vllm # Start the vLLM server: vllm serve "lucass01/Qwen2.5-1.5B-Open-R1-Distill" # Call the server using curl (OpenAI-compatible API): curl -X POST "http://localhost:8000/v1/chat/completions" \ -H "Content-Type: application/json" \ --data '{ "model": "lucass01/Qwen2.5-1.5B-Open-R1-Distill", "messages": [ { "role": "user", "content": "What is the capital of France?" } ] }'Use Docker
docker model run hf.co/lucass01/Qwen2.5-1.5B-Open-R1-Distill
- SGLang
How to use lucass01/Qwen2.5-1.5B-Open-R1-Distill with SGLang:
Install from pip and serve model
# Install SGLang from pip: pip install sglang # Start the SGLang server: python3 -m sglang.launch_server \ --model-path "lucass01/Qwen2.5-1.5B-Open-R1-Distill" \ --host 0.0.0.0 \ --port 30000 # Call the server using curl (OpenAI-compatible API): curl -X POST "http://localhost:30000/v1/chat/completions" \ -H "Content-Type: application/json" \ --data '{ "model": "lucass01/Qwen2.5-1.5B-Open-R1-Distill", "messages": [ { "role": "user", "content": "What is the capital of France?" } ] }'Use Docker images
docker run --gpus all \ --shm-size 32g \ -p 30000:30000 \ -v ~/.cache/huggingface:/root/.cache/huggingface \ --env "HF_TOKEN=<secret>" \ --ipc=host \ lmsysorg/sglang:latest \ python3 -m sglang.launch_server \ --model-path "lucass01/Qwen2.5-1.5B-Open-R1-Distill" \ --host 0.0.0.0 \ --port 30000 # Call the server using curl (OpenAI-compatible API): curl -X POST "http://localhost:30000/v1/chat/completions" \ -H "Content-Type: application/json" \ --data '{ "model": "lucass01/Qwen2.5-1.5B-Open-R1-Distill", "messages": [ { "role": "user", "content": "What is the capital of France?" } ] }' - Docker Model Runner
How to use lucass01/Qwen2.5-1.5B-Open-R1-Distill with Docker Model Runner:
docker model run hf.co/lucass01/Qwen2.5-1.5B-Open-R1-Distill
| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 1.0, | |
| "eval_steps": 500, | |
| "global_step": 537, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.00931098696461825, | |
| "grad_norm": 1.4210419106421883, | |
| "learning_rate": 9.259259259259259e-06, | |
| "loss": 0.8537, | |
| "num_tokens": 5242880.0, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.0186219739292365, | |
| "grad_norm": 1.1151402682392344, | |
| "learning_rate": 1.8518518518518518e-05, | |
| "loss": 0.7985, | |
| "num_tokens": 10465392.0, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.027932960893854747, | |
| "grad_norm": 0.724348224866566, | |
| "learning_rate": 2.777777777777778e-05, | |
| "loss": 0.7413, | |
| "num_tokens": 15708272.0, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.037243947858473, | |
| "grad_norm": 0.4345624527110088, | |
| "learning_rate": 3.7037037037037037e-05, | |
| "loss": 0.6905, | |
| "num_tokens": 20951152.0, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.04655493482309125, | |
| "grad_norm": 0.3965562660873969, | |
| "learning_rate": 4.62962962962963e-05, | |
| "loss": 0.6738, | |
| "num_tokens": 26194032.0, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.055865921787709494, | |
| "grad_norm": 0.4004389301791487, | |
| "learning_rate": 4.999615813530619e-05, | |
| "loss": 0.6548, | |
| "num_tokens": 31436912.0, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.06517690875232775, | |
| "grad_norm": 0.3617839186758097, | |
| "learning_rate": 4.997268482437153e-05, | |
| "loss": 0.6338, | |
| "num_tokens": 36666389.0, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.074487895716946, | |
| "grad_norm": 0.2888387943186131, | |
| "learning_rate": 4.992789481063698e-05, | |
| "loss": 0.6274, | |
| "num_tokens": 41909269.0, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.08379888268156424, | |
| "grad_norm": 0.36356002561992884, | |
| "learning_rate": 4.9861830580143667e-05, | |
| "loss": 0.629, | |
| "num_tokens": 47152149.0, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.0931098696461825, | |
| "grad_norm": 0.30736489842782255, | |
| "learning_rate": 4.977455479881591e-05, | |
| "loss": 0.6056, | |
| "num_tokens": 52353719.0, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.10242085661080075, | |
| "grad_norm": 0.31222388716247457, | |
| "learning_rate": 4.96661502530189e-05, | |
| "loss": 0.6095, | |
| "num_tokens": 57574357.0, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.11173184357541899, | |
| "grad_norm": 0.370734817299563, | |
| "learning_rate": 4.9536719771030736e-05, | |
| "loss": 0.6049, | |
| "num_tokens": 62792124.0, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.12104283054003724, | |
| "grad_norm": 0.3645138936379128, | |
| "learning_rate": 4.938638612550361e-05, | |
| "loss": 0.6041, | |
| "num_tokens": 68027252.0, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.1303538175046555, | |
| "grad_norm": 0.40787822229425746, | |
| "learning_rate": 4.92152919170065e-05, | |
| "loss": 0.5894, | |
| "num_tokens": 73260442.0, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.13966480446927373, | |
| "grad_norm": 0.34078044675584757, | |
| "learning_rate": 4.902359943875992e-05, | |
| "loss": 0.5902, | |
| "num_tokens": 78503322.0, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.148975791433892, | |
| "grad_norm": 0.32141604974643456, | |
| "learning_rate": 4.8811490522690904e-05, | |
| "loss": 0.5774, | |
| "num_tokens": 83727556.0, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.15828677839851024, | |
| "grad_norm": 0.3795067576990938, | |
| "learning_rate": 4.8579166366954484e-05, | |
| "loss": 0.5884, | |
| "num_tokens": 88959058.0, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.16759776536312848, | |
| "grad_norm": 0.2900776657085515, | |
| "learning_rate": 4.832684734508502e-05, | |
| "loss": 0.5809, | |
| "num_tokens": 94201938.0, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.17690875232774675, | |
| "grad_norm": 0.3083068965103155, | |
| "learning_rate": 4.805477279695852e-05, | |
| "loss": 0.5796, | |
| "num_tokens": 99432200.0, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.186219739292365, | |
| "grad_norm": 0.46425935931691625, | |
| "learning_rate": 4.776320080176434e-05, | |
| "loss": 0.5791, | |
| "num_tokens": 104670740.0, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.19553072625698323, | |
| "grad_norm": 0.4650776354391665, | |
| "learning_rate": 4.745240793320139e-05, | |
| "loss": 0.5826, | |
| "num_tokens": 109909053.0, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.2048417132216015, | |
| "grad_norm": 0.4353890147241201, | |
| "learning_rate": 4.712268899713125e-05, | |
| "loss": 0.5848, | |
| "num_tokens": 115151933.0, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.21415270018621974, | |
| "grad_norm": 0.41821526300311507, | |
| "learning_rate": 4.6774356751936914e-05, | |
| "loss": 0.5805, | |
| "num_tokens": 120376968.0, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.22346368715083798, | |
| "grad_norm": 0.351280496193892, | |
| "learning_rate": 4.640774161185259e-05, | |
| "loss": 0.5698, | |
| "num_tokens": 125607410.0, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.23277467411545624, | |
| "grad_norm": 0.3822455446193692, | |
| "learning_rate": 4.602319133354571e-05, | |
| "loss": 0.5725, | |
| "num_tokens": 130831913.0, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.24208566108007448, | |
| "grad_norm": 0.2841526643703192, | |
| "learning_rate": 4.5621070686248734e-05, | |
| "loss": 0.5738, | |
| "num_tokens": 136036466.0, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.25139664804469275, | |
| "grad_norm": 0.28890876631258056, | |
| "learning_rate": 4.5201761105753376e-05, | |
| "loss": 0.5718, | |
| "num_tokens": 141273912.0, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.260707635009311, | |
| "grad_norm": 0.32609770896218293, | |
| "learning_rate": 4.476566033259568e-05, | |
| "loss": 0.5689, | |
| "num_tokens": 146502251.0, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.27001862197392923, | |
| "grad_norm": 0.3177762846048211, | |
| "learning_rate": 4.4313182034775045e-05, | |
| "loss": 0.5669, | |
| "num_tokens": 151734911.0, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.27932960893854747, | |
| "grad_norm": 0.3963580304350529, | |
| "learning_rate": 4.384475541536505e-05, | |
| "loss": 0.57, | |
| "num_tokens": 156977308.0, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.2886405959031657, | |
| "grad_norm": 0.2753787458495359, | |
| "learning_rate": 4.3360824805388314e-05, | |
| "loss": 0.5583, | |
| "num_tokens": 162220188.0, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 0.297951582867784, | |
| "grad_norm": 0.3841551355595531, | |
| "learning_rate": 4.2861849242341673e-05, | |
| "loss": 0.5671, | |
| "num_tokens": 167423794.0, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.30726256983240224, | |
| "grad_norm": 0.4177758092090377, | |
| "learning_rate": 4.2348302034771266e-05, | |
| "loss": 0.5537, | |
| "num_tokens": 172657586.0, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 0.3165735567970205, | |
| "grad_norm": 0.27263863165709906, | |
| "learning_rate": 4.1820670313310686e-05, | |
| "loss": 0.5564, | |
| "num_tokens": 177900466.0, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.3258845437616387, | |
| "grad_norm": 0.29258598302902616, | |
| "learning_rate": 4.1279454568608126e-05, | |
| "loss": 0.5609, | |
| "num_tokens": 183130933.0, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.33519553072625696, | |
| "grad_norm": 0.31407264656237033, | |
| "learning_rate": 4.072516817658065e-05, | |
| "loss": 0.5564, | |
| "num_tokens": 188373813.0, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.34450651769087526, | |
| "grad_norm": 0.3120574314584388, | |
| "learning_rate": 4.0158336911446025e-05, | |
| "loss": 0.5628, | |
| "num_tokens": 193616502.0, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 0.3538175046554935, | |
| "grad_norm": 0.30744269661986834, | |
| "learning_rate": 3.9579498446994056e-05, | |
| "loss": 0.5518, | |
| "num_tokens": 198859382.0, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.36312849162011174, | |
| "grad_norm": 0.2761741605282443, | |
| "learning_rate": 3.8989201846570405e-05, | |
| "loss": 0.551, | |
| "num_tokens": 204102262.0, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 0.37243947858473, | |
| "grad_norm": 0.3074672631236873, | |
| "learning_rate": 3.8388007042256785e-05, | |
| "loss": 0.5573, | |
| "num_tokens": 209345142.0, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.3817504655493482, | |
| "grad_norm": 0.27800878810831803, | |
| "learning_rate": 3.777648430374142e-05, | |
| "loss": 0.5538, | |
| "num_tokens": 214586037.0, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 0.39106145251396646, | |
| "grad_norm": 0.293044883237373, | |
| "learning_rate": 3.71552136973837e-05, | |
| "loss": 0.5527, | |
| "num_tokens": 219820636.0, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.40037243947858475, | |
| "grad_norm": 0.30421370875030884, | |
| "learning_rate": 3.6524784535986175e-05, | |
| "loss": 0.5429, | |
| "num_tokens": 225063516.0, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 0.409683426443203, | |
| "grad_norm": 0.34073962406570196, | |
| "learning_rate": 3.5885794819795564e-05, | |
| "loss": 0.5481, | |
| "num_tokens": 230306396.0, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.41899441340782123, | |
| "grad_norm": 0.2565970903314218, | |
| "learning_rate": 3.523885066926339e-05, | |
| "loss": 0.547, | |
| "num_tokens": 235549276.0, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.42830540037243947, | |
| "grad_norm": 0.2629689057281218, | |
| "learning_rate": 3.458456575010393e-05, | |
| "loss": 0.5508, | |
| "num_tokens": 240792156.0, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.4376163873370577, | |
| "grad_norm": 0.29007658847360174, | |
| "learning_rate": 3.39235606911952e-05, | |
| "loss": 0.5452, | |
| "num_tokens": 246029886.0, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 0.44692737430167595, | |
| "grad_norm": 0.2535281408801575, | |
| "learning_rate": 3.3256462495874804e-05, | |
| "loss": 0.549, | |
| "num_tokens": 251259365.0, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.45623836126629425, | |
| "grad_norm": 0.24781181484646034, | |
| "learning_rate": 3.258390394718933e-05, | |
| "loss": 0.5553, | |
| "num_tokens": 256502245.0, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 0.4655493482309125, | |
| "grad_norm": 0.2630095693381117, | |
| "learning_rate": 3.190652300766126e-05, | |
| "loss": 0.5457, | |
| "num_tokens": 261736444.0, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.4748603351955307, | |
| "grad_norm": 0.28592366591836077, | |
| "learning_rate": 3.122496221414293e-05, | |
| "loss": 0.5532, | |
| "num_tokens": 266962986.0, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 0.48417132216014896, | |
| "grad_norm": 0.2929373861403859, | |
| "learning_rate": 3.053986806833135e-05, | |
| "loss": 0.5456, | |
| "num_tokens": 272205866.0, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.4934823091247672, | |
| "grad_norm": 0.26915646995198533, | |
| "learning_rate": 2.9851890423522204e-05, | |
| "loss": 0.5481, | |
| "num_tokens": 277446221.0, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 0.5027932960893855, | |
| "grad_norm": 0.24698676025864852, | |
| "learning_rate": 2.916168186818467e-05, | |
| "loss": 0.5413, | |
| "num_tokens": 282683327.0, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.5121042830540037, | |
| "grad_norm": 0.27103300869681185, | |
| "learning_rate": 2.8469897106941655e-05, | |
| "loss": 0.5487, | |
| "num_tokens": 287912879.0, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 0.521415270018622, | |
| "grad_norm": 0.2405128738466024, | |
| "learning_rate": 2.7777192339542868e-05, | |
| "loss": 0.5456, | |
| "num_tokens": 293122806.0, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.5307262569832403, | |
| "grad_norm": 0.2626795183171401, | |
| "learning_rate": 2.708422463841958e-05, | |
| "loss": 0.5492, | |
| "num_tokens": 298356067.0, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 0.5400372439478585, | |
| "grad_norm": 0.23208432753851752, | |
| "learning_rate": 2.639165132541162e-05, | |
| "loss": 0.5437, | |
| "num_tokens": 303598947.0, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.5493482309124768, | |
| "grad_norm": 0.2668195397686558, | |
| "learning_rate": 2.5700129348257817e-05, | |
| "loss": 0.5496, | |
| "num_tokens": 308834761.0, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 0.5586592178770949, | |
| "grad_norm": 0.24155887859827735, | |
| "learning_rate": 2.5010314657441236e-05, | |
| "loss": 0.5375, | |
| "num_tokens": 314077641.0, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.5679702048417132, | |
| "grad_norm": 0.22599358610446976, | |
| "learning_rate": 2.432286158398045e-05, | |
| "loss": 0.536, | |
| "num_tokens": 319309553.0, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 0.5772811918063314, | |
| "grad_norm": 0.22670797204264545, | |
| "learning_rate": 2.3638422218756905e-05, | |
| "loss": 0.533, | |
| "num_tokens": 324545386.0, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.5865921787709497, | |
| "grad_norm": 0.23594501607950474, | |
| "learning_rate": 2.2957645793967277e-05, | |
| "loss": 0.5464, | |
| "num_tokens": 329788266.0, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 0.595903165735568, | |
| "grad_norm": 0.22941469331605024, | |
| "learning_rate": 2.22811780672873e-05, | |
| "loss": 0.546, | |
| "num_tokens": 335031146.0, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.6052141527001862, | |
| "grad_norm": 0.228340284564919, | |
| "learning_rate": 2.1609660709331592e-05, | |
| "loss": 0.5273, | |
| "num_tokens": 340270891.0, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 0.6145251396648045, | |
| "grad_norm": 0.23445647783778023, | |
| "learning_rate": 2.09437306949902e-05, | |
| "loss": 0.526, | |
| "num_tokens": 345504904.0, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.6238361266294227, | |
| "grad_norm": 0.22696299012921342, | |
| "learning_rate": 2.0284019699219263e-05, | |
| "loss": 0.5321, | |
| "num_tokens": 350734169.0, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 0.633147113594041, | |
| "grad_norm": 0.24373017348724665, | |
| "learning_rate": 1.9631153497859123e-05, | |
| "loss": 0.54, | |
| "num_tokens": 355956520.0, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.6424581005586593, | |
| "grad_norm": 0.2084629388822976, | |
| "learning_rate": 1.898575137404802e-05, | |
| "loss": 0.5398, | |
| "num_tokens": 361191468.0, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 0.6517690875232774, | |
| "grad_norm": 0.2299031182392996, | |
| "learning_rate": 1.83484255307945e-05, | |
| "loss": 0.5335, | |
| "num_tokens": 366434348.0, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.6610800744878957, | |
| "grad_norm": 0.24042389063025746, | |
| "learning_rate": 1.7719780510265795e-05, | |
| "loss": 0.5421, | |
| "num_tokens": 371677228.0, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 0.6703910614525139, | |
| "grad_norm": 0.21546418811112011, | |
| "learning_rate": 1.710041262034296e-05, | |
| "loss": 0.5368, | |
| "num_tokens": 376908695.0, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.6797020484171322, | |
| "grad_norm": 0.21260804048598256, | |
| "learning_rate": 1.6490909368986725e-05, | |
| "loss": 0.5472, | |
| "num_tokens": 382146337.0, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 0.6890130353817505, | |
| "grad_norm": 0.20185202208733555, | |
| "learning_rate": 1.58918489069506e-05, | |
| "loss": 0.5364, | |
| "num_tokens": 387384301.0, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.6983240223463687, | |
| "grad_norm": 0.21414628866408306, | |
| "learning_rate": 1.5303799479369892e-05, | |
| "loss": 0.5276, | |
| "num_tokens": 392611427.0, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.707635009310987, | |
| "grad_norm": 0.18860489091523994, | |
| "learning_rate": 1.4727318886746724e-05, | |
| "loss": 0.5375, | |
| "num_tokens": 397835323.0, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.7169459962756052, | |
| "grad_norm": 0.18734884514152186, | |
| "learning_rate": 1.4162953955842518e-05, | |
| "loss": 0.5301, | |
| "num_tokens": 403065282.0, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 0.7262569832402235, | |
| "grad_norm": 0.20401265519925388, | |
| "learning_rate": 1.3611240020979663e-05, | |
| "loss": 0.5325, | |
| "num_tokens": 408298322.0, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.7355679702048417, | |
| "grad_norm": 0.20070387824250907, | |
| "learning_rate": 1.3072700416244494e-05, | |
| "loss": 0.5412, | |
| "num_tokens": 413538686.0, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 0.74487895716946, | |
| "grad_norm": 0.19819656310845635, | |
| "learning_rate": 1.2547845979073204e-05, | |
| "loss": 0.5331, | |
| "num_tokens": 418781566.0, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.7541899441340782, | |
| "grad_norm": 0.1998428611029513, | |
| "learning_rate": 1.203717456569159e-05, | |
| "loss": 0.5252, | |
| "num_tokens": 424003342.0, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 0.7635009310986964, | |
| "grad_norm": 0.2169445240309026, | |
| "learning_rate": 1.15411705788683e-05, | |
| "loss": 0.5405, | |
| "num_tokens": 429246222.0, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.7728119180633147, | |
| "grad_norm": 0.1943607434521693, | |
| "learning_rate": 1.1060304508429408e-05, | |
| "loss": 0.5283, | |
| "num_tokens": 434474764.0, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 0.7821229050279329, | |
| "grad_norm": 0.18585453813571412, | |
| "learning_rate": 1.059503248497035e-05, | |
| "loss": 0.5255, | |
| "num_tokens": 439717644.0, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.7914338919925512, | |
| "grad_norm": 0.2108300605323748, | |
| "learning_rate": 1.0145795847188434e-05, | |
| "loss": 0.5339, | |
| "num_tokens": 444944189.0, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 0.8007448789571695, | |
| "grad_norm": 0.2264407666449324, | |
| "learning_rate": 9.713020723246324e-06, | |
| "loss": 0.5381, | |
| "num_tokens": 450170885.0, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.8100558659217877, | |
| "grad_norm": 0.20184925385370617, | |
| "learning_rate": 9.297117626563687e-06, | |
| "loss": 0.5324, | |
| "num_tokens": 455413765.0, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 0.819366852886406, | |
| "grad_norm": 0.1878680680736472, | |
| "learning_rate": 8.898481066420325e-06, | |
| "loss": 0.5315, | |
| "num_tokens": 460656645.0, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.8286778398510242, | |
| "grad_norm": 0.20949178411153432, | |
| "learning_rate": 8.517489173740262e-06, | |
| "loss": 0.5305, | |
| "num_tokens": 465899525.0, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 0.8379888268156425, | |
| "grad_norm": 0.185038797709674, | |
| "learning_rate": 8.15450334241162e-06, | |
| "loss": 0.5178, | |
| "num_tokens": 471142405.0, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.8472998137802608, | |
| "grad_norm": 0.17715442699837788, | |
| "learning_rate": 7.809867886482666e-06, | |
| "loss": 0.5295, | |
| "num_tokens": 476385285.0, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 0.8566108007448789, | |
| "grad_norm": 0.19150077602633506, | |
| "learning_rate": 7.483909713559035e-06, | |
| "loss": 0.5271, | |
| "num_tokens": 481616409.0, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.8659217877094972, | |
| "grad_norm": 0.1893179692797503, | |
| "learning_rate": 7.176938014712102e-06, | |
| "loss": 0.5264, | |
| "num_tokens": 486854794.0, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 0.8752327746741154, | |
| "grad_norm": 0.1887918388173283, | |
| "learning_rate": 6.889243971192496e-06, | |
| "loss": 0.5287, | |
| "num_tokens": 492097674.0, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.8845437616387337, | |
| "grad_norm": 0.18546942835231378, | |
| "learning_rate": 6.6211004782270746e-06, | |
| "loss": 0.5312, | |
| "num_tokens": 497312162.0, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 0.8938547486033519, | |
| "grad_norm": 0.1935479791248119, | |
| "learning_rate": 6.372761886161231e-06, | |
| "loss": 0.5192, | |
| "num_tokens": 502555042.0, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.9031657355679702, | |
| "grad_norm": 0.19820350791698074, | |
| "learning_rate": 6.1444637591922245e-06, | |
| "loss": 0.5253, | |
| "num_tokens": 507789906.0, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 0.9124767225325885, | |
| "grad_norm": 0.18774875191699997, | |
| "learning_rate": 5.936422651922251e-06, | |
| "loss": 0.5213, | |
| "num_tokens": 513031521.0, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.9217877094972067, | |
| "grad_norm": 0.18584859938164744, | |
| "learning_rate": 5.748835903943284e-06, | |
| "loss": 0.531, | |
| "num_tokens": 518274401.0, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 0.931098696461825, | |
| "grad_norm": 0.18302556041413434, | |
| "learning_rate": 5.581881452648523e-06, | |
| "loss": 0.5317, | |
| "num_tokens": 523517281.0, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.9404096834264432, | |
| "grad_norm": 0.19819671802504518, | |
| "learning_rate": 5.435717664448002e-06, | |
| "loss": 0.5313, | |
| "num_tokens": 528760161.0, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 0.9497206703910615, | |
| "grad_norm": 0.17835670014277702, | |
| "learning_rate": 5.310483184548443e-06, | |
| "loss": 0.5243, | |
| "num_tokens": 533998146.0, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.9590316573556797, | |
| "grad_norm": 0.1765246076781633, | |
| "learning_rate": 5.206296805439894e-06, | |
| "loss": 0.5243, | |
| "num_tokens": 539227681.0, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 0.9683426443202979, | |
| "grad_norm": 0.18365273483430458, | |
| "learning_rate": 5.123257354213851e-06, | |
| "loss": 0.5234, | |
| "num_tokens": 544470561.0, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.9776536312849162, | |
| "grad_norm": 0.18283453031782343, | |
| "learning_rate": 5.06144359881977e-06, | |
| "loss": 0.521, | |
| "num_tokens": 549691697.0, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 0.9869646182495344, | |
| "grad_norm": 0.18396988674845904, | |
| "learning_rate": 5.020914173348905e-06, | |
| "loss": 0.5191, | |
| "num_tokens": 554934577.0, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.9962756052141527, | |
| "grad_norm": 0.18584208529935797, | |
| "learning_rate": 5.0017075224163115e-06, | |
| "loss": 0.5207, | |
| "num_tokens": 560177457.0, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "step": 537, | |
| "total_flos": 489532856205312.0, | |
| "train_loss": 0.0, | |
| "train_runtime": 1.4445, | |
| "train_samples_per_second": 23750.234, | |
| "train_steps_per_second": 371.746 | |
| } | |
| ], | |
| "logging_steps": 5, | |
| "max_steps": 537, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 100, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 489532856205312.0, | |
| "train_batch_size": 16, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |