Instructions to use modrill/qwen3_4b_base_rstar_longcot_16k_lora with libraries, inference providers, notebooks, and local apps. Follow these links to get started.

Libraries

How to use modrill/qwen3_4b_base_rstar_longcot_16k_lora with Transformers:

# Quick start: drive the model through the high-level `pipeline` helper
from transformers import pipeline

# A chat is a list of {"role", "content"} turns; this one holds a single user turn.
chat = [{"role": "user", "content": "Who are you?"}]

# Build a text-generation pipeline backed by this repository's model.
generator = pipeline(
    "text-generation",
    model="modrill/qwen3_4b_base_rstar_longcot_16k_lora",
)

# Run the chat through the pipeline (in a notebook the result is displayed inline).
generator(chat)

# Load model directly
# AutoModelForCausalLM attaches the language-modeling head required for text
# generation; plain AutoModel would load only the bare backbone, which cannot
# produce tokens.
from transformers import AutoModelForCausalLM, AutoTokenizer

# The tokenizer is needed to turn prompts into input ids and decode the output.
tokenizer = AutoTokenizer.from_pretrained("modrill/qwen3_4b_base_rstar_longcot_16k_lora")
# dtype="auto" defers the choice of weight precision to the library/checkpoint.
model = AutoModelForCausalLM.from_pretrained("modrill/qwen3_4b_base_rstar_longcot_16k_lora", dtype="auto")

Notebooks
Google Colab
Kaggle
Local Apps

vLLM

How to use modrill/qwen3_4b_base_rstar_longcot_16k_lora with vLLM:

Install from pip and serve model

# Install vLLM from pip:
pip install vllm
# Start the vLLM server:
# NOTE: the server has to be up and listening before the request below is sent;
# if `vllm serve` stays in the foreground, run the curl call from a second terminal.
vllm serve "modrill/qwen3_4b_base_rstar_longcot_16k_lora"
# Call the server using curl (OpenAI-compatible API):
# Sends a single-turn chat request as a JSON body to the chat-completions
# endpoint on localhost port 8000; the "model" field repeats the served model id.
curl -X POST "http://localhost:8000/v1/chat/completions" \
	-H "Content-Type: application/json" \
	--data '{
		"model": "modrill/qwen3_4b_base_rstar_longcot_16k_lora",
		"messages": [
			{
				"role": "user",
				"content": "What is the capital of France?"
			}
		]
	}'

Use Docker

# Same command as shown in the "Docker Model Runner" section of this page.
# The "hf.co/" prefix presumably tells Docker to fetch the model from Hugging Face - confirm.
docker model run hf.co/modrill/qwen3_4b_base_rstar_longcot_16k_lora

SGLang

How to use modrill/qwen3_4b_base_rstar_longcot_16k_lora with SGLang:

Install from pip and serve model

# Install SGLang from pip:
pip install sglang
# Start the SGLang server:
# "--host 0.0.0.0" binds on all network interfaces (not just localhost), and
# "--port 30000" must match the port used in the curl URL below.
python3 -m sglang.launch_server \
    --model-path "modrill/qwen3_4b_base_rstar_longcot_16k_lora" \
    --host 0.0.0.0 \
    --port 30000
# Call the server using curl (OpenAI-compatible API):
# Sends a single-turn chat request as a JSON body; the server must already be
# running, so issue this from a separate terminal if the launch command blocks.
curl -X POST "http://localhost:30000/v1/chat/completions" \
	-H "Content-Type: application/json" \
	--data '{
		"model": "modrill/qwen3_4b_base_rstar_longcot_16k_lora",
		"messages": [
			{
				"role": "user",
				"content": "What is the capital of France?"
			}
		]
	}'

Use Docker images

# Run the SGLang server from the lmsysorg/sglang:latest image instead of a local install.
#   -p 30000:30000   publishes the container port that "--port 30000" listens on.
#   -v ~/.cache/huggingface:/root/.cache/huggingface
#                    bind-mounts the host Hugging Face cache directory into the
#                    container (presumably so downloaded weights are reused).
#   --env "HF_TOKEN=<secret>"
#                    "<secret>" is a placeholder: substitute your own Hugging Face
#                    token and never commit the real value.
docker run --gpus all \
    --shm-size 32g \
    -p 30000:30000 \
    -v ~/.cache/huggingface:/root/.cache/huggingface \
    --env "HF_TOKEN=<secret>" \
    --ipc=host \
    lmsysorg/sglang:latest \
    python3 -m sglang.launch_server \
        --model-path "modrill/qwen3_4b_base_rstar_longcot_16k_lora" \
        --host 0.0.0.0 \
        --port 30000
# Call the server using curl (OpenAI-compatible API):
# Same request as for the pip-installed server; it reaches the container
# through the published port 30000 on localhost.
curl -X POST "http://localhost:30000/v1/chat/completions" \
	-H "Content-Type: application/json" \
	--data '{
		"model": "modrill/qwen3_4b_base_rstar_longcot_16k_lora",
		"messages": [
			{
				"role": "user",
				"content": "What is the capital of France?"
			}
		]
	}'

Docker Model Runner
How to use modrill/qwen3_4b_base_rstar_longcot_16k_lora with Docker Model Runner:
```
# Run this repository's model with Docker Model Runner.
# The "hf.co/" prefix presumably tells Docker to fetch the model from Hugging Face - confirm.
docker model run hf.co/modrill/qwen3_4b_base_rstar_longcot_16k_lora
```

qwen3_4b_base_rstar_longcot_16k_lora / trainer_state.json

modrill

Add files using upload-large-folder tool

ebbe3fd verified 10 days ago

raw

history blame contribute delete

15.2 kB

	{
	"best_global_step": null,
	"best_metric": null,
	"best_model_checkpoint": null,
	"epoch": 1.0,
	"eval_steps": 100,
	"global_step": 766,
	"is_hyper_param_search": false,
	"is_local_process_zero": true,
	"is_world_process_zero": true,
	"log_history": [
	{
	"epoch": 0.013061224489795919,
	"grad_norm": 0.013593924231827259,
	"learning_rate": 1.8461538461538465e-05,
	"loss": 1.0348053932189942,
	"step": 10
	},
	{
	"epoch": 0.026122448979591838,
	"grad_norm": 0.010592319071292877,
	"learning_rate": 3.8974358974358976e-05,
	"loss": 0.9912956237792969,
	"step": 20
	},
	{
	"epoch": 0.03918367346938775,
	"grad_norm": 0.008065517991781235,
	"learning_rate": 5.9487179487179495e-05,
	"loss": 0.9145261764526367,
	"step": 30
	},
	{
	"epoch": 0.052244897959183675,
	"grad_norm": 0.006928236689418554,
	"learning_rate": 8e-05,
	"loss": 0.8690940856933593,
	"step": 40
	},
	{
	"epoch": 0.0653061224489796,
	"grad_norm": 0.0065370709635317326,
	"learning_rate": 7.996265836446255e-05,
	"loss": 0.8447072982788086,
	"step": 50
	},
	{
	"epoch": 0.0783673469387755,
	"grad_norm": 0.005765452049672604,
	"learning_rate": 7.985070317773737e-05,
	"loss": 0.8226842880249023,
	"step": 60
	},
	{
	"epoch": 0.09142857142857143,
	"grad_norm": 0.004979082383215427,
	"learning_rate": 7.966434346931348e-05,
	"loss": 0.8047774314880372,
	"step": 70
	},
	{
	"epoch": 0.10448979591836735,
	"grad_norm": 0.00686669023707509,
	"learning_rate": 7.940392718800637e-05,
	"loss": 0.7929642200469971,
	"step": 80
	},
	{
	"epoch": 0.11755102040816326,
	"grad_norm": 0.006248346995562315,
	"learning_rate": 7.90699405523093e-05,
	"loss": 0.7915477275848388,
	"step": 90
	},
	{
	"epoch": 0.1306122448979592,
	"grad_norm": 0.007850440219044685,
	"learning_rate": 7.86630071425835e-05,
	"loss": 0.7851225376129151,
	"step": 100
	},
	{
	"epoch": 0.1436734693877551,
	"grad_norm": 0.007440537214279175,
	"learning_rate": 7.818388673678265e-05,
	"loss": 0.7773007869720459,
	"step": 110
	},
	{
	"epoch": 0.156734693877551,
	"grad_norm": 0.007219385821372271,
	"learning_rate": 7.763347389188538e-05,
	"loss": 0.7723363399505615,
	"step": 120
	},
	{
	"epoch": 0.16979591836734695,
	"grad_norm": 0.006354185286909342,
	"learning_rate": 7.701279627368412e-05,
	"loss": 0.7682206153869628,
	"step": 130
	},
	{
	"epoch": 0.18285714285714286,
	"grad_norm": 0.006955909077078104,
	"learning_rate": 7.632301273804914e-05,
	"loss": 0.7699796676635742,
	"step": 140
	},
	{
	"epoch": 0.19591836734693877,
	"grad_norm": 0.008277718909084797,
	"learning_rate": 7.556541116724981e-05,
	"loss": 0.764019775390625,
	"step": 150
	},
	{
	"epoch": 0.2089795918367347,
	"grad_norm": 0.007425008807331324,
	"learning_rate": 7.474140606537311e-05,
	"loss": 0.7628626823425293,
	"step": 160
	},
	{
	"epoch": 0.2220408163265306,
	"grad_norm": 0.008092896081507206,
	"learning_rate": 7.38525359173288e-05,
	"loss": 0.7607949256896973,
	"step": 170
	},
	{
	"epoch": 0.23510204081632652,
	"grad_norm": 0.011249990202486515,
	"learning_rate": 7.290046031637228e-05,
	"loss": 0.757351303100586,
	"step": 180
	},
	{
	"epoch": 0.24816326530612245,
	"grad_norm": 0.009156743064522743,
	"learning_rate": 7.188695686550836e-05,
	"loss": 0.7565219879150391,
	"step": 190
	},
	{
	"epoch": 0.2612244897959184,
	"grad_norm": 0.008516875095665455,
	"learning_rate": 7.081391785856087e-05,
	"loss": 0.7473669052124023,
	"step": 200
	},
	{
	"epoch": 0.2742857142857143,
	"grad_norm": 0.008919311687350273,
	"learning_rate": 6.96833467471056e-05,
	"loss": 0.7444419860839844,
	"step": 210
	},
	{
	"epoch": 0.2873469387755102,
	"grad_norm": 0.007335508707910776,
	"learning_rate": 6.84973543998622e-05,
	"loss": 0.7573845386505127,
	"step": 220
	},
	{
	"epoch": 0.3004081632653061,
	"grad_norm": 0.007334326393902302,
	"learning_rate": 6.725815516152973e-05,
	"loss": 0.7524682998657226,
	"step": 230
	},
	{
	"epoch": 0.313469387755102,
	"grad_norm": 0.006889250595122576,
	"learning_rate": 6.596806271842398e-05,
	"loss": 0.7463503837585449,
	"step": 240
	},
	{
	"epoch": 0.32653061224489793,
	"grad_norm": 0.007522549480199814,
	"learning_rate": 6.462948577863593e-05,
	"loss": 0.7468090057373047,
	"step": 250
	},
	{
	"epoch": 0.3395918367346939,
	"grad_norm": 0.00689704529941082,
	"learning_rate": 6.324492357477686e-05,
	"loss": 0.745818042755127,
	"step": 260
	},
	{
	"epoch": 0.3526530612244898,
	"grad_norm": 0.007402004674077034,
	"learning_rate": 6.18169611977065e-05,
	"loss": 0.737040901184082,
	"step": 270
	},
	{
	"epoch": 0.3657142857142857,
	"grad_norm": 0.006718257907778025,
	"learning_rate": 6.034826476995715e-05,
	"loss": 0.7412730693817139,
	"step": 280
	},
	{
	"epoch": 0.3787755102040816,
	"grad_norm": 0.006786980666220188,
	"learning_rate": 5.8841576467864825e-05,
	"loss": 0.7408377170562744,
	"step": 290
	},
	{
	"epoch": 0.39183673469387753,
	"grad_norm": 0.006618338171392679,
	"learning_rate": 5.7299709401701805e-05,
	"loss": 0.7430388927459717,
	"step": 300
	},
	{
	"epoch": 0.4048979591836735,
	"grad_norm": 0.007314841262996197,
	"learning_rate": 5.572554236336965e-05,
	"loss": 0.7401338577270508,
	"step": 310
	},
	{
	"epoch": 0.4179591836734694,
	"grad_norm": 0.0067168474197387695,
	"learning_rate": 5.4122014451459385e-05,
	"loss": 0.7423385143280029,
	"step": 320
	},
	{
	"epoch": 0.4310204081632653,
	"grad_norm": 0.007062443997710943,
	"learning_rate": 5.2492119583714064e-05,
	"loss": 0.7407833099365234,
	"step": 330
	},
	{
	"epoch": 0.4440816326530612,
	"grad_norm": 0.009489820338785648,
	"learning_rate": 5.083890090713949e-05,
	"loss": 0.7376296997070313,
	"step": 340
	},
	{
	"epoch": 0.45714285714285713,
	"grad_norm": 0.006291185040026903,
	"learning_rate": 4.916544511619984e-05,
	"loss": 0.7393476963043213,
	"step": 350
	},
	{
	"epoch": 0.47020408163265304,
	"grad_norm": 0.007340357638895512,
	"learning_rate": 4.747487668970681e-05,
	"loss": 0.7434526443481445,
	"step": 360
	},
	{
	"epoch": 0.483265306122449,
	"grad_norm": 0.007148618344217539,
	"learning_rate": 4.5770352057162046e-05,
	"loss": 0.7274169445037841,
	"step": 370
	},
	{
	"epoch": 0.4963265306122449,
	"grad_norm": 0.007421489339321852,
	"learning_rate": 4.405505370544521e-05,
	"loss": 0.7373303413391114,
	"step": 380
	},
	{
	"epoch": 0.5093877551020408,
	"grad_norm": 0.006447239778935909,
	"learning_rate": 4.233218423685071e-05,
	"loss": 0.7334803581237793,
	"step": 390
	},
	{
	"epoch": 0.5224489795918368,
	"grad_norm": 0.007035430055111647,
	"learning_rate": 4.060496038956728e-05,
	"loss": 0.7342597961425781,
	"step": 400
	},
	{
	"epoch": 0.5355102040816326,
	"grad_norm": 0.006574620492756367,
	"learning_rate": 3.887660703176474e-05,
	"loss": 0.7356997966766358,
	"step": 410
	},
	{
	"epoch": 0.5485714285714286,
	"grad_norm": 0.0066298553720116615,
	"learning_rate": 3.7150351140501457e-05,
	"loss": 0.7381177425384522,
	"step": 420
	},
	{
	"epoch": 0.5616326530612245,
	"grad_norm": 0.007492161355912685,
	"learning_rate": 3.542941577669424e-05,
	"loss": 0.7291494369506836,
	"step": 430
	},
	{
	"epoch": 0.5746938775510204,
	"grad_norm": 0.006217462942004204,
	"learning_rate": 3.3717014067400025e-05,
	"loss": 0.7233750343322753,
	"step": 440
	},
	{
	"epoch": 0.5877551020408164,
	"grad_norm": 0.006911132019013166,
	"learning_rate": 3.201634320664491e-05,
	"loss": 0.7269360542297363,
	"step": 450
	},
	{
	"epoch": 0.6008163265306122,
	"grad_norm": 0.00729888491332531,
	"learning_rate": 3.0330578486001478e-05,
	"loss": 0.7219826698303222,
	"step": 460
	},
	{
	"epoch": 0.6138775510204082,
	"grad_norm": 0.007913697510957718,
	"learning_rate": 2.8662867366059758e-05,
	"loss": 0.7285231590270996,
	"step": 470
	},
	{
	"epoch": 0.626938775510204,
	"grad_norm": 0.006929247174412012,
	"learning_rate": 2.7016323599860833e-05,
	"loss": 0.7285576820373535,
	"step": 480
	},
	{
	"epoch": 0.64,
	"grad_norm": 0.006759402342140675,
	"learning_rate": 2.5394021419265458e-05,
	"loss": 0.7239264965057373,
	"step": 490
	},
	{
	"epoch": 0.6530612244897959,
	"grad_norm": 0.006527756340801716,
	"learning_rate": 2.379898979511156e-05,
	"loss": 0.731016731262207,
	"step": 500
	},
	{
	"epoch": 0.6661224489795918,
	"grad_norm": 0.006792586762458086,
	"learning_rate": 2.2234206781878127e-05,
	"loss": 0.7236400604248047,
	"step": 510
	},
	{
	"epoch": 0.6791836734693878,
	"grad_norm": 0.006418135017156601,
	"learning_rate": 2.0702593957413973e-05,
	"loss": 0.7233975887298584,
	"step": 520
	},
	{
	"epoch": 0.6922448979591836,
	"grad_norm": 0.006609582342207432,
	"learning_rate": 1.9207010968112856e-05,
	"loss": 0.7252939224243165,
	"step": 530
	},
	{
	"epoch": 0.7053061224489796,
	"grad_norm": 0.006350552197545767,
	"learning_rate": 1.7750250189719885e-05,
	"loss": 0.7322314739227295,
	"step": 540
	},
	{
	"epoch": 0.7183673469387755,
	"grad_norm": 0.006282226648181677,
	"learning_rate": 1.633503151373769e-05,
	"loss": 0.718090009689331,
	"step": 550
	},
	{
	"epoch": 0.7314285714285714,
	"grad_norm": 0.006717463489621878,
	"learning_rate": 1.4963997269166472e-05,
	"loss": 0.7251626491546631,
	"step": 560
	},
	{
	"epoch": 0.7444897959183674,
	"grad_norm": 0.006243122275918722,
	"learning_rate": 1.363970728905975e-05,
	"loss": 0.7236129760742187,
	"step": 570
	},
	{
	"epoch": 0.7575510204081632,
	"grad_norm": 0.005929226521402597,
	"learning_rate": 1.2364634131106663e-05,
	"loss": 0.7275202751159668,
	"step": 580
	},
	{
	"epoch": 0.7706122448979592,
	"grad_norm": 0.006825726944953203,
	"learning_rate": 1.11411584611646e-05,
	"loss": 0.7266074657440186,
	"step": 590
	},
	{
	"epoch": 0.7836734693877551,
	"grad_norm": 0.006338095758110285,
	"learning_rate": 9.971564608361387e-06,
	"loss": 0.7227339744567871,
	"step": 600
	},
	{
	"epoch": 0.796734693877551,
	"grad_norm": 0.006189221516251564,
	"learning_rate": 8.858036300065912e-06,
	"loss": 0.7260101318359375,
	"step": 610
	},
	{
	"epoch": 0.809795918367347,
	"grad_norm": 0.006516415625810623,
	"learning_rate": 7.802652584690627e-06,
	"loss": 0.721678638458252,
	"step": 620
	},
	{
	"epoch": 0.8228571428571428,
	"grad_norm": 0.006069981027394533,
	"learning_rate": 6.807383949938131e-06,
	"loss": 0.7275302886962891,
	"step": 630
	},
	{
	"epoch": 0.8359183673469388,
	"grad_norm": 0.006245093885809183,
	"learning_rate": 5.874088643739453e-06,
	"loss": 0.7232262134552002,
	"step": 640
	},
	{
	"epoch": 0.8489795918367347,
	"grad_norm": 0.0058577232994139194,
	"learning_rate": 5.0045092047532385e-06,
	"loss": 0.7290368556976319,
	"step": 650
	},
	{
	"epoch": 0.8620408163265306,
	"grad_norm": 0.005565746687352657,
	"learning_rate": 4.200269208903569e-06,
	"loss": 0.7283772945404052,
	"step": 660
	},
	{
	"epoch": 0.8751020408163265,
	"grad_norm": 0.006886645220220089,
	"learning_rate": 3.4628702380309266e-06,
	"loss": 0.7242953300476074,
	"step": 670
	},
	{
	"epoch": 0.8881632653061224,
	"grad_norm": 0.005979357752948999,
	"learning_rate": 2.793689076316111e-06,
	"loss": 0.7234395503997803,
	"step": 680
	},
	{
	"epoch": 0.9012244897959184,
	"grad_norm": 0.005459806881844997,
	"learning_rate": 2.193975139711575e-06,
	"loss": 0.7297232151031494,
	"step": 690
	},
	{
	"epoch": 0.9142857142857143,
	"grad_norm": 0.00546460272744298,
	"learning_rate": 1.6648481431797137e-06,
	"loss": 0.7135615348815918,
	"step": 700
	},
	{
	"epoch": 0.9273469387755102,
	"grad_norm": 0.005548370536416769,
	"learning_rate": 1.2072960100933862e-06,
	"loss": 0.7257327079772949,
	"step": 710
	},
	{
	"epoch": 0.9404081632653061,
	"grad_norm": 0.005945554003119469,
	"learning_rate": 8.221730277022488e-07,
	"loss": 0.7289669036865234,
	"step": 720
	},
	{
	"epoch": 0.953469387755102,
	"grad_norm": 0.005690570455044508,
	"learning_rate": 5.101982521085847e-07,
	"loss": 0.7285196781158447,
	"step": 730
	},
	{
	"epoch": 0.966530612244898,
	"grad_norm": 0.005644120275974274,
	"learning_rate": 2.719541657307456e-07,
	"loss": 0.7271464347839356,
	"step": 740
	},
	{
	"epoch": 0.9795918367346939,
	"grad_norm": 0.005760515108704567,
	"learning_rate": 1.0788558976085662e-07,
	"loss": 0.726295280456543,
	"step": 750
	},
	{
	"epoch": 0.9926530612244898,
	"grad_norm": 0.006554395891726017,
	"learning_rate": 1.8298853647267245e-08,
	"loss": 0.7278533458709717,
	"step": 760
	},
	{
	"epoch": 1.0,
	"step": 766,
	"total_flos": 7.613569370341507e+18,
	"train_loss": 0.7527124293479223,
	"train_runtime": 22851.6673,
	"train_samples_per_second": 2.144,
	"train_steps_per_second": 0.034
	}
	],
	"logging_steps": 10,
	"max_steps": 766,
	"num_input_tokens_seen": 0,
	"num_train_epochs": 1,
	"save_steps": 100,
	"stateful_callbacks": {
	"TrainerControl": {
	"args": {
	"should_epoch_stop": false,
	"should_evaluate": false,
	"should_log": false,
	"should_save": true,
	"should_training_stop": true
	},
	"attributes": {}
	}
	},
	"total_flos": 7.613569370341507e+18,
	"train_batch_size": 1,
	"trial_name": null,
	"trial_params": null
	}