Instructions to use modrill/kodcode_3_qwen3_4b_sft with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use modrill/kodcode_3_qwen3_4b_sft with Transformers:
```python
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("text-generation", model="modrill/kodcode_3_qwen3_4b_sft")
messages = [
    {"role": "user", "content": "Who are you?"},
]
pipe(messages)
```
```python
# Load model directly
from transformers import AutoTokenizer, AutoModelForCausalLM

tokenizer = AutoTokenizer.from_pretrained("modrill/kodcode_3_qwen3_4b_sft")
model = AutoModelForCausalLM.from_pretrained("modrill/kodcode_3_qwen3_4b_sft")
messages = [
    {"role": "user", "content": "Who are you?"},
]
inputs = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt=True,
    tokenize=True,
    return_dict=True,
    return_tensors="pt",
).to(model.device)

outputs = model.generate(**inputs, max_new_tokens=40)
print(tokenizer.decode(outputs[0][inputs["input_ids"].shape[-1]:]))
```
- Inference
- Notebooks
- Google Colab
- Kaggle
- Local Apps
- vLLM
How to use modrill/kodcode_3_qwen3_4b_sft with vLLM:
Install from pip and serve model
```bash
# Install vLLM from pip:
pip install vllm

# Start the vLLM server:
vllm serve "modrill/kodcode_3_qwen3_4b_sft"

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/chat/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "modrill/kodcode_3_qwen3_4b_sft",
        "messages": [
            {
                "role": "user",
                "content": "What is the capital of France?"
            }
        ]
    }'
```
Use Docker
docker model run hf.co/modrill/kodcode_3_qwen3_4b_sft
- SGLang
How to use modrill/kodcode_3_qwen3_4b_sft with SGLang:
Install from pip and serve model
```bash
# Install SGLang from pip:
pip install sglang

# Start the SGLang server:
python3 -m sglang.launch_server \
    --model-path "modrill/kodcode_3_qwen3_4b_sft" \
    --host 0.0.0.0 \
    --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "modrill/kodcode_3_qwen3_4b_sft",
        "messages": [
            {
                "role": "user",
                "content": "What is the capital of France?"
            }
        ]
    }'
```
Use Docker images
```bash
docker run --gpus all \
    --shm-size 32g \
    -p 30000:30000 \
    -v ~/.cache/huggingface:/root/.cache/huggingface \
    --env "HF_TOKEN=<secret>" \
    --ipc=host \
    lmsysorg/sglang:latest \
    python3 -m sglang.launch_server \
        --model-path "modrill/kodcode_3_qwen3_4b_sft" \
        --host 0.0.0.0 \
        --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "modrill/kodcode_3_qwen3_4b_sft",
        "messages": [
            {
                "role": "user",
                "content": "What is the capital of France?"
            }
        ]
    }'
```
- Docker Model Runner
How to use modrill/kodcode_3_qwen3_4b_sft with Docker Model Runner:
docker model run hf.co/modrill/kodcode_3_qwen3_4b_sft
| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 1.0, | |
| "eval_steps": 500, | |
| "global_step": 659, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "entropy": 0.3487173642963171, | |
| "epoch": 0.015186028853454821, | |
| "grad_norm": 1.609375, | |
| "learning_rate": 2.7272727272727272e-06, | |
| "loss": 0.4099268913269043, | |
| "mean_token_accuracy": 0.8717762351036071, | |
| "num_tokens": 568708.0, | |
| "step": 10 | |
| }, | |
| { | |
| "entropy": 0.37818768359720706, | |
| "epoch": 0.030372057706909643, | |
| "grad_norm": 0.87890625, | |
| "learning_rate": 5.7575757575757586e-06, | |
| "loss": 0.39949469566345214, | |
| "mean_token_accuracy": 0.8737476468086243, | |
| "num_tokens": 1125639.0, | |
| "step": 20 | |
| }, | |
| { | |
| "entropy": 0.40618006475269797, | |
| "epoch": 0.04555808656036447, | |
| "grad_norm": 0.70703125, | |
| "learning_rate": 8.787878787878788e-06, | |
| "loss": 0.3975033760070801, | |
| "mean_token_accuracy": 0.8727035835385323, | |
| "num_tokens": 1683225.0, | |
| "step": 30 | |
| }, | |
| { | |
| "entropy": 0.3896496780216694, | |
| "epoch": 0.060744115413819286, | |
| "grad_norm": 0.6953125, | |
| "learning_rate": 9.997733473639876e-06, | |
| "loss": 0.3925030708312988, | |
| "mean_token_accuracy": 0.8742863699793816, | |
| "num_tokens": 2236895.0, | |
| "step": 40 | |
| }, | |
| { | |
| "entropy": 0.37322904225438835, | |
| "epoch": 0.07593014426727411, | |
| "grad_norm": 0.68359375, | |
| "learning_rate": 9.983889919973586e-06, | |
| "loss": 0.3752753257751465, | |
| "mean_token_accuracy": 0.8792506881058216, | |
| "num_tokens": 2818707.0, | |
| "step": 50 | |
| }, | |
| { | |
| "entropy": 0.3811411205679178, | |
| "epoch": 0.09111617312072894, | |
| "grad_norm": 0.66796875, | |
| "learning_rate": 9.957496810072027e-06, | |
| "loss": 0.38604438304901123, | |
| "mean_token_accuracy": 0.8750339619815349, | |
| "num_tokens": 3351348.0, | |
| "step": 60 | |
| }, | |
| { | |
| "entropy": 0.3796327030286193, | |
| "epoch": 0.10630220197418375, | |
| "grad_norm": 0.66015625, | |
| "learning_rate": 9.918620602428916e-06, | |
| "loss": 0.37710745334625245, | |
| "mean_token_accuracy": 0.8776259452104569, | |
| "num_tokens": 3915545.0, | |
| "step": 70 | |
| }, | |
| { | |
| "entropy": 0.37812459245324137, | |
| "epoch": 0.12148823082763857, | |
| "grad_norm": 0.64453125, | |
| "learning_rate": 9.867359188282193e-06, | |
| "loss": 0.38009963035583494, | |
| "mean_token_accuracy": 0.8783061921596527, | |
| "num_tokens": 4462906.0, | |
| "step": 80 | |
| }, | |
| { | |
| "entropy": 0.3751340739428997, | |
| "epoch": 0.1366742596810934, | |
| "grad_norm": 0.6640625, | |
| "learning_rate": 9.803841645121505e-06, | |
| "loss": 0.37636594772338866, | |
| "mean_token_accuracy": 0.8778362341225148, | |
| "num_tokens": 5029003.0, | |
| "step": 90 | |
| }, | |
| { | |
| "entropy": 0.37551863975822924, | |
| "epoch": 0.15186028853454822, | |
| "grad_norm": 0.6640625, | |
| "learning_rate": 9.728227911667934e-06, | |
| "loss": 0.3773549795150757, | |
| "mean_token_accuracy": 0.8772883579134941, | |
| "num_tokens": 5596042.0, | |
| "step": 100 | |
| }, | |
| { | |
| "entropy": 0.3809764288365841, | |
| "epoch": 0.16704631738800305, | |
| "grad_norm": 0.71484375, | |
| "learning_rate": 9.640708385144403e-06, | |
| "loss": 0.3807323932647705, | |
| "mean_token_accuracy": 0.8774459846317768, | |
| "num_tokens": 6144821.0, | |
| "step": 110 | |
| }, | |
| { | |
| "entropy": 0.37467240951955316, | |
| "epoch": 0.18223234624145787, | |
| "grad_norm": 0.62890625, | |
| "learning_rate": 9.541503441850844e-06, | |
| "loss": 0.37542564868927003, | |
| "mean_token_accuracy": 0.8782215595245362, | |
| "num_tokens": 6691491.0, | |
| "step": 120 | |
| }, | |
| { | |
| "entropy": 0.3787713166326284, | |
| "epoch": 0.19741837509491267, | |
| "grad_norm": 0.7109375, | |
| "learning_rate": 9.430862882251279e-06, | |
| "loss": 0.37993783950805665, | |
| "mean_token_accuracy": 0.8774278596043587, | |
| "num_tokens": 7247335.0, | |
| "step": 130 | |
| }, | |
| { | |
| "entropy": 0.3862619888037443, | |
| "epoch": 0.2126044039483675, | |
| "grad_norm": 0.71484375, | |
| "learning_rate": 9.309065301970193e-06, | |
| "loss": 0.38727219104766847, | |
| "mean_token_accuracy": 0.8749251998960972, | |
| "num_tokens": 7808664.0, | |
| "step": 140 | |
| }, | |
| { | |
| "entropy": 0.3778634283691645, | |
| "epoch": 0.22779043280182232, | |
| "grad_norm": 0.71484375, | |
| "learning_rate": 9.176417390281944e-06, | |
| "loss": 0.38028583526611326, | |
| "mean_token_accuracy": 0.8772468723356723, | |
| "num_tokens": 8360893.0, | |
| "step": 150 | |
| }, | |
| { | |
| "entropy": 0.3749677825719118, | |
| "epoch": 0.24297646165527714, | |
| "grad_norm": 0.69921875, | |
| "learning_rate": 9.033253157859715e-06, | |
| "loss": 0.37344467639923096, | |
| "mean_token_accuracy": 0.8786589197814465, | |
| "num_tokens": 8905139.0, | |
| "step": 160 | |
| }, | |
| { | |
| "entropy": 0.37994367331266404, | |
| "epoch": 0.25816249050873197, | |
| "grad_norm": 0.69140625, | |
| "learning_rate": 8.879933095728485e-06, | |
| "loss": 0.38379650115966796, | |
| "mean_token_accuracy": 0.8768095754086971, | |
| "num_tokens": 9467791.0, | |
| "step": 170 | |
| }, | |
| { | |
| "entropy": 0.3774459037929773, | |
| "epoch": 0.2733485193621868, | |
| "grad_norm": 0.7109375, | |
| "learning_rate": 8.716843267539868e-06, | |
| "loss": 0.3767258644104004, | |
| "mean_token_accuracy": 0.8779186218976974, | |
| "num_tokens": 10013526.0, | |
| "step": 180 | |
| }, | |
| { | |
| "entropy": 0.3706828704103827, | |
| "epoch": 0.2885345482156416, | |
| "grad_norm": 0.6875, | |
| "learning_rate": 8.544394337454409e-06, | |
| "loss": 0.373125958442688, | |
| "mean_token_accuracy": 0.8792334951460361, | |
| "num_tokens": 10567209.0, | |
| "step": 190 | |
| }, | |
| { | |
| "entropy": 0.3781122103333473, | |
| "epoch": 0.30372057706909644, | |
| "grad_norm": 0.7109375, | |
| "learning_rate": 8.36302053607924e-06, | |
| "loss": 0.3779691457748413, | |
| "mean_token_accuracy": 0.877835976332426, | |
| "num_tokens": 11121802.0, | |
| "step": 200 | |
| }, | |
| { | |
| "entropy": 0.37852676026523113, | |
| "epoch": 0.31890660592255127, | |
| "grad_norm": 0.703125, | |
| "learning_rate": 8.17317856706482e-06, | |
| "loss": 0.37905910015106203, | |
| "mean_token_accuracy": 0.877592646330595, | |
| "num_tokens": 11677885.0, | |
| "step": 210 | |
| }, | |
| { | |
| "entropy": 0.376834512129426, | |
| "epoch": 0.3340926347760061, | |
| "grad_norm": 0.66015625, | |
| "learning_rate": 7.975346457114034e-06, | |
| "loss": 0.3753563404083252, | |
| "mean_token_accuracy": 0.8776590585708618, | |
| "num_tokens": 12235216.0, | |
| "step": 220 | |
| }, | |
| { | |
| "entropy": 0.3737819105386734, | |
| "epoch": 0.3492786636294609, | |
| "grad_norm": 0.671875, | |
| "learning_rate": 7.770022352299294e-06, | |
| "loss": 0.37358593940734863, | |
| "mean_token_accuracy": 0.878921328485012, | |
| "num_tokens": 12787759.0, | |
| "step": 230 | |
| }, | |
| { | |
| "entropy": 0.3769740372896194, | |
| "epoch": 0.36446469248291574, | |
| "grad_norm": 0.73046875, | |
| "learning_rate": 7.557723263718596e-06, | |
| "loss": 0.37995898723602295, | |
| "mean_token_accuracy": 0.8769361607730388, | |
| "num_tokens": 13346471.0, | |
| "step": 240 | |
| }, | |
| { | |
| "entropy": 0.3878506176173687, | |
| "epoch": 0.37965072133637057, | |
| "grad_norm": 0.70703125, | |
| "learning_rate": 7.338983765648985e-06, | |
| "loss": 0.38782215118408203, | |
| "mean_token_accuracy": 0.8749015353620052, | |
| "num_tokens": 13895194.0, | |
| "step": 250 | |
| }, | |
| { | |
| "entropy": 0.37555828876793385, | |
| "epoch": 0.39483675018982534, | |
| "grad_norm": 0.6875, | |
| "learning_rate": 7.114354649475499e-06, | |
| "loss": 0.3771331787109375, | |
| "mean_token_accuracy": 0.878202386945486, | |
| "num_tokens": 14453542.0, | |
| "step": 260 | |
| }, | |
| { | |
| "entropy": 0.37179951313883064, | |
| "epoch": 0.41002277904328016, | |
| "grad_norm": 0.6484375, | |
| "learning_rate": 6.884401536785045e-06, | |
| "loss": 0.37206058502197265, | |
| "mean_token_accuracy": 0.8789021499454975, | |
| "num_tokens": 15016280.0, | |
| "step": 270 | |
| }, | |
| { | |
| "entropy": 0.3706459369510412, | |
| "epoch": 0.425208807896735, | |
| "grad_norm": 0.7109375, | |
| "learning_rate": 6.6497034551174585e-06, | |
| "loss": 0.37101426124572756, | |
| "mean_token_accuracy": 0.8798882246017456, | |
| "num_tokens": 15561057.0, | |
| "step": 280 | |
| }, | |
| { | |
| "entropy": 0.37728526555001735, | |
| "epoch": 0.4403948367501898, | |
| "grad_norm": 0.65625, | |
| "learning_rate": 6.41085137996006e-06, | |
| "loss": 0.37785754203796384, | |
| "mean_token_accuracy": 0.8779311388731003, | |
| "num_tokens": 16127699.0, | |
| "step": 290 | |
| }, | |
| { | |
| "entropy": 0.3816283464431763, | |
| "epoch": 0.45558086560364464, | |
| "grad_norm": 0.81640625, | |
| "learning_rate": 6.168446746656973e-06, | |
| "loss": 0.3794879674911499, | |
| "mean_token_accuracy": 0.8773063771426678, | |
| "num_tokens": 16686457.0, | |
| "step": 300 | |
| }, | |
| { | |
| "entropy": 0.37517447732388975, | |
| "epoch": 0.47076689445709946, | |
| "grad_norm": 0.70703125, | |
| "learning_rate": 5.923099935980278e-06, | |
| "loss": 0.3782352924346924, | |
| "mean_token_accuracy": 0.8787827685475349, | |
| "num_tokens": 17254272.0, | |
| "step": 310 | |
| }, | |
| { | |
| "entropy": 0.374018133059144, | |
| "epoch": 0.4859529233105543, | |
| "grad_norm": 0.71484375, | |
| "learning_rate": 5.675428737176367e-06, | |
| "loss": 0.37341156005859377, | |
| "mean_token_accuracy": 0.8788688823580741, | |
| "num_tokens": 17809900.0, | |
| "step": 320 | |
| }, | |
| { | |
| "entropy": 0.3753270395100117, | |
| "epoch": 0.5011389521640092, | |
| "grad_norm": 0.68359375, | |
| "learning_rate": 5.426056792357552e-06, | |
| "loss": 0.3752497673034668, | |
| "mean_token_accuracy": 0.8784179173409938, | |
| "num_tokens": 18379566.0, | |
| "step": 330 | |
| }, | |
| { | |
| "entropy": 0.3742110010236502, | |
| "epoch": 0.5163249810174639, | |
| "grad_norm": 0.6875, | |
| "learning_rate": 5.175612026156045e-06, | |
| "loss": 0.3746063232421875, | |
| "mean_token_accuracy": 0.8782069273293018, | |
| "num_tokens": 18943281.0, | |
| "step": 340 | |
| }, | |
| { | |
| "entropy": 0.37444472052156924, | |
| "epoch": 0.5315110098709187, | |
| "grad_norm": 0.71484375, | |
| "learning_rate": 4.924725064594448e-06, | |
| "loss": 0.3729024171829224, | |
| "mean_token_accuracy": 0.8787923693656922, | |
| "num_tokens": 19488865.0, | |
| "step": 350 | |
| }, | |
| { | |
| "entropy": 0.3750518877059221, | |
| "epoch": 0.5466970387243736, | |
| "grad_norm": 0.78515625, | |
| "learning_rate": 4.674027647154037e-06, | |
| "loss": 0.3758077621459961, | |
| "mean_token_accuracy": 0.8765743866562843, | |
| "num_tokens": 20048281.0, | |
| "step": 360 | |
| }, | |
| { | |
| "entropy": 0.3787516813725233, | |
| "epoch": 0.5618830675778284, | |
| "grad_norm": 0.74609375, | |
| "learning_rate": 4.424151036039381e-06, | |
| "loss": 0.3790909767150879, | |
| "mean_token_accuracy": 0.8769759923219681, | |
| "num_tokens": 20597434.0, | |
| "step": 370 | |
| }, | |
| { | |
| "entropy": 0.3786877432838082, | |
| "epoch": 0.5770690964312832, | |
| "grad_norm": 0.68359375, | |
| "learning_rate": 4.175724426644724e-06, | |
| "loss": 0.3812232971191406, | |
| "mean_token_accuracy": 0.8777030549943448, | |
| "num_tokens": 21161267.0, | |
| "step": 380 | |
| }, | |
| { | |
| "entropy": 0.37311795353889465, | |
| "epoch": 0.592255125284738, | |
| "grad_norm": 0.6796875, | |
| "learning_rate": 3.929373363224654e-06, | |
| "loss": 0.3731100559234619, | |
| "mean_token_accuracy": 0.8793233536183834, | |
| "num_tokens": 21709421.0, | |
| "step": 390 | |
| }, | |
| { | |
| "entropy": 0.3734915753826499, | |
| "epoch": 0.6074411541381929, | |
| "grad_norm": 0.6875, | |
| "learning_rate": 3.685718163758427e-06, | |
| "loss": 0.37124335765838623, | |
| "mean_token_accuracy": 0.8786324210464954, | |
| "num_tokens": 22250023.0, | |
| "step": 400 | |
| }, | |
| { | |
| "entropy": 0.3722789943218231, | |
| "epoch": 0.6226271829916477, | |
| "grad_norm": 0.67578125, | |
| "learning_rate": 3.445372357974194e-06, | |
| "loss": 0.37429609298706057, | |
| "mean_token_accuracy": 0.8784996062517166, | |
| "num_tokens": 22802881.0, | |
| "step": 410 | |
| }, | |
| { | |
| "entropy": 0.3826644644141197, | |
| "epoch": 0.6378132118451025, | |
| "grad_norm": 0.65625, | |
| "learning_rate": 3.2089411424661864e-06, | |
| "loss": 0.3828511953353882, | |
| "mean_token_accuracy": 0.875868634134531, | |
| "num_tokens": 23368508.0, | |
| "step": 420 | |
| }, | |
| { | |
| "entropy": 0.36304581388831136, | |
| "epoch": 0.6529992406985573, | |
| "grad_norm": 0.703125, | |
| "learning_rate": 2.977019856794955e-06, | |
| "loss": 0.362534499168396, | |
| "mean_token_accuracy": 0.8821237675845623, | |
| "num_tokens": 23923709.0, | |
| "step": 430 | |
| }, | |
| { | |
| "entropy": 0.38759873658418653, | |
| "epoch": 0.6681852695520122, | |
| "grad_norm": 0.67578125, | |
| "learning_rate": 2.7501924844078538e-06, | |
| "loss": 0.38718571662902834, | |
| "mean_token_accuracy": 0.8746685221791267, | |
| "num_tokens": 24477925.0, | |
| "step": 440 | |
| }, | |
| { | |
| "entropy": 0.3708066754043102, | |
| "epoch": 0.683371298405467, | |
| "grad_norm": 0.6875, | |
| "learning_rate": 2.5290301821544826e-06, | |
| "loss": 0.36970815658569334, | |
| "mean_token_accuracy": 0.8802599847316742, | |
| "num_tokens": 25027245.0, | |
| "step": 450 | |
| }, | |
| { | |
| "entropy": 0.36688214987516404, | |
| "epoch": 0.6985573272589218, | |
| "grad_norm": 0.68359375, | |
| "learning_rate": 2.3140898420998425e-06, | |
| "loss": 0.3657586097717285, | |
| "mean_token_accuracy": 0.8809232294559479, | |
| "num_tokens": 25582534.0, | |
| "step": 460 | |
| }, | |
| { | |
| "entropy": 0.36983687337487936, | |
| "epoch": 0.7137433561123766, | |
| "grad_norm": 0.7109375, | |
| "learning_rate": 2.105912689256533e-06, | |
| "loss": 0.37239837646484375, | |
| "mean_token_accuracy": 0.8794391065835953, | |
| "num_tokens": 26134875.0, | |
| "step": 470 | |
| }, | |
| { | |
| "entropy": 0.3732773784548044, | |
| "epoch": 0.7289293849658315, | |
| "grad_norm": 0.63671875, | |
| "learning_rate": 1.905022918766995e-06, | |
| "loss": 0.37306258678436277, | |
| "mean_token_accuracy": 0.8793606124818325, | |
| "num_tokens": 26681084.0, | |
| "step": 480 | |
| }, | |
| { | |
| "entropy": 0.3710829207673669, | |
| "epoch": 0.7441154138192863, | |
| "grad_norm": 0.69921875, | |
| "learning_rate": 1.7119263759673677e-06, | |
| "loss": 0.3711911678314209, | |
| "mean_token_accuracy": 0.8803343921899796, | |
| "num_tokens": 27234332.0, | |
| "step": 490 | |
| }, | |
| { | |
| "entropy": 0.3832434043288231, | |
| "epoch": 0.7593014426727411, | |
| "grad_norm": 0.9921875, | |
| "learning_rate": 1.5271092826566108e-06, | |
| "loss": 0.3841698169708252, | |
| "mean_token_accuracy": 0.8759766638278961, | |
| "num_tokens": 27794602.0, | |
| "step": 500 | |
| }, | |
| { | |
| "entropy": 0.38211295008659363, | |
| "epoch": 0.7744874715261959, | |
| "grad_norm": 0.8046875, | |
| "learning_rate": 1.3510370127781635e-06, | |
| "loss": 0.3804590940475464, | |
| "mean_token_accuracy": 0.8769169762730599, | |
| "num_tokens": 28354831.0, | |
| "step": 510 | |
| }, | |
| { | |
| "entropy": 0.3752284612506628, | |
| "epoch": 0.7896735003796507, | |
| "grad_norm": 0.73046875, | |
| "learning_rate": 1.1841529205970281e-06, | |
| "loss": 0.37546916007995607, | |
| "mean_token_accuracy": 0.8786770381033421, | |
| "num_tokens": 28922852.0, | |
| "step": 520 | |
| }, | |
| { | |
| "entropy": 0.36873019095510245, | |
| "epoch": 0.8048595292331056, | |
| "grad_norm": 0.6640625, | |
| "learning_rate": 1.026877224322923e-06, | |
| "loss": 0.36797375679016114, | |
| "mean_token_accuracy": 0.880731363594532, | |
| "num_tokens": 29493442.0, | |
| "step": 530 | |
| }, | |
| { | |
| "entropy": 0.3788988694548607, | |
| "epoch": 0.8200455580865603, | |
| "grad_norm": 0.69921875, | |
| "learning_rate": 8.7960594799059e-07, | |
| "loss": 0.37884984016418455, | |
| "mean_token_accuracy": 0.8770281121134758, | |
| "num_tokens": 30034443.0, | |
| "step": 540 | |
| }, | |
| { | |
| "entropy": 0.3814096964895725, | |
| "epoch": 0.8352315869400152, | |
| "grad_norm": 0.73046875, | |
| "learning_rate": 7.427099242616348e-07, | |
| "loss": 0.3821078300476074, | |
| "mean_token_accuracy": 0.8763406798243523, | |
| "num_tokens": 30570567.0, | |
| "step": 550 | |
| }, | |
| { | |
| "entropy": 0.3758743409067392, | |
| "epoch": 0.85041761579347, | |
| "grad_norm": 0.6953125, | |
| "learning_rate": 6.165338606588517e-07, | |
| "loss": 0.3744307279586792, | |
| "mean_token_accuracy": 0.8794760994613171, | |
| "num_tokens": 31129457.0, | |
| "step": 560 | |
| }, | |
| { | |
| "entropy": 0.37431446108967065, | |
| "epoch": 0.8656036446469249, | |
| "grad_norm": 0.6640625, | |
| "learning_rate": 5.0139547158427e-07, | |
| "loss": 0.37335963249206544, | |
| "mean_token_accuracy": 0.8793582506477833, | |
| "num_tokens": 31689902.0, | |
| "step": 570 | |
| }, | |
| { | |
| "entropy": 0.38372623883187773, | |
| "epoch": 0.8807896735003796, | |
| "grad_norm": 0.7578125, | |
| "learning_rate": 3.9758467830656623e-07, | |
| "loss": 0.38321547508239745, | |
| "mean_token_accuracy": 0.8755873307585716, | |
| "num_tokens": 32253359.0, | |
| "step": 580 | |
| }, | |
| { | |
| "entropy": 0.36962624490261076, | |
| "epoch": 0.8959757023538345, | |
| "grad_norm": 0.72265625, | |
| "learning_rate": 3.0536287893223603e-07, | |
| "loss": 0.37100839614868164, | |
| "mean_token_accuracy": 0.8799250744283199, | |
| "num_tokens": 32813428.0, | |
| "step": 590 | |
| }, | |
| { | |
| "entropy": 0.3853254303336143, | |
| "epoch": 0.9111617312072893, | |
| "grad_norm": 0.6640625, | |
| "learning_rate": 2.2496229019879635e-07, | |
| "loss": 0.3848439693450928, | |
| "mean_token_accuracy": 0.8762004837393761, | |
| "num_tokens": 33382024.0, | |
| "step": 600 | |
| }, | |
| { | |
| "entropy": 0.3742272950708866, | |
| "epoch": 0.9263477600607442, | |
| "grad_norm": 0.671875, | |
| "learning_rate": 1.5658536274738623e-07, | |
| "loss": 0.3725078582763672, | |
| "mean_token_accuracy": 0.8787065915763378, | |
| "num_tokens": 33939521.0, | |
| "step": 610 | |
| }, | |
| { | |
| "entropy": 0.3774934906512499, | |
| "epoch": 0.9415337889141989, | |
| "grad_norm": 0.69140625, | |
| "learning_rate": 1.004042713471165e-07, | |
| "loss": 0.37858588695526124, | |
| "mean_token_accuracy": 0.877676124125719, | |
| "num_tokens": 34482539.0, | |
| "step": 620 | |
| }, | |
| { | |
| "entropy": 0.37360552567988636, | |
| "epoch": 0.9567198177676538, | |
| "grad_norm": 0.734375, | |
| "learning_rate": 5.6560481354807625e-08, | |
| "loss": 0.37269864082336424, | |
| "mean_token_accuracy": 0.8786865592002868, | |
| "num_tokens": 35045028.0, | |
| "step": 630 | |
| }, | |
| { | |
| "entropy": 0.3767994062975049, | |
| "epoch": 0.9719058466211086, | |
| "grad_norm": 0.890625, | |
| "learning_rate": 2.516439250177749e-08, | |
| "loss": 0.3758098125457764, | |
| "mean_token_accuracy": 0.8778494797647, | |
| "num_tokens": 35600399.0, | |
| "step": 640 | |
| }, | |
| { | |
| "entropy": 0.3786265593022108, | |
| "epoch": 0.9870918754745635, | |
| "grad_norm": 0.6875, | |
| "learning_rate": 6.295060904623618e-09, | |
| "loss": 0.378217077255249, | |
| "mean_token_accuracy": 0.8778206452727317, | |
| "num_tokens": 36162293.0, | |
| "step": 650 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 659, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 9.858614662906511e+17, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |