Instructions to use modrill/opencodeinst_5k_sft with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use modrill/opencodeinst_5k_sft with Transformers:
```python
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("text-generation", model="modrill/opencodeinst_5k_sft")
messages = [
    {"role": "user", "content": "Who are you?"},
]
pipe(messages)
```

```python
# Load model directly
from transformers import AutoModel

model = AutoModel.from_pretrained("modrill/opencodeinst_5k_sft", dtype="auto")
```
- Notebooks
- Google Colab
- Kaggle
- Local Apps
- vLLM
How to use modrill/opencodeinst_5k_sft with vLLM:
Install from pip and serve model
```bash
# Install vLLM from pip:
pip install vllm

# Start the vLLM server:
vllm serve "modrill/opencodeinst_5k_sft"

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/chat/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "modrill/opencodeinst_5k_sft",
    "messages": [
      {
        "role": "user",
        "content": "What is the capital of France?"
      }
    ]
  }'
```
Use Docker
docker model run hf.co/modrill/opencodeinst_5k_sft
- SGLang
How to use modrill/opencodeinst_5k_sft with SGLang:
Install from pip and serve model
```bash
# Install SGLang from pip:
pip install sglang

# Start the SGLang server:
python3 -m sglang.launch_server \
  --model-path "modrill/opencodeinst_5k_sft" \
  --host 0.0.0.0 \
  --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "modrill/opencodeinst_5k_sft",
    "messages": [
      {
        "role": "user",
        "content": "What is the capital of France?"
      }
    ]
  }'
```
Use Docker images
```bash
docker run --gpus all \
  --shm-size 32g \
  -p 30000:30000 \
  -v ~/.cache/huggingface:/root/.cache/huggingface \
  --env "HF_TOKEN=<secret>" \
  --ipc=host \
  lmsysorg/sglang:latest \
  python3 -m sglang.launch_server \
    --model-path "modrill/opencodeinst_5k_sft" \
    --host 0.0.0.0 \
    --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "modrill/opencodeinst_5k_sft",
    "messages": [
      {
        "role": "user",
        "content": "What is the capital of France?"
      }
    ]
  }'
```
- Docker Model Runner
How to use modrill/opencodeinst_5k_sft with Docker Model Runner:
docker model run hf.co/modrill/opencodeinst_5k_sft
| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 2.5254736842105263, | |
| "eval_steps": 100, | |
| "global_step": 1500, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.016842105263157894, | |
| "grad_norm": 0.21757784485816956, | |
| "learning_rate": 5.027932960893855e-08, | |
| "loss": 0.7252199172973632, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.03368421052631579, | |
| "grad_norm": 0.2456846386194229, | |
| "learning_rate": 1.0614525139664805e-07, | |
| "loss": 0.6507451057434082, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.05052631578947368, | |
| "grad_norm": 0.20819272100925446, | |
| "learning_rate": 1.6201117318435754e-07, | |
| "loss": 0.7381344795227051, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.06736842105263158, | |
| "grad_norm": 0.26373574137687683, | |
| "learning_rate": 2.17877094972067e-07, | |
| "loss": 0.7012194156646728, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.08421052631578947, | |
| "grad_norm": 0.2081507444381714, | |
| "learning_rate": 2.7374301675977653e-07, | |
| "loss": 0.6083873748779297, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.10105263157894737, | |
| "grad_norm": 0.2091236114501953, | |
| "learning_rate": 3.29608938547486e-07, | |
| "loss": 0.6980491638183594, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.11789473684210526, | |
| "grad_norm": 0.20970331132411957, | |
| "learning_rate": 3.8547486033519547e-07, | |
| "loss": 0.708641767501831, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.13473684210526315, | |
| "grad_norm": 0.18810197710990906, | |
| "learning_rate": 4.41340782122905e-07, | |
| "loss": 0.6742453098297119, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.15157894736842106, | |
| "grad_norm": 0.20251069962978363, | |
| "learning_rate": 4.972067039106145e-07, | |
| "loss": 0.6590609550476074, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.16842105263157894, | |
| "grad_norm": 0.2644217908382416, | |
| "learning_rate": 5.53072625698324e-07, | |
| "loss": 0.704926872253418, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.18526315789473685, | |
| "grad_norm": 0.23766489326953888, | |
| "learning_rate": 6.089385474860335e-07, | |
| "loss": 0.7445036888122558, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.20210526315789473, | |
| "grad_norm": 0.27427056431770325, | |
| "learning_rate": 6.64804469273743e-07, | |
| "loss": 0.7476531028747558, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.21894736842105264, | |
| "grad_norm": 0.3208928406238556, | |
| "learning_rate": 7.206703910614524e-07, | |
| "loss": 0.7291872501373291, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.23578947368421052, | |
| "grad_norm": 0.3123615086078644, | |
| "learning_rate": 7.76536312849162e-07, | |
| "loss": 0.721175241470337, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.25263157894736843, | |
| "grad_norm": 0.26158222556114197, | |
| "learning_rate": 8.324022346368714e-07, | |
| "loss": 0.7556095600128174, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.2694736842105263, | |
| "grad_norm": 0.2592650353908539, | |
| "learning_rate": 8.88268156424581e-07, | |
| "loss": 0.7328392505645752, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.2863157894736842, | |
| "grad_norm": 0.24533776938915253, | |
| "learning_rate": 9.441340782122904e-07, | |
| "loss": 0.6990129470825195, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.3031578947368421, | |
| "grad_norm": 0.23409004509449005, | |
| "learning_rate": 1e-06, | |
| "loss": 0.6694639205932618, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "grad_norm": 0.3267499506473541, | |
| "learning_rate": 9.999039806396227e-07, | |
| "loss": 0.7123252868652343, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.3368421052631579, | |
| "grad_norm": 0.2115064263343811, | |
| "learning_rate": 9.996159594373611e-07, | |
| "loss": 0.6858412742614746, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.35368421052631577, | |
| "grad_norm": 0.26226580142974854, | |
| "learning_rate": 9.991360470156615e-07, | |
| "loss": 0.6541069507598877, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.3705263157894737, | |
| "grad_norm": 0.24552594125270844, | |
| "learning_rate": 9.984644276980594e-07, | |
| "loss": 0.6506116390228271, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.3873684210526316, | |
| "grad_norm": 0.25084301829338074, | |
| "learning_rate": 9.976013594383835e-07, | |
| "loss": 0.6540626049041748, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.40421052631578946, | |
| "grad_norm": 0.34244054555892944, | |
| "learning_rate": 9.965471737216833e-07, | |
| "loss": 0.6737770557403564, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.42105263157894735, | |
| "grad_norm": 0.34752583503723145, | |
| "learning_rate": 9.953022754369114e-07, | |
| "loss": 0.6755708217620849, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.4378947368421053, | |
| "grad_norm": 0.31017956137657166, | |
| "learning_rate": 9.938671427214158e-07, | |
| "loss": 0.6578442573547363, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.45473684210526316, | |
| "grad_norm": 0.21509627997875214, | |
| "learning_rate": 9.922423267772986e-07, | |
| "loss": 0.639409875869751, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.47157894736842104, | |
| "grad_norm": 0.3022947609424591, | |
| "learning_rate": 9.904284516597102e-07, | |
| "loss": 0.5995691776275635, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.4884210526315789, | |
| "grad_norm": 0.3367304801940918, | |
| "learning_rate": 9.884262140371648e-07, | |
| "loss": 0.5898309707641601, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.5052631578947369, | |
| "grad_norm": 0.294842928647995, | |
| "learning_rate": 9.862363829239662e-07, | |
| "loss": 0.6371779441833496, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.5221052631578947, | |
| "grad_norm": 0.25171560049057007, | |
| "learning_rate": 9.838597993848456e-07, | |
| "loss": 0.5795581817626954, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.5389473684210526, | |
| "grad_norm": 0.2818540036678314, | |
| "learning_rate": 9.81297376211928e-07, | |
| "loss": 0.5668415546417236, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.5557894736842105, | |
| "grad_norm": 0.32951900362968445, | |
| "learning_rate": 9.785500975741498e-07, | |
| "loss": 0.5933257102966308, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.5726315789473684, | |
| "grad_norm": 0.2763514518737793, | |
| "learning_rate": 9.756190186392615e-07, | |
| "loss": 0.5574678897857666, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.5894736842105263, | |
| "grad_norm": 0.3070182204246521, | |
| "learning_rate": 9.725052651685612e-07, | |
| "loss": 0.5532425880432129, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.6063157894736843, | |
| "grad_norm": 0.2079988420009613, | |
| "learning_rate": 9.692100330845153e-07, | |
| "loss": 0.5613389492034913, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.6231578947368421, | |
| "grad_norm": 0.282924622297287, | |
| "learning_rate": 9.657345880114318e-07, | |
| "loss": 0.5131485939025879, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "grad_norm": 0.20901450514793396, | |
| "learning_rate": 9.620802647893623e-07, | |
| "loss": 0.6279027462005615, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.6568421052631579, | |
| "grad_norm": 0.2637634575366974, | |
| "learning_rate": 9.58248466961421e-07, | |
| "loss": 0.5403085231781006, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.6736842105263158, | |
| "grad_norm": 0.29078468680381775, | |
| "learning_rate": 9.542406662347137e-07, | |
| "loss": 0.5678809642791748, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.6905263157894737, | |
| "grad_norm": 0.2865101397037506, | |
| "learning_rate": 9.500584019150895e-07, | |
| "loss": 0.5479135036468505, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.7073684210526315, | |
| "grad_norm": 0.22857311367988586, | |
| "learning_rate": 9.45703280315928e-07, | |
| "loss": 0.5604462623596191, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.7242105263157895, | |
| "grad_norm": 0.23971959948539734, | |
| "learning_rate": 9.411769741411903e-07, | |
| "loss": 0.4704423427581787, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.7410526315789474, | |
| "grad_norm": 0.29793378710746765, | |
| "learning_rate": 9.364812218429721e-07, | |
| "loss": 0.560968017578125, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.7578947368421053, | |
| "grad_norm": 0.2236040234565735, | |
| "learning_rate": 9.316178269538014e-07, | |
| "loss": 0.5088452816009521, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.7747368421052632, | |
| "grad_norm": 0.22047854959964752, | |
| "learning_rate": 9.265886573939446e-07, | |
| "loss": 0.5030550956726074, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.791578947368421, | |
| "grad_norm": 0.2273361086845398, | |
| "learning_rate": 9.213956447539792e-07, | |
| "loss": 0.46353440284729003, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.8084210526315789, | |
| "grad_norm": 0.2170158326625824, | |
| "learning_rate": 9.160407835529136e-07, | |
| "loss": 0.49871411323547366, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.8252631578947368, | |
| "grad_norm": 0.19333498179912567, | |
| "learning_rate": 9.105261304721375e-07, | |
| "loss": 0.4416178226470947, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.8421052631578947, | |
| "grad_norm": 0.18490085005760193, | |
| "learning_rate": 9.048538035654969e-07, | |
| "loss": 0.39783194065093996, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.8589473684210527, | |
| "grad_norm": 0.22122648358345032, | |
| "learning_rate": 8.990259814457977e-07, | |
| "loss": 0.4318229198455811, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.8757894736842106, | |
| "grad_norm": 0.17448943853378296, | |
| "learning_rate": 8.930449024480491e-07, | |
| "loss": 0.42445807456970214, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.8926315789473684, | |
| "grad_norm": 0.18165165185928345, | |
| "learning_rate": 8.8691286376977e-07, | |
| "loss": 0.46429901123046874, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.9094736842105263, | |
| "grad_norm": 0.16785287857055664, | |
| "learning_rate": 8.806322205886873e-07, | |
| "loss": 0.3975703239440918, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.9263157894736842, | |
| "grad_norm": 0.1613738089799881, | |
| "learning_rate": 8.74205385158165e-07, | |
| "loss": 0.4458911418914795, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.9431578947368421, | |
| "grad_norm": 0.15376177430152893, | |
| "learning_rate": 8.676348258807121e-07, | |
| "loss": 0.45571184158325195, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "grad_norm": 0.14966322481632233, | |
| "learning_rate": 8.609230663599254e-07, | |
| "loss": 0.4039600372314453, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.9768421052631578, | |
| "grad_norm": 0.16819055378437042, | |
| "learning_rate": 8.540726844312294e-07, | |
| "loss": 0.4382494926452637, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.9936842105263158, | |
| "grad_norm": 0.16405776143074036, | |
| "learning_rate": 8.470863111717889e-07, | |
| "loss": 0.4306180477142334, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 1.0101052631578948, | |
| "grad_norm": 0.18503950536251068, | |
| "learning_rate": 8.399666298899706e-07, | |
| "loss": 0.39806089401245115, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 1.0269473684210526, | |
| "grad_norm": 0.14375492930412292, | |
| "learning_rate": 8.327163750947457e-07, | |
| "loss": 0.4271697044372559, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 1.0437894736842106, | |
| "grad_norm": 0.1412728875875473, | |
| "learning_rate": 8.253383314454263e-07, | |
| "loss": 0.3939049243927002, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 1.0606315789473684, | |
| "grad_norm": 0.20121850073337555, | |
| "learning_rate": 8.178353326821404e-07, | |
| "loss": 0.43197131156921387, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 1.0774736842105264, | |
| "grad_norm": 0.17767728865146637, | |
| "learning_rate": 8.102102605374566e-07, | |
| "loss": 0.437807559967041, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 1.0943157894736841, | |
| "grad_norm": 0.1498359888792038, | |
| "learning_rate": 8.024660436295759e-07, | |
| "loss": 0.38409013748168946, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 1.1111578947368421, | |
| "grad_norm": 0.15958793461322784, | |
| "learning_rate": 7.946056563375145e-07, | |
| "loss": 0.4204962730407715, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 1.1280000000000001, | |
| "grad_norm": 0.157291978597641, | |
| "learning_rate": 7.866321176587128e-07, | |
| "loss": 0.42113161087036133, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 1.1448421052631579, | |
| "grad_norm": 0.14119838178157806, | |
| "learning_rate": 7.785484900495065e-07, | |
| "loss": 0.4151731491088867, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 1.1616842105263159, | |
| "grad_norm": 0.1296525001525879, | |
| "learning_rate": 7.703578782489058e-07, | |
| "loss": 0.38312902450561526, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 1.1785263157894736, | |
| "grad_norm": 0.13671696186065674, | |
| "learning_rate": 7.620634280861351e-07, | |
| "loss": 0.42612557411193847, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 1.1953684210526316, | |
| "grad_norm": 0.15196114778518677, | |
| "learning_rate": 7.536683252723923e-07, | |
| "loss": 0.4306772708892822, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 1.2122105263157894, | |
| "grad_norm": 0.1136903315782547, | |
| "learning_rate": 7.451757941772868e-07, | |
| "loss": 0.38483757972717286, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 1.2290526315789474, | |
| "grad_norm": 0.12378744781017303, | |
| "learning_rate": 7.365890965904337e-07, | |
| "loss": 0.4030342102050781, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 1.2458947368421052, | |
| "grad_norm": 0.1265542209148407, | |
| "learning_rate": 7.279115304686733e-07, | |
| "loss": 0.4091166973114014, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 1.2627368421052632, | |
| "grad_norm": 0.11647409200668335, | |
| "learning_rate": 7.191464286694e-07, | |
| "loss": 0.41426806449890136, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 1.279578947368421, | |
| "grad_norm": 0.11192695051431656, | |
| "learning_rate": 7.102971576704875e-07, | |
| "loss": 0.38181486129760744, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 1.296421052631579, | |
| "grad_norm": 0.14947861433029175, | |
| "learning_rate": 7.013671162773003e-07, | |
| "loss": 0.39824953079223635, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 1.313263157894737, | |
| "grad_norm": 0.11269424855709076, | |
| "learning_rate": 6.923597343172891e-07, | |
| "loss": 0.40348024368286134, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 1.3301052631578947, | |
| "grad_norm": 0.3742346167564392, | |
| "learning_rate": 6.83278471322672e-07, | |
| "loss": 0.38022048473358155, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 1.3469473684210527, | |
| "grad_norm": 0.1310902237892151, | |
| "learning_rate": 6.741268152017057e-07, | |
| "loss": 0.42791285514831545, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 1.3637894736842107, | |
| "grad_norm": 0.1692703813314438, | |
| "learning_rate": 6.649082808990585e-07, | |
| "loss": 0.4263493061065674, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 1.3806315789473684, | |
| "grad_norm": 0.1279117316007614, | |
| "learning_rate": 6.556264090457998e-07, | |
| "loss": 0.37379777431488037, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 1.3974736842105262, | |
| "grad_norm": 0.12949039041996002, | |
| "learning_rate": 6.462847645995237e-07, | |
| "loss": 0.38636391162872313, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 1.4143157894736842, | |
| "grad_norm": 0.10221126675605774, | |
| "learning_rate": 6.368869354751284e-07, | |
| "loss": 0.408221435546875, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 1.4311578947368422, | |
| "grad_norm": 0.11505889147520065, | |
| "learning_rate": 6.274365311667797e-07, | |
| "loss": 0.3951406717300415, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 1.448, | |
| "grad_norm": 0.11054962873458862, | |
| "learning_rate": 6.179371813615859e-07, | |
| "loss": 0.3732129096984863, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 1.464842105263158, | |
| "grad_norm": 0.10150120407342911, | |
| "learning_rate": 6.083925345455158e-07, | |
| "loss": 0.38601529598236084, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 1.4816842105263157, | |
| "grad_norm": 0.12239400297403336, | |
| "learning_rate": 5.988062566020986e-07, | |
| "loss": 0.3859985828399658, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 1.4985263157894737, | |
| "grad_norm": 0.15801067650318146, | |
| "learning_rate": 5.891820294044408e-07, | |
| "loss": 0.3983951807022095, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 1.5153684210526315, | |
| "grad_norm": 0.10104545950889587, | |
| "learning_rate": 5.795235494011007e-07, | |
| "loss": 0.41107850074768065, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 1.5322105263157895, | |
| "grad_norm": 0.1378099024295807, | |
| "learning_rate": 5.698345261963668e-07, | |
| "loss": 0.3708331823348999, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 1.5490526315789475, | |
| "grad_norm": 0.12936057150363922, | |
| "learning_rate": 5.601186811254825e-07, | |
| "loss": 0.387884521484375, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 1.5658947368421052, | |
| "grad_norm": 0.12379129230976105, | |
| "learning_rate": 5.503797458253646e-07, | |
| "loss": 0.43808717727661134, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 1.582736842105263, | |
| "grad_norm": 0.12017743289470673, | |
| "learning_rate": 5.406214608013662e-07, | |
| "loss": 0.41345391273498533, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 1.5995789473684212, | |
| "grad_norm": 0.1095535159111023, | |
| "learning_rate": 5.308475739906328e-07, | |
| "loss": 0.40022664070129393, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 1.616421052631579, | |
| "grad_norm": 0.13831396400928497, | |
| "learning_rate": 5.210618393226045e-07, | |
| "loss": 0.3909924983978271, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 1.6332631578947368, | |
| "grad_norm": 0.10449163615703583, | |
| "learning_rate": 5.112680152772156e-07, | |
| "loss": 0.37143146991729736, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 1.6501052631578947, | |
| "grad_norm": 0.11249610036611557, | |
| "learning_rate": 5.01469863441348e-07, | |
| "loss": 0.38103113174438474, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 1.6669473684210527, | |
| "grad_norm": 0.13718819618225098, | |
| "learning_rate": 4.916711470640907e-07, | |
| "loss": 0.4071629524230957, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 1.6837894736842105, | |
| "grad_norm": 0.10473571717739105, | |
| "learning_rate": 4.818756296113595e-07, | |
| "loss": 0.417419958114624, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 1.7006315789473683, | |
| "grad_norm": 0.10846224427223206, | |
| "learning_rate": 4.7208707332043623e-07, | |
| "loss": 0.3998772859573364, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 1.7174736842105263, | |
| "grad_norm": 0.10248563438653946, | |
| "learning_rate": 4.6230923775497714e-07, | |
| "loss": 0.38056583404541017, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 1.7343157894736843, | |
| "grad_norm": 0.12221980094909668, | |
| "learning_rate": 4.5254587836104964e-07, | |
| "loss": 0.39371190071105955, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 1.751157894736842, | |
| "grad_norm": 0.10641586035490036, | |
| "learning_rate": 4.4280074502475017e-07, | |
| "loss": 0.4280440330505371, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 1.768, | |
| "grad_norm": 0.12907131016254425, | |
| "learning_rate": 4.3307758063195796e-07, | |
| "loss": 0.3791615962982178, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 1.784842105263158, | |
| "grad_norm": 0.12383506447076797, | |
| "learning_rate": 4.233801196307762e-07, | |
| "loss": 0.347782301902771, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 1.8016842105263158, | |
| "grad_norm": 0.12547679245471954, | |
| "learning_rate": 4.1371208659721536e-07, | |
| "loss": 0.38370628356933595, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 1.8185263157894735, | |
| "grad_norm": 0.10580642521381378, | |
| "learning_rate": 4.0407719480466736e-07, | |
| "loss": 0.40404376983642576, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 1.8353684210526315, | |
| "grad_norm": 0.1055402085185051, | |
| "learning_rate": 3.944791447977213e-07, | |
| "loss": 0.4167450428009033, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 1.8522105263157895, | |
| "grad_norm": 0.11053823679685593, | |
| "learning_rate": 3.849216229708671e-07, | |
| "loss": 0.4046513080596924, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 1.8690526315789473, | |
| "grad_norm": 0.10185246914625168, | |
| "learning_rate": 3.7540830015263526e-07, | |
| "loss": 0.39672977924346925, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 1.8858947368421053, | |
| "grad_norm": 0.08342823386192322, | |
| "learning_rate": 3.6594283019571416e-07, | |
| "loss": 0.39356396198272703, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 1.9027368421052633, | |
| "grad_norm": 0.11821646988391876, | |
| "learning_rate": 3.565288485735874e-07, | |
| "loss": 0.42082643508911133, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 1.919578947368421, | |
| "grad_norm": 0.1106327474117279, | |
| "learning_rate": 3.4716997098423085e-07, | |
| "loss": 0.34105117321014405, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 1.9364210526315788, | |
| "grad_norm": 0.11533800512552261, | |
| "learning_rate": 3.378697919614045e-07, | |
| "loss": 0.3924069404602051, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 1.9532631578947368, | |
| "grad_norm": 0.1431114822626114, | |
| "learning_rate": 3.286318834940729e-07, | |
| "loss": 0.3922377586364746, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 1.9701052631578948, | |
| "grad_norm": 0.16050194203853607, | |
| "learning_rate": 3.1945979365448517e-07, | |
| "loss": 0.3745201587677002, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 1.9869473684210526, | |
| "grad_norm": 0.11921833455562592, | |
| "learning_rate": 3.103570452354402e-07, | |
| "loss": 0.40110602378845217, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 2.0033684210526315, | |
| "grad_norm": 0.0832003727555275, | |
| "learning_rate": 3.013271343972613e-07, | |
| "loss": 0.3981154918670654, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 2.0202105263157897, | |
| "grad_norm": 0.09975888580083847, | |
| "learning_rate": 2.9237352932500046e-07, | |
| "loss": 0.3726134061813354, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 2.0370526315789474, | |
| "grad_norm": 0.14600081741809845, | |
| "learning_rate": 2.8349966889638615e-07, | |
| "loss": 0.42558698654174804, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 2.053894736842105, | |
| "grad_norm": 0.10875770449638367, | |
| "learning_rate": 2.747089613610278e-07, | |
| "loss": 0.3682931184768677, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 2.070736842105263, | |
| "grad_norm": 0.10050549358129501, | |
| "learning_rate": 2.66004783031385e-07, | |
| "loss": 0.3756644487380981, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 2.087578947368421, | |
| "grad_norm": 0.08914914727210999, | |
| "learning_rate": 2.573904769860009e-07, | |
| "loss": 0.3804330825805664, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 2.104421052631579, | |
| "grad_norm": 0.08296852558851242, | |
| "learning_rate": 2.488693517855016e-07, | |
| "loss": 0.3978404521942139, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 2.1212631578947367, | |
| "grad_norm": 0.13885149359703064, | |
| "learning_rate": 2.404446802018533e-07, | |
| "loss": 0.3935218334197998, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 2.138105263157895, | |
| "grad_norm": 0.13195137679576874, | |
| "learning_rate": 2.3211969796136305e-07, | |
| "loss": 0.42966952323913576, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 2.1549473684210527, | |
| "grad_norm": 0.13367892801761627, | |
| "learning_rate": 2.2389760250191038e-07, | |
| "loss": 0.3679579019546509, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 2.1717894736842105, | |
| "grad_norm": 0.1288345605134964, | |
| "learning_rate": 2.1578155174488343e-07, | |
| "loss": 0.41324810981750487, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 2.1886315789473683, | |
| "grad_norm": 0.09626021236181259, | |
| "learning_rate": 2.0777466288229205e-07, | |
| "loss": 0.40120248794555663, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 2.2054736842105265, | |
| "grad_norm": 0.10264381766319275, | |
| "learning_rate": 1.9988001117952485e-07, | |
| "loss": 0.3501007080078125, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 2.2223157894736842, | |
| "grad_norm": 0.09031466394662857, | |
| "learning_rate": 1.9210062879420973e-07, | |
| "loss": 0.3839429378509521, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 2.239157894736842, | |
| "grad_norm": 0.12686079740524292, | |
| "learning_rate": 1.8443950361162957e-07, | |
| "loss": 0.4338528156280518, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 2.2560000000000002, | |
| "grad_norm": 0.12199016660451889, | |
| "learning_rate": 1.7689957809714346e-07, | |
| "loss": 0.39229888916015626, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 2.272842105263158, | |
| "grad_norm": 0.12029567360877991, | |
| "learning_rate": 1.694837481660525e-07, | |
| "loss": 0.38006880283355715, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 2.2896842105263158, | |
| "grad_norm": 0.08686309307813644, | |
| "learning_rate": 1.6219486207134313e-07, | |
| "loss": 0.3808159589767456, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 2.3065263157894735, | |
| "grad_norm": 0.10810462385416031, | |
| "learning_rate": 1.5503571930973785e-07, | |
| "loss": 0.401824426651001, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 2.3233684210526317, | |
| "grad_norm": 0.10281873494386673, | |
| "learning_rate": 1.480090695464723e-07, | |
| "loss": 0.40149493217468263, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 2.3402105263157895, | |
| "grad_norm": 0.09503985196352005, | |
| "learning_rate": 1.4111761155920975e-07, | |
| "loss": 0.38567726612091063, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 2.3570526315789473, | |
| "grad_norm": 0.10420782119035721, | |
| "learning_rate": 1.3436399220150212e-07, | |
| "loss": 0.3759742736816406, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 2.3738947368421055, | |
| "grad_norm": 0.10681115835905075, | |
| "learning_rate": 1.2775080538619347e-07, | |
| "loss": 0.3913698196411133, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 2.3907368421052633, | |
| "grad_norm": 0.10323983430862427, | |
| "learning_rate": 1.2128059108915595e-07, | |
| "loss": 0.39077584743499755, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 2.407578947368421, | |
| "grad_norm": 0.09566064178943634, | |
| "learning_rate": 1.1495583437374263e-07, | |
| "loss": 0.39895172119140626, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 2.424421052631579, | |
| "grad_norm": 0.13018426299095154, | |
| "learning_rate": 1.0877896443633117e-07, | |
| "loss": 0.38982129096984863, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 2.441263157894737, | |
| "grad_norm": 0.10760781168937683, | |
| "learning_rate": 1.0275235367332347e-07, | |
| "loss": 0.3756714344024658, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 2.458105263157895, | |
| "grad_norm": 0.11606904864311218, | |
| "learning_rate": 9.687831676996238e-08, | |
| "loss": 0.37858171463012696, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 2.4749473684210526, | |
| "grad_norm": 0.12957172095775604, | |
| "learning_rate": 9.115910981131336e-08, | |
| "loss": 0.40050196647644043, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 2.4917894736842103, | |
| "grad_norm": 0.11186131089925766, | |
| "learning_rate": 8.559692941575231e-08, | |
| "loss": 0.3684133291244507, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 2.5086315789473685, | |
| "grad_norm": 0.13279542326927185, | |
| "learning_rate": 8.019391189129466e-08, | |
| "loss": 0.3452518224716187, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 2.5254736842105263, | |
| "grad_norm": 0.09041756391525269, | |
| "learning_rate": 7.495213241508786e-08, | |
| "loss": 0.36301617622375487, | |
| "step": 1500 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 1782, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 100, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 2.742094440984576e+16, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |