Instructions to use yoriis/multitask_model with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- PEFT
How to use yoriis/multitask_model with PEFT:
```python
from peft import PeftModel
from transformers import AutoModelForCausalLM

base_model = AutoModelForCausalLM.from_pretrained("mistralai/Mistral-7B-Instruct-v0.3")
model = PeftModel.from_pretrained(base_model, "yoriis/multitask_model")
```
- Transformers
How to use yoriis/multitask_model with Transformers:
```python
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("text-generation", model="yoriis/multitask_model")
messages = [
    {"role": "user", "content": "Who are you?"},
]
pipe(messages)
```
```python
# Load model directly
from transformers import AutoModel

model = AutoModel.from_pretrained("yoriis/multitask_model", dtype="auto")
```
- Notebooks
- Google Colab
- Kaggle
- Local Apps
- vLLM
How to use yoriis/multitask_model with vLLM:
Install from pip and serve model
```bash
# Install vLLM from pip:
pip install vllm

# Start the vLLM server:
vllm serve "yoriis/multitask_model"

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/chat/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "yoriis/multitask_model",
    "messages": [
      {
        "role": "user",
        "content": "What is the capital of France?"
      }
    ]
  }'
```
Use Docker
docker model run hf.co/yoriis/multitask_model
- SGLang
How to use yoriis/multitask_model with SGLang:
Install from pip and serve model
```bash
# Install SGLang from pip:
pip install sglang

# Start the SGLang server:
python3 -m sglang.launch_server \
  --model-path "yoriis/multitask_model" \
  --host 0.0.0.0 \
  --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "yoriis/multitask_model",
    "messages": [
      {
        "role": "user",
        "content": "What is the capital of France?"
      }
    ]
  }'
```
Use Docker images
```bash
docker run --gpus all \
  --shm-size 32g \
  -p 30000:30000 \
  -v ~/.cache/huggingface:/root/.cache/huggingface \
  --env "HF_TOKEN=<secret>" \
  --ipc=host \
  lmsysorg/sglang:latest \
  python3 -m sglang.launch_server \
    --model-path "yoriis/multitask_model" \
    --host 0.0.0.0 \
    --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "yoriis/multitask_model",
    "messages": [
      {
        "role": "user",
        "content": "What is the capital of France?"
      }
    ]
  }'
```
- Docker Model Runner
How to use yoriis/multitask_model with Docker Model Runner:
docker model run hf.co/yoriis/multitask_model
| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 1.0, | |
| "eval_steps": 500, | |
| "global_step": 5243, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.009536524890329964, | |
| "grad_norm": 1.3725634813308716, | |
| "learning_rate": 3.7262357414448674e-05, | |
| "loss": 1.5183903503417968, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.019073049780659927, | |
| "grad_norm": 1.5630295276641846, | |
| "learning_rate": 7.52851711026616e-05, | |
| "loss": 1.0358538818359375, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.02860957467098989, | |
| "grad_norm": 1.0457429885864258, | |
| "learning_rate": 0.00011330798479087452, | |
| "loss": 0.9928044128417969, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.038146099561319854, | |
| "grad_norm": 1.1760847568511963, | |
| "learning_rate": 0.00015133079847908746, | |
| "loss": 0.9799901580810547, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.04768262445164982, | |
| "grad_norm": 1.0198277235031128, | |
| "learning_rate": 0.0001893536121673004, | |
| "loss": 0.9692832946777343, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.05721914934197978, | |
| "grad_norm": 4.2576141357421875, | |
| "learning_rate": 0.00019997421320309795, | |
| "loss": 0.9611747741699219, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.06675567423230974, | |
| "grad_norm": 1.0672153234481812, | |
| "learning_rate": 0.00019985286992997873, | |
| "loss": 0.9739149475097656, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.07629219912263971, | |
| "grad_norm": 1.015286922454834, | |
| "learning_rate": 0.00019963219089815488, | |
| "loss": 0.95861572265625, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.08582872401296968, | |
| "grad_norm": 1.108496069908142, | |
| "learning_rate": 0.00019931239564382073, | |
| "loss": 0.9668975067138672, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.09536524890329964, | |
| "grad_norm": 0.855691134929657, | |
| "learning_rate": 0.0001988938023060968, | |
| "loss": 0.9651553344726562, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.09536524890329964, | |
| "eval_loss": 0.9762284159660339, | |
| "eval_runtime": 680.8217, | |
| "eval_samples_per_second": 12.536, | |
| "eval_steps_per_second": 0.784, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.1049017737936296, | |
| "grad_norm": 0.8507081866264343, | |
| "learning_rate": 0.0001983768273105382, | |
| "loss": 0.9656350708007813, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.11443829868395956, | |
| "grad_norm": 0.8325157165527344, | |
| "learning_rate": 0.00019776198495486565, | |
| "loss": 0.9318672180175781, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.12397482357428953, | |
| "grad_norm": 0.9234330654144287, | |
| "learning_rate": 0.0001970498868973313, | |
| "loss": 0.9393777465820312, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.13351134846461948, | |
| "grad_norm": 0.8355016708374023, | |
| "learning_rate": 0.0001962412415482278, | |
| "loss": 0.9270508575439453, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.14304787335494945, | |
| "grad_norm": 0.9328432083129883, | |
| "learning_rate": 0.00019533685336514697, | |
| "loss": 0.9209387969970703, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.15258439824527942, | |
| "grad_norm": 1.194154977798462, | |
| "learning_rate": 0.00019433762205268805, | |
| "loss": 0.9753380584716796, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.16212092313560938, | |
| "grad_norm": 0.9554775953292847, | |
| "learning_rate": 0.0001932445416674127, | |
| "loss": 0.9683086395263671, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.17165744802593935, | |
| "grad_norm": 1.0252937078475952, | |
| "learning_rate": 0.00019205869962893605, | |
| "loss": 0.9329315948486329, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.18119397291626932, | |
| "grad_norm": 1.2285805940628052, | |
| "learning_rate": 0.00019078127563813883, | |
| "loss": 0.9511469268798828, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.1907304978065993, | |
| "grad_norm": 0.9454661011695862, | |
| "learning_rate": 0.00018941354050357566, | |
| "loss": 0.951951904296875, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.1907304978065993, | |
| "eval_loss": 0.9598119258880615, | |
| "eval_runtime": 680.8627, | |
| "eval_samples_per_second": 12.536, | |
| "eval_steps_per_second": 0.784, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.20026702269692923, | |
| "grad_norm": 1.0210950374603271, | |
| "learning_rate": 0.00018795685487724782, | |
| "loss": 0.9263379669189453, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.2098035475872592, | |
| "grad_norm": 1.0087344646453857, | |
| "learning_rate": 0.0001864126679009975, | |
| "loss": 0.9310942840576172, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.21934007247758916, | |
| "grad_norm": 1.1398996114730835, | |
| "learning_rate": 0.00018478251576487092, | |
| "loss": 0.9465924072265625, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.22887659736791913, | |
| "grad_norm": 0.9276406764984131, | |
| "learning_rate": 0.0001830680201788836, | |
| "loss": 0.9552659606933593, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.2384131222582491, | |
| "grad_norm": 1.0002596378326416, | |
| "learning_rate": 0.00018127088675970888, | |
| "loss": 0.9471788024902343, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.24794964714857906, | |
| "grad_norm": 1.0108773708343506, | |
| "learning_rate": 0.000179392903333894, | |
| "loss": 0.9245248413085938, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.257486172038909, | |
| "grad_norm": 0.9765293002128601, | |
| "learning_rate": 0.0001774359381592925, | |
| "loss": 0.9204165649414062, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 0.26702269692923897, | |
| "grad_norm": 1.0059659481048584, | |
| "learning_rate": 0.00017540193806648134, | |
| "loss": 0.9452506256103516, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.27655922181956893, | |
| "grad_norm": 1.1858373880386353, | |
| "learning_rate": 0.0001732929265220125, | |
| "loss": 0.9286368560791015, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 0.2860957467098989, | |
| "grad_norm": 1.0006766319274902, | |
| "learning_rate": 0.00017111100161542545, | |
| "loss": 0.9553109741210938, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.2860957467098989, | |
| "eval_loss": 0.9582533836364746, | |
| "eval_runtime": 680.822, | |
| "eval_samples_per_second": 12.536, | |
| "eval_steps_per_second": 0.784, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.29563227160022887, | |
| "grad_norm": 0.9042516350746155, | |
| "learning_rate": 0.00016885833397202308, | |
| "loss": 0.93341796875, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 0.30516879649055884, | |
| "grad_norm": 0.9387173652648926, | |
| "learning_rate": 0.00016653716459348735, | |
| "loss": 0.9339485168457031, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.3147053213808888, | |
| "grad_norm": 0.9056106209754944, | |
| "learning_rate": 0.00016414980262848333, | |
| "loss": 0.9324442291259766, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 0.32424184627121877, | |
| "grad_norm": 0.9407602548599243, | |
| "learning_rate": 0.0001616986230754689, | |
| "loss": 0.9319153594970703, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.33377837116154874, | |
| "grad_norm": 0.9982315897941589, | |
| "learning_rate": 0.0001591860644199957, | |
| "loss": 0.9230084228515625, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 0.3433148960518787, | |
| "grad_norm": 1.0833745002746582, | |
| "learning_rate": 0.00015661462620885199, | |
| "loss": 0.9161262512207031, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.35285142094220867, | |
| "grad_norm": 0.9810793995857239, | |
| "learning_rate": 0.00015398686656346028, | |
| "loss": 0.9208243560791015, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 0.36238794583253864, | |
| "grad_norm": 1.094853401184082, | |
| "learning_rate": 0.00015130539963500376, | |
| "loss": 0.9387385559082031, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.3719244707228686, | |
| "grad_norm": 0.8733468055725098, | |
| "learning_rate": 0.000148572893003813, | |
| "loss": 0.9468997955322266, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 0.3814609956131986, | |
| "grad_norm": 1.0074750185012817, | |
| "learning_rate": 0.0001457920650256004, | |
| "loss": 0.9178851318359375, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.3814609956131986, | |
| "eval_loss": 0.9489485025405884, | |
| "eval_runtime": 681.5678, | |
| "eval_samples_per_second": 12.523, | |
| "eval_steps_per_second": 0.783, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.39099752050352854, | |
| "grad_norm": 1.0041425228118896, | |
| "learning_rate": 0.00014296568212718213, | |
| "loss": 0.9244281005859375, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 0.40053404539385845, | |
| "grad_norm": 1.0447967052459717, | |
| "learning_rate": 0.0001400965560543778, | |
| "loss": 0.9248933410644531, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 0.4100705702841884, | |
| "grad_norm": 0.968345046043396, | |
| "learning_rate": 0.00013718754107482596, | |
| "loss": 0.9109151458740234, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 0.4196070951745184, | |
| "grad_norm": 0.9737382531166077, | |
| "learning_rate": 0.0001342415311384981, | |
| "loss": 0.9231878662109375, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 0.42914362006484835, | |
| "grad_norm": 1.0494582653045654, | |
| "learning_rate": 0.00013126145699873532, | |
| "loss": 0.9193107604980468, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 0.4386801449551783, | |
| "grad_norm": 0.925656259059906, | |
| "learning_rate": 0.000128250283296673, | |
| "loss": 0.9101874542236328, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 0.4482166698455083, | |
| "grad_norm": 1.0119520425796509, | |
| "learning_rate": 0.00012521100561195233, | |
| "loss": 0.8976884460449219, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 0.45775319473583825, | |
| "grad_norm": 0.9749938249588013, | |
| "learning_rate": 0.0001221466474826543, | |
| "loss": 0.9223648834228516, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.4672897196261682, | |
| "grad_norm": 1.071219563484192, | |
| "learning_rate": 0.00011906025739741956, | |
| "loss": 0.9097858428955078, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 0.4768262445164982, | |
| "grad_norm": 1.0023361444473267, | |
| "learning_rate": 0.00011595490576274704, | |
| "loss": 0.9021361541748046, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.4768262445164982, | |
| "eval_loss": 0.9404354095458984, | |
| "eval_runtime": 681.2848, | |
| "eval_samples_per_second": 12.528, | |
| "eval_steps_per_second": 0.784, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.48636276940682815, | |
| "grad_norm": 0.7936119437217712, | |
| "learning_rate": 0.00011283368184848842, | |
| "loss": 0.9291069030761718, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 0.4958992942971581, | |
| "grad_norm": 0.8363732695579529, | |
| "learning_rate": 0.00010969969071457669, | |
| "loss": 0.8912797546386719, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 0.505435819187488, | |
| "grad_norm": 0.9047644734382629, | |
| "learning_rate": 0.0001065560501220464, | |
| "loss": 0.8831972503662109, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 0.514972344077818, | |
| "grad_norm": 0.8407217264175415, | |
| "learning_rate": 0.00010340588743141879, | |
| "loss": 0.9096057891845704, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 0.524508868968148, | |
| "grad_norm": 0.9929122924804688, | |
| "learning_rate": 0.00010025233649153707, | |
| "loss": 0.9299073028564453, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 0.5340453938584779, | |
| "grad_norm": 1.0364736318588257, | |
| "learning_rate": 9.70985345219468e-05, | |
| "loss": 0.9169498443603515, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 0.5435819187488079, | |
| "grad_norm": 0.9246217012405396, | |
| "learning_rate": 9.394761899192327e-05, | |
| "loss": 0.8965087127685547, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 0.5531184436391379, | |
| "grad_norm": 0.9324344992637634, | |
| "learning_rate": 9.08027244992503e-05, | |
| "loss": 0.9102237701416016, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 0.5626549685294678, | |
| "grad_norm": 0.998298704624176, | |
| "learning_rate": 8.766697965185565e-05, | |
| "loss": 0.9192097473144532, | |
| "step": 2950 | |
| }, | |
| { | |
| "epoch": 0.5721914934197978, | |
| "grad_norm": 0.9278152585029602, | |
| "learning_rate": 8.45435039554054e-05, | |
| "loss": 0.9202249908447265, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.5721914934197978, | |
| "eval_loss": 0.9327928423881531, | |
| "eval_runtime": 680.7868, | |
| "eval_samples_per_second": 12.537, | |
| "eval_steps_per_second": 0.784, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.5817280183101278, | |
| "grad_norm": 1.0032947063446045, | |
| "learning_rate": 8.14354047099533e-05, | |
| "loss": 0.9145193481445313, | |
| "step": 3050 | |
| }, | |
| { | |
| "epoch": 0.5912645432004577, | |
| "grad_norm": 0.834137499332428, | |
| "learning_rate": 7.834577391873266e-05, | |
| "loss": 0.9149667358398438, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 0.6008010680907877, | |
| "grad_norm": 0.9321780204772949, | |
| "learning_rate": 7.527768521216568e-05, | |
| "loss": 0.892784652709961, | |
| "step": 3150 | |
| }, | |
| { | |
| "epoch": 0.6103375929811177, | |
| "grad_norm": 1.0074846744537354, | |
| "learning_rate": 7.223419079015062e-05, | |
| "loss": 0.9072589111328125, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 0.6198741178714476, | |
| "grad_norm": 1.2590699195861816, | |
| "learning_rate": 6.921831838566842e-05, | |
| "loss": 0.9023927307128906, | |
| "step": 3250 | |
| }, | |
| { | |
| "epoch": 0.6294106427617776, | |
| "grad_norm": 0.7596737742424011, | |
| "learning_rate": 6.623306825272937e-05, | |
| "loss": 0.896091079711914, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 0.6389471676521076, | |
| "grad_norm": 0.9938393831253052, | |
| "learning_rate": 6.328141018165693e-05, | |
| "loss": 0.9024559783935547, | |
| "step": 3350 | |
| }, | |
| { | |
| "epoch": 0.6484836925424375, | |
| "grad_norm": 0.8195408582687378, | |
| "learning_rate": 6.036628054467682e-05, | |
| "loss": 0.8873242950439453, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 0.6580202174327675, | |
| "grad_norm": 0.9234633445739746, | |
| "learning_rate": 5.7490579374751686e-05, | |
| "loss": 0.8969253540039063, | |
| "step": 3450 | |
| }, | |
| { | |
| "epoch": 0.6675567423230975, | |
| "grad_norm": 0.9521735906600952, | |
| "learning_rate": 5.4657167480566594e-05, | |
| "loss": 0.8936698913574219, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.6675567423230975, | |
| "eval_loss": 0.9270365238189697, | |
| "eval_runtime": 680.6766, | |
| "eval_samples_per_second": 12.539, | |
| "eval_steps_per_second": 0.785, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.6770932672134274, | |
| "grad_norm": 0.9278208613395691, | |
| "learning_rate": 5.1868863600535646e-05, | |
| "loss": 0.8819551849365235, | |
| "step": 3550 | |
| }, | |
| { | |
| "epoch": 0.6866297921037574, | |
| "grad_norm": 0.8966768383979797, | |
| "learning_rate": 4.912844159866112e-05, | |
| "loss": 0.905844955444336, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 0.6961663169940874, | |
| "grad_norm": 0.886394202709198, | |
| "learning_rate": 4.6438627705034535e-05, | |
| "loss": 0.8963019561767578, | |
| "step": 3650 | |
| }, | |
| { | |
| "epoch": 0.7057028418844173, | |
| "grad_norm": 1.0538233518600464, | |
| "learning_rate": 4.380209780372496e-05, | |
| "loss": 0.8904299926757813, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 0.7152393667747473, | |
| "grad_norm": 1.0238487720489502, | |
| "learning_rate": 4.12214747707527e-05, | |
| "loss": 0.9049517822265625, | |
| "step": 3750 | |
| }, | |
| { | |
| "epoch": 0.7247758916650773, | |
| "grad_norm": 0.913487434387207, | |
| "learning_rate": 3.869932586479628e-05, | |
| "loss": 0.8899057006835938, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 0.7343124165554072, | |
| "grad_norm": 0.8777801990509033, | |
| "learning_rate": 3.623816017322917e-05, | |
| "loss": 0.8895880889892578, | |
| "step": 3850 | |
| }, | |
| { | |
| "epoch": 0.7438489414457372, | |
| "grad_norm": 1.0374138355255127, | |
| "learning_rate": 3.3840426116026044e-05, | |
| "loss": 0.9089337921142578, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 0.7533854663360672, | |
| "grad_norm": 1.0942944288253784, | |
| "learning_rate": 3.150850901002268e-05, | |
| "loss": 0.8874909210205079, | |
| "step": 3950 | |
| }, | |
| { | |
| "epoch": 0.7629219912263971, | |
| "grad_norm": 0.8818588256835938, | |
| "learning_rate": 2.9244728695951995e-05, | |
| "loss": 0.8881364440917969, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.7629219912263971, | |
| "eval_loss": 0.921961784362793, | |
| "eval_runtime": 680.8471, | |
| "eval_samples_per_second": 12.536, | |
| "eval_steps_per_second": 0.784, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.7724585161167271, | |
| "grad_norm": 0.8658433556556702, | |
| "learning_rate": 2.7051337230617125e-05, | |
| "loss": 0.887118148803711, | |
| "step": 4050 | |
| }, | |
| { | |
| "epoch": 0.7819950410070571, | |
| "grad_norm": 1.0360819101333618, | |
| "learning_rate": 2.4930516646497448e-05, | |
| "loss": 0.9029306030273437, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 0.791531565897387, | |
| "grad_norm": 0.9453035593032837, | |
| "learning_rate": 2.2884376781016258e-05, | |
| "loss": 0.874930419921875, | |
| "step": 4150 | |
| }, | |
| { | |
| "epoch": 0.8010680907877169, | |
| "grad_norm": 0.9671445488929749, | |
| "learning_rate": 2.0914953177629548e-05, | |
| "loss": 0.8965608978271484, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 0.8106046156780469, | |
| "grad_norm": 0.9346293210983276, | |
| "learning_rate": 1.902420506082424e-05, | |
| "loss": 0.8860881042480468, | |
| "step": 4250 | |
| }, | |
| { | |
| "epoch": 0.8201411405683768, | |
| "grad_norm": 1.0210144519805908, | |
| "learning_rate": 1.7214013387039884e-05, | |
| "loss": 0.9090773773193359, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 0.8296776654587068, | |
| "grad_norm": 0.7916406393051147, | |
| "learning_rate": 1.54861789734532e-05, | |
| "loss": 0.9077362060546875, | |
| "step": 4350 | |
| }, | |
| { | |
| "epoch": 0.8392141903490368, | |
| "grad_norm": 0.849738597869873, | |
| "learning_rate": 1.3842420706486903e-05, | |
| "loss": 0.8971955871582031, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 0.8487507152393667, | |
| "grad_norm": 0.8733137845993042, | |
| "learning_rate": 1.2284373831824847e-05, | |
| "loss": 0.8827657318115234, | |
| "step": 4450 | |
| }, | |
| { | |
| "epoch": 0.8582872401296967, | |
| "grad_norm": 0.9427047967910767, | |
| "learning_rate": 1.0813588327634961e-05, | |
| "loss": 0.8771236419677735, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 0.8582872401296967, | |
| "eval_loss": 0.9178871512413025, | |
| "eval_runtime": 680.8349, | |
| "eval_samples_per_second": 12.536, | |
| "eval_steps_per_second": 0.784, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 0.8678237650200267, | |
| "grad_norm": 0.8574934005737305, | |
| "learning_rate": 9.431527362617832e-06, | |
| "loss": 0.8978910827636719, | |
| "step": 4550 | |
| }, | |
| { | |
| "epoch": 0.8773602899103566, | |
| "grad_norm": 0.986375629901886, | |
| "learning_rate": 8.139565840415553e-06, | |
| "loss": 0.8829045867919922, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 0.8868968148006866, | |
| "grad_norm": 1.026666283607483, | |
| "learning_rate": 6.938989031828158e-06, | |
| "loss": 0.8786582183837891, | |
| "step": 4650 | |
| }, | |
| { | |
| "epoch": 0.8964333396910166, | |
| "grad_norm": 0.9898380041122437, | |
| "learning_rate": 5.8309912961990506e-06, | |
| "loss": 0.8815351867675781, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 0.9059698645813465, | |
| "grad_norm": 0.8167511224746704, | |
| "learning_rate": 4.8166748932408355e-06, | |
| "loss": 0.8737747192382812, | |
| "step": 4750 | |
| }, | |
| { | |
| "epoch": 0.9155063894716765, | |
| "grad_norm": 0.9592494368553162, | |
| "learning_rate": 3.8970488864839334e-06, | |
| "loss": 0.886360092163086, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 0.9250429143620065, | |
| "grad_norm": 1.1282484531402588, | |
| "learning_rate": 3.0730281394387382e-06, | |
| "loss": 0.9033355712890625, | |
| "step": 4850 | |
| }, | |
| { | |
| "epoch": 0.9345794392523364, | |
| "grad_norm": 1.0762990713119507, | |
| "learning_rate": 2.345432405469894e-06, | |
| "loss": 0.9018070983886719, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 0.9441159641426664, | |
| "grad_norm": 1.0391168594360352, | |
| "learning_rate": 1.7149855122882697e-06, | |
| "loss": 0.8773787689208984, | |
| "step": 4950 | |
| }, | |
| { | |
| "epoch": 0.9536524890329964, | |
| "grad_norm": 1.160536527633667, | |
| "learning_rate": 1.1823146418717068e-06, | |
| "loss": 0.8903836822509765, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.9536524890329964, | |
| "eval_loss": 0.9169394373893738, | |
| "eval_runtime": 681.071, | |
| "eval_samples_per_second": 12.532, | |
| "eval_steps_per_second": 0.784, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.9631890139233263, | |
| "grad_norm": 0.823947548866272, | |
| "learning_rate": 7.479497065310925e-07, | |
| "loss": 0.90203857421875, | |
| "step": 5050 | |
| }, | |
| { | |
| "epoch": 0.9727255388136563, | |
| "grad_norm": 0.7839481830596924, | |
| "learning_rate": 4.123228217422948e-07, | |
| "loss": 0.8997705078125, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 0.9822620637039863, | |
| "grad_norm": 0.9920501708984375, | |
| "learning_rate": 1.7576787626851777e-07, | |
| "loss": 0.8869709777832031, | |
| "step": 5150 | |
| }, | |
| { | |
| "epoch": 0.9917985885943162, | |
| "grad_norm": 0.9276648759841919, | |
| "learning_rate": 3.8520200000624615e-08, | |
| "loss": 0.8911021423339843, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_loss": 0.9169082045555115, | |
| "eval_runtime": 681.3749, | |
| "eval_samples_per_second": 12.526, | |
| "eval_steps_per_second": 0.784, | |
| "step": 5243 | |
| } | |
| ], | |
| "logging_steps": 50, | |
| "max_steps": 5243, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 7.345861112836915e+18, | |
| "train_batch_size": 32, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |