Text Generation
Transformers
Safetensors
llama
llama-factory
full
Generated from Trainer
conversational
text-generation-inference
Instructions to use PARZ2344/web_llama_sft_random with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use PARZ2344/web_llama_sft_random with Transformers:
```python
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("text-generation", model="PARZ2344/web_llama_sft_random")
messages = [
    {"role": "user", "content": "Who are you?"},
]
pipe(messages)
```

```python
# Load model directly
from transformers import AutoTokenizer, AutoModelForCausalLM

tokenizer = AutoTokenizer.from_pretrained("PARZ2344/web_llama_sft_random")
model = AutoModelForCausalLM.from_pretrained("PARZ2344/web_llama_sft_random")
messages = [
    {"role": "user", "content": "Who are you?"},
]
inputs = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt=True,
    tokenize=True,
    return_dict=True,
    return_tensors="pt",
).to(model.device)

outputs = model.generate(**inputs, max_new_tokens=40)
print(tokenizer.decode(outputs[0][inputs["input_ids"].shape[-1]:]))
```
- Inference
- Notebooks
- Google Colab
- Kaggle
- Local Apps
- vLLM
How to use PARZ2344/web_llama_sft_random with vLLM:
Install from pip and serve model
```bash
# Install vLLM from pip:
pip install vllm

# Start the vLLM server:
vllm serve "PARZ2344/web_llama_sft_random"

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/chat/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "PARZ2344/web_llama_sft_random",
    "messages": [
      {
        "role": "user",
        "content": "What is the capital of France?"
      }
    ]
  }'
```
Use Docker
docker model run hf.co/PARZ2344/web_llama_sft_random
- SGLang
How to use PARZ2344/web_llama_sft_random with SGLang:
Install from pip and serve model
```bash
# Install SGLang from pip:
pip install sglang

# Start the SGLang server:
python3 -m sglang.launch_server \
  --model-path "PARZ2344/web_llama_sft_random" \
  --host 0.0.0.0 \
  --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "PARZ2344/web_llama_sft_random",
    "messages": [
      {
        "role": "user",
        "content": "What is the capital of France?"
      }
    ]
  }'
```
Use Docker images
```bash
docker run --gpus all \
  --shm-size 32g \
  -p 30000:30000 \
  -v ~/.cache/huggingface:/root/.cache/huggingface \
  --env "HF_TOKEN=<secret>" \
  --ipc=host \
  lmsysorg/sglang:latest \
  python3 -m sglang.launch_server \
    --model-path "PARZ2344/web_llama_sft_random" \
    --host 0.0.0.0 \
    --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "PARZ2344/web_llama_sft_random",
    "messages": [
      {
        "role": "user",
        "content": "What is the capital of France?"
      }
    ]
  }'
```
- Docker Model Runner
How to use PARZ2344/web_llama_sft_random with Docker Model Runner:
docker model run hf.co/PARZ2344/web_llama_sft_random
| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 3.0, | |
| "eval_steps": 500, | |
| "global_step": 914, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.03281378178835111, | |
| "grad_norm": 7.324607115951123, | |
| "learning_rate": 9.782608695652175e-07, | |
| "loss": 1.6507, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.06562756357670221, | |
| "grad_norm": 2.6718330452258137, | |
| "learning_rate": 2.065217391304348e-06, | |
| "loss": 1.5293, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.09844134536505332, | |
| "grad_norm": 2.0449760430297435, | |
| "learning_rate": 3.152173913043479e-06, | |
| "loss": 1.3882, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.13125512715340443, | |
| "grad_norm": 1.7054134520698259, | |
| "learning_rate": 4.239130434782609e-06, | |
| "loss": 1.3212, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.16406890894175555, | |
| "grad_norm": 1.5625383091861629, | |
| "learning_rate": 5.3260869565217395e-06, | |
| "loss": 1.2339, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.19688269073010664, | |
| "grad_norm": 1.5467201317331245, | |
| "learning_rate": 6.41304347826087e-06, | |
| "loss": 1.2209, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.22969647251845776, | |
| "grad_norm": 1.5441409779630197, | |
| "learning_rate": 7.500000000000001e-06, | |
| "loss": 1.1981, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.26251025430680885, | |
| "grad_norm": 1.5810387088382716, | |
| "learning_rate": 8.586956521739131e-06, | |
| "loss": 1.1633, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.29532403609515995, | |
| "grad_norm": 1.4632034044381061, | |
| "learning_rate": 9.673913043478262e-06, | |
| "loss": 1.1815, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.3281378178835111, | |
| "grad_norm": 1.6769268374195203, | |
| "learning_rate": 9.998215114657564e-06, | |
| "loss": 1.1741, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.3609515996718622, | |
| "grad_norm": 1.564606037108119, | |
| "learning_rate": 9.98947588668843e-06, | |
| "loss": 1.13, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.3937653814602133, | |
| "grad_norm": 1.4659283460430306, | |
| "learning_rate": 9.973467196782484e-06, | |
| "loss": 1.1339, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.4265791632485644, | |
| "grad_norm": 1.3900414131292986, | |
| "learning_rate": 9.950212368945013e-06, | |
| "loss": 1.1501, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.4593929450369155, | |
| "grad_norm": 1.533573901481892, | |
| "learning_rate": 9.91974528450737e-06, | |
| "loss": 1.1374, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.4922067268252666, | |
| "grad_norm": 1.4930324222767686, | |
| "learning_rate": 9.882110332763275e-06, | |
| "loss": 1.1316, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.5250205086136177, | |
| "grad_norm": 1.4649309232116048, | |
| "learning_rate": 9.83736234629543e-06, | |
| "loss": 1.1199, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.5578342904019689, | |
| "grad_norm": 1.4519140933698538, | |
| "learning_rate": 9.785566521086695e-06, | |
| "loss": 1.1163, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.5906480721903199, | |
| "grad_norm": 1.4591675768161227, | |
| "learning_rate": 9.726798321532205e-06, | |
| "loss": 1.125, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.623461853978671, | |
| "grad_norm": 1.3084115670512635, | |
| "learning_rate": 9.661143370490846e-06, | |
| "loss": 1.1385, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.6562756357670222, | |
| "grad_norm": 1.428698415469623, | |
| "learning_rate": 9.588697324536254e-06, | |
| "loss": 1.0995, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.6890894175553732, | |
| "grad_norm": 1.3195726786177668, | |
| "learning_rate": 9.509565734589105e-06, | |
| "loss": 1.105, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.7219031993437244, | |
| "grad_norm": 1.4644538779508807, | |
| "learning_rate": 9.423863892133754e-06, | |
| "loss": 1.0949, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.7547169811320755, | |
| "grad_norm": 1.4442654685795682, | |
| "learning_rate": 9.33171666124326e-06, | |
| "loss": 1.1097, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.7875307629204266, | |
| "grad_norm": 1.4628666514015816, | |
| "learning_rate": 9.233258296657547e-06, | |
| "loss": 1.0915, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.8203445447087777, | |
| "grad_norm": 1.4584300507718668, | |
| "learning_rate": 9.128632248179761e-06, | |
| "loss": 1.0952, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.8531583264971287, | |
| "grad_norm": 1.3907705989649481, | |
| "learning_rate": 9.017990951675764e-06, | |
| "loss": 1.1072, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.8859721082854799, | |
| "grad_norm": 1.44611189547518, | |
| "learning_rate": 8.901495606981339e-06, | |
| "loss": 1.0908, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.918785890073831, | |
| "grad_norm": 1.4283544277374125, | |
| "learning_rate": 8.779315943040629e-06, | |
| "loss": 1.0934, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.9515996718621821, | |
| "grad_norm": 1.3679340439375278, | |
| "learning_rate": 8.65162997061802e-06, | |
| "loss": 1.0902, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.9844134536505332, | |
| "grad_norm": 1.4053097778093493, | |
| "learning_rate": 8.518623722943747e-06, | |
| "loss": 1.0826, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 1.0164068908941755, | |
| "grad_norm": 1.358051135209502, | |
| "learning_rate": 8.380490984671105e-06, | |
| "loss": 1.0004, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 1.0492206726825266, | |
| "grad_norm": 1.4920803109368017, | |
| "learning_rate": 8.23743300954015e-06, | |
| "loss": 0.9529, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 1.0820344544708778, | |
| "grad_norm": 1.391421362612517, | |
| "learning_rate": 8.089658227159239e-06, | |
| "loss": 0.9108, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 1.114848236259229, | |
| "grad_norm": 1.6241278760522855, | |
| "learning_rate": 7.937381939331628e-06, | |
| "loss": 0.9279, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 1.14766201804758, | |
| "grad_norm": 1.4776439598223472, | |
| "learning_rate": 7.780826006369586e-06, | |
| "loss": 0.9332, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 1.1804757998359312, | |
| "grad_norm": 1.531082298396227, | |
| "learning_rate": 7.620218523852987e-06, | |
| "loss": 0.9503, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 1.2132895816242821, | |
| "grad_norm": 1.4148212125540105, | |
| "learning_rate": 7.4557934903034035e-06, | |
| "loss": 0.9409, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 1.2461033634126333, | |
| "grad_norm": 1.569873004540387, | |
| "learning_rate": 7.287790466257854e-06, | |
| "loss": 0.9228, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 1.2789171452009844, | |
| "grad_norm": 1.50216408338208, | |
| "learning_rate": 7.116454225238909e-06, | |
| "loss": 0.9354, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 1.3117309269893356, | |
| "grad_norm": 1.4646843748186438, | |
| "learning_rate": 6.942034397129702e-06, | |
| "loss": 0.9372, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 1.3445447087776867, | |
| "grad_norm": 1.5763941611606018, | |
| "learning_rate": 6.764785104473411e-06, | |
| "loss": 0.9169, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 1.3773584905660377, | |
| "grad_norm": 1.4076340592270304, | |
| "learning_rate": 6.584964592227135e-06, | |
| "loss": 0.9235, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 1.4101722723543888, | |
| "grad_norm": 1.5649588493962718, | |
| "learning_rate": 6.402834851509564e-06, | |
| "loss": 0.926, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 1.44298605414274, | |
| "grad_norm": 1.370995330259522, | |
| "learning_rate": 6.2186612378906545e-06, | |
| "loss": 0.9327, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 1.475799835931091, | |
| "grad_norm": 1.4405579132350896, | |
| "learning_rate": 6.0327120847794415e-06, | |
| "loss": 0.9461, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 1.5086136177194422, | |
| "grad_norm": 1.5139954304026377, | |
| "learning_rate": 5.845258312473252e-06, | |
| "loss": 0.9479, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 1.5414273995077932, | |
| "grad_norm": 1.5381263145609851, | |
| "learning_rate": 5.656573033437932e-06, | |
| "loss": 0.9217, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 1.5742411812961445, | |
| "grad_norm": 1.5176651174503644, | |
| "learning_rate": 5.466931154394171e-06, | |
| "loss": 0.9402, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 1.6070549630844955, | |
| "grad_norm": 1.64200137668156, | |
| "learning_rate": 5.276608975789683e-06, | |
| "loss": 0.925, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 1.6398687448728466, | |
| "grad_norm": 1.4813527495999301, | |
| "learning_rate": 5.085883789240764e-06, | |
| "loss": 0.9268, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 1.6726825266611978, | |
| "grad_norm": 1.5116269108263887, | |
| "learning_rate": 4.8950334735297746e-06, | |
| "loss": 0.9095, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 1.7054963084495487, | |
| "grad_norm": 1.4490396822436944, | |
| "learning_rate": 4.704336089747135e-06, | |
| "loss": 0.9341, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 1.7383100902379, | |
| "grad_norm": 1.5628125198488163, | |
| "learning_rate": 4.514069476167716e-06, | |
| "loss": 0.932, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 1.771123872026251, | |
| "grad_norm": 1.4687886059027409, | |
| "learning_rate": 4.324510843451851e-06, | |
| "loss": 0.9311, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 1.8039376538146021, | |
| "grad_norm": 1.5227999474981233, | |
| "learning_rate": 4.135936370760759e-06, | |
| "loss": 0.9046, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 1.8367514356029533, | |
| "grad_norm": 1.4388610554454382, | |
| "learning_rate": 3.9486208033748315e-06, | |
| "loss": 0.9378, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 1.8695652173913042, | |
| "grad_norm": 1.3581055291483148, | |
| "learning_rate": 3.762837052401004e-06, | |
| "loss": 0.9235, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 1.9023789991796556, | |
| "grad_norm": 1.5099266943158, | |
| "learning_rate": 3.5788557971524695e-06, | |
| "loss": 0.9444, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 1.9351927809680065, | |
| "grad_norm": 1.5169803759126903, | |
| "learning_rate": 3.3969450907799966e-06, | |
| "loss": 0.9279, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 1.9680065627563577, | |
| "grad_norm": 1.3979513991441848, | |
| "learning_rate": 3.217369969729476e-06, | |
| "loss": 0.9115, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 2.0032813781788352, | |
| "grad_norm": 2.105088244846462, | |
| "learning_rate": 3.0403920675946826e-06, | |
| "loss": 0.8327, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 2.036095159967186, | |
| "grad_norm": 1.6690335971877808, | |
| "learning_rate": 2.8662692339278387e-06, | |
| "loss": 0.7782, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 2.0689089417555375, | |
| "grad_norm": 1.705302067748598, | |
| "learning_rate": 2.6952551585633947e-06, | |
| "loss": 0.7875, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 2.1017227235438884, | |
| "grad_norm": 1.6066988675182772, | |
| "learning_rate": 2.52759900200232e-06, | |
| "loss": 0.7708, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 2.1345365053322394, | |
| "grad_norm": 1.5203406830167119, | |
| "learning_rate": 2.3635450323954773e-06, | |
| "loss": 0.7927, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 2.1673502871205907, | |
| "grad_norm": 1.626389642459301, | |
| "learning_rate": 2.2033322696549197e-06, | |
| "loss": 0.7885, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 2.2001640689089417, | |
| "grad_norm": 1.7664606984797573, | |
| "learning_rate": 2.0471941372116793e-06, | |
| "loss": 0.7626, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 2.232977850697293, | |
| "grad_norm": 1.5793686740276833, | |
| "learning_rate": 1.8953581219273987e-06, | |
| "loss": 0.7754, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 2.265791632485644, | |
| "grad_norm": 1.6724958298486645, | |
| "learning_rate": 1.7480454426552773e-06, | |
| "loss": 0.7783, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 2.298605414273995, | |
| "grad_norm": 1.5819324740235794, | |
| "learning_rate": 1.6054707279332865e-06, | |
| "loss": 0.7705, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 2.3314191960623463, | |
| "grad_norm": 1.6323457128335996, | |
| "learning_rate": 1.4678417032791653e-06, | |
| "loss": 0.7699, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 2.364232977850697, | |
| "grad_norm": 1.7857816578985155, | |
| "learning_rate": 1.335358888542862e-06, | |
| "loss": 0.7526, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 2.3970467596390486, | |
| "grad_norm": 1.5935449756290072, | |
| "learning_rate": 1.20821530575733e-06, | |
| "loss": 0.7918, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 2.4298605414273995, | |
| "grad_norm": 1.693473193818772, | |
| "learning_rate": 1.0865961979133245e-06, | |
| "loss": 0.7815, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 2.462674323215751, | |
| "grad_norm": 1.6977380314021282, | |
| "learning_rate": 9.706787590679685e-07, | |
| "loss": 0.7731, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 2.495488105004102, | |
| "grad_norm": 1.701374033891568, | |
| "learning_rate": 8.606318761802584e-07, | |
| "loss": 0.7666, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 2.5283018867924527, | |
| "grad_norm": 1.8239897351347403, | |
| "learning_rate": 7.566158830496917e-07, | |
| "loss": 0.7657, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 2.561115668580804, | |
| "grad_norm": 1.7594557186973525, | |
| "learning_rate": 6.587823267164911e-07, | |
| "loss": 0.7798, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 2.593929450369155, | |
| "grad_norm": 1.6074400677010736, | |
| "learning_rate": 5.672737466637701e-07, | |
| "loss": 0.7816, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 2.626743232157506, | |
| "grad_norm": 1.589497024974972, | |
| "learning_rate": 4.822234671433552e-07, | |
| "loss": 0.7837, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 2.6595570139458573, | |
| "grad_norm": 1.5745731469005892, | |
| "learning_rate": 4.03755402927804e-07, | |
| "loss": 0.7747, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 2.6923707957342082, | |
| "grad_norm": 1.6156048098137663, | |
| "learning_rate": 3.319838787716634e-07, | |
| "loss": 0.7793, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 2.7251845775225596, | |
| "grad_norm": 1.6695478870964964, | |
| "learning_rate": 2.6701346284499e-07, | |
| "loss": 0.7542, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 2.7579983593109105, | |
| "grad_norm": 1.5107440160015784, | |
| "learning_rate": 2.0893881438180275e-07, | |
| "loss": 0.7844, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 2.790812141099262, | |
| "grad_norm": 1.6477028499246302, | |
| "learning_rate": 1.578445457654637e-07, | |
| "loss": 0.7643, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 2.823625922887613, | |
| "grad_norm": 1.5854989560185238, | |
| "learning_rate": 1.1380509925189853e-07, | |
| "loss": 0.7673, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 2.8564397046759638, | |
| "grad_norm": 1.6787763051906506, | |
| "learning_rate": 7.688463851028227e-08, | |
| "loss": 0.769, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 2.889253486464315, | |
| "grad_norm": 1.581911801204859, | |
| "learning_rate": 4.713695513920147e-08, | |
| "loss": 0.7799, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 2.922067268252666, | |
| "grad_norm": 1.697466745242019, | |
| "learning_rate": 2.4605390294497043e-08, | |
| "loss": 0.785, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 2.954881050041017, | |
| "grad_norm": 1.6425019567953216, | |
| "learning_rate": 9.322771542978892e-09, | |
| "loss": 0.7753, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 2.9876948318293683, | |
| "grad_norm": 1.5783466963735138, | |
| "learning_rate": 1.3113650340046413e-09, | |
| "loss": 0.7911, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "step": 914, | |
| "total_flos": 181229773520896.0, | |
| "train_loss": 0.266992773216715, | |
| "train_runtime": 3529.0775, | |
| "train_samples_per_second": 16.577, | |
| "train_steps_per_second": 0.259 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 915, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 200, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 181229773520896.0, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |