Instructions to use FlameF0X/ChessSLM-PM with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use FlameF0X/ChessSLM-PM with Transformers:
```python
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("text-generation", model="FlameF0X/ChessSLM-PM")
```

```python
# Load model directly
from transformers import AutoTokenizer, AutoModelForCausalLM

tokenizer = AutoTokenizer.from_pretrained("FlameF0X/ChessSLM-PM")
model = AutoModelForCausalLM.from_pretrained("FlameF0X/ChessSLM-PM")
```
- Notebooks
- Google Colab
- Kaggle
- Local Apps Settings
- vLLM
How to use FlameF0X/ChessSLM-PM with vLLM:
Install from pip and serve model
```bash
# Install vLLM from pip:
pip install vllm

# Start the vLLM server:
vllm serve "FlameF0X/ChessSLM-PM"

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "FlameF0X/ChessSLM-PM",
    "prompt": "Once upon a time,",
    "max_tokens": 512,
    "temperature": 0.5
  }'
```
Use Docker
docker model run hf.co/FlameF0X/ChessSLM-PM
- SGLang
How to use FlameF0X/ChessSLM-PM with SGLang:
Install from pip and serve model
```bash
# Install SGLang from pip:
pip install sglang

# Start the SGLang server:
python3 -m sglang.launch_server \
  --model-path "FlameF0X/ChessSLM-PM" \
  --host 0.0.0.0 \
  --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "FlameF0X/ChessSLM-PM",
    "prompt": "Once upon a time,",
    "max_tokens": 512,
    "temperature": 0.5
  }'
```
Use Docker images
```bash
docker run --gpus all \
  --shm-size 32g \
  -p 30000:30000 \
  -v ~/.cache/huggingface:/root/.cache/huggingface \
  --env "HF_TOKEN=<secret>" \
  --ipc=host \
  lmsysorg/sglang:latest \
  python3 -m sglang.launch_server \
    --model-path "FlameF0X/ChessSLM-PM" \
    --host 0.0.0.0 \
    --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "FlameF0X/ChessSLM-PM",
    "prompt": "Once upon a time,",
    "max_tokens": 512,
    "temperature": 0.5
  }'
```
- Docker Model Runner
How to use FlameF0X/ChessSLM-PM with Docker Model Runner:
docker model run hf.co/FlameF0X/ChessSLM-PM
| { | |
| "best_global_step": 4200, | |
| "best_metric": 0.9021432995796204, | |
| "best_model_checkpoint": "./chessslm-2000plus/checkpoint-4200", | |
| "epoch": 0.6086074481959136, | |
| "eval_steps": 200, | |
| "global_step": 4200, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.007245326764237067, | |
| "grad_norm": 0.28264278173446655, | |
| "learning_rate": 2.45e-05, | |
| "loss": 1.2346894073486328, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.014490653528474133, | |
| "grad_norm": 0.25636932253837585, | |
| "learning_rate": 4.9500000000000004e-05, | |
| "loss": 1.1820894622802733, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.0217359802927112, | |
| "grad_norm": 0.2744424343109131, | |
| "learning_rate": 4.9993596199009453e-05, | |
| "loss": 1.1531302642822265, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.028981307056948267, | |
| "grad_norm": 0.28049495816230774, | |
| "learning_rate": 4.997386280918375e-05, | |
| "loss": 1.1511387634277344, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.028981307056948267, | |
| "eval_loss": 1.1023718118667603, | |
| "eval_runtime": 193.7165, | |
| "eval_samples_per_second": 120.0, | |
| "eval_steps_per_second": 3.753, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.036226633821185336, | |
| "grad_norm": 0.2879795730113983, | |
| "learning_rate": 4.994080768959056e-05, | |
| "loss": 1.1302296447753906, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.0434719605854224, | |
| "grad_norm": 0.29017066955566406, | |
| "learning_rate": 4.989444847271928e-05, | |
| "loss": 1.1334593963623047, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.05071728734965947, | |
| "grad_norm": 0.31129732728004456, | |
| "learning_rate": 4.98348098878224e-05, | |
| "loss": 1.1211315155029298, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.057962614113896534, | |
| "grad_norm": 0.29630768299102783, | |
| "learning_rate": 4.9761923747724296e-05, | |
| "loss": 1.1175349426269532, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.057962614113896534, | |
| "eval_loss": 1.0756858587265015, | |
| "eval_runtime": 193.6918, | |
| "eval_samples_per_second": 120.015, | |
| "eval_steps_per_second": 3.753, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.0652079408781336, | |
| "grad_norm": 0.2720015347003937, | |
| "learning_rate": 4.9675828931851424e-05, | |
| "loss": 1.1160638427734375, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.07245326764237067, | |
| "grad_norm": 0.29248911142349243, | |
| "learning_rate": 4.9576571365492965e-05, | |
| "loss": 1.1049542236328125, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.07969859440660773, | |
| "grad_norm": 0.31138038635253906, | |
| "learning_rate": 4.946420399530304e-05, | |
| "loss": 1.1029164123535156, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.0869439211708448, | |
| "grad_norm": 0.2833043932914734, | |
| "learning_rate": 4.933878676105756e-05, | |
| "loss": 1.0982711791992188, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.0869439211708448, | |
| "eval_loss": 1.056943416595459, | |
| "eval_runtime": 193.7026, | |
| "eval_samples_per_second": 120.009, | |
| "eval_steps_per_second": 3.753, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.09418924793508188, | |
| "grad_norm": 0.32613420486450195, | |
| "learning_rate": 4.9200386563680734e-05, | |
| "loss": 1.0998190307617188, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.10143457469931894, | |
| "grad_norm": 0.30005744099617004, | |
| "learning_rate": 4.904907722955829e-05, | |
| "loss": 1.0960050201416016, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.10867990146355601, | |
| "grad_norm": 0.3087840974330902, | |
| "learning_rate": 4.8884939471156544e-05, | |
| "loss": 1.0800698852539063, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.11592522822779307, | |
| "grad_norm": 0.3069000542163849, | |
| "learning_rate": 4.8708060843968165e-05, | |
| "loss": 1.088259506225586, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.11592522822779307, | |
| "eval_loss": 1.0397652387619019, | |
| "eval_runtime": 193.5663, | |
| "eval_samples_per_second": 120.093, | |
| "eval_steps_per_second": 3.756, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.12317055499203014, | |
| "grad_norm": 0.3139374554157257, | |
| "learning_rate": 4.8518535699807713e-05, | |
| "loss": 1.0745314788818359, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.1304158817562672, | |
| "grad_norm": 0.29361429810523987, | |
| "learning_rate": 4.831646513648189e-05, | |
| "loss": 1.0730350494384766, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.13766120852050429, | |
| "grad_norm": 0.32080790400505066, | |
| "learning_rate": 4.810195694386123e-05, | |
| "loss": 1.0730361938476562, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.14490653528474134, | |
| "grad_norm": 0.3122475743293762, | |
| "learning_rate": 4.787512554638205e-05, | |
| "loss": 1.0709983825683593, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.14490653528474134, | |
| "eval_loss": 1.023197054862976, | |
| "eval_runtime": 193.618, | |
| "eval_samples_per_second": 120.061, | |
| "eval_steps_per_second": 3.755, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.1521518620489784, | |
| "grad_norm": 0.33824747800827026, | |
| "learning_rate": 4.763609194200942e-05, | |
| "loss": 1.0684901428222657, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.15939718881321546, | |
| "grad_norm": 0.30118507146835327, | |
| "learning_rate": 4.7384983637693614e-05, | |
| "loss": 1.0644412231445313, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.16664251557745255, | |
| "grad_norm": 0.29680049419403076, | |
| "learning_rate": 4.71219345813544e-05, | |
| "loss": 1.0592123413085937, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.1738878423416896, | |
| "grad_norm": 0.320403128862381, | |
| "learning_rate": 4.684708509042971e-05, | |
| "loss": 1.0508477020263671, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.1738878423416896, | |
| "eval_loss": 1.0096791982650757, | |
| "eval_runtime": 193.6179, | |
| "eval_samples_per_second": 120.061, | |
| "eval_steps_per_second": 3.755, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.18113316910592667, | |
| "grad_norm": 0.3681316673755646, | |
| "learning_rate": 4.656058177702647e-05, | |
| "loss": 1.0457440185546876, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.18837849587016375, | |
| "grad_norm": 0.3355785608291626, | |
| "learning_rate": 4.626257746971382e-05, | |
| "loss": 1.0470626068115234, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.1956238226344008, | |
| "grad_norm": 0.339575856924057, | |
| "learning_rate": 4.5953231132000175e-05, | |
| "loss": 1.0423269653320313, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 0.20286914939863787, | |
| "grad_norm": 0.33345919847488403, | |
| "learning_rate": 4.563270777753791e-05, | |
| "loss": 1.0385567474365234, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.20286914939863787, | |
| "eval_loss": 0.9979937076568604, | |
| "eval_runtime": 193.9537, | |
| "eval_samples_per_second": 119.853, | |
| "eval_steps_per_second": 3.748, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.21011447616287496, | |
| "grad_norm": 0.33952876925468445, | |
| "learning_rate": 4.530117838210059e-05, | |
| "loss": 1.040812759399414, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 0.21735980292711202, | |
| "grad_norm": 0.3470812737941742, | |
| "learning_rate": 4.4958819792379846e-05, | |
| "loss": 1.0452130126953125, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.22460512969134908, | |
| "grad_norm": 0.37180599570274353, | |
| "learning_rate": 4.460581463165071e-05, | |
| "loss": 1.0368424224853516, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 0.23185045645558613, | |
| "grad_norm": 0.3716419041156769, | |
| "learning_rate": 4.424235120235537e-05, | |
| "loss": 1.0325569915771484, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.23185045645558613, | |
| "eval_loss": 0.9863654971122742, | |
| "eval_runtime": 192.694, | |
| "eval_samples_per_second": 120.637, | |
| "eval_steps_per_second": 3.773, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.23909578321982322, | |
| "grad_norm": 0.33746030926704407, | |
| "learning_rate": 4.386862338565759e-05, | |
| "loss": 1.0336730194091797, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 0.24634110998406028, | |
| "grad_norm": 0.34745824337005615, | |
| "learning_rate": 4.3484830538021324e-05, | |
| "loss": 1.0262679290771484, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.25358643674829734, | |
| "grad_norm": 0.33224305510520935, | |
| "learning_rate": 4.3091177384868585e-05, | |
| "loss": 1.0238925170898439, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 0.2608317635125344, | |
| "grad_norm": 0.3336823880672455, | |
| "learning_rate": 4.26878739113734e-05, | |
| "loss": 1.021321792602539, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.2608317635125344, | |
| "eval_loss": 0.9752877354621887, | |
| "eval_runtime": 192.2236, | |
| "eval_samples_per_second": 120.932, | |
| "eval_steps_per_second": 3.782, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.26807709027677146, | |
| "grad_norm": 0.3285761773586273, | |
| "learning_rate": 4.2275135250450106e-05, | |
| "loss": 1.0217689514160155, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 0.27532241704100857, | |
| "grad_norm": 0.35993409156799316, | |
| "learning_rate": 4.1853181567995645e-05, | |
| "loss": 1.0172651672363282, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.28256774380524563, | |
| "grad_norm": 0.3507116734981537, | |
| "learning_rate": 4.142223794544715e-05, | |
| "loss": 1.0248129272460937, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 0.2898130705694827, | |
| "grad_norm": 0.3568938374519348, | |
| "learning_rate": 4.0982534259717475e-05, | |
| "loss": 1.0136798095703126, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.2898130705694827, | |
| "eval_loss": 0.9648082256317139, | |
| "eval_runtime": 192.1777, | |
| "eval_samples_per_second": 120.961, | |
| "eval_steps_per_second": 3.783, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.29705839733371975, | |
| "grad_norm": 0.3777235746383667, | |
| "learning_rate": 4.053430506057268e-05, | |
| "loss": 1.0077315521240235, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 0.3043037240979568, | |
| "grad_norm": 0.3593011498451233, | |
| "learning_rate": 4.0077789445516814e-05, | |
| "loss": 1.0111034393310547, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 0.31154905086219387, | |
| "grad_norm": 0.3624265193939209, | |
| "learning_rate": 3.9613230932250985e-05, | |
| "loss": 1.0038584136962891, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 0.3187943776264309, | |
| "grad_norm": 0.3455412983894348, | |
| "learning_rate": 3.9140877328774375e-05, | |
| "loss": 1.0052964782714844, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 0.3187943776264309, | |
| "eval_loss": 0.9546002149581909, | |
| "eval_runtime": 193.0198, | |
| "eval_samples_per_second": 120.433, | |
| "eval_steps_per_second": 3.766, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 0.32603970439066804, | |
| "grad_norm": 0.35707926750183105, | |
| "learning_rate": 3.866098060119684e-05, | |
| "loss": 0.9974722290039062, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 0.3332850311549051, | |
| "grad_norm": 0.3994494080543518, | |
| "learning_rate": 3.817379673933341e-05, | |
| "loss": 0.9981327056884766, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 0.34053035791914216, | |
| "grad_norm": 0.36747175455093384, | |
| "learning_rate": 3.767958562015246e-05, | |
| "loss": 0.9972872924804688, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 0.3477756846833792, | |
| "grad_norm": 0.36322903633117676, | |
| "learning_rate": 3.717861086915026e-05, | |
| "loss": 0.9997342681884765, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.3477756846833792, | |
| "eval_loss": 0.9470997452735901, | |
| "eval_runtime": 192.2555, | |
| "eval_samples_per_second": 120.912, | |
| "eval_steps_per_second": 3.781, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.3550210114476163, | |
| "grad_norm": 0.33153679966926575, | |
| "learning_rate": 3.6671139719726174e-05, | |
| "loss": 0.9987025451660156, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 0.36226633821185333, | |
| "grad_norm": 0.40384331345558167, | |
| "learning_rate": 3.6157442870633096e-05, | |
| "loss": 0.992891616821289, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.36951166497609045, | |
| "grad_norm": 0.3685908913612366, | |
| "learning_rate": 3.563779434157947e-05, | |
| "loss": 0.991015396118164, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 0.3767569917403275, | |
| "grad_norm": 0.37052083015441895, | |
| "learning_rate": 3.511247132705986e-05, | |
| "loss": 0.9884940338134766, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 0.3767569917403275, | |
| "eval_loss": 0.939989447593689, | |
| "eval_runtime": 192.4644, | |
| "eval_samples_per_second": 120.781, | |
| "eval_steps_per_second": 3.777, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 0.38400231850456457, | |
| "grad_norm": 0.35503244400024414, | |
| "learning_rate": 3.458175404849188e-05, | |
| "loss": 0.9872270965576172, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 0.3912476452688016, | |
| "grad_norm": 0.3497767746448517, | |
| "learning_rate": 3.4045925604738604e-05, | |
| "loss": 0.9846116638183594, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 0.3984929720330387, | |
| "grad_norm": 0.3695673942565918, | |
| "learning_rate": 3.350527182109603e-05, | |
| "loss": 0.9809439849853515, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 0.40573829879727574, | |
| "grad_norm": 0.36862626671791077, | |
| "learning_rate": 3.296008109682616e-05, | |
| "loss": 0.9826165008544921, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 0.40573829879727574, | |
| "eval_loss": 0.9336075186729431, | |
| "eval_runtime": 192.3219, | |
| "eval_samples_per_second": 120.87, | |
| "eval_steps_per_second": 3.78, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 0.4129836255615128, | |
| "grad_norm": 0.34875786304473877, | |
| "learning_rate": 3.241064425131708e-05, | |
| "loss": 0.9786082458496094, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 0.4202289523257499, | |
| "grad_norm": 0.3561500608921051, | |
| "learning_rate": 3.185725436895209e-05, | |
| "loss": 0.9792596435546875, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 0.427474279089987, | |
| "grad_norm": 0.36314815282821655, | |
| "learning_rate": 3.130020664277064e-05, | |
| "loss": 0.9800941467285156, | |
| "step": 2950 | |
| }, | |
| { | |
| "epoch": 0.43471960585422403, | |
| "grad_norm": 0.3698309361934662, | |
| "learning_rate": 3.0739798217004354e-05, | |
| "loss": 0.9847097015380859, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.43471960585422403, | |
| "eval_loss": 0.927081286907196, | |
| "eval_runtime": 192.3461, | |
| "eval_samples_per_second": 120.855, | |
| "eval_steps_per_second": 3.78, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.4419649326184611, | |
| "grad_norm": 0.3639983534812927, | |
| "learning_rate": 3.0176328028572408e-05, | |
| "loss": 0.9701497650146484, | |
| "step": 3050 | |
| }, | |
| { | |
| "epoch": 0.44921025938269815, | |
| "grad_norm": 0.3515409827232361, | |
| "learning_rate": 2.9610096647620445e-05, | |
| "loss": 0.9721237945556641, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 0.4564555861469352, | |
| "grad_norm": 0.391891747713089, | |
| "learning_rate": 2.9041406117188458e-05, | |
| "loss": 0.9730013275146484, | |
| "step": 3150 | |
| }, | |
| { | |
| "epoch": 0.46370091291117227, | |
| "grad_norm": 0.3907790780067444, | |
| "learning_rate": 2.8470559792092855e-05, | |
| "loss": 0.974240951538086, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 0.46370091291117227, | |
| "eval_loss": 0.9227471351623535, | |
| "eval_runtime": 192.3251, | |
| "eval_samples_per_second": 120.868, | |
| "eval_steps_per_second": 3.78, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 0.4709462396754094, | |
| "grad_norm": 0.3531961143016815, | |
| "learning_rate": 2.789786217710888e-05, | |
| "loss": 0.9673224639892578, | |
| "step": 3250 | |
| }, | |
| { | |
| "epoch": 0.47819156643964644, | |
| "grad_norm": 0.33910030126571655, | |
| "learning_rate": 2.732361876453957e-05, | |
| "loss": 0.9719451904296875, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 0.4854368932038835, | |
| "grad_norm": 0.37699881196022034, | |
| "learning_rate": 2.6748135871257955e-05, | |
| "loss": 0.9684635925292969, | |
| "step": 3350 | |
| }, | |
| { | |
| "epoch": 0.49268221996812056, | |
| "grad_norm": 0.3599157929420471, | |
| "learning_rate": 2.617172047530939e-05, | |
| "loss": 0.957724609375, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 0.49268221996812056, | |
| "eval_loss": 0.91666579246521, | |
| "eval_runtime": 192.0058, | |
| "eval_samples_per_second": 121.069, | |
| "eval_steps_per_second": 3.786, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 0.4999275467323576, | |
| "grad_norm": 0.37489375472068787, | |
| "learning_rate": 2.5594680052161206e-05, | |
| "loss": 0.9689096069335937, | |
| "step": 3450 | |
| }, | |
| { | |
| "epoch": 0.5071728734965947, | |
| "grad_norm": 0.3328556418418884, | |
| "learning_rate": 2.5017322410687077e-05, | |
| "loss": 0.96274169921875, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.5144182002608317, | |
| "grad_norm": 0.35683172941207886, | |
| "learning_rate": 2.4439955528973414e-05, | |
| "loss": 0.9614187622070313, | |
| "step": 3550 | |
| }, | |
| { | |
| "epoch": 0.5216635270250688, | |
| "grad_norm": 0.36412957310676575, | |
| "learning_rate": 2.386288739003567e-05, | |
| "loss": 0.964275131225586, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 0.5216635270250688, | |
| "eval_loss": 0.9119325280189514, | |
| "eval_runtime": 192.5165, | |
| "eval_samples_per_second": 120.748, | |
| "eval_steps_per_second": 3.776, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 0.5289088537893059, | |
| "grad_norm": 0.3563174307346344, | |
| "learning_rate": 2.3286425817531836e-05, | |
| "loss": 0.9626367950439453, | |
| "step": 3650 | |
| }, | |
| { | |
| "epoch": 0.5361541805535429, | |
| "grad_norm": 0.3758934438228607, | |
| "learning_rate": 2.271087831156107e-05, | |
| "loss": 0.9564736938476562, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 0.5433995073177801, | |
| "grad_norm": 0.34257784485816956, | |
| "learning_rate": 2.2136551884634864e-05, | |
| "loss": 0.9561862945556641, | |
| "step": 3750 | |
| }, | |
| { | |
| "epoch": 0.5506448340820171, | |
| "grad_norm": 0.38688501715660095, | |
| "learning_rate": 2.1563752897908352e-05, | |
| "loss": 0.9572794342041016, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 0.5506448340820171, | |
| "eval_loss": 0.9085790514945984, | |
| "eval_runtime": 193.5054, | |
| "eval_samples_per_second": 120.131, | |
| "eval_steps_per_second": 3.757, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 0.5578901608462542, | |
| "grad_norm": 0.4174833297729492, | |
| "learning_rate": 2.0992786897758974e-05, | |
| "loss": 0.9556180572509766, | |
| "step": 3850 | |
| }, | |
| { | |
| "epoch": 0.5651354876104913, | |
| "grad_norm": 0.35218948125839233, | |
| "learning_rate": 2.0423958452799854e-05, | |
| "loss": 0.9547309875488281, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 0.5723808143747283, | |
| "grad_norm": 0.3613223433494568, | |
| "learning_rate": 1.9857570991414737e-05, | |
| "loss": 0.9548380279541016, | |
| "step": 3950 | |
| }, | |
| { | |
| "epoch": 0.5796261411389654, | |
| "grad_norm": 0.35305657982826233, | |
| "learning_rate": 1.9293926639901092e-05, | |
| "loss": 0.9536138916015625, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.5796261411389654, | |
| "eval_loss": 0.9045887589454651, | |
| "eval_runtime": 193.4058, | |
| "eval_samples_per_second": 120.193, | |
| "eval_steps_per_second": 3.759, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.5868714679032024, | |
| "grad_norm": 0.35594019293785095, | |
| "learning_rate": 1.873332606130787e-05, | |
| "loss": 0.961040267944336, | |
| "step": 4050 | |
| }, | |
| { | |
| "epoch": 0.5941167946674395, | |
| "grad_norm": 0.3579290509223938, | |
| "learning_rate": 1.8176068295053684e-05, | |
| "loss": 0.953524169921875, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 0.6013621214316766, | |
| "grad_norm": 0.37046733498573303, | |
| "learning_rate": 1.7622450597411216e-05, | |
| "loss": 0.9559716796875, | |
| "step": 4150 | |
| }, | |
| { | |
| "epoch": 0.6086074481959136, | |
| "grad_norm": 0.41828927397727966, | |
| "learning_rate": 1.7072768282942695e-05, | |
| "loss": 0.9484828186035156, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 0.6086074481959136, | |
| "eval_loss": 0.9021432995796204, | |
| "eval_runtime": 193.4682, | |
| "eval_samples_per_second": 120.154, | |
| "eval_steps_per_second": 3.758, | |
| "step": 4200 | |
| } | |
| ], | |
| "logging_steps": 50, | |
| "max_steps": 6901, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 200, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 8789181767221248.0, | |
| "train_batch_size": 32, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |