Text Generation
Transformers
Safetensors
qwen3
llama-factory
full
Generated from Trainer
conversational
text-generation-inference
Instructions to use TabibitoQZP/Qwen3-4B-Sorted-3Task with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use TabibitoQZP/Qwen3-4B-Sorted-3Task with Transformers:
```python
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("text-generation", model="TabibitoQZP/Qwen3-4B-Sorted-3Task")
messages = [
    {"role": "user", "content": "Who are you?"},
]
pipe(messages)
```

```python
# Load model directly
from transformers import AutoTokenizer, AutoModelForCausalLM

tokenizer = AutoTokenizer.from_pretrained("TabibitoQZP/Qwen3-4B-Sorted-3Task")
model = AutoModelForCausalLM.from_pretrained("TabibitoQZP/Qwen3-4B-Sorted-3Task")
messages = [
    {"role": "user", "content": "Who are you?"},
]
inputs = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt=True,
    tokenize=True,
    return_dict=True,
    return_tensors="pt",
).to(model.device)

outputs = model.generate(**inputs, max_new_tokens=40)
print(tokenizer.decode(outputs[0][inputs["input_ids"].shape[-1]:]))
```
- Notebooks
- Google Colab
- Kaggle
- Local Apps Settings
- vLLM
How to use TabibitoQZP/Qwen3-4B-Sorted-3Task with vLLM:
Install from pip and serve model
```bash
# Install vLLM from pip:
pip install vllm

# Start the vLLM server:
vllm serve "TabibitoQZP/Qwen3-4B-Sorted-3Task"

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/chat/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "TabibitoQZP/Qwen3-4B-Sorted-3Task",
    "messages": [
      {
        "role": "user",
        "content": "What is the capital of France?"
      }
    ]
  }'
```
Use Docker
docker model run hf.co/TabibitoQZP/Qwen3-4B-Sorted-3Task
- SGLang
How to use TabibitoQZP/Qwen3-4B-Sorted-3Task with SGLang:
Install from pip and serve model
```bash
# Install SGLang from pip:
pip install sglang

# Start the SGLang server:
python3 -m sglang.launch_server \
  --model-path "TabibitoQZP/Qwen3-4B-Sorted-3Task" \
  --host 0.0.0.0 \
  --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "TabibitoQZP/Qwen3-4B-Sorted-3Task",
    "messages": [
      {
        "role": "user",
        "content": "What is the capital of France?"
      }
    ]
  }'
```
Use Docker images
```bash
docker run --gpus all \
  --shm-size 32g \
  -p 30000:30000 \
  -v ~/.cache/huggingface:/root/.cache/huggingface \
  --env "HF_TOKEN=<secret>" \
  --ipc=host \
  lmsysorg/sglang:latest \
  python3 -m sglang.launch_server \
    --model-path "TabibitoQZP/Qwen3-4B-Sorted-3Task" \
    --host 0.0.0.0 \
    --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "TabibitoQZP/Qwen3-4B-Sorted-3Task",
    "messages": [
      {
        "role": "user",
        "content": "What is the capital of France?"
      }
    ]
  }'
```
- Docker Model Runner
How to use TabibitoQZP/Qwen3-4B-Sorted-3Task with Docker Model Runner:
docker model run hf.co/TabibitoQZP/Qwen3-4B-Sorted-3Task
| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 2.000629326620516, | |
| "eval_steps": 500, | |
| "global_step": 1589, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0062932662051604785, | |
| "grad_norm": 39.40880584716797, | |
| "learning_rate": 5.660377358490567e-07, | |
| "loss": 1.2507, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.012586532410320957, | |
| "grad_norm": 12.689669609069824, | |
| "learning_rate": 1.1949685534591195e-06, | |
| "loss": 0.9818, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.018879798615481436, | |
| "grad_norm": 1.60954749584198, | |
| "learning_rate": 1.8238993710691824e-06, | |
| "loss": 0.6639, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.025173064820641914, | |
| "grad_norm": 0.9736618995666504, | |
| "learning_rate": 2.4528301886792453e-06, | |
| "loss": 0.603, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.03146633102580239, | |
| "grad_norm": 0.9699676632881165, | |
| "learning_rate": 3.0817610062893084e-06, | |
| "loss": 0.5679, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.03775959723096287, | |
| "grad_norm": 0.8372435569763184, | |
| "learning_rate": 3.710691823899371e-06, | |
| "loss": 0.5549, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.04405286343612335, | |
| "grad_norm": 0.8186138272285461, | |
| "learning_rate": 4.339622641509435e-06, | |
| "loss": 0.5552, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.05034612964128383, | |
| "grad_norm": 0.7362136840820312, | |
| "learning_rate": 4.968553459119497e-06, | |
| "loss": 0.558, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.056639395846444306, | |
| "grad_norm": 0.8293086290359497, | |
| "learning_rate": 5.59748427672956e-06, | |
| "loss": 0.5432, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.06293266205160478, | |
| "grad_norm": 0.7764604091644287, | |
| "learning_rate": 6.226415094339623e-06, | |
| "loss": 0.541, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.06922592825676527, | |
| "grad_norm": 0.8436954021453857, | |
| "learning_rate": 6.855345911949685e-06, | |
| "loss": 0.5457, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.07551919446192575, | |
| "grad_norm": 0.7573267817497253, | |
| "learning_rate": 7.484276729559748e-06, | |
| "loss": 0.5285, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.08181246066708622, | |
| "grad_norm": 0.8208069801330566, | |
| "learning_rate": 8.113207547169812e-06, | |
| "loss": 0.5352, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.0881057268722467, | |
| "grad_norm": 0.759560227394104, | |
| "learning_rate": 8.742138364779875e-06, | |
| "loss": 0.5333, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.09439899307740718, | |
| "grad_norm": 0.8434644341468811, | |
| "learning_rate": 9.371069182389939e-06, | |
| "loss": 0.5372, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.10069225928256766, | |
| "grad_norm": 0.8114253878593445, | |
| "learning_rate": 1e-05, | |
| "loss": 0.5339, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.10698552548772813, | |
| "grad_norm": 0.8041621446609497, | |
| "learning_rate": 9.998793436421342e-06, | |
| "loss": 0.5371, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.11327879169288861, | |
| "grad_norm": 0.782455563545227, | |
| "learning_rate": 9.99517432800363e-06, | |
| "loss": 0.5224, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.11957205789804909, | |
| "grad_norm": 0.802542507648468, | |
| "learning_rate": 9.98914442142063e-06, | |
| "loss": 0.5209, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.12586532410320955, | |
| "grad_norm": 0.8419063091278076, | |
| "learning_rate": 9.980706626858607e-06, | |
| "loss": 0.5261, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.13215859030837004, | |
| "grad_norm": 0.7716870307922363, | |
| "learning_rate": 9.9698650166118e-06, | |
| "loss": 0.522, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.13845185651353054, | |
| "grad_norm": 0.774776816368103, | |
| "learning_rate": 9.956624823117036e-06, | |
| "loss": 0.5305, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.144745122718691, | |
| "grad_norm": 0.7823233008384705, | |
| "learning_rate": 9.94099243642841e-06, | |
| "loss": 0.5247, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.1510383889238515, | |
| "grad_norm": 0.7220829725265503, | |
| "learning_rate": 9.922975401133292e-06, | |
| "loss": 0.5286, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.15733165512901195, | |
| "grad_norm": 0.7797294855117798, | |
| "learning_rate": 9.90258241271112e-06, | |
| "loss": 0.5299, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.16362492133417245, | |
| "grad_norm": 0.7687580585479736, | |
| "learning_rate": 9.879823313336723e-06, | |
| "loss": 0.5262, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.1699181875393329, | |
| "grad_norm": 0.7156737446784973, | |
| "learning_rate": 9.854709087130261e-06, | |
| "loss": 0.5227, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.1762114537444934, | |
| "grad_norm": 0.747580885887146, | |
| "learning_rate": 9.827251854855992e-06, | |
| "loss": 0.5186, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.18250471994965387, | |
| "grad_norm": 0.7566559910774231, | |
| "learning_rate": 9.797464868072489e-06, | |
| "loss": 0.5127, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.18879798615481436, | |
| "grad_norm": 0.747591495513916, | |
| "learning_rate": 9.765362502737098e-06, | |
| "loss": 0.5167, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.19509125235997482, | |
| "grad_norm": 0.732440173625946, | |
| "learning_rate": 9.730960252267744e-06, | |
| "loss": 0.5225, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.2013845185651353, | |
| "grad_norm": 0.7387551069259644, | |
| "learning_rate": 9.6942747200654e-06, | |
| "loss": 0.5149, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.20767778477029578, | |
| "grad_norm": 0.7358985543251038, | |
| "learning_rate": 9.655323611500876e-06, | |
| "loss": 0.518, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.21397105097545627, | |
| "grad_norm": 0.7722839117050171, | |
| "learning_rate": 9.614125725369748e-06, | |
| "loss": 0.5095, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.22026431718061673, | |
| "grad_norm": 0.677197277545929, | |
| "learning_rate": 9.570700944819584e-06, | |
| "loss": 0.5233, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.22655758338577722, | |
| "grad_norm": 0.6825560331344604, | |
| "learning_rate": 9.525070227753835e-06, | |
| "loss": 0.5125, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.2328508495909377, | |
| "grad_norm": 0.6920183300971985, | |
| "learning_rate": 9.477255596717012e-06, | |
| "loss": 0.5191, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.23914411579609818, | |
| "grad_norm": 0.7336747646331787, | |
| "learning_rate": 9.427280128266049e-06, | |
| "loss": 0.5163, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.24543738200125864, | |
| "grad_norm": 0.7665858268737793, | |
| "learning_rate": 9.375167941832974e-06, | |
| "loss": 0.5062, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.2517306482064191, | |
| "grad_norm": 0.6906554102897644, | |
| "learning_rate": 9.320944188084241e-06, | |
| "loss": 0.518, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.2580239144115796, | |
| "grad_norm": 0.7612572312355042, | |
| "learning_rate": 9.264635036782406e-06, | |
| "loss": 0.5042, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.2643171806167401, | |
| "grad_norm": 0.7517194747924805, | |
| "learning_rate": 9.206267664155906e-06, | |
| "loss": 0.5221, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.27061044682190055, | |
| "grad_norm": 0.7678345441818237, | |
| "learning_rate": 9.145870239783143e-06, | |
| "loss": 0.5172, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.27690371302706107, | |
| "grad_norm": 0.7215328812599182, | |
| "learning_rate": 9.08347191299711e-06, | |
| "loss": 0.5143, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.28319697923222154, | |
| "grad_norm": 0.6326926350593567, | |
| "learning_rate": 9.019102798817196e-06, | |
| "loss": 0.5164, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.289490245437382, | |
| "grad_norm": 0.689453661441803, | |
| "learning_rate": 8.952793963414908e-06, | |
| "loss": 0.5179, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.29578351164254246, | |
| "grad_norm": 0.7151985168457031, | |
| "learning_rate": 8.884577409120535e-06, | |
| "loss": 0.5073, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.302076777847703, | |
| "grad_norm": 0.7656172513961792, | |
| "learning_rate": 8.814486058978035e-06, | |
| "loss": 0.5042, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.30837004405286345, | |
| "grad_norm": 0.680549144744873, | |
| "learning_rate": 8.742553740855507e-06, | |
| "loss": 0.5191, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.3146633102580239, | |
| "grad_norm": 0.7366177439689636, | |
| "learning_rate": 8.66881517111902e-06, | |
| "loss": 0.5163, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.3209565764631844, | |
| "grad_norm": 0.8044482469558716, | |
| "learning_rate": 8.593305937877614e-06, | |
| "loss": 0.5152, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.3272498426683449, | |
| "grad_norm": 0.6989796161651611, | |
| "learning_rate": 8.516062483807556e-06, | |
| "loss": 0.5192, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.33354310887350536, | |
| "grad_norm": 0.705839216709137, | |
| "learning_rate": 8.437122088564197e-06, | |
| "loss": 0.5054, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.3398363750786658, | |
| "grad_norm": 0.6799296736717224, | |
| "learning_rate": 8.356522850789852e-06, | |
| "loss": 0.5032, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.3461296412838263, | |
| "grad_norm": 0.8019563555717468, | |
| "learning_rate": 8.274303669726427e-06, | |
| "loss": 0.5113, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.3524229074889868, | |
| "grad_norm": 0.7109248638153076, | |
| "learning_rate": 8.190504226441654e-06, | |
| "loss": 0.5029, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.35871617369414727, | |
| "grad_norm": 0.7193357944488525, | |
| "learning_rate": 8.105164964678009e-06, | |
| "loss": 0.5127, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.36500943989930773, | |
| "grad_norm": 0.6586730480194092, | |
| "learning_rate": 8.018327071333521e-06, | |
| "loss": 0.5178, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.3713027061044682, | |
| "grad_norm": 0.7992932796478271, | |
| "learning_rate": 7.930032456583931e-06, | |
| "loss": 0.5064, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.3775959723096287, | |
| "grad_norm": 0.6866645812988281, | |
| "learning_rate": 7.84032373365578e-06, | |
| "loss": 0.5025, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.3838892385147892, | |
| "grad_norm": 0.662344217300415, | |
| "learning_rate": 7.749244198260175e-06, | |
| "loss": 0.5103, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.39018250471994964, | |
| "grad_norm": 0.6587361693382263, | |
| "learning_rate": 7.656837807697187e-06, | |
| "loss": 0.5129, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.3964757709251101, | |
| "grad_norm": 0.6918533444404602, | |
| "learning_rate": 7.563149159640929e-06, | |
| "loss": 0.5053, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.4027690371302706, | |
| "grad_norm": 0.6420175433158875, | |
| "learning_rate": 7.468223470615593e-06, | |
| "loss": 0.5223, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.4090623033354311, | |
| "grad_norm": 0.7031328678131104, | |
| "learning_rate": 7.372106554172802e-06, | |
| "loss": 0.5024, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.41535556954059155, | |
| "grad_norm": 0.7460775375366211, | |
| "learning_rate": 7.274844798780826e-06, | |
| "loss": 0.5123, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.42164883574575207, | |
| "grad_norm": 0.6937898397445679, | |
| "learning_rate": 7.176485145436325e-06, | |
| "loss": 0.5051, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.42794210195091253, | |
| "grad_norm": 0.623894453048706, | |
| "learning_rate": 7.0770750650094335e-06, | |
| "loss": 0.5059, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.434235368156073, | |
| "grad_norm": 0.6496269106864929, | |
| "learning_rate": 6.976662535333107e-06, | |
| "loss": 0.4999, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.44052863436123346, | |
| "grad_norm": 0.6723958253860474, | |
| "learning_rate": 6.87529601804781e-06, | |
| "loss": 0.5054, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.446821900566394, | |
| "grad_norm": 0.6890814900398254, | |
| "learning_rate": 6.773024435212678e-06, | |
| "loss": 0.5066, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.45311516677155445, | |
| "grad_norm": 0.6805148720741272, | |
| "learning_rate": 6.669897145694507e-06, | |
| "loss": 0.5086, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.4594084329767149, | |
| "grad_norm": 0.6633646488189697, | |
| "learning_rate": 6.565963921345896e-06, | |
| "loss": 0.4939, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.4657016991818754, | |
| "grad_norm": 0.6664919257164001, | |
| "learning_rate": 6.461274922984087e-06, | |
| "loss": 0.4995, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.4719949653870359, | |
| "grad_norm": 0.6816923022270203, | |
| "learning_rate": 6.355880676182086e-06, | |
| "loss": 0.5038, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.47828823159219636, | |
| "grad_norm": 0.6514876484870911, | |
| "learning_rate": 6.249832046883729e-06, | |
| "loss": 0.5011, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.4845814977973568, | |
| "grad_norm": 0.6344130039215088, | |
| "learning_rate": 6.143180216854488e-06, | |
| "loss": 0.5034, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.4908747640025173, | |
| "grad_norm": 0.6643583178520203, | |
| "learning_rate": 6.035976658979846e-06, | |
| "loss": 0.4956, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.4971680302076778, | |
| "grad_norm": 0.7020254731178284, | |
| "learning_rate": 5.928273112423177e-06, | |
| "loss": 0.497, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 1.0037759597230962, | |
| "grad_norm": 0.6873272657394409, | |
| "learning_rate": 5.820121557655109e-06, | |
| "loss": 0.5445, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 1.0100692259282569, | |
| "grad_norm": 0.6778249144554138, | |
| "learning_rate": 5.711574191366427e-06, | |
| "loss": 0.4808, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 1.0163624921334173, | |
| "grad_norm": 0.6917534470558167, | |
| "learning_rate": 5.6026834012766155e-06, | |
| "loss": 0.4871, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 1.0226557583385778, | |
| "grad_norm": 0.6982170343399048, | |
| "learning_rate": 5.493501740850228e-06, | |
| "loss": 0.4768, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 1.0289490245437383, | |
| "grad_norm": 0.622083842754364, | |
| "learning_rate": 5.384081903933235e-06, | |
| "loss": 0.4874, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 1.0352422907488987, | |
| "grad_norm": 0.682299792766571, | |
| "learning_rate": 5.274476699321638e-06, | |
| "loss": 0.4787, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 1.0415355569540592, | |
| "grad_norm": 0.719980776309967, | |
| "learning_rate": 5.164739025274604e-06, | |
| "loss": 0.4731, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 1.0478288231592197, | |
| "grad_norm": 0.7684125304222107, | |
| "learning_rate": 5.0549218439844185e-06, | |
| "loss": 0.4858, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 1.0541220893643801, | |
| "grad_norm": 0.6164060831069946, | |
| "learning_rate": 4.945078156015582e-06, | |
| "loss": 0.4803, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 1.0604153555695406, | |
| "grad_norm": 0.7356188297271729, | |
| "learning_rate": 4.835260974725397e-06, | |
| "loss": 0.4756, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 1.066708621774701, | |
| "grad_norm": 0.6549850106239319, | |
| "learning_rate": 4.7255233006783626e-06, | |
| "loss": 0.4665, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 1.0730018879798615, | |
| "grad_norm": 0.6597391366958618, | |
| "learning_rate": 4.615918096066766e-06, | |
| "loss": 0.4669, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 1.079295154185022, | |
| "grad_norm": 0.6646954417228699, | |
| "learning_rate": 4.506498259149774e-06, | |
| "loss": 0.4717, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 1.0855884203901824, | |
| "grad_norm": 0.6768482327461243, | |
| "learning_rate": 4.397316598723385e-06, | |
| "loss": 0.4702, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 1.091881686595343, | |
| "grad_norm": 0.7066707611083984, | |
| "learning_rate": 4.2884258086335755e-06, | |
| "loss": 0.4502, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 1.0981749528005034, | |
| "grad_norm": 0.6753413677215576, | |
| "learning_rate": 4.179878442344892e-06, | |
| "loss": 0.4721, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 1.104468219005664, | |
| "grad_norm": 0.6423931121826172, | |
| "learning_rate": 4.071726887576823e-06, | |
| "loss": 0.4661, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 1.1107614852108245, | |
| "grad_norm": 0.686931848526001, | |
| "learning_rate": 3.9640233410201555e-06, | |
| "loss": 0.4684, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 1.117054751415985, | |
| "grad_norm": 0.6282669901847839, | |
| "learning_rate": 3.856819783145514e-06, | |
| "loss": 0.4621, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 1.1233480176211454, | |
| "grad_norm": 0.7436355352401733, | |
| "learning_rate": 3.750167953116272e-06, | |
| "loss": 0.4575, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 1.129641283826306, | |
| "grad_norm": 0.6427481174468994, | |
| "learning_rate": 3.6441193238179152e-06, | |
| "loss": 0.4591, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 1.1359345500314664, | |
| "grad_norm": 0.6406493782997131, | |
| "learning_rate": 3.5387250770159152e-06, | |
| "loss": 0.4503, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 1.1422278162366268, | |
| "grad_norm": 0.6536152958869934, | |
| "learning_rate": 3.4340360786541067e-06, | |
| "loss": 0.454, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 1.1485210824417873, | |
| "grad_norm": 0.7142040729522705, | |
| "learning_rate": 3.3301028543054935e-06, | |
| "loss": 0.4527, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 1.1548143486469478, | |
| "grad_norm": 0.6241782903671265, | |
| "learning_rate": 3.226975564787322e-06, | |
| "loss": 0.4472, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 1.1611076148521082, | |
| "grad_norm": 0.6668282747268677, | |
| "learning_rate": 3.1247039819521907e-06, | |
| "loss": 0.4509, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 1.1674008810572687, | |
| "grad_norm": 0.6013820171356201, | |
| "learning_rate": 3.0233374646668935e-06, | |
| "loss": 0.443, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 1.1736941472624292, | |
| "grad_norm": 0.6819528341293335, | |
| "learning_rate": 2.9229249349905686e-06, | |
| "loss": 0.4598, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 1.1799874134675896, | |
| "grad_norm": 0.6443890929222107, | |
| "learning_rate": 2.8235148545636776e-06, | |
| "loss": 0.447, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 1.18628067967275, | |
| "grad_norm": 0.6288453340530396, | |
| "learning_rate": 2.7251552012191763e-06, | |
| "loss": 0.4563, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 1.1925739458779105, | |
| "grad_norm": 0.661491334438324, | |
| "learning_rate": 2.6278934458271998e-06, | |
| "loss": 0.4443, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 1.198867212083071, | |
| "grad_norm": 0.6220525503158569, | |
| "learning_rate": 2.531776529384407e-06, | |
| "loss": 0.446, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 1.2051604782882315, | |
| "grad_norm": 0.6786297559738159, | |
| "learning_rate": 2.436850840359073e-06, | |
| "loss": 0.4464, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 1.2114537444933922, | |
| "grad_norm": 0.7809886336326599, | |
| "learning_rate": 2.3431621923028146e-06, | |
| "loss": 0.4554, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 1.2177470106985526, | |
| "grad_norm": 0.7114007472991943, | |
| "learning_rate": 2.2507558017398263e-06, | |
| "loss": 0.4405, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 1.224040276903713, | |
| "grad_norm": 0.726741373538971, | |
| "learning_rate": 2.159676266344222e-06, | |
| "loss": 0.4463, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 1.2303335431088736, | |
| "grad_norm": 0.6209679841995239, | |
| "learning_rate": 2.06996754341607e-06, | |
| "loss": 0.4601, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 1.236626809314034, | |
| "grad_norm": 0.7238234877586365, | |
| "learning_rate": 1.98167292866648e-06, | |
| "loss": 0.4498, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 1.2429200755191945, | |
| "grad_norm": 0.663245439529419, | |
| "learning_rate": 1.8948350353219913e-06, | |
| "loss": 0.4507, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 1.249213341724355, | |
| "grad_norm": 0.6694537997245789, | |
| "learning_rate": 1.8094957735583463e-06, | |
| "loss": 0.4616, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 1.2555066079295154, | |
| "grad_norm": 0.6908589005470276, | |
| "learning_rate": 1.7256963302735752e-06, | |
| "loss": 0.439, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 1.2617998741346759, | |
| "grad_norm": 0.6885989308357239, | |
| "learning_rate": 1.6434771492101487e-06, | |
| "loss": 0.4444, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 1.2680931403398363, | |
| "grad_norm": 0.6936271786689758, | |
| "learning_rate": 1.5628779114358034e-06, | |
| "loss": 0.4535, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 1.2743864065449968, | |
| "grad_norm": 0.6543680429458618, | |
| "learning_rate": 1.4839375161924446e-06, | |
| "loss": 0.4584, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 1.2806796727501573, | |
| "grad_norm": 0.6488659381866455, | |
| "learning_rate": 1.406694062122389e-06, | |
| "loss": 0.4532, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 1.286972938955318, | |
| "grad_norm": 0.668251097202301, | |
| "learning_rate": 1.3311848288809815e-06, | |
| "loss": 0.4432, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 1.2932662051604784, | |
| "grad_norm": 0.6807104349136353, | |
| "learning_rate": 1.257446259144494e-06, | |
| "loss": 0.4324, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 1.2995594713656389, | |
| "grad_norm": 0.6319746971130371, | |
| "learning_rate": 1.1855139410219657e-06, | |
| "loss": 0.4493, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 1.3058527375707993, | |
| "grad_norm": 0.6163645386695862, | |
| "learning_rate": 1.115422590879464e-06, | |
| "loss": 0.4501, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 1.3121460037759598, | |
| "grad_norm": 0.6366046071052551, | |
| "learning_rate": 1.047206036585095e-06, | |
| "loss": 0.4423, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 1.3184392699811203, | |
| "grad_norm": 0.5919917821884155, | |
| "learning_rate": 9.808972011828055e-07, | |
| "loss": 0.4379, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 1.3247325361862807, | |
| "grad_norm": 0.6659424304962158, | |
| "learning_rate": 9.165280870028919e-07, | |
| "loss": 0.4548, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 1.3310258023914412, | |
| "grad_norm": 0.6823071837425232, | |
| "learning_rate": 8.541297602168591e-07, | |
| "loss": 0.464, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 1.3373190685966017, | |
| "grad_norm": 0.6970245838165283, | |
| "learning_rate": 7.937323358440935e-07, | |
| "loss": 0.4598, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 1.3436123348017621, | |
| "grad_norm": 0.6151891946792603, | |
| "learning_rate": 7.353649632175957e-07, | |
| "loss": 0.4569, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 1.3499056010069226, | |
| "grad_norm": 0.6397150754928589, | |
| "learning_rate": 6.790558119157597e-07, | |
| "loss": 0.4598, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 1.356198867212083, | |
| "grad_norm": 0.6420609951019287, | |
| "learning_rate": 6.248320581670281e-07, | |
| "loss": 0.4576, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 1.3624921334172435, | |
| "grad_norm": 0.6434526443481445, | |
| "learning_rate": 5.727198717339511e-07, | |
| "loss": 0.4544, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 1.368785399622404, | |
| "grad_norm": 0.6515443921089172, | |
| "learning_rate": 5.227444032829887e-07, | |
| "loss": 0.4462, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 1.3750786658275644, | |
| "grad_norm": 0.59568852186203, | |
| "learning_rate": 4.74929772246166e-07, | |
| "loss": 0.4697, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 1.381371932032725, | |
| "grad_norm": 0.6043298244476318, | |
| "learning_rate": 4.2929905518041714e-07, | |
| "loss": 0.4506, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 1.3876651982378854, | |
| "grad_norm": 0.5912688970565796, | |
| "learning_rate": 3.858742746302535e-07, | |
| "loss": 0.4358, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 1.3939584644430458, | |
| "grad_norm": 0.6092295050621033, | |
| "learning_rate": 3.44676388499125e-07, | |
| "loss": 0.4545, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 1.4002517306482063, | |
| "grad_norm": 0.6405302882194519, | |
| "learning_rate": 3.0572527993460054e-07, | |
| "loss": 0.4584, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 1.406544996853367, | |
| "grad_norm": 0.6120900511741638, | |
| "learning_rate": 2.6903974773225703e-07, | |
| "loss": 0.4461, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 1.4128382630585274, | |
| "grad_norm": 0.594711184501648, | |
| "learning_rate": 2.3463749726290287e-07, | |
| "loss": 0.4519, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 1.419131529263688, | |
| "grad_norm": 0.6510460376739502, | |
| "learning_rate": 2.0253513192751374e-07, | |
| "loss": 0.4509, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 1.4254247954688484, | |
| "grad_norm": 0.574932873249054, | |
| "learning_rate": 1.7274814514400995e-07, | |
| "loss": 0.4511, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 1.4317180616740088, | |
| "grad_norm": 0.6897699236869812, | |
| "learning_rate": 1.4529091286973994e-07, | |
| "loss": 0.4496, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 1.4380113278791693, | |
| "grad_norm": 0.6335061192512512, | |
| "learning_rate": 1.2017668666327752e-07, | |
| "loss": 0.4481, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 1.4443045940843298, | |
| "grad_norm": 0.6672943830490112, | |
| "learning_rate": 9.741758728888218e-08, | |
| "loss": 0.4518, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 1.4505978602894902, | |
| "grad_norm": 0.673734188079834, | |
| "learning_rate": 7.702459886670788e-08, | |
| "loss": 0.4495, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 1.4568911264946507, | |
| "grad_norm": 0.6775258183479309, | |
| "learning_rate": 5.900756357159143e-08, | |
| "loss": 0.458, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 1.4631843926998112, | |
| "grad_norm": 0.588603138923645, | |
| "learning_rate": 4.337517688296544e-08, | |
| "loss": 0.4543, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 1.4694776589049716, | |
| "grad_norm": 0.642877459526062, | |
| "learning_rate": 3.013498338820031e-08, | |
| "loss": 0.4522, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 1.475770925110132, | |
| "grad_norm": 0.6829048991203308, | |
| "learning_rate": 1.9293373141394124e-08, | |
| "loss": 0.4583, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 1.4820641913152928, | |
| "grad_norm": 0.6072986721992493, | |
| "learning_rate": 1.0855578579370696e-08, | |
| "loss": 0.4504, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 1.4883574575204532, | |
| "grad_norm": 0.6607363820075989, | |
| "learning_rate": 4.825671996370184e-09, | |
| "loss": 0.4618, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 1.4946507237256137, | |
| "grad_norm": 0.6756806969642639, | |
| "learning_rate": 1.2065635786595586e-09, | |
| "loss": 0.4539, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 2.000629326620516, | |
| "step": 1589, | |
| "total_flos": 208043401019392.0, | |
| "train_loss": 0.4968605734703299, | |
| "train_runtime": 50248.0079, | |
| "train_samples_per_second": 1.012, | |
| "train_steps_per_second": 0.032 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 1589, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 9223372036854775807, | |
| "save_steps": 1000, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 208043401019392.0, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |