Instructions to use haihp02/rrd with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- PEFT
How to use haihp02/rrd with PEFT:
```python
from peft import PeftModel
from transformers import AutoModelForCausalLM

base_model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen2.5-3B-Instruct")
model = PeftModel.from_pretrained(base_model, "haihp02/rrd")
```
- Transformers
How to use haihp02/rrd with Transformers:
```python
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("text-generation", model="haihp02/rrd")
messages = [
    {"role": "user", "content": "Who are you?"},
]
pipe(messages)
```
```python
# Load model directly
from transformers import AutoModel

model = AutoModel.from_pretrained("haihp02/rrd", dtype="auto")
```
- Notebooks
- Google Colab
- Kaggle
- Local Apps
- vLLM
How to use haihp02/rrd with vLLM:
Install from pip and serve model
```bash
# Install vLLM from pip:
pip install vllm

# Start the vLLM server:
vllm serve "haihp02/rrd"

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/chat/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "haihp02/rrd",
    "messages": [
      {
        "role": "user",
        "content": "What is the capital of France?"
      }
    ]
  }'
```
Use Docker
docker model run hf.co/haihp02/rrd
- SGLang
How to use haihp02/rrd with SGLang:
Install from pip and serve model
```bash
# Install SGLang from pip:
pip install sglang

# Start the SGLang server:
python3 -m sglang.launch_server \
  --model-path "haihp02/rrd" \
  --host 0.0.0.0 \
  --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "haihp02/rrd",
    "messages": [
      {
        "role": "user",
        "content": "What is the capital of France?"
      }
    ]
  }'
```
Use Docker images
```bash
docker run --gpus all \
  --shm-size 32g \
  -p 30000:30000 \
  -v ~/.cache/huggingface:/root/.cache/huggingface \
  --env "HF_TOKEN=<secret>" \
  --ipc=host \
  lmsysorg/sglang:latest \
  python3 -m sglang.launch_server \
    --model-path "haihp02/rrd" \
    --host 0.0.0.0 \
    --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "haihp02/rrd",
    "messages": [
      {
        "role": "user",
        "content": "What is the capital of France?"
      }
    ]
  }'
```
- Docker Model Runner
How to use haihp02/rrd with Docker Model Runner:
docker model run hf.co/haihp02/rrd
| { | |
| "best_global_step": 800, | |
| "best_metric": 0.2455482929944992, | |
| "best_model_checkpoint": "./checkpoints/qwen253-lora-leduc_random_l_s3/checkpoint-800", | |
| "epoch": 1.0, | |
| "eval_steps": 200, | |
| "global_step": 826, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.012106537530266344, | |
| "grad_norm": 10.681763648986816, | |
| "learning_rate": 8.999999999999999e-06, | |
| "loss": 0.828, | |
| "mean_token_accuracy": 0.8087970525026321, | |
| "num_tokens": 158075.0, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.024213075060532687, | |
| "grad_norm": 0.8575407266616821, | |
| "learning_rate": 1.8999999999999998e-05, | |
| "loss": 0.3278, | |
| "mean_token_accuracy": 0.8451847195625305, | |
| "num_tokens": 314832.0, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.03631961259079903, | |
| "grad_norm": 0.38625678420066833, | |
| "learning_rate": 2.9e-05, | |
| "loss": 0.2427, | |
| "mean_token_accuracy": 0.8527996808290481, | |
| "num_tokens": 473401.0, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.048426150121065374, | |
| "grad_norm": 0.25771161913871765, | |
| "learning_rate": 3.4997791661317485e-05, | |
| "loss": 0.2284, | |
| "mean_token_accuracy": 0.8509193986654282, | |
| "num_tokens": 630871.0, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.06053268765133172, | |
| "grad_norm": 0.21924710273742676, | |
| "learning_rate": 3.497295425144213e-05, | |
| "loss": 0.2428, | |
| "mean_token_accuracy": 0.8498442590236663, | |
| "num_tokens": 782593.0, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.07263922518159806, | |
| "grad_norm": 0.172270268201828, | |
| "learning_rate": 3.4920558312793984e-05, | |
| "loss": 0.2396, | |
| "mean_token_accuracy": 0.844213005900383, | |
| "num_tokens": 938469.0, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.0847457627118644, | |
| "grad_norm": 0.23005615174770355, | |
| "learning_rate": 3.4840686484803226e-05, | |
| "loss": 0.2336, | |
| "mean_token_accuracy": 0.8490692973136902, | |
| "num_tokens": 1093154.0, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.09685230024213075, | |
| "grad_norm": 0.2150808423757553, | |
| "learning_rate": 3.473346474216413e-05, | |
| "loss": 0.2308, | |
| "mean_token_accuracy": 0.8609368681907654, | |
| "num_tokens": 1249444.0, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.1089588377723971, | |
| "grad_norm": 0.15730148553848267, | |
| "learning_rate": 3.459906219614643e-05, | |
| "loss": 0.2424, | |
| "mean_token_accuracy": 0.8448190927505493, | |
| "num_tokens": 1405823.0, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.12106537530266344, | |
| "grad_norm": 0.4564799964427948, | |
| "learning_rate": 3.4437690827871256e-05, | |
| "loss": 0.2422, | |
| "mean_token_accuracy": 0.8474129974842072, | |
| "num_tokens": 1559563.0, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.13317191283292978, | |
| "grad_norm": 0.1691020429134369, | |
| "learning_rate": 3.424960515397224e-05, | |
| "loss": 0.2426, | |
| "mean_token_accuracy": 0.8417032897472382, | |
| "num_tokens": 1714717.0, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.14527845036319612, | |
| "grad_norm": 0.13242730498313904, | |
| "learning_rate": 3.403510182516918e-05, | |
| "loss": 0.2401, | |
| "mean_token_accuracy": 0.8526080518960952, | |
| "num_tokens": 1869540.0, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.15738498789346247, | |
| "grad_norm": 0.16052637994289398, | |
| "learning_rate": 3.379451915838742e-05, | |
| "loss": 0.2442, | |
| "mean_token_accuracy": 0.8375077575445176, | |
| "num_tokens": 2024145.0, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.1694915254237288, | |
| "grad_norm": 0.1499997228384018, | |
| "learning_rate": 3.352823660316074e-05, | |
| "loss": 0.2346, | |
| "mean_token_accuracy": 0.845609164237976, | |
| "num_tokens": 2180735.0, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.18159806295399517, | |
| "grad_norm": 0.10429004579782486, | |
| "learning_rate": 3.323667414315959e-05, | |
| "loss": 0.2419, | |
| "mean_token_accuracy": 0.8553953766822815, | |
| "num_tokens": 2335148.0, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.1937046004842615, | |
| "grad_norm": 0.08566914498806, | |
| "learning_rate": 3.292029163378833e-05, | |
| "loss": 0.2357, | |
| "mean_token_accuracy": 0.8465773612260818, | |
| "num_tokens": 2492016.0, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.20581113801452786, | |
| "grad_norm": 0.14463454484939575, | |
| "learning_rate": 3.2579588076896486e-05, | |
| "loss": 0.2314, | |
| "mean_token_accuracy": 0.8503528028726578, | |
| "num_tokens": 2647377.0, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.2179176755447942, | |
| "grad_norm": 0.12798088788986206, | |
| "learning_rate": 3.221510083374765e-05, | |
| "loss": 0.2333, | |
| "mean_token_accuracy": 0.8497181862592698, | |
| "num_tokens": 2803931.0, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.23002421307506055, | |
| "grad_norm": 0.17899736762046814, | |
| "learning_rate": 3.182740477748768e-05, | |
| "loss": 0.2358, | |
| "mean_token_accuracy": 0.8438972860574723, | |
| "num_tokens": 2956080.0, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.24213075060532688, | |
| "grad_norm": 0.15043821930885315, | |
| "learning_rate": 3.1417111386448595e-05, | |
| "loss": 0.2366, | |
| "mean_token_accuracy": 0.8498786896467209, | |
| "num_tokens": 3111180.0, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.24213075060532688, | |
| "eval_loss": 0.24580417573451996, | |
| "eval_num_tokens": 3111180.0, | |
| "eval_runtime": 27.1445, | |
| "eval_samples_per_second": 9.836, | |
| "eval_steps_per_second": 9.836, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.2542372881355932, | |
| "grad_norm": 0.12326110154390335, | |
| "learning_rate": 3.098486777971855e-05, | |
| "loss": 0.2277, | |
| "mean_token_accuracy": 0.8524766951799393, | |
| "num_tokens": 3269249.0, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.26634382566585957, | |
| "grad_norm": 0.0829845741391182, | |
| "learning_rate": 3.053135569649868e-05, | |
| "loss": 0.2419, | |
| "mean_token_accuracy": 0.843473681807518, | |
| "num_tokens": 3424471.0, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.2784503631961259, | |
| "grad_norm": 0.13634343445301056, | |
| "learning_rate": 3.005729042085683e-05, | |
| "loss": 0.2383, | |
| "mean_token_accuracy": 0.8487411588430405, | |
| "num_tokens": 3579004.0, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.29055690072639223, | |
| "grad_norm": 0.09264083206653595, | |
| "learning_rate": 2.956341965357393e-05, | |
| "loss": 0.236, | |
| "mean_token_accuracy": 0.8531801581382752, | |
| "num_tokens": 3734168.0, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.3026634382566586, | |
| "grad_norm": 0.09290221333503723, | |
| "learning_rate": 2.9050522332862385e-05, | |
| "loss": 0.2369, | |
| "mean_token_accuracy": 0.8493932217359543, | |
| "num_tokens": 3888227.0, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.31476997578692495, | |
| "grad_norm": 0.08292774111032486, | |
| "learning_rate": 2.8519407405816493e-05, | |
| "loss": 0.2313, | |
| "mean_token_accuracy": 0.851080346107483, | |
| "num_tokens": 4046278.0, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.3268765133171913, | |
| "grad_norm": 0.1620582491159439, | |
| "learning_rate": 2.797091255253247e-05, | |
| "loss": 0.2379, | |
| "mean_token_accuracy": 0.8395844340324402, | |
| "num_tokens": 4200203.0, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.3389830508474576, | |
| "grad_norm": 0.12099113315343857, | |
| "learning_rate": 2.7405902864910543e-05, | |
| "loss": 0.2364, | |
| "mean_token_accuracy": 0.8551326721906662, | |
| "num_tokens": 4355292.0, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.35108958837772397, | |
| "grad_norm": 0.12489405274391174, | |
| "learning_rate": 2.6825269482222827e-05, | |
| "loss": 0.2354, | |
| "mean_token_accuracy": 0.8442697525024414, | |
| "num_tokens": 4510258.0, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.36319612590799033, | |
| "grad_norm": 0.10075319558382034, | |
| "learning_rate": 2.6229928185598994e-05, | |
| "loss": 0.2333, | |
| "mean_token_accuracy": 0.8536905407905578, | |
| "num_tokens": 4664788.0, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.37530266343825663, | |
| "grad_norm": 0.11994941532611847, | |
| "learning_rate": 2.5620817953646596e-05, | |
| "loss": 0.2323, | |
| "mean_token_accuracy": 0.8539896428585052, | |
| "num_tokens": 4821986.0, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.387409200968523, | |
| "grad_norm": 0.12077498435974121, | |
| "learning_rate": 2.4998899481484006e-05, | |
| "loss": 0.2399, | |
| "mean_token_accuracy": 0.8509245574474334, | |
| "num_tokens": 4978102.0, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.39951573849878935, | |
| "grad_norm": 0.1378944218158722, | |
| "learning_rate": 2.4365153665521915e-05, | |
| "loss": 0.233, | |
| "mean_token_accuracy": 0.8478419154882431, | |
| "num_tokens": 5134005.0, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.4116222760290557, | |
| "grad_norm": 0.15924955904483795, | |
| "learning_rate": 2.3720580056383107e-05, | |
| "loss": 0.2244, | |
| "mean_token_accuracy": 0.8621924012899399, | |
| "num_tokens": 5290764.0, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.423728813559322, | |
| "grad_norm": 0.1484508514404297, | |
| "learning_rate": 2.30661952824006e-05, | |
| "loss": 0.2266, | |
| "mean_token_accuracy": 0.8586694985628128, | |
| "num_tokens": 5447775.0, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.4358353510895884, | |
| "grad_norm": 0.18554432690143585, | |
| "learning_rate": 2.2403031446180677e-05, | |
| "loss": 0.2269, | |
| "mean_token_accuracy": 0.8663704991340637, | |
| "num_tokens": 5605311.0, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.44794188861985473, | |
| "grad_norm": 0.2061612904071808, | |
| "learning_rate": 2.1732134496759685e-05, | |
| "loss": 0.2293, | |
| "mean_token_accuracy": 0.8527790486812592, | |
| "num_tokens": 5763991.0, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.4600484261501211, | |
| "grad_norm": 0.21663211286067963, | |
| "learning_rate": 2.1054562579922147e-05, | |
| "loss": 0.2384, | |
| "mean_token_accuracy": 0.8578897565603256, | |
| "num_tokens": 5918372.0, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.4721549636803874, | |
| "grad_norm": 0.16164663434028625, | |
| "learning_rate": 2.0371384369281973e-05, | |
| "loss": 0.2321, | |
| "mean_token_accuracy": 0.8527203172445297, | |
| "num_tokens": 6076662.0, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.48426150121065376, | |
| "grad_norm": 0.14720699191093445, | |
| "learning_rate": 1.968367738075915e-05, | |
| "loss": 0.223, | |
| "mean_token_accuracy": 0.8647637069225311, | |
| "num_tokens": 6233988.0, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.48426150121065376, | |
| "eval_loss": 0.24974025785923004, | |
| "eval_num_tokens": 6233988.0, | |
| "eval_runtime": 26.7143, | |
| "eval_samples_per_second": 9.995, | |
| "eval_steps_per_second": 9.995, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.4963680387409201, | |
| "grad_norm": 0.11780782788991928, | |
| "learning_rate": 1.899252627311015e-05, | |
| "loss": 0.2288, | |
| "mean_token_accuracy": 0.853996068239212, | |
| "num_tokens": 6391251.0, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.5084745762711864, | |
| "grad_norm": 0.14394888281822205, | |
| "learning_rate": 1.8299021137192683e-05, | |
| "loss": 0.237, | |
| "mean_token_accuracy": 0.8532957583665848, | |
| "num_tokens": 6544551.0, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.5205811138014528, | |
| "grad_norm": 0.4107162356376648, | |
| "learning_rate": 1.760425577666279e-05, | |
| "loss": 0.2294, | |
| "mean_token_accuracy": 0.8468001574277878, | |
| "num_tokens": 6702345.0, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.5326876513317191, | |
| "grad_norm": 0.12140627950429916, | |
| "learning_rate": 1.6909325982816146e-05, | |
| "loss": 0.2268, | |
| "mean_token_accuracy": 0.8571277797222138, | |
| "num_tokens": 6857256.0, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.5447941888619855, | |
| "grad_norm": 0.13262014091014862, | |
| "learning_rate": 1.6215327806294417e-05, | |
| "loss": 0.228, | |
| "mean_token_accuracy": 0.8527298241853714, | |
| "num_tokens": 7013503.0, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.5569007263922519, | |
| "grad_norm": 0.18789087235927582, | |
| "learning_rate": 1.552335582838251e-05, | |
| "loss": 0.2317, | |
| "mean_token_accuracy": 0.85929856300354, | |
| "num_tokens": 7167382.0, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.5690072639225182, | |
| "grad_norm": 0.1549673080444336, | |
| "learning_rate": 1.4834501434623413e-05, | |
| "loss": 0.2386, | |
| "mean_token_accuracy": 0.8493269443511963, | |
| "num_tokens": 7322223.0, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.5811138014527845, | |
| "grad_norm": 0.28990328311920166, | |
| "learning_rate": 1.4149851093473319e-05, | |
| "loss": 0.2261, | |
| "mean_token_accuracy": 0.8549934804439545, | |
| "num_tokens": 7477291.0, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.5932203389830508, | |
| "grad_norm": 0.34836694598197937, | |
| "learning_rate": 1.3470484642712053e-05, | |
| "loss": 0.2391, | |
| "mean_token_accuracy": 0.8534150063991547, | |
| "num_tokens": 7631181.0, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.6053268765133172, | |
| "grad_norm": 0.08868297189474106, | |
| "learning_rate": 1.2797473586311476e-05, | |
| "loss": 0.235, | |
| "mean_token_accuracy": 0.8497831732034683, | |
| "num_tokens": 7786171.0, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.6174334140435835, | |
| "grad_norm": 0.11514752358198166, | |
| "learning_rate": 1.2131879404448057e-05, | |
| "loss": 0.2331, | |
| "mean_token_accuracy": 0.8469379067420959, | |
| "num_tokens": 7941159.0, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.6295399515738499, | |
| "grad_norm": 0.22596512734889984, | |
| "learning_rate": 1.1474751879325075e-05, | |
| "loss": 0.2374, | |
| "mean_token_accuracy": 0.8513785660266876, | |
| "num_tokens": 8095202.0, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.6416464891041163, | |
| "grad_norm": 0.19439919292926788, | |
| "learning_rate": 1.0827127439444991e-05, | |
| "loss": 0.2318, | |
| "mean_token_accuracy": 0.8583654165267944, | |
| "num_tokens": 8250634.0, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.6537530266343826, | |
| "grad_norm": 0.16050127148628235, | |
| "learning_rate": 1.0190027524943444e-05, | |
| "loss": 0.2247, | |
| "mean_token_accuracy": 0.8635302782058716, | |
| "num_tokens": 8408664.0, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.6658595641646489, | |
| "grad_norm": 0.19398577511310577, | |
| "learning_rate": 9.564456976562993e-06, | |
| "loss": 0.2359, | |
| "mean_token_accuracy": 0.8510926723480224, | |
| "num_tokens": 8563164.0, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.6779661016949152, | |
| "grad_norm": 0.16695190966129303, | |
| "learning_rate": 8.951402450807686e-06, | |
| "loss": 0.2256, | |
| "mean_token_accuracy": 0.8567656666040421, | |
| "num_tokens": 8717818.0, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.6900726392251816, | |
| "grad_norm": 0.14462164044380188, | |
| "learning_rate": 8.35183086377792e-06, | |
| "loss": 0.2302, | |
| "mean_token_accuracy": 0.8581649184226989, | |
| "num_tokens": 8872048.0, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.7021791767554479, | |
| "grad_norm": 0.20691347122192383, | |
| "learning_rate": 7.766687866140133e-06, | |
| "loss": 0.234, | |
| "mean_token_accuracy": 0.856579378247261, | |
| "num_tokens": 9027749.0, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.7142857142857143, | |
| "grad_norm": 0.23428326845169067, | |
| "learning_rate": 7.196896351636536e-06, | |
| "loss": 0.2305, | |
| "mean_token_accuracy": 0.8628283053636551, | |
| "num_tokens": 9186566.0, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.7263922518159807, | |
| "grad_norm": 0.16487430036067963, | |
| "learning_rate": 6.643355001487321e-06, | |
| "loss": 0.2298, | |
| "mean_token_accuracy": 0.8545309662818908, | |
| "num_tokens": 9343040.0, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.7263922518159807, | |
| "eval_loss": 0.24567341804504395, | |
| "eval_num_tokens": 9343040.0, | |
| "eval_runtime": 26.5992, | |
| "eval_samples_per_second": 10.038, | |
| "eval_steps_per_second": 10.038, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.738498789346247, | |
| "grad_norm": 0.1282430738210678, | |
| "learning_rate": 6.106936866981081e-06, | |
| "loss": 0.2249, | |
| "mean_token_accuracy": 0.8606575727462769, | |
| "num_tokens": 9499657.0, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.7506053268765133, | |
| "grad_norm": 0.13524822890758514, | |
| "learning_rate": 5.588487992489113e-06, | |
| "loss": 0.2259, | |
| "mean_token_accuracy": 0.865332567691803, | |
| "num_tokens": 9654173.0, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.7627118644067796, | |
| "grad_norm": 0.1036379262804985, | |
| "learning_rate": 5.088826081075191e-06, | |
| "loss": 0.2296, | |
| "mean_token_accuracy": 0.8487064689397812, | |
| "num_tokens": 9809053.0, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.774818401937046, | |
| "grad_norm": 0.20920903980731964, | |
| "learning_rate": 4.6087392048056934e-06, | |
| "loss": 0.2363, | |
| "mean_token_accuracy": 0.8600066721439361, | |
| "num_tokens": 9965376.0, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.7869249394673123, | |
| "grad_norm": 0.2146104872226715, | |
| "learning_rate": 4.148984561793913e-06, | |
| "loss": 0.2303, | |
| "mean_token_accuracy": 0.8529395699501038, | |
| "num_tokens": 10119911.0, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.7990314769975787, | |
| "grad_norm": 0.15396490693092346, | |
| "learning_rate": 3.7102872819392174e-06, | |
| "loss": 0.2298, | |
| "mean_token_accuracy": 0.8552716702222825, | |
| "num_tokens": 10277701.0, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.8111380145278451, | |
| "grad_norm": 0.15627269446849823, | |
| "learning_rate": 3.2933392832444513e-06, | |
| "loss": 0.2277, | |
| "mean_token_accuracy": 0.8558280795812607, | |
| "num_tokens": 10434155.0, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.8232445520581114, | |
| "grad_norm": 0.1806221306324005, | |
| "learning_rate": 2.898798180515523e-06, | |
| "loss": 0.2316, | |
| "mean_token_accuracy": 0.8524704337120056, | |
| "num_tokens": 10592288.0, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.8353510895883777, | |
| "grad_norm": 0.15548691153526306, | |
| "learning_rate": 2.527286248164371e-06, | |
| "loss": 0.2343, | |
| "mean_token_accuracy": 0.8490294456481934, | |
| "num_tokens": 10748051.0, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.847457627118644, | |
| "grad_norm": 0.21228361129760742, | |
| "learning_rate": 2.179389438751151e-06, | |
| "loss": 0.2274, | |
| "mean_token_accuracy": 0.8586687803268432, | |
| "num_tokens": 10903879.0, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.8595641646489104, | |
| "grad_norm": 0.1210569515824318, | |
| "learning_rate": 1.8556564588136477e-06, | |
| "loss": 0.2272, | |
| "mean_token_accuracy": 0.8536923497915268, | |
| "num_tokens": 11058830.0, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.8716707021791767, | |
| "grad_norm": 0.12513045966625214, | |
| "learning_rate": 1.556597903441502e-06, | |
| "loss": 0.2322, | |
| "mean_token_accuracy": 0.8645375669002533, | |
| "num_tokens": 11214742.0, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.8837772397094431, | |
| "grad_norm": 0.12597453594207764, | |
| "learning_rate": 1.2826854509602204e-06, | |
| "loss": 0.2257, | |
| "mean_token_accuracy": 0.8663272529840469, | |
| "num_tokens": 11371761.0, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.8958837772397095, | |
| "grad_norm": 0.13700132071971893, | |
| "learning_rate": 1.0343511189951156e-06, | |
| "loss": 0.2226, | |
| "mean_token_accuracy": 0.862814399600029, | |
| "num_tokens": 11526770.0, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.9079903147699758, | |
| "grad_norm": 0.14355961978435516, | |
| "learning_rate": 8.119865830885323e-07, | |
| "loss": 0.2285, | |
| "mean_token_accuracy": 0.8575877249240875, | |
| "num_tokens": 11683565.0, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.9200968523002422, | |
| "grad_norm": 0.17410188913345337, | |
| "learning_rate": 6.159425589450137e-07, | |
| "loss": 0.2331, | |
| "mean_token_accuracy": 0.8473503857851028, | |
| "num_tokens": 11840016.0, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.9322033898305084, | |
| "grad_norm": 0.10479779541492462, | |
| "learning_rate": 4.4652824927878805e-07, | |
| "loss": 0.2323, | |
| "mean_token_accuracy": 0.8583548158407212, | |
| "num_tokens": 11993687.0, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.9443099273607748, | |
| "grad_norm": 0.25644639134407043, | |
| "learning_rate": 3.040108561359608e-07, | |
| "loss": 0.2262, | |
| "mean_token_accuracy": 0.8624204069375991, | |
| "num_tokens": 12149690.0, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.9564164648910412, | |
| "grad_norm": 0.10093547403812408, | |
| "learning_rate": 1.8861515946060807e-07, | |
| "loss": 0.2327, | |
| "mean_token_accuracy": 0.8606836467981338, | |
| "num_tokens": 12305420.0, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.9685230024213075, | |
| "grad_norm": 0.21478745341300964, | |
| "learning_rate": 1.0052316256947606e-07, | |
| "loss": 0.2312, | |
| "mean_token_accuracy": 0.8556828409433365, | |
| "num_tokens": 12461595.0, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.9685230024213075, | |
| "eval_loss": 0.2455482929944992, | |
| "eval_num_tokens": 12461595.0, | |
| "eval_runtime": 26.6485, | |
| "eval_samples_per_second": 10.019, | |
| "eval_steps_per_second": 10.019, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.9806295399515739, | |
| "grad_norm": 0.13691502809524536, | |
| "learning_rate": 3.987380509441307e-08, | |
| "loss": 0.2252, | |
| "mean_token_accuracy": 0.852449357509613, | |
| "num_tokens": 12617715.0, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.9927360774818402, | |
| "grad_norm": 0.12197011709213257, | |
| "learning_rate": 6.76274384530412e-09, | |
| "loss": 0.2263, | |
| "mean_token_accuracy": 0.8597381263971329, | |
| "num_tokens": 12775223.0, | |
| "step": 820 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 826, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 200, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 2.1877783380167885e+17, | |
| "train_batch_size": 16, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |