Text Generation
Transformers
PyTorch
llama
Generated from Trainer
open-r1
conversational
text-generation-inference
Instructions to use jaredfern/original-modified-seq with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use jaredfern/original-modified-seq with Transformers:
```python
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("text-generation", model="jaredfern/original-modified-seq")
messages = [
    {"role": "user", "content": "Who are you?"},
]
pipe(messages)
```

```python
# Load model directly
from transformers import AutoTokenizer, AutoModelForCausalLM

tokenizer = AutoTokenizer.from_pretrained("jaredfern/original-modified-seq")
model = AutoModelForCausalLM.from_pretrained("jaredfern/original-modified-seq")
messages = [
    {"role": "user", "content": "Who are you?"},
]
inputs = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt=True,
    tokenize=True,
    return_dict=True,
    return_tensors="pt",
).to(model.device)

outputs = model.generate(**inputs, max_new_tokens=40)
print(tokenizer.decode(outputs[0][inputs["input_ids"].shape[-1]:]))
```
- Notebooks
- Google Colab
- Kaggle
- Local Apps Settings
- vLLM
How to use jaredfern/original-modified-seq with vLLM:
Install from pip and serve model
```bash
# Install vLLM from pip:
pip install vllm

# Start the vLLM server:
vllm serve "jaredfern/original-modified-seq"

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/chat/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "jaredfern/original-modified-seq",
    "messages": [
      {
        "role": "user",
        "content": "What is the capital of France?"
      }
    ]
  }'
```
Use Docker
docker model run hf.co/jaredfern/original-modified-seq
- SGLang
How to use jaredfern/original-modified-seq with SGLang:
Install from pip and serve model
```bash
# Install SGLang from pip:
pip install sglang

# Start the SGLang server:
python3 -m sglang.launch_server \
  --model-path "jaredfern/original-modified-seq" \
  --host 0.0.0.0 \
  --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "jaredfern/original-modified-seq",
    "messages": [
      {
        "role": "user",
        "content": "What is the capital of France?"
      }
    ]
  }'
```
Use Docker images
```bash
docker run --gpus all \
  --shm-size 32g \
  -p 30000:30000 \
  -v ~/.cache/huggingface:/root/.cache/huggingface \
  --env "HF_TOKEN=<secret>" \
  --ipc=host \
  lmsysorg/sglang:latest \
  python3 -m sglang.launch_server \
    --model-path "jaredfern/original-modified-seq" \
    --host 0.0.0.0 \
    --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "jaredfern/original-modified-seq",
    "messages": [
      {
        "role": "user",
        "content": "What is the capital of France?"
      }
    ]
  }'
```
- Docker Model Runner
How to use jaredfern/original-modified-seq with Docker Model Runner:
docker model run hf.co/jaredfern/original-modified-seq
| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 2.0, | |
| "eval_steps": 500, | |
| "global_step": 212, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "completion_length": 305.40234375, | |
| "epoch": 0.009512485136741973, | |
| "grad_norm": 9.479197648905142, | |
| "kl": 1.3113021850585938e-05, | |
| "learning_rate": 0.0, | |
| "loss": 0.0, | |
| "reward": 2.353515625, | |
| "reward_std": 0.5666993586346507, | |
| "rewards/accuracy_reward": 0.46484375, | |
| "rewards/format_reward": 0.9375, | |
| "rewards/influence_reward": 0.3828125, | |
| "rewards/len_reward": 0.568359375, | |
| "step": 1 | |
| }, | |
| { | |
| "completion_length": 303.59375, | |
| "epoch": 0.04756242568370987, | |
| "grad_norm": 5.559375419637804, | |
| "kl": 0.00265657901763916, | |
| "learning_rate": 1.818181818181818e-07, | |
| "loss": 0.0001, | |
| "reward": 2.5146484375, | |
| "reward_std": 0.530997786205262, | |
| "rewards/accuracy_reward": 0.54736328125, | |
| "rewards/format_reward": 0.9443359375, | |
| "rewards/influence_reward": 0.42724609375, | |
| "rewards/len_reward": 0.595703125, | |
| "step": 5 | |
| }, | |
| { | |
| "completion_length": 301.755859375, | |
| "epoch": 0.09512485136741974, | |
| "grad_norm": 7.509497472122595, | |
| "kl": 0.010486793518066407, | |
| "learning_rate": 4.090909090909091e-07, | |
| "loss": 0.0004, | |
| "reward": 2.534765625, | |
| "reward_std": 0.5271333329379558, | |
| "rewards/accuracy_reward": 0.542578125, | |
| "rewards/format_reward": 0.95234375, | |
| "rewards/influence_reward": 0.441015625, | |
| "rewards/len_reward": 0.598828125, | |
| "step": 10 | |
| }, | |
| { | |
| "completion_length": 308.83046875, | |
| "epoch": 0.1426872770511296, | |
| "grad_norm": 9.5559930417524, | |
| "kl": 0.3596527099609375, | |
| "learning_rate": 6.363636363636363e-07, | |
| "loss": 0.0144, | |
| "reward": 2.4265625, | |
| "reward_std": 0.4884017549455166, | |
| "rewards/accuracy_reward": 0.512109375, | |
| "rewards/format_reward": 0.95625, | |
| "rewards/influence_reward": 0.419140625, | |
| "rewards/len_reward": 0.5390625, | |
| "step": 15 | |
| }, | |
| { | |
| "completion_length": 309.508984375, | |
| "epoch": 0.1902497027348395, | |
| "grad_norm": 6.575100009019049, | |
| "kl": 1.8339599609375, | |
| "learning_rate": 8.636363636363636e-07, | |
| "loss": 0.0734, | |
| "reward": 2.483984375, | |
| "reward_std": 0.4793000495061278, | |
| "rewards/accuracy_reward": 0.534765625, | |
| "rewards/format_reward": 0.971875, | |
| "rewards/influence_reward": 0.444921875, | |
| "rewards/len_reward": 0.532421875, | |
| "step": 20 | |
| }, | |
| { | |
| "completion_length": 302.12109375, | |
| "epoch": 0.23781212841854935, | |
| "grad_norm": 3.8082898220241357, | |
| "kl": 1.6882080078125, | |
| "learning_rate": 9.99726628670463e-07, | |
| "loss": 0.0675, | |
| "reward": 2.571484375, | |
| "reward_std": 0.46187512911856177, | |
| "rewards/accuracy_reward": 0.56015625, | |
| "rewards/format_reward": 0.97578125, | |
| "rewards/influence_reward": 0.478125, | |
| "rewards/len_reward": 0.557421875, | |
| "step": 25 | |
| }, | |
| { | |
| "completion_length": 285.939453125, | |
| "epoch": 0.2853745541022592, | |
| "grad_norm": 5.417820371147177, | |
| "kl": 1.641796875, | |
| "learning_rate": 9.966546331768192e-07, | |
| "loss": 0.0657, | |
| "reward": 2.583984375, | |
| "reward_std": 0.48238000813871623, | |
| "rewards/accuracy_reward": 0.5625, | |
| "rewards/format_reward": 0.985546875, | |
| "rewards/influence_reward": 0.465625, | |
| "rewards/len_reward": 0.5703125, | |
| "step": 30 | |
| }, | |
| { | |
| "completion_length": 295.471875, | |
| "epoch": 0.3329369797859691, | |
| "grad_norm": 3.3934355489448493, | |
| "kl": 1.362841796875, | |
| "learning_rate": 9.901899829374047e-07, | |
| "loss": 0.0545, | |
| "reward": 2.55234375, | |
| "reward_std": 0.48458676002919676, | |
| "rewards/accuracy_reward": 0.516796875, | |
| "rewards/format_reward": 0.9796875, | |
| "rewards/influence_reward": 0.429296875, | |
| "rewards/len_reward": 0.6265625, | |
| "step": 35 | |
| }, | |
| { | |
| "completion_length": 295.90390625, | |
| "epoch": 0.380499405469679, | |
| "grad_norm": 4.1900410232582335, | |
| "kl": 1.5516845703125, | |
| "learning_rate": 9.803768380684242e-07, | |
| "loss": 0.0621, | |
| "reward": 2.50546875, | |
| "reward_std": 0.46826295778155325, | |
| "rewards/accuracy_reward": 0.50703125, | |
| "rewards/format_reward": 0.97578125, | |
| "rewards/influence_reward": 0.428125, | |
| "rewards/len_reward": 0.59453125, | |
| "step": 40 | |
| }, | |
| { | |
| "completion_length": 292.028515625, | |
| "epoch": 0.4280618311533888, | |
| "grad_norm": 7.946193406732366, | |
| "kl": 1.88642578125, | |
| "learning_rate": 9.672822322997304e-07, | |
| "loss": 0.0754, | |
| "reward": 2.47421875, | |
| "reward_std": 0.45962891932576894, | |
| "rewards/accuracy_reward": 0.49296875, | |
| "rewards/format_reward": 0.982421875, | |
| "rewards/influence_reward": 0.411328125, | |
| "rewards/len_reward": 0.5875, | |
| "step": 45 | |
| }, | |
| { | |
| "completion_length": 292.873046875, | |
| "epoch": 0.4756242568370987, | |
| "grad_norm": 7.492938961448867, | |
| "kl": 1.96015625, | |
| "learning_rate": 9.509956150664795e-07, | |
| "loss": 0.0784, | |
| "reward": 2.596484375, | |
| "reward_std": 0.460917086713016, | |
| "rewards/accuracy_reward": 0.52578125, | |
| "rewards/format_reward": 0.982421875, | |
| "rewards/influence_reward": 0.4453125, | |
| "rewards/len_reward": 0.64296875, | |
| "step": 50 | |
| }, | |
| { | |
| "completion_length": 286.85859375, | |
| "epoch": 0.5231866825208086, | |
| "grad_norm": 3.5519428103835224, | |
| "kl": 1.998779296875, | |
| "learning_rate": 9.316282404787869e-07, | |
| "loss": 0.0799, | |
| "reward": 2.530078125, | |
| "reward_std": 0.43998180609196424, | |
| "rewards/accuracy_reward": 0.506640625, | |
| "rewards/format_reward": 0.973828125, | |
| "rewards/influence_reward": 0.43046875, | |
| "rewards/len_reward": 0.619140625, | |
| "step": 55 | |
| }, | |
| { | |
| "completion_length": 284.312890625, | |
| "epoch": 0.5707491082045184, | |
| "grad_norm": 3.7968869284274014, | |
| "kl": 2.133349609375, | |
| "learning_rate": 9.093124073433462e-07, | |
| "loss": 0.0854, | |
| "reward": 2.521875, | |
| "reward_std": 0.5203634534031153, | |
| "rewards/accuracy_reward": 0.506640625, | |
| "rewards/format_reward": 0.979296875, | |
| "rewards/influence_reward": 0.419140625, | |
| "rewards/len_reward": 0.616796875, | |
| "step": 60 | |
| }, | |
| { | |
| "completion_length": 275.38203125, | |
| "epoch": 0.6183115338882283, | |
| "grad_norm": 4.3717453671021165, | |
| "kl": 2.27578125, | |
| "learning_rate": 8.842005554284295e-07, | |
| "loss": 0.091, | |
| "reward": 2.609765625, | |
| "reward_std": 0.4997987896203995, | |
| "rewards/accuracy_reward": 0.539453125, | |
| "rewards/format_reward": 0.983203125, | |
| "rewards/influence_reward": 0.4578125, | |
| "rewards/len_reward": 0.629296875, | |
| "step": 65 | |
| }, | |
| { | |
| "completion_length": 271.71171875, | |
| "epoch": 0.6658739595719382, | |
| "grad_norm": 5.274292147592828, | |
| "kl": 2.325927734375, | |
| "learning_rate": 8.564642241456986e-07, | |
| "loss": 0.093, | |
| "reward": 2.628515625, | |
| "reward_std": 0.47792479060590265, | |
| "rewards/accuracy_reward": 0.543359375, | |
| "rewards/format_reward": 0.9765625, | |
| "rewards/influence_reward": 0.4640625, | |
| "rewards/len_reward": 0.64453125, | |
| "step": 70 | |
| }, | |
| { | |
| "completion_length": 272.53828125, | |
| "epoch": 0.713436385255648, | |
| "grad_norm": 4.422045761319486, | |
| "kl": 2.280224609375, | |
| "learning_rate": 8.262928807620843e-07, | |
| "loss": 0.0912, | |
| "reward": 2.548046875, | |
| "reward_std": 0.504490308649838, | |
| "rewards/accuracy_reward": 0.511328125, | |
| "rewards/format_reward": 0.98046875, | |
| "rewards/influence_reward": 0.433203125, | |
| "rewards/len_reward": 0.623046875, | |
| "step": 75 | |
| }, | |
| { | |
| "completion_length": 274.9234375, | |
| "epoch": 0.760998810939358, | |
| "grad_norm": 3.788180136247375, | |
| "kl": 2.3419921875, | |
| "learning_rate": 7.938926261462365e-07, | |
| "loss": 0.0937, | |
| "reward": 2.491796875, | |
| "reward_std": 0.47606104165315627, | |
| "rewards/accuracy_reward": 0.484375, | |
| "rewards/format_reward": 0.980859375, | |
| "rewards/influence_reward": 0.40546875, | |
| "rewards/len_reward": 0.62109375, | |
| "step": 80 | |
| }, | |
| { | |
| "completion_length": 284.03671875, | |
| "epoch": 0.8085612366230678, | |
| "grad_norm": 2.828523061717255, | |
| "kl": 2.372216796875, | |
| "learning_rate": 7.594847868906076e-07, | |
| "loss": 0.0949, | |
| "reward": 2.5171875, | |
| "reward_std": 0.4772877097129822, | |
| "rewards/accuracy_reward": 0.4734375, | |
| "rewards/format_reward": 0.98046875, | |
| "rewards/influence_reward": 0.40390625, | |
| "rewards/len_reward": 0.659375, | |
| "step": 85 | |
| }, | |
| { | |
| "completion_length": 280.916796875, | |
| "epoch": 0.8561236623067776, | |
| "grad_norm": 607.3521812300567, | |
| "kl": 2.50087890625, | |
| "learning_rate": 7.233044034264033e-07, | |
| "loss": 0.1001, | |
| "reward": 2.506640625, | |
| "reward_std": 0.46796532850712536, | |
| "rewards/accuracy_reward": 0.495703125, | |
| "rewards/format_reward": 0.9734375, | |
| "rewards/influence_reward": 0.42109375, | |
| "rewards/len_reward": 0.61640625, | |
| "step": 90 | |
| }, | |
| { | |
| "completion_length": 289.612890625, | |
| "epoch": 0.9036860879904876, | |
| "grad_norm": 2.4261586696137574, | |
| "kl": 2.483837890625, | |
| "learning_rate": 6.855986244591103e-07, | |
| "loss": 0.0994, | |
| "reward": 2.565625, | |
| "reward_std": 0.4893400952219963, | |
| "rewards/accuracy_reward": 0.525, | |
| "rewards/format_reward": 0.977734375, | |
| "rewards/influence_reward": 0.4375, | |
| "rewards/len_reward": 0.625390625, | |
| "step": 95 | |
| }, | |
| { | |
| "completion_length": 286.48984375, | |
| "epoch": 0.9512485136741974, | |
| "grad_norm": 4.005939428029324, | |
| "kl": 2.36318359375, | |
| "learning_rate": 6.466250186922324e-07, | |
| "loss": 0.0945, | |
| "reward": 2.48828125, | |
| "reward_std": 0.45276672914624216, | |
| "rewards/accuracy_reward": 0.488671875, | |
| "rewards/format_reward": 0.978515625, | |
| "rewards/influence_reward": 0.403515625, | |
| "rewards/len_reward": 0.617578125, | |
| "step": 100 | |
| }, | |
| { | |
| "completion_length": 288.877734375, | |
| "epoch": 0.9988109393579072, | |
| "grad_norm": 3.7990751409015475, | |
| "kl": 2.427783203125, | |
| "learning_rate": 6.066498153718734e-07, | |
| "loss": 0.0971, | |
| "reward": 2.457421875, | |
| "reward_std": 0.5042316474020481, | |
| "rewards/accuracy_reward": 0.47734375, | |
| "rewards/format_reward": 0.9734375, | |
| "rewards/influence_reward": 0.380859375, | |
| "rewards/len_reward": 0.62578125, | |
| "step": 105 | |
| }, | |
| { | |
| "completion_length": 274.04876893939394, | |
| "epoch": 1.0380499405469679, | |
| "grad_norm": 22.164672338850448, | |
| "kl": 2.5658735795454546, | |
| "learning_rate": 5.659460856710345e-07, | |
| "loss": 0.1004, | |
| "reward": 2.5634469696969697, | |
| "reward_std": 0.5097437008763804, | |
| "rewards/accuracy_reward": 0.5350378787878788, | |
| "rewards/format_reward": 0.9772727272727273, | |
| "rewards/influence_reward": 0.4308712121212121, | |
| "rewards/len_reward": 0.6202651515151515, | |
| "step": 110 | |
| }, | |
| { | |
| "completion_length": 275.455078125, | |
| "epoch": 1.0856123662306778, | |
| "grad_norm": 3.2333169625918923, | |
| "kl": 2.7962890625, | |
| "learning_rate": 5.247918773366111e-07, | |
| "loss": 0.1119, | |
| "reward": 2.575390625, | |
| "reward_std": 0.4654921619221568, | |
| "rewards/accuracy_reward": 0.5125, | |
| "rewards/format_reward": 0.984375, | |
| "rewards/influence_reward": 0.426953125, | |
| "rewards/len_reward": 0.6515625, | |
| "step": 115 | |
| }, | |
| { | |
| "completion_length": 289.22265625, | |
| "epoch": 1.1331747919143877, | |
| "grad_norm": 6.3823521689302085, | |
| "kl": 2.52705078125, | |
| "learning_rate": 4.834683153413459e-07, | |
| "loss": 0.1011, | |
| "reward": 2.573046875, | |
| "reward_std": 0.4989847050979733, | |
| "rewards/accuracy_reward": 0.515625, | |
| "rewards/format_reward": 0.971875, | |
| "rewards/influence_reward": 0.41875, | |
| "rewards/len_reward": 0.666796875, | |
| "step": 120 | |
| }, | |
| { | |
| "completion_length": 269.36796875, | |
| "epoch": 1.1807372175980975, | |
| "grad_norm": 9.314483672694436, | |
| "kl": 2.718115234375, | |
| "learning_rate": 4.4225768151520694e-07, | |
| "loss": 0.1087, | |
| "reward": 2.595703125, | |
| "reward_std": 0.4847894934937358, | |
| "rewards/accuracy_reward": 0.530078125, | |
| "rewards/format_reward": 0.975, | |
| "rewards/influence_reward": 0.440625, | |
| "rewards/len_reward": 0.65, | |
| "step": 125 | |
| }, | |
| { | |
| "completion_length": 272.870703125, | |
| "epoch": 1.2282996432818074, | |
| "grad_norm": 9.964810062893706, | |
| "kl": 2.692333984375, | |
| "learning_rate": 4.0144148627425986e-07, | |
| "loss": 0.1077, | |
| "reward": 2.548828125, | |
| "reward_std": 0.4715102421119809, | |
| "rewards/accuracy_reward": 0.50546875, | |
| "rewards/format_reward": 0.979296875, | |
| "rewards/influence_reward": 0.421875, | |
| "rewards/len_reward": 0.6421875, | |
| "step": 130 | |
| }, | |
| { | |
| "completion_length": 271.344921875, | |
| "epoch": 1.2758620689655173, | |
| "grad_norm": 4.882054754352994, | |
| "kl": 2.486865234375, | |
| "learning_rate": 3.612985456190778e-07, | |
| "loss": 0.0995, | |
| "reward": 2.56328125, | |
| "reward_std": 0.47647856548428535, | |
| "rewards/accuracy_reward": 0.51328125, | |
| "rewards/format_reward": 0.978515625, | |
| "rewards/influence_reward": 0.422265625, | |
| "rewards/len_reward": 0.64921875, | |
| "step": 135 | |
| }, | |
| { | |
| "completion_length": 274.757421875, | |
| "epoch": 1.323424494649227, | |
| "grad_norm": 4.091037506852225, | |
| "kl": 2.571435546875, | |
| "learning_rate": 3.221030765387417e-07, | |
| "loss": 0.1029, | |
| "reward": 2.5890625, | |
| "reward_std": 0.47138190008699893, | |
| "rewards/accuracy_reward": 0.512109375, | |
| "rewards/format_reward": 0.978515625, | |
| "rewards/influence_reward": 0.434765625, | |
| "rewards/len_reward": 0.663671875, | |
| "step": 140 | |
| }, | |
| { | |
| "completion_length": 273.21875, | |
| "epoch": 1.370986920332937, | |
| "grad_norm": 3.923917161371445, | |
| "kl": 2.58369140625, | |
| "learning_rate": 2.841228238307536e-07, | |
| "loss": 0.1033, | |
| "reward": 2.52421875, | |
| "reward_std": 0.48247870467603204, | |
| "rewards/accuracy_reward": 0.496875, | |
| "rewards/format_reward": 0.973046875, | |
| "rewards/influence_reward": 0.407421875, | |
| "rewards/len_reward": 0.646875, | |
| "step": 145 | |
| }, | |
| { | |
| "completion_length": 270.95703125, | |
| "epoch": 1.418549346016647, | |
| "grad_norm": 3.288471428585341, | |
| "kl": 2.6228515625, | |
| "learning_rate": 2.476172311325783e-07, | |
| "loss": 0.1049, | |
| "reward": 2.5703125, | |
| "reward_std": 0.530765401944518, | |
| "rewards/accuracy_reward": 0.52265625, | |
| "rewards/format_reward": 0.975390625, | |
| "rewards/influence_reward": 0.416796875, | |
| "rewards/len_reward": 0.65546875, | |
| "step": 150 | |
| }, | |
| { | |
| "completion_length": 276.080859375, | |
| "epoch": 1.4661117717003567, | |
| "grad_norm": 5.406976549541725, | |
| "kl": 3.496875, | |
| "learning_rate": 2.128356686585282e-07, | |
| "loss": 0.1399, | |
| "reward": 2.603515625, | |
| "reward_std": 0.5087036734446884, | |
| "rewards/accuracy_reward": 0.539453125, | |
| "rewards/format_reward": 0.976171875, | |
| "rewards/influence_reward": 0.444140625, | |
| "rewards/len_reward": 0.64375, | |
| "step": 155 | |
| }, | |
| { | |
| "completion_length": 283.583203125, | |
| "epoch": 1.5136741973840666, | |
| "grad_norm": 2.4719135321159986, | |
| "kl": 2.696875, | |
| "learning_rate": 1.8001572974834168e-07, | |
| "loss": 0.1079, | |
| "reward": 2.537109375, | |
| "reward_std": 0.4965396413579583, | |
| "rewards/accuracy_reward": 0.495703125, | |
| "rewards/format_reward": 0.97890625, | |
| "rewards/influence_reward": 0.4078125, | |
| "rewards/len_reward": 0.6546875, | |
| "step": 160 | |
| }, | |
| { | |
| "completion_length": 281.8515625, | |
| "epoch": 1.5612366230677766, | |
| "grad_norm": 6.845144411654133, | |
| "kl": 2.631396484375, | |
| "learning_rate": 1.493816078637557e-07, | |
| "loss": 0.1052, | |
| "reward": 2.54296875, | |
| "reward_std": 0.48724669627845285, | |
| "rewards/accuracy_reward": 0.501171875, | |
| "rewards/format_reward": 0.97421875, | |
| "rewards/influence_reward": 0.41015625, | |
| "rewards/len_reward": 0.657421875, | |
| "step": 165 | |
| }, | |
| { | |
| "completion_length": 276.32578125, | |
| "epoch": 1.6087990487514863, | |
| "grad_norm": 2.820753870764231, | |
| "kl": 2.634814453125, | |
| "learning_rate": 1.2114256511983274e-07, | |
| "loss": 0.1054, | |
| "reward": 2.5515625, | |
| "reward_std": 0.492490841075778, | |
| "rewards/accuracy_reward": 0.501171875, | |
| "rewards/format_reward": 0.978125, | |
| "rewards/influence_reward": 0.408203125, | |
| "rewards/len_reward": 0.6640625, | |
| "step": 170 | |
| }, | |
| { | |
| "completion_length": 282.89296875, | |
| "epoch": 1.6563614744351962, | |
| "grad_norm": 3.065824553875067, | |
| "kl": 2.65234375, | |
| "learning_rate": 9.549150281252632e-08, | |
| "loss": 0.1061, | |
| "reward": 2.530078125, | |
| "reward_std": 0.4955192942172289, | |
| "rewards/accuracy_reward": 0.5, | |
| "rewards/format_reward": 0.973046875, | |
| "rewards/influence_reward": 0.4046875, | |
| "rewards/len_reward": 0.65234375, | |
| "step": 175 | |
| }, | |
| { | |
| "completion_length": 274.28359375, | |
| "epoch": 1.7039239001189062, | |
| "grad_norm": 3.281579185573704, | |
| "kl": 2.66640625, | |
| "learning_rate": 7.260364370723043e-08, | |
| "loss": 0.1066, | |
| "reward": 2.540625, | |
| "reward_std": 0.45585995763540266, | |
| "rewards/accuracy_reward": 0.504296875, | |
| "rewards/format_reward": 0.98203125, | |
| "rewards/influence_reward": 0.420703125, | |
| "rewards/len_reward": 0.63359375, | |
| "step": 180 | |
| }, | |
| { | |
| "completion_length": 282.63828125, | |
| "epoch": 1.7514863258026159, | |
| "grad_norm": 7.835917126431856, | |
| "kl": 2.7076171875, | |
| "learning_rate": 5.263533508961826e-08, | |
| "loss": 0.1083, | |
| "reward": 2.524609375, | |
| "reward_std": 0.49459295999258757, | |
| "rewards/accuracy_reward": 0.486328125, | |
| "rewards/format_reward": 0.97578125, | |
| "rewards/influence_reward": 0.405859375, | |
| "rewards/len_reward": 0.656640625, | |
| "step": 185 | |
| }, | |
| { | |
| "completion_length": 279.878515625, | |
| "epoch": 1.7990487514863258, | |
| "grad_norm": 3.407510264782874, | |
| "kl": 2.6724609375, | |
| "learning_rate": 3.572298075514652e-08, | |
| "loss": 0.1069, | |
| "reward": 2.572265625, | |
| "reward_std": 0.48079199306666853, | |
| "rewards/accuracy_reward": 0.5140625, | |
| "rewards/format_reward": 0.980859375, | |
| "rewards/influence_reward": 0.436328125, | |
| "rewards/len_reward": 0.641015625, | |
| "step": 190 | |
| }, | |
| { | |
| "completion_length": 280.059375, | |
| "epoch": 1.8466111771700358, | |
| "grad_norm": 20.679733097388006, | |
| "kl": 2.6583984375, | |
| "learning_rate": 2.1982109232821176e-08, | |
| "loss": 0.1063, | |
| "reward": 2.49453125, | |
| "reward_std": 0.4927243089303374, | |
| "rewards/accuracy_reward": 0.498828125, | |
| "rewards/format_reward": 0.97734375, | |
| "rewards/influence_reward": 0.403125, | |
| "rewards/len_reward": 0.615234375, | |
| "step": 195 | |
| }, | |
| { | |
| "completion_length": 278.7625, | |
| "epoch": 1.8941736028537455, | |
| "grad_norm": 2.48795349080197, | |
| "kl": 2.6826171875, | |
| "learning_rate": 1.1506584608200364e-08, | |
| "loss": 0.1073, | |
| "reward": 2.60546875, | |
| "reward_std": 0.4754039028659463, | |
| "rewards/accuracy_reward": 0.536328125, | |
| "rewards/format_reward": 0.981640625, | |
| "rewards/influence_reward": 0.446875, | |
| "rewards/len_reward": 0.640625, | |
| "step": 200 | |
| }, | |
| { | |
| "completion_length": 275.9921875, | |
| "epoch": 1.9417360285374554, | |
| "grad_norm": 2.346124453455794, | |
| "kl": 2.770361328125, | |
| "learning_rate": 4.367965336512403e-09, | |
| "loss": 0.1108, | |
| "reward": 2.6203125, | |
| "reward_std": 0.5024769959971309, | |
| "rewards/accuracy_reward": 0.5328125, | |
| "rewards/format_reward": 0.9796875, | |
| "rewards/influence_reward": 0.45, | |
| "rewards/len_reward": 0.6578125, | |
| "step": 205 | |
| }, | |
| { | |
| "completion_length": 280.690234375, | |
| "epoch": 1.9892984542211654, | |
| "grad_norm": 2.0191058844209615, | |
| "kl": 2.69677734375, | |
| "learning_rate": 6.150154258476314e-10, | |
| "loss": 0.1079, | |
| "reward": 2.544140625, | |
| "reward_std": 0.5300922216847539, | |
| "rewards/accuracy_reward": 0.4984375, | |
| "rewards/format_reward": 0.980078125, | |
| "rewards/influence_reward": 0.40703125, | |
| "rewards/len_reward": 0.65859375, | |
| "step": 210 | |
| }, | |
| { | |
| "completion_length": 299.63368055555554, | |
| "epoch": 2.0, | |
| "kl": 2.6888020833333335, | |
| "reward": 2.5711805555555554, | |
| "reward_std": 0.5023611783981323, | |
| "rewards/accuracy_reward": 0.5399305555555556, | |
| "rewards/format_reward": 0.9652777777777778, | |
| "rewards/influence_reward": 0.4131944444444444, | |
| "rewards/len_reward": 0.6527777777777778, | |
| "step": 212, | |
| "total_flos": 0.0, | |
| "train_loss": 0.0899008925090421, | |
| "train_runtime": 180643.6048, | |
| "train_samples_per_second": 0.149, | |
| "train_steps_per_second": 0.001 | |
| } | |
| ], | |
| "logging_steps": 5, | |
| "max_steps": 212, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 2, | |
| "save_steps": 10, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |