diff --git a/.gitattributes b/.gitattributes index 11c9c0eb35153f80ca73dea4c86dcc324da9da50..4b2facc4f8ae70c2ea08661a821bd054eaa45757 100644 --- a/.gitattributes +++ b/.gitattributes @@ -38,3 +38,14 @@ gemma-2b-dpo/tokenizer.json filter=lfs diff=lfs merge=lfs -text gemma-9b-dpo/tokenizer.json filter=lfs diff=lfs merge=lfs -text gguf/gemma-2b-dpo-q4_k_m.gguf filter=lfs diff=lfs merge=lfs -text gguf/gemma-2b-distilled-q4_k_m.gguf filter=lfs diff=lfs merge=lfs -text +gemma-2b-dpo/checkpoint-100/tokenizer.json filter=lfs diff=lfs merge=lfs -text +gemma-2b-dpo/checkpoint-150/tokenizer.json filter=lfs diff=lfs merge=lfs -text +gemma-2b-dpo/checkpoint-200/tokenizer.json filter=lfs diff=lfs merge=lfs -text +gemma-2b-dpo/checkpoint-250/tokenizer.json filter=lfs diff=lfs merge=lfs -text +gemma-2b-dpo/checkpoint-300/tokenizer.json filter=lfs diff=lfs merge=lfs -text +gemma-2b-dpo/checkpoint-350/tokenizer.json filter=lfs diff=lfs merge=lfs -text +gemma-2b-dpo/checkpoint-400/tokenizer.json filter=lfs diff=lfs merge=lfs -text +gemma-2b-dpo/checkpoint-450/tokenizer.json filter=lfs diff=lfs merge=lfs -text +gemma-2b-dpo/checkpoint-50/tokenizer.json filter=lfs diff=lfs merge=lfs -text +gemma-2b-dpo/checkpoint-500/tokenizer.json filter=lfs diff=lfs merge=lfs -text +gemma-2b-dpo/checkpoint-540/tokenizer.json filter=lfs diff=lfs merge=lfs -text diff --git a/gemma-2b-dpo/README.md b/gemma-2b-dpo/README.md new file mode 100644 index 0000000000000000000000000000000000000000..a5d41c95857125509ad7e5ad4027bdf78c9187bb --- /dev/null +++ b/gemma-2b-dpo/README.md @@ -0,0 +1,72 @@ +--- +base_model: google/gemma-2-2b-it +library_name: peft +model_name: gemma-2b-dpo-600 +tags: +- base_model:adapter:google/gemma-2-2b-it +- dpo +- lora +- transformers +- trl +licence: license +pipeline_tag: text-generation +--- + +# Model Card for gemma-2b-dpo-600 + +This model is a fine-tuned version of [google/gemma-2-2b-it](https://huggingface.co/google/gemma-2-2b-it). +It has been trained using [TRL](https://github.com/huggingface/trl). + +## Quick start + +```python +from transformers import pipeline + +question = "If you had a time machine, but could only go to the past or the future once and never return, which would you choose and why?" +generator = pipeline("text-generation", model="None", device="cuda") +output = generator([{"role": "user", "content": question}], max_new_tokens=128, return_full_text=False)[0] +print(output["generated_text"]) +``` + +## Training procedure + + + + +This model was trained with DPO, a method introduced in [Direct Preference Optimization: Your Language Model is Secretly a Reward Model](https://huggingface.co/papers/2305.18290). + +### Framework versions + +- PEFT 0.18.1 +- TRL: 0.28.0 +- Transformers: 5.2.0 +- Pytorch: 2.5.1+cu124 +- Datasets: 4.5.0 +- Tokenizers: 0.22.2 + +## Citations + +Cite DPO as: + +```bibtex +@inproceedings{rafailov2023direct, + title = {{Direct Preference Optimization: Your Language Model is Secretly a Reward Model}}, + author = {Rafael Rafailov and Archit Sharma and Eric Mitchell and Christopher D. Manning and Stefano Ermon and Chelsea Finn}, + year = 2023, + booktitle = {Advances in Neural Information Processing Systems 36: Annual Conference on Neural Information Processing Systems 2023, NeurIPS 2023, New Orleans, LA, USA, December 10 - 16, 2023}, + url = {http://papers.nips.cc/paper_files/paper/2023/hash/a85b405ed65c6477a4fe8302b5e06ce7-Abstract-Conference.html}, + editor = {Alice Oh and Tristan Naumann and Amir Globerson and Kate Saenko and Moritz Hardt and Sergey Levine}, +} +``` + +Cite TRL as: + +```bibtex +@software{vonwerra2020trl, + title = {{TRL: Transformers Reinforcement Learning}}, + author = {von Werra, Leandro and Belkada, Younes and Tunstall, Lewis and Beeching, Edward and Thrush, Tristan and Lambert, Nathan and Huang, Shengyi and Rasul, Kashif and Gallouédec, Quentin}, + license = {Apache-2.0}, + url = {https://github.com/huggingface/trl}, + year = {2020} +} +``` \ No newline at end of file diff --git a/gemma-2b-dpo/checkpoint-100/README.md b/gemma-2b-dpo/checkpoint-100/README.md new file mode 100644 index 0000000000000000000000000000000000000000..14d70987363c6b5210911e1b79d69ac05ee2d8b7 --- /dev/null +++ b/gemma-2b-dpo/checkpoint-100/README.md @@ -0,0 +1,209 @@ +--- +base_model: google/gemma-2-2b-it +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:google/gemma-2-2b-it +- dpo +- lora +- transformers +- trl +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/gemma-2b-dpo/checkpoint-100/adapter_config.json b/gemma-2b-dpo/checkpoint-100/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..bbaa22368e6c52ef59d4559074221a5020aba608 --- /dev/null +++ b/gemma-2b-dpo/checkpoint-100/adapter_config.json @@ -0,0 +1,46 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": null, + "base_model_name_or_path": "google/gemma-2-2b-it", + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "up_proj", + "q_proj", + "v_proj", + "gate_proj", + "k_proj", + "down_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/gemma-2b-dpo/checkpoint-100/adapter_model.safetensors b/gemma-2b-dpo/checkpoint-100/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4f0d1f9b0a3f212601c77f02b02daa51e349d295 --- /dev/null +++ b/gemma-2b-dpo/checkpoint-100/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ec2ebd73f8cbaa2e4529e7145398cf6eab41cc20ab84f8803b6740f2b62d3cd9 +size 83115256 diff --git a/gemma-2b-dpo/checkpoint-100/chat_template.jinja b/gemma-2b-dpo/checkpoint-100/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..923ec253c8dbefbb41cf084db7251df41d000f6d --- /dev/null +++ b/gemma-2b-dpo/checkpoint-100/chat_template.jinja @@ -0,0 +1,4 @@ +{{ bos_token }}{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if (message['role'] == 'assistant') %}{% set role = 'model' %}{% else %}{% set role = message['role'] %}{% endif %}{{ '' + role + ' +' + message['content'] | trim + ' +' }}{% endfor %}{% if add_generation_prompt %}{{'model +'}}{% endif %} \ No newline at end of file diff --git a/gemma-2b-dpo/checkpoint-100/optimizer.pt b/gemma-2b-dpo/checkpoint-100/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..6b2e3b5eca63077cdb918f809fd95dbc5d40d0a3 --- /dev/null +++ b/gemma-2b-dpo/checkpoint-100/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c4121779bcb5b52c4e6bf9f35c31b434b1d3e5da9512b366d7ddf549ba0d6843 +size 42616388 diff --git a/gemma-2b-dpo/checkpoint-100/rng_state.pth b/gemma-2b-dpo/checkpoint-100/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..897ad19948758b40c785c678c136c4029433a27a --- /dev/null +++ b/gemma-2b-dpo/checkpoint-100/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d9cd6a0487226e5bd30d1846894c82af483733ab4381b75bae9c0745e05d405 +size 14244 diff --git a/gemma-2b-dpo/checkpoint-100/scheduler.pt b/gemma-2b-dpo/checkpoint-100/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..dc67d71588dc2e73bc5a5eec2043123455b06bd7 --- /dev/null +++ b/gemma-2b-dpo/checkpoint-100/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:59ed8b62efe8bca262c392a5e54068f61391c5a2eaa96781fe40ad3af0958511 +size 1064 diff --git a/gemma-2b-dpo/checkpoint-100/tokenizer.json b/gemma-2b-dpo/checkpoint-100/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..6523cc0616b64c563af913a417dfa7eb01549a2c --- /dev/null +++ b/gemma-2b-dpo/checkpoint-100/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:394ace002a144ac6ad5486387502f2d36f70c087310c3d907857240c76fcb36e +size 34362748 diff --git a/gemma-2b-dpo/checkpoint-100/tokenizer_config.json b/gemma-2b-dpo/checkpoint-100/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..eabc4ed8ab34751069a1970d61e616ce93e53880 --- /dev/null +++ b/gemma-2b-dpo/checkpoint-100/tokenizer_config.json @@ -0,0 +1,19 @@ +{ + "backend": "tokenizers", + "bos_token": "", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "extra_special_tokens": [ + "", + "" + ], + "is_local": false, + "mask_token": "", + "model_max_length": 1000000000000000019884624838656, + "pad_token": "", + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "tokenizer_class": "GemmaTokenizer", + "unk_token": "", + "use_default_system_prompt": false +} diff --git a/gemma-2b-dpo/checkpoint-100/trainer_state.json b/gemma-2b-dpo/checkpoint-100/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..cb7e2e4454d9b6df89170ea4911b00bd37bda8a0 --- /dev/null +++ b/gemma-2b-dpo/checkpoint-100/trainer_state.json @@ -0,0 +1,334 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.5578800557880056, + "eval_steps": 500, + "global_step": 100, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.02789400278940028, + "grad_norm": 2.7052793502807617, + "learning_rate": 3.7037037037037036e-07, + "logits/chosen": -6.208017826080322, + "logits/rejected": -6.18649435043335, + "logps/chosen": -417.861328125, + "logps/rejected": -431.774169921875, + "loss": 0.6978574275970459, + "rewards/accuracies": 0.25, + "rewards/chosen": 0.0027643204666674137, + "rewards/margins": -0.00830078125, + "rewards/rejected": 0.011065103113651276, + "step": 5 + }, + { + "epoch": 0.05578800557880056, + "grad_norm": 1.9632341861724854, + "learning_rate": 8.333333333333333e-07, + "logits/chosen": -6.051701545715332, + "logits/rejected": -6.098549842834473, + "logps/chosen": -442.61126708984375, + "logps/rejected": -419.4737243652344, + "loss": 0.6965099811553955, + "rewards/accuracies": 0.44999998807907104, + "rewards/chosen": -0.0022751614451408386, + "rewards/margins": -0.004312096629291773, + "rewards/rejected": 0.002036933321505785, + "step": 10 + }, + { + "epoch": 0.08368200836820083, + "grad_norm": 1.9358311891555786, + "learning_rate": 1.2962962962962962e-06, + "logits/chosen": -6.1482648849487305, + "logits/rejected": -6.208896636962891, + "logps/chosen": -419.15155029296875, + "logps/rejected": -393.37322998046875, + "loss": 0.6971890449523925, + "rewards/accuracies": 0.4749999940395355, + "rewards/chosen": 0.004753150977194309, + "rewards/margins": -0.006633720360696316, + "rewards/rejected": 0.0113868722692132, + "step": 15 + }, + { + "epoch": 0.11157601115760112, + "grad_norm": 2.137960195541382, + "learning_rate": 1.7592592592592594e-06, + "logits/chosen": -6.1889142990112305, + "logits/rejected": -6.147027015686035, + "logps/chosen": -449.2413024902344, + "logps/rejected": -387.8244934082031, + "loss": 0.694630479812622, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.01739494316279888, + "rewards/margins": -0.0010753620881587267, + "rewards/rejected": -0.016319578513503075, + "step": 20 + }, + { + "epoch": 0.1394700139470014, + "grad_norm": 2.610708475112915, + "learning_rate": 2.222222222222222e-06, + "logits/chosen": -6.098985195159912, + "logits/rejected": -6.146561145782471, + "logps/chosen": -528.6546020507812, + "logps/rejected": -517.2868041992188, + "loss": 0.6923945903778076, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.013269426301121712, + "rewards/margins": 0.004172402434051037, + "rewards/rejected": 0.009097023867070675, + "step": 25 + }, + { + "epoch": 0.16736401673640167, + "grad_norm": 3.0792224407196045, + "learning_rate": 2.6851851851851856e-06, + "logits/chosen": -6.156611442565918, + "logits/rejected": -6.146718502044678, + "logps/chosen": -427.1123962402344, + "logps/rejected": -413.99810791015625, + "loss": 0.6963389396667481, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.0016099174972623587, + "rewards/margins": -0.0036583715118467808, + "rewards/rejected": 0.0052682883106172085, + "step": 30 + }, + { + "epoch": 0.19525801952580196, + "grad_norm": 2.40751051902771, + "learning_rate": 3.1481481481481483e-06, + "logits/chosen": -6.270221710205078, + "logits/rejected": -6.222764492034912, + "logps/chosen": -433.89312744140625, + "logps/rejected": -442.81378173828125, + "loss": 0.6875874042510987, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.003747978014871478, + "rewards/margins": 0.013033255934715271, + "rewards/rejected": -0.016781235113739967, + "step": 35 + }, + { + "epoch": 0.22315202231520223, + "grad_norm": 2.409308671951294, + "learning_rate": 3.6111111111111115e-06, + "logits/chosen": -6.171980857849121, + "logits/rejected": -6.236737251281738, + "logps/chosen": -411.51092529296875, + "logps/rejected": -454.578857421875, + "loss": 0.6975872993469239, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.0006048586219549179, + "rewards/margins": -0.006687240209430456, + "rewards/rejected": 0.0060823829844594, + "step": 40 + }, + { + "epoch": 0.2510460251046025, + "grad_norm": 2.8261911869049072, + "learning_rate": 4.074074074074074e-06, + "logits/chosen": -6.1633710861206055, + "logits/rejected": -6.245741367340088, + "logps/chosen": -373.363525390625, + "logps/rejected": -356.736572265625, + "loss": 0.6881499290466309, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": 0.01782766357064247, + "rewards/margins": 0.015053692273795605, + "rewards/rejected": 0.002773971762508154, + "step": 45 + }, + { + "epoch": 0.2789400278940028, + "grad_norm": 2.457179546356201, + "learning_rate": 4.537037037037038e-06, + "logits/chosen": -6.270019054412842, + "logits/rejected": -6.3202104568481445, + "logps/chosen": -466.30609130859375, + "logps/rejected": -476.45550537109375, + "loss": 0.6831833839416503, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": 0.0046669007278978825, + "rewards/margins": 0.023042945191264153, + "rewards/rejected": -0.018376046791672707, + "step": 50 + }, + { + "epoch": 0.3068340306834031, + "grad_norm": 1.6770554780960083, + "learning_rate": 5e-06, + "logits/chosen": -6.253265380859375, + "logits/rejected": -6.15267276763916, + "logps/chosen": -352.24908447265625, + "logps/rejected": -447.11444091796875, + "loss": 0.6791603088378906, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.010960197076201439, + "rewards/margins": 0.031198084354400635, + "rewards/rejected": -0.042158275842666626, + "step": 55 + }, + { + "epoch": 0.33472803347280333, + "grad_norm": 2.6027019023895264, + "learning_rate": 4.9485596707818935e-06, + "logits/chosen": -6.205387592315674, + "logits/rejected": -6.259293079376221, + "logps/chosen": -439.732421875, + "logps/rejected": -412.8099670410156, + "loss": 0.6736814975738525, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.010224836878478527, + "rewards/margins": 0.04464374855160713, + "rewards/rejected": -0.054868586361408234, + "step": 60 + }, + { + "epoch": 0.36262203626220363, + "grad_norm": 2.1717166900634766, + "learning_rate": 4.897119341563787e-06, + "logits/chosen": -6.1334547996521, + "logits/rejected": -6.148266792297363, + "logps/chosen": -390.00433349609375, + "logps/rejected": -376.676513671875, + "loss": 0.6825191974639893, + "rewards/accuracies": 0.5249999761581421, + "rewards/chosen": -0.04612758383154869, + "rewards/margins": 0.03951488807797432, + "rewards/rejected": -0.08564247190952301, + "step": 65 + }, + { + "epoch": 0.3905160390516039, + "grad_norm": 2.2574119567871094, + "learning_rate": 4.845679012345679e-06, + "logits/chosen": -6.236250877380371, + "logits/rejected": -6.165186882019043, + "logps/chosen": -411.1315002441406, + "logps/rejected": -447.22100830078125, + "loss": 0.6402256488800049, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.017545931041240692, + "rewards/margins": 0.12250369787216187, + "rewards/rejected": -0.14004963636398315, + "step": 70 + }, + { + "epoch": 0.41841004184100417, + "grad_norm": 2.3837037086486816, + "learning_rate": 4.794238683127572e-06, + "logits/chosen": -6.256176948547363, + "logits/rejected": -6.213258266448975, + "logps/chosen": -437.463623046875, + "logps/rejected": -404.8554992675781, + "loss": 0.6703986167907715, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.03428981825709343, + "rewards/margins": 0.05360151082277298, + "rewards/rejected": -0.08789133280515671, + "step": 75 + }, + { + "epoch": 0.44630404463040446, + "grad_norm": 3.304287910461426, + "learning_rate": 4.742798353909465e-06, + "logits/chosen": -6.2820305824279785, + "logits/rejected": -6.221312522888184, + "logps/chosen": -455.2318420410156, + "logps/rejected": -422.08837890625, + "loss": 0.7040590286254883, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.0881255492568016, + "rewards/margins": -0.006679975427687168, + "rewards/rejected": -0.08144557476043701, + "step": 80 + }, + { + "epoch": 0.47419804741980476, + "grad_norm": 2.6312427520751953, + "learning_rate": 4.691358024691358e-06, + "logits/chosen": -6.1796159744262695, + "logits/rejected": -6.193436622619629, + "logps/chosen": -423.59930419921875, + "logps/rejected": -486.1690979003906, + "loss": 0.6397527694702149, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.043790053576231, + "rewards/margins": 0.12486596405506134, + "rewards/rejected": -0.16865602135658264, + "step": 85 + }, + { + "epoch": 0.502092050209205, + "grad_norm": 2.3493549823760986, + "learning_rate": 4.6399176954732515e-06, + "logits/chosen": -6.136630058288574, + "logits/rejected": -6.202858924865723, + "logps/chosen": -467.627685546875, + "logps/rejected": -441.41241455078125, + "loss": 0.5936434745788575, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": 0.06891433894634247, + "rewards/margins": 0.26186972856521606, + "rewards/rejected": -0.19295534491539001, + "step": 90 + }, + { + "epoch": 0.5299860529986054, + "grad_norm": 2.4952447414398193, + "learning_rate": 4.588477366255145e-06, + "logits/chosen": -6.1503586769104, + "logits/rejected": -6.144400596618652, + "logps/chosen": -355.2735290527344, + "logps/rejected": -409.51702880859375, + "loss": 0.6157774925231934, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.0699276328086853, + "rewards/margins": 0.22694334387779236, + "rewards/rejected": -0.2968709468841553, + "step": 95 + }, + { + "epoch": 0.5578800557880056, + "grad_norm": 2.5470480918884277, + "learning_rate": 4.537037037037038e-06, + "logits/chosen": -6.14028263092041, + "logits/rejected": -6.104605197906494, + "logps/chosen": -429.1048889160156, + "logps/rejected": -454.7377014160156, + "loss": 0.6300024032592774, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.05769091844558716, + "rewards/margins": 0.14401891827583313, + "rewards/rejected": -0.2017098367214203, + "step": 100 + } + ], + "logging_steps": 5, + "max_steps": 540, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 50, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 0.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/gemma-2b-dpo/checkpoint-100/training_args.bin b/gemma-2b-dpo/checkpoint-100/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..c02f6b48421cea8396dfcaf6d8319b0150c14d7b --- /dev/null +++ b/gemma-2b-dpo/checkpoint-100/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3be0616829fa305775b58136a03c46cbb233332c99572ecc66875666e4681dc9 +size 5688 diff --git a/gemma-2b-dpo/checkpoint-150/README.md b/gemma-2b-dpo/checkpoint-150/README.md new file mode 100644 index 0000000000000000000000000000000000000000..14d70987363c6b5210911e1b79d69ac05ee2d8b7 --- /dev/null +++ b/gemma-2b-dpo/checkpoint-150/README.md @@ -0,0 +1,209 @@ +--- +base_model: google/gemma-2-2b-it +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:google/gemma-2-2b-it +- dpo +- lora +- transformers +- trl +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/gemma-2b-dpo/checkpoint-150/adapter_config.json b/gemma-2b-dpo/checkpoint-150/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..bbaa22368e6c52ef59d4559074221a5020aba608 --- /dev/null +++ b/gemma-2b-dpo/checkpoint-150/adapter_config.json @@ -0,0 +1,46 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": null, + "base_model_name_or_path": "google/gemma-2-2b-it", + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "up_proj", + "q_proj", + "v_proj", + "gate_proj", + "k_proj", + "down_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/gemma-2b-dpo/checkpoint-150/adapter_model.safetensors b/gemma-2b-dpo/checkpoint-150/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..646a346eff77b799c56ef9965b597a1eca7907eb --- /dev/null +++ b/gemma-2b-dpo/checkpoint-150/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e0e7281ac038a3a06b5f6bdf4372abd75e6c056a4533e9e5e306ad3eed5008bb +size 83115256 diff --git a/gemma-2b-dpo/checkpoint-150/chat_template.jinja b/gemma-2b-dpo/checkpoint-150/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..923ec253c8dbefbb41cf084db7251df41d000f6d --- /dev/null +++ b/gemma-2b-dpo/checkpoint-150/chat_template.jinja @@ -0,0 +1,4 @@ +{{ bos_token }}{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if (message['role'] == 'assistant') %}{% set role = 'model' %}{% else %}{% set role = message['role'] %}{% endif %}{{ '' + role + ' +' + message['content'] | trim + ' +' }}{% endfor %}{% if add_generation_prompt %}{{'model +'}}{% endif %} \ No newline at end of file diff --git a/gemma-2b-dpo/checkpoint-150/optimizer.pt b/gemma-2b-dpo/checkpoint-150/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..1e576c07489992d352801bea969036520e12d06b --- /dev/null +++ b/gemma-2b-dpo/checkpoint-150/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:af476b3fe04731657a48b4630d7070fac2df55bd08f5de1eed63298e735a9b38 +size 42616388 diff --git a/gemma-2b-dpo/checkpoint-150/rng_state.pth b/gemma-2b-dpo/checkpoint-150/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..897ad19948758b40c785c678c136c4029433a27a --- /dev/null +++ b/gemma-2b-dpo/checkpoint-150/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d9cd6a0487226e5bd30d1846894c82af483733ab4381b75bae9c0745e05d405 +size 14244 diff --git a/gemma-2b-dpo/checkpoint-150/scheduler.pt b/gemma-2b-dpo/checkpoint-150/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..4b7826ba8ee780cd03a618cdcbc1da54780a9cc0 --- /dev/null +++ b/gemma-2b-dpo/checkpoint-150/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:25dd7b08411f6698ef9ec14f5080c8865e627e9cdd16cc854ed216ed294c1f45 +size 1064 diff --git a/gemma-2b-dpo/checkpoint-150/tokenizer.json b/gemma-2b-dpo/checkpoint-150/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..6523cc0616b64c563af913a417dfa7eb01549a2c --- /dev/null +++ b/gemma-2b-dpo/checkpoint-150/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:394ace002a144ac6ad5486387502f2d36f70c087310c3d907857240c76fcb36e +size 34362748 diff --git a/gemma-2b-dpo/checkpoint-150/tokenizer_config.json b/gemma-2b-dpo/checkpoint-150/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..eabc4ed8ab34751069a1970d61e616ce93e53880 --- /dev/null +++ b/gemma-2b-dpo/checkpoint-150/tokenizer_config.json @@ -0,0 +1,19 @@ +{ + "backend": "tokenizers", + "bos_token": "", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "extra_special_tokens": [ + "", + "" + ], + "is_local": false, + "mask_token": "", + "model_max_length": 1000000000000000019884624838656, + "pad_token": "", + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "tokenizer_class": "GemmaTokenizer", + "unk_token": "", + "use_default_system_prompt": false +} diff --git a/gemma-2b-dpo/checkpoint-150/trainer_state.json b/gemma-2b-dpo/checkpoint-150/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..be1a312e378135a7346d4ccceb079c6bf3546bf7 --- /dev/null +++ b/gemma-2b-dpo/checkpoint-150/trainer_state.json @@ -0,0 +1,484 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.8368200836820083, + "eval_steps": 500, + "global_step": 150, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.02789400278940028, + "grad_norm": 2.7052793502807617, + "learning_rate": 3.7037037037037036e-07, + "logits/chosen": -6.208017826080322, + "logits/rejected": -6.18649435043335, + "logps/chosen": -417.861328125, + "logps/rejected": -431.774169921875, + "loss": 0.6978574275970459, + "rewards/accuracies": 0.25, + "rewards/chosen": 0.0027643204666674137, + "rewards/margins": -0.00830078125, + "rewards/rejected": 0.011065103113651276, + "step": 5 + }, + { + "epoch": 0.05578800557880056, + "grad_norm": 1.9632341861724854, + "learning_rate": 8.333333333333333e-07, + "logits/chosen": -6.051701545715332, + "logits/rejected": -6.098549842834473, + "logps/chosen": -442.61126708984375, + "logps/rejected": -419.4737243652344, + "loss": 0.6965099811553955, + "rewards/accuracies": 0.44999998807907104, + "rewards/chosen": -0.0022751614451408386, + "rewards/margins": -0.004312096629291773, + "rewards/rejected": 0.002036933321505785, + "step": 10 + }, + { + "epoch": 0.08368200836820083, + "grad_norm": 1.9358311891555786, + "learning_rate": 1.2962962962962962e-06, + "logits/chosen": -6.1482648849487305, + "logits/rejected": -6.208896636962891, + "logps/chosen": -419.15155029296875, + "logps/rejected": -393.37322998046875, + "loss": 0.6971890449523925, + "rewards/accuracies": 0.4749999940395355, + "rewards/chosen": 0.004753150977194309, + "rewards/margins": -0.006633720360696316, + "rewards/rejected": 0.0113868722692132, + "step": 15 + }, + { + "epoch": 0.11157601115760112, + "grad_norm": 2.137960195541382, + "learning_rate": 1.7592592592592594e-06, + "logits/chosen": -6.1889142990112305, + "logits/rejected": -6.147027015686035, + "logps/chosen": -449.2413024902344, + "logps/rejected": -387.8244934082031, + "loss": 0.694630479812622, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.01739494316279888, + "rewards/margins": -0.0010753620881587267, + "rewards/rejected": -0.016319578513503075, + "step": 20 + }, + { + "epoch": 0.1394700139470014, + "grad_norm": 2.610708475112915, + "learning_rate": 2.222222222222222e-06, + "logits/chosen": -6.098985195159912, + "logits/rejected": -6.146561145782471, + "logps/chosen": -528.6546020507812, + "logps/rejected": -517.2868041992188, + "loss": 0.6923945903778076, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.013269426301121712, + "rewards/margins": 0.004172402434051037, + "rewards/rejected": 0.009097023867070675, + "step": 25 + }, + { + "epoch": 0.16736401673640167, + "grad_norm": 3.0792224407196045, + "learning_rate": 2.6851851851851856e-06, + "logits/chosen": -6.156611442565918, + "logits/rejected": -6.146718502044678, + "logps/chosen": -427.1123962402344, + "logps/rejected": -413.99810791015625, + "loss": 0.6963389396667481, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.0016099174972623587, + "rewards/margins": -0.0036583715118467808, + "rewards/rejected": 0.0052682883106172085, + "step": 30 + }, + { + "epoch": 0.19525801952580196, + "grad_norm": 2.40751051902771, + "learning_rate": 3.1481481481481483e-06, + "logits/chosen": -6.270221710205078, + "logits/rejected": -6.222764492034912, + "logps/chosen": -433.89312744140625, + "logps/rejected": -442.81378173828125, + "loss": 0.6875874042510987, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.003747978014871478, + "rewards/margins": 0.013033255934715271, + "rewards/rejected": -0.016781235113739967, + "step": 35 + }, + { + "epoch": 0.22315202231520223, + "grad_norm": 2.409308671951294, + "learning_rate": 3.6111111111111115e-06, + "logits/chosen": -6.171980857849121, + "logits/rejected": -6.236737251281738, + "logps/chosen": -411.51092529296875, + "logps/rejected": -454.578857421875, + "loss": 0.6975872993469239, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.0006048586219549179, + "rewards/margins": -0.006687240209430456, + "rewards/rejected": 0.0060823829844594, + "step": 40 + }, + { + "epoch": 0.2510460251046025, + "grad_norm": 2.8261911869049072, + "learning_rate": 4.074074074074074e-06, + "logits/chosen": -6.1633710861206055, + "logits/rejected": -6.245741367340088, + "logps/chosen": -373.363525390625, + "logps/rejected": -356.736572265625, + "loss": 0.6881499290466309, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": 0.01782766357064247, + "rewards/margins": 0.015053692273795605, + "rewards/rejected": 0.002773971762508154, + "step": 45 + }, + { + "epoch": 0.2789400278940028, + "grad_norm": 2.457179546356201, + "learning_rate": 4.537037037037038e-06, + "logits/chosen": -6.270019054412842, + "logits/rejected": -6.3202104568481445, + "logps/chosen": -466.30609130859375, + "logps/rejected": -476.45550537109375, + "loss": 0.6831833839416503, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": 0.0046669007278978825, + "rewards/margins": 0.023042945191264153, + "rewards/rejected": -0.018376046791672707, + "step": 50 + }, + { + "epoch": 0.3068340306834031, + "grad_norm": 1.6770554780960083, + "learning_rate": 5e-06, + "logits/chosen": -6.253265380859375, + "logits/rejected": -6.15267276763916, + "logps/chosen": -352.24908447265625, + "logps/rejected": -447.11444091796875, + "loss": 0.6791603088378906, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.010960197076201439, + "rewards/margins": 0.031198084354400635, + "rewards/rejected": -0.042158275842666626, + "step": 55 + }, + { + "epoch": 0.33472803347280333, + "grad_norm": 2.6027019023895264, + "learning_rate": 4.9485596707818935e-06, + "logits/chosen": -6.205387592315674, + "logits/rejected": -6.259293079376221, + "logps/chosen": -439.732421875, + "logps/rejected": -412.8099670410156, + "loss": 0.6736814975738525, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.010224836878478527, + "rewards/margins": 0.04464374855160713, + "rewards/rejected": -0.054868586361408234, + "step": 60 + }, + { + "epoch": 0.36262203626220363, + "grad_norm": 2.1717166900634766, + "learning_rate": 4.897119341563787e-06, + "logits/chosen": -6.1334547996521, + "logits/rejected": -6.148266792297363, + "logps/chosen": -390.00433349609375, + "logps/rejected": -376.676513671875, + "loss": 0.6825191974639893, + "rewards/accuracies": 0.5249999761581421, + "rewards/chosen": -0.04612758383154869, + "rewards/margins": 0.03951488807797432, + "rewards/rejected": -0.08564247190952301, + "step": 65 + }, + { + "epoch": 0.3905160390516039, + "grad_norm": 2.2574119567871094, + "learning_rate": 4.845679012345679e-06, + "logits/chosen": -6.236250877380371, + "logits/rejected": -6.165186882019043, + "logps/chosen": -411.1315002441406, + "logps/rejected": -447.22100830078125, + "loss": 0.6402256488800049, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.017545931041240692, + "rewards/margins": 0.12250369787216187, + "rewards/rejected": -0.14004963636398315, + "step": 70 + }, + { + "epoch": 0.41841004184100417, + "grad_norm": 2.3837037086486816, + "learning_rate": 4.794238683127572e-06, + "logits/chosen": -6.256176948547363, + "logits/rejected": -6.213258266448975, + "logps/chosen": -437.463623046875, + "logps/rejected": -404.8554992675781, + "loss": 0.6703986167907715, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.03428981825709343, + "rewards/margins": 0.05360151082277298, + "rewards/rejected": -0.08789133280515671, + "step": 75 + }, + { + "epoch": 0.44630404463040446, + "grad_norm": 3.304287910461426, + "learning_rate": 4.742798353909465e-06, + "logits/chosen": -6.2820305824279785, + "logits/rejected": -6.221312522888184, + "logps/chosen": -455.2318420410156, + "logps/rejected": -422.08837890625, + "loss": 0.7040590286254883, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.0881255492568016, + "rewards/margins": -0.006679975427687168, + "rewards/rejected": -0.08144557476043701, + "step": 80 + }, + { + "epoch": 0.47419804741980476, + "grad_norm": 2.6312427520751953, + "learning_rate": 4.691358024691358e-06, + "logits/chosen": -6.1796159744262695, + "logits/rejected": -6.193436622619629, + "logps/chosen": -423.59930419921875, + "logps/rejected": -486.1690979003906, + "loss": 0.6397527694702149, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.043790053576231, + "rewards/margins": 0.12486596405506134, + "rewards/rejected": -0.16865602135658264, + "step": 85 + }, + { + "epoch": 0.502092050209205, + "grad_norm": 2.3493549823760986, + "learning_rate": 4.6399176954732515e-06, + "logits/chosen": -6.136630058288574, + "logits/rejected": -6.202858924865723, + "logps/chosen": -467.627685546875, + "logps/rejected": -441.41241455078125, + "loss": 0.5936434745788575, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": 0.06891433894634247, + "rewards/margins": 0.26186972856521606, + "rewards/rejected": -0.19295534491539001, + "step": 90 + }, + { + "epoch": 0.5299860529986054, + "grad_norm": 2.4952447414398193, + "learning_rate": 4.588477366255145e-06, + "logits/chosen": -6.1503586769104, + "logits/rejected": -6.144400596618652, + "logps/chosen": -355.2735290527344, + "logps/rejected": -409.51702880859375, + "loss": 0.6157774925231934, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.0699276328086853, + "rewards/margins": 0.22694334387779236, + "rewards/rejected": -0.2968709468841553, + "step": 95 + }, + { + "epoch": 0.5578800557880056, + "grad_norm": 2.5470480918884277, + "learning_rate": 4.537037037037038e-06, + "logits/chosen": -6.14028263092041, + "logits/rejected": -6.104605197906494, + "logps/chosen": -429.1048889160156, + "logps/rejected": -454.7377014160156, + "loss": 0.6300024032592774, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.05769091844558716, + "rewards/margins": 0.14401891827583313, + "rewards/rejected": -0.2017098367214203, + "step": 100 + }, + { + "epoch": 0.5857740585774058, + "grad_norm": 2.6023478507995605, + "learning_rate": 4.485596707818931e-06, + "logits/chosen": -6.196796894073486, + "logits/rejected": -6.226622104644775, + "logps/chosen": -442.52685546875, + "logps/rejected": -516.7334594726562, + "loss": 0.6245638847351074, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.18519389629364014, + "rewards/margins": 0.24079546332359314, + "rewards/rejected": -0.42598938941955566, + "step": 105 + }, + { + "epoch": 0.6136680613668062, + "grad_norm": 2.0638511180877686, + "learning_rate": 4.434156378600823e-06, + "logits/chosen": -6.1991071701049805, + "logits/rejected": -6.119466781616211, + "logps/chosen": -410.86669921875, + "logps/rejected": -450.365478515625, + "loss": 0.6201879501342773, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.0853937491774559, + "rewards/margins": 0.17646726965904236, + "rewards/rejected": -0.26186102628707886, + "step": 110 + }, + { + "epoch": 0.6415620641562064, + "grad_norm": 2.3625364303588867, + "learning_rate": 4.382716049382716e-06, + "logits/chosen": -6.220386505126953, + "logits/rejected": -6.223449230194092, + "logps/chosen": -435.92626953125, + "logps/rejected": -495.6065368652344, + "loss": 0.6151515483856201, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.2208656519651413, + "rewards/margins": 0.21172885596752167, + "rewards/rejected": -0.43259453773498535, + "step": 115 + }, + { + "epoch": 0.6694560669456067, + "grad_norm": 1.8082666397094727, + "learning_rate": 4.331275720164609e-06, + "logits/chosen": -6.262181282043457, + "logits/rejected": -6.250016212463379, + "logps/chosen": -354.21795654296875, + "logps/rejected": -389.14556884765625, + "loss": 0.6109379768371582, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.17473874986171722, + "rewards/margins": 0.19315743446350098, + "rewards/rejected": -0.367896169424057, + "step": 120 + }, + { + "epoch": 0.697350069735007, + "grad_norm": 1.9556658267974854, + "learning_rate": 4.2798353909465025e-06, + "logits/chosen": -6.167700290679932, + "logits/rejected": -6.1421003341674805, + "logps/chosen": -379.1827392578125, + "logps/rejected": -426.69549560546875, + "loss": 0.6202447414398193, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.17535170912742615, + "rewards/margins": 0.18863627314567566, + "rewards/rejected": -0.3639879822731018, + "step": 125 + }, + { + "epoch": 0.7252440725244073, + "grad_norm": 3.001298666000366, + "learning_rate": 4.228395061728396e-06, + "logits/chosen": -6.2535905838012695, + "logits/rejected": -6.232400894165039, + "logps/chosen": -424.8458557128906, + "logps/rejected": -494.52960205078125, + "loss": 0.5493914127349854, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.22046081721782684, + "rewards/margins": 0.3785194754600525, + "rewards/rejected": -0.5989803075790405, + "step": 130 + }, + { + "epoch": 0.7531380753138075, + "grad_norm": 2.5210413932800293, + "learning_rate": 4.176954732510288e-06, + "logits/chosen": -6.078260898590088, + "logits/rejected": -6.0126447677612305, + "logps/chosen": -414.69940185546875, + "logps/rejected": -432.3282165527344, + "loss": 0.579456901550293, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.15084640681743622, + "rewards/margins": 0.2960701882839203, + "rewards/rejected": -0.4469165802001953, + "step": 135 + }, + { + "epoch": 0.7810320781032078, + "grad_norm": 2.6807265281677246, + "learning_rate": 4.125514403292181e-06, + "logits/chosen": -6.243051052093506, + "logits/rejected": -6.220357418060303, + "logps/chosen": -400.6156311035156, + "logps/rejected": -450.0393981933594, + "loss": 0.5514531135559082, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.25727975368499756, + "rewards/margins": 0.3991738259792328, + "rewards/rejected": -0.6564534902572632, + "step": 140 + }, + { + "epoch": 0.8089260808926081, + "grad_norm": 2.4137353897094727, + "learning_rate": 4.074074074074074e-06, + "logits/chosen": -6.186558246612549, + "logits/rejected": -6.139374256134033, + "logps/chosen": -442.314453125, + "logps/rejected": -491.584716796875, + "loss": 0.5633067131042481, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.35670942068099976, + "rewards/margins": 0.38038796186447144, + "rewards/rejected": -0.7370973825454712, + "step": 145 + }, + { + "epoch": 0.8368200836820083, + "grad_norm": 2.1043145656585693, + "learning_rate": 4.022633744855967e-06, + "logits/chosen": -6.177689552307129, + "logits/rejected": -6.167322635650635, + "logps/chosen": -435.2288513183594, + "logps/rejected": -469.41436767578125, + "loss": 0.5640112876892089, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.32313138246536255, + "rewards/margins": 0.45979684591293335, + "rewards/rejected": -0.7829282283782959, + "step": 150 + } + ], + "logging_steps": 5, + "max_steps": 540, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 50, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 0.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/gemma-2b-dpo/checkpoint-150/training_args.bin b/gemma-2b-dpo/checkpoint-150/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..c02f6b48421cea8396dfcaf6d8319b0150c14d7b --- /dev/null +++ b/gemma-2b-dpo/checkpoint-150/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3be0616829fa305775b58136a03c46cbb233332c99572ecc66875666e4681dc9 +size 5688 diff --git a/gemma-2b-dpo/checkpoint-200/README.md b/gemma-2b-dpo/checkpoint-200/README.md new file mode 100644 index 0000000000000000000000000000000000000000..14d70987363c6b5210911e1b79d69ac05ee2d8b7 --- /dev/null +++ b/gemma-2b-dpo/checkpoint-200/README.md @@ -0,0 +1,209 @@ +--- +base_model: google/gemma-2-2b-it +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:google/gemma-2-2b-it +- dpo +- lora +- transformers +- trl +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/gemma-2b-dpo/checkpoint-200/adapter_config.json b/gemma-2b-dpo/checkpoint-200/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..bbaa22368e6c52ef59d4559074221a5020aba608 --- /dev/null +++ b/gemma-2b-dpo/checkpoint-200/adapter_config.json @@ -0,0 +1,46 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": null, + "base_model_name_or_path": "google/gemma-2-2b-it", + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "up_proj", + "q_proj", + "v_proj", + "gate_proj", + "k_proj", + "down_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/gemma-2b-dpo/checkpoint-200/adapter_model.safetensors b/gemma-2b-dpo/checkpoint-200/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ab11b046a31bf4120ab1c58137ae2eab3aaebc8d --- /dev/null +++ b/gemma-2b-dpo/checkpoint-200/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a9a8b1c232692070243d2eea541206d224f59667647d540f141eb6ffc01921c7 +size 83115256 diff --git a/gemma-2b-dpo/checkpoint-200/chat_template.jinja b/gemma-2b-dpo/checkpoint-200/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..923ec253c8dbefbb41cf084db7251df41d000f6d --- /dev/null +++ b/gemma-2b-dpo/checkpoint-200/chat_template.jinja @@ -0,0 +1,4 @@ +{{ bos_token }}{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if (message['role'] == 'assistant') %}{% set role = 'model' %}{% else %}{% set role = message['role'] %}{% endif %}{{ '' + role + ' +' + message['content'] | trim + ' +' }}{% endfor %}{% if add_generation_prompt %}{{'model +'}}{% endif %} \ No newline at end of file diff --git a/gemma-2b-dpo/checkpoint-200/optimizer.pt b/gemma-2b-dpo/checkpoint-200/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..4e05e42ef2b8368cdbcfefe5b39f86c28d7a3ba5 --- /dev/null +++ b/gemma-2b-dpo/checkpoint-200/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1dc2a947fbcec57f9eb4155feb8616e481574b23f6236819610628b8cc9fa67f +size 42616388 diff --git a/gemma-2b-dpo/checkpoint-200/rng_state.pth b/gemma-2b-dpo/checkpoint-200/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..ece67508ffa7f89d1f8e8b4e514d0551447e32a9 --- /dev/null +++ b/gemma-2b-dpo/checkpoint-200/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2b66e3cc7c452b707ddac5caf0aa17618afb9bc1a0333600a22c4afb353f3165 +size 14244 diff --git a/gemma-2b-dpo/checkpoint-200/scheduler.pt b/gemma-2b-dpo/checkpoint-200/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..eb43ba26d02e4535977ec25ae69e04e18d3cf8ae --- /dev/null +++ b/gemma-2b-dpo/checkpoint-200/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c8b8fddcda8c1fe5171026971b2f6b7f66207f007bd5795edf244b841b0d1519 +size 1064 diff --git a/gemma-2b-dpo/checkpoint-200/tokenizer.json b/gemma-2b-dpo/checkpoint-200/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..6523cc0616b64c563af913a417dfa7eb01549a2c --- /dev/null +++ b/gemma-2b-dpo/checkpoint-200/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:394ace002a144ac6ad5486387502f2d36f70c087310c3d907857240c76fcb36e +size 34362748 diff --git a/gemma-2b-dpo/checkpoint-200/tokenizer_config.json b/gemma-2b-dpo/checkpoint-200/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..eabc4ed8ab34751069a1970d61e616ce93e53880 --- /dev/null +++ b/gemma-2b-dpo/checkpoint-200/tokenizer_config.json @@ -0,0 +1,19 @@ +{ + "backend": "tokenizers", + "bos_token": "", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "extra_special_tokens": [ + "", + "" + ], + "is_local": false, + "mask_token": "", + "model_max_length": 1000000000000000019884624838656, + "pad_token": "", + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "tokenizer_class": "GemmaTokenizer", + "unk_token": "", + "use_default_system_prompt": false +} diff --git a/gemma-2b-dpo/checkpoint-200/trainer_state.json b/gemma-2b-dpo/checkpoint-200/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..134e5ede922b688e47926fbcf88a2dda3fafa05b --- /dev/null +++ b/gemma-2b-dpo/checkpoint-200/trainer_state.json @@ -0,0 +1,634 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.1115760111576012, + "eval_steps": 500, + "global_step": 200, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.02789400278940028, + "grad_norm": 2.7052793502807617, + "learning_rate": 3.7037037037037036e-07, + "logits/chosen": -6.208017826080322, + "logits/rejected": -6.18649435043335, + "logps/chosen": -417.861328125, + "logps/rejected": -431.774169921875, + "loss": 0.6978574275970459, + "rewards/accuracies": 0.25, + "rewards/chosen": 0.0027643204666674137, + "rewards/margins": -0.00830078125, + "rewards/rejected": 0.011065103113651276, + "step": 5 + }, + { + "epoch": 0.05578800557880056, + "grad_norm": 1.9632341861724854, + "learning_rate": 8.333333333333333e-07, + "logits/chosen": -6.051701545715332, + "logits/rejected": -6.098549842834473, + "logps/chosen": -442.61126708984375, + "logps/rejected": -419.4737243652344, + "loss": 0.6965099811553955, + "rewards/accuracies": 0.44999998807907104, + "rewards/chosen": -0.0022751614451408386, + "rewards/margins": -0.004312096629291773, + "rewards/rejected": 0.002036933321505785, + "step": 10 + }, + { + "epoch": 0.08368200836820083, + "grad_norm": 1.9358311891555786, + "learning_rate": 1.2962962962962962e-06, + "logits/chosen": -6.1482648849487305, + "logits/rejected": -6.208896636962891, + "logps/chosen": -419.15155029296875, + "logps/rejected": -393.37322998046875, + "loss": 0.6971890449523925, + "rewards/accuracies": 0.4749999940395355, + "rewards/chosen": 0.004753150977194309, + "rewards/margins": -0.006633720360696316, + "rewards/rejected": 0.0113868722692132, + "step": 15 + }, + { + "epoch": 0.11157601115760112, + "grad_norm": 2.137960195541382, + "learning_rate": 1.7592592592592594e-06, + "logits/chosen": -6.1889142990112305, + "logits/rejected": -6.147027015686035, + "logps/chosen": -449.2413024902344, + "logps/rejected": -387.8244934082031, + "loss": 0.694630479812622, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.01739494316279888, + "rewards/margins": -0.0010753620881587267, + "rewards/rejected": -0.016319578513503075, + "step": 20 + }, + { + "epoch": 0.1394700139470014, + "grad_norm": 2.610708475112915, + "learning_rate": 2.222222222222222e-06, + "logits/chosen": -6.098985195159912, + "logits/rejected": -6.146561145782471, + "logps/chosen": -528.6546020507812, + "logps/rejected": -517.2868041992188, + "loss": 0.6923945903778076, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.013269426301121712, + "rewards/margins": 0.004172402434051037, + "rewards/rejected": 0.009097023867070675, + "step": 25 + }, + { + "epoch": 0.16736401673640167, + "grad_norm": 3.0792224407196045, + "learning_rate": 2.6851851851851856e-06, + "logits/chosen": -6.156611442565918, + "logits/rejected": -6.146718502044678, + "logps/chosen": -427.1123962402344, + "logps/rejected": -413.99810791015625, + "loss": 0.6963389396667481, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.0016099174972623587, + "rewards/margins": -0.0036583715118467808, + "rewards/rejected": 0.0052682883106172085, + "step": 30 + }, + { + "epoch": 0.19525801952580196, + "grad_norm": 2.40751051902771, + "learning_rate": 3.1481481481481483e-06, + "logits/chosen": -6.270221710205078, + "logits/rejected": -6.222764492034912, + "logps/chosen": -433.89312744140625, + "logps/rejected": -442.81378173828125, + "loss": 0.6875874042510987, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.003747978014871478, + "rewards/margins": 0.013033255934715271, + "rewards/rejected": -0.016781235113739967, + "step": 35 + }, + { + "epoch": 0.22315202231520223, + "grad_norm": 2.409308671951294, + "learning_rate": 3.6111111111111115e-06, + "logits/chosen": -6.171980857849121, + "logits/rejected": -6.236737251281738, + "logps/chosen": -411.51092529296875, + "logps/rejected": -454.578857421875, + "loss": 0.6975872993469239, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.0006048586219549179, + "rewards/margins": -0.006687240209430456, + "rewards/rejected": 0.0060823829844594, + "step": 40 + }, + { + "epoch": 0.2510460251046025, + "grad_norm": 2.8261911869049072, + "learning_rate": 4.074074074074074e-06, + "logits/chosen": -6.1633710861206055, + "logits/rejected": -6.245741367340088, + "logps/chosen": -373.363525390625, + "logps/rejected": -356.736572265625, + "loss": 0.6881499290466309, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": 0.01782766357064247, + "rewards/margins": 0.015053692273795605, + "rewards/rejected": 0.002773971762508154, + "step": 45 + }, + { + "epoch": 0.2789400278940028, + "grad_norm": 2.457179546356201, + "learning_rate": 4.537037037037038e-06, + "logits/chosen": -6.270019054412842, + "logits/rejected": -6.3202104568481445, + "logps/chosen": -466.30609130859375, + "logps/rejected": -476.45550537109375, + "loss": 0.6831833839416503, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": 0.0046669007278978825, + "rewards/margins": 0.023042945191264153, + "rewards/rejected": -0.018376046791672707, + "step": 50 + }, + { + "epoch": 0.3068340306834031, + "grad_norm": 1.6770554780960083, + "learning_rate": 5e-06, + "logits/chosen": -6.253265380859375, + "logits/rejected": -6.15267276763916, + "logps/chosen": -352.24908447265625, + "logps/rejected": -447.11444091796875, + "loss": 0.6791603088378906, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.010960197076201439, + "rewards/margins": 0.031198084354400635, + "rewards/rejected": -0.042158275842666626, + "step": 55 + }, + { + "epoch": 0.33472803347280333, + "grad_norm": 2.6027019023895264, + "learning_rate": 4.9485596707818935e-06, + "logits/chosen": -6.205387592315674, + "logits/rejected": -6.259293079376221, + "logps/chosen": -439.732421875, + "logps/rejected": -412.8099670410156, + "loss": 0.6736814975738525, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.010224836878478527, + "rewards/margins": 0.04464374855160713, + "rewards/rejected": -0.054868586361408234, + "step": 60 + }, + { + "epoch": 0.36262203626220363, + "grad_norm": 2.1717166900634766, + "learning_rate": 4.897119341563787e-06, + "logits/chosen": -6.1334547996521, + "logits/rejected": -6.148266792297363, + "logps/chosen": -390.00433349609375, + "logps/rejected": -376.676513671875, + "loss": 0.6825191974639893, + "rewards/accuracies": 0.5249999761581421, + "rewards/chosen": -0.04612758383154869, + "rewards/margins": 0.03951488807797432, + "rewards/rejected": -0.08564247190952301, + "step": 65 + }, + { + "epoch": 0.3905160390516039, + "grad_norm": 2.2574119567871094, + "learning_rate": 4.845679012345679e-06, + "logits/chosen": -6.236250877380371, + "logits/rejected": -6.165186882019043, + "logps/chosen": -411.1315002441406, + "logps/rejected": -447.22100830078125, + "loss": 0.6402256488800049, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.017545931041240692, + "rewards/margins": 0.12250369787216187, + "rewards/rejected": -0.14004963636398315, + "step": 70 + }, + { + "epoch": 0.41841004184100417, + "grad_norm": 2.3837037086486816, + "learning_rate": 4.794238683127572e-06, + "logits/chosen": -6.256176948547363, + "logits/rejected": -6.213258266448975, + "logps/chosen": -437.463623046875, + "logps/rejected": -404.8554992675781, + "loss": 0.6703986167907715, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.03428981825709343, + "rewards/margins": 0.05360151082277298, + "rewards/rejected": -0.08789133280515671, + "step": 75 + }, + { + "epoch": 0.44630404463040446, + "grad_norm": 3.304287910461426, + "learning_rate": 4.742798353909465e-06, + "logits/chosen": -6.2820305824279785, + "logits/rejected": -6.221312522888184, + "logps/chosen": -455.2318420410156, + "logps/rejected": -422.08837890625, + "loss": 0.7040590286254883, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.0881255492568016, + "rewards/margins": -0.006679975427687168, + "rewards/rejected": -0.08144557476043701, + "step": 80 + }, + { + "epoch": 0.47419804741980476, + "grad_norm": 2.6312427520751953, + "learning_rate": 4.691358024691358e-06, + "logits/chosen": -6.1796159744262695, + "logits/rejected": -6.193436622619629, + "logps/chosen": -423.59930419921875, + "logps/rejected": -486.1690979003906, + "loss": 0.6397527694702149, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.043790053576231, + "rewards/margins": 0.12486596405506134, + "rewards/rejected": -0.16865602135658264, + "step": 85 + }, + { + "epoch": 0.502092050209205, + "grad_norm": 2.3493549823760986, + "learning_rate": 4.6399176954732515e-06, + "logits/chosen": -6.136630058288574, + "logits/rejected": -6.202858924865723, + "logps/chosen": -467.627685546875, + "logps/rejected": -441.41241455078125, + "loss": 0.5936434745788575, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": 0.06891433894634247, + "rewards/margins": 0.26186972856521606, + "rewards/rejected": -0.19295534491539001, + "step": 90 + }, + { + "epoch": 0.5299860529986054, + "grad_norm": 2.4952447414398193, + "learning_rate": 4.588477366255145e-06, + "logits/chosen": -6.1503586769104, + "logits/rejected": -6.144400596618652, + "logps/chosen": -355.2735290527344, + "logps/rejected": -409.51702880859375, + "loss": 0.6157774925231934, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.0699276328086853, + "rewards/margins": 0.22694334387779236, + "rewards/rejected": -0.2968709468841553, + "step": 95 + }, + { + "epoch": 0.5578800557880056, + "grad_norm": 2.5470480918884277, + "learning_rate": 4.537037037037038e-06, + "logits/chosen": -6.14028263092041, + "logits/rejected": -6.104605197906494, + "logps/chosen": -429.1048889160156, + "logps/rejected": -454.7377014160156, + "loss": 0.6300024032592774, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.05769091844558716, + "rewards/margins": 0.14401891827583313, + "rewards/rejected": -0.2017098367214203, + "step": 100 + }, + { + "epoch": 0.5857740585774058, + "grad_norm": 2.6023478507995605, + "learning_rate": 4.485596707818931e-06, + "logits/chosen": -6.196796894073486, + "logits/rejected": -6.226622104644775, + "logps/chosen": -442.52685546875, + "logps/rejected": -516.7334594726562, + "loss": 0.6245638847351074, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.18519389629364014, + "rewards/margins": 0.24079546332359314, + "rewards/rejected": -0.42598938941955566, + "step": 105 + }, + { + "epoch": 0.6136680613668062, + "grad_norm": 2.0638511180877686, + "learning_rate": 4.434156378600823e-06, + "logits/chosen": -6.1991071701049805, + "logits/rejected": -6.119466781616211, + "logps/chosen": -410.86669921875, + "logps/rejected": -450.365478515625, + "loss": 0.6201879501342773, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.0853937491774559, + "rewards/margins": 0.17646726965904236, + "rewards/rejected": -0.26186102628707886, + "step": 110 + }, + { + "epoch": 0.6415620641562064, + "grad_norm": 2.3625364303588867, + "learning_rate": 4.382716049382716e-06, + "logits/chosen": -6.220386505126953, + "logits/rejected": -6.223449230194092, + "logps/chosen": -435.92626953125, + "logps/rejected": -495.6065368652344, + "loss": 0.6151515483856201, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.2208656519651413, + "rewards/margins": 0.21172885596752167, + "rewards/rejected": -0.43259453773498535, + "step": 115 + }, + { + "epoch": 0.6694560669456067, + "grad_norm": 1.8082666397094727, + "learning_rate": 4.331275720164609e-06, + "logits/chosen": -6.262181282043457, + "logits/rejected": -6.250016212463379, + "logps/chosen": -354.21795654296875, + "logps/rejected": -389.14556884765625, + "loss": 0.6109379768371582, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.17473874986171722, + "rewards/margins": 0.19315743446350098, + "rewards/rejected": -0.367896169424057, + "step": 120 + }, + { + "epoch": 0.697350069735007, + "grad_norm": 1.9556658267974854, + "learning_rate": 4.2798353909465025e-06, + "logits/chosen": -6.167700290679932, + "logits/rejected": -6.1421003341674805, + "logps/chosen": -379.1827392578125, + "logps/rejected": -426.69549560546875, + "loss": 0.6202447414398193, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.17535170912742615, + "rewards/margins": 0.18863627314567566, + "rewards/rejected": -0.3639879822731018, + "step": 125 + }, + { + "epoch": 0.7252440725244073, + "grad_norm": 3.001298666000366, + "learning_rate": 4.228395061728396e-06, + "logits/chosen": -6.2535905838012695, + "logits/rejected": -6.232400894165039, + "logps/chosen": -424.8458557128906, + "logps/rejected": -494.52960205078125, + "loss": 0.5493914127349854, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.22046081721782684, + "rewards/margins": 0.3785194754600525, + "rewards/rejected": -0.5989803075790405, + "step": 130 + }, + { + "epoch": 0.7531380753138075, + "grad_norm": 2.5210413932800293, + "learning_rate": 4.176954732510288e-06, + "logits/chosen": -6.078260898590088, + "logits/rejected": -6.0126447677612305, + "logps/chosen": -414.69940185546875, + "logps/rejected": -432.3282165527344, + "loss": 0.579456901550293, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.15084640681743622, + "rewards/margins": 0.2960701882839203, + "rewards/rejected": -0.4469165802001953, + "step": 135 + }, + { + "epoch": 0.7810320781032078, + "grad_norm": 2.6807265281677246, + "learning_rate": 4.125514403292181e-06, + "logits/chosen": -6.243051052093506, + "logits/rejected": -6.220357418060303, + "logps/chosen": -400.6156311035156, + "logps/rejected": -450.0393981933594, + "loss": 0.5514531135559082, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.25727975368499756, + "rewards/margins": 0.3991738259792328, + "rewards/rejected": -0.6564534902572632, + "step": 140 + }, + { + "epoch": 0.8089260808926081, + "grad_norm": 2.4137353897094727, + "learning_rate": 4.074074074074074e-06, + "logits/chosen": -6.186558246612549, + "logits/rejected": -6.139374256134033, + "logps/chosen": -442.314453125, + "logps/rejected": -491.584716796875, + "loss": 0.5633067131042481, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.35670942068099976, + "rewards/margins": 0.38038796186447144, + "rewards/rejected": -0.7370973825454712, + "step": 145 + }, + { + "epoch": 0.8368200836820083, + "grad_norm": 2.1043145656585693, + "learning_rate": 4.022633744855967e-06, + "logits/chosen": -6.177689552307129, + "logits/rejected": -6.167322635650635, + "logps/chosen": -435.2288513183594, + "logps/rejected": -469.41436767578125, + "loss": 0.5640112876892089, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.32313138246536255, + "rewards/margins": 0.45979684591293335, + "rewards/rejected": -0.7829282283782959, + "step": 150 + }, + { + "epoch": 0.8647140864714087, + "grad_norm": 1.9623620510101318, + "learning_rate": 3.97119341563786e-06, + "logits/chosen": -6.0590739250183105, + "logits/rejected": -6.033650875091553, + "logps/chosen": -421.5000915527344, + "logps/rejected": -370.30902099609375, + "loss": 0.6319089412689209, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.29380694031715393, + "rewards/margins": 0.1507532149553299, + "rewards/rejected": -0.44456014037132263, + "step": 155 + }, + { + "epoch": 0.8926080892608089, + "grad_norm": 1.9432786703109741, + "learning_rate": 3.9197530864197535e-06, + "logits/chosen": -6.267019271850586, + "logits/rejected": -6.214621067047119, + "logps/chosen": -417.11724853515625, + "logps/rejected": -431.72698974609375, + "loss": 0.5186795234680176, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -0.18987610936164856, + "rewards/margins": 0.5616164803504944, + "rewards/rejected": -0.7514925599098206, + "step": 160 + }, + { + "epoch": 0.9205020920502092, + "grad_norm": 1.8146827220916748, + "learning_rate": 3.868312757201647e-06, + "logits/chosen": -6.230213165283203, + "logits/rejected": -6.109362602233887, + "logps/chosen": -376.6744384765625, + "logps/rejected": -376.6526184082031, + "loss": 0.5346522808074952, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.19152329862117767, + "rewards/margins": 0.38060134649276733, + "rewards/rejected": -0.5721246004104614, + "step": 165 + }, + { + "epoch": 0.9483960948396095, + "grad_norm": 1.936680793762207, + "learning_rate": 3.81687242798354e-06, + "logits/chosen": -6.194340705871582, + "logits/rejected": -6.1444597244262695, + "logps/chosen": -389.017822265625, + "logps/rejected": -445.76544189453125, + "loss": 0.49420690536499023, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -0.055042725056409836, + "rewards/margins": 0.5315954685211182, + "rewards/rejected": -0.5866381525993347, + "step": 170 + }, + { + "epoch": 0.9762900976290098, + "grad_norm": 3.2391903400421143, + "learning_rate": 3.7654320987654325e-06, + "logits/chosen": -6.190616607666016, + "logits/rejected": -6.1138739585876465, + "logps/chosen": -421.6878356933594, + "logps/rejected": -460.1180114746094, + "loss": 0.5374621391296387, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.22659805417060852, + "rewards/margins": 0.43736472725868225, + "rewards/rejected": -0.6639627814292908, + "step": 175 + }, + { + "epoch": 1.0, + "grad_norm": 4.300363063812256, + "learning_rate": 3.7139917695473256e-06, + "logits/chosen": -6.227687358856201, + "logits/rejected": -6.129978179931641, + "logps/chosen": -449.4482116699219, + "logps/rejected": -430.434326171875, + "loss": 0.49641432762146, + "rewards/accuracies": 0.8529411554336548, + "rewards/chosen": -0.03447714447975159, + "rewards/margins": 0.5154433846473694, + "rewards/rejected": -0.5499205589294434, + "step": 180 + }, + { + "epoch": 1.0278940027894004, + "grad_norm": 1.5967351198196411, + "learning_rate": 3.6625514403292183e-06, + "logits/chosen": -6.067181587219238, + "logits/rejected": -6.06889533996582, + "logps/chosen": -399.98968505859375, + "logps/rejected": -447.72039794921875, + "loss": 0.4079257011413574, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 0.03092392347753048, + "rewards/margins": 0.7688177824020386, + "rewards/rejected": -0.7378939390182495, + "step": 185 + }, + { + "epoch": 1.0557880055788005, + "grad_norm": 1.7787078619003296, + "learning_rate": 3.6111111111111115e-06, + "logits/chosen": -6.146653175354004, + "logits/rejected": -6.154219627380371, + "logps/chosen": -419.3291015625, + "logps/rejected": -463.1239318847656, + "loss": 0.42812933921813967, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -0.007557299919426441, + "rewards/margins": 0.7340337634086609, + "rewards/rejected": -0.7415911555290222, + "step": 190 + }, + { + "epoch": 1.0836820083682008, + "grad_norm": 1.7694693803787231, + "learning_rate": 3.559670781893004e-06, + "logits/chosen": -6.160831928253174, + "logits/rejected": -6.151050567626953, + "logps/chosen": -379.6001281738281, + "logps/rejected": -394.00531005859375, + "loss": 0.41410012245178224, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": 0.03558122366666794, + "rewards/margins": 0.8024934530258179, + "rewards/rejected": -0.7669121623039246, + "step": 195 + }, + { + "epoch": 1.1115760111576012, + "grad_norm": 1.3202013969421387, + "learning_rate": 3.5082304526748973e-06, + "logits/chosen": -6.110814094543457, + "logits/rejected": -6.176726341247559, + "logps/chosen": -400.85052490234375, + "logps/rejected": -405.44195556640625, + "loss": 0.391094446182251, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -0.02833392843604088, + "rewards/margins": 0.8581112623214722, + "rewards/rejected": -0.8864452242851257, + "step": 200 + } + ], + "logging_steps": 5, + "max_steps": 540, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 50, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 0.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/gemma-2b-dpo/checkpoint-200/training_args.bin b/gemma-2b-dpo/checkpoint-200/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..c02f6b48421cea8396dfcaf6d8319b0150c14d7b --- /dev/null +++ b/gemma-2b-dpo/checkpoint-200/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3be0616829fa305775b58136a03c46cbb233332c99572ecc66875666e4681dc9 +size 5688 diff --git a/gemma-2b-dpo/checkpoint-250/README.md b/gemma-2b-dpo/checkpoint-250/README.md new file mode 100644 index 0000000000000000000000000000000000000000..14d70987363c6b5210911e1b79d69ac05ee2d8b7 --- /dev/null +++ b/gemma-2b-dpo/checkpoint-250/README.md @@ -0,0 +1,209 @@ +--- +base_model: google/gemma-2-2b-it +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:google/gemma-2-2b-it +- dpo +- lora +- transformers +- trl +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/gemma-2b-dpo/checkpoint-250/adapter_config.json b/gemma-2b-dpo/checkpoint-250/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..bbaa22368e6c52ef59d4559074221a5020aba608 --- /dev/null +++ b/gemma-2b-dpo/checkpoint-250/adapter_config.json @@ -0,0 +1,46 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": null, + "base_model_name_or_path": "google/gemma-2-2b-it", + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "up_proj", + "q_proj", + "v_proj", + "gate_proj", + "k_proj", + "down_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/gemma-2b-dpo/checkpoint-250/adapter_model.safetensors b/gemma-2b-dpo/checkpoint-250/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ce1ec525c31f2dc06d71b04f0b3024a214802b1e --- /dev/null +++ b/gemma-2b-dpo/checkpoint-250/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:521436babeb20da3f9706c576539c659fcd45d1a0c22acd84a44d3fc9d4fe370 +size 83115256 diff --git a/gemma-2b-dpo/checkpoint-250/chat_template.jinja b/gemma-2b-dpo/checkpoint-250/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..923ec253c8dbefbb41cf084db7251df41d000f6d --- /dev/null +++ b/gemma-2b-dpo/checkpoint-250/chat_template.jinja @@ -0,0 +1,4 @@ +{{ bos_token }}{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if (message['role'] == 'assistant') %}{% set role = 'model' %}{% else %}{% set role = message['role'] %}{% endif %}{{ '' + role + ' +' + message['content'] | trim + ' +' }}{% endfor %}{% if add_generation_prompt %}{{'model +'}}{% endif %} \ No newline at end of file diff --git a/gemma-2b-dpo/checkpoint-250/optimizer.pt b/gemma-2b-dpo/checkpoint-250/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..38821c184218fcfd1289fc37db9612f73264b0dd --- /dev/null +++ b/gemma-2b-dpo/checkpoint-250/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5783ae02fe015af6554b431db64dd006bf1b056f5f5ff304151d37bf7db93739 +size 42616388 diff --git a/gemma-2b-dpo/checkpoint-250/rng_state.pth b/gemma-2b-dpo/checkpoint-250/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..ece67508ffa7f89d1f8e8b4e514d0551447e32a9 --- /dev/null +++ b/gemma-2b-dpo/checkpoint-250/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2b66e3cc7c452b707ddac5caf0aa17618afb9bc1a0333600a22c4afb353f3165 +size 14244 diff --git a/gemma-2b-dpo/checkpoint-250/scheduler.pt b/gemma-2b-dpo/checkpoint-250/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..ac480fad53ec4b364c8c9eeb84ac6e819ba4717e --- /dev/null +++ b/gemma-2b-dpo/checkpoint-250/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a04c3b44e05e12a36faabdecba5dc808f7cdeb9643fae2e7683908edc539b44 +size 1064 diff --git a/gemma-2b-dpo/checkpoint-250/tokenizer.json b/gemma-2b-dpo/checkpoint-250/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..6523cc0616b64c563af913a417dfa7eb01549a2c --- /dev/null +++ b/gemma-2b-dpo/checkpoint-250/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:394ace002a144ac6ad5486387502f2d36f70c087310c3d907857240c76fcb36e +size 34362748 diff --git a/gemma-2b-dpo/checkpoint-250/tokenizer_config.json b/gemma-2b-dpo/checkpoint-250/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..eabc4ed8ab34751069a1970d61e616ce93e53880 --- /dev/null +++ b/gemma-2b-dpo/checkpoint-250/tokenizer_config.json @@ -0,0 +1,19 @@ +{ + "backend": "tokenizers", + "bos_token": "", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "extra_special_tokens": [ + "", + "" + ], + "is_local": false, + "mask_token": "", + "model_max_length": 1000000000000000019884624838656, + "pad_token": "", + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "tokenizer_class": "GemmaTokenizer", + "unk_token": "", + "use_default_system_prompt": false +} diff --git a/gemma-2b-dpo/checkpoint-250/trainer_state.json b/gemma-2b-dpo/checkpoint-250/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..2004c06bfa498e18689c11ad1dda053b08ffaf7c --- /dev/null +++ b/gemma-2b-dpo/checkpoint-250/trainer_state.json @@ -0,0 +1,784 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.390516039051604, + "eval_steps": 500, + "global_step": 250, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.02789400278940028, + "grad_norm": 2.7052793502807617, + "learning_rate": 3.7037037037037036e-07, + "logits/chosen": -6.208017826080322, + "logits/rejected": -6.18649435043335, + "logps/chosen": -417.861328125, + "logps/rejected": -431.774169921875, + "loss": 0.6978574275970459, + "rewards/accuracies": 0.25, + "rewards/chosen": 0.0027643204666674137, + "rewards/margins": -0.00830078125, + "rewards/rejected": 0.011065103113651276, + "step": 5 + }, + { + "epoch": 0.05578800557880056, + "grad_norm": 1.9632341861724854, + "learning_rate": 8.333333333333333e-07, + "logits/chosen": -6.051701545715332, + "logits/rejected": -6.098549842834473, + "logps/chosen": -442.61126708984375, + "logps/rejected": -419.4737243652344, + "loss": 0.6965099811553955, + "rewards/accuracies": 0.44999998807907104, + "rewards/chosen": -0.0022751614451408386, + "rewards/margins": -0.004312096629291773, + "rewards/rejected": 0.002036933321505785, + "step": 10 + }, + { + "epoch": 0.08368200836820083, + "grad_norm": 1.9358311891555786, + "learning_rate": 1.2962962962962962e-06, + "logits/chosen": -6.1482648849487305, + "logits/rejected": -6.208896636962891, + "logps/chosen": -419.15155029296875, + "logps/rejected": -393.37322998046875, + "loss": 0.6971890449523925, + "rewards/accuracies": 0.4749999940395355, + "rewards/chosen": 0.004753150977194309, + "rewards/margins": -0.006633720360696316, + "rewards/rejected": 0.0113868722692132, + "step": 15 + }, + { + "epoch": 0.11157601115760112, + "grad_norm": 2.137960195541382, + "learning_rate": 1.7592592592592594e-06, + "logits/chosen": -6.1889142990112305, + "logits/rejected": -6.147027015686035, + "logps/chosen": -449.2413024902344, + "logps/rejected": -387.8244934082031, + "loss": 0.694630479812622, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.01739494316279888, + "rewards/margins": -0.0010753620881587267, + "rewards/rejected": -0.016319578513503075, + "step": 20 + }, + { + "epoch": 0.1394700139470014, + "grad_norm": 2.610708475112915, + "learning_rate": 2.222222222222222e-06, + "logits/chosen": -6.098985195159912, + "logits/rejected": -6.146561145782471, + "logps/chosen": -528.6546020507812, + "logps/rejected": -517.2868041992188, + "loss": 0.6923945903778076, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.013269426301121712, + "rewards/margins": 0.004172402434051037, + "rewards/rejected": 0.009097023867070675, + "step": 25 + }, + { + "epoch": 0.16736401673640167, + "grad_norm": 3.0792224407196045, + "learning_rate": 2.6851851851851856e-06, + "logits/chosen": -6.156611442565918, + "logits/rejected": -6.146718502044678, + "logps/chosen": -427.1123962402344, + "logps/rejected": -413.99810791015625, + "loss": 0.6963389396667481, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.0016099174972623587, + "rewards/margins": -0.0036583715118467808, + "rewards/rejected": 0.0052682883106172085, + "step": 30 + }, + { + "epoch": 0.19525801952580196, + "grad_norm": 2.40751051902771, + "learning_rate": 3.1481481481481483e-06, + "logits/chosen": -6.270221710205078, + "logits/rejected": -6.222764492034912, + "logps/chosen": -433.89312744140625, + "logps/rejected": -442.81378173828125, + "loss": 0.6875874042510987, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.003747978014871478, + "rewards/margins": 0.013033255934715271, + "rewards/rejected": -0.016781235113739967, + "step": 35 + }, + { + "epoch": 0.22315202231520223, + "grad_norm": 2.409308671951294, + "learning_rate": 3.6111111111111115e-06, + "logits/chosen": -6.171980857849121, + "logits/rejected": -6.236737251281738, + "logps/chosen": -411.51092529296875, + "logps/rejected": -454.578857421875, + "loss": 0.6975872993469239, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.0006048586219549179, + "rewards/margins": -0.006687240209430456, + "rewards/rejected": 0.0060823829844594, + "step": 40 + }, + { + "epoch": 0.2510460251046025, + "grad_norm": 2.8261911869049072, + "learning_rate": 4.074074074074074e-06, + "logits/chosen": -6.1633710861206055, + "logits/rejected": -6.245741367340088, + "logps/chosen": -373.363525390625, + "logps/rejected": -356.736572265625, + "loss": 0.6881499290466309, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": 0.01782766357064247, + "rewards/margins": 0.015053692273795605, + "rewards/rejected": 0.002773971762508154, + "step": 45 + }, + { + "epoch": 0.2789400278940028, + "grad_norm": 2.457179546356201, + "learning_rate": 4.537037037037038e-06, + "logits/chosen": -6.270019054412842, + "logits/rejected": -6.3202104568481445, + "logps/chosen": -466.30609130859375, + "logps/rejected": -476.45550537109375, + "loss": 0.6831833839416503, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": 0.0046669007278978825, + "rewards/margins": 0.023042945191264153, + "rewards/rejected": -0.018376046791672707, + "step": 50 + }, + { + "epoch": 0.3068340306834031, + "grad_norm": 1.6770554780960083, + "learning_rate": 5e-06, + "logits/chosen": -6.253265380859375, + "logits/rejected": -6.15267276763916, + "logps/chosen": -352.24908447265625, + "logps/rejected": -447.11444091796875, + "loss": 0.6791603088378906, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.010960197076201439, + "rewards/margins": 0.031198084354400635, + "rewards/rejected": -0.042158275842666626, + "step": 55 + }, + { + "epoch": 0.33472803347280333, + "grad_norm": 2.6027019023895264, + "learning_rate": 4.9485596707818935e-06, + "logits/chosen": -6.205387592315674, + "logits/rejected": -6.259293079376221, + "logps/chosen": -439.732421875, + "logps/rejected": -412.8099670410156, + "loss": 0.6736814975738525, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.010224836878478527, + "rewards/margins": 0.04464374855160713, + "rewards/rejected": -0.054868586361408234, + "step": 60 + }, + { + "epoch": 0.36262203626220363, + "grad_norm": 2.1717166900634766, + "learning_rate": 4.897119341563787e-06, + "logits/chosen": -6.1334547996521, + "logits/rejected": -6.148266792297363, + "logps/chosen": -390.00433349609375, + "logps/rejected": -376.676513671875, + "loss": 0.6825191974639893, + "rewards/accuracies": 0.5249999761581421, + "rewards/chosen": -0.04612758383154869, + "rewards/margins": 0.03951488807797432, + "rewards/rejected": -0.08564247190952301, + "step": 65 + }, + { + "epoch": 0.3905160390516039, + "grad_norm": 2.2574119567871094, + "learning_rate": 4.845679012345679e-06, + "logits/chosen": -6.236250877380371, + "logits/rejected": -6.165186882019043, + "logps/chosen": -411.1315002441406, + "logps/rejected": -447.22100830078125, + "loss": 0.6402256488800049, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.017545931041240692, + "rewards/margins": 0.12250369787216187, + "rewards/rejected": -0.14004963636398315, + "step": 70 + }, + { + "epoch": 0.41841004184100417, + "grad_norm": 2.3837037086486816, + "learning_rate": 4.794238683127572e-06, + "logits/chosen": -6.256176948547363, + "logits/rejected": -6.213258266448975, + "logps/chosen": -437.463623046875, + "logps/rejected": -404.8554992675781, + "loss": 0.6703986167907715, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.03428981825709343, + "rewards/margins": 0.05360151082277298, + "rewards/rejected": -0.08789133280515671, + "step": 75 + }, + { + "epoch": 0.44630404463040446, + "grad_norm": 3.304287910461426, + "learning_rate": 4.742798353909465e-06, + "logits/chosen": -6.2820305824279785, + "logits/rejected": -6.221312522888184, + "logps/chosen": -455.2318420410156, + "logps/rejected": -422.08837890625, + "loss": 0.7040590286254883, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.0881255492568016, + "rewards/margins": -0.006679975427687168, + "rewards/rejected": -0.08144557476043701, + "step": 80 + }, + { + "epoch": 0.47419804741980476, + "grad_norm": 2.6312427520751953, + "learning_rate": 4.691358024691358e-06, + "logits/chosen": -6.1796159744262695, + "logits/rejected": -6.193436622619629, + "logps/chosen": -423.59930419921875, + "logps/rejected": -486.1690979003906, + "loss": 0.6397527694702149, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.043790053576231, + "rewards/margins": 0.12486596405506134, + "rewards/rejected": -0.16865602135658264, + "step": 85 + }, + { + "epoch": 0.502092050209205, + "grad_norm": 2.3493549823760986, + "learning_rate": 4.6399176954732515e-06, + "logits/chosen": -6.136630058288574, + "logits/rejected": -6.202858924865723, + "logps/chosen": -467.627685546875, + "logps/rejected": -441.41241455078125, + "loss": 0.5936434745788575, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": 0.06891433894634247, + "rewards/margins": 0.26186972856521606, + "rewards/rejected": -0.19295534491539001, + "step": 90 + }, + { + "epoch": 0.5299860529986054, + "grad_norm": 2.4952447414398193, + "learning_rate": 4.588477366255145e-06, + "logits/chosen": -6.1503586769104, + "logits/rejected": -6.144400596618652, + "logps/chosen": -355.2735290527344, + "logps/rejected": -409.51702880859375, + "loss": 0.6157774925231934, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.0699276328086853, + "rewards/margins": 0.22694334387779236, + "rewards/rejected": -0.2968709468841553, + "step": 95 + }, + { + "epoch": 0.5578800557880056, + "grad_norm": 2.5470480918884277, + "learning_rate": 4.537037037037038e-06, + "logits/chosen": -6.14028263092041, + "logits/rejected": -6.104605197906494, + "logps/chosen": -429.1048889160156, + "logps/rejected": -454.7377014160156, + "loss": 0.6300024032592774, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.05769091844558716, + "rewards/margins": 0.14401891827583313, + "rewards/rejected": -0.2017098367214203, + "step": 100 + }, + { + "epoch": 0.5857740585774058, + "grad_norm": 2.6023478507995605, + "learning_rate": 4.485596707818931e-06, + "logits/chosen": -6.196796894073486, + "logits/rejected": -6.226622104644775, + "logps/chosen": -442.52685546875, + "logps/rejected": -516.7334594726562, + "loss": 0.6245638847351074, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.18519389629364014, + "rewards/margins": 0.24079546332359314, + "rewards/rejected": -0.42598938941955566, + "step": 105 + }, + { + "epoch": 0.6136680613668062, + "grad_norm": 2.0638511180877686, + "learning_rate": 4.434156378600823e-06, + "logits/chosen": -6.1991071701049805, + "logits/rejected": -6.119466781616211, + "logps/chosen": -410.86669921875, + "logps/rejected": -450.365478515625, + "loss": 0.6201879501342773, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.0853937491774559, + "rewards/margins": 0.17646726965904236, + "rewards/rejected": -0.26186102628707886, + "step": 110 + }, + { + "epoch": 0.6415620641562064, + "grad_norm": 2.3625364303588867, + "learning_rate": 4.382716049382716e-06, + "logits/chosen": -6.220386505126953, + "logits/rejected": -6.223449230194092, + "logps/chosen": -435.92626953125, + "logps/rejected": -495.6065368652344, + "loss": 0.6151515483856201, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.2208656519651413, + "rewards/margins": 0.21172885596752167, + "rewards/rejected": -0.43259453773498535, + "step": 115 + }, + { + "epoch": 0.6694560669456067, + "grad_norm": 1.8082666397094727, + "learning_rate": 4.331275720164609e-06, + "logits/chosen": -6.262181282043457, + "logits/rejected": -6.250016212463379, + "logps/chosen": -354.21795654296875, + "logps/rejected": -389.14556884765625, + "loss": 0.6109379768371582, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.17473874986171722, + "rewards/margins": 0.19315743446350098, + "rewards/rejected": -0.367896169424057, + "step": 120 + }, + { + "epoch": 0.697350069735007, + "grad_norm": 1.9556658267974854, + "learning_rate": 4.2798353909465025e-06, + "logits/chosen": -6.167700290679932, + "logits/rejected": -6.1421003341674805, + "logps/chosen": -379.1827392578125, + "logps/rejected": -426.69549560546875, + "loss": 0.6202447414398193, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.17535170912742615, + "rewards/margins": 0.18863627314567566, + "rewards/rejected": -0.3639879822731018, + "step": 125 + }, + { + "epoch": 0.7252440725244073, + "grad_norm": 3.001298666000366, + "learning_rate": 4.228395061728396e-06, + "logits/chosen": -6.2535905838012695, + "logits/rejected": -6.232400894165039, + "logps/chosen": -424.8458557128906, + "logps/rejected": -494.52960205078125, + "loss": 0.5493914127349854, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.22046081721782684, + "rewards/margins": 0.3785194754600525, + "rewards/rejected": -0.5989803075790405, + "step": 130 + }, + { + "epoch": 0.7531380753138075, + "grad_norm": 2.5210413932800293, + "learning_rate": 4.176954732510288e-06, + "logits/chosen": -6.078260898590088, + "logits/rejected": -6.0126447677612305, + "logps/chosen": -414.69940185546875, + "logps/rejected": -432.3282165527344, + "loss": 0.579456901550293, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.15084640681743622, + "rewards/margins": 0.2960701882839203, + "rewards/rejected": -0.4469165802001953, + "step": 135 + }, + { + "epoch": 0.7810320781032078, + "grad_norm": 2.6807265281677246, + "learning_rate": 4.125514403292181e-06, + "logits/chosen": -6.243051052093506, + "logits/rejected": -6.220357418060303, + "logps/chosen": -400.6156311035156, + "logps/rejected": -450.0393981933594, + "loss": 0.5514531135559082, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.25727975368499756, + "rewards/margins": 0.3991738259792328, + "rewards/rejected": -0.6564534902572632, + "step": 140 + }, + { + "epoch": 0.8089260808926081, + "grad_norm": 2.4137353897094727, + "learning_rate": 4.074074074074074e-06, + "logits/chosen": -6.186558246612549, + "logits/rejected": -6.139374256134033, + "logps/chosen": -442.314453125, + "logps/rejected": -491.584716796875, + "loss": 0.5633067131042481, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.35670942068099976, + "rewards/margins": 0.38038796186447144, + "rewards/rejected": -0.7370973825454712, + "step": 145 + }, + { + "epoch": 0.8368200836820083, + "grad_norm": 2.1043145656585693, + "learning_rate": 4.022633744855967e-06, + "logits/chosen": -6.177689552307129, + "logits/rejected": -6.167322635650635, + "logps/chosen": -435.2288513183594, + "logps/rejected": -469.41436767578125, + "loss": 0.5640112876892089, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.32313138246536255, + "rewards/margins": 0.45979684591293335, + "rewards/rejected": -0.7829282283782959, + "step": 150 + }, + { + "epoch": 0.8647140864714087, + "grad_norm": 1.9623620510101318, + "learning_rate": 3.97119341563786e-06, + "logits/chosen": -6.0590739250183105, + "logits/rejected": -6.033650875091553, + "logps/chosen": -421.5000915527344, + "logps/rejected": -370.30902099609375, + "loss": 0.6319089412689209, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.29380694031715393, + "rewards/margins": 0.1507532149553299, + "rewards/rejected": -0.44456014037132263, + "step": 155 + }, + { + "epoch": 0.8926080892608089, + "grad_norm": 1.9432786703109741, + "learning_rate": 3.9197530864197535e-06, + "logits/chosen": -6.267019271850586, + "logits/rejected": -6.214621067047119, + "logps/chosen": -417.11724853515625, + "logps/rejected": -431.72698974609375, + "loss": 0.5186795234680176, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -0.18987610936164856, + "rewards/margins": 0.5616164803504944, + "rewards/rejected": -0.7514925599098206, + "step": 160 + }, + { + "epoch": 0.9205020920502092, + "grad_norm": 1.8146827220916748, + "learning_rate": 3.868312757201647e-06, + "logits/chosen": -6.230213165283203, + "logits/rejected": -6.109362602233887, + "logps/chosen": -376.6744384765625, + "logps/rejected": -376.6526184082031, + "loss": 0.5346522808074952, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.19152329862117767, + "rewards/margins": 0.38060134649276733, + "rewards/rejected": -0.5721246004104614, + "step": 165 + }, + { + "epoch": 0.9483960948396095, + "grad_norm": 1.936680793762207, + "learning_rate": 3.81687242798354e-06, + "logits/chosen": -6.194340705871582, + "logits/rejected": -6.1444597244262695, + "logps/chosen": -389.017822265625, + "logps/rejected": -445.76544189453125, + "loss": 0.49420690536499023, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -0.055042725056409836, + "rewards/margins": 0.5315954685211182, + "rewards/rejected": -0.5866381525993347, + "step": 170 + }, + { + "epoch": 0.9762900976290098, + "grad_norm": 3.2391903400421143, + "learning_rate": 3.7654320987654325e-06, + "logits/chosen": -6.190616607666016, + "logits/rejected": -6.1138739585876465, + "logps/chosen": -421.6878356933594, + "logps/rejected": -460.1180114746094, + "loss": 0.5374621391296387, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.22659805417060852, + "rewards/margins": 0.43736472725868225, + "rewards/rejected": -0.6639627814292908, + "step": 175 + }, + { + "epoch": 1.0, + "grad_norm": 4.300363063812256, + "learning_rate": 3.7139917695473256e-06, + "logits/chosen": -6.227687358856201, + "logits/rejected": -6.129978179931641, + "logps/chosen": -449.4482116699219, + "logps/rejected": -430.434326171875, + "loss": 0.49641432762146, + "rewards/accuracies": 0.8529411554336548, + "rewards/chosen": -0.03447714447975159, + "rewards/margins": 0.5154433846473694, + "rewards/rejected": -0.5499205589294434, + "step": 180 + }, + { + "epoch": 1.0278940027894004, + "grad_norm": 1.5967351198196411, + "learning_rate": 3.6625514403292183e-06, + "logits/chosen": -6.067181587219238, + "logits/rejected": -6.06889533996582, + "logps/chosen": -399.98968505859375, + "logps/rejected": -447.72039794921875, + "loss": 0.4079257011413574, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 0.03092392347753048, + "rewards/margins": 0.7688177824020386, + "rewards/rejected": -0.7378939390182495, + "step": 185 + }, + { + "epoch": 1.0557880055788005, + "grad_norm": 1.7787078619003296, + "learning_rate": 3.6111111111111115e-06, + "logits/chosen": -6.146653175354004, + "logits/rejected": -6.154219627380371, + "logps/chosen": -419.3291015625, + "logps/rejected": -463.1239318847656, + "loss": 0.42812933921813967, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -0.007557299919426441, + "rewards/margins": 0.7340337634086609, + "rewards/rejected": -0.7415911555290222, + "step": 190 + }, + { + "epoch": 1.0836820083682008, + "grad_norm": 1.7694693803787231, + "learning_rate": 3.559670781893004e-06, + "logits/chosen": -6.160831928253174, + "logits/rejected": -6.151050567626953, + "logps/chosen": -379.6001281738281, + "logps/rejected": -394.00531005859375, + "loss": 0.41410012245178224, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": 0.03558122366666794, + "rewards/margins": 0.8024934530258179, + "rewards/rejected": -0.7669121623039246, + "step": 195 + }, + { + "epoch": 1.1115760111576012, + "grad_norm": 1.3202013969421387, + "learning_rate": 3.5082304526748973e-06, + "logits/chosen": -6.110814094543457, + "logits/rejected": -6.176726341247559, + "logps/chosen": -400.85052490234375, + "logps/rejected": -405.44195556640625, + "loss": 0.391094446182251, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -0.02833392843604088, + "rewards/margins": 0.8581112623214722, + "rewards/rejected": -0.8864452242851257, + "step": 200 + }, + { + "epoch": 1.1394700139470013, + "grad_norm": 1.9367257356643677, + "learning_rate": 3.4567901234567904e-06, + "logits/chosen": -6.311105728149414, + "logits/rejected": -6.179243087768555, + "logps/chosen": -440.97625732421875, + "logps/rejected": -463.1673278808594, + "loss": 0.3870258331298828, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -0.012000990100204945, + "rewards/margins": 0.9945917129516602, + "rewards/rejected": -1.0065927505493164, + "step": 205 + }, + { + "epoch": 1.1673640167364017, + "grad_norm": 2.4570703506469727, + "learning_rate": 3.405349794238683e-06, + "logits/chosen": -6.199883937835693, + "logits/rejected": -6.160645484924316, + "logps/chosen": -448.8758850097656, + "logps/rejected": -439.4584045410156, + "loss": 0.3908271551132202, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 0.04756501317024231, + "rewards/margins": 0.8816453218460083, + "rewards/rejected": -0.8340802192687988, + "step": 210 + }, + { + "epoch": 1.195258019525802, + "grad_norm": 1.5992087125778198, + "learning_rate": 3.3539094650205767e-06, + "logits/chosen": -6.163644313812256, + "logits/rejected": -6.093722343444824, + "logps/chosen": -449.8214416503906, + "logps/rejected": -481.3743591308594, + "loss": 0.3612337350845337, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09812992066144943, + "rewards/margins": 0.9562546014785767, + "rewards/rejected": -0.858124852180481, + "step": 215 + }, + { + "epoch": 1.2231520223152021, + "grad_norm": 1.4101840257644653, + "learning_rate": 3.30246913580247e-06, + "logits/chosen": -6.281071662902832, + "logits/rejected": -6.337766170501709, + "logps/chosen": -281.46795654296875, + "logps/rejected": -336.60845947265625, + "loss": 0.43022546768188474, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -0.1486438810825348, + "rewards/margins": 0.7064955234527588, + "rewards/rejected": -0.855139434337616, + "step": 220 + }, + { + "epoch": 1.2510460251046025, + "grad_norm": 1.5817450284957886, + "learning_rate": 3.2510288065843625e-06, + "logits/chosen": -6.1745758056640625, + "logits/rejected": -6.192706108093262, + "logps/chosen": -399.51190185546875, + "logps/rejected": -424.1844177246094, + "loss": 0.3992297887802124, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -0.051351286470890045, + "rewards/margins": 0.827163815498352, + "rewards/rejected": -0.8785150647163391, + "step": 225 + }, + { + "epoch": 1.2789400278940029, + "grad_norm": 1.3157438039779663, + "learning_rate": 3.1995884773662556e-06, + "logits/chosen": -6.1543779373168945, + "logits/rejected": -6.1732258796691895, + "logps/chosen": -425.80755615234375, + "logps/rejected": -447.39453125, + "loss": 0.35113141536712644, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": 0.01585063710808754, + "rewards/margins": 1.0381678342819214, + "rewards/rejected": -1.0223171710968018, + "step": 230 + }, + { + "epoch": 1.3068340306834032, + "grad_norm": 1.4981003999710083, + "learning_rate": 3.1481481481481483e-06, + "logits/chosen": -6.220085620880127, + "logits/rejected": -6.215539455413818, + "logps/chosen": -393.77716064453125, + "logps/rejected": -474.4695739746094, + "loss": 0.35107009410858153, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 0.07319364696741104, + "rewards/margins": 1.0109622478485107, + "rewards/rejected": -0.9377686381340027, + "step": 235 + }, + { + "epoch": 1.3347280334728033, + "grad_norm": 1.6417901515960693, + "learning_rate": 3.0967078189300415e-06, + "logits/chosen": -6.223210334777832, + "logits/rejected": -6.187335968017578, + "logps/chosen": -454.0006408691406, + "logps/rejected": -439.3124084472656, + "loss": 0.3300657272338867, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09006929397583008, + "rewards/margins": 1.089814305305481, + "rewards/rejected": -0.9997450709342957, + "step": 240 + }, + { + "epoch": 1.3626220362622037, + "grad_norm": 1.3642381429672241, + "learning_rate": 3.0452674897119346e-06, + "logits/chosen": -6.211455821990967, + "logits/rejected": -6.1171441078186035, + "logps/chosen": -402.8586730957031, + "logps/rejected": -431.7958984375, + "loss": 0.3634498119354248, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -0.0397757962346077, + "rewards/margins": 1.0654242038726807, + "rewards/rejected": -1.1052000522613525, + "step": 245 + }, + { + "epoch": 1.390516039051604, + "grad_norm": 1.9008878469467163, + "learning_rate": 2.9938271604938273e-06, + "logits/chosen": -6.2566046714782715, + "logits/rejected": -6.222296714782715, + "logps/chosen": -433.3427734375, + "logps/rejected": -489.8169860839844, + "loss": 0.3385239839553833, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 0.04979880154132843, + "rewards/margins": 1.0801963806152344, + "rewards/rejected": -1.0303975343704224, + "step": 250 + } + ], + "logging_steps": 5, + "max_steps": 540, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 50, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 0.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/gemma-2b-dpo/checkpoint-250/training_args.bin b/gemma-2b-dpo/checkpoint-250/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..c02f6b48421cea8396dfcaf6d8319b0150c14d7b --- /dev/null +++ b/gemma-2b-dpo/checkpoint-250/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3be0616829fa305775b58136a03c46cbb233332c99572ecc66875666e4681dc9 +size 5688 diff --git a/gemma-2b-dpo/checkpoint-300/README.md b/gemma-2b-dpo/checkpoint-300/README.md new file mode 100644 index 0000000000000000000000000000000000000000..14d70987363c6b5210911e1b79d69ac05ee2d8b7 --- /dev/null +++ b/gemma-2b-dpo/checkpoint-300/README.md @@ -0,0 +1,209 @@ +--- +base_model: google/gemma-2-2b-it +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:google/gemma-2-2b-it +- dpo +- lora +- transformers +- trl +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/gemma-2b-dpo/checkpoint-300/adapter_config.json b/gemma-2b-dpo/checkpoint-300/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..bbaa22368e6c52ef59d4559074221a5020aba608 --- /dev/null +++ b/gemma-2b-dpo/checkpoint-300/adapter_config.json @@ -0,0 +1,46 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": null, + "base_model_name_or_path": "google/gemma-2-2b-it", + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "up_proj", + "q_proj", + "v_proj", + "gate_proj", + "k_proj", + "down_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/gemma-2b-dpo/checkpoint-300/adapter_model.safetensors b/gemma-2b-dpo/checkpoint-300/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0ee7d7b4814ee2bbf48d68df6901569066aa2a16 --- /dev/null +++ b/gemma-2b-dpo/checkpoint-300/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a3cb7b1f7ca8c60a99cef4f5748b8d2fdc829a7b08e5d5e51a8fc3eda5f5de86 +size 83115256 diff --git a/gemma-2b-dpo/checkpoint-300/chat_template.jinja b/gemma-2b-dpo/checkpoint-300/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..923ec253c8dbefbb41cf084db7251df41d000f6d --- /dev/null +++ b/gemma-2b-dpo/checkpoint-300/chat_template.jinja @@ -0,0 +1,4 @@ +{{ bos_token }}{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if (message['role'] == 'assistant') %}{% set role = 'model' %}{% else %}{% set role = message['role'] %}{% endif %}{{ '' + role + ' +' + message['content'] | trim + ' +' }}{% endfor %}{% if add_generation_prompt %}{{'model +'}}{% endif %} \ No newline at end of file diff --git a/gemma-2b-dpo/checkpoint-300/optimizer.pt b/gemma-2b-dpo/checkpoint-300/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..af2de71d71b9c5ae025462dcefbf161f41a3de24 --- /dev/null +++ b/gemma-2b-dpo/checkpoint-300/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8947354f85814c3da0768d6a90674faf2908045c9987e336fb283a8bbf979c65 +size 42616772 diff --git a/gemma-2b-dpo/checkpoint-300/rng_state.pth b/gemma-2b-dpo/checkpoint-300/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..ece67508ffa7f89d1f8e8b4e514d0551447e32a9 --- /dev/null +++ b/gemma-2b-dpo/checkpoint-300/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2b66e3cc7c452b707ddac5caf0aa17618afb9bc1a0333600a22c4afb353f3165 +size 14244 diff --git a/gemma-2b-dpo/checkpoint-300/scheduler.pt b/gemma-2b-dpo/checkpoint-300/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..713d5e26144f187a62a0987f6025d7d01402dca4 --- /dev/null +++ b/gemma-2b-dpo/checkpoint-300/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d7e8b34902c4b26a7f217223e99a8ef2d80870b839e1daee31b46b9a8a893160 +size 1064 diff --git a/gemma-2b-dpo/checkpoint-300/tokenizer.json b/gemma-2b-dpo/checkpoint-300/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..6523cc0616b64c563af913a417dfa7eb01549a2c --- /dev/null +++ b/gemma-2b-dpo/checkpoint-300/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:394ace002a144ac6ad5486387502f2d36f70c087310c3d907857240c76fcb36e +size 34362748 diff --git a/gemma-2b-dpo/checkpoint-300/tokenizer_config.json b/gemma-2b-dpo/checkpoint-300/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..eabc4ed8ab34751069a1970d61e616ce93e53880 --- /dev/null +++ b/gemma-2b-dpo/checkpoint-300/tokenizer_config.json @@ -0,0 +1,19 @@ +{ + "backend": "tokenizers", + "bos_token": "", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "extra_special_tokens": [ + "", + "" + ], + "is_local": false, + "mask_token": "", + "model_max_length": 1000000000000000019884624838656, + "pad_token": "", + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "tokenizer_class": "GemmaTokenizer", + "unk_token": "", + "use_default_system_prompt": false +} diff --git a/gemma-2b-dpo/checkpoint-300/trainer_state.json b/gemma-2b-dpo/checkpoint-300/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..8a345c2ce70b0b982277eb152fba6ca53a75f96c --- /dev/null +++ b/gemma-2b-dpo/checkpoint-300/trainer_state.json @@ -0,0 +1,934 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.6694560669456067, + "eval_steps": 500, + "global_step": 300, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.02789400278940028, + "grad_norm": 2.7052793502807617, + "learning_rate": 3.7037037037037036e-07, + "logits/chosen": -6.208017826080322, + "logits/rejected": -6.18649435043335, + "logps/chosen": -417.861328125, + "logps/rejected": -431.774169921875, + "loss": 0.6978574275970459, + "rewards/accuracies": 0.25, + "rewards/chosen": 0.0027643204666674137, + "rewards/margins": -0.00830078125, + "rewards/rejected": 0.011065103113651276, + "step": 5 + }, + { + "epoch": 0.05578800557880056, + "grad_norm": 1.9632341861724854, + "learning_rate": 8.333333333333333e-07, + "logits/chosen": -6.051701545715332, + "logits/rejected": -6.098549842834473, + "logps/chosen": -442.61126708984375, + "logps/rejected": -419.4737243652344, + "loss": 0.6965099811553955, + "rewards/accuracies": 0.44999998807907104, + "rewards/chosen": -0.0022751614451408386, + "rewards/margins": -0.004312096629291773, + "rewards/rejected": 0.002036933321505785, + "step": 10 + }, + { + "epoch": 0.08368200836820083, + "grad_norm": 1.9358311891555786, + "learning_rate": 1.2962962962962962e-06, + "logits/chosen": -6.1482648849487305, + "logits/rejected": -6.208896636962891, + "logps/chosen": -419.15155029296875, + "logps/rejected": -393.37322998046875, + "loss": 0.6971890449523925, + "rewards/accuracies": 0.4749999940395355, + "rewards/chosen": 0.004753150977194309, + "rewards/margins": -0.006633720360696316, + "rewards/rejected": 0.0113868722692132, + "step": 15 + }, + { + "epoch": 0.11157601115760112, + "grad_norm": 2.137960195541382, + "learning_rate": 1.7592592592592594e-06, + "logits/chosen": -6.1889142990112305, + "logits/rejected": -6.147027015686035, + "logps/chosen": -449.2413024902344, + "logps/rejected": -387.8244934082031, + "loss": 0.694630479812622, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.01739494316279888, + "rewards/margins": -0.0010753620881587267, + "rewards/rejected": -0.016319578513503075, + "step": 20 + }, + { + "epoch": 0.1394700139470014, + "grad_norm": 2.610708475112915, + "learning_rate": 2.222222222222222e-06, + "logits/chosen": -6.098985195159912, + "logits/rejected": -6.146561145782471, + "logps/chosen": -528.6546020507812, + "logps/rejected": -517.2868041992188, + "loss": 0.6923945903778076, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.013269426301121712, + "rewards/margins": 0.004172402434051037, + "rewards/rejected": 0.009097023867070675, + "step": 25 + }, + { + "epoch": 0.16736401673640167, + "grad_norm": 3.0792224407196045, + "learning_rate": 2.6851851851851856e-06, + "logits/chosen": -6.156611442565918, + "logits/rejected": -6.146718502044678, + "logps/chosen": -427.1123962402344, + "logps/rejected": -413.99810791015625, + "loss": 0.6963389396667481, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.0016099174972623587, + "rewards/margins": -0.0036583715118467808, + "rewards/rejected": 0.0052682883106172085, + "step": 30 + }, + { + "epoch": 0.19525801952580196, + "grad_norm": 2.40751051902771, + "learning_rate": 3.1481481481481483e-06, + "logits/chosen": -6.270221710205078, + "logits/rejected": -6.222764492034912, + "logps/chosen": -433.89312744140625, + "logps/rejected": -442.81378173828125, + "loss": 0.6875874042510987, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.003747978014871478, + "rewards/margins": 0.013033255934715271, + "rewards/rejected": -0.016781235113739967, + "step": 35 + }, + { + "epoch": 0.22315202231520223, + "grad_norm": 2.409308671951294, + "learning_rate": 3.6111111111111115e-06, + "logits/chosen": -6.171980857849121, + "logits/rejected": -6.236737251281738, + "logps/chosen": -411.51092529296875, + "logps/rejected": -454.578857421875, + "loss": 0.6975872993469239, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.0006048586219549179, + "rewards/margins": -0.006687240209430456, + "rewards/rejected": 0.0060823829844594, + "step": 40 + }, + { + "epoch": 0.2510460251046025, + "grad_norm": 2.8261911869049072, + "learning_rate": 4.074074074074074e-06, + "logits/chosen": -6.1633710861206055, + "logits/rejected": -6.245741367340088, + "logps/chosen": -373.363525390625, + "logps/rejected": -356.736572265625, + "loss": 0.6881499290466309, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": 0.01782766357064247, + "rewards/margins": 0.015053692273795605, + "rewards/rejected": 0.002773971762508154, + "step": 45 + }, + { + "epoch": 0.2789400278940028, + "grad_norm": 2.457179546356201, + "learning_rate": 4.537037037037038e-06, + "logits/chosen": -6.270019054412842, + "logits/rejected": -6.3202104568481445, + "logps/chosen": -466.30609130859375, + "logps/rejected": -476.45550537109375, + "loss": 0.6831833839416503, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": 0.0046669007278978825, + "rewards/margins": 0.023042945191264153, + "rewards/rejected": -0.018376046791672707, + "step": 50 + }, + { + "epoch": 0.3068340306834031, + "grad_norm": 1.6770554780960083, + "learning_rate": 5e-06, + "logits/chosen": -6.253265380859375, + "logits/rejected": -6.15267276763916, + "logps/chosen": -352.24908447265625, + "logps/rejected": -447.11444091796875, + "loss": 0.6791603088378906, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.010960197076201439, + "rewards/margins": 0.031198084354400635, + "rewards/rejected": -0.042158275842666626, + "step": 55 + }, + { + "epoch": 0.33472803347280333, + "grad_norm": 2.6027019023895264, + "learning_rate": 4.9485596707818935e-06, + "logits/chosen": -6.205387592315674, + "logits/rejected": -6.259293079376221, + "logps/chosen": -439.732421875, + "logps/rejected": -412.8099670410156, + "loss": 0.6736814975738525, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.010224836878478527, + "rewards/margins": 0.04464374855160713, + "rewards/rejected": -0.054868586361408234, + "step": 60 + }, + { + "epoch": 0.36262203626220363, + "grad_norm": 2.1717166900634766, + "learning_rate": 4.897119341563787e-06, + "logits/chosen": -6.1334547996521, + "logits/rejected": -6.148266792297363, + "logps/chosen": -390.00433349609375, + "logps/rejected": -376.676513671875, + "loss": 0.6825191974639893, + "rewards/accuracies": 0.5249999761581421, + "rewards/chosen": -0.04612758383154869, + "rewards/margins": 0.03951488807797432, + "rewards/rejected": -0.08564247190952301, + "step": 65 + }, + { + "epoch": 0.3905160390516039, + "grad_norm": 2.2574119567871094, + "learning_rate": 4.845679012345679e-06, + "logits/chosen": -6.236250877380371, + "logits/rejected": -6.165186882019043, + "logps/chosen": -411.1315002441406, + "logps/rejected": -447.22100830078125, + "loss": 0.6402256488800049, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.017545931041240692, + "rewards/margins": 0.12250369787216187, + "rewards/rejected": -0.14004963636398315, + "step": 70 + }, + { + "epoch": 0.41841004184100417, + "grad_norm": 2.3837037086486816, + "learning_rate": 4.794238683127572e-06, + "logits/chosen": -6.256176948547363, + "logits/rejected": -6.213258266448975, + "logps/chosen": -437.463623046875, + "logps/rejected": -404.8554992675781, + "loss": 0.6703986167907715, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.03428981825709343, + "rewards/margins": 0.05360151082277298, + "rewards/rejected": -0.08789133280515671, + "step": 75 + }, + { + "epoch": 0.44630404463040446, + "grad_norm": 3.304287910461426, + "learning_rate": 4.742798353909465e-06, + "logits/chosen": -6.2820305824279785, + "logits/rejected": -6.221312522888184, + "logps/chosen": -455.2318420410156, + "logps/rejected": -422.08837890625, + "loss": 0.7040590286254883, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.0881255492568016, + "rewards/margins": -0.006679975427687168, + "rewards/rejected": -0.08144557476043701, + "step": 80 + }, + { + "epoch": 0.47419804741980476, + "grad_norm": 2.6312427520751953, + "learning_rate": 4.691358024691358e-06, + "logits/chosen": -6.1796159744262695, + "logits/rejected": -6.193436622619629, + "logps/chosen": -423.59930419921875, + "logps/rejected": -486.1690979003906, + "loss": 0.6397527694702149, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.043790053576231, + "rewards/margins": 0.12486596405506134, + "rewards/rejected": -0.16865602135658264, + "step": 85 + }, + { + "epoch": 0.502092050209205, + "grad_norm": 2.3493549823760986, + "learning_rate": 4.6399176954732515e-06, + "logits/chosen": -6.136630058288574, + "logits/rejected": -6.202858924865723, + "logps/chosen": -467.627685546875, + "logps/rejected": -441.41241455078125, + "loss": 0.5936434745788575, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": 0.06891433894634247, + "rewards/margins": 0.26186972856521606, + "rewards/rejected": -0.19295534491539001, + "step": 90 + }, + { + "epoch": 0.5299860529986054, + "grad_norm": 2.4952447414398193, + "learning_rate": 4.588477366255145e-06, + "logits/chosen": -6.1503586769104, + "logits/rejected": -6.144400596618652, + "logps/chosen": -355.2735290527344, + "logps/rejected": -409.51702880859375, + "loss": 0.6157774925231934, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.0699276328086853, + "rewards/margins": 0.22694334387779236, + "rewards/rejected": -0.2968709468841553, + "step": 95 + }, + { + "epoch": 0.5578800557880056, + "grad_norm": 2.5470480918884277, + "learning_rate": 4.537037037037038e-06, + "logits/chosen": -6.14028263092041, + "logits/rejected": -6.104605197906494, + "logps/chosen": -429.1048889160156, + "logps/rejected": -454.7377014160156, + "loss": 0.6300024032592774, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.05769091844558716, + "rewards/margins": 0.14401891827583313, + "rewards/rejected": -0.2017098367214203, + "step": 100 + }, + { + "epoch": 0.5857740585774058, + "grad_norm": 2.6023478507995605, + "learning_rate": 4.485596707818931e-06, + "logits/chosen": -6.196796894073486, + "logits/rejected": -6.226622104644775, + "logps/chosen": -442.52685546875, + "logps/rejected": -516.7334594726562, + "loss": 0.6245638847351074, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.18519389629364014, + "rewards/margins": 0.24079546332359314, + "rewards/rejected": -0.42598938941955566, + "step": 105 + }, + { + "epoch": 0.6136680613668062, + "grad_norm": 2.0638511180877686, + "learning_rate": 4.434156378600823e-06, + "logits/chosen": -6.1991071701049805, + "logits/rejected": -6.119466781616211, + "logps/chosen": -410.86669921875, + "logps/rejected": -450.365478515625, + "loss": 0.6201879501342773, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.0853937491774559, + "rewards/margins": 0.17646726965904236, + "rewards/rejected": -0.26186102628707886, + "step": 110 + }, + { + "epoch": 0.6415620641562064, + "grad_norm": 2.3625364303588867, + "learning_rate": 4.382716049382716e-06, + "logits/chosen": -6.220386505126953, + "logits/rejected": -6.223449230194092, + "logps/chosen": -435.92626953125, + "logps/rejected": -495.6065368652344, + "loss": 0.6151515483856201, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.2208656519651413, + "rewards/margins": 0.21172885596752167, + "rewards/rejected": -0.43259453773498535, + "step": 115 + }, + { + "epoch": 0.6694560669456067, + "grad_norm": 1.8082666397094727, + "learning_rate": 4.331275720164609e-06, + "logits/chosen": -6.262181282043457, + "logits/rejected": -6.250016212463379, + "logps/chosen": -354.21795654296875, + "logps/rejected": -389.14556884765625, + "loss": 0.6109379768371582, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.17473874986171722, + "rewards/margins": 0.19315743446350098, + "rewards/rejected": -0.367896169424057, + "step": 120 + }, + { + "epoch": 0.697350069735007, + "grad_norm": 1.9556658267974854, + "learning_rate": 4.2798353909465025e-06, + "logits/chosen": -6.167700290679932, + "logits/rejected": -6.1421003341674805, + "logps/chosen": -379.1827392578125, + "logps/rejected": -426.69549560546875, + "loss": 0.6202447414398193, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.17535170912742615, + "rewards/margins": 0.18863627314567566, + "rewards/rejected": -0.3639879822731018, + "step": 125 + }, + { + "epoch": 0.7252440725244073, + "grad_norm": 3.001298666000366, + "learning_rate": 4.228395061728396e-06, + "logits/chosen": -6.2535905838012695, + "logits/rejected": -6.232400894165039, + "logps/chosen": -424.8458557128906, + "logps/rejected": -494.52960205078125, + "loss": 0.5493914127349854, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.22046081721782684, + "rewards/margins": 0.3785194754600525, + "rewards/rejected": -0.5989803075790405, + "step": 130 + }, + { + "epoch": 0.7531380753138075, + "grad_norm": 2.5210413932800293, + "learning_rate": 4.176954732510288e-06, + "logits/chosen": -6.078260898590088, + "logits/rejected": -6.0126447677612305, + "logps/chosen": -414.69940185546875, + "logps/rejected": -432.3282165527344, + "loss": 0.579456901550293, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.15084640681743622, + "rewards/margins": 0.2960701882839203, + "rewards/rejected": -0.4469165802001953, + "step": 135 + }, + { + "epoch": 0.7810320781032078, + "grad_norm": 2.6807265281677246, + "learning_rate": 4.125514403292181e-06, + "logits/chosen": -6.243051052093506, + "logits/rejected": -6.220357418060303, + "logps/chosen": -400.6156311035156, + "logps/rejected": -450.0393981933594, + "loss": 0.5514531135559082, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.25727975368499756, + "rewards/margins": 0.3991738259792328, + "rewards/rejected": -0.6564534902572632, + "step": 140 + }, + { + "epoch": 0.8089260808926081, + "grad_norm": 2.4137353897094727, + "learning_rate": 4.074074074074074e-06, + "logits/chosen": -6.186558246612549, + "logits/rejected": -6.139374256134033, + "logps/chosen": -442.314453125, + "logps/rejected": -491.584716796875, + "loss": 0.5633067131042481, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.35670942068099976, + "rewards/margins": 0.38038796186447144, + "rewards/rejected": -0.7370973825454712, + "step": 145 + }, + { + "epoch": 0.8368200836820083, + "grad_norm": 2.1043145656585693, + "learning_rate": 4.022633744855967e-06, + "logits/chosen": -6.177689552307129, + "logits/rejected": -6.167322635650635, + "logps/chosen": -435.2288513183594, + "logps/rejected": -469.41436767578125, + "loss": 0.5640112876892089, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.32313138246536255, + "rewards/margins": 0.45979684591293335, + "rewards/rejected": -0.7829282283782959, + "step": 150 + }, + { + "epoch": 0.8647140864714087, + "grad_norm": 1.9623620510101318, + "learning_rate": 3.97119341563786e-06, + "logits/chosen": -6.0590739250183105, + "logits/rejected": -6.033650875091553, + "logps/chosen": -421.5000915527344, + "logps/rejected": -370.30902099609375, + "loss": 0.6319089412689209, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.29380694031715393, + "rewards/margins": 0.1507532149553299, + "rewards/rejected": -0.44456014037132263, + "step": 155 + }, + { + "epoch": 0.8926080892608089, + "grad_norm": 1.9432786703109741, + "learning_rate": 3.9197530864197535e-06, + "logits/chosen": -6.267019271850586, + "logits/rejected": -6.214621067047119, + "logps/chosen": -417.11724853515625, + "logps/rejected": -431.72698974609375, + "loss": 0.5186795234680176, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -0.18987610936164856, + "rewards/margins": 0.5616164803504944, + "rewards/rejected": -0.7514925599098206, + "step": 160 + }, + { + "epoch": 0.9205020920502092, + "grad_norm": 1.8146827220916748, + "learning_rate": 3.868312757201647e-06, + "logits/chosen": -6.230213165283203, + "logits/rejected": -6.109362602233887, + "logps/chosen": -376.6744384765625, + "logps/rejected": -376.6526184082031, + "loss": 0.5346522808074952, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.19152329862117767, + "rewards/margins": 0.38060134649276733, + "rewards/rejected": -0.5721246004104614, + "step": 165 + }, + { + "epoch": 0.9483960948396095, + "grad_norm": 1.936680793762207, + "learning_rate": 3.81687242798354e-06, + "logits/chosen": -6.194340705871582, + "logits/rejected": -6.1444597244262695, + "logps/chosen": -389.017822265625, + "logps/rejected": -445.76544189453125, + "loss": 0.49420690536499023, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -0.055042725056409836, + "rewards/margins": 0.5315954685211182, + "rewards/rejected": -0.5866381525993347, + "step": 170 + }, + { + "epoch": 0.9762900976290098, + "grad_norm": 3.2391903400421143, + "learning_rate": 3.7654320987654325e-06, + "logits/chosen": -6.190616607666016, + "logits/rejected": -6.1138739585876465, + "logps/chosen": -421.6878356933594, + "logps/rejected": -460.1180114746094, + "loss": 0.5374621391296387, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.22659805417060852, + "rewards/margins": 0.43736472725868225, + "rewards/rejected": -0.6639627814292908, + "step": 175 + }, + { + "epoch": 1.0, + "grad_norm": 4.300363063812256, + "learning_rate": 3.7139917695473256e-06, + "logits/chosen": -6.227687358856201, + "logits/rejected": -6.129978179931641, + "logps/chosen": -449.4482116699219, + "logps/rejected": -430.434326171875, + "loss": 0.49641432762146, + "rewards/accuracies": 0.8529411554336548, + "rewards/chosen": -0.03447714447975159, + "rewards/margins": 0.5154433846473694, + "rewards/rejected": -0.5499205589294434, + "step": 180 + }, + { + "epoch": 1.0278940027894004, + "grad_norm": 1.5967351198196411, + "learning_rate": 3.6625514403292183e-06, + "logits/chosen": -6.067181587219238, + "logits/rejected": -6.06889533996582, + "logps/chosen": -399.98968505859375, + "logps/rejected": -447.72039794921875, + "loss": 0.4079257011413574, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 0.03092392347753048, + "rewards/margins": 0.7688177824020386, + "rewards/rejected": -0.7378939390182495, + "step": 185 + }, + { + "epoch": 1.0557880055788005, + "grad_norm": 1.7787078619003296, + "learning_rate": 3.6111111111111115e-06, + "logits/chosen": -6.146653175354004, + "logits/rejected": -6.154219627380371, + "logps/chosen": -419.3291015625, + "logps/rejected": -463.1239318847656, + "loss": 0.42812933921813967, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -0.007557299919426441, + "rewards/margins": 0.7340337634086609, + "rewards/rejected": -0.7415911555290222, + "step": 190 + }, + { + "epoch": 1.0836820083682008, + "grad_norm": 1.7694693803787231, + "learning_rate": 3.559670781893004e-06, + "logits/chosen": -6.160831928253174, + "logits/rejected": -6.151050567626953, + "logps/chosen": -379.6001281738281, + "logps/rejected": -394.00531005859375, + "loss": 0.41410012245178224, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": 0.03558122366666794, + "rewards/margins": 0.8024934530258179, + "rewards/rejected": -0.7669121623039246, + "step": 195 + }, + { + "epoch": 1.1115760111576012, + "grad_norm": 1.3202013969421387, + "learning_rate": 3.5082304526748973e-06, + "logits/chosen": -6.110814094543457, + "logits/rejected": -6.176726341247559, + "logps/chosen": -400.85052490234375, + "logps/rejected": -405.44195556640625, + "loss": 0.391094446182251, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -0.02833392843604088, + "rewards/margins": 0.8581112623214722, + "rewards/rejected": -0.8864452242851257, + "step": 200 + }, + { + "epoch": 1.1394700139470013, + "grad_norm": 1.9367257356643677, + "learning_rate": 3.4567901234567904e-06, + "logits/chosen": -6.311105728149414, + "logits/rejected": -6.179243087768555, + "logps/chosen": -440.97625732421875, + "logps/rejected": -463.1673278808594, + "loss": 0.3870258331298828, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -0.012000990100204945, + "rewards/margins": 0.9945917129516602, + "rewards/rejected": -1.0065927505493164, + "step": 205 + }, + { + "epoch": 1.1673640167364017, + "grad_norm": 2.4570703506469727, + "learning_rate": 3.405349794238683e-06, + "logits/chosen": -6.199883937835693, + "logits/rejected": -6.160645484924316, + "logps/chosen": -448.8758850097656, + "logps/rejected": -439.4584045410156, + "loss": 0.3908271551132202, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 0.04756501317024231, + "rewards/margins": 0.8816453218460083, + "rewards/rejected": -0.8340802192687988, + "step": 210 + }, + { + "epoch": 1.195258019525802, + "grad_norm": 1.5992087125778198, + "learning_rate": 3.3539094650205767e-06, + "logits/chosen": -6.163644313812256, + "logits/rejected": -6.093722343444824, + "logps/chosen": -449.8214416503906, + "logps/rejected": -481.3743591308594, + "loss": 0.3612337350845337, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09812992066144943, + "rewards/margins": 0.9562546014785767, + "rewards/rejected": -0.858124852180481, + "step": 215 + }, + { + "epoch": 1.2231520223152021, + "grad_norm": 1.4101840257644653, + "learning_rate": 3.30246913580247e-06, + "logits/chosen": -6.281071662902832, + "logits/rejected": -6.337766170501709, + "logps/chosen": -281.46795654296875, + "logps/rejected": -336.60845947265625, + "loss": 0.43022546768188474, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -0.1486438810825348, + "rewards/margins": 0.7064955234527588, + "rewards/rejected": -0.855139434337616, + "step": 220 + }, + { + "epoch": 1.2510460251046025, + "grad_norm": 1.5817450284957886, + "learning_rate": 3.2510288065843625e-06, + "logits/chosen": -6.1745758056640625, + "logits/rejected": -6.192706108093262, + "logps/chosen": -399.51190185546875, + "logps/rejected": -424.1844177246094, + "loss": 0.3992297887802124, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -0.051351286470890045, + "rewards/margins": 0.827163815498352, + "rewards/rejected": -0.8785150647163391, + "step": 225 + }, + { + "epoch": 1.2789400278940029, + "grad_norm": 1.3157438039779663, + "learning_rate": 3.1995884773662556e-06, + "logits/chosen": -6.1543779373168945, + "logits/rejected": -6.1732258796691895, + "logps/chosen": -425.80755615234375, + "logps/rejected": -447.39453125, + "loss": 0.35113141536712644, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": 0.01585063710808754, + "rewards/margins": 1.0381678342819214, + "rewards/rejected": -1.0223171710968018, + "step": 230 + }, + { + "epoch": 1.3068340306834032, + "grad_norm": 1.4981003999710083, + "learning_rate": 3.1481481481481483e-06, + "logits/chosen": -6.220085620880127, + "logits/rejected": -6.215539455413818, + "logps/chosen": -393.77716064453125, + "logps/rejected": -474.4695739746094, + "loss": 0.35107009410858153, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 0.07319364696741104, + "rewards/margins": 1.0109622478485107, + "rewards/rejected": -0.9377686381340027, + "step": 235 + }, + { + "epoch": 1.3347280334728033, + "grad_norm": 1.6417901515960693, + "learning_rate": 3.0967078189300415e-06, + "logits/chosen": -6.223210334777832, + "logits/rejected": -6.187335968017578, + "logps/chosen": -454.0006408691406, + "logps/rejected": -439.3124084472656, + "loss": 0.3300657272338867, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09006929397583008, + "rewards/margins": 1.089814305305481, + "rewards/rejected": -0.9997450709342957, + "step": 240 + }, + { + "epoch": 1.3626220362622037, + "grad_norm": 1.3642381429672241, + "learning_rate": 3.0452674897119346e-06, + "logits/chosen": -6.211455821990967, + "logits/rejected": -6.1171441078186035, + "logps/chosen": -402.8586730957031, + "logps/rejected": -431.7958984375, + "loss": 0.3634498119354248, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -0.0397757962346077, + "rewards/margins": 1.0654242038726807, + "rewards/rejected": -1.1052000522613525, + "step": 245 + }, + { + "epoch": 1.390516039051604, + "grad_norm": 1.9008878469467163, + "learning_rate": 2.9938271604938273e-06, + "logits/chosen": -6.2566046714782715, + "logits/rejected": -6.222296714782715, + "logps/chosen": -433.3427734375, + "logps/rejected": -489.8169860839844, + "loss": 0.3385239839553833, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 0.04979880154132843, + "rewards/margins": 1.0801963806152344, + "rewards/rejected": -1.0303975343704224, + "step": 250 + }, + { + "epoch": 1.4184100418410042, + "grad_norm": 1.5302000045776367, + "learning_rate": 2.9423868312757204e-06, + "logits/chosen": -6.219883918762207, + "logits/rejected": -6.1428399085998535, + "logps/chosen": -428.93914794921875, + "logps/rejected": -493.17437744140625, + "loss": 0.3642880916595459, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.09988997876644135, + "rewards/margins": 1.0157719850540161, + "rewards/rejected": -1.1156619787216187, + "step": 255 + }, + { + "epoch": 1.4463040446304045, + "grad_norm": 1.898163080215454, + "learning_rate": 2.890946502057613e-06, + "logits/chosen": -6.123999118804932, + "logits/rejected": -6.1102776527404785, + "logps/chosen": -494.8221740722656, + "logps/rejected": -464.3958435058594, + "loss": 0.31599912643432615, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -0.03670971840620041, + "rewards/margins": 1.1374431848526, + "rewards/rejected": -1.1741528511047363, + "step": 260 + }, + { + "epoch": 1.4741980474198049, + "grad_norm": 1.7610703706741333, + "learning_rate": 2.8395061728395062e-06, + "logits/chosen": -6.1201653480529785, + "logits/rejected": -6.155616760253906, + "logps/chosen": -489.81201171875, + "logps/rejected": -511.11724853515625, + "loss": 0.30640478134155275, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -0.057888150215148926, + "rewards/margins": 1.2739557027816772, + "rewards/rejected": -1.3318438529968262, + "step": 265 + }, + { + "epoch": 1.502092050209205, + "grad_norm": 1.3035284280776978, + "learning_rate": 2.7880658436213994e-06, + "logits/chosen": -6.300120830535889, + "logits/rejected": -6.1527228355407715, + "logps/chosen": -433.002685546875, + "logps/rejected": -471.416015625, + "loss": 0.2971210956573486, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.03341478109359741, + "rewards/margins": 1.295318841934204, + "rewards/rejected": -1.261904001235962, + "step": 270 + }, + { + "epoch": 1.5299860529986054, + "grad_norm": 1.7996495962142944, + "learning_rate": 2.736625514403292e-06, + "logits/chosen": -6.244246482849121, + "logits/rejected": -6.148303985595703, + "logps/chosen": -403.05133056640625, + "logps/rejected": -380.7239990234375, + "loss": 0.36082763671875, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -0.016453832387924194, + "rewards/margins": 1.103393793106079, + "rewards/rejected": -1.1198475360870361, + "step": 275 + }, + { + "epoch": 1.5578800557880057, + "grad_norm": 2.193908929824829, + "learning_rate": 2.6851851851851856e-06, + "logits/chosen": -6.1752519607543945, + "logits/rejected": -6.249385833740234, + "logps/chosen": -482.1385803222656, + "logps/rejected": -504.50311279296875, + "loss": 0.3047311782836914, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": 0.20222747325897217, + "rewards/margins": 1.3389281034469604, + "rewards/rejected": -1.1367007493972778, + "step": 280 + }, + { + "epoch": 1.5857740585774058, + "grad_norm": 1.3961774110794067, + "learning_rate": 2.6337448559670788e-06, + "logits/chosen": -6.2296528816223145, + "logits/rejected": -6.2554612159729, + "logps/chosen": -370.95428466796875, + "logps/rejected": -465.4556579589844, + "loss": 0.31553618907928466, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -0.10641174018383026, + "rewards/margins": 1.2302197217941284, + "rewards/rejected": -1.336631417274475, + "step": 285 + }, + { + "epoch": 1.6136680613668062, + "grad_norm": 1.296229600906372, + "learning_rate": 2.5823045267489715e-06, + "logits/chosen": -6.245091438293457, + "logits/rejected": -6.184089660644531, + "logps/chosen": -420.3929138183594, + "logps/rejected": -484.88934326171875, + "loss": 0.28981173038482666, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -0.03132909908890724, + "rewards/margins": 1.3171762228012085, + "rewards/rejected": -1.3485053777694702, + "step": 290 + }, + { + "epoch": 1.6415620641562065, + "grad_norm": 1.6442267894744873, + "learning_rate": 2.5308641975308646e-06, + "logits/chosen": -6.171110153198242, + "logits/rejected": -6.142795562744141, + "logps/chosen": -480.75726318359375, + "logps/rejected": -476.60552978515625, + "loss": 0.31265769004821775, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 0.19172248244285583, + "rewards/margins": 1.2785747051239014, + "rewards/rejected": -1.0868523120880127, + "step": 295 + }, + { + "epoch": 1.6694560669456067, + "grad_norm": 1.4329712390899658, + "learning_rate": 2.4794238683127577e-06, + "logits/chosen": -6.2438530921936035, + "logits/rejected": -6.167972564697266, + "logps/chosen": -398.7960510253906, + "logps/rejected": -416.4847717285156, + "loss": 0.3146506786346436, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -0.06857718527317047, + "rewards/margins": 1.1940205097198486, + "rewards/rejected": -1.2625977993011475, + "step": 300 + } + ], + "logging_steps": 5, + "max_steps": 540, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 50, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 0.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/gemma-2b-dpo/checkpoint-300/training_args.bin b/gemma-2b-dpo/checkpoint-300/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..c02f6b48421cea8396dfcaf6d8319b0150c14d7b --- /dev/null +++ b/gemma-2b-dpo/checkpoint-300/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3be0616829fa305775b58136a03c46cbb233332c99572ecc66875666e4681dc9 +size 5688 diff --git a/gemma-2b-dpo/checkpoint-350/README.md b/gemma-2b-dpo/checkpoint-350/README.md new file mode 100644 index 0000000000000000000000000000000000000000..14d70987363c6b5210911e1b79d69ac05ee2d8b7 --- /dev/null +++ b/gemma-2b-dpo/checkpoint-350/README.md @@ -0,0 +1,209 @@ +--- +base_model: google/gemma-2-2b-it +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:google/gemma-2-2b-it +- dpo +- lora +- transformers +- trl +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/gemma-2b-dpo/checkpoint-350/adapter_config.json b/gemma-2b-dpo/checkpoint-350/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..bbaa22368e6c52ef59d4559074221a5020aba608 --- /dev/null +++ b/gemma-2b-dpo/checkpoint-350/adapter_config.json @@ -0,0 +1,46 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": null, + "base_model_name_or_path": "google/gemma-2-2b-it", + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "up_proj", + "q_proj", + "v_proj", + "gate_proj", + "k_proj", + "down_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/gemma-2b-dpo/checkpoint-350/adapter_model.safetensors b/gemma-2b-dpo/checkpoint-350/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bc2df66559557de9ede30e82551da88a0f0e5d3d --- /dev/null +++ b/gemma-2b-dpo/checkpoint-350/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb0a959cfe0b94d126d82a18354f3e31dfa05a91da2caddc19dacb86fab37307 +size 83115256 diff --git a/gemma-2b-dpo/checkpoint-350/chat_template.jinja b/gemma-2b-dpo/checkpoint-350/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..923ec253c8dbefbb41cf084db7251df41d000f6d --- /dev/null +++ b/gemma-2b-dpo/checkpoint-350/chat_template.jinja @@ -0,0 +1,4 @@ +{{ bos_token }}{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if (message['role'] == 'assistant') %}{% set role = 'model' %}{% else %}{% set role = message['role'] %}{% endif %}{{ '' + role + ' +' + message['content'] | trim + ' +' }}{% endfor %}{% if add_generation_prompt %}{{'model +'}}{% endif %} \ No newline at end of file diff --git a/gemma-2b-dpo/checkpoint-350/optimizer.pt b/gemma-2b-dpo/checkpoint-350/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..9114582cc410f799f6226caa752fa0440c06e582 --- /dev/null +++ b/gemma-2b-dpo/checkpoint-350/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b2165fc38f54a38de769c323969541a8bc3d4fff980408643e468ca8901a5f77 +size 42616772 diff --git a/gemma-2b-dpo/checkpoint-350/rng_state.pth b/gemma-2b-dpo/checkpoint-350/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..ece67508ffa7f89d1f8e8b4e514d0551447e32a9 --- /dev/null +++ b/gemma-2b-dpo/checkpoint-350/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2b66e3cc7c452b707ddac5caf0aa17618afb9bc1a0333600a22c4afb353f3165 +size 14244 diff --git a/gemma-2b-dpo/checkpoint-350/scheduler.pt b/gemma-2b-dpo/checkpoint-350/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..e5073a165a9c7810223007d4765b64d0e7fcb479 --- /dev/null +++ b/gemma-2b-dpo/checkpoint-350/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:955a31aa2f281ab43c39b1871945ec6da9e9a16e4d34232e09db3994ebfb7a43 +size 1064 diff --git a/gemma-2b-dpo/checkpoint-350/tokenizer.json b/gemma-2b-dpo/checkpoint-350/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..6523cc0616b64c563af913a417dfa7eb01549a2c --- /dev/null +++ b/gemma-2b-dpo/checkpoint-350/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:394ace002a144ac6ad5486387502f2d36f70c087310c3d907857240c76fcb36e +size 34362748 diff --git a/gemma-2b-dpo/checkpoint-350/tokenizer_config.json b/gemma-2b-dpo/checkpoint-350/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..eabc4ed8ab34751069a1970d61e616ce93e53880 --- /dev/null +++ b/gemma-2b-dpo/checkpoint-350/tokenizer_config.json @@ -0,0 +1,19 @@ +{ + "backend": "tokenizers", + "bos_token": "", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "extra_special_tokens": [ + "", + "" + ], + "is_local": false, + "mask_token": "", + "model_max_length": 1000000000000000019884624838656, + "pad_token": "", + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "tokenizer_class": "GemmaTokenizer", + "unk_token": "", + "use_default_system_prompt": false +} diff --git a/gemma-2b-dpo/checkpoint-350/trainer_state.json b/gemma-2b-dpo/checkpoint-350/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..474b2ccfcb5a3cc05720845158f978d94db71c33 --- /dev/null +++ b/gemma-2b-dpo/checkpoint-350/trainer_state.json @@ -0,0 +1,1084 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.9483960948396095, + "eval_steps": 500, + "global_step": 350, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.02789400278940028, + "grad_norm": 2.7052793502807617, + "learning_rate": 3.7037037037037036e-07, + "logits/chosen": -6.208017826080322, + "logits/rejected": -6.18649435043335, + "logps/chosen": -417.861328125, + "logps/rejected": -431.774169921875, + "loss": 0.6978574275970459, + "rewards/accuracies": 0.25, + "rewards/chosen": 0.0027643204666674137, + "rewards/margins": -0.00830078125, + "rewards/rejected": 0.011065103113651276, + "step": 5 + }, + { + "epoch": 0.05578800557880056, + "grad_norm": 1.9632341861724854, + "learning_rate": 8.333333333333333e-07, + "logits/chosen": -6.051701545715332, + "logits/rejected": -6.098549842834473, + "logps/chosen": -442.61126708984375, + "logps/rejected": -419.4737243652344, + "loss": 0.6965099811553955, + "rewards/accuracies": 0.44999998807907104, + "rewards/chosen": -0.0022751614451408386, + "rewards/margins": -0.004312096629291773, + "rewards/rejected": 0.002036933321505785, + "step": 10 + }, + { + "epoch": 0.08368200836820083, + "grad_norm": 1.9358311891555786, + "learning_rate": 1.2962962962962962e-06, + "logits/chosen": -6.1482648849487305, + "logits/rejected": -6.208896636962891, + "logps/chosen": -419.15155029296875, + "logps/rejected": -393.37322998046875, + "loss": 0.6971890449523925, + "rewards/accuracies": 0.4749999940395355, + "rewards/chosen": 0.004753150977194309, + "rewards/margins": -0.006633720360696316, + "rewards/rejected": 0.0113868722692132, + "step": 15 + }, + { + "epoch": 0.11157601115760112, + "grad_norm": 2.137960195541382, + "learning_rate": 1.7592592592592594e-06, + "logits/chosen": -6.1889142990112305, + "logits/rejected": -6.147027015686035, + "logps/chosen": -449.2413024902344, + "logps/rejected": -387.8244934082031, + "loss": 0.694630479812622, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.01739494316279888, + "rewards/margins": -0.0010753620881587267, + "rewards/rejected": -0.016319578513503075, + "step": 20 + }, + { + "epoch": 0.1394700139470014, + "grad_norm": 2.610708475112915, + "learning_rate": 2.222222222222222e-06, + "logits/chosen": -6.098985195159912, + "logits/rejected": -6.146561145782471, + "logps/chosen": -528.6546020507812, + "logps/rejected": -517.2868041992188, + "loss": 0.6923945903778076, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.013269426301121712, + "rewards/margins": 0.004172402434051037, + "rewards/rejected": 0.009097023867070675, + "step": 25 + }, + { + "epoch": 0.16736401673640167, + "grad_norm": 3.0792224407196045, + "learning_rate": 2.6851851851851856e-06, + "logits/chosen": -6.156611442565918, + "logits/rejected": -6.146718502044678, + "logps/chosen": -427.1123962402344, + "logps/rejected": -413.99810791015625, + "loss": 0.6963389396667481, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.0016099174972623587, + "rewards/margins": -0.0036583715118467808, + "rewards/rejected": 0.0052682883106172085, + "step": 30 + }, + { + "epoch": 0.19525801952580196, + "grad_norm": 2.40751051902771, + "learning_rate": 3.1481481481481483e-06, + "logits/chosen": -6.270221710205078, + "logits/rejected": -6.222764492034912, + "logps/chosen": -433.89312744140625, + "logps/rejected": -442.81378173828125, + "loss": 0.6875874042510987, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.003747978014871478, + "rewards/margins": 0.013033255934715271, + "rewards/rejected": -0.016781235113739967, + "step": 35 + }, + { + "epoch": 0.22315202231520223, + "grad_norm": 2.409308671951294, + "learning_rate": 3.6111111111111115e-06, + "logits/chosen": -6.171980857849121, + "logits/rejected": -6.236737251281738, + "logps/chosen": -411.51092529296875, + "logps/rejected": -454.578857421875, + "loss": 0.6975872993469239, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.0006048586219549179, + "rewards/margins": -0.006687240209430456, + "rewards/rejected": 0.0060823829844594, + "step": 40 + }, + { + "epoch": 0.2510460251046025, + "grad_norm": 2.8261911869049072, + "learning_rate": 4.074074074074074e-06, + "logits/chosen": -6.1633710861206055, + "logits/rejected": -6.245741367340088, + "logps/chosen": -373.363525390625, + "logps/rejected": -356.736572265625, + "loss": 0.6881499290466309, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": 0.01782766357064247, + "rewards/margins": 0.015053692273795605, + "rewards/rejected": 0.002773971762508154, + "step": 45 + }, + { + "epoch": 0.2789400278940028, + "grad_norm": 2.457179546356201, + "learning_rate": 4.537037037037038e-06, + "logits/chosen": -6.270019054412842, + "logits/rejected": -6.3202104568481445, + "logps/chosen": -466.30609130859375, + "logps/rejected": -476.45550537109375, + "loss": 0.6831833839416503, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": 0.0046669007278978825, + "rewards/margins": 0.023042945191264153, + "rewards/rejected": -0.018376046791672707, + "step": 50 + }, + { + "epoch": 0.3068340306834031, + "grad_norm": 1.6770554780960083, + "learning_rate": 5e-06, + "logits/chosen": -6.253265380859375, + "logits/rejected": -6.15267276763916, + "logps/chosen": -352.24908447265625, + "logps/rejected": -447.11444091796875, + "loss": 0.6791603088378906, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.010960197076201439, + "rewards/margins": 0.031198084354400635, + "rewards/rejected": -0.042158275842666626, + "step": 55 + }, + { + "epoch": 0.33472803347280333, + "grad_norm": 2.6027019023895264, + "learning_rate": 4.9485596707818935e-06, + "logits/chosen": -6.205387592315674, + "logits/rejected": -6.259293079376221, + "logps/chosen": -439.732421875, + "logps/rejected": -412.8099670410156, + "loss": 0.6736814975738525, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.010224836878478527, + "rewards/margins": 0.04464374855160713, + "rewards/rejected": -0.054868586361408234, + "step": 60 + }, + { + "epoch": 0.36262203626220363, + "grad_norm": 2.1717166900634766, + "learning_rate": 4.897119341563787e-06, + "logits/chosen": -6.1334547996521, + "logits/rejected": -6.148266792297363, + "logps/chosen": -390.00433349609375, + "logps/rejected": -376.676513671875, + "loss": 0.6825191974639893, + "rewards/accuracies": 0.5249999761581421, + "rewards/chosen": -0.04612758383154869, + "rewards/margins": 0.03951488807797432, + "rewards/rejected": -0.08564247190952301, + "step": 65 + }, + { + "epoch": 0.3905160390516039, + "grad_norm": 2.2574119567871094, + "learning_rate": 4.845679012345679e-06, + "logits/chosen": -6.236250877380371, + "logits/rejected": -6.165186882019043, + "logps/chosen": -411.1315002441406, + "logps/rejected": -447.22100830078125, + "loss": 0.6402256488800049, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.017545931041240692, + "rewards/margins": 0.12250369787216187, + "rewards/rejected": -0.14004963636398315, + "step": 70 + }, + { + "epoch": 0.41841004184100417, + "grad_norm": 2.3837037086486816, + "learning_rate": 4.794238683127572e-06, + "logits/chosen": -6.256176948547363, + "logits/rejected": -6.213258266448975, + "logps/chosen": -437.463623046875, + "logps/rejected": -404.8554992675781, + "loss": 0.6703986167907715, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.03428981825709343, + "rewards/margins": 0.05360151082277298, + "rewards/rejected": -0.08789133280515671, + "step": 75 + }, + { + "epoch": 0.44630404463040446, + "grad_norm": 3.304287910461426, + "learning_rate": 4.742798353909465e-06, + "logits/chosen": -6.2820305824279785, + "logits/rejected": -6.221312522888184, + "logps/chosen": -455.2318420410156, + "logps/rejected": -422.08837890625, + "loss": 0.7040590286254883, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.0881255492568016, + "rewards/margins": -0.006679975427687168, + "rewards/rejected": -0.08144557476043701, + "step": 80 + }, + { + "epoch": 0.47419804741980476, + "grad_norm": 2.6312427520751953, + "learning_rate": 4.691358024691358e-06, + "logits/chosen": -6.1796159744262695, + "logits/rejected": -6.193436622619629, + "logps/chosen": -423.59930419921875, + "logps/rejected": -486.1690979003906, + "loss": 0.6397527694702149, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.043790053576231, + "rewards/margins": 0.12486596405506134, + "rewards/rejected": -0.16865602135658264, + "step": 85 + }, + { + "epoch": 0.502092050209205, + "grad_norm": 2.3493549823760986, + "learning_rate": 4.6399176954732515e-06, + "logits/chosen": -6.136630058288574, + "logits/rejected": -6.202858924865723, + "logps/chosen": -467.627685546875, + "logps/rejected": -441.41241455078125, + "loss": 0.5936434745788575, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": 0.06891433894634247, + "rewards/margins": 0.26186972856521606, + "rewards/rejected": -0.19295534491539001, + "step": 90 + }, + { + "epoch": 0.5299860529986054, + "grad_norm": 2.4952447414398193, + "learning_rate": 4.588477366255145e-06, + "logits/chosen": -6.1503586769104, + "logits/rejected": -6.144400596618652, + "logps/chosen": -355.2735290527344, + "logps/rejected": -409.51702880859375, + "loss": 0.6157774925231934, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.0699276328086853, + "rewards/margins": 0.22694334387779236, + "rewards/rejected": -0.2968709468841553, + "step": 95 + }, + { + "epoch": 0.5578800557880056, + "grad_norm": 2.5470480918884277, + "learning_rate": 4.537037037037038e-06, + "logits/chosen": -6.14028263092041, + "logits/rejected": -6.104605197906494, + "logps/chosen": -429.1048889160156, + "logps/rejected": -454.7377014160156, + "loss": 0.6300024032592774, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.05769091844558716, + "rewards/margins": 0.14401891827583313, + "rewards/rejected": -0.2017098367214203, + "step": 100 + }, + { + "epoch": 0.5857740585774058, + "grad_norm": 2.6023478507995605, + "learning_rate": 4.485596707818931e-06, + "logits/chosen": -6.196796894073486, + "logits/rejected": -6.226622104644775, + "logps/chosen": -442.52685546875, + "logps/rejected": -516.7334594726562, + "loss": 0.6245638847351074, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.18519389629364014, + "rewards/margins": 0.24079546332359314, + "rewards/rejected": -0.42598938941955566, + "step": 105 + }, + { + "epoch": 0.6136680613668062, + "grad_norm": 2.0638511180877686, + "learning_rate": 4.434156378600823e-06, + "logits/chosen": -6.1991071701049805, + "logits/rejected": -6.119466781616211, + "logps/chosen": -410.86669921875, + "logps/rejected": -450.365478515625, + "loss": 0.6201879501342773, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.0853937491774559, + "rewards/margins": 0.17646726965904236, + "rewards/rejected": -0.26186102628707886, + "step": 110 + }, + { + "epoch": 0.6415620641562064, + "grad_norm": 2.3625364303588867, + "learning_rate": 4.382716049382716e-06, + "logits/chosen": -6.220386505126953, + "logits/rejected": -6.223449230194092, + "logps/chosen": -435.92626953125, + "logps/rejected": -495.6065368652344, + "loss": 0.6151515483856201, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.2208656519651413, + "rewards/margins": 0.21172885596752167, + "rewards/rejected": -0.43259453773498535, + "step": 115 + }, + { + "epoch": 0.6694560669456067, + "grad_norm": 1.8082666397094727, + "learning_rate": 4.331275720164609e-06, + "logits/chosen": -6.262181282043457, + "logits/rejected": -6.250016212463379, + "logps/chosen": -354.21795654296875, + "logps/rejected": -389.14556884765625, + "loss": 0.6109379768371582, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.17473874986171722, + "rewards/margins": 0.19315743446350098, + "rewards/rejected": -0.367896169424057, + "step": 120 + }, + { + "epoch": 0.697350069735007, + "grad_norm": 1.9556658267974854, + "learning_rate": 4.2798353909465025e-06, + "logits/chosen": -6.167700290679932, + "logits/rejected": -6.1421003341674805, + "logps/chosen": -379.1827392578125, + "logps/rejected": -426.69549560546875, + "loss": 0.6202447414398193, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.17535170912742615, + "rewards/margins": 0.18863627314567566, + "rewards/rejected": -0.3639879822731018, + "step": 125 + }, + { + "epoch": 0.7252440725244073, + "grad_norm": 3.001298666000366, + "learning_rate": 4.228395061728396e-06, + "logits/chosen": -6.2535905838012695, + "logits/rejected": -6.232400894165039, + "logps/chosen": -424.8458557128906, + "logps/rejected": -494.52960205078125, + "loss": 0.5493914127349854, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.22046081721782684, + "rewards/margins": 0.3785194754600525, + "rewards/rejected": -0.5989803075790405, + "step": 130 + }, + { + "epoch": 0.7531380753138075, + "grad_norm": 2.5210413932800293, + "learning_rate": 4.176954732510288e-06, + "logits/chosen": -6.078260898590088, + "logits/rejected": -6.0126447677612305, + "logps/chosen": -414.69940185546875, + "logps/rejected": -432.3282165527344, + "loss": 0.579456901550293, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.15084640681743622, + "rewards/margins": 0.2960701882839203, + "rewards/rejected": -0.4469165802001953, + "step": 135 + }, + { + "epoch": 0.7810320781032078, + "grad_norm": 2.6807265281677246, + "learning_rate": 4.125514403292181e-06, + "logits/chosen": -6.243051052093506, + "logits/rejected": -6.220357418060303, + "logps/chosen": -400.6156311035156, + "logps/rejected": -450.0393981933594, + "loss": 0.5514531135559082, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.25727975368499756, + "rewards/margins": 0.3991738259792328, + "rewards/rejected": -0.6564534902572632, + "step": 140 + }, + { + "epoch": 0.8089260808926081, + "grad_norm": 2.4137353897094727, + "learning_rate": 4.074074074074074e-06, + "logits/chosen": -6.186558246612549, + "logits/rejected": -6.139374256134033, + "logps/chosen": -442.314453125, + "logps/rejected": -491.584716796875, + "loss": 0.5633067131042481, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.35670942068099976, + "rewards/margins": 0.38038796186447144, + "rewards/rejected": -0.7370973825454712, + "step": 145 + }, + { + "epoch": 0.8368200836820083, + "grad_norm": 2.1043145656585693, + "learning_rate": 4.022633744855967e-06, + "logits/chosen": -6.177689552307129, + "logits/rejected": -6.167322635650635, + "logps/chosen": -435.2288513183594, + "logps/rejected": -469.41436767578125, + "loss": 0.5640112876892089, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.32313138246536255, + "rewards/margins": 0.45979684591293335, + "rewards/rejected": -0.7829282283782959, + "step": 150 + }, + { + "epoch": 0.8647140864714087, + "grad_norm": 1.9623620510101318, + "learning_rate": 3.97119341563786e-06, + "logits/chosen": -6.0590739250183105, + "logits/rejected": -6.033650875091553, + "logps/chosen": -421.5000915527344, + "logps/rejected": -370.30902099609375, + "loss": 0.6319089412689209, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.29380694031715393, + "rewards/margins": 0.1507532149553299, + "rewards/rejected": -0.44456014037132263, + "step": 155 + }, + { + "epoch": 0.8926080892608089, + "grad_norm": 1.9432786703109741, + "learning_rate": 3.9197530864197535e-06, + "logits/chosen": -6.267019271850586, + "logits/rejected": -6.214621067047119, + "logps/chosen": -417.11724853515625, + "logps/rejected": -431.72698974609375, + "loss": 0.5186795234680176, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -0.18987610936164856, + "rewards/margins": 0.5616164803504944, + "rewards/rejected": -0.7514925599098206, + "step": 160 + }, + { + "epoch": 0.9205020920502092, + "grad_norm": 1.8146827220916748, + "learning_rate": 3.868312757201647e-06, + "logits/chosen": -6.230213165283203, + "logits/rejected": -6.109362602233887, + "logps/chosen": -376.6744384765625, + "logps/rejected": -376.6526184082031, + "loss": 0.5346522808074952, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.19152329862117767, + "rewards/margins": 0.38060134649276733, + "rewards/rejected": -0.5721246004104614, + "step": 165 + }, + { + "epoch": 0.9483960948396095, + "grad_norm": 1.936680793762207, + "learning_rate": 3.81687242798354e-06, + "logits/chosen": -6.194340705871582, + "logits/rejected": -6.1444597244262695, + "logps/chosen": -389.017822265625, + "logps/rejected": -445.76544189453125, + "loss": 0.49420690536499023, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -0.055042725056409836, + "rewards/margins": 0.5315954685211182, + "rewards/rejected": -0.5866381525993347, + "step": 170 + }, + { + "epoch": 0.9762900976290098, + "grad_norm": 3.2391903400421143, + "learning_rate": 3.7654320987654325e-06, + "logits/chosen": -6.190616607666016, + "logits/rejected": -6.1138739585876465, + "logps/chosen": -421.6878356933594, + "logps/rejected": -460.1180114746094, + "loss": 0.5374621391296387, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.22659805417060852, + "rewards/margins": 0.43736472725868225, + "rewards/rejected": -0.6639627814292908, + "step": 175 + }, + { + "epoch": 1.0, + "grad_norm": 4.300363063812256, + "learning_rate": 3.7139917695473256e-06, + "logits/chosen": -6.227687358856201, + "logits/rejected": -6.129978179931641, + "logps/chosen": -449.4482116699219, + "logps/rejected": -430.434326171875, + "loss": 0.49641432762146, + "rewards/accuracies": 0.8529411554336548, + "rewards/chosen": -0.03447714447975159, + "rewards/margins": 0.5154433846473694, + "rewards/rejected": -0.5499205589294434, + "step": 180 + }, + { + "epoch": 1.0278940027894004, + "grad_norm": 1.5967351198196411, + "learning_rate": 3.6625514403292183e-06, + "logits/chosen": -6.067181587219238, + "logits/rejected": -6.06889533996582, + "logps/chosen": -399.98968505859375, + "logps/rejected": -447.72039794921875, + "loss": 0.4079257011413574, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 0.03092392347753048, + "rewards/margins": 0.7688177824020386, + "rewards/rejected": -0.7378939390182495, + "step": 185 + }, + { + "epoch": 1.0557880055788005, + "grad_norm": 1.7787078619003296, + "learning_rate": 3.6111111111111115e-06, + "logits/chosen": -6.146653175354004, + "logits/rejected": -6.154219627380371, + "logps/chosen": -419.3291015625, + "logps/rejected": -463.1239318847656, + "loss": 0.42812933921813967, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -0.007557299919426441, + "rewards/margins": 0.7340337634086609, + "rewards/rejected": -0.7415911555290222, + "step": 190 + }, + { + "epoch": 1.0836820083682008, + "grad_norm": 1.7694693803787231, + "learning_rate": 3.559670781893004e-06, + "logits/chosen": -6.160831928253174, + "logits/rejected": -6.151050567626953, + "logps/chosen": -379.6001281738281, + "logps/rejected": -394.00531005859375, + "loss": 0.41410012245178224, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": 0.03558122366666794, + "rewards/margins": 0.8024934530258179, + "rewards/rejected": -0.7669121623039246, + "step": 195 + }, + { + "epoch": 1.1115760111576012, + "grad_norm": 1.3202013969421387, + "learning_rate": 3.5082304526748973e-06, + "logits/chosen": -6.110814094543457, + "logits/rejected": -6.176726341247559, + "logps/chosen": -400.85052490234375, + "logps/rejected": -405.44195556640625, + "loss": 0.391094446182251, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -0.02833392843604088, + "rewards/margins": 0.8581112623214722, + "rewards/rejected": -0.8864452242851257, + "step": 200 + }, + { + "epoch": 1.1394700139470013, + "grad_norm": 1.9367257356643677, + "learning_rate": 3.4567901234567904e-06, + "logits/chosen": -6.311105728149414, + "logits/rejected": -6.179243087768555, + "logps/chosen": -440.97625732421875, + "logps/rejected": -463.1673278808594, + "loss": 0.3870258331298828, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -0.012000990100204945, + "rewards/margins": 0.9945917129516602, + "rewards/rejected": -1.0065927505493164, + "step": 205 + }, + { + "epoch": 1.1673640167364017, + "grad_norm": 2.4570703506469727, + "learning_rate": 3.405349794238683e-06, + "logits/chosen": -6.199883937835693, + "logits/rejected": -6.160645484924316, + "logps/chosen": -448.8758850097656, + "logps/rejected": -439.4584045410156, + "loss": 0.3908271551132202, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 0.04756501317024231, + "rewards/margins": 0.8816453218460083, + "rewards/rejected": -0.8340802192687988, + "step": 210 + }, + { + "epoch": 1.195258019525802, + "grad_norm": 1.5992087125778198, + "learning_rate": 3.3539094650205767e-06, + "logits/chosen": -6.163644313812256, + "logits/rejected": -6.093722343444824, + "logps/chosen": -449.8214416503906, + "logps/rejected": -481.3743591308594, + "loss": 0.3612337350845337, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09812992066144943, + "rewards/margins": 0.9562546014785767, + "rewards/rejected": -0.858124852180481, + "step": 215 + }, + { + "epoch": 1.2231520223152021, + "grad_norm": 1.4101840257644653, + "learning_rate": 3.30246913580247e-06, + "logits/chosen": -6.281071662902832, + "logits/rejected": -6.337766170501709, + "logps/chosen": -281.46795654296875, + "logps/rejected": -336.60845947265625, + "loss": 0.43022546768188474, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -0.1486438810825348, + "rewards/margins": 0.7064955234527588, + "rewards/rejected": -0.855139434337616, + "step": 220 + }, + { + "epoch": 1.2510460251046025, + "grad_norm": 1.5817450284957886, + "learning_rate": 3.2510288065843625e-06, + "logits/chosen": -6.1745758056640625, + "logits/rejected": -6.192706108093262, + "logps/chosen": -399.51190185546875, + "logps/rejected": -424.1844177246094, + "loss": 0.3992297887802124, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -0.051351286470890045, + "rewards/margins": 0.827163815498352, + "rewards/rejected": -0.8785150647163391, + "step": 225 + }, + { + "epoch": 1.2789400278940029, + "grad_norm": 1.3157438039779663, + "learning_rate": 3.1995884773662556e-06, + "logits/chosen": -6.1543779373168945, + "logits/rejected": -6.1732258796691895, + "logps/chosen": -425.80755615234375, + "logps/rejected": -447.39453125, + "loss": 0.35113141536712644, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": 0.01585063710808754, + "rewards/margins": 1.0381678342819214, + "rewards/rejected": -1.0223171710968018, + "step": 230 + }, + { + "epoch": 1.3068340306834032, + "grad_norm": 1.4981003999710083, + "learning_rate": 3.1481481481481483e-06, + "logits/chosen": -6.220085620880127, + "logits/rejected": -6.215539455413818, + "logps/chosen": -393.77716064453125, + "logps/rejected": -474.4695739746094, + "loss": 0.35107009410858153, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 0.07319364696741104, + "rewards/margins": 1.0109622478485107, + "rewards/rejected": -0.9377686381340027, + "step": 235 + }, + { + "epoch": 1.3347280334728033, + "grad_norm": 1.6417901515960693, + "learning_rate": 3.0967078189300415e-06, + "logits/chosen": -6.223210334777832, + "logits/rejected": -6.187335968017578, + "logps/chosen": -454.0006408691406, + "logps/rejected": -439.3124084472656, + "loss": 0.3300657272338867, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09006929397583008, + "rewards/margins": 1.089814305305481, + "rewards/rejected": -0.9997450709342957, + "step": 240 + }, + { + "epoch": 1.3626220362622037, + "grad_norm": 1.3642381429672241, + "learning_rate": 3.0452674897119346e-06, + "logits/chosen": -6.211455821990967, + "logits/rejected": -6.1171441078186035, + "logps/chosen": -402.8586730957031, + "logps/rejected": -431.7958984375, + "loss": 0.3634498119354248, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -0.0397757962346077, + "rewards/margins": 1.0654242038726807, + "rewards/rejected": -1.1052000522613525, + "step": 245 + }, + { + "epoch": 1.390516039051604, + "grad_norm": 1.9008878469467163, + "learning_rate": 2.9938271604938273e-06, + "logits/chosen": -6.2566046714782715, + "logits/rejected": -6.222296714782715, + "logps/chosen": -433.3427734375, + "logps/rejected": -489.8169860839844, + "loss": 0.3385239839553833, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 0.04979880154132843, + "rewards/margins": 1.0801963806152344, + "rewards/rejected": -1.0303975343704224, + "step": 250 + }, + { + "epoch": 1.4184100418410042, + "grad_norm": 1.5302000045776367, + "learning_rate": 2.9423868312757204e-06, + "logits/chosen": -6.219883918762207, + "logits/rejected": -6.1428399085998535, + "logps/chosen": -428.93914794921875, + "logps/rejected": -493.17437744140625, + "loss": 0.3642880916595459, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.09988997876644135, + "rewards/margins": 1.0157719850540161, + "rewards/rejected": -1.1156619787216187, + "step": 255 + }, + { + "epoch": 1.4463040446304045, + "grad_norm": 1.898163080215454, + "learning_rate": 2.890946502057613e-06, + "logits/chosen": -6.123999118804932, + "logits/rejected": -6.1102776527404785, + "logps/chosen": -494.8221740722656, + "logps/rejected": -464.3958435058594, + "loss": 0.31599912643432615, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -0.03670971840620041, + "rewards/margins": 1.1374431848526, + "rewards/rejected": -1.1741528511047363, + "step": 260 + }, + { + "epoch": 1.4741980474198049, + "grad_norm": 1.7610703706741333, + "learning_rate": 2.8395061728395062e-06, + "logits/chosen": -6.1201653480529785, + "logits/rejected": -6.155616760253906, + "logps/chosen": -489.81201171875, + "logps/rejected": -511.11724853515625, + "loss": 0.30640478134155275, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -0.057888150215148926, + "rewards/margins": 1.2739557027816772, + "rewards/rejected": -1.3318438529968262, + "step": 265 + }, + { + "epoch": 1.502092050209205, + "grad_norm": 1.3035284280776978, + "learning_rate": 2.7880658436213994e-06, + "logits/chosen": -6.300120830535889, + "logits/rejected": -6.1527228355407715, + "logps/chosen": -433.002685546875, + "logps/rejected": -471.416015625, + "loss": 0.2971210956573486, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.03341478109359741, + "rewards/margins": 1.295318841934204, + "rewards/rejected": -1.261904001235962, + "step": 270 + }, + { + "epoch": 1.5299860529986054, + "grad_norm": 1.7996495962142944, + "learning_rate": 2.736625514403292e-06, + "logits/chosen": -6.244246482849121, + "logits/rejected": -6.148303985595703, + "logps/chosen": -403.05133056640625, + "logps/rejected": -380.7239990234375, + "loss": 0.36082763671875, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -0.016453832387924194, + "rewards/margins": 1.103393793106079, + "rewards/rejected": -1.1198475360870361, + "step": 275 + }, + { + "epoch": 1.5578800557880057, + "grad_norm": 2.193908929824829, + "learning_rate": 2.6851851851851856e-06, + "logits/chosen": -6.1752519607543945, + "logits/rejected": -6.249385833740234, + "logps/chosen": -482.1385803222656, + "logps/rejected": -504.50311279296875, + "loss": 0.3047311782836914, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": 0.20222747325897217, + "rewards/margins": 1.3389281034469604, + "rewards/rejected": -1.1367007493972778, + "step": 280 + }, + { + "epoch": 1.5857740585774058, + "grad_norm": 1.3961774110794067, + "learning_rate": 2.6337448559670788e-06, + "logits/chosen": -6.2296528816223145, + "logits/rejected": -6.2554612159729, + "logps/chosen": -370.95428466796875, + "logps/rejected": -465.4556579589844, + "loss": 0.31553618907928466, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -0.10641174018383026, + "rewards/margins": 1.2302197217941284, + "rewards/rejected": -1.336631417274475, + "step": 285 + }, + { + "epoch": 1.6136680613668062, + "grad_norm": 1.296229600906372, + "learning_rate": 2.5823045267489715e-06, + "logits/chosen": -6.245091438293457, + "logits/rejected": -6.184089660644531, + "logps/chosen": -420.3929138183594, + "logps/rejected": -484.88934326171875, + "loss": 0.28981173038482666, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -0.03132909908890724, + "rewards/margins": 1.3171762228012085, + "rewards/rejected": -1.3485053777694702, + "step": 290 + }, + { + "epoch": 1.6415620641562065, + "grad_norm": 1.6442267894744873, + "learning_rate": 2.5308641975308646e-06, + "logits/chosen": -6.171110153198242, + "logits/rejected": -6.142795562744141, + "logps/chosen": -480.75726318359375, + "logps/rejected": -476.60552978515625, + "loss": 0.31265769004821775, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 0.19172248244285583, + "rewards/margins": 1.2785747051239014, + "rewards/rejected": -1.0868523120880127, + "step": 295 + }, + { + "epoch": 1.6694560669456067, + "grad_norm": 1.4329712390899658, + "learning_rate": 2.4794238683127577e-06, + "logits/chosen": -6.2438530921936035, + "logits/rejected": -6.167972564697266, + "logps/chosen": -398.7960510253906, + "logps/rejected": -416.4847717285156, + "loss": 0.3146506786346436, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -0.06857718527317047, + "rewards/margins": 1.1940205097198486, + "rewards/rejected": -1.2625977993011475, + "step": 300 + }, + { + "epoch": 1.697350069735007, + "grad_norm": 1.2974578142166138, + "learning_rate": 2.4279835390946504e-06, + "logits/chosen": -6.2253522872924805, + "logits/rejected": -6.272657871246338, + "logps/chosen": -405.8463439941406, + "logps/rejected": -451.399169921875, + "loss": 0.2888355255126953, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.040930021554231644, + "rewards/margins": 1.2757234573364258, + "rewards/rejected": -1.3166534900665283, + "step": 305 + }, + { + "epoch": 1.7252440725244074, + "grad_norm": 1.865118384361267, + "learning_rate": 2.3765432098765435e-06, + "logits/chosen": -6.285956382751465, + "logits/rejected": -6.284170627593994, + "logps/chosen": -376.63543701171875, + "logps/rejected": -404.28131103515625, + "loss": 0.34237470626831057, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -0.3045514225959778, + "rewards/margins": 1.1192312240600586, + "rewards/rejected": -1.4237825870513916, + "step": 310 + }, + { + "epoch": 1.7531380753138075, + "grad_norm": 1.3118988275527954, + "learning_rate": 2.3251028806584362e-06, + "logits/chosen": -6.330617904663086, + "logits/rejected": -6.2173357009887695, + "logps/chosen": -411.6089782714844, + "logps/rejected": -405.8570861816406, + "loss": 0.30266666412353516, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -0.08704517036676407, + "rewards/margins": 1.2953578233718872, + "rewards/rejected": -1.382403016090393, + "step": 315 + }, + { + "epoch": 1.7810320781032078, + "grad_norm": 2.04417085647583, + "learning_rate": 2.2736625514403294e-06, + "logits/chosen": -6.217926502227783, + "logits/rejected": -6.2158918380737305, + "logps/chosen": -472.285400390625, + "logps/rejected": -431.4971618652344, + "loss": 0.2793667078018188, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -0.1601211428642273, + "rewards/margins": 1.4048590660095215, + "rewards/rejected": -1.564980149269104, + "step": 320 + }, + { + "epoch": 1.8089260808926082, + "grad_norm": 1.6911827325820923, + "learning_rate": 2.222222222222222e-06, + "logits/chosen": -6.251864910125732, + "logits/rejected": -6.211045742034912, + "logps/chosen": -341.28216552734375, + "logps/rejected": -376.18780517578125, + "loss": 0.3147707223892212, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09754385054111481, + "rewards/margins": 1.1059117317199707, + "rewards/rejected": -1.2034555673599243, + "step": 325 + }, + { + "epoch": 1.8368200836820083, + "grad_norm": 1.974066138267517, + "learning_rate": 2.1707818930041156e-06, + "logits/chosen": -6.293444633483887, + "logits/rejected": -6.139300346374512, + "logps/chosen": -379.49871826171875, + "logps/rejected": -472.8863830566406, + "loss": 0.25607054233551024, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1596871316432953, + "rewards/margins": 1.4329643249511719, + "rewards/rejected": -1.2732770442962646, + "step": 330 + }, + { + "epoch": 1.8647140864714087, + "grad_norm": 1.5956170558929443, + "learning_rate": 2.1193415637860083e-06, + "logits/chosen": -6.274291038513184, + "logits/rejected": -6.236454010009766, + "logps/chosen": -415.7079162597656, + "logps/rejected": -418.0286560058594, + "loss": 0.27625508308410646, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 0.059024881571531296, + "rewards/margins": 1.4343515634536743, + "rewards/rejected": -1.3753265142440796, + "step": 335 + }, + { + "epoch": 1.892608089260809, + "grad_norm": 1.2464466094970703, + "learning_rate": 2.0679012345679015e-06, + "logits/chosen": -6.2509446144104, + "logits/rejected": -6.182458400726318, + "logps/chosen": -478.2040100097656, + "logps/rejected": -437.84063720703125, + "loss": 0.2548848867416382, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -0.04282323271036148, + "rewards/margins": 1.5546890497207642, + "rewards/rejected": -1.597512125968933, + "step": 340 + }, + { + "epoch": 1.9205020920502092, + "grad_norm": 2.1779489517211914, + "learning_rate": 2.0164609053497946e-06, + "logits/chosen": -6.29345703125, + "logits/rejected": -6.269745826721191, + "logps/chosen": -481.49871826171875, + "logps/rejected": -510.9442443847656, + "loss": 0.24130442142486572, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11563090234994888, + "rewards/margins": 1.6354728937149048, + "rewards/rejected": -1.7511036396026611, + "step": 345 + }, + { + "epoch": 1.9483960948396095, + "grad_norm": 1.3139598369598389, + "learning_rate": 1.9650205761316873e-06, + "logits/chosen": -6.220123291015625, + "logits/rejected": -6.242280006408691, + "logps/chosen": -407.80804443359375, + "logps/rejected": -401.532958984375, + "loss": 0.31305124759674074, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -0.2018294781446457, + "rewards/margins": 1.1702749729156494, + "rewards/rejected": -1.372104525566101, + "step": 350 + } + ], + "logging_steps": 5, + "max_steps": 540, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 50, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 0.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/gemma-2b-dpo/checkpoint-350/training_args.bin b/gemma-2b-dpo/checkpoint-350/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..c02f6b48421cea8396dfcaf6d8319b0150c14d7b --- /dev/null +++ b/gemma-2b-dpo/checkpoint-350/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3be0616829fa305775b58136a03c46cbb233332c99572ecc66875666e4681dc9 +size 5688 diff --git a/gemma-2b-dpo/checkpoint-400/README.md b/gemma-2b-dpo/checkpoint-400/README.md new file mode 100644 index 0000000000000000000000000000000000000000..14d70987363c6b5210911e1b79d69ac05ee2d8b7 --- /dev/null +++ b/gemma-2b-dpo/checkpoint-400/README.md @@ -0,0 +1,209 @@ +--- +base_model: google/gemma-2-2b-it +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:google/gemma-2-2b-it +- dpo +- lora +- transformers +- trl +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/gemma-2b-dpo/checkpoint-400/adapter_config.json b/gemma-2b-dpo/checkpoint-400/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..bbaa22368e6c52ef59d4559074221a5020aba608 --- /dev/null +++ b/gemma-2b-dpo/checkpoint-400/adapter_config.json @@ -0,0 +1,46 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": null, + "base_model_name_or_path": "google/gemma-2-2b-it", + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "up_proj", + "q_proj", + "v_proj", + "gate_proj", + "k_proj", + "down_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/gemma-2b-dpo/checkpoint-400/adapter_model.safetensors b/gemma-2b-dpo/checkpoint-400/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..cbf748a1566f28e415f8bd9a5ef9270d76e4d6f4 --- /dev/null +++ b/gemma-2b-dpo/checkpoint-400/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:712d8b51b1dec708f280f564f30ff1139903cb69596c08f1765b12afa08e9c0b +size 83115256 diff --git a/gemma-2b-dpo/checkpoint-400/chat_template.jinja b/gemma-2b-dpo/checkpoint-400/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..923ec253c8dbefbb41cf084db7251df41d000f6d --- /dev/null +++ b/gemma-2b-dpo/checkpoint-400/chat_template.jinja @@ -0,0 +1,4 @@ +{{ bos_token }}{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if (message['role'] == 'assistant') %}{% set role = 'model' %}{% else %}{% set role = message['role'] %}{% endif %}{{ '' + role + ' +' + message['content'] | trim + ' +' }}{% endfor %}{% if add_generation_prompt %}{{'model +'}}{% endif %} \ No newline at end of file diff --git a/gemma-2b-dpo/checkpoint-400/optimizer.pt b/gemma-2b-dpo/checkpoint-400/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..467d29c04a922c78c84b7637eae39f1fd9979f59 --- /dev/null +++ b/gemma-2b-dpo/checkpoint-400/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0310087f6c3af3f3e16df2af2e43051e1cf47f81c8a72b37ef5bb2fc80ebc346 +size 42616772 diff --git a/gemma-2b-dpo/checkpoint-400/rng_state.pth b/gemma-2b-dpo/checkpoint-400/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..e29ad0909e8c1afefa182e7ed890fa257c75af25 --- /dev/null +++ b/gemma-2b-dpo/checkpoint-400/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3cf9097d4513154245c48236b6ec5137b7ee2a21c9f58f2cba798ea275c6026f +size 14244 diff --git a/gemma-2b-dpo/checkpoint-400/scheduler.pt b/gemma-2b-dpo/checkpoint-400/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..f041695c05ff2047cfe01f78c397b7836ff2510d --- /dev/null +++ b/gemma-2b-dpo/checkpoint-400/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d934eac960d3da1d255bdc473c6a5320bc323d4c76e2e5703e27873b2b5223d4 +size 1064 diff --git a/gemma-2b-dpo/checkpoint-400/tokenizer.json b/gemma-2b-dpo/checkpoint-400/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..6523cc0616b64c563af913a417dfa7eb01549a2c --- /dev/null +++ b/gemma-2b-dpo/checkpoint-400/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:394ace002a144ac6ad5486387502f2d36f70c087310c3d907857240c76fcb36e +size 34362748 diff --git a/gemma-2b-dpo/checkpoint-400/tokenizer_config.json b/gemma-2b-dpo/checkpoint-400/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..eabc4ed8ab34751069a1970d61e616ce93e53880 --- /dev/null +++ b/gemma-2b-dpo/checkpoint-400/tokenizer_config.json @@ -0,0 +1,19 @@ +{ + "backend": "tokenizers", + "bos_token": "", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "extra_special_tokens": [ + "", + "" + ], + "is_local": false, + "mask_token": "", + "model_max_length": 1000000000000000019884624838656, + "pad_token": "", + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "tokenizer_class": "GemmaTokenizer", + "unk_token": "", + "use_default_system_prompt": false +} diff --git a/gemma-2b-dpo/checkpoint-400/trainer_state.json b/gemma-2b-dpo/checkpoint-400/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..ed75a27e417aa09a67c6ca61f2739005b181df1b --- /dev/null +++ b/gemma-2b-dpo/checkpoint-400/trainer_state.json @@ -0,0 +1,1234 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.2231520223152024, + "eval_steps": 500, + "global_step": 400, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.02789400278940028, + "grad_norm": 2.7052793502807617, + "learning_rate": 3.7037037037037036e-07, + "logits/chosen": -6.208017826080322, + "logits/rejected": -6.18649435043335, + "logps/chosen": -417.861328125, + "logps/rejected": -431.774169921875, + "loss": 0.6978574275970459, + "rewards/accuracies": 0.25, + "rewards/chosen": 0.0027643204666674137, + "rewards/margins": -0.00830078125, + "rewards/rejected": 0.011065103113651276, + "step": 5 + }, + { + "epoch": 0.05578800557880056, + "grad_norm": 1.9632341861724854, + "learning_rate": 8.333333333333333e-07, + "logits/chosen": -6.051701545715332, + "logits/rejected": -6.098549842834473, + "logps/chosen": -442.61126708984375, + "logps/rejected": -419.4737243652344, + "loss": 0.6965099811553955, + "rewards/accuracies": 0.44999998807907104, + "rewards/chosen": -0.0022751614451408386, + "rewards/margins": -0.004312096629291773, + "rewards/rejected": 0.002036933321505785, + "step": 10 + }, + { + "epoch": 0.08368200836820083, + "grad_norm": 1.9358311891555786, + "learning_rate": 1.2962962962962962e-06, + "logits/chosen": -6.1482648849487305, + "logits/rejected": -6.208896636962891, + "logps/chosen": -419.15155029296875, + "logps/rejected": -393.37322998046875, + "loss": 0.6971890449523925, + "rewards/accuracies": 0.4749999940395355, + "rewards/chosen": 0.004753150977194309, + "rewards/margins": -0.006633720360696316, + "rewards/rejected": 0.0113868722692132, + "step": 15 + }, + { + "epoch": 0.11157601115760112, + "grad_norm": 2.137960195541382, + "learning_rate": 1.7592592592592594e-06, + "logits/chosen": -6.1889142990112305, + "logits/rejected": -6.147027015686035, + "logps/chosen": -449.2413024902344, + "logps/rejected": -387.8244934082031, + "loss": 0.694630479812622, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.01739494316279888, + "rewards/margins": -0.0010753620881587267, + "rewards/rejected": -0.016319578513503075, + "step": 20 + }, + { + "epoch": 0.1394700139470014, + "grad_norm": 2.610708475112915, + "learning_rate": 2.222222222222222e-06, + "logits/chosen": -6.098985195159912, + "logits/rejected": -6.146561145782471, + "logps/chosen": -528.6546020507812, + "logps/rejected": -517.2868041992188, + "loss": 0.6923945903778076, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.013269426301121712, + "rewards/margins": 0.004172402434051037, + "rewards/rejected": 0.009097023867070675, + "step": 25 + }, + { + "epoch": 0.16736401673640167, + "grad_norm": 3.0792224407196045, + "learning_rate": 2.6851851851851856e-06, + "logits/chosen": -6.156611442565918, + "logits/rejected": -6.146718502044678, + "logps/chosen": -427.1123962402344, + "logps/rejected": -413.99810791015625, + "loss": 0.6963389396667481, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.0016099174972623587, + "rewards/margins": -0.0036583715118467808, + "rewards/rejected": 0.0052682883106172085, + "step": 30 + }, + { + "epoch": 0.19525801952580196, + "grad_norm": 2.40751051902771, + "learning_rate": 3.1481481481481483e-06, + "logits/chosen": -6.270221710205078, + "logits/rejected": -6.222764492034912, + "logps/chosen": -433.89312744140625, + "logps/rejected": -442.81378173828125, + "loss": 0.6875874042510987, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.003747978014871478, + "rewards/margins": 0.013033255934715271, + "rewards/rejected": -0.016781235113739967, + "step": 35 + }, + { + "epoch": 0.22315202231520223, + "grad_norm": 2.409308671951294, + "learning_rate": 3.6111111111111115e-06, + "logits/chosen": -6.171980857849121, + "logits/rejected": -6.236737251281738, + "logps/chosen": -411.51092529296875, + "logps/rejected": -454.578857421875, + "loss": 0.6975872993469239, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.0006048586219549179, + "rewards/margins": -0.006687240209430456, + "rewards/rejected": 0.0060823829844594, + "step": 40 + }, + { + "epoch": 0.2510460251046025, + "grad_norm": 2.8261911869049072, + "learning_rate": 4.074074074074074e-06, + "logits/chosen": -6.1633710861206055, + "logits/rejected": -6.245741367340088, + "logps/chosen": -373.363525390625, + "logps/rejected": -356.736572265625, + "loss": 0.6881499290466309, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": 0.01782766357064247, + "rewards/margins": 0.015053692273795605, + "rewards/rejected": 0.002773971762508154, + "step": 45 + }, + { + "epoch": 0.2789400278940028, + "grad_norm": 2.457179546356201, + "learning_rate": 4.537037037037038e-06, + "logits/chosen": -6.270019054412842, + "logits/rejected": -6.3202104568481445, + "logps/chosen": -466.30609130859375, + "logps/rejected": -476.45550537109375, + "loss": 0.6831833839416503, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": 0.0046669007278978825, + "rewards/margins": 0.023042945191264153, + "rewards/rejected": -0.018376046791672707, + "step": 50 + }, + { + "epoch": 0.3068340306834031, + "grad_norm": 1.6770554780960083, + "learning_rate": 5e-06, + "logits/chosen": -6.253265380859375, + "logits/rejected": -6.15267276763916, + "logps/chosen": -352.24908447265625, + "logps/rejected": -447.11444091796875, + "loss": 0.6791603088378906, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.010960197076201439, + "rewards/margins": 0.031198084354400635, + "rewards/rejected": -0.042158275842666626, + "step": 55 + }, + { + "epoch": 0.33472803347280333, + "grad_norm": 2.6027019023895264, + "learning_rate": 4.9485596707818935e-06, + "logits/chosen": -6.205387592315674, + "logits/rejected": -6.259293079376221, + "logps/chosen": -439.732421875, + "logps/rejected": -412.8099670410156, + "loss": 0.6736814975738525, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.010224836878478527, + "rewards/margins": 0.04464374855160713, + "rewards/rejected": -0.054868586361408234, + "step": 60 + }, + { + "epoch": 0.36262203626220363, + "grad_norm": 2.1717166900634766, + "learning_rate": 4.897119341563787e-06, + "logits/chosen": -6.1334547996521, + "logits/rejected": -6.148266792297363, + "logps/chosen": -390.00433349609375, + "logps/rejected": -376.676513671875, + "loss": 0.6825191974639893, + "rewards/accuracies": 0.5249999761581421, + "rewards/chosen": -0.04612758383154869, + "rewards/margins": 0.03951488807797432, + "rewards/rejected": -0.08564247190952301, + "step": 65 + }, + { + "epoch": 0.3905160390516039, + "grad_norm": 2.2574119567871094, + "learning_rate": 4.845679012345679e-06, + "logits/chosen": -6.236250877380371, + "logits/rejected": -6.165186882019043, + "logps/chosen": -411.1315002441406, + "logps/rejected": -447.22100830078125, + "loss": 0.6402256488800049, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.017545931041240692, + "rewards/margins": 0.12250369787216187, + "rewards/rejected": -0.14004963636398315, + "step": 70 + }, + { + "epoch": 0.41841004184100417, + "grad_norm": 2.3837037086486816, + "learning_rate": 4.794238683127572e-06, + "logits/chosen": -6.256176948547363, + "logits/rejected": -6.213258266448975, + "logps/chosen": -437.463623046875, + "logps/rejected": -404.8554992675781, + "loss": 0.6703986167907715, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.03428981825709343, + "rewards/margins": 0.05360151082277298, + "rewards/rejected": -0.08789133280515671, + "step": 75 + }, + { + "epoch": 0.44630404463040446, + "grad_norm": 3.304287910461426, + "learning_rate": 4.742798353909465e-06, + "logits/chosen": -6.2820305824279785, + "logits/rejected": -6.221312522888184, + "logps/chosen": -455.2318420410156, + "logps/rejected": -422.08837890625, + "loss": 0.7040590286254883, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.0881255492568016, + "rewards/margins": -0.006679975427687168, + "rewards/rejected": -0.08144557476043701, + "step": 80 + }, + { + "epoch": 0.47419804741980476, + "grad_norm": 2.6312427520751953, + "learning_rate": 4.691358024691358e-06, + "logits/chosen": -6.1796159744262695, + "logits/rejected": -6.193436622619629, + "logps/chosen": -423.59930419921875, + "logps/rejected": -486.1690979003906, + "loss": 0.6397527694702149, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.043790053576231, + "rewards/margins": 0.12486596405506134, + "rewards/rejected": -0.16865602135658264, + "step": 85 + }, + { + "epoch": 0.502092050209205, + "grad_norm": 2.3493549823760986, + "learning_rate": 4.6399176954732515e-06, + "logits/chosen": -6.136630058288574, + "logits/rejected": -6.202858924865723, + "logps/chosen": -467.627685546875, + "logps/rejected": -441.41241455078125, + "loss": 0.5936434745788575, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": 0.06891433894634247, + "rewards/margins": 0.26186972856521606, + "rewards/rejected": -0.19295534491539001, + "step": 90 + }, + { + "epoch": 0.5299860529986054, + "grad_norm": 2.4952447414398193, + "learning_rate": 4.588477366255145e-06, + "logits/chosen": -6.1503586769104, + "logits/rejected": -6.144400596618652, + "logps/chosen": -355.2735290527344, + "logps/rejected": -409.51702880859375, + "loss": 0.6157774925231934, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.0699276328086853, + "rewards/margins": 0.22694334387779236, + "rewards/rejected": -0.2968709468841553, + "step": 95 + }, + { + "epoch": 0.5578800557880056, + "grad_norm": 2.5470480918884277, + "learning_rate": 4.537037037037038e-06, + "logits/chosen": -6.14028263092041, + "logits/rejected": -6.104605197906494, + "logps/chosen": -429.1048889160156, + "logps/rejected": -454.7377014160156, + "loss": 0.6300024032592774, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.05769091844558716, + "rewards/margins": 0.14401891827583313, + "rewards/rejected": -0.2017098367214203, + "step": 100 + }, + { + "epoch": 0.5857740585774058, + "grad_norm": 2.6023478507995605, + "learning_rate": 4.485596707818931e-06, + "logits/chosen": -6.196796894073486, + "logits/rejected": -6.226622104644775, + "logps/chosen": -442.52685546875, + "logps/rejected": -516.7334594726562, + "loss": 0.6245638847351074, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.18519389629364014, + "rewards/margins": 0.24079546332359314, + "rewards/rejected": -0.42598938941955566, + "step": 105 + }, + { + "epoch": 0.6136680613668062, + "grad_norm": 2.0638511180877686, + "learning_rate": 4.434156378600823e-06, + "logits/chosen": -6.1991071701049805, + "logits/rejected": -6.119466781616211, + "logps/chosen": -410.86669921875, + "logps/rejected": -450.365478515625, + "loss": 0.6201879501342773, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.0853937491774559, + "rewards/margins": 0.17646726965904236, + "rewards/rejected": -0.26186102628707886, + "step": 110 + }, + { + "epoch": 0.6415620641562064, + "grad_norm": 2.3625364303588867, + "learning_rate": 4.382716049382716e-06, + "logits/chosen": -6.220386505126953, + "logits/rejected": -6.223449230194092, + "logps/chosen": -435.92626953125, + "logps/rejected": -495.6065368652344, + "loss": 0.6151515483856201, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.2208656519651413, + "rewards/margins": 0.21172885596752167, + "rewards/rejected": -0.43259453773498535, + "step": 115 + }, + { + "epoch": 0.6694560669456067, + "grad_norm": 1.8082666397094727, + "learning_rate": 4.331275720164609e-06, + "logits/chosen": -6.262181282043457, + "logits/rejected": -6.250016212463379, + "logps/chosen": -354.21795654296875, + "logps/rejected": -389.14556884765625, + "loss": 0.6109379768371582, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.17473874986171722, + "rewards/margins": 0.19315743446350098, + "rewards/rejected": -0.367896169424057, + "step": 120 + }, + { + "epoch": 0.697350069735007, + "grad_norm": 1.9556658267974854, + "learning_rate": 4.2798353909465025e-06, + "logits/chosen": -6.167700290679932, + "logits/rejected": -6.1421003341674805, + "logps/chosen": -379.1827392578125, + "logps/rejected": -426.69549560546875, + "loss": 0.6202447414398193, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.17535170912742615, + "rewards/margins": 0.18863627314567566, + "rewards/rejected": -0.3639879822731018, + "step": 125 + }, + { + "epoch": 0.7252440725244073, + "grad_norm": 3.001298666000366, + "learning_rate": 4.228395061728396e-06, + "logits/chosen": -6.2535905838012695, + "logits/rejected": -6.232400894165039, + "logps/chosen": -424.8458557128906, + "logps/rejected": -494.52960205078125, + "loss": 0.5493914127349854, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.22046081721782684, + "rewards/margins": 0.3785194754600525, + "rewards/rejected": -0.5989803075790405, + "step": 130 + }, + { + "epoch": 0.7531380753138075, + "grad_norm": 2.5210413932800293, + "learning_rate": 4.176954732510288e-06, + "logits/chosen": -6.078260898590088, + "logits/rejected": -6.0126447677612305, + "logps/chosen": -414.69940185546875, + "logps/rejected": -432.3282165527344, + "loss": 0.579456901550293, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.15084640681743622, + "rewards/margins": 0.2960701882839203, + "rewards/rejected": -0.4469165802001953, + "step": 135 + }, + { + "epoch": 0.7810320781032078, + "grad_norm": 2.6807265281677246, + "learning_rate": 4.125514403292181e-06, + "logits/chosen": -6.243051052093506, + "logits/rejected": -6.220357418060303, + "logps/chosen": -400.6156311035156, + "logps/rejected": -450.0393981933594, + "loss": 0.5514531135559082, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.25727975368499756, + "rewards/margins": 0.3991738259792328, + "rewards/rejected": -0.6564534902572632, + "step": 140 + }, + { + "epoch": 0.8089260808926081, + "grad_norm": 2.4137353897094727, + "learning_rate": 4.074074074074074e-06, + "logits/chosen": -6.186558246612549, + "logits/rejected": -6.139374256134033, + "logps/chosen": -442.314453125, + "logps/rejected": -491.584716796875, + "loss": 0.5633067131042481, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.35670942068099976, + "rewards/margins": 0.38038796186447144, + "rewards/rejected": -0.7370973825454712, + "step": 145 + }, + { + "epoch": 0.8368200836820083, + "grad_norm": 2.1043145656585693, + "learning_rate": 4.022633744855967e-06, + "logits/chosen": -6.177689552307129, + "logits/rejected": -6.167322635650635, + "logps/chosen": -435.2288513183594, + "logps/rejected": -469.41436767578125, + "loss": 0.5640112876892089, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.32313138246536255, + "rewards/margins": 0.45979684591293335, + "rewards/rejected": -0.7829282283782959, + "step": 150 + }, + { + "epoch": 0.8647140864714087, + "grad_norm": 1.9623620510101318, + "learning_rate": 3.97119341563786e-06, + "logits/chosen": -6.0590739250183105, + "logits/rejected": -6.033650875091553, + "logps/chosen": -421.5000915527344, + "logps/rejected": -370.30902099609375, + "loss": 0.6319089412689209, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.29380694031715393, + "rewards/margins": 0.1507532149553299, + "rewards/rejected": -0.44456014037132263, + "step": 155 + }, + { + "epoch": 0.8926080892608089, + "grad_norm": 1.9432786703109741, + "learning_rate": 3.9197530864197535e-06, + "logits/chosen": -6.267019271850586, + "logits/rejected": -6.214621067047119, + "logps/chosen": -417.11724853515625, + "logps/rejected": -431.72698974609375, + "loss": 0.5186795234680176, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -0.18987610936164856, + "rewards/margins": 0.5616164803504944, + "rewards/rejected": -0.7514925599098206, + "step": 160 + }, + { + "epoch": 0.9205020920502092, + "grad_norm": 1.8146827220916748, + "learning_rate": 3.868312757201647e-06, + "logits/chosen": -6.230213165283203, + "logits/rejected": -6.109362602233887, + "logps/chosen": -376.6744384765625, + "logps/rejected": -376.6526184082031, + "loss": 0.5346522808074952, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.19152329862117767, + "rewards/margins": 0.38060134649276733, + "rewards/rejected": -0.5721246004104614, + "step": 165 + }, + { + "epoch": 0.9483960948396095, + "grad_norm": 1.936680793762207, + "learning_rate": 3.81687242798354e-06, + "logits/chosen": -6.194340705871582, + "logits/rejected": -6.1444597244262695, + "logps/chosen": -389.017822265625, + "logps/rejected": -445.76544189453125, + "loss": 0.49420690536499023, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -0.055042725056409836, + "rewards/margins": 0.5315954685211182, + "rewards/rejected": -0.5866381525993347, + "step": 170 + }, + { + "epoch": 0.9762900976290098, + "grad_norm": 3.2391903400421143, + "learning_rate": 3.7654320987654325e-06, + "logits/chosen": -6.190616607666016, + "logits/rejected": -6.1138739585876465, + "logps/chosen": -421.6878356933594, + "logps/rejected": -460.1180114746094, + "loss": 0.5374621391296387, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.22659805417060852, + "rewards/margins": 0.43736472725868225, + "rewards/rejected": -0.6639627814292908, + "step": 175 + }, + { + "epoch": 1.0, + "grad_norm": 4.300363063812256, + "learning_rate": 3.7139917695473256e-06, + "logits/chosen": -6.227687358856201, + "logits/rejected": -6.129978179931641, + "logps/chosen": -449.4482116699219, + "logps/rejected": -430.434326171875, + "loss": 0.49641432762146, + "rewards/accuracies": 0.8529411554336548, + "rewards/chosen": -0.03447714447975159, + "rewards/margins": 0.5154433846473694, + "rewards/rejected": -0.5499205589294434, + "step": 180 + }, + { + "epoch": 1.0278940027894004, + "grad_norm": 1.5967351198196411, + "learning_rate": 3.6625514403292183e-06, + "logits/chosen": -6.067181587219238, + "logits/rejected": -6.06889533996582, + "logps/chosen": -399.98968505859375, + "logps/rejected": -447.72039794921875, + "loss": 0.4079257011413574, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 0.03092392347753048, + "rewards/margins": 0.7688177824020386, + "rewards/rejected": -0.7378939390182495, + "step": 185 + }, + { + "epoch": 1.0557880055788005, + "grad_norm": 1.7787078619003296, + "learning_rate": 3.6111111111111115e-06, + "logits/chosen": -6.146653175354004, + "logits/rejected": -6.154219627380371, + "logps/chosen": -419.3291015625, + "logps/rejected": -463.1239318847656, + "loss": 0.42812933921813967, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -0.007557299919426441, + "rewards/margins": 0.7340337634086609, + "rewards/rejected": -0.7415911555290222, + "step": 190 + }, + { + "epoch": 1.0836820083682008, + "grad_norm": 1.7694693803787231, + "learning_rate": 3.559670781893004e-06, + "logits/chosen": -6.160831928253174, + "logits/rejected": -6.151050567626953, + "logps/chosen": -379.6001281738281, + "logps/rejected": -394.00531005859375, + "loss": 0.41410012245178224, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": 0.03558122366666794, + "rewards/margins": 0.8024934530258179, + "rewards/rejected": -0.7669121623039246, + "step": 195 + }, + { + "epoch": 1.1115760111576012, + "grad_norm": 1.3202013969421387, + "learning_rate": 3.5082304526748973e-06, + "logits/chosen": -6.110814094543457, + "logits/rejected": -6.176726341247559, + "logps/chosen": -400.85052490234375, + "logps/rejected": -405.44195556640625, + "loss": 0.391094446182251, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -0.02833392843604088, + "rewards/margins": 0.8581112623214722, + "rewards/rejected": -0.8864452242851257, + "step": 200 + }, + { + "epoch": 1.1394700139470013, + "grad_norm": 1.9367257356643677, + "learning_rate": 3.4567901234567904e-06, + "logits/chosen": -6.311105728149414, + "logits/rejected": -6.179243087768555, + "logps/chosen": -440.97625732421875, + "logps/rejected": -463.1673278808594, + "loss": 0.3870258331298828, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -0.012000990100204945, + "rewards/margins": 0.9945917129516602, + "rewards/rejected": -1.0065927505493164, + "step": 205 + }, + { + "epoch": 1.1673640167364017, + "grad_norm": 2.4570703506469727, + "learning_rate": 3.405349794238683e-06, + "logits/chosen": -6.199883937835693, + "logits/rejected": -6.160645484924316, + "logps/chosen": -448.8758850097656, + "logps/rejected": -439.4584045410156, + "loss": 0.3908271551132202, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 0.04756501317024231, + "rewards/margins": 0.8816453218460083, + "rewards/rejected": -0.8340802192687988, + "step": 210 + }, + { + "epoch": 1.195258019525802, + "grad_norm": 1.5992087125778198, + "learning_rate": 3.3539094650205767e-06, + "logits/chosen": -6.163644313812256, + "logits/rejected": -6.093722343444824, + "logps/chosen": -449.8214416503906, + "logps/rejected": -481.3743591308594, + "loss": 0.3612337350845337, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09812992066144943, + "rewards/margins": 0.9562546014785767, + "rewards/rejected": -0.858124852180481, + "step": 215 + }, + { + "epoch": 1.2231520223152021, + "grad_norm": 1.4101840257644653, + "learning_rate": 3.30246913580247e-06, + "logits/chosen": -6.281071662902832, + "logits/rejected": -6.337766170501709, + "logps/chosen": -281.46795654296875, + "logps/rejected": -336.60845947265625, + "loss": 0.43022546768188474, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -0.1486438810825348, + "rewards/margins": 0.7064955234527588, + "rewards/rejected": -0.855139434337616, + "step": 220 + }, + { + "epoch": 1.2510460251046025, + "grad_norm": 1.5817450284957886, + "learning_rate": 3.2510288065843625e-06, + "logits/chosen": -6.1745758056640625, + "logits/rejected": -6.192706108093262, + "logps/chosen": -399.51190185546875, + "logps/rejected": -424.1844177246094, + "loss": 0.3992297887802124, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -0.051351286470890045, + "rewards/margins": 0.827163815498352, + "rewards/rejected": -0.8785150647163391, + "step": 225 + }, + { + "epoch": 1.2789400278940029, + "grad_norm": 1.3157438039779663, + "learning_rate": 3.1995884773662556e-06, + "logits/chosen": -6.1543779373168945, + "logits/rejected": -6.1732258796691895, + "logps/chosen": -425.80755615234375, + "logps/rejected": -447.39453125, + "loss": 0.35113141536712644, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": 0.01585063710808754, + "rewards/margins": 1.0381678342819214, + "rewards/rejected": -1.0223171710968018, + "step": 230 + }, + { + "epoch": 1.3068340306834032, + "grad_norm": 1.4981003999710083, + "learning_rate": 3.1481481481481483e-06, + "logits/chosen": -6.220085620880127, + "logits/rejected": -6.215539455413818, + "logps/chosen": -393.77716064453125, + "logps/rejected": -474.4695739746094, + "loss": 0.35107009410858153, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 0.07319364696741104, + "rewards/margins": 1.0109622478485107, + "rewards/rejected": -0.9377686381340027, + "step": 235 + }, + { + "epoch": 1.3347280334728033, + "grad_norm": 1.6417901515960693, + "learning_rate": 3.0967078189300415e-06, + "logits/chosen": -6.223210334777832, + "logits/rejected": -6.187335968017578, + "logps/chosen": -454.0006408691406, + "logps/rejected": -439.3124084472656, + "loss": 0.3300657272338867, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09006929397583008, + "rewards/margins": 1.089814305305481, + "rewards/rejected": -0.9997450709342957, + "step": 240 + }, + { + "epoch": 1.3626220362622037, + "grad_norm": 1.3642381429672241, + "learning_rate": 3.0452674897119346e-06, + "logits/chosen": -6.211455821990967, + "logits/rejected": -6.1171441078186035, + "logps/chosen": -402.8586730957031, + "logps/rejected": -431.7958984375, + "loss": 0.3634498119354248, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -0.0397757962346077, + "rewards/margins": 1.0654242038726807, + "rewards/rejected": -1.1052000522613525, + "step": 245 + }, + { + "epoch": 1.390516039051604, + "grad_norm": 1.9008878469467163, + "learning_rate": 2.9938271604938273e-06, + "logits/chosen": -6.2566046714782715, + "logits/rejected": -6.222296714782715, + "logps/chosen": -433.3427734375, + "logps/rejected": -489.8169860839844, + "loss": 0.3385239839553833, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 0.04979880154132843, + "rewards/margins": 1.0801963806152344, + "rewards/rejected": -1.0303975343704224, + "step": 250 + }, + { + "epoch": 1.4184100418410042, + "grad_norm": 1.5302000045776367, + "learning_rate": 2.9423868312757204e-06, + "logits/chosen": -6.219883918762207, + "logits/rejected": -6.1428399085998535, + "logps/chosen": -428.93914794921875, + "logps/rejected": -493.17437744140625, + "loss": 0.3642880916595459, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.09988997876644135, + "rewards/margins": 1.0157719850540161, + "rewards/rejected": -1.1156619787216187, + "step": 255 + }, + { + "epoch": 1.4463040446304045, + "grad_norm": 1.898163080215454, + "learning_rate": 2.890946502057613e-06, + "logits/chosen": -6.123999118804932, + "logits/rejected": -6.1102776527404785, + "logps/chosen": -494.8221740722656, + "logps/rejected": -464.3958435058594, + "loss": 0.31599912643432615, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -0.03670971840620041, + "rewards/margins": 1.1374431848526, + "rewards/rejected": -1.1741528511047363, + "step": 260 + }, + { + "epoch": 1.4741980474198049, + "grad_norm": 1.7610703706741333, + "learning_rate": 2.8395061728395062e-06, + "logits/chosen": -6.1201653480529785, + "logits/rejected": -6.155616760253906, + "logps/chosen": -489.81201171875, + "logps/rejected": -511.11724853515625, + "loss": 0.30640478134155275, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -0.057888150215148926, + "rewards/margins": 1.2739557027816772, + "rewards/rejected": -1.3318438529968262, + "step": 265 + }, + { + "epoch": 1.502092050209205, + "grad_norm": 1.3035284280776978, + "learning_rate": 2.7880658436213994e-06, + "logits/chosen": -6.300120830535889, + "logits/rejected": -6.1527228355407715, + "logps/chosen": -433.002685546875, + "logps/rejected": -471.416015625, + "loss": 0.2971210956573486, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.03341478109359741, + "rewards/margins": 1.295318841934204, + "rewards/rejected": -1.261904001235962, + "step": 270 + }, + { + "epoch": 1.5299860529986054, + "grad_norm": 1.7996495962142944, + "learning_rate": 2.736625514403292e-06, + "logits/chosen": -6.244246482849121, + "logits/rejected": -6.148303985595703, + "logps/chosen": -403.05133056640625, + "logps/rejected": -380.7239990234375, + "loss": 0.36082763671875, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -0.016453832387924194, + "rewards/margins": 1.103393793106079, + "rewards/rejected": -1.1198475360870361, + "step": 275 + }, + { + "epoch": 1.5578800557880057, + "grad_norm": 2.193908929824829, + "learning_rate": 2.6851851851851856e-06, + "logits/chosen": -6.1752519607543945, + "logits/rejected": -6.249385833740234, + "logps/chosen": -482.1385803222656, + "logps/rejected": -504.50311279296875, + "loss": 0.3047311782836914, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": 0.20222747325897217, + "rewards/margins": 1.3389281034469604, + "rewards/rejected": -1.1367007493972778, + "step": 280 + }, + { + "epoch": 1.5857740585774058, + "grad_norm": 1.3961774110794067, + "learning_rate": 2.6337448559670788e-06, + "logits/chosen": -6.2296528816223145, + "logits/rejected": -6.2554612159729, + "logps/chosen": -370.95428466796875, + "logps/rejected": -465.4556579589844, + "loss": 0.31553618907928466, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -0.10641174018383026, + "rewards/margins": 1.2302197217941284, + "rewards/rejected": -1.336631417274475, + "step": 285 + }, + { + "epoch": 1.6136680613668062, + "grad_norm": 1.296229600906372, + "learning_rate": 2.5823045267489715e-06, + "logits/chosen": -6.245091438293457, + "logits/rejected": -6.184089660644531, + "logps/chosen": -420.3929138183594, + "logps/rejected": -484.88934326171875, + "loss": 0.28981173038482666, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -0.03132909908890724, + "rewards/margins": 1.3171762228012085, + "rewards/rejected": -1.3485053777694702, + "step": 290 + }, + { + "epoch": 1.6415620641562065, + "grad_norm": 1.6442267894744873, + "learning_rate": 2.5308641975308646e-06, + "logits/chosen": -6.171110153198242, + "logits/rejected": -6.142795562744141, + "logps/chosen": -480.75726318359375, + "logps/rejected": -476.60552978515625, + "loss": 0.31265769004821775, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 0.19172248244285583, + "rewards/margins": 1.2785747051239014, + "rewards/rejected": -1.0868523120880127, + "step": 295 + }, + { + "epoch": 1.6694560669456067, + "grad_norm": 1.4329712390899658, + "learning_rate": 2.4794238683127577e-06, + "logits/chosen": -6.2438530921936035, + "logits/rejected": -6.167972564697266, + "logps/chosen": -398.7960510253906, + "logps/rejected": -416.4847717285156, + "loss": 0.3146506786346436, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -0.06857718527317047, + "rewards/margins": 1.1940205097198486, + "rewards/rejected": -1.2625977993011475, + "step": 300 + }, + { + "epoch": 1.697350069735007, + "grad_norm": 1.2974578142166138, + "learning_rate": 2.4279835390946504e-06, + "logits/chosen": -6.2253522872924805, + "logits/rejected": -6.272657871246338, + "logps/chosen": -405.8463439941406, + "logps/rejected": -451.399169921875, + "loss": 0.2888355255126953, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.040930021554231644, + "rewards/margins": 1.2757234573364258, + "rewards/rejected": -1.3166534900665283, + "step": 305 + }, + { + "epoch": 1.7252440725244074, + "grad_norm": 1.865118384361267, + "learning_rate": 2.3765432098765435e-06, + "logits/chosen": -6.285956382751465, + "logits/rejected": -6.284170627593994, + "logps/chosen": -376.63543701171875, + "logps/rejected": -404.28131103515625, + "loss": 0.34237470626831057, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -0.3045514225959778, + "rewards/margins": 1.1192312240600586, + "rewards/rejected": -1.4237825870513916, + "step": 310 + }, + { + "epoch": 1.7531380753138075, + "grad_norm": 1.3118988275527954, + "learning_rate": 2.3251028806584362e-06, + "logits/chosen": -6.330617904663086, + "logits/rejected": -6.2173357009887695, + "logps/chosen": -411.6089782714844, + "logps/rejected": -405.8570861816406, + "loss": 0.30266666412353516, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -0.08704517036676407, + "rewards/margins": 1.2953578233718872, + "rewards/rejected": -1.382403016090393, + "step": 315 + }, + { + "epoch": 1.7810320781032078, + "grad_norm": 2.04417085647583, + "learning_rate": 2.2736625514403294e-06, + "logits/chosen": -6.217926502227783, + "logits/rejected": -6.2158918380737305, + "logps/chosen": -472.285400390625, + "logps/rejected": -431.4971618652344, + "loss": 0.2793667078018188, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -0.1601211428642273, + "rewards/margins": 1.4048590660095215, + "rewards/rejected": -1.564980149269104, + "step": 320 + }, + { + "epoch": 1.8089260808926082, + "grad_norm": 1.6911827325820923, + "learning_rate": 2.222222222222222e-06, + "logits/chosen": -6.251864910125732, + "logits/rejected": -6.211045742034912, + "logps/chosen": -341.28216552734375, + "logps/rejected": -376.18780517578125, + "loss": 0.3147707223892212, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09754385054111481, + "rewards/margins": 1.1059117317199707, + "rewards/rejected": -1.2034555673599243, + "step": 325 + }, + { + "epoch": 1.8368200836820083, + "grad_norm": 1.974066138267517, + "learning_rate": 2.1707818930041156e-06, + "logits/chosen": -6.293444633483887, + "logits/rejected": -6.139300346374512, + "logps/chosen": -379.49871826171875, + "logps/rejected": -472.8863830566406, + "loss": 0.25607054233551024, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1596871316432953, + "rewards/margins": 1.4329643249511719, + "rewards/rejected": -1.2732770442962646, + "step": 330 + }, + { + "epoch": 1.8647140864714087, + "grad_norm": 1.5956170558929443, + "learning_rate": 2.1193415637860083e-06, + "logits/chosen": -6.274291038513184, + "logits/rejected": -6.236454010009766, + "logps/chosen": -415.7079162597656, + "logps/rejected": -418.0286560058594, + "loss": 0.27625508308410646, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 0.059024881571531296, + "rewards/margins": 1.4343515634536743, + "rewards/rejected": -1.3753265142440796, + "step": 335 + }, + { + "epoch": 1.892608089260809, + "grad_norm": 1.2464466094970703, + "learning_rate": 2.0679012345679015e-06, + "logits/chosen": -6.2509446144104, + "logits/rejected": -6.182458400726318, + "logps/chosen": -478.2040100097656, + "logps/rejected": -437.84063720703125, + "loss": 0.2548848867416382, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -0.04282323271036148, + "rewards/margins": 1.5546890497207642, + "rewards/rejected": -1.597512125968933, + "step": 340 + }, + { + "epoch": 1.9205020920502092, + "grad_norm": 2.1779489517211914, + "learning_rate": 2.0164609053497946e-06, + "logits/chosen": -6.29345703125, + "logits/rejected": -6.269745826721191, + "logps/chosen": -481.49871826171875, + "logps/rejected": -510.9442443847656, + "loss": 0.24130442142486572, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11563090234994888, + "rewards/margins": 1.6354728937149048, + "rewards/rejected": -1.7511036396026611, + "step": 345 + }, + { + "epoch": 1.9483960948396095, + "grad_norm": 1.3139598369598389, + "learning_rate": 1.9650205761316873e-06, + "logits/chosen": -6.220123291015625, + "logits/rejected": -6.242280006408691, + "logps/chosen": -407.80804443359375, + "logps/rejected": -401.532958984375, + "loss": 0.31305124759674074, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -0.2018294781446457, + "rewards/margins": 1.1702749729156494, + "rewards/rejected": -1.372104525566101, + "step": 350 + }, + { + "epoch": 1.9762900976290099, + "grad_norm": 0.7809721827507019, + "learning_rate": 1.9135802469135804e-06, + "logits/chosen": -6.304191589355469, + "logits/rejected": -6.235474109649658, + "logps/chosen": -424.8037109375, + "logps/rejected": -488.02362060546875, + "loss": 0.20149641036987304, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.028687676414847374, + "rewards/margins": 1.7642078399658203, + "rewards/rejected": -1.7928955554962158, + "step": 355 + }, + { + "epoch": 2.0, + "grad_norm": 1.6750562191009521, + "learning_rate": 1.8621399176954735e-06, + "logits/chosen": -6.249261379241943, + "logits/rejected": -6.197325706481934, + "logps/chosen": -384.4549255371094, + "logps/rejected": -450.3216552734375, + "loss": 0.2427699089050293, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.29954269528388977, + "rewards/margins": 1.3911824226379395, + "rewards/rejected": -1.6907252073287964, + "step": 360 + }, + { + "epoch": 2.0278940027894, + "grad_norm": 1.4706112146377563, + "learning_rate": 1.8106995884773665e-06, + "logits/chosen": -6.396174430847168, + "logits/rejected": -6.274371147155762, + "logps/chosen": -365.41265869140625, + "logps/rejected": -433.7682189941406, + "loss": 0.2221092700958252, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.011003097519278526, + "rewards/margins": 1.695127248764038, + "rewards/rejected": -1.7061303853988647, + "step": 365 + }, + { + "epoch": 2.0557880055788007, + "grad_norm": 1.6888645887374878, + "learning_rate": 1.7592592592592594e-06, + "logits/chosen": -6.299677848815918, + "logits/rejected": -6.338282108306885, + "logps/chosen": -460.0879821777344, + "logps/rejected": -489.38031005859375, + "loss": 0.19544492959976195, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004593986086547375, + "rewards/margins": 1.8440967798233032, + "rewards/rejected": -1.8486907482147217, + "step": 370 + }, + { + "epoch": 2.083682008368201, + "grad_norm": 0.998672604560852, + "learning_rate": 1.7078189300411525e-06, + "logits/chosen": -6.379012584686279, + "logits/rejected": -6.325904846191406, + "logps/chosen": -421.4281311035156, + "logps/rejected": -519.4888916015625, + "loss": 0.198160719871521, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.06311459839344025, + "rewards/margins": 1.9342191219329834, + "rewards/rejected": -1.8711044788360596, + "step": 375 + }, + { + "epoch": 2.111576011157601, + "grad_norm": 1.0411016941070557, + "learning_rate": 1.6563786008230454e-06, + "logits/chosen": -6.331459999084473, + "logits/rejected": -6.230687141418457, + "logps/chosen": -381.6875915527344, + "logps/rejected": -436.54522705078125, + "loss": 0.17946820259094237, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09231789410114288, + "rewards/margins": 1.843112587928772, + "rewards/rejected": -1.9354302883148193, + "step": 380 + }, + { + "epoch": 2.1394700139470015, + "grad_norm": 1.7208154201507568, + "learning_rate": 1.6049382716049383e-06, + "logits/chosen": -6.328245639801025, + "logits/rejected": -6.348081111907959, + "logps/chosen": -479.76763916015625, + "logps/rejected": -452.90863037109375, + "loss": 0.18842675685882568, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0751882940530777, + "rewards/margins": 1.8472039699554443, + "rewards/rejected": -1.9223921298980713, + "step": 385 + }, + { + "epoch": 2.1673640167364017, + "grad_norm": 1.2389427423477173, + "learning_rate": 1.5534979423868312e-06, + "logits/chosen": -6.397286891937256, + "logits/rejected": -6.3191328048706055, + "logps/chosen": -385.9268493652344, + "logps/rejected": -412.65350341796875, + "loss": 0.2650484800338745, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.056599367409944534, + "rewards/margins": 1.42960524559021, + "rewards/rejected": -1.4862048625946045, + "step": 390 + }, + { + "epoch": 2.195258019525802, + "grad_norm": 1.1179108619689941, + "learning_rate": 1.5020576131687246e-06, + "logits/chosen": -6.415247440338135, + "logits/rejected": -6.397636413574219, + "logps/chosen": -443.06640625, + "logps/rejected": -513.3284912109375, + "loss": 0.18407890796661378, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1382242888212204, + "rewards/margins": 1.9546597003936768, + "rewards/rejected": -2.0928843021392822, + "step": 395 + }, + { + "epoch": 2.2231520223152024, + "grad_norm": 0.8621686697006226, + "learning_rate": 1.4506172839506175e-06, + "logits/chosen": -6.363873481750488, + "logits/rejected": -6.3192243576049805, + "logps/chosen": -446.64532470703125, + "logps/rejected": -479.0135192871094, + "loss": 0.1950451135635376, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -0.012487297877669334, + "rewards/margins": 1.913094162940979, + "rewards/rejected": -1.9255813360214233, + "step": 400 + } + ], + "logging_steps": 5, + "max_steps": 540, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 50, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 0.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/gemma-2b-dpo/checkpoint-400/training_args.bin b/gemma-2b-dpo/checkpoint-400/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..c02f6b48421cea8396dfcaf6d8319b0150c14d7b --- /dev/null +++ b/gemma-2b-dpo/checkpoint-400/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3be0616829fa305775b58136a03c46cbb233332c99572ecc66875666e4681dc9 +size 5688 diff --git a/gemma-2b-dpo/checkpoint-450/README.md b/gemma-2b-dpo/checkpoint-450/README.md new file mode 100644 index 0000000000000000000000000000000000000000..14d70987363c6b5210911e1b79d69ac05ee2d8b7 --- /dev/null +++ b/gemma-2b-dpo/checkpoint-450/README.md @@ -0,0 +1,209 @@ +--- +base_model: google/gemma-2-2b-it +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:google/gemma-2-2b-it +- dpo +- lora +- transformers +- trl +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/gemma-2b-dpo/checkpoint-450/adapter_config.json b/gemma-2b-dpo/checkpoint-450/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..bbaa22368e6c52ef59d4559074221a5020aba608 --- /dev/null +++ b/gemma-2b-dpo/checkpoint-450/adapter_config.json @@ -0,0 +1,46 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": null, + "base_model_name_or_path": "google/gemma-2-2b-it", + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "up_proj", + "q_proj", + "v_proj", + "gate_proj", + "k_proj", + "down_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/gemma-2b-dpo/checkpoint-450/adapter_model.safetensors b/gemma-2b-dpo/checkpoint-450/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..22165e622aef4f429f9993539de58f59b463ae78 --- /dev/null +++ b/gemma-2b-dpo/checkpoint-450/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:15de6b1b1ce0cddb665ae069774d1d4b07904e3d402e4c90ff688618c33382ac +size 83115256 diff --git a/gemma-2b-dpo/checkpoint-450/chat_template.jinja b/gemma-2b-dpo/checkpoint-450/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..923ec253c8dbefbb41cf084db7251df41d000f6d --- /dev/null +++ b/gemma-2b-dpo/checkpoint-450/chat_template.jinja @@ -0,0 +1,4 @@ +{{ bos_token }}{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if (message['role'] == 'assistant') %}{% set role = 'model' %}{% else %}{% set role = message['role'] %}{% endif %}{{ '' + role + ' +' + message['content'] | trim + ' +' }}{% endfor %}{% if add_generation_prompt %}{{'model +'}}{% endif %} \ No newline at end of file diff --git a/gemma-2b-dpo/checkpoint-450/optimizer.pt b/gemma-2b-dpo/checkpoint-450/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..fe8d4ba2a716c01cead08c4ea2748bc034277805 --- /dev/null +++ b/gemma-2b-dpo/checkpoint-450/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c80f939acc3fe927e1d3b4304c99992e936d0fda63038d66a3487156bda8fe30 +size 42616772 diff --git a/gemma-2b-dpo/checkpoint-450/rng_state.pth b/gemma-2b-dpo/checkpoint-450/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..e29ad0909e8c1afefa182e7ed890fa257c75af25 --- /dev/null +++ b/gemma-2b-dpo/checkpoint-450/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3cf9097d4513154245c48236b6ec5137b7ee2a21c9f58f2cba798ea275c6026f +size 14244 diff --git a/gemma-2b-dpo/checkpoint-450/scheduler.pt b/gemma-2b-dpo/checkpoint-450/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..8d2b8de4fcdaad6d2f270863575a18a779df7212 --- /dev/null +++ b/gemma-2b-dpo/checkpoint-450/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3068abe798cbf2c981ee6e233c2da30a3c530489b1722612a94bb00ce6b2b99e +size 1064 diff --git a/gemma-2b-dpo/checkpoint-450/tokenizer.json b/gemma-2b-dpo/checkpoint-450/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..6523cc0616b64c563af913a417dfa7eb01549a2c --- /dev/null +++ b/gemma-2b-dpo/checkpoint-450/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:394ace002a144ac6ad5486387502f2d36f70c087310c3d907857240c76fcb36e +size 34362748 diff --git a/gemma-2b-dpo/checkpoint-450/tokenizer_config.json b/gemma-2b-dpo/checkpoint-450/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..eabc4ed8ab34751069a1970d61e616ce93e53880 --- /dev/null +++ b/gemma-2b-dpo/checkpoint-450/tokenizer_config.json @@ -0,0 +1,19 @@ +{ + "backend": "tokenizers", + "bos_token": "", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "extra_special_tokens": [ + "", + "" + ], + "is_local": false, + "mask_token": "", + "model_max_length": 1000000000000000019884624838656, + "pad_token": "", + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "tokenizer_class": "GemmaTokenizer", + "unk_token": "", + "use_default_system_prompt": false +} diff --git a/gemma-2b-dpo/checkpoint-450/trainer_state.json b/gemma-2b-dpo/checkpoint-450/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..0f0ca14efd9f7a4954866cbc21e946db52edd57c --- /dev/null +++ b/gemma-2b-dpo/checkpoint-450/trainer_state.json @@ -0,0 +1,1384 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.502092050209205, + "eval_steps": 500, + "global_step": 450, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.02789400278940028, + "grad_norm": 2.7052793502807617, + "learning_rate": 3.7037037037037036e-07, + "logits/chosen": -6.208017826080322, + "logits/rejected": -6.18649435043335, + "logps/chosen": -417.861328125, + "logps/rejected": -431.774169921875, + "loss": 0.6978574275970459, + "rewards/accuracies": 0.25, + "rewards/chosen": 0.0027643204666674137, + "rewards/margins": -0.00830078125, + "rewards/rejected": 0.011065103113651276, + "step": 5 + }, + { + "epoch": 0.05578800557880056, + "grad_norm": 1.9632341861724854, + "learning_rate": 8.333333333333333e-07, + "logits/chosen": -6.051701545715332, + "logits/rejected": -6.098549842834473, + "logps/chosen": -442.61126708984375, + "logps/rejected": -419.4737243652344, + "loss": 0.6965099811553955, + "rewards/accuracies": 0.44999998807907104, + "rewards/chosen": -0.0022751614451408386, + "rewards/margins": -0.004312096629291773, + "rewards/rejected": 0.002036933321505785, + "step": 10 + }, + { + "epoch": 0.08368200836820083, + "grad_norm": 1.9358311891555786, + "learning_rate": 1.2962962962962962e-06, + "logits/chosen": -6.1482648849487305, + "logits/rejected": -6.208896636962891, + "logps/chosen": -419.15155029296875, + "logps/rejected": -393.37322998046875, + "loss": 0.6971890449523925, + "rewards/accuracies": 0.4749999940395355, + "rewards/chosen": 0.004753150977194309, + "rewards/margins": -0.006633720360696316, + "rewards/rejected": 0.0113868722692132, + "step": 15 + }, + { + "epoch": 0.11157601115760112, + "grad_norm": 2.137960195541382, + "learning_rate": 1.7592592592592594e-06, + "logits/chosen": -6.1889142990112305, + "logits/rejected": -6.147027015686035, + "logps/chosen": -449.2413024902344, + "logps/rejected": -387.8244934082031, + "loss": 0.694630479812622, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.01739494316279888, + "rewards/margins": -0.0010753620881587267, + "rewards/rejected": -0.016319578513503075, + "step": 20 + }, + { + "epoch": 0.1394700139470014, + "grad_norm": 2.610708475112915, + "learning_rate": 2.222222222222222e-06, + "logits/chosen": -6.098985195159912, + "logits/rejected": -6.146561145782471, + "logps/chosen": -528.6546020507812, + "logps/rejected": -517.2868041992188, + "loss": 0.6923945903778076, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.013269426301121712, + "rewards/margins": 0.004172402434051037, + "rewards/rejected": 0.009097023867070675, + "step": 25 + }, + { + "epoch": 0.16736401673640167, + "grad_norm": 3.0792224407196045, + "learning_rate": 2.6851851851851856e-06, + "logits/chosen": -6.156611442565918, + "logits/rejected": -6.146718502044678, + "logps/chosen": -427.1123962402344, + "logps/rejected": -413.99810791015625, + "loss": 0.6963389396667481, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.0016099174972623587, + "rewards/margins": -0.0036583715118467808, + "rewards/rejected": 0.0052682883106172085, + "step": 30 + }, + { + "epoch": 0.19525801952580196, + "grad_norm": 2.40751051902771, + "learning_rate": 3.1481481481481483e-06, + "logits/chosen": -6.270221710205078, + "logits/rejected": -6.222764492034912, + "logps/chosen": -433.89312744140625, + "logps/rejected": -442.81378173828125, + "loss": 0.6875874042510987, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.003747978014871478, + "rewards/margins": 0.013033255934715271, + "rewards/rejected": -0.016781235113739967, + "step": 35 + }, + { + "epoch": 0.22315202231520223, + "grad_norm": 2.409308671951294, + "learning_rate": 3.6111111111111115e-06, + "logits/chosen": -6.171980857849121, + "logits/rejected": -6.236737251281738, + "logps/chosen": -411.51092529296875, + "logps/rejected": -454.578857421875, + "loss": 0.6975872993469239, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.0006048586219549179, + "rewards/margins": -0.006687240209430456, + "rewards/rejected": 0.0060823829844594, + "step": 40 + }, + { + "epoch": 0.2510460251046025, + "grad_norm": 2.8261911869049072, + "learning_rate": 4.074074074074074e-06, + "logits/chosen": -6.1633710861206055, + "logits/rejected": -6.245741367340088, + "logps/chosen": -373.363525390625, + "logps/rejected": -356.736572265625, + "loss": 0.6881499290466309, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": 0.01782766357064247, + "rewards/margins": 0.015053692273795605, + "rewards/rejected": 0.002773971762508154, + "step": 45 + }, + { + "epoch": 0.2789400278940028, + "grad_norm": 2.457179546356201, + "learning_rate": 4.537037037037038e-06, + "logits/chosen": -6.270019054412842, + "logits/rejected": -6.3202104568481445, + "logps/chosen": -466.30609130859375, + "logps/rejected": -476.45550537109375, + "loss": 0.6831833839416503, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": 0.0046669007278978825, + "rewards/margins": 0.023042945191264153, + "rewards/rejected": -0.018376046791672707, + "step": 50 + }, + { + "epoch": 0.3068340306834031, + "grad_norm": 1.6770554780960083, + "learning_rate": 5e-06, + "logits/chosen": -6.253265380859375, + "logits/rejected": -6.15267276763916, + "logps/chosen": -352.24908447265625, + "logps/rejected": -447.11444091796875, + "loss": 0.6791603088378906, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.010960197076201439, + "rewards/margins": 0.031198084354400635, + "rewards/rejected": -0.042158275842666626, + "step": 55 + }, + { + "epoch": 0.33472803347280333, + "grad_norm": 2.6027019023895264, + "learning_rate": 4.9485596707818935e-06, + "logits/chosen": -6.205387592315674, + "logits/rejected": -6.259293079376221, + "logps/chosen": -439.732421875, + "logps/rejected": -412.8099670410156, + "loss": 0.6736814975738525, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.010224836878478527, + "rewards/margins": 0.04464374855160713, + "rewards/rejected": -0.054868586361408234, + "step": 60 + }, + { + "epoch": 0.36262203626220363, + "grad_norm": 2.1717166900634766, + "learning_rate": 4.897119341563787e-06, + "logits/chosen": -6.1334547996521, + "logits/rejected": -6.148266792297363, + "logps/chosen": -390.00433349609375, + "logps/rejected": -376.676513671875, + "loss": 0.6825191974639893, + "rewards/accuracies": 0.5249999761581421, + "rewards/chosen": -0.04612758383154869, + "rewards/margins": 0.03951488807797432, + "rewards/rejected": -0.08564247190952301, + "step": 65 + }, + { + "epoch": 0.3905160390516039, + "grad_norm": 2.2574119567871094, + "learning_rate": 4.845679012345679e-06, + "logits/chosen": -6.236250877380371, + "logits/rejected": -6.165186882019043, + "logps/chosen": -411.1315002441406, + "logps/rejected": -447.22100830078125, + "loss": 0.6402256488800049, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.017545931041240692, + "rewards/margins": 0.12250369787216187, + "rewards/rejected": -0.14004963636398315, + "step": 70 + }, + { + "epoch": 0.41841004184100417, + "grad_norm": 2.3837037086486816, + "learning_rate": 4.794238683127572e-06, + "logits/chosen": -6.256176948547363, + "logits/rejected": -6.213258266448975, + "logps/chosen": -437.463623046875, + "logps/rejected": -404.8554992675781, + "loss": 0.6703986167907715, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.03428981825709343, + "rewards/margins": 0.05360151082277298, + "rewards/rejected": -0.08789133280515671, + "step": 75 + }, + { + "epoch": 0.44630404463040446, + "grad_norm": 3.304287910461426, + "learning_rate": 4.742798353909465e-06, + "logits/chosen": -6.2820305824279785, + "logits/rejected": -6.221312522888184, + "logps/chosen": -455.2318420410156, + "logps/rejected": -422.08837890625, + "loss": 0.7040590286254883, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.0881255492568016, + "rewards/margins": -0.006679975427687168, + "rewards/rejected": -0.08144557476043701, + "step": 80 + }, + { + "epoch": 0.47419804741980476, + "grad_norm": 2.6312427520751953, + "learning_rate": 4.691358024691358e-06, + "logits/chosen": -6.1796159744262695, + "logits/rejected": -6.193436622619629, + "logps/chosen": -423.59930419921875, + "logps/rejected": -486.1690979003906, + "loss": 0.6397527694702149, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.043790053576231, + "rewards/margins": 0.12486596405506134, + "rewards/rejected": -0.16865602135658264, + "step": 85 + }, + { + "epoch": 0.502092050209205, + "grad_norm": 2.3493549823760986, + "learning_rate": 4.6399176954732515e-06, + "logits/chosen": -6.136630058288574, + "logits/rejected": -6.202858924865723, + "logps/chosen": -467.627685546875, + "logps/rejected": -441.41241455078125, + "loss": 0.5936434745788575, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": 0.06891433894634247, + "rewards/margins": 0.26186972856521606, + "rewards/rejected": -0.19295534491539001, + "step": 90 + }, + { + "epoch": 0.5299860529986054, + "grad_norm": 2.4952447414398193, + "learning_rate": 4.588477366255145e-06, + "logits/chosen": -6.1503586769104, + "logits/rejected": -6.144400596618652, + "logps/chosen": -355.2735290527344, + "logps/rejected": -409.51702880859375, + "loss": 0.6157774925231934, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.0699276328086853, + "rewards/margins": 0.22694334387779236, + "rewards/rejected": -0.2968709468841553, + "step": 95 + }, + { + "epoch": 0.5578800557880056, + "grad_norm": 2.5470480918884277, + "learning_rate": 4.537037037037038e-06, + "logits/chosen": -6.14028263092041, + "logits/rejected": -6.104605197906494, + "logps/chosen": -429.1048889160156, + "logps/rejected": -454.7377014160156, + "loss": 0.6300024032592774, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.05769091844558716, + "rewards/margins": 0.14401891827583313, + "rewards/rejected": -0.2017098367214203, + "step": 100 + }, + { + "epoch": 0.5857740585774058, + "grad_norm": 2.6023478507995605, + "learning_rate": 4.485596707818931e-06, + "logits/chosen": -6.196796894073486, + "logits/rejected": -6.226622104644775, + "logps/chosen": -442.52685546875, + "logps/rejected": -516.7334594726562, + "loss": 0.6245638847351074, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.18519389629364014, + "rewards/margins": 0.24079546332359314, + "rewards/rejected": -0.42598938941955566, + "step": 105 + }, + { + "epoch": 0.6136680613668062, + "grad_norm": 2.0638511180877686, + "learning_rate": 4.434156378600823e-06, + "logits/chosen": -6.1991071701049805, + "logits/rejected": -6.119466781616211, + "logps/chosen": -410.86669921875, + "logps/rejected": -450.365478515625, + "loss": 0.6201879501342773, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.0853937491774559, + "rewards/margins": 0.17646726965904236, + "rewards/rejected": -0.26186102628707886, + "step": 110 + }, + { + "epoch": 0.6415620641562064, + "grad_norm": 2.3625364303588867, + "learning_rate": 4.382716049382716e-06, + "logits/chosen": -6.220386505126953, + "logits/rejected": -6.223449230194092, + "logps/chosen": -435.92626953125, + "logps/rejected": -495.6065368652344, + "loss": 0.6151515483856201, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.2208656519651413, + "rewards/margins": 0.21172885596752167, + "rewards/rejected": -0.43259453773498535, + "step": 115 + }, + { + "epoch": 0.6694560669456067, + "grad_norm": 1.8082666397094727, + "learning_rate": 4.331275720164609e-06, + "logits/chosen": -6.262181282043457, + "logits/rejected": -6.250016212463379, + "logps/chosen": -354.21795654296875, + "logps/rejected": -389.14556884765625, + "loss": 0.6109379768371582, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.17473874986171722, + "rewards/margins": 0.19315743446350098, + "rewards/rejected": -0.367896169424057, + "step": 120 + }, + { + "epoch": 0.697350069735007, + "grad_norm": 1.9556658267974854, + "learning_rate": 4.2798353909465025e-06, + "logits/chosen": -6.167700290679932, + "logits/rejected": -6.1421003341674805, + "logps/chosen": -379.1827392578125, + "logps/rejected": -426.69549560546875, + "loss": 0.6202447414398193, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.17535170912742615, + "rewards/margins": 0.18863627314567566, + "rewards/rejected": -0.3639879822731018, + "step": 125 + }, + { + "epoch": 0.7252440725244073, + "grad_norm": 3.001298666000366, + "learning_rate": 4.228395061728396e-06, + "logits/chosen": -6.2535905838012695, + "logits/rejected": -6.232400894165039, + "logps/chosen": -424.8458557128906, + "logps/rejected": -494.52960205078125, + "loss": 0.5493914127349854, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.22046081721782684, + "rewards/margins": 0.3785194754600525, + "rewards/rejected": -0.5989803075790405, + "step": 130 + }, + { + "epoch": 0.7531380753138075, + "grad_norm": 2.5210413932800293, + "learning_rate": 4.176954732510288e-06, + "logits/chosen": -6.078260898590088, + "logits/rejected": -6.0126447677612305, + "logps/chosen": -414.69940185546875, + "logps/rejected": -432.3282165527344, + "loss": 0.579456901550293, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.15084640681743622, + "rewards/margins": 0.2960701882839203, + "rewards/rejected": -0.4469165802001953, + "step": 135 + }, + { + "epoch": 0.7810320781032078, + "grad_norm": 2.6807265281677246, + "learning_rate": 4.125514403292181e-06, + "logits/chosen": -6.243051052093506, + "logits/rejected": -6.220357418060303, + "logps/chosen": -400.6156311035156, + "logps/rejected": -450.0393981933594, + "loss": 0.5514531135559082, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.25727975368499756, + "rewards/margins": 0.3991738259792328, + "rewards/rejected": -0.6564534902572632, + "step": 140 + }, + { + "epoch": 0.8089260808926081, + "grad_norm": 2.4137353897094727, + "learning_rate": 4.074074074074074e-06, + "logits/chosen": -6.186558246612549, + "logits/rejected": -6.139374256134033, + "logps/chosen": -442.314453125, + "logps/rejected": -491.584716796875, + "loss": 0.5633067131042481, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.35670942068099976, + "rewards/margins": 0.38038796186447144, + "rewards/rejected": -0.7370973825454712, + "step": 145 + }, + { + "epoch": 0.8368200836820083, + "grad_norm": 2.1043145656585693, + "learning_rate": 4.022633744855967e-06, + "logits/chosen": -6.177689552307129, + "logits/rejected": -6.167322635650635, + "logps/chosen": -435.2288513183594, + "logps/rejected": -469.41436767578125, + "loss": 0.5640112876892089, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.32313138246536255, + "rewards/margins": 0.45979684591293335, + "rewards/rejected": -0.7829282283782959, + "step": 150 + }, + { + "epoch": 0.8647140864714087, + "grad_norm": 1.9623620510101318, + "learning_rate": 3.97119341563786e-06, + "logits/chosen": -6.0590739250183105, + "logits/rejected": -6.033650875091553, + "logps/chosen": -421.5000915527344, + "logps/rejected": -370.30902099609375, + "loss": 0.6319089412689209, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.29380694031715393, + "rewards/margins": 0.1507532149553299, + "rewards/rejected": -0.44456014037132263, + "step": 155 + }, + { + "epoch": 0.8926080892608089, + "grad_norm": 1.9432786703109741, + "learning_rate": 3.9197530864197535e-06, + "logits/chosen": -6.267019271850586, + "logits/rejected": -6.214621067047119, + "logps/chosen": -417.11724853515625, + "logps/rejected": -431.72698974609375, + "loss": 0.5186795234680176, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -0.18987610936164856, + "rewards/margins": 0.5616164803504944, + "rewards/rejected": -0.7514925599098206, + "step": 160 + }, + { + "epoch": 0.9205020920502092, + "grad_norm": 1.8146827220916748, + "learning_rate": 3.868312757201647e-06, + "logits/chosen": -6.230213165283203, + "logits/rejected": -6.109362602233887, + "logps/chosen": -376.6744384765625, + "logps/rejected": -376.6526184082031, + "loss": 0.5346522808074952, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.19152329862117767, + "rewards/margins": 0.38060134649276733, + "rewards/rejected": -0.5721246004104614, + "step": 165 + }, + { + "epoch": 0.9483960948396095, + "grad_norm": 1.936680793762207, + "learning_rate": 3.81687242798354e-06, + "logits/chosen": -6.194340705871582, + "logits/rejected": -6.1444597244262695, + "logps/chosen": -389.017822265625, + "logps/rejected": -445.76544189453125, + "loss": 0.49420690536499023, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -0.055042725056409836, + "rewards/margins": 0.5315954685211182, + "rewards/rejected": -0.5866381525993347, + "step": 170 + }, + { + "epoch": 0.9762900976290098, + "grad_norm": 3.2391903400421143, + "learning_rate": 3.7654320987654325e-06, + "logits/chosen": -6.190616607666016, + "logits/rejected": -6.1138739585876465, + "logps/chosen": -421.6878356933594, + "logps/rejected": -460.1180114746094, + "loss": 0.5374621391296387, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.22659805417060852, + "rewards/margins": 0.43736472725868225, + "rewards/rejected": -0.6639627814292908, + "step": 175 + }, + { + "epoch": 1.0, + "grad_norm": 4.300363063812256, + "learning_rate": 3.7139917695473256e-06, + "logits/chosen": -6.227687358856201, + "logits/rejected": -6.129978179931641, + "logps/chosen": -449.4482116699219, + "logps/rejected": -430.434326171875, + "loss": 0.49641432762146, + "rewards/accuracies": 0.8529411554336548, + "rewards/chosen": -0.03447714447975159, + "rewards/margins": 0.5154433846473694, + "rewards/rejected": -0.5499205589294434, + "step": 180 + }, + { + "epoch": 1.0278940027894004, + "grad_norm": 1.5967351198196411, + "learning_rate": 3.6625514403292183e-06, + "logits/chosen": -6.067181587219238, + "logits/rejected": -6.06889533996582, + "logps/chosen": -399.98968505859375, + "logps/rejected": -447.72039794921875, + "loss": 0.4079257011413574, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 0.03092392347753048, + "rewards/margins": 0.7688177824020386, + "rewards/rejected": -0.7378939390182495, + "step": 185 + }, + { + "epoch": 1.0557880055788005, + "grad_norm": 1.7787078619003296, + "learning_rate": 3.6111111111111115e-06, + "logits/chosen": -6.146653175354004, + "logits/rejected": -6.154219627380371, + "logps/chosen": -419.3291015625, + "logps/rejected": -463.1239318847656, + "loss": 0.42812933921813967, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -0.007557299919426441, + "rewards/margins": 0.7340337634086609, + "rewards/rejected": -0.7415911555290222, + "step": 190 + }, + { + "epoch": 1.0836820083682008, + "grad_norm": 1.7694693803787231, + "learning_rate": 3.559670781893004e-06, + "logits/chosen": -6.160831928253174, + "logits/rejected": -6.151050567626953, + "logps/chosen": -379.6001281738281, + "logps/rejected": -394.00531005859375, + "loss": 0.41410012245178224, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": 0.03558122366666794, + "rewards/margins": 0.8024934530258179, + "rewards/rejected": -0.7669121623039246, + "step": 195 + }, + { + "epoch": 1.1115760111576012, + "grad_norm": 1.3202013969421387, + "learning_rate": 3.5082304526748973e-06, + "logits/chosen": -6.110814094543457, + "logits/rejected": -6.176726341247559, + "logps/chosen": -400.85052490234375, + "logps/rejected": -405.44195556640625, + "loss": 0.391094446182251, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -0.02833392843604088, + "rewards/margins": 0.8581112623214722, + "rewards/rejected": -0.8864452242851257, + "step": 200 + }, + { + "epoch": 1.1394700139470013, + "grad_norm": 1.9367257356643677, + "learning_rate": 3.4567901234567904e-06, + "logits/chosen": -6.311105728149414, + "logits/rejected": -6.179243087768555, + "logps/chosen": -440.97625732421875, + "logps/rejected": -463.1673278808594, + "loss": 0.3870258331298828, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -0.012000990100204945, + "rewards/margins": 0.9945917129516602, + "rewards/rejected": -1.0065927505493164, + "step": 205 + }, + { + "epoch": 1.1673640167364017, + "grad_norm": 2.4570703506469727, + "learning_rate": 3.405349794238683e-06, + "logits/chosen": -6.199883937835693, + "logits/rejected": -6.160645484924316, + "logps/chosen": -448.8758850097656, + "logps/rejected": -439.4584045410156, + "loss": 0.3908271551132202, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 0.04756501317024231, + "rewards/margins": 0.8816453218460083, + "rewards/rejected": -0.8340802192687988, + "step": 210 + }, + { + "epoch": 1.195258019525802, + "grad_norm": 1.5992087125778198, + "learning_rate": 3.3539094650205767e-06, + "logits/chosen": -6.163644313812256, + "logits/rejected": -6.093722343444824, + "logps/chosen": -449.8214416503906, + "logps/rejected": -481.3743591308594, + "loss": 0.3612337350845337, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09812992066144943, + "rewards/margins": 0.9562546014785767, + "rewards/rejected": -0.858124852180481, + "step": 215 + }, + { + "epoch": 1.2231520223152021, + "grad_norm": 1.4101840257644653, + "learning_rate": 3.30246913580247e-06, + "logits/chosen": -6.281071662902832, + "logits/rejected": -6.337766170501709, + "logps/chosen": -281.46795654296875, + "logps/rejected": -336.60845947265625, + "loss": 0.43022546768188474, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -0.1486438810825348, + "rewards/margins": 0.7064955234527588, + "rewards/rejected": -0.855139434337616, + "step": 220 + }, + { + "epoch": 1.2510460251046025, + "grad_norm": 1.5817450284957886, + "learning_rate": 3.2510288065843625e-06, + "logits/chosen": -6.1745758056640625, + "logits/rejected": -6.192706108093262, + "logps/chosen": -399.51190185546875, + "logps/rejected": -424.1844177246094, + "loss": 0.3992297887802124, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -0.051351286470890045, + "rewards/margins": 0.827163815498352, + "rewards/rejected": -0.8785150647163391, + "step": 225 + }, + { + "epoch": 1.2789400278940029, + "grad_norm": 1.3157438039779663, + "learning_rate": 3.1995884773662556e-06, + "logits/chosen": -6.1543779373168945, + "logits/rejected": -6.1732258796691895, + "logps/chosen": -425.80755615234375, + "logps/rejected": -447.39453125, + "loss": 0.35113141536712644, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": 0.01585063710808754, + "rewards/margins": 1.0381678342819214, + "rewards/rejected": -1.0223171710968018, + "step": 230 + }, + { + "epoch": 1.3068340306834032, + "grad_norm": 1.4981003999710083, + "learning_rate": 3.1481481481481483e-06, + "logits/chosen": -6.220085620880127, + "logits/rejected": -6.215539455413818, + "logps/chosen": -393.77716064453125, + "logps/rejected": -474.4695739746094, + "loss": 0.35107009410858153, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 0.07319364696741104, + "rewards/margins": 1.0109622478485107, + "rewards/rejected": -0.9377686381340027, + "step": 235 + }, + { + "epoch": 1.3347280334728033, + "grad_norm": 1.6417901515960693, + "learning_rate": 3.0967078189300415e-06, + "logits/chosen": -6.223210334777832, + "logits/rejected": -6.187335968017578, + "logps/chosen": -454.0006408691406, + "logps/rejected": -439.3124084472656, + "loss": 0.3300657272338867, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09006929397583008, + "rewards/margins": 1.089814305305481, + "rewards/rejected": -0.9997450709342957, + "step": 240 + }, + { + "epoch": 1.3626220362622037, + "grad_norm": 1.3642381429672241, + "learning_rate": 3.0452674897119346e-06, + "logits/chosen": -6.211455821990967, + "logits/rejected": -6.1171441078186035, + "logps/chosen": -402.8586730957031, + "logps/rejected": -431.7958984375, + "loss": 0.3634498119354248, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -0.0397757962346077, + "rewards/margins": 1.0654242038726807, + "rewards/rejected": -1.1052000522613525, + "step": 245 + }, + { + "epoch": 1.390516039051604, + "grad_norm": 1.9008878469467163, + "learning_rate": 2.9938271604938273e-06, + "logits/chosen": -6.2566046714782715, + "logits/rejected": -6.222296714782715, + "logps/chosen": -433.3427734375, + "logps/rejected": -489.8169860839844, + "loss": 0.3385239839553833, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 0.04979880154132843, + "rewards/margins": 1.0801963806152344, + "rewards/rejected": -1.0303975343704224, + "step": 250 + }, + { + "epoch": 1.4184100418410042, + "grad_norm": 1.5302000045776367, + "learning_rate": 2.9423868312757204e-06, + "logits/chosen": -6.219883918762207, + "logits/rejected": -6.1428399085998535, + "logps/chosen": -428.93914794921875, + "logps/rejected": -493.17437744140625, + "loss": 0.3642880916595459, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.09988997876644135, + "rewards/margins": 1.0157719850540161, + "rewards/rejected": -1.1156619787216187, + "step": 255 + }, + { + "epoch": 1.4463040446304045, + "grad_norm": 1.898163080215454, + "learning_rate": 2.890946502057613e-06, + "logits/chosen": -6.123999118804932, + "logits/rejected": -6.1102776527404785, + "logps/chosen": -494.8221740722656, + "logps/rejected": -464.3958435058594, + "loss": 0.31599912643432615, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -0.03670971840620041, + "rewards/margins": 1.1374431848526, + "rewards/rejected": -1.1741528511047363, + "step": 260 + }, + { + "epoch": 1.4741980474198049, + "grad_norm": 1.7610703706741333, + "learning_rate": 2.8395061728395062e-06, + "logits/chosen": -6.1201653480529785, + "logits/rejected": -6.155616760253906, + "logps/chosen": -489.81201171875, + "logps/rejected": -511.11724853515625, + "loss": 0.30640478134155275, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -0.057888150215148926, + "rewards/margins": 1.2739557027816772, + "rewards/rejected": -1.3318438529968262, + "step": 265 + }, + { + "epoch": 1.502092050209205, + "grad_norm": 1.3035284280776978, + "learning_rate": 2.7880658436213994e-06, + "logits/chosen": -6.300120830535889, + "logits/rejected": -6.1527228355407715, + "logps/chosen": -433.002685546875, + "logps/rejected": -471.416015625, + "loss": 0.2971210956573486, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.03341478109359741, + "rewards/margins": 1.295318841934204, + "rewards/rejected": -1.261904001235962, + "step": 270 + }, + { + "epoch": 1.5299860529986054, + "grad_norm": 1.7996495962142944, + "learning_rate": 2.736625514403292e-06, + "logits/chosen": -6.244246482849121, + "logits/rejected": -6.148303985595703, + "logps/chosen": -403.05133056640625, + "logps/rejected": -380.7239990234375, + "loss": 0.36082763671875, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -0.016453832387924194, + "rewards/margins": 1.103393793106079, + "rewards/rejected": -1.1198475360870361, + "step": 275 + }, + { + "epoch": 1.5578800557880057, + "grad_norm": 2.193908929824829, + "learning_rate": 2.6851851851851856e-06, + "logits/chosen": -6.1752519607543945, + "logits/rejected": -6.249385833740234, + "logps/chosen": -482.1385803222656, + "logps/rejected": -504.50311279296875, + "loss": 0.3047311782836914, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": 0.20222747325897217, + "rewards/margins": 1.3389281034469604, + "rewards/rejected": -1.1367007493972778, + "step": 280 + }, + { + "epoch": 1.5857740585774058, + "grad_norm": 1.3961774110794067, + "learning_rate": 2.6337448559670788e-06, + "logits/chosen": -6.2296528816223145, + "logits/rejected": -6.2554612159729, + "logps/chosen": -370.95428466796875, + "logps/rejected": -465.4556579589844, + "loss": 0.31553618907928466, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -0.10641174018383026, + "rewards/margins": 1.2302197217941284, + "rewards/rejected": -1.336631417274475, + "step": 285 + }, + { + "epoch": 1.6136680613668062, + "grad_norm": 1.296229600906372, + "learning_rate": 2.5823045267489715e-06, + "logits/chosen": -6.245091438293457, + "logits/rejected": -6.184089660644531, + "logps/chosen": -420.3929138183594, + "logps/rejected": -484.88934326171875, + "loss": 0.28981173038482666, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -0.03132909908890724, + "rewards/margins": 1.3171762228012085, + "rewards/rejected": -1.3485053777694702, + "step": 290 + }, + { + "epoch": 1.6415620641562065, + "grad_norm": 1.6442267894744873, + "learning_rate": 2.5308641975308646e-06, + "logits/chosen": -6.171110153198242, + "logits/rejected": -6.142795562744141, + "logps/chosen": -480.75726318359375, + "logps/rejected": -476.60552978515625, + "loss": 0.31265769004821775, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 0.19172248244285583, + "rewards/margins": 1.2785747051239014, + "rewards/rejected": -1.0868523120880127, + "step": 295 + }, + { + "epoch": 1.6694560669456067, + "grad_norm": 1.4329712390899658, + "learning_rate": 2.4794238683127577e-06, + "logits/chosen": -6.2438530921936035, + "logits/rejected": -6.167972564697266, + "logps/chosen": -398.7960510253906, + "logps/rejected": -416.4847717285156, + "loss": 0.3146506786346436, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -0.06857718527317047, + "rewards/margins": 1.1940205097198486, + "rewards/rejected": -1.2625977993011475, + "step": 300 + }, + { + "epoch": 1.697350069735007, + "grad_norm": 1.2974578142166138, + "learning_rate": 2.4279835390946504e-06, + "logits/chosen": -6.2253522872924805, + "logits/rejected": -6.272657871246338, + "logps/chosen": -405.8463439941406, + "logps/rejected": -451.399169921875, + "loss": 0.2888355255126953, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.040930021554231644, + "rewards/margins": 1.2757234573364258, + "rewards/rejected": -1.3166534900665283, + "step": 305 + }, + { + "epoch": 1.7252440725244074, + "grad_norm": 1.865118384361267, + "learning_rate": 2.3765432098765435e-06, + "logits/chosen": -6.285956382751465, + "logits/rejected": -6.284170627593994, + "logps/chosen": -376.63543701171875, + "logps/rejected": -404.28131103515625, + "loss": 0.34237470626831057, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -0.3045514225959778, + "rewards/margins": 1.1192312240600586, + "rewards/rejected": -1.4237825870513916, + "step": 310 + }, + { + "epoch": 1.7531380753138075, + "grad_norm": 1.3118988275527954, + "learning_rate": 2.3251028806584362e-06, + "logits/chosen": -6.330617904663086, + "logits/rejected": -6.2173357009887695, + "logps/chosen": -411.6089782714844, + "logps/rejected": -405.8570861816406, + "loss": 0.30266666412353516, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -0.08704517036676407, + "rewards/margins": 1.2953578233718872, + "rewards/rejected": -1.382403016090393, + "step": 315 + }, + { + "epoch": 1.7810320781032078, + "grad_norm": 2.04417085647583, + "learning_rate": 2.2736625514403294e-06, + "logits/chosen": -6.217926502227783, + "logits/rejected": -6.2158918380737305, + "logps/chosen": -472.285400390625, + "logps/rejected": -431.4971618652344, + "loss": 0.2793667078018188, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -0.1601211428642273, + "rewards/margins": 1.4048590660095215, + "rewards/rejected": -1.564980149269104, + "step": 320 + }, + { + "epoch": 1.8089260808926082, + "grad_norm": 1.6911827325820923, + "learning_rate": 2.222222222222222e-06, + "logits/chosen": -6.251864910125732, + "logits/rejected": -6.211045742034912, + "logps/chosen": -341.28216552734375, + "logps/rejected": -376.18780517578125, + "loss": 0.3147707223892212, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09754385054111481, + "rewards/margins": 1.1059117317199707, + "rewards/rejected": -1.2034555673599243, + "step": 325 + }, + { + "epoch": 1.8368200836820083, + "grad_norm": 1.974066138267517, + "learning_rate": 2.1707818930041156e-06, + "logits/chosen": -6.293444633483887, + "logits/rejected": -6.139300346374512, + "logps/chosen": -379.49871826171875, + "logps/rejected": -472.8863830566406, + "loss": 0.25607054233551024, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1596871316432953, + "rewards/margins": 1.4329643249511719, + "rewards/rejected": -1.2732770442962646, + "step": 330 + }, + { + "epoch": 1.8647140864714087, + "grad_norm": 1.5956170558929443, + "learning_rate": 2.1193415637860083e-06, + "logits/chosen": -6.274291038513184, + "logits/rejected": -6.236454010009766, + "logps/chosen": -415.7079162597656, + "logps/rejected": -418.0286560058594, + "loss": 0.27625508308410646, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 0.059024881571531296, + "rewards/margins": 1.4343515634536743, + "rewards/rejected": -1.3753265142440796, + "step": 335 + }, + { + "epoch": 1.892608089260809, + "grad_norm": 1.2464466094970703, + "learning_rate": 2.0679012345679015e-06, + "logits/chosen": -6.2509446144104, + "logits/rejected": -6.182458400726318, + "logps/chosen": -478.2040100097656, + "logps/rejected": -437.84063720703125, + "loss": 0.2548848867416382, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -0.04282323271036148, + "rewards/margins": 1.5546890497207642, + "rewards/rejected": -1.597512125968933, + "step": 340 + }, + { + "epoch": 1.9205020920502092, + "grad_norm": 2.1779489517211914, + "learning_rate": 2.0164609053497946e-06, + "logits/chosen": -6.29345703125, + "logits/rejected": -6.269745826721191, + "logps/chosen": -481.49871826171875, + "logps/rejected": -510.9442443847656, + "loss": 0.24130442142486572, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11563090234994888, + "rewards/margins": 1.6354728937149048, + "rewards/rejected": -1.7511036396026611, + "step": 345 + }, + { + "epoch": 1.9483960948396095, + "grad_norm": 1.3139598369598389, + "learning_rate": 1.9650205761316873e-06, + "logits/chosen": -6.220123291015625, + "logits/rejected": -6.242280006408691, + "logps/chosen": -407.80804443359375, + "logps/rejected": -401.532958984375, + "loss": 0.31305124759674074, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -0.2018294781446457, + "rewards/margins": 1.1702749729156494, + "rewards/rejected": -1.372104525566101, + "step": 350 + }, + { + "epoch": 1.9762900976290099, + "grad_norm": 0.7809721827507019, + "learning_rate": 1.9135802469135804e-06, + "logits/chosen": -6.304191589355469, + "logits/rejected": -6.235474109649658, + "logps/chosen": -424.8037109375, + "logps/rejected": -488.02362060546875, + "loss": 0.20149641036987304, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.028687676414847374, + "rewards/margins": 1.7642078399658203, + "rewards/rejected": -1.7928955554962158, + "step": 355 + }, + { + "epoch": 2.0, + "grad_norm": 1.6750562191009521, + "learning_rate": 1.8621399176954735e-06, + "logits/chosen": -6.249261379241943, + "logits/rejected": -6.197325706481934, + "logps/chosen": -384.4549255371094, + "logps/rejected": -450.3216552734375, + "loss": 0.2427699089050293, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.29954269528388977, + "rewards/margins": 1.3911824226379395, + "rewards/rejected": -1.6907252073287964, + "step": 360 + }, + { + "epoch": 2.0278940027894, + "grad_norm": 1.4706112146377563, + "learning_rate": 1.8106995884773665e-06, + "logits/chosen": -6.396174430847168, + "logits/rejected": -6.274371147155762, + "logps/chosen": -365.41265869140625, + "logps/rejected": -433.7682189941406, + "loss": 0.2221092700958252, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.011003097519278526, + "rewards/margins": 1.695127248764038, + "rewards/rejected": -1.7061303853988647, + "step": 365 + }, + { + "epoch": 2.0557880055788007, + "grad_norm": 1.6888645887374878, + "learning_rate": 1.7592592592592594e-06, + "logits/chosen": -6.299677848815918, + "logits/rejected": -6.338282108306885, + "logps/chosen": -460.0879821777344, + "logps/rejected": -489.38031005859375, + "loss": 0.19544492959976195, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004593986086547375, + "rewards/margins": 1.8440967798233032, + "rewards/rejected": -1.8486907482147217, + "step": 370 + }, + { + "epoch": 2.083682008368201, + "grad_norm": 0.998672604560852, + "learning_rate": 1.7078189300411525e-06, + "logits/chosen": -6.379012584686279, + "logits/rejected": -6.325904846191406, + "logps/chosen": -421.4281311035156, + "logps/rejected": -519.4888916015625, + "loss": 0.198160719871521, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.06311459839344025, + "rewards/margins": 1.9342191219329834, + "rewards/rejected": -1.8711044788360596, + "step": 375 + }, + { + "epoch": 2.111576011157601, + "grad_norm": 1.0411016941070557, + "learning_rate": 1.6563786008230454e-06, + "logits/chosen": -6.331459999084473, + "logits/rejected": -6.230687141418457, + "logps/chosen": -381.6875915527344, + "logps/rejected": -436.54522705078125, + "loss": 0.17946820259094237, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09231789410114288, + "rewards/margins": 1.843112587928772, + "rewards/rejected": -1.9354302883148193, + "step": 380 + }, + { + "epoch": 2.1394700139470015, + "grad_norm": 1.7208154201507568, + "learning_rate": 1.6049382716049383e-06, + "logits/chosen": -6.328245639801025, + "logits/rejected": -6.348081111907959, + "logps/chosen": -479.76763916015625, + "logps/rejected": -452.90863037109375, + "loss": 0.18842675685882568, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0751882940530777, + "rewards/margins": 1.8472039699554443, + "rewards/rejected": -1.9223921298980713, + "step": 385 + }, + { + "epoch": 2.1673640167364017, + "grad_norm": 1.2389427423477173, + "learning_rate": 1.5534979423868312e-06, + "logits/chosen": -6.397286891937256, + "logits/rejected": -6.3191328048706055, + "logps/chosen": -385.9268493652344, + "logps/rejected": -412.65350341796875, + "loss": 0.2650484800338745, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.056599367409944534, + "rewards/margins": 1.42960524559021, + "rewards/rejected": -1.4862048625946045, + "step": 390 + }, + { + "epoch": 2.195258019525802, + "grad_norm": 1.1179108619689941, + "learning_rate": 1.5020576131687246e-06, + "logits/chosen": -6.415247440338135, + "logits/rejected": -6.397636413574219, + "logps/chosen": -443.06640625, + "logps/rejected": -513.3284912109375, + "loss": 0.18407890796661378, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1382242888212204, + "rewards/margins": 1.9546597003936768, + "rewards/rejected": -2.0928843021392822, + "step": 395 + }, + { + "epoch": 2.2231520223152024, + "grad_norm": 0.8621686697006226, + "learning_rate": 1.4506172839506175e-06, + "logits/chosen": -6.363873481750488, + "logits/rejected": -6.3192243576049805, + "logps/chosen": -446.64532470703125, + "logps/rejected": -479.0135192871094, + "loss": 0.1950451135635376, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -0.012487297877669334, + "rewards/margins": 1.913094162940979, + "rewards/rejected": -1.9255813360214233, + "step": 400 + }, + { + "epoch": 2.2510460251046025, + "grad_norm": 1.1525218486785889, + "learning_rate": 1.3991769547325104e-06, + "logits/chosen": -6.443626403808594, + "logits/rejected": -6.286838054656982, + "logps/chosen": -391.4197692871094, + "logps/rejected": -440.87335205078125, + "loss": 0.19900518655776978, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -0.0584767684340477, + "rewards/margins": 1.853623390197754, + "rewards/rejected": -1.9121001958847046, + "step": 405 + }, + { + "epoch": 2.2789400278940026, + "grad_norm": 0.9601161479949951, + "learning_rate": 1.3477366255144033e-06, + "logits/chosen": -6.391339302062988, + "logits/rejected": -6.394341468811035, + "logps/chosen": -403.82275390625, + "logps/rejected": -426.35418701171875, + "loss": 0.1934623599052429, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -0.085336834192276, + "rewards/margins": 2.0006136894226074, + "rewards/rejected": -2.0859506130218506, + "step": 410 + }, + { + "epoch": 2.306834030683403, + "grad_norm": 0.9428881406784058, + "learning_rate": 1.2962962962962962e-06, + "logits/chosen": -6.419486045837402, + "logits/rejected": -6.426458835601807, + "logps/chosen": -467.2015075683594, + "logps/rejected": -397.3327941894531, + "loss": 0.16998686790466308, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2600168287754059, + "rewards/margins": 2.0186750888824463, + "rewards/rejected": -1.7586581707000732, + "step": 415 + }, + { + "epoch": 2.3347280334728033, + "grad_norm": 1.0415892601013184, + "learning_rate": 1.2448559670781894e-06, + "logits/chosen": -6.375484466552734, + "logits/rejected": -6.470736026763916, + "logps/chosen": -452.739990234375, + "logps/rejected": -439.50146484375, + "loss": 0.16452697515487671, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 0.1337260901927948, + "rewards/margins": 1.992404580116272, + "rewards/rejected": -1.8586784601211548, + "step": 420 + }, + { + "epoch": 2.3626220362622035, + "grad_norm": 0.9086716175079346, + "learning_rate": 1.1934156378600823e-06, + "logits/chosen": -6.495448112487793, + "logits/rejected": -6.403497219085693, + "logps/chosen": -396.10760498046875, + "logps/rejected": -430.85968017578125, + "loss": 0.18365414142608644, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09083832800388336, + "rewards/margins": 1.8667247295379639, + "rewards/rejected": -1.9575631618499756, + "step": 425 + }, + { + "epoch": 2.390516039051604, + "grad_norm": 0.9541850090026855, + "learning_rate": 1.1419753086419754e-06, + "logits/chosen": -6.468809604644775, + "logits/rejected": -6.406615257263184, + "logps/chosen": -459.69097900390625, + "logps/rejected": -530.5374755859375, + "loss": 0.1429673194885254, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.14917811751365662, + "rewards/margins": 2.2435402870178223, + "rewards/rejected": -2.094362258911133, + "step": 430 + }, + { + "epoch": 2.418410041841004, + "grad_norm": 1.0821386575698853, + "learning_rate": 1.0905349794238683e-06, + "logits/chosen": -6.456778526306152, + "logits/rejected": -6.414129734039307, + "logps/chosen": -456.4554138183594, + "logps/rejected": -449.54827880859375, + "loss": 0.17326489686965943, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.02010413445532322, + "rewards/margins": 1.948240876197815, + "rewards/rejected": -1.9683449268341064, + "step": 435 + }, + { + "epoch": 2.4463040446304043, + "grad_norm": 0.9114539623260498, + "learning_rate": 1.0390946502057615e-06, + "logits/chosen": -6.498774528503418, + "logits/rejected": -6.3586835861206055, + "logps/chosen": -354.0647277832031, + "logps/rejected": -390.14697265625, + "loss": 0.17193338871002198, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.015950988978147507, + "rewards/margins": 1.9409234523773193, + "rewards/rejected": -1.9568744897842407, + "step": 440 + }, + { + "epoch": 2.474198047419805, + "grad_norm": 0.8584111332893372, + "learning_rate": 9.876543209876544e-07, + "logits/chosen": -6.4719557762146, + "logits/rejected": -6.405210018157959, + "logps/chosen": -412.19158935546875, + "logps/rejected": -474.6786193847656, + "loss": 0.2001863956451416, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.18442487716674805, + "rewards/margins": 1.7562973499298096, + "rewards/rejected": -1.9407222270965576, + "step": 445 + }, + { + "epoch": 2.502092050209205, + "grad_norm": 1.4773839712142944, + "learning_rate": 9.362139917695474e-07, + "logits/chosen": -6.447714805603027, + "logits/rejected": -6.4641876220703125, + "logps/chosen": -472.32025146484375, + "logps/rejected": -461.2222595214844, + "loss": 0.17260836362838744, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.16385754942893982, + "rewards/margins": 2.0242795944213867, + "rewards/rejected": -2.1881370544433594, + "step": 450 + } + ], + "logging_steps": 5, + "max_steps": 540, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 50, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 0.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/gemma-2b-dpo/checkpoint-450/training_args.bin b/gemma-2b-dpo/checkpoint-450/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..c02f6b48421cea8396dfcaf6d8319b0150c14d7b --- /dev/null +++ b/gemma-2b-dpo/checkpoint-450/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3be0616829fa305775b58136a03c46cbb233332c99572ecc66875666e4681dc9 +size 5688 diff --git a/gemma-2b-dpo/checkpoint-50/README.md b/gemma-2b-dpo/checkpoint-50/README.md new file mode 100644 index 0000000000000000000000000000000000000000..14d70987363c6b5210911e1b79d69ac05ee2d8b7 --- /dev/null +++ b/gemma-2b-dpo/checkpoint-50/README.md @@ -0,0 +1,209 @@ +--- +base_model: google/gemma-2-2b-it +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:google/gemma-2-2b-it +- dpo +- lora +- transformers +- trl +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/gemma-2b-dpo/checkpoint-50/adapter_config.json b/gemma-2b-dpo/checkpoint-50/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..bbaa22368e6c52ef59d4559074221a5020aba608 --- /dev/null +++ b/gemma-2b-dpo/checkpoint-50/adapter_config.json @@ -0,0 +1,46 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": null, + "base_model_name_or_path": "google/gemma-2-2b-it", + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "up_proj", + "q_proj", + "v_proj", + "gate_proj", + "k_proj", + "down_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/gemma-2b-dpo/checkpoint-50/adapter_model.safetensors b/gemma-2b-dpo/checkpoint-50/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ecd0cfe4d51399689e818e2665ad09244623b307 --- /dev/null +++ b/gemma-2b-dpo/checkpoint-50/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d4a6c7561fc750cfdd61ed620bd69baf8da530ef548d279228317a7b05e49048 +size 83115256 diff --git a/gemma-2b-dpo/checkpoint-50/chat_template.jinja b/gemma-2b-dpo/checkpoint-50/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..923ec253c8dbefbb41cf084db7251df41d000f6d --- /dev/null +++ b/gemma-2b-dpo/checkpoint-50/chat_template.jinja @@ -0,0 +1,4 @@ +{{ bos_token }}{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if (message['role'] == 'assistant') %}{% set role = 'model' %}{% else %}{% set role = message['role'] %}{% endif %}{{ '' + role + ' +' + message['content'] | trim + ' +' }}{% endfor %}{% if add_generation_prompt %}{{'model +'}}{% endif %} \ No newline at end of file diff --git a/gemma-2b-dpo/checkpoint-50/optimizer.pt b/gemma-2b-dpo/checkpoint-50/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..a5426ea139a33ce1d1c8afddb5c9cfd654db2148 --- /dev/null +++ b/gemma-2b-dpo/checkpoint-50/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d777ecee9db34b555466ec5668cab143cbeb3b14de53d82e968f09bd579b19f0 +size 42616388 diff --git a/gemma-2b-dpo/checkpoint-50/rng_state.pth b/gemma-2b-dpo/checkpoint-50/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..897ad19948758b40c785c678c136c4029433a27a --- /dev/null +++ b/gemma-2b-dpo/checkpoint-50/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d9cd6a0487226e5bd30d1846894c82af483733ab4381b75bae9c0745e05d405 +size 14244 diff --git a/gemma-2b-dpo/checkpoint-50/scheduler.pt b/gemma-2b-dpo/checkpoint-50/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..3aa70fbf0941c5b0b08bccff49180cd64ee5daa0 --- /dev/null +++ b/gemma-2b-dpo/checkpoint-50/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:53fa6ffb019ab8bd82b9d14dd048634891d96bd560e0862ff3d73901e83e0c73 +size 1064 diff --git a/gemma-2b-dpo/checkpoint-50/tokenizer.json b/gemma-2b-dpo/checkpoint-50/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..6523cc0616b64c563af913a417dfa7eb01549a2c --- /dev/null +++ b/gemma-2b-dpo/checkpoint-50/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:394ace002a144ac6ad5486387502f2d36f70c087310c3d907857240c76fcb36e +size 34362748 diff --git a/gemma-2b-dpo/checkpoint-50/tokenizer_config.json b/gemma-2b-dpo/checkpoint-50/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..eabc4ed8ab34751069a1970d61e616ce93e53880 --- /dev/null +++ b/gemma-2b-dpo/checkpoint-50/tokenizer_config.json @@ -0,0 +1,19 @@ +{ + "backend": "tokenizers", + "bos_token": "", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "extra_special_tokens": [ + "", + "" + ], + "is_local": false, + "mask_token": "", + "model_max_length": 1000000000000000019884624838656, + "pad_token": "", + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "tokenizer_class": "GemmaTokenizer", + "unk_token": "", + "use_default_system_prompt": false +} diff --git a/gemma-2b-dpo/checkpoint-50/trainer_state.json b/gemma-2b-dpo/checkpoint-50/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..4422862e42f9ee56bc24932575cc1bc6933fc6d7 --- /dev/null +++ b/gemma-2b-dpo/checkpoint-50/trainer_state.json @@ -0,0 +1,184 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.2789400278940028, + "eval_steps": 500, + "global_step": 50, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.02789400278940028, + "grad_norm": 2.7052793502807617, + "learning_rate": 3.7037037037037036e-07, + "logits/chosen": -6.208017826080322, + "logits/rejected": -6.18649435043335, + "logps/chosen": -417.861328125, + "logps/rejected": -431.774169921875, + "loss": 0.6978574275970459, + "rewards/accuracies": 0.25, + "rewards/chosen": 0.0027643204666674137, + "rewards/margins": -0.00830078125, + "rewards/rejected": 0.011065103113651276, + "step": 5 + }, + { + "epoch": 0.05578800557880056, + "grad_norm": 1.9632341861724854, + "learning_rate": 8.333333333333333e-07, + "logits/chosen": -6.051701545715332, + "logits/rejected": -6.098549842834473, + "logps/chosen": -442.61126708984375, + "logps/rejected": -419.4737243652344, + "loss": 0.6965099811553955, + "rewards/accuracies": 0.44999998807907104, + "rewards/chosen": -0.0022751614451408386, + "rewards/margins": -0.004312096629291773, + "rewards/rejected": 0.002036933321505785, + "step": 10 + }, + { + "epoch": 0.08368200836820083, + "grad_norm": 1.9358311891555786, + "learning_rate": 1.2962962962962962e-06, + "logits/chosen": -6.1482648849487305, + "logits/rejected": -6.208896636962891, + "logps/chosen": -419.15155029296875, + "logps/rejected": -393.37322998046875, + "loss": 0.6971890449523925, + "rewards/accuracies": 0.4749999940395355, + "rewards/chosen": 0.004753150977194309, + "rewards/margins": -0.006633720360696316, + "rewards/rejected": 0.0113868722692132, + "step": 15 + }, + { + "epoch": 0.11157601115760112, + "grad_norm": 2.137960195541382, + "learning_rate": 1.7592592592592594e-06, + "logits/chosen": -6.1889142990112305, + "logits/rejected": -6.147027015686035, + "logps/chosen": -449.2413024902344, + "logps/rejected": -387.8244934082031, + "loss": 0.694630479812622, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.01739494316279888, + "rewards/margins": -0.0010753620881587267, + "rewards/rejected": -0.016319578513503075, + "step": 20 + }, + { + "epoch": 0.1394700139470014, + "grad_norm": 2.610708475112915, + "learning_rate": 2.222222222222222e-06, + "logits/chosen": -6.098985195159912, + "logits/rejected": -6.146561145782471, + "logps/chosen": -528.6546020507812, + "logps/rejected": -517.2868041992188, + "loss": 0.6923945903778076, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.013269426301121712, + "rewards/margins": 0.004172402434051037, + "rewards/rejected": 0.009097023867070675, + "step": 25 + }, + { + "epoch": 0.16736401673640167, + "grad_norm": 3.0792224407196045, + "learning_rate": 2.6851851851851856e-06, + "logits/chosen": -6.156611442565918, + "logits/rejected": -6.146718502044678, + "logps/chosen": -427.1123962402344, + "logps/rejected": -413.99810791015625, + "loss": 0.6963389396667481, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.0016099174972623587, + "rewards/margins": -0.0036583715118467808, + "rewards/rejected": 0.0052682883106172085, + "step": 30 + }, + { + "epoch": 0.19525801952580196, + "grad_norm": 2.40751051902771, + "learning_rate": 3.1481481481481483e-06, + "logits/chosen": -6.270221710205078, + "logits/rejected": -6.222764492034912, + "logps/chosen": -433.89312744140625, + "logps/rejected": -442.81378173828125, + "loss": 0.6875874042510987, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.003747978014871478, + "rewards/margins": 0.013033255934715271, + "rewards/rejected": -0.016781235113739967, + "step": 35 + }, + { + "epoch": 0.22315202231520223, + "grad_norm": 2.409308671951294, + "learning_rate": 3.6111111111111115e-06, + "logits/chosen": -6.171980857849121, + "logits/rejected": -6.236737251281738, + "logps/chosen": -411.51092529296875, + "logps/rejected": -454.578857421875, + "loss": 0.6975872993469239, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.0006048586219549179, + "rewards/margins": -0.006687240209430456, + "rewards/rejected": 0.0060823829844594, + "step": 40 + }, + { + "epoch": 0.2510460251046025, + "grad_norm": 2.8261911869049072, + "learning_rate": 4.074074074074074e-06, + "logits/chosen": -6.1633710861206055, + "logits/rejected": -6.245741367340088, + "logps/chosen": -373.363525390625, + "logps/rejected": -356.736572265625, + "loss": 0.6881499290466309, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": 0.01782766357064247, + "rewards/margins": 0.015053692273795605, + "rewards/rejected": 0.002773971762508154, + "step": 45 + }, + { + "epoch": 0.2789400278940028, + "grad_norm": 2.457179546356201, + "learning_rate": 4.537037037037038e-06, + "logits/chosen": -6.270019054412842, + "logits/rejected": -6.3202104568481445, + "logps/chosen": -466.30609130859375, + "logps/rejected": -476.45550537109375, + "loss": 0.6831833839416503, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": 0.0046669007278978825, + "rewards/margins": 0.023042945191264153, + "rewards/rejected": -0.018376046791672707, + "step": 50 + } + ], + "logging_steps": 5, + "max_steps": 540, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 50, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 0.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/gemma-2b-dpo/checkpoint-50/training_args.bin b/gemma-2b-dpo/checkpoint-50/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..c02f6b48421cea8396dfcaf6d8319b0150c14d7b --- /dev/null +++ b/gemma-2b-dpo/checkpoint-50/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3be0616829fa305775b58136a03c46cbb233332c99572ecc66875666e4681dc9 +size 5688 diff --git a/gemma-2b-dpo/checkpoint-500/README.md b/gemma-2b-dpo/checkpoint-500/README.md new file mode 100644 index 0000000000000000000000000000000000000000..14d70987363c6b5210911e1b79d69ac05ee2d8b7 --- /dev/null +++ b/gemma-2b-dpo/checkpoint-500/README.md @@ -0,0 +1,209 @@ +--- +base_model: google/gemma-2-2b-it +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:google/gemma-2-2b-it +- dpo +- lora +- transformers +- trl +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/gemma-2b-dpo/checkpoint-500/adapter_config.json b/gemma-2b-dpo/checkpoint-500/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..bbaa22368e6c52ef59d4559074221a5020aba608 --- /dev/null +++ b/gemma-2b-dpo/checkpoint-500/adapter_config.json @@ -0,0 +1,46 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": null, + "base_model_name_or_path": "google/gemma-2-2b-it", + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "up_proj", + "q_proj", + "v_proj", + "gate_proj", + "k_proj", + "down_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/gemma-2b-dpo/checkpoint-500/adapter_model.safetensors b/gemma-2b-dpo/checkpoint-500/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..924818d4745208b78efa8ac44fa5a8913b9b3951 --- /dev/null +++ b/gemma-2b-dpo/checkpoint-500/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bc9598bb118acad93255db6c418894d15518d877aeda622cfe129d657b34a87b +size 83115256 diff --git a/gemma-2b-dpo/checkpoint-500/chat_template.jinja b/gemma-2b-dpo/checkpoint-500/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..923ec253c8dbefbb41cf084db7251df41d000f6d --- /dev/null +++ b/gemma-2b-dpo/checkpoint-500/chat_template.jinja @@ -0,0 +1,4 @@ +{{ bos_token }}{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if (message['role'] == 'assistant') %}{% set role = 'model' %}{% else %}{% set role = message['role'] %}{% endif %}{{ '' + role + ' +' + message['content'] | trim + ' +' }}{% endfor %}{% if add_generation_prompt %}{{'model +'}}{% endif %} \ No newline at end of file diff --git a/gemma-2b-dpo/checkpoint-500/optimizer.pt b/gemma-2b-dpo/checkpoint-500/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..e000a3dd76e3c9b718ef67ec7096f62e0e203a12 --- /dev/null +++ b/gemma-2b-dpo/checkpoint-500/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a7fcc2897ff8390b57904f70c8972f8e70bc45e4c66c9d24c80d8bd0b51ddc7f +size 42616772 diff --git a/gemma-2b-dpo/checkpoint-500/rng_state.pth b/gemma-2b-dpo/checkpoint-500/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..e29ad0909e8c1afefa182e7ed890fa257c75af25 --- /dev/null +++ b/gemma-2b-dpo/checkpoint-500/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3cf9097d4513154245c48236b6ec5137b7ee2a21c9f58f2cba798ea275c6026f +size 14244 diff --git a/gemma-2b-dpo/checkpoint-500/scheduler.pt b/gemma-2b-dpo/checkpoint-500/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..944a6a6fd22dd623e539d66fa1885e599b8f95f5 --- /dev/null +++ b/gemma-2b-dpo/checkpoint-500/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:139762ebeef25282c8f1bf0bb454f6dc8428cbd5cf33629c48f78b08c7a31cc2 +size 1064 diff --git a/gemma-2b-dpo/checkpoint-500/tokenizer.json b/gemma-2b-dpo/checkpoint-500/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..6523cc0616b64c563af913a417dfa7eb01549a2c --- /dev/null +++ b/gemma-2b-dpo/checkpoint-500/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:394ace002a144ac6ad5486387502f2d36f70c087310c3d907857240c76fcb36e +size 34362748 diff --git a/gemma-2b-dpo/checkpoint-500/tokenizer_config.json b/gemma-2b-dpo/checkpoint-500/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..eabc4ed8ab34751069a1970d61e616ce93e53880 --- /dev/null +++ b/gemma-2b-dpo/checkpoint-500/tokenizer_config.json @@ -0,0 +1,19 @@ +{ + "backend": "tokenizers", + "bos_token": "", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "extra_special_tokens": [ + "", + "" + ], + "is_local": false, + "mask_token": "", + "model_max_length": 1000000000000000019884624838656, + "pad_token": "", + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "tokenizer_class": "GemmaTokenizer", + "unk_token": "", + "use_default_system_prompt": false +} diff --git a/gemma-2b-dpo/checkpoint-500/trainer_state.json b/gemma-2b-dpo/checkpoint-500/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..1cf9631ebfb7c32bbd4a8a047c0fa00630f99f4a --- /dev/null +++ b/gemma-2b-dpo/checkpoint-500/trainer_state.json @@ -0,0 +1,1534 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.781032078103208, + "eval_steps": 500, + "global_step": 500, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.02789400278940028, + "grad_norm": 2.7052793502807617, + "learning_rate": 3.7037037037037036e-07, + "logits/chosen": -6.208017826080322, + "logits/rejected": -6.18649435043335, + "logps/chosen": -417.861328125, + "logps/rejected": -431.774169921875, + "loss": 0.6978574275970459, + "rewards/accuracies": 0.25, + "rewards/chosen": 0.0027643204666674137, + "rewards/margins": -0.00830078125, + "rewards/rejected": 0.011065103113651276, + "step": 5 + }, + { + "epoch": 0.05578800557880056, + "grad_norm": 1.9632341861724854, + "learning_rate": 8.333333333333333e-07, + "logits/chosen": -6.051701545715332, + "logits/rejected": -6.098549842834473, + "logps/chosen": -442.61126708984375, + "logps/rejected": -419.4737243652344, + "loss": 0.6965099811553955, + "rewards/accuracies": 0.44999998807907104, + "rewards/chosen": -0.0022751614451408386, + "rewards/margins": -0.004312096629291773, + "rewards/rejected": 0.002036933321505785, + "step": 10 + }, + { + "epoch": 0.08368200836820083, + "grad_norm": 1.9358311891555786, + "learning_rate": 1.2962962962962962e-06, + "logits/chosen": -6.1482648849487305, + "logits/rejected": -6.208896636962891, + "logps/chosen": -419.15155029296875, + "logps/rejected": -393.37322998046875, + "loss": 0.6971890449523925, + "rewards/accuracies": 0.4749999940395355, + "rewards/chosen": 0.004753150977194309, + "rewards/margins": -0.006633720360696316, + "rewards/rejected": 0.0113868722692132, + "step": 15 + }, + { + "epoch": 0.11157601115760112, + "grad_norm": 2.137960195541382, + "learning_rate": 1.7592592592592594e-06, + "logits/chosen": -6.1889142990112305, + "logits/rejected": -6.147027015686035, + "logps/chosen": -449.2413024902344, + "logps/rejected": -387.8244934082031, + "loss": 0.694630479812622, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.01739494316279888, + "rewards/margins": -0.0010753620881587267, + "rewards/rejected": -0.016319578513503075, + "step": 20 + }, + { + "epoch": 0.1394700139470014, + "grad_norm": 2.610708475112915, + "learning_rate": 2.222222222222222e-06, + "logits/chosen": -6.098985195159912, + "logits/rejected": -6.146561145782471, + "logps/chosen": -528.6546020507812, + "logps/rejected": -517.2868041992188, + "loss": 0.6923945903778076, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.013269426301121712, + "rewards/margins": 0.004172402434051037, + "rewards/rejected": 0.009097023867070675, + "step": 25 + }, + { + "epoch": 0.16736401673640167, + "grad_norm": 3.0792224407196045, + "learning_rate": 2.6851851851851856e-06, + "logits/chosen": -6.156611442565918, + "logits/rejected": -6.146718502044678, + "logps/chosen": -427.1123962402344, + "logps/rejected": -413.99810791015625, + "loss": 0.6963389396667481, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.0016099174972623587, + "rewards/margins": -0.0036583715118467808, + "rewards/rejected": 0.0052682883106172085, + "step": 30 + }, + { + "epoch": 0.19525801952580196, + "grad_norm": 2.40751051902771, + "learning_rate": 3.1481481481481483e-06, + "logits/chosen": -6.270221710205078, + "logits/rejected": -6.222764492034912, + "logps/chosen": -433.89312744140625, + "logps/rejected": -442.81378173828125, + "loss": 0.6875874042510987, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.003747978014871478, + "rewards/margins": 0.013033255934715271, + "rewards/rejected": -0.016781235113739967, + "step": 35 + }, + { + "epoch": 0.22315202231520223, + "grad_norm": 2.409308671951294, + "learning_rate": 3.6111111111111115e-06, + "logits/chosen": -6.171980857849121, + "logits/rejected": -6.236737251281738, + "logps/chosen": -411.51092529296875, + "logps/rejected": -454.578857421875, + "loss": 0.6975872993469239, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.0006048586219549179, + "rewards/margins": -0.006687240209430456, + "rewards/rejected": 0.0060823829844594, + "step": 40 + }, + { + "epoch": 0.2510460251046025, + "grad_norm": 2.8261911869049072, + "learning_rate": 4.074074074074074e-06, + "logits/chosen": -6.1633710861206055, + "logits/rejected": -6.245741367340088, + "logps/chosen": -373.363525390625, + "logps/rejected": -356.736572265625, + "loss": 0.6881499290466309, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": 0.01782766357064247, + "rewards/margins": 0.015053692273795605, + "rewards/rejected": 0.002773971762508154, + "step": 45 + }, + { + "epoch": 0.2789400278940028, + "grad_norm": 2.457179546356201, + "learning_rate": 4.537037037037038e-06, + "logits/chosen": -6.270019054412842, + "logits/rejected": -6.3202104568481445, + "logps/chosen": -466.30609130859375, + "logps/rejected": -476.45550537109375, + "loss": 0.6831833839416503, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": 0.0046669007278978825, + "rewards/margins": 0.023042945191264153, + "rewards/rejected": -0.018376046791672707, + "step": 50 + }, + { + "epoch": 0.3068340306834031, + "grad_norm": 1.6770554780960083, + "learning_rate": 5e-06, + "logits/chosen": -6.253265380859375, + "logits/rejected": -6.15267276763916, + "logps/chosen": -352.24908447265625, + "logps/rejected": -447.11444091796875, + "loss": 0.6791603088378906, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.010960197076201439, + "rewards/margins": 0.031198084354400635, + "rewards/rejected": -0.042158275842666626, + "step": 55 + }, + { + "epoch": 0.33472803347280333, + "grad_norm": 2.6027019023895264, + "learning_rate": 4.9485596707818935e-06, + "logits/chosen": -6.205387592315674, + "logits/rejected": -6.259293079376221, + "logps/chosen": -439.732421875, + "logps/rejected": -412.8099670410156, + "loss": 0.6736814975738525, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.010224836878478527, + "rewards/margins": 0.04464374855160713, + "rewards/rejected": -0.054868586361408234, + "step": 60 + }, + { + "epoch": 0.36262203626220363, + "grad_norm": 2.1717166900634766, + "learning_rate": 4.897119341563787e-06, + "logits/chosen": -6.1334547996521, + "logits/rejected": -6.148266792297363, + "logps/chosen": -390.00433349609375, + "logps/rejected": -376.676513671875, + "loss": 0.6825191974639893, + "rewards/accuracies": 0.5249999761581421, + "rewards/chosen": -0.04612758383154869, + "rewards/margins": 0.03951488807797432, + "rewards/rejected": -0.08564247190952301, + "step": 65 + }, + { + "epoch": 0.3905160390516039, + "grad_norm": 2.2574119567871094, + "learning_rate": 4.845679012345679e-06, + "logits/chosen": -6.236250877380371, + "logits/rejected": -6.165186882019043, + "logps/chosen": -411.1315002441406, + "logps/rejected": -447.22100830078125, + "loss": 0.6402256488800049, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.017545931041240692, + "rewards/margins": 0.12250369787216187, + "rewards/rejected": -0.14004963636398315, + "step": 70 + }, + { + "epoch": 0.41841004184100417, + "grad_norm": 2.3837037086486816, + "learning_rate": 4.794238683127572e-06, + "logits/chosen": -6.256176948547363, + "logits/rejected": -6.213258266448975, + "logps/chosen": -437.463623046875, + "logps/rejected": -404.8554992675781, + "loss": 0.6703986167907715, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.03428981825709343, + "rewards/margins": 0.05360151082277298, + "rewards/rejected": -0.08789133280515671, + "step": 75 + }, + { + "epoch": 0.44630404463040446, + "grad_norm": 3.304287910461426, + "learning_rate": 4.742798353909465e-06, + "logits/chosen": -6.2820305824279785, + "logits/rejected": -6.221312522888184, + "logps/chosen": -455.2318420410156, + "logps/rejected": -422.08837890625, + "loss": 0.7040590286254883, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.0881255492568016, + "rewards/margins": -0.006679975427687168, + "rewards/rejected": -0.08144557476043701, + "step": 80 + }, + { + "epoch": 0.47419804741980476, + "grad_norm": 2.6312427520751953, + "learning_rate": 4.691358024691358e-06, + "logits/chosen": -6.1796159744262695, + "logits/rejected": -6.193436622619629, + "logps/chosen": -423.59930419921875, + "logps/rejected": -486.1690979003906, + "loss": 0.6397527694702149, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.043790053576231, + "rewards/margins": 0.12486596405506134, + "rewards/rejected": -0.16865602135658264, + "step": 85 + }, + { + "epoch": 0.502092050209205, + "grad_norm": 2.3493549823760986, + "learning_rate": 4.6399176954732515e-06, + "logits/chosen": -6.136630058288574, + "logits/rejected": -6.202858924865723, + "logps/chosen": -467.627685546875, + "logps/rejected": -441.41241455078125, + "loss": 0.5936434745788575, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": 0.06891433894634247, + "rewards/margins": 0.26186972856521606, + "rewards/rejected": -0.19295534491539001, + "step": 90 + }, + { + "epoch": 0.5299860529986054, + "grad_norm": 2.4952447414398193, + "learning_rate": 4.588477366255145e-06, + "logits/chosen": -6.1503586769104, + "logits/rejected": -6.144400596618652, + "logps/chosen": -355.2735290527344, + "logps/rejected": -409.51702880859375, + "loss": 0.6157774925231934, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.0699276328086853, + "rewards/margins": 0.22694334387779236, + "rewards/rejected": -0.2968709468841553, + "step": 95 + }, + { + "epoch": 0.5578800557880056, + "grad_norm": 2.5470480918884277, + "learning_rate": 4.537037037037038e-06, + "logits/chosen": -6.14028263092041, + "logits/rejected": -6.104605197906494, + "logps/chosen": -429.1048889160156, + "logps/rejected": -454.7377014160156, + "loss": 0.6300024032592774, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.05769091844558716, + "rewards/margins": 0.14401891827583313, + "rewards/rejected": -0.2017098367214203, + "step": 100 + }, + { + "epoch": 0.5857740585774058, + "grad_norm": 2.6023478507995605, + "learning_rate": 4.485596707818931e-06, + "logits/chosen": -6.196796894073486, + "logits/rejected": -6.226622104644775, + "logps/chosen": -442.52685546875, + "logps/rejected": -516.7334594726562, + "loss": 0.6245638847351074, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.18519389629364014, + "rewards/margins": 0.24079546332359314, + "rewards/rejected": -0.42598938941955566, + "step": 105 + }, + { + "epoch": 0.6136680613668062, + "grad_norm": 2.0638511180877686, + "learning_rate": 4.434156378600823e-06, + "logits/chosen": -6.1991071701049805, + "logits/rejected": -6.119466781616211, + "logps/chosen": -410.86669921875, + "logps/rejected": -450.365478515625, + "loss": 0.6201879501342773, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.0853937491774559, + "rewards/margins": 0.17646726965904236, + "rewards/rejected": -0.26186102628707886, + "step": 110 + }, + { + "epoch": 0.6415620641562064, + "grad_norm": 2.3625364303588867, + "learning_rate": 4.382716049382716e-06, + "logits/chosen": -6.220386505126953, + "logits/rejected": -6.223449230194092, + "logps/chosen": -435.92626953125, + "logps/rejected": -495.6065368652344, + "loss": 0.6151515483856201, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.2208656519651413, + "rewards/margins": 0.21172885596752167, + "rewards/rejected": -0.43259453773498535, + "step": 115 + }, + { + "epoch": 0.6694560669456067, + "grad_norm": 1.8082666397094727, + "learning_rate": 4.331275720164609e-06, + "logits/chosen": -6.262181282043457, + "logits/rejected": -6.250016212463379, + "logps/chosen": -354.21795654296875, + "logps/rejected": -389.14556884765625, + "loss": 0.6109379768371582, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.17473874986171722, + "rewards/margins": 0.19315743446350098, + "rewards/rejected": -0.367896169424057, + "step": 120 + }, + { + "epoch": 0.697350069735007, + "grad_norm": 1.9556658267974854, + "learning_rate": 4.2798353909465025e-06, + "logits/chosen": -6.167700290679932, + "logits/rejected": -6.1421003341674805, + "logps/chosen": -379.1827392578125, + "logps/rejected": -426.69549560546875, + "loss": 0.6202447414398193, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.17535170912742615, + "rewards/margins": 0.18863627314567566, + "rewards/rejected": -0.3639879822731018, + "step": 125 + }, + { + "epoch": 0.7252440725244073, + "grad_norm": 3.001298666000366, + "learning_rate": 4.228395061728396e-06, + "logits/chosen": -6.2535905838012695, + "logits/rejected": -6.232400894165039, + "logps/chosen": -424.8458557128906, + "logps/rejected": -494.52960205078125, + "loss": 0.5493914127349854, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.22046081721782684, + "rewards/margins": 0.3785194754600525, + "rewards/rejected": -0.5989803075790405, + "step": 130 + }, + { + "epoch": 0.7531380753138075, + "grad_norm": 2.5210413932800293, + "learning_rate": 4.176954732510288e-06, + "logits/chosen": -6.078260898590088, + "logits/rejected": -6.0126447677612305, + "logps/chosen": -414.69940185546875, + "logps/rejected": -432.3282165527344, + "loss": 0.579456901550293, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.15084640681743622, + "rewards/margins": 0.2960701882839203, + "rewards/rejected": -0.4469165802001953, + "step": 135 + }, + { + "epoch": 0.7810320781032078, + "grad_norm": 2.6807265281677246, + "learning_rate": 4.125514403292181e-06, + "logits/chosen": -6.243051052093506, + "logits/rejected": -6.220357418060303, + "logps/chosen": -400.6156311035156, + "logps/rejected": -450.0393981933594, + "loss": 0.5514531135559082, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.25727975368499756, + "rewards/margins": 0.3991738259792328, + "rewards/rejected": -0.6564534902572632, + "step": 140 + }, + { + "epoch": 0.8089260808926081, + "grad_norm": 2.4137353897094727, + "learning_rate": 4.074074074074074e-06, + "logits/chosen": -6.186558246612549, + "logits/rejected": -6.139374256134033, + "logps/chosen": -442.314453125, + "logps/rejected": -491.584716796875, + "loss": 0.5633067131042481, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.35670942068099976, + "rewards/margins": 0.38038796186447144, + "rewards/rejected": -0.7370973825454712, + "step": 145 + }, + { + "epoch": 0.8368200836820083, + "grad_norm": 2.1043145656585693, + "learning_rate": 4.022633744855967e-06, + "logits/chosen": -6.177689552307129, + "logits/rejected": -6.167322635650635, + "logps/chosen": -435.2288513183594, + "logps/rejected": -469.41436767578125, + "loss": 0.5640112876892089, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.32313138246536255, + "rewards/margins": 0.45979684591293335, + "rewards/rejected": -0.7829282283782959, + "step": 150 + }, + { + "epoch": 0.8647140864714087, + "grad_norm": 1.9623620510101318, + "learning_rate": 3.97119341563786e-06, + "logits/chosen": -6.0590739250183105, + "logits/rejected": -6.033650875091553, + "logps/chosen": -421.5000915527344, + "logps/rejected": -370.30902099609375, + "loss": 0.6319089412689209, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.29380694031715393, + "rewards/margins": 0.1507532149553299, + "rewards/rejected": -0.44456014037132263, + "step": 155 + }, + { + "epoch": 0.8926080892608089, + "grad_norm": 1.9432786703109741, + "learning_rate": 3.9197530864197535e-06, + "logits/chosen": -6.267019271850586, + "logits/rejected": -6.214621067047119, + "logps/chosen": -417.11724853515625, + "logps/rejected": -431.72698974609375, + "loss": 0.5186795234680176, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -0.18987610936164856, + "rewards/margins": 0.5616164803504944, + "rewards/rejected": -0.7514925599098206, + "step": 160 + }, + { + "epoch": 0.9205020920502092, + "grad_norm": 1.8146827220916748, + "learning_rate": 3.868312757201647e-06, + "logits/chosen": -6.230213165283203, + "logits/rejected": -6.109362602233887, + "logps/chosen": -376.6744384765625, + "logps/rejected": -376.6526184082031, + "loss": 0.5346522808074952, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.19152329862117767, + "rewards/margins": 0.38060134649276733, + "rewards/rejected": -0.5721246004104614, + "step": 165 + }, + { + "epoch": 0.9483960948396095, + "grad_norm": 1.936680793762207, + "learning_rate": 3.81687242798354e-06, + "logits/chosen": -6.194340705871582, + "logits/rejected": -6.1444597244262695, + "logps/chosen": -389.017822265625, + "logps/rejected": -445.76544189453125, + "loss": 0.49420690536499023, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -0.055042725056409836, + "rewards/margins": 0.5315954685211182, + "rewards/rejected": -0.5866381525993347, + "step": 170 + }, + { + "epoch": 0.9762900976290098, + "grad_norm": 3.2391903400421143, + "learning_rate": 3.7654320987654325e-06, + "logits/chosen": -6.190616607666016, + "logits/rejected": -6.1138739585876465, + "logps/chosen": -421.6878356933594, + "logps/rejected": -460.1180114746094, + "loss": 0.5374621391296387, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.22659805417060852, + "rewards/margins": 0.43736472725868225, + "rewards/rejected": -0.6639627814292908, + "step": 175 + }, + { + "epoch": 1.0, + "grad_norm": 4.300363063812256, + "learning_rate": 3.7139917695473256e-06, + "logits/chosen": -6.227687358856201, + "logits/rejected": -6.129978179931641, + "logps/chosen": -449.4482116699219, + "logps/rejected": -430.434326171875, + "loss": 0.49641432762146, + "rewards/accuracies": 0.8529411554336548, + "rewards/chosen": -0.03447714447975159, + "rewards/margins": 0.5154433846473694, + "rewards/rejected": -0.5499205589294434, + "step": 180 + }, + { + "epoch": 1.0278940027894004, + "grad_norm": 1.5967351198196411, + "learning_rate": 3.6625514403292183e-06, + "logits/chosen": -6.067181587219238, + "logits/rejected": -6.06889533996582, + "logps/chosen": -399.98968505859375, + "logps/rejected": -447.72039794921875, + "loss": 0.4079257011413574, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 0.03092392347753048, + "rewards/margins": 0.7688177824020386, + "rewards/rejected": -0.7378939390182495, + "step": 185 + }, + { + "epoch": 1.0557880055788005, + "grad_norm": 1.7787078619003296, + "learning_rate": 3.6111111111111115e-06, + "logits/chosen": -6.146653175354004, + "logits/rejected": -6.154219627380371, + "logps/chosen": -419.3291015625, + "logps/rejected": -463.1239318847656, + "loss": 0.42812933921813967, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -0.007557299919426441, + "rewards/margins": 0.7340337634086609, + "rewards/rejected": -0.7415911555290222, + "step": 190 + }, + { + "epoch": 1.0836820083682008, + "grad_norm": 1.7694693803787231, + "learning_rate": 3.559670781893004e-06, + "logits/chosen": -6.160831928253174, + "logits/rejected": -6.151050567626953, + "logps/chosen": -379.6001281738281, + "logps/rejected": -394.00531005859375, + "loss": 0.41410012245178224, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": 0.03558122366666794, + "rewards/margins": 0.8024934530258179, + "rewards/rejected": -0.7669121623039246, + "step": 195 + }, + { + "epoch": 1.1115760111576012, + "grad_norm": 1.3202013969421387, + "learning_rate": 3.5082304526748973e-06, + "logits/chosen": -6.110814094543457, + "logits/rejected": -6.176726341247559, + "logps/chosen": -400.85052490234375, + "logps/rejected": -405.44195556640625, + "loss": 0.391094446182251, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -0.02833392843604088, + "rewards/margins": 0.8581112623214722, + "rewards/rejected": -0.8864452242851257, + "step": 200 + }, + { + "epoch": 1.1394700139470013, + "grad_norm": 1.9367257356643677, + "learning_rate": 3.4567901234567904e-06, + "logits/chosen": -6.311105728149414, + "logits/rejected": -6.179243087768555, + "logps/chosen": -440.97625732421875, + "logps/rejected": -463.1673278808594, + "loss": 0.3870258331298828, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -0.012000990100204945, + "rewards/margins": 0.9945917129516602, + "rewards/rejected": -1.0065927505493164, + "step": 205 + }, + { + "epoch": 1.1673640167364017, + "grad_norm": 2.4570703506469727, + "learning_rate": 3.405349794238683e-06, + "logits/chosen": -6.199883937835693, + "logits/rejected": -6.160645484924316, + "logps/chosen": -448.8758850097656, + "logps/rejected": -439.4584045410156, + "loss": 0.3908271551132202, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 0.04756501317024231, + "rewards/margins": 0.8816453218460083, + "rewards/rejected": -0.8340802192687988, + "step": 210 + }, + { + "epoch": 1.195258019525802, + "grad_norm": 1.5992087125778198, + "learning_rate": 3.3539094650205767e-06, + "logits/chosen": -6.163644313812256, + "logits/rejected": -6.093722343444824, + "logps/chosen": -449.8214416503906, + "logps/rejected": -481.3743591308594, + "loss": 0.3612337350845337, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09812992066144943, + "rewards/margins": 0.9562546014785767, + "rewards/rejected": -0.858124852180481, + "step": 215 + }, + { + "epoch": 1.2231520223152021, + "grad_norm": 1.4101840257644653, + "learning_rate": 3.30246913580247e-06, + "logits/chosen": -6.281071662902832, + "logits/rejected": -6.337766170501709, + "logps/chosen": -281.46795654296875, + "logps/rejected": -336.60845947265625, + "loss": 0.43022546768188474, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -0.1486438810825348, + "rewards/margins": 0.7064955234527588, + "rewards/rejected": -0.855139434337616, + "step": 220 + }, + { + "epoch": 1.2510460251046025, + "grad_norm": 1.5817450284957886, + "learning_rate": 3.2510288065843625e-06, + "logits/chosen": -6.1745758056640625, + "logits/rejected": -6.192706108093262, + "logps/chosen": -399.51190185546875, + "logps/rejected": -424.1844177246094, + "loss": 0.3992297887802124, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -0.051351286470890045, + "rewards/margins": 0.827163815498352, + "rewards/rejected": -0.8785150647163391, + "step": 225 + }, + { + "epoch": 1.2789400278940029, + "grad_norm": 1.3157438039779663, + "learning_rate": 3.1995884773662556e-06, + "logits/chosen": -6.1543779373168945, + "logits/rejected": -6.1732258796691895, + "logps/chosen": -425.80755615234375, + "logps/rejected": -447.39453125, + "loss": 0.35113141536712644, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": 0.01585063710808754, + "rewards/margins": 1.0381678342819214, + "rewards/rejected": -1.0223171710968018, + "step": 230 + }, + { + "epoch": 1.3068340306834032, + "grad_norm": 1.4981003999710083, + "learning_rate": 3.1481481481481483e-06, + "logits/chosen": -6.220085620880127, + "logits/rejected": -6.215539455413818, + "logps/chosen": -393.77716064453125, + "logps/rejected": -474.4695739746094, + "loss": 0.35107009410858153, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 0.07319364696741104, + "rewards/margins": 1.0109622478485107, + "rewards/rejected": -0.9377686381340027, + "step": 235 + }, + { + "epoch": 1.3347280334728033, + "grad_norm": 1.6417901515960693, + "learning_rate": 3.0967078189300415e-06, + "logits/chosen": -6.223210334777832, + "logits/rejected": -6.187335968017578, + "logps/chosen": -454.0006408691406, + "logps/rejected": -439.3124084472656, + "loss": 0.3300657272338867, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09006929397583008, + "rewards/margins": 1.089814305305481, + "rewards/rejected": -0.9997450709342957, + "step": 240 + }, + { + "epoch": 1.3626220362622037, + "grad_norm": 1.3642381429672241, + "learning_rate": 3.0452674897119346e-06, + "logits/chosen": -6.211455821990967, + "logits/rejected": -6.1171441078186035, + "logps/chosen": -402.8586730957031, + "logps/rejected": -431.7958984375, + "loss": 0.3634498119354248, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -0.0397757962346077, + "rewards/margins": 1.0654242038726807, + "rewards/rejected": -1.1052000522613525, + "step": 245 + }, + { + "epoch": 1.390516039051604, + "grad_norm": 1.9008878469467163, + "learning_rate": 2.9938271604938273e-06, + "logits/chosen": -6.2566046714782715, + "logits/rejected": -6.222296714782715, + "logps/chosen": -433.3427734375, + "logps/rejected": -489.8169860839844, + "loss": 0.3385239839553833, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 0.04979880154132843, + "rewards/margins": 1.0801963806152344, + "rewards/rejected": -1.0303975343704224, + "step": 250 + }, + { + "epoch": 1.4184100418410042, + "grad_norm": 1.5302000045776367, + "learning_rate": 2.9423868312757204e-06, + "logits/chosen": -6.219883918762207, + "logits/rejected": -6.1428399085998535, + "logps/chosen": -428.93914794921875, + "logps/rejected": -493.17437744140625, + "loss": 0.3642880916595459, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.09988997876644135, + "rewards/margins": 1.0157719850540161, + "rewards/rejected": -1.1156619787216187, + "step": 255 + }, + { + "epoch": 1.4463040446304045, + "grad_norm": 1.898163080215454, + "learning_rate": 2.890946502057613e-06, + "logits/chosen": -6.123999118804932, + "logits/rejected": -6.1102776527404785, + "logps/chosen": -494.8221740722656, + "logps/rejected": -464.3958435058594, + "loss": 0.31599912643432615, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -0.03670971840620041, + "rewards/margins": 1.1374431848526, + "rewards/rejected": -1.1741528511047363, + "step": 260 + }, + { + "epoch": 1.4741980474198049, + "grad_norm": 1.7610703706741333, + "learning_rate": 2.8395061728395062e-06, + "logits/chosen": -6.1201653480529785, + "logits/rejected": -6.155616760253906, + "logps/chosen": -489.81201171875, + "logps/rejected": -511.11724853515625, + "loss": 0.30640478134155275, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -0.057888150215148926, + "rewards/margins": 1.2739557027816772, + "rewards/rejected": -1.3318438529968262, + "step": 265 + }, + { + "epoch": 1.502092050209205, + "grad_norm": 1.3035284280776978, + "learning_rate": 2.7880658436213994e-06, + "logits/chosen": -6.300120830535889, + "logits/rejected": -6.1527228355407715, + "logps/chosen": -433.002685546875, + "logps/rejected": -471.416015625, + "loss": 0.2971210956573486, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.03341478109359741, + "rewards/margins": 1.295318841934204, + "rewards/rejected": -1.261904001235962, + "step": 270 + }, + { + "epoch": 1.5299860529986054, + "grad_norm": 1.7996495962142944, + "learning_rate": 2.736625514403292e-06, + "logits/chosen": -6.244246482849121, + "logits/rejected": -6.148303985595703, + "logps/chosen": -403.05133056640625, + "logps/rejected": -380.7239990234375, + "loss": 0.36082763671875, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -0.016453832387924194, + "rewards/margins": 1.103393793106079, + "rewards/rejected": -1.1198475360870361, + "step": 275 + }, + { + "epoch": 1.5578800557880057, + "grad_norm": 2.193908929824829, + "learning_rate": 2.6851851851851856e-06, + "logits/chosen": -6.1752519607543945, + "logits/rejected": -6.249385833740234, + "logps/chosen": -482.1385803222656, + "logps/rejected": -504.50311279296875, + "loss": 0.3047311782836914, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": 0.20222747325897217, + "rewards/margins": 1.3389281034469604, + "rewards/rejected": -1.1367007493972778, + "step": 280 + }, + { + "epoch": 1.5857740585774058, + "grad_norm": 1.3961774110794067, + "learning_rate": 2.6337448559670788e-06, + "logits/chosen": -6.2296528816223145, + "logits/rejected": -6.2554612159729, + "logps/chosen": -370.95428466796875, + "logps/rejected": -465.4556579589844, + "loss": 0.31553618907928466, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -0.10641174018383026, + "rewards/margins": 1.2302197217941284, + "rewards/rejected": -1.336631417274475, + "step": 285 + }, + { + "epoch": 1.6136680613668062, + "grad_norm": 1.296229600906372, + "learning_rate": 2.5823045267489715e-06, + "logits/chosen": -6.245091438293457, + "logits/rejected": -6.184089660644531, + "logps/chosen": -420.3929138183594, + "logps/rejected": -484.88934326171875, + "loss": 0.28981173038482666, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -0.03132909908890724, + "rewards/margins": 1.3171762228012085, + "rewards/rejected": -1.3485053777694702, + "step": 290 + }, + { + "epoch": 1.6415620641562065, + "grad_norm": 1.6442267894744873, + "learning_rate": 2.5308641975308646e-06, + "logits/chosen": -6.171110153198242, + "logits/rejected": -6.142795562744141, + "logps/chosen": -480.75726318359375, + "logps/rejected": -476.60552978515625, + "loss": 0.31265769004821775, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 0.19172248244285583, + "rewards/margins": 1.2785747051239014, + "rewards/rejected": -1.0868523120880127, + "step": 295 + }, + { + "epoch": 1.6694560669456067, + "grad_norm": 1.4329712390899658, + "learning_rate": 2.4794238683127577e-06, + "logits/chosen": -6.2438530921936035, + "logits/rejected": -6.167972564697266, + "logps/chosen": -398.7960510253906, + "logps/rejected": -416.4847717285156, + "loss": 0.3146506786346436, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -0.06857718527317047, + "rewards/margins": 1.1940205097198486, + "rewards/rejected": -1.2625977993011475, + "step": 300 + }, + { + "epoch": 1.697350069735007, + "grad_norm": 1.2974578142166138, + "learning_rate": 2.4279835390946504e-06, + "logits/chosen": -6.2253522872924805, + "logits/rejected": -6.272657871246338, + "logps/chosen": -405.8463439941406, + "logps/rejected": -451.399169921875, + "loss": 0.2888355255126953, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.040930021554231644, + "rewards/margins": 1.2757234573364258, + "rewards/rejected": -1.3166534900665283, + "step": 305 + }, + { + "epoch": 1.7252440725244074, + "grad_norm": 1.865118384361267, + "learning_rate": 2.3765432098765435e-06, + "logits/chosen": -6.285956382751465, + "logits/rejected": -6.284170627593994, + "logps/chosen": -376.63543701171875, + "logps/rejected": -404.28131103515625, + "loss": 0.34237470626831057, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -0.3045514225959778, + "rewards/margins": 1.1192312240600586, + "rewards/rejected": -1.4237825870513916, + "step": 310 + }, + { + "epoch": 1.7531380753138075, + "grad_norm": 1.3118988275527954, + "learning_rate": 2.3251028806584362e-06, + "logits/chosen": -6.330617904663086, + "logits/rejected": -6.2173357009887695, + "logps/chosen": -411.6089782714844, + "logps/rejected": -405.8570861816406, + "loss": 0.30266666412353516, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -0.08704517036676407, + "rewards/margins": 1.2953578233718872, + "rewards/rejected": -1.382403016090393, + "step": 315 + }, + { + "epoch": 1.7810320781032078, + "grad_norm": 2.04417085647583, + "learning_rate": 2.2736625514403294e-06, + "logits/chosen": -6.217926502227783, + "logits/rejected": -6.2158918380737305, + "logps/chosen": -472.285400390625, + "logps/rejected": -431.4971618652344, + "loss": 0.2793667078018188, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -0.1601211428642273, + "rewards/margins": 1.4048590660095215, + "rewards/rejected": -1.564980149269104, + "step": 320 + }, + { + "epoch": 1.8089260808926082, + "grad_norm": 1.6911827325820923, + "learning_rate": 2.222222222222222e-06, + "logits/chosen": -6.251864910125732, + "logits/rejected": -6.211045742034912, + "logps/chosen": -341.28216552734375, + "logps/rejected": -376.18780517578125, + "loss": 0.3147707223892212, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09754385054111481, + "rewards/margins": 1.1059117317199707, + "rewards/rejected": -1.2034555673599243, + "step": 325 + }, + { + "epoch": 1.8368200836820083, + "grad_norm": 1.974066138267517, + "learning_rate": 2.1707818930041156e-06, + "logits/chosen": -6.293444633483887, + "logits/rejected": -6.139300346374512, + "logps/chosen": -379.49871826171875, + "logps/rejected": -472.8863830566406, + "loss": 0.25607054233551024, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1596871316432953, + "rewards/margins": 1.4329643249511719, + "rewards/rejected": -1.2732770442962646, + "step": 330 + }, + { + "epoch": 1.8647140864714087, + "grad_norm": 1.5956170558929443, + "learning_rate": 2.1193415637860083e-06, + "logits/chosen": -6.274291038513184, + "logits/rejected": -6.236454010009766, + "logps/chosen": -415.7079162597656, + "logps/rejected": -418.0286560058594, + "loss": 0.27625508308410646, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 0.059024881571531296, + "rewards/margins": 1.4343515634536743, + "rewards/rejected": -1.3753265142440796, + "step": 335 + }, + { + "epoch": 1.892608089260809, + "grad_norm": 1.2464466094970703, + "learning_rate": 2.0679012345679015e-06, + "logits/chosen": -6.2509446144104, + "logits/rejected": -6.182458400726318, + "logps/chosen": -478.2040100097656, + "logps/rejected": -437.84063720703125, + "loss": 0.2548848867416382, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -0.04282323271036148, + "rewards/margins": 1.5546890497207642, + "rewards/rejected": -1.597512125968933, + "step": 340 + }, + { + "epoch": 1.9205020920502092, + "grad_norm": 2.1779489517211914, + "learning_rate": 2.0164609053497946e-06, + "logits/chosen": -6.29345703125, + "logits/rejected": -6.269745826721191, + "logps/chosen": -481.49871826171875, + "logps/rejected": -510.9442443847656, + "loss": 0.24130442142486572, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11563090234994888, + "rewards/margins": 1.6354728937149048, + "rewards/rejected": -1.7511036396026611, + "step": 345 + }, + { + "epoch": 1.9483960948396095, + "grad_norm": 1.3139598369598389, + "learning_rate": 1.9650205761316873e-06, + "logits/chosen": -6.220123291015625, + "logits/rejected": -6.242280006408691, + "logps/chosen": -407.80804443359375, + "logps/rejected": -401.532958984375, + "loss": 0.31305124759674074, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -0.2018294781446457, + "rewards/margins": 1.1702749729156494, + "rewards/rejected": -1.372104525566101, + "step": 350 + }, + { + "epoch": 1.9762900976290099, + "grad_norm": 0.7809721827507019, + "learning_rate": 1.9135802469135804e-06, + "logits/chosen": -6.304191589355469, + "logits/rejected": -6.235474109649658, + "logps/chosen": -424.8037109375, + "logps/rejected": -488.02362060546875, + "loss": 0.20149641036987304, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.028687676414847374, + "rewards/margins": 1.7642078399658203, + "rewards/rejected": -1.7928955554962158, + "step": 355 + }, + { + "epoch": 2.0, + "grad_norm": 1.6750562191009521, + "learning_rate": 1.8621399176954735e-06, + "logits/chosen": -6.249261379241943, + "logits/rejected": -6.197325706481934, + "logps/chosen": -384.4549255371094, + "logps/rejected": -450.3216552734375, + "loss": 0.2427699089050293, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.29954269528388977, + "rewards/margins": 1.3911824226379395, + "rewards/rejected": -1.6907252073287964, + "step": 360 + }, + { + "epoch": 2.0278940027894, + "grad_norm": 1.4706112146377563, + "learning_rate": 1.8106995884773665e-06, + "logits/chosen": -6.396174430847168, + "logits/rejected": -6.274371147155762, + "logps/chosen": -365.41265869140625, + "logps/rejected": -433.7682189941406, + "loss": 0.2221092700958252, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.011003097519278526, + "rewards/margins": 1.695127248764038, + "rewards/rejected": -1.7061303853988647, + "step": 365 + }, + { + "epoch": 2.0557880055788007, + "grad_norm": 1.6888645887374878, + "learning_rate": 1.7592592592592594e-06, + "logits/chosen": -6.299677848815918, + "logits/rejected": -6.338282108306885, + "logps/chosen": -460.0879821777344, + "logps/rejected": -489.38031005859375, + "loss": 0.19544492959976195, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004593986086547375, + "rewards/margins": 1.8440967798233032, + "rewards/rejected": -1.8486907482147217, + "step": 370 + }, + { + "epoch": 2.083682008368201, + "grad_norm": 0.998672604560852, + "learning_rate": 1.7078189300411525e-06, + "logits/chosen": -6.379012584686279, + "logits/rejected": -6.325904846191406, + "logps/chosen": -421.4281311035156, + "logps/rejected": -519.4888916015625, + "loss": 0.198160719871521, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.06311459839344025, + "rewards/margins": 1.9342191219329834, + "rewards/rejected": -1.8711044788360596, + "step": 375 + }, + { + "epoch": 2.111576011157601, + "grad_norm": 1.0411016941070557, + "learning_rate": 1.6563786008230454e-06, + "logits/chosen": -6.331459999084473, + "logits/rejected": -6.230687141418457, + "logps/chosen": -381.6875915527344, + "logps/rejected": -436.54522705078125, + "loss": 0.17946820259094237, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09231789410114288, + "rewards/margins": 1.843112587928772, + "rewards/rejected": -1.9354302883148193, + "step": 380 + }, + { + "epoch": 2.1394700139470015, + "grad_norm": 1.7208154201507568, + "learning_rate": 1.6049382716049383e-06, + "logits/chosen": -6.328245639801025, + "logits/rejected": -6.348081111907959, + "logps/chosen": -479.76763916015625, + "logps/rejected": -452.90863037109375, + "loss": 0.18842675685882568, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0751882940530777, + "rewards/margins": 1.8472039699554443, + "rewards/rejected": -1.9223921298980713, + "step": 385 + }, + { + "epoch": 2.1673640167364017, + "grad_norm": 1.2389427423477173, + "learning_rate": 1.5534979423868312e-06, + "logits/chosen": -6.397286891937256, + "logits/rejected": -6.3191328048706055, + "logps/chosen": -385.9268493652344, + "logps/rejected": -412.65350341796875, + "loss": 0.2650484800338745, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.056599367409944534, + "rewards/margins": 1.42960524559021, + "rewards/rejected": -1.4862048625946045, + "step": 390 + }, + { + "epoch": 2.195258019525802, + "grad_norm": 1.1179108619689941, + "learning_rate": 1.5020576131687246e-06, + "logits/chosen": -6.415247440338135, + "logits/rejected": -6.397636413574219, + "logps/chosen": -443.06640625, + "logps/rejected": -513.3284912109375, + "loss": 0.18407890796661378, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1382242888212204, + "rewards/margins": 1.9546597003936768, + "rewards/rejected": -2.0928843021392822, + "step": 395 + }, + { + "epoch": 2.2231520223152024, + "grad_norm": 0.8621686697006226, + "learning_rate": 1.4506172839506175e-06, + "logits/chosen": -6.363873481750488, + "logits/rejected": -6.3192243576049805, + "logps/chosen": -446.64532470703125, + "logps/rejected": -479.0135192871094, + "loss": 0.1950451135635376, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -0.012487297877669334, + "rewards/margins": 1.913094162940979, + "rewards/rejected": -1.9255813360214233, + "step": 400 + }, + { + "epoch": 2.2510460251046025, + "grad_norm": 1.1525218486785889, + "learning_rate": 1.3991769547325104e-06, + "logits/chosen": -6.443626403808594, + "logits/rejected": -6.286838054656982, + "logps/chosen": -391.4197692871094, + "logps/rejected": -440.87335205078125, + "loss": 0.19900518655776978, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -0.0584767684340477, + "rewards/margins": 1.853623390197754, + "rewards/rejected": -1.9121001958847046, + "step": 405 + }, + { + "epoch": 2.2789400278940026, + "grad_norm": 0.9601161479949951, + "learning_rate": 1.3477366255144033e-06, + "logits/chosen": -6.391339302062988, + "logits/rejected": -6.394341468811035, + "logps/chosen": -403.82275390625, + "logps/rejected": -426.35418701171875, + "loss": 0.1934623599052429, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -0.085336834192276, + "rewards/margins": 2.0006136894226074, + "rewards/rejected": -2.0859506130218506, + "step": 410 + }, + { + "epoch": 2.306834030683403, + "grad_norm": 0.9428881406784058, + "learning_rate": 1.2962962962962962e-06, + "logits/chosen": -6.419486045837402, + "logits/rejected": -6.426458835601807, + "logps/chosen": -467.2015075683594, + "logps/rejected": -397.3327941894531, + "loss": 0.16998686790466308, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2600168287754059, + "rewards/margins": 2.0186750888824463, + "rewards/rejected": -1.7586581707000732, + "step": 415 + }, + { + "epoch": 2.3347280334728033, + "grad_norm": 1.0415892601013184, + "learning_rate": 1.2448559670781894e-06, + "logits/chosen": -6.375484466552734, + "logits/rejected": -6.470736026763916, + "logps/chosen": -452.739990234375, + "logps/rejected": -439.50146484375, + "loss": 0.16452697515487671, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 0.1337260901927948, + "rewards/margins": 1.992404580116272, + "rewards/rejected": -1.8586784601211548, + "step": 420 + }, + { + "epoch": 2.3626220362622035, + "grad_norm": 0.9086716175079346, + "learning_rate": 1.1934156378600823e-06, + "logits/chosen": -6.495448112487793, + "logits/rejected": -6.403497219085693, + "logps/chosen": -396.10760498046875, + "logps/rejected": -430.85968017578125, + "loss": 0.18365414142608644, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09083832800388336, + "rewards/margins": 1.8667247295379639, + "rewards/rejected": -1.9575631618499756, + "step": 425 + }, + { + "epoch": 2.390516039051604, + "grad_norm": 0.9541850090026855, + "learning_rate": 1.1419753086419754e-06, + "logits/chosen": -6.468809604644775, + "logits/rejected": -6.406615257263184, + "logps/chosen": -459.69097900390625, + "logps/rejected": -530.5374755859375, + "loss": 0.1429673194885254, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.14917811751365662, + "rewards/margins": 2.2435402870178223, + "rewards/rejected": -2.094362258911133, + "step": 430 + }, + { + "epoch": 2.418410041841004, + "grad_norm": 1.0821386575698853, + "learning_rate": 1.0905349794238683e-06, + "logits/chosen": -6.456778526306152, + "logits/rejected": -6.414129734039307, + "logps/chosen": -456.4554138183594, + "logps/rejected": -449.54827880859375, + "loss": 0.17326489686965943, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.02010413445532322, + "rewards/margins": 1.948240876197815, + "rewards/rejected": -1.9683449268341064, + "step": 435 + }, + { + "epoch": 2.4463040446304043, + "grad_norm": 0.9114539623260498, + "learning_rate": 1.0390946502057615e-06, + "logits/chosen": -6.498774528503418, + "logits/rejected": -6.3586835861206055, + "logps/chosen": -354.0647277832031, + "logps/rejected": -390.14697265625, + "loss": 0.17193338871002198, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.015950988978147507, + "rewards/margins": 1.9409234523773193, + "rewards/rejected": -1.9568744897842407, + "step": 440 + }, + { + "epoch": 2.474198047419805, + "grad_norm": 0.8584111332893372, + "learning_rate": 9.876543209876544e-07, + "logits/chosen": -6.4719557762146, + "logits/rejected": -6.405210018157959, + "logps/chosen": -412.19158935546875, + "logps/rejected": -474.6786193847656, + "loss": 0.2001863956451416, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.18442487716674805, + "rewards/margins": 1.7562973499298096, + "rewards/rejected": -1.9407222270965576, + "step": 445 + }, + { + "epoch": 2.502092050209205, + "grad_norm": 1.4773839712142944, + "learning_rate": 9.362139917695474e-07, + "logits/chosen": -6.447714805603027, + "logits/rejected": -6.4641876220703125, + "logps/chosen": -472.32025146484375, + "logps/rejected": -461.2222595214844, + "loss": 0.17260836362838744, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.16385754942893982, + "rewards/margins": 2.0242795944213867, + "rewards/rejected": -2.1881370544433594, + "step": 450 + }, + { + "epoch": 2.5299860529986056, + "grad_norm": 0.7697892785072327, + "learning_rate": 8.847736625514404e-07, + "logits/chosen": -6.366551399230957, + "logits/rejected": -6.3856306076049805, + "logps/chosen": -449.79620361328125, + "logps/rejected": -474.3729553222656, + "loss": 0.1455883264541626, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.01261213980615139, + "rewards/margins": 2.1382651329040527, + "rewards/rejected": -2.1508772373199463, + "step": 455 + }, + { + "epoch": 2.5578800557880057, + "grad_norm": 1.3464019298553467, + "learning_rate": 8.333333333333333e-07, + "logits/chosen": -6.469805717468262, + "logits/rejected": -6.392939567565918, + "logps/chosen": -445.14910888671875, + "logps/rejected": -535.4447021484375, + "loss": 0.11407300233840942, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.019692901521921158, + "rewards/margins": 2.465336322784424, + "rewards/rejected": -2.4850289821624756, + "step": 460 + }, + { + "epoch": 2.585774058577406, + "grad_norm": 1.2962851524353027, + "learning_rate": 7.818930041152265e-07, + "logits/chosen": -6.485465049743652, + "logits/rejected": -6.357900619506836, + "logps/chosen": -374.263916015625, + "logps/rejected": -404.3548889160156, + "loss": 0.16066277027130127, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.31045863032341003, + "rewards/margins": 2.0328869819641113, + "rewards/rejected": -2.3433451652526855, + "step": 465 + }, + { + "epoch": 2.6136680613668064, + "grad_norm": 1.3673157691955566, + "learning_rate": 7.304526748971194e-07, + "logits/chosen": -6.476637840270996, + "logits/rejected": -6.440759181976318, + "logps/chosen": -411.898681640625, + "logps/rejected": -430.9825744628906, + "loss": 0.1950114130973816, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.10394623130559921, + "rewards/margins": 1.8426212072372437, + "rewards/rejected": -1.9465672969818115, + "step": 470 + }, + { + "epoch": 2.6415620641562065, + "grad_norm": 1.3282204866409302, + "learning_rate": 6.790123456790124e-07, + "logits/chosen": -6.6462297439575195, + "logits/rejected": -6.528528690338135, + "logps/chosen": -307.07763671875, + "logps/rejected": -401.9754943847656, + "loss": 0.178068208694458, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.15701228380203247, + "rewards/margins": 1.9842045307159424, + "rewards/rejected": -2.14121675491333, + "step": 475 + }, + { + "epoch": 2.6694560669456067, + "grad_norm": 1.3630396127700806, + "learning_rate": 6.275720164609054e-07, + "logits/chosen": -6.462084770202637, + "logits/rejected": -6.3894147872924805, + "logps/chosen": -430.40985107421875, + "logps/rejected": -479.05999755859375, + "loss": 0.1616993546485901, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.22041141986846924, + "rewards/margins": 2.0579097270965576, + "rewards/rejected": -2.278320789337158, + "step": 480 + }, + { + "epoch": 2.6973500697350072, + "grad_norm": 1.253321647644043, + "learning_rate": 5.761316872427984e-07, + "logits/chosen": -6.495572566986084, + "logits/rejected": -6.418023109436035, + "logps/chosen": -425.351806640625, + "logps/rejected": -422.390625, + "loss": 0.17130649089813232, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07747139036655426, + "rewards/margins": 2.088853597640991, + "rewards/rejected": -2.166325092315674, + "step": 485 + }, + { + "epoch": 2.7252440725244074, + "grad_norm": 1.2965136766433716, + "learning_rate": 5.246913580246914e-07, + "logits/chosen": -6.563371181488037, + "logits/rejected": -6.486718654632568, + "logps/chosen": -424.76898193359375, + "logps/rejected": -478.27984619140625, + "loss": 0.1441935896873474, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07508904486894608, + "rewards/margins": 2.2910215854644775, + "rewards/rejected": -2.3661105632781982, + "step": 490 + }, + { + "epoch": 2.7531380753138075, + "grad_norm": 1.5180883407592773, + "learning_rate": 4.732510288065844e-07, + "logits/chosen": -6.455023288726807, + "logits/rejected": -6.4715986251831055, + "logps/chosen": -376.63836669921875, + "logps/rejected": -393.05029296875, + "loss": 0.16589791774749757, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1081310510635376, + "rewards/margins": 2.047032356262207, + "rewards/rejected": -2.155163288116455, + "step": 495 + }, + { + "epoch": 2.781032078103208, + "grad_norm": 0.967851996421814, + "learning_rate": 4.2181069958847745e-07, + "logits/chosen": -6.565103054046631, + "logits/rejected": -6.433382987976074, + "logps/chosen": -457.95867919921875, + "logps/rejected": -495.66876220703125, + "loss": 0.15293949842453003, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0993056520819664, + "rewards/margins": 2.18215012550354, + "rewards/rejected": -2.2814559936523438, + "step": 500 + } + ], + "logging_steps": 5, + "max_steps": 540, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 50, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 0.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/gemma-2b-dpo/checkpoint-500/training_args.bin b/gemma-2b-dpo/checkpoint-500/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..c02f6b48421cea8396dfcaf6d8319b0150c14d7b --- /dev/null +++ b/gemma-2b-dpo/checkpoint-500/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3be0616829fa305775b58136a03c46cbb233332c99572ecc66875666e4681dc9 +size 5688 diff --git a/gemma-2b-dpo/checkpoint-540/README.md b/gemma-2b-dpo/checkpoint-540/README.md new file mode 100644 index 0000000000000000000000000000000000000000..14d70987363c6b5210911e1b79d69ac05ee2d8b7 --- /dev/null +++ b/gemma-2b-dpo/checkpoint-540/README.md @@ -0,0 +1,209 @@ +--- +base_model: google/gemma-2-2b-it +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:google/gemma-2-2b-it +- dpo +- lora +- transformers +- trl +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/gemma-2b-dpo/checkpoint-540/adapter_config.json b/gemma-2b-dpo/checkpoint-540/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..bbaa22368e6c52ef59d4559074221a5020aba608 --- /dev/null +++ b/gemma-2b-dpo/checkpoint-540/adapter_config.json @@ -0,0 +1,46 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": null, + "base_model_name_or_path": "google/gemma-2-2b-it", + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "up_proj", + "q_proj", + "v_proj", + "gate_proj", + "k_proj", + "down_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/gemma-2b-dpo/checkpoint-540/adapter_model.safetensors b/gemma-2b-dpo/checkpoint-540/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..406f420568aa7a53fc6e0a3abbc91eef962cf49a --- /dev/null +++ b/gemma-2b-dpo/checkpoint-540/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0fc54ba320cd0d672d03020a5c3c95819f12346ad547967b2f3d1e3b2a4419a7 +size 83115256 diff --git a/gemma-2b-dpo/checkpoint-540/chat_template.jinja b/gemma-2b-dpo/checkpoint-540/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..923ec253c8dbefbb41cf084db7251df41d000f6d --- /dev/null +++ b/gemma-2b-dpo/checkpoint-540/chat_template.jinja @@ -0,0 +1,4 @@ +{{ bos_token }}{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if (message['role'] == 'assistant') %}{% set role = 'model' %}{% else %}{% set role = message['role'] %}{% endif %}{{ '' + role + ' +' + message['content'] | trim + ' +' }}{% endfor %}{% if add_generation_prompt %}{{'model +'}}{% endif %} \ No newline at end of file diff --git a/gemma-2b-dpo/checkpoint-540/optimizer.pt b/gemma-2b-dpo/checkpoint-540/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..e0cacfd358d0d9e979dc3e6ddcdef00f3f000a3b --- /dev/null +++ b/gemma-2b-dpo/checkpoint-540/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3b0f28d58725ebf76f93fca7cf88f073f2ee3f6e5200cd8d06d7b09c7f57fc35 +size 42616772 diff --git a/gemma-2b-dpo/checkpoint-540/rng_state.pth b/gemma-2b-dpo/checkpoint-540/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..e29ad0909e8c1afefa182e7ed890fa257c75af25 --- /dev/null +++ b/gemma-2b-dpo/checkpoint-540/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3cf9097d4513154245c48236b6ec5137b7ee2a21c9f58f2cba798ea275c6026f +size 14244 diff --git a/gemma-2b-dpo/checkpoint-540/scheduler.pt b/gemma-2b-dpo/checkpoint-540/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..54eae5ee787f3322304070f4dbbfbd04d43219e0 --- /dev/null +++ b/gemma-2b-dpo/checkpoint-540/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:666302dedd69644f9a0191b4adcbf00102ef11aa2f2326eb3026b4af61f09104 +size 1064 diff --git a/gemma-2b-dpo/checkpoint-540/tokenizer.json b/gemma-2b-dpo/checkpoint-540/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..6523cc0616b64c563af913a417dfa7eb01549a2c --- /dev/null +++ b/gemma-2b-dpo/checkpoint-540/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:394ace002a144ac6ad5486387502f2d36f70c087310c3d907857240c76fcb36e +size 34362748 diff --git a/gemma-2b-dpo/checkpoint-540/tokenizer_config.json b/gemma-2b-dpo/checkpoint-540/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..eabc4ed8ab34751069a1970d61e616ce93e53880 --- /dev/null +++ b/gemma-2b-dpo/checkpoint-540/tokenizer_config.json @@ -0,0 +1,19 @@ +{ + "backend": "tokenizers", + "bos_token": "", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "extra_special_tokens": [ + "", + "" + ], + "is_local": false, + "mask_token": "", + "model_max_length": 1000000000000000019884624838656, + "pad_token": "", + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "tokenizer_class": "GemmaTokenizer", + "unk_token": "", + "use_default_system_prompt": false +} diff --git a/gemma-2b-dpo/checkpoint-540/trainer_state.json b/gemma-2b-dpo/checkpoint-540/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..6e25a105dfcecc48ed00a92e244eb99635155c76 --- /dev/null +++ b/gemma-2b-dpo/checkpoint-540/trainer_state.json @@ -0,0 +1,1654 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 3.0, + "eval_steps": 500, + "global_step": 540, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.02789400278940028, + "grad_norm": 2.7052793502807617, + "learning_rate": 3.7037037037037036e-07, + "logits/chosen": -6.208017826080322, + "logits/rejected": -6.18649435043335, + "logps/chosen": -417.861328125, + "logps/rejected": -431.774169921875, + "loss": 0.6978574275970459, + "rewards/accuracies": 0.25, + "rewards/chosen": 0.0027643204666674137, + "rewards/margins": -0.00830078125, + "rewards/rejected": 0.011065103113651276, + "step": 5 + }, + { + "epoch": 0.05578800557880056, + "grad_norm": 1.9632341861724854, + "learning_rate": 8.333333333333333e-07, + "logits/chosen": -6.051701545715332, + "logits/rejected": -6.098549842834473, + "logps/chosen": -442.61126708984375, + "logps/rejected": -419.4737243652344, + "loss": 0.6965099811553955, + "rewards/accuracies": 0.44999998807907104, + "rewards/chosen": -0.0022751614451408386, + "rewards/margins": -0.004312096629291773, + "rewards/rejected": 0.002036933321505785, + "step": 10 + }, + { + "epoch": 0.08368200836820083, + "grad_norm": 1.9358311891555786, + "learning_rate": 1.2962962962962962e-06, + "logits/chosen": -6.1482648849487305, + "logits/rejected": -6.208896636962891, + "logps/chosen": -419.15155029296875, + "logps/rejected": -393.37322998046875, + "loss": 0.6971890449523925, + "rewards/accuracies": 0.4749999940395355, + "rewards/chosen": 0.004753150977194309, + "rewards/margins": -0.006633720360696316, + "rewards/rejected": 0.0113868722692132, + "step": 15 + }, + { + "epoch": 0.11157601115760112, + "grad_norm": 2.137960195541382, + "learning_rate": 1.7592592592592594e-06, + "logits/chosen": -6.1889142990112305, + "logits/rejected": -6.147027015686035, + "logps/chosen": -449.2413024902344, + "logps/rejected": -387.8244934082031, + "loss": 0.694630479812622, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.01739494316279888, + "rewards/margins": -0.0010753620881587267, + "rewards/rejected": -0.016319578513503075, + "step": 20 + }, + { + "epoch": 0.1394700139470014, + "grad_norm": 2.610708475112915, + "learning_rate": 2.222222222222222e-06, + "logits/chosen": -6.098985195159912, + "logits/rejected": -6.146561145782471, + "logps/chosen": -528.6546020507812, + "logps/rejected": -517.2868041992188, + "loss": 0.6923945903778076, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.013269426301121712, + "rewards/margins": 0.004172402434051037, + "rewards/rejected": 0.009097023867070675, + "step": 25 + }, + { + "epoch": 0.16736401673640167, + "grad_norm": 3.0792224407196045, + "learning_rate": 2.6851851851851856e-06, + "logits/chosen": -6.156611442565918, + "logits/rejected": -6.146718502044678, + "logps/chosen": -427.1123962402344, + "logps/rejected": -413.99810791015625, + "loss": 0.6963389396667481, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.0016099174972623587, + "rewards/margins": -0.0036583715118467808, + "rewards/rejected": 0.0052682883106172085, + "step": 30 + }, + { + "epoch": 0.19525801952580196, + "grad_norm": 2.40751051902771, + "learning_rate": 3.1481481481481483e-06, + "logits/chosen": -6.270221710205078, + "logits/rejected": -6.222764492034912, + "logps/chosen": -433.89312744140625, + "logps/rejected": -442.81378173828125, + "loss": 0.6875874042510987, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.003747978014871478, + "rewards/margins": 0.013033255934715271, + "rewards/rejected": -0.016781235113739967, + "step": 35 + }, + { + "epoch": 0.22315202231520223, + "grad_norm": 2.409308671951294, + "learning_rate": 3.6111111111111115e-06, + "logits/chosen": -6.171980857849121, + "logits/rejected": -6.236737251281738, + "logps/chosen": -411.51092529296875, + "logps/rejected": -454.578857421875, + "loss": 0.6975872993469239, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.0006048586219549179, + "rewards/margins": -0.006687240209430456, + "rewards/rejected": 0.0060823829844594, + "step": 40 + }, + { + "epoch": 0.2510460251046025, + "grad_norm": 2.8261911869049072, + "learning_rate": 4.074074074074074e-06, + "logits/chosen": -6.1633710861206055, + "logits/rejected": -6.245741367340088, + "logps/chosen": -373.363525390625, + "logps/rejected": -356.736572265625, + "loss": 0.6881499290466309, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": 0.01782766357064247, + "rewards/margins": 0.015053692273795605, + "rewards/rejected": 0.002773971762508154, + "step": 45 + }, + { + "epoch": 0.2789400278940028, + "grad_norm": 2.457179546356201, + "learning_rate": 4.537037037037038e-06, + "logits/chosen": -6.270019054412842, + "logits/rejected": -6.3202104568481445, + "logps/chosen": -466.30609130859375, + "logps/rejected": -476.45550537109375, + "loss": 0.6831833839416503, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": 0.0046669007278978825, + "rewards/margins": 0.023042945191264153, + "rewards/rejected": -0.018376046791672707, + "step": 50 + }, + { + "epoch": 0.3068340306834031, + "grad_norm": 1.6770554780960083, + "learning_rate": 5e-06, + "logits/chosen": -6.253265380859375, + "logits/rejected": -6.15267276763916, + "logps/chosen": -352.24908447265625, + "logps/rejected": -447.11444091796875, + "loss": 0.6791603088378906, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.010960197076201439, + "rewards/margins": 0.031198084354400635, + "rewards/rejected": -0.042158275842666626, + "step": 55 + }, + { + "epoch": 0.33472803347280333, + "grad_norm": 2.6027019023895264, + "learning_rate": 4.9485596707818935e-06, + "logits/chosen": -6.205387592315674, + "logits/rejected": -6.259293079376221, + "logps/chosen": -439.732421875, + "logps/rejected": -412.8099670410156, + "loss": 0.6736814975738525, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.010224836878478527, + "rewards/margins": 0.04464374855160713, + "rewards/rejected": -0.054868586361408234, + "step": 60 + }, + { + "epoch": 0.36262203626220363, + "grad_norm": 2.1717166900634766, + "learning_rate": 4.897119341563787e-06, + "logits/chosen": -6.1334547996521, + "logits/rejected": -6.148266792297363, + "logps/chosen": -390.00433349609375, + "logps/rejected": -376.676513671875, + "loss": 0.6825191974639893, + "rewards/accuracies": 0.5249999761581421, + "rewards/chosen": -0.04612758383154869, + "rewards/margins": 0.03951488807797432, + "rewards/rejected": -0.08564247190952301, + "step": 65 + }, + { + "epoch": 0.3905160390516039, + "grad_norm": 2.2574119567871094, + "learning_rate": 4.845679012345679e-06, + "logits/chosen": -6.236250877380371, + "logits/rejected": -6.165186882019043, + "logps/chosen": -411.1315002441406, + "logps/rejected": -447.22100830078125, + "loss": 0.6402256488800049, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.017545931041240692, + "rewards/margins": 0.12250369787216187, + "rewards/rejected": -0.14004963636398315, + "step": 70 + }, + { + "epoch": 0.41841004184100417, + "grad_norm": 2.3837037086486816, + "learning_rate": 4.794238683127572e-06, + "logits/chosen": -6.256176948547363, + "logits/rejected": -6.213258266448975, + "logps/chosen": -437.463623046875, + "logps/rejected": -404.8554992675781, + "loss": 0.6703986167907715, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.03428981825709343, + "rewards/margins": 0.05360151082277298, + "rewards/rejected": -0.08789133280515671, + "step": 75 + }, + { + "epoch": 0.44630404463040446, + "grad_norm": 3.304287910461426, + "learning_rate": 4.742798353909465e-06, + "logits/chosen": -6.2820305824279785, + "logits/rejected": -6.221312522888184, + "logps/chosen": -455.2318420410156, + "logps/rejected": -422.08837890625, + "loss": 0.7040590286254883, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.0881255492568016, + "rewards/margins": -0.006679975427687168, + "rewards/rejected": -0.08144557476043701, + "step": 80 + }, + { + "epoch": 0.47419804741980476, + "grad_norm": 2.6312427520751953, + "learning_rate": 4.691358024691358e-06, + "logits/chosen": -6.1796159744262695, + "logits/rejected": -6.193436622619629, + "logps/chosen": -423.59930419921875, + "logps/rejected": -486.1690979003906, + "loss": 0.6397527694702149, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.043790053576231, + "rewards/margins": 0.12486596405506134, + "rewards/rejected": -0.16865602135658264, + "step": 85 + }, + { + "epoch": 0.502092050209205, + "grad_norm": 2.3493549823760986, + "learning_rate": 4.6399176954732515e-06, + "logits/chosen": -6.136630058288574, + "logits/rejected": -6.202858924865723, + "logps/chosen": -467.627685546875, + "logps/rejected": -441.41241455078125, + "loss": 0.5936434745788575, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": 0.06891433894634247, + "rewards/margins": 0.26186972856521606, + "rewards/rejected": -0.19295534491539001, + "step": 90 + }, + { + "epoch": 0.5299860529986054, + "grad_norm": 2.4952447414398193, + "learning_rate": 4.588477366255145e-06, + "logits/chosen": -6.1503586769104, + "logits/rejected": -6.144400596618652, + "logps/chosen": -355.2735290527344, + "logps/rejected": -409.51702880859375, + "loss": 0.6157774925231934, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.0699276328086853, + "rewards/margins": 0.22694334387779236, + "rewards/rejected": -0.2968709468841553, + "step": 95 + }, + { + "epoch": 0.5578800557880056, + "grad_norm": 2.5470480918884277, + "learning_rate": 4.537037037037038e-06, + "logits/chosen": -6.14028263092041, + "logits/rejected": -6.104605197906494, + "logps/chosen": -429.1048889160156, + "logps/rejected": -454.7377014160156, + "loss": 0.6300024032592774, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.05769091844558716, + "rewards/margins": 0.14401891827583313, + "rewards/rejected": -0.2017098367214203, + "step": 100 + }, + { + "epoch": 0.5857740585774058, + "grad_norm": 2.6023478507995605, + "learning_rate": 4.485596707818931e-06, + "logits/chosen": -6.196796894073486, + "logits/rejected": -6.226622104644775, + "logps/chosen": -442.52685546875, + "logps/rejected": -516.7334594726562, + "loss": 0.6245638847351074, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.18519389629364014, + "rewards/margins": 0.24079546332359314, + "rewards/rejected": -0.42598938941955566, + "step": 105 + }, + { + "epoch": 0.6136680613668062, + "grad_norm": 2.0638511180877686, + "learning_rate": 4.434156378600823e-06, + "logits/chosen": -6.1991071701049805, + "logits/rejected": -6.119466781616211, + "logps/chosen": -410.86669921875, + "logps/rejected": -450.365478515625, + "loss": 0.6201879501342773, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.0853937491774559, + "rewards/margins": 0.17646726965904236, + "rewards/rejected": -0.26186102628707886, + "step": 110 + }, + { + "epoch": 0.6415620641562064, + "grad_norm": 2.3625364303588867, + "learning_rate": 4.382716049382716e-06, + "logits/chosen": -6.220386505126953, + "logits/rejected": -6.223449230194092, + "logps/chosen": -435.92626953125, + "logps/rejected": -495.6065368652344, + "loss": 0.6151515483856201, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.2208656519651413, + "rewards/margins": 0.21172885596752167, + "rewards/rejected": -0.43259453773498535, + "step": 115 + }, + { + "epoch": 0.6694560669456067, + "grad_norm": 1.8082666397094727, + "learning_rate": 4.331275720164609e-06, + "logits/chosen": -6.262181282043457, + "logits/rejected": -6.250016212463379, + "logps/chosen": -354.21795654296875, + "logps/rejected": -389.14556884765625, + "loss": 0.6109379768371582, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.17473874986171722, + "rewards/margins": 0.19315743446350098, + "rewards/rejected": -0.367896169424057, + "step": 120 + }, + { + "epoch": 0.697350069735007, + "grad_norm": 1.9556658267974854, + "learning_rate": 4.2798353909465025e-06, + "logits/chosen": -6.167700290679932, + "logits/rejected": -6.1421003341674805, + "logps/chosen": -379.1827392578125, + "logps/rejected": -426.69549560546875, + "loss": 0.6202447414398193, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.17535170912742615, + "rewards/margins": 0.18863627314567566, + "rewards/rejected": -0.3639879822731018, + "step": 125 + }, + { + "epoch": 0.7252440725244073, + "grad_norm": 3.001298666000366, + "learning_rate": 4.228395061728396e-06, + "logits/chosen": -6.2535905838012695, + "logits/rejected": -6.232400894165039, + "logps/chosen": -424.8458557128906, + "logps/rejected": -494.52960205078125, + "loss": 0.5493914127349854, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.22046081721782684, + "rewards/margins": 0.3785194754600525, + "rewards/rejected": -0.5989803075790405, + "step": 130 + }, + { + "epoch": 0.7531380753138075, + "grad_norm": 2.5210413932800293, + "learning_rate": 4.176954732510288e-06, + "logits/chosen": -6.078260898590088, + "logits/rejected": -6.0126447677612305, + "logps/chosen": -414.69940185546875, + "logps/rejected": -432.3282165527344, + "loss": 0.579456901550293, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.15084640681743622, + "rewards/margins": 0.2960701882839203, + "rewards/rejected": -0.4469165802001953, + "step": 135 + }, + { + "epoch": 0.7810320781032078, + "grad_norm": 2.6807265281677246, + "learning_rate": 4.125514403292181e-06, + "logits/chosen": -6.243051052093506, + "logits/rejected": -6.220357418060303, + "logps/chosen": -400.6156311035156, + "logps/rejected": -450.0393981933594, + "loss": 0.5514531135559082, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.25727975368499756, + "rewards/margins": 0.3991738259792328, + "rewards/rejected": -0.6564534902572632, + "step": 140 + }, + { + "epoch": 0.8089260808926081, + "grad_norm": 2.4137353897094727, + "learning_rate": 4.074074074074074e-06, + "logits/chosen": -6.186558246612549, + "logits/rejected": -6.139374256134033, + "logps/chosen": -442.314453125, + "logps/rejected": -491.584716796875, + "loss": 0.5633067131042481, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.35670942068099976, + "rewards/margins": 0.38038796186447144, + "rewards/rejected": -0.7370973825454712, + "step": 145 + }, + { + "epoch": 0.8368200836820083, + "grad_norm": 2.1043145656585693, + "learning_rate": 4.022633744855967e-06, + "logits/chosen": -6.177689552307129, + "logits/rejected": -6.167322635650635, + "logps/chosen": -435.2288513183594, + "logps/rejected": -469.41436767578125, + "loss": 0.5640112876892089, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.32313138246536255, + "rewards/margins": 0.45979684591293335, + "rewards/rejected": -0.7829282283782959, + "step": 150 + }, + { + "epoch": 0.8647140864714087, + "grad_norm": 1.9623620510101318, + "learning_rate": 3.97119341563786e-06, + "logits/chosen": -6.0590739250183105, + "logits/rejected": -6.033650875091553, + "logps/chosen": -421.5000915527344, + "logps/rejected": -370.30902099609375, + "loss": 0.6319089412689209, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.29380694031715393, + "rewards/margins": 0.1507532149553299, + "rewards/rejected": -0.44456014037132263, + "step": 155 + }, + { + "epoch": 0.8926080892608089, + "grad_norm": 1.9432786703109741, + "learning_rate": 3.9197530864197535e-06, + "logits/chosen": -6.267019271850586, + "logits/rejected": -6.214621067047119, + "logps/chosen": -417.11724853515625, + "logps/rejected": -431.72698974609375, + "loss": 0.5186795234680176, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -0.18987610936164856, + "rewards/margins": 0.5616164803504944, + "rewards/rejected": -0.7514925599098206, + "step": 160 + }, + { + "epoch": 0.9205020920502092, + "grad_norm": 1.8146827220916748, + "learning_rate": 3.868312757201647e-06, + "logits/chosen": -6.230213165283203, + "logits/rejected": -6.109362602233887, + "logps/chosen": -376.6744384765625, + "logps/rejected": -376.6526184082031, + "loss": 0.5346522808074952, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.19152329862117767, + "rewards/margins": 0.38060134649276733, + "rewards/rejected": -0.5721246004104614, + "step": 165 + }, + { + "epoch": 0.9483960948396095, + "grad_norm": 1.936680793762207, + "learning_rate": 3.81687242798354e-06, + "logits/chosen": -6.194340705871582, + "logits/rejected": -6.1444597244262695, + "logps/chosen": -389.017822265625, + "logps/rejected": -445.76544189453125, + "loss": 0.49420690536499023, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -0.055042725056409836, + "rewards/margins": 0.5315954685211182, + "rewards/rejected": -0.5866381525993347, + "step": 170 + }, + { + "epoch": 0.9762900976290098, + "grad_norm": 3.2391903400421143, + "learning_rate": 3.7654320987654325e-06, + "logits/chosen": -6.190616607666016, + "logits/rejected": -6.1138739585876465, + "logps/chosen": -421.6878356933594, + "logps/rejected": -460.1180114746094, + "loss": 0.5374621391296387, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.22659805417060852, + "rewards/margins": 0.43736472725868225, + "rewards/rejected": -0.6639627814292908, + "step": 175 + }, + { + "epoch": 1.0, + "grad_norm": 4.300363063812256, + "learning_rate": 3.7139917695473256e-06, + "logits/chosen": -6.227687358856201, + "logits/rejected": -6.129978179931641, + "logps/chosen": -449.4482116699219, + "logps/rejected": -430.434326171875, + "loss": 0.49641432762146, + "rewards/accuracies": 0.8529411554336548, + "rewards/chosen": -0.03447714447975159, + "rewards/margins": 0.5154433846473694, + "rewards/rejected": -0.5499205589294434, + "step": 180 + }, + { + "epoch": 1.0278940027894004, + "grad_norm": 1.5967351198196411, + "learning_rate": 3.6625514403292183e-06, + "logits/chosen": -6.067181587219238, + "logits/rejected": -6.06889533996582, + "logps/chosen": -399.98968505859375, + "logps/rejected": -447.72039794921875, + "loss": 0.4079257011413574, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 0.03092392347753048, + "rewards/margins": 0.7688177824020386, + "rewards/rejected": -0.7378939390182495, + "step": 185 + }, + { + "epoch": 1.0557880055788005, + "grad_norm": 1.7787078619003296, + "learning_rate": 3.6111111111111115e-06, + "logits/chosen": -6.146653175354004, + "logits/rejected": -6.154219627380371, + "logps/chosen": -419.3291015625, + "logps/rejected": -463.1239318847656, + "loss": 0.42812933921813967, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -0.007557299919426441, + "rewards/margins": 0.7340337634086609, + "rewards/rejected": -0.7415911555290222, + "step": 190 + }, + { + "epoch": 1.0836820083682008, + "grad_norm": 1.7694693803787231, + "learning_rate": 3.559670781893004e-06, + "logits/chosen": -6.160831928253174, + "logits/rejected": -6.151050567626953, + "logps/chosen": -379.6001281738281, + "logps/rejected": -394.00531005859375, + "loss": 0.41410012245178224, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": 0.03558122366666794, + "rewards/margins": 0.8024934530258179, + "rewards/rejected": -0.7669121623039246, + "step": 195 + }, + { + "epoch": 1.1115760111576012, + "grad_norm": 1.3202013969421387, + "learning_rate": 3.5082304526748973e-06, + "logits/chosen": -6.110814094543457, + "logits/rejected": -6.176726341247559, + "logps/chosen": -400.85052490234375, + "logps/rejected": -405.44195556640625, + "loss": 0.391094446182251, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": -0.02833392843604088, + "rewards/margins": 0.8581112623214722, + "rewards/rejected": -0.8864452242851257, + "step": 200 + }, + { + "epoch": 1.1394700139470013, + "grad_norm": 1.9367257356643677, + "learning_rate": 3.4567901234567904e-06, + "logits/chosen": -6.311105728149414, + "logits/rejected": -6.179243087768555, + "logps/chosen": -440.97625732421875, + "logps/rejected": -463.1673278808594, + "loss": 0.3870258331298828, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -0.012000990100204945, + "rewards/margins": 0.9945917129516602, + "rewards/rejected": -1.0065927505493164, + "step": 205 + }, + { + "epoch": 1.1673640167364017, + "grad_norm": 2.4570703506469727, + "learning_rate": 3.405349794238683e-06, + "logits/chosen": -6.199883937835693, + "logits/rejected": -6.160645484924316, + "logps/chosen": -448.8758850097656, + "logps/rejected": -439.4584045410156, + "loss": 0.3908271551132202, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 0.04756501317024231, + "rewards/margins": 0.8816453218460083, + "rewards/rejected": -0.8340802192687988, + "step": 210 + }, + { + "epoch": 1.195258019525802, + "grad_norm": 1.5992087125778198, + "learning_rate": 3.3539094650205767e-06, + "logits/chosen": -6.163644313812256, + "logits/rejected": -6.093722343444824, + "logps/chosen": -449.8214416503906, + "logps/rejected": -481.3743591308594, + "loss": 0.3612337350845337, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09812992066144943, + "rewards/margins": 0.9562546014785767, + "rewards/rejected": -0.858124852180481, + "step": 215 + }, + { + "epoch": 1.2231520223152021, + "grad_norm": 1.4101840257644653, + "learning_rate": 3.30246913580247e-06, + "logits/chosen": -6.281071662902832, + "logits/rejected": -6.337766170501709, + "logps/chosen": -281.46795654296875, + "logps/rejected": -336.60845947265625, + "loss": 0.43022546768188474, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -0.1486438810825348, + "rewards/margins": 0.7064955234527588, + "rewards/rejected": -0.855139434337616, + "step": 220 + }, + { + "epoch": 1.2510460251046025, + "grad_norm": 1.5817450284957886, + "learning_rate": 3.2510288065843625e-06, + "logits/chosen": -6.1745758056640625, + "logits/rejected": -6.192706108093262, + "logps/chosen": -399.51190185546875, + "logps/rejected": -424.1844177246094, + "loss": 0.3992297887802124, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -0.051351286470890045, + "rewards/margins": 0.827163815498352, + "rewards/rejected": -0.8785150647163391, + "step": 225 + }, + { + "epoch": 1.2789400278940029, + "grad_norm": 1.3157438039779663, + "learning_rate": 3.1995884773662556e-06, + "logits/chosen": -6.1543779373168945, + "logits/rejected": -6.1732258796691895, + "logps/chosen": -425.80755615234375, + "logps/rejected": -447.39453125, + "loss": 0.35113141536712644, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": 0.01585063710808754, + "rewards/margins": 1.0381678342819214, + "rewards/rejected": -1.0223171710968018, + "step": 230 + }, + { + "epoch": 1.3068340306834032, + "grad_norm": 1.4981003999710083, + "learning_rate": 3.1481481481481483e-06, + "logits/chosen": -6.220085620880127, + "logits/rejected": -6.215539455413818, + "logps/chosen": -393.77716064453125, + "logps/rejected": -474.4695739746094, + "loss": 0.35107009410858153, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 0.07319364696741104, + "rewards/margins": 1.0109622478485107, + "rewards/rejected": -0.9377686381340027, + "step": 235 + }, + { + "epoch": 1.3347280334728033, + "grad_norm": 1.6417901515960693, + "learning_rate": 3.0967078189300415e-06, + "logits/chosen": -6.223210334777832, + "logits/rejected": -6.187335968017578, + "logps/chosen": -454.0006408691406, + "logps/rejected": -439.3124084472656, + "loss": 0.3300657272338867, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09006929397583008, + "rewards/margins": 1.089814305305481, + "rewards/rejected": -0.9997450709342957, + "step": 240 + }, + { + "epoch": 1.3626220362622037, + "grad_norm": 1.3642381429672241, + "learning_rate": 3.0452674897119346e-06, + "logits/chosen": -6.211455821990967, + "logits/rejected": -6.1171441078186035, + "logps/chosen": -402.8586730957031, + "logps/rejected": -431.7958984375, + "loss": 0.3634498119354248, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -0.0397757962346077, + "rewards/margins": 1.0654242038726807, + "rewards/rejected": -1.1052000522613525, + "step": 245 + }, + { + "epoch": 1.390516039051604, + "grad_norm": 1.9008878469467163, + "learning_rate": 2.9938271604938273e-06, + "logits/chosen": -6.2566046714782715, + "logits/rejected": -6.222296714782715, + "logps/chosen": -433.3427734375, + "logps/rejected": -489.8169860839844, + "loss": 0.3385239839553833, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 0.04979880154132843, + "rewards/margins": 1.0801963806152344, + "rewards/rejected": -1.0303975343704224, + "step": 250 + }, + { + "epoch": 1.4184100418410042, + "grad_norm": 1.5302000045776367, + "learning_rate": 2.9423868312757204e-06, + "logits/chosen": -6.219883918762207, + "logits/rejected": -6.1428399085998535, + "logps/chosen": -428.93914794921875, + "logps/rejected": -493.17437744140625, + "loss": 0.3642880916595459, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.09988997876644135, + "rewards/margins": 1.0157719850540161, + "rewards/rejected": -1.1156619787216187, + "step": 255 + }, + { + "epoch": 1.4463040446304045, + "grad_norm": 1.898163080215454, + "learning_rate": 2.890946502057613e-06, + "logits/chosen": -6.123999118804932, + "logits/rejected": -6.1102776527404785, + "logps/chosen": -494.8221740722656, + "logps/rejected": -464.3958435058594, + "loss": 0.31599912643432615, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -0.03670971840620041, + "rewards/margins": 1.1374431848526, + "rewards/rejected": -1.1741528511047363, + "step": 260 + }, + { + "epoch": 1.4741980474198049, + "grad_norm": 1.7610703706741333, + "learning_rate": 2.8395061728395062e-06, + "logits/chosen": -6.1201653480529785, + "logits/rejected": -6.155616760253906, + "logps/chosen": -489.81201171875, + "logps/rejected": -511.11724853515625, + "loss": 0.30640478134155275, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -0.057888150215148926, + "rewards/margins": 1.2739557027816772, + "rewards/rejected": -1.3318438529968262, + "step": 265 + }, + { + "epoch": 1.502092050209205, + "grad_norm": 1.3035284280776978, + "learning_rate": 2.7880658436213994e-06, + "logits/chosen": -6.300120830535889, + "logits/rejected": -6.1527228355407715, + "logps/chosen": -433.002685546875, + "logps/rejected": -471.416015625, + "loss": 0.2971210956573486, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.03341478109359741, + "rewards/margins": 1.295318841934204, + "rewards/rejected": -1.261904001235962, + "step": 270 + }, + { + "epoch": 1.5299860529986054, + "grad_norm": 1.7996495962142944, + "learning_rate": 2.736625514403292e-06, + "logits/chosen": -6.244246482849121, + "logits/rejected": -6.148303985595703, + "logps/chosen": -403.05133056640625, + "logps/rejected": -380.7239990234375, + "loss": 0.36082763671875, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -0.016453832387924194, + "rewards/margins": 1.103393793106079, + "rewards/rejected": -1.1198475360870361, + "step": 275 + }, + { + "epoch": 1.5578800557880057, + "grad_norm": 2.193908929824829, + "learning_rate": 2.6851851851851856e-06, + "logits/chosen": -6.1752519607543945, + "logits/rejected": -6.249385833740234, + "logps/chosen": -482.1385803222656, + "logps/rejected": -504.50311279296875, + "loss": 0.3047311782836914, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": 0.20222747325897217, + "rewards/margins": 1.3389281034469604, + "rewards/rejected": -1.1367007493972778, + "step": 280 + }, + { + "epoch": 1.5857740585774058, + "grad_norm": 1.3961774110794067, + "learning_rate": 2.6337448559670788e-06, + "logits/chosen": -6.2296528816223145, + "logits/rejected": -6.2554612159729, + "logps/chosen": -370.95428466796875, + "logps/rejected": -465.4556579589844, + "loss": 0.31553618907928466, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -0.10641174018383026, + "rewards/margins": 1.2302197217941284, + "rewards/rejected": -1.336631417274475, + "step": 285 + }, + { + "epoch": 1.6136680613668062, + "grad_norm": 1.296229600906372, + "learning_rate": 2.5823045267489715e-06, + "logits/chosen": -6.245091438293457, + "logits/rejected": -6.184089660644531, + "logps/chosen": -420.3929138183594, + "logps/rejected": -484.88934326171875, + "loss": 0.28981173038482666, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -0.03132909908890724, + "rewards/margins": 1.3171762228012085, + "rewards/rejected": -1.3485053777694702, + "step": 290 + }, + { + "epoch": 1.6415620641562065, + "grad_norm": 1.6442267894744873, + "learning_rate": 2.5308641975308646e-06, + "logits/chosen": -6.171110153198242, + "logits/rejected": -6.142795562744141, + "logps/chosen": -480.75726318359375, + "logps/rejected": -476.60552978515625, + "loss": 0.31265769004821775, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 0.19172248244285583, + "rewards/margins": 1.2785747051239014, + "rewards/rejected": -1.0868523120880127, + "step": 295 + }, + { + "epoch": 1.6694560669456067, + "grad_norm": 1.4329712390899658, + "learning_rate": 2.4794238683127577e-06, + "logits/chosen": -6.2438530921936035, + "logits/rejected": -6.167972564697266, + "logps/chosen": -398.7960510253906, + "logps/rejected": -416.4847717285156, + "loss": 0.3146506786346436, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -0.06857718527317047, + "rewards/margins": 1.1940205097198486, + "rewards/rejected": -1.2625977993011475, + "step": 300 + }, + { + "epoch": 1.697350069735007, + "grad_norm": 1.2974578142166138, + "learning_rate": 2.4279835390946504e-06, + "logits/chosen": -6.2253522872924805, + "logits/rejected": -6.272657871246338, + "logps/chosen": -405.8463439941406, + "logps/rejected": -451.399169921875, + "loss": 0.2888355255126953, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.040930021554231644, + "rewards/margins": 1.2757234573364258, + "rewards/rejected": -1.3166534900665283, + "step": 305 + }, + { + "epoch": 1.7252440725244074, + "grad_norm": 1.865118384361267, + "learning_rate": 2.3765432098765435e-06, + "logits/chosen": -6.285956382751465, + "logits/rejected": -6.284170627593994, + "logps/chosen": -376.63543701171875, + "logps/rejected": -404.28131103515625, + "loss": 0.34237470626831057, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -0.3045514225959778, + "rewards/margins": 1.1192312240600586, + "rewards/rejected": -1.4237825870513916, + "step": 310 + }, + { + "epoch": 1.7531380753138075, + "grad_norm": 1.3118988275527954, + "learning_rate": 2.3251028806584362e-06, + "logits/chosen": -6.330617904663086, + "logits/rejected": -6.2173357009887695, + "logps/chosen": -411.6089782714844, + "logps/rejected": -405.8570861816406, + "loss": 0.30266666412353516, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -0.08704517036676407, + "rewards/margins": 1.2953578233718872, + "rewards/rejected": -1.382403016090393, + "step": 315 + }, + { + "epoch": 1.7810320781032078, + "grad_norm": 2.04417085647583, + "learning_rate": 2.2736625514403294e-06, + "logits/chosen": -6.217926502227783, + "logits/rejected": -6.2158918380737305, + "logps/chosen": -472.285400390625, + "logps/rejected": -431.4971618652344, + "loss": 0.2793667078018188, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -0.1601211428642273, + "rewards/margins": 1.4048590660095215, + "rewards/rejected": -1.564980149269104, + "step": 320 + }, + { + "epoch": 1.8089260808926082, + "grad_norm": 1.6911827325820923, + "learning_rate": 2.222222222222222e-06, + "logits/chosen": -6.251864910125732, + "logits/rejected": -6.211045742034912, + "logps/chosen": -341.28216552734375, + "logps/rejected": -376.18780517578125, + "loss": 0.3147707223892212, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09754385054111481, + "rewards/margins": 1.1059117317199707, + "rewards/rejected": -1.2034555673599243, + "step": 325 + }, + { + "epoch": 1.8368200836820083, + "grad_norm": 1.974066138267517, + "learning_rate": 2.1707818930041156e-06, + "logits/chosen": -6.293444633483887, + "logits/rejected": -6.139300346374512, + "logps/chosen": -379.49871826171875, + "logps/rejected": -472.8863830566406, + "loss": 0.25607054233551024, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1596871316432953, + "rewards/margins": 1.4329643249511719, + "rewards/rejected": -1.2732770442962646, + "step": 330 + }, + { + "epoch": 1.8647140864714087, + "grad_norm": 1.5956170558929443, + "learning_rate": 2.1193415637860083e-06, + "logits/chosen": -6.274291038513184, + "logits/rejected": -6.236454010009766, + "logps/chosen": -415.7079162597656, + "logps/rejected": -418.0286560058594, + "loss": 0.27625508308410646, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 0.059024881571531296, + "rewards/margins": 1.4343515634536743, + "rewards/rejected": -1.3753265142440796, + "step": 335 + }, + { + "epoch": 1.892608089260809, + "grad_norm": 1.2464466094970703, + "learning_rate": 2.0679012345679015e-06, + "logits/chosen": -6.2509446144104, + "logits/rejected": -6.182458400726318, + "logps/chosen": -478.2040100097656, + "logps/rejected": -437.84063720703125, + "loss": 0.2548848867416382, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -0.04282323271036148, + "rewards/margins": 1.5546890497207642, + "rewards/rejected": -1.597512125968933, + "step": 340 + }, + { + "epoch": 1.9205020920502092, + "grad_norm": 2.1779489517211914, + "learning_rate": 2.0164609053497946e-06, + "logits/chosen": -6.29345703125, + "logits/rejected": -6.269745826721191, + "logps/chosen": -481.49871826171875, + "logps/rejected": -510.9442443847656, + "loss": 0.24130442142486572, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11563090234994888, + "rewards/margins": 1.6354728937149048, + "rewards/rejected": -1.7511036396026611, + "step": 345 + }, + { + "epoch": 1.9483960948396095, + "grad_norm": 1.3139598369598389, + "learning_rate": 1.9650205761316873e-06, + "logits/chosen": -6.220123291015625, + "logits/rejected": -6.242280006408691, + "logps/chosen": -407.80804443359375, + "logps/rejected": -401.532958984375, + "loss": 0.31305124759674074, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -0.2018294781446457, + "rewards/margins": 1.1702749729156494, + "rewards/rejected": -1.372104525566101, + "step": 350 + }, + { + "epoch": 1.9762900976290099, + "grad_norm": 0.7809721827507019, + "learning_rate": 1.9135802469135804e-06, + "logits/chosen": -6.304191589355469, + "logits/rejected": -6.235474109649658, + "logps/chosen": -424.8037109375, + "logps/rejected": -488.02362060546875, + "loss": 0.20149641036987304, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.028687676414847374, + "rewards/margins": 1.7642078399658203, + "rewards/rejected": -1.7928955554962158, + "step": 355 + }, + { + "epoch": 2.0, + "grad_norm": 1.6750562191009521, + "learning_rate": 1.8621399176954735e-06, + "logits/chosen": -6.249261379241943, + "logits/rejected": -6.197325706481934, + "logps/chosen": -384.4549255371094, + "logps/rejected": -450.3216552734375, + "loss": 0.2427699089050293, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.29954269528388977, + "rewards/margins": 1.3911824226379395, + "rewards/rejected": -1.6907252073287964, + "step": 360 + }, + { + "epoch": 2.0278940027894, + "grad_norm": 1.4706112146377563, + "learning_rate": 1.8106995884773665e-06, + "logits/chosen": -6.396174430847168, + "logits/rejected": -6.274371147155762, + "logps/chosen": -365.41265869140625, + "logps/rejected": -433.7682189941406, + "loss": 0.2221092700958252, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.011003097519278526, + "rewards/margins": 1.695127248764038, + "rewards/rejected": -1.7061303853988647, + "step": 365 + }, + { + "epoch": 2.0557880055788007, + "grad_norm": 1.6888645887374878, + "learning_rate": 1.7592592592592594e-06, + "logits/chosen": -6.299677848815918, + "logits/rejected": -6.338282108306885, + "logps/chosen": -460.0879821777344, + "logps/rejected": -489.38031005859375, + "loss": 0.19544492959976195, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004593986086547375, + "rewards/margins": 1.8440967798233032, + "rewards/rejected": -1.8486907482147217, + "step": 370 + }, + { + "epoch": 2.083682008368201, + "grad_norm": 0.998672604560852, + "learning_rate": 1.7078189300411525e-06, + "logits/chosen": -6.379012584686279, + "logits/rejected": -6.325904846191406, + "logps/chosen": -421.4281311035156, + "logps/rejected": -519.4888916015625, + "loss": 0.198160719871521, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.06311459839344025, + "rewards/margins": 1.9342191219329834, + "rewards/rejected": -1.8711044788360596, + "step": 375 + }, + { + "epoch": 2.111576011157601, + "grad_norm": 1.0411016941070557, + "learning_rate": 1.6563786008230454e-06, + "logits/chosen": -6.331459999084473, + "logits/rejected": -6.230687141418457, + "logps/chosen": -381.6875915527344, + "logps/rejected": -436.54522705078125, + "loss": 0.17946820259094237, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09231789410114288, + "rewards/margins": 1.843112587928772, + "rewards/rejected": -1.9354302883148193, + "step": 380 + }, + { + "epoch": 2.1394700139470015, + "grad_norm": 1.7208154201507568, + "learning_rate": 1.6049382716049383e-06, + "logits/chosen": -6.328245639801025, + "logits/rejected": -6.348081111907959, + "logps/chosen": -479.76763916015625, + "logps/rejected": -452.90863037109375, + "loss": 0.18842675685882568, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0751882940530777, + "rewards/margins": 1.8472039699554443, + "rewards/rejected": -1.9223921298980713, + "step": 385 + }, + { + "epoch": 2.1673640167364017, + "grad_norm": 1.2389427423477173, + "learning_rate": 1.5534979423868312e-06, + "logits/chosen": -6.397286891937256, + "logits/rejected": -6.3191328048706055, + "logps/chosen": -385.9268493652344, + "logps/rejected": -412.65350341796875, + "loss": 0.2650484800338745, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.056599367409944534, + "rewards/margins": 1.42960524559021, + "rewards/rejected": -1.4862048625946045, + "step": 390 + }, + { + "epoch": 2.195258019525802, + "grad_norm": 1.1179108619689941, + "learning_rate": 1.5020576131687246e-06, + "logits/chosen": -6.415247440338135, + "logits/rejected": -6.397636413574219, + "logps/chosen": -443.06640625, + "logps/rejected": -513.3284912109375, + "loss": 0.18407890796661378, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1382242888212204, + "rewards/margins": 1.9546597003936768, + "rewards/rejected": -2.0928843021392822, + "step": 395 + }, + { + "epoch": 2.2231520223152024, + "grad_norm": 0.8621686697006226, + "learning_rate": 1.4506172839506175e-06, + "logits/chosen": -6.363873481750488, + "logits/rejected": -6.3192243576049805, + "logps/chosen": -446.64532470703125, + "logps/rejected": -479.0135192871094, + "loss": 0.1950451135635376, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -0.012487297877669334, + "rewards/margins": 1.913094162940979, + "rewards/rejected": -1.9255813360214233, + "step": 400 + }, + { + "epoch": 2.2510460251046025, + "grad_norm": 1.1525218486785889, + "learning_rate": 1.3991769547325104e-06, + "logits/chosen": -6.443626403808594, + "logits/rejected": -6.286838054656982, + "logps/chosen": -391.4197692871094, + "logps/rejected": -440.87335205078125, + "loss": 0.19900518655776978, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -0.0584767684340477, + "rewards/margins": 1.853623390197754, + "rewards/rejected": -1.9121001958847046, + "step": 405 + }, + { + "epoch": 2.2789400278940026, + "grad_norm": 0.9601161479949951, + "learning_rate": 1.3477366255144033e-06, + "logits/chosen": -6.391339302062988, + "logits/rejected": -6.394341468811035, + "logps/chosen": -403.82275390625, + "logps/rejected": -426.35418701171875, + "loss": 0.1934623599052429, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -0.085336834192276, + "rewards/margins": 2.0006136894226074, + "rewards/rejected": -2.0859506130218506, + "step": 410 + }, + { + "epoch": 2.306834030683403, + "grad_norm": 0.9428881406784058, + "learning_rate": 1.2962962962962962e-06, + "logits/chosen": -6.419486045837402, + "logits/rejected": -6.426458835601807, + "logps/chosen": -467.2015075683594, + "logps/rejected": -397.3327941894531, + "loss": 0.16998686790466308, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2600168287754059, + "rewards/margins": 2.0186750888824463, + "rewards/rejected": -1.7586581707000732, + "step": 415 + }, + { + "epoch": 2.3347280334728033, + "grad_norm": 1.0415892601013184, + "learning_rate": 1.2448559670781894e-06, + "logits/chosen": -6.375484466552734, + "logits/rejected": -6.470736026763916, + "logps/chosen": -452.739990234375, + "logps/rejected": -439.50146484375, + "loss": 0.16452697515487671, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 0.1337260901927948, + "rewards/margins": 1.992404580116272, + "rewards/rejected": -1.8586784601211548, + "step": 420 + }, + { + "epoch": 2.3626220362622035, + "grad_norm": 0.9086716175079346, + "learning_rate": 1.1934156378600823e-06, + "logits/chosen": -6.495448112487793, + "logits/rejected": -6.403497219085693, + "logps/chosen": -396.10760498046875, + "logps/rejected": -430.85968017578125, + "loss": 0.18365414142608644, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09083832800388336, + "rewards/margins": 1.8667247295379639, + "rewards/rejected": -1.9575631618499756, + "step": 425 + }, + { + "epoch": 2.390516039051604, + "grad_norm": 0.9541850090026855, + "learning_rate": 1.1419753086419754e-06, + "logits/chosen": -6.468809604644775, + "logits/rejected": -6.406615257263184, + "logps/chosen": -459.69097900390625, + "logps/rejected": -530.5374755859375, + "loss": 0.1429673194885254, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.14917811751365662, + "rewards/margins": 2.2435402870178223, + "rewards/rejected": -2.094362258911133, + "step": 430 + }, + { + "epoch": 2.418410041841004, + "grad_norm": 1.0821386575698853, + "learning_rate": 1.0905349794238683e-06, + "logits/chosen": -6.456778526306152, + "logits/rejected": -6.414129734039307, + "logps/chosen": -456.4554138183594, + "logps/rejected": -449.54827880859375, + "loss": 0.17326489686965943, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.02010413445532322, + "rewards/margins": 1.948240876197815, + "rewards/rejected": -1.9683449268341064, + "step": 435 + }, + { + "epoch": 2.4463040446304043, + "grad_norm": 0.9114539623260498, + "learning_rate": 1.0390946502057615e-06, + "logits/chosen": -6.498774528503418, + "logits/rejected": -6.3586835861206055, + "logps/chosen": -354.0647277832031, + "logps/rejected": -390.14697265625, + "loss": 0.17193338871002198, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.015950988978147507, + "rewards/margins": 1.9409234523773193, + "rewards/rejected": -1.9568744897842407, + "step": 440 + }, + { + "epoch": 2.474198047419805, + "grad_norm": 0.8584111332893372, + "learning_rate": 9.876543209876544e-07, + "logits/chosen": -6.4719557762146, + "logits/rejected": -6.405210018157959, + "logps/chosen": -412.19158935546875, + "logps/rejected": -474.6786193847656, + "loss": 0.2001863956451416, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.18442487716674805, + "rewards/margins": 1.7562973499298096, + "rewards/rejected": -1.9407222270965576, + "step": 445 + }, + { + "epoch": 2.502092050209205, + "grad_norm": 1.4773839712142944, + "learning_rate": 9.362139917695474e-07, + "logits/chosen": -6.447714805603027, + "logits/rejected": -6.4641876220703125, + "logps/chosen": -472.32025146484375, + "logps/rejected": -461.2222595214844, + "loss": 0.17260836362838744, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.16385754942893982, + "rewards/margins": 2.0242795944213867, + "rewards/rejected": -2.1881370544433594, + "step": 450 + }, + { + "epoch": 2.5299860529986056, + "grad_norm": 0.7697892785072327, + "learning_rate": 8.847736625514404e-07, + "logits/chosen": -6.366551399230957, + "logits/rejected": -6.3856306076049805, + "logps/chosen": -449.79620361328125, + "logps/rejected": -474.3729553222656, + "loss": 0.1455883264541626, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.01261213980615139, + "rewards/margins": 2.1382651329040527, + "rewards/rejected": -2.1508772373199463, + "step": 455 + }, + { + "epoch": 2.5578800557880057, + "grad_norm": 1.3464019298553467, + "learning_rate": 8.333333333333333e-07, + "logits/chosen": -6.469805717468262, + "logits/rejected": -6.392939567565918, + "logps/chosen": -445.14910888671875, + "logps/rejected": -535.4447021484375, + "loss": 0.11407300233840942, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.019692901521921158, + "rewards/margins": 2.465336322784424, + "rewards/rejected": -2.4850289821624756, + "step": 460 + }, + { + "epoch": 2.585774058577406, + "grad_norm": 1.2962851524353027, + "learning_rate": 7.818930041152265e-07, + "logits/chosen": -6.485465049743652, + "logits/rejected": -6.357900619506836, + "logps/chosen": -374.263916015625, + "logps/rejected": -404.3548889160156, + "loss": 0.16066277027130127, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.31045863032341003, + "rewards/margins": 2.0328869819641113, + "rewards/rejected": -2.3433451652526855, + "step": 465 + }, + { + "epoch": 2.6136680613668064, + "grad_norm": 1.3673157691955566, + "learning_rate": 7.304526748971194e-07, + "logits/chosen": -6.476637840270996, + "logits/rejected": -6.440759181976318, + "logps/chosen": -411.898681640625, + "logps/rejected": -430.9825744628906, + "loss": 0.1950114130973816, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.10394623130559921, + "rewards/margins": 1.8426212072372437, + "rewards/rejected": -1.9465672969818115, + "step": 470 + }, + { + "epoch": 2.6415620641562065, + "grad_norm": 1.3282204866409302, + "learning_rate": 6.790123456790124e-07, + "logits/chosen": -6.6462297439575195, + "logits/rejected": -6.528528690338135, + "logps/chosen": -307.07763671875, + "logps/rejected": -401.9754943847656, + "loss": 0.178068208694458, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.15701228380203247, + "rewards/margins": 1.9842045307159424, + "rewards/rejected": -2.14121675491333, + "step": 475 + }, + { + "epoch": 2.6694560669456067, + "grad_norm": 1.3630396127700806, + "learning_rate": 6.275720164609054e-07, + "logits/chosen": -6.462084770202637, + "logits/rejected": -6.3894147872924805, + "logps/chosen": -430.40985107421875, + "logps/rejected": -479.05999755859375, + "loss": 0.1616993546485901, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.22041141986846924, + "rewards/margins": 2.0579097270965576, + "rewards/rejected": -2.278320789337158, + "step": 480 + }, + { + "epoch": 2.6973500697350072, + "grad_norm": 1.253321647644043, + "learning_rate": 5.761316872427984e-07, + "logits/chosen": -6.495572566986084, + "logits/rejected": -6.418023109436035, + "logps/chosen": -425.351806640625, + "logps/rejected": -422.390625, + "loss": 0.17130649089813232, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07747139036655426, + "rewards/margins": 2.088853597640991, + "rewards/rejected": -2.166325092315674, + "step": 485 + }, + { + "epoch": 2.7252440725244074, + "grad_norm": 1.2965136766433716, + "learning_rate": 5.246913580246914e-07, + "logits/chosen": -6.563371181488037, + "logits/rejected": -6.486718654632568, + "logps/chosen": -424.76898193359375, + "logps/rejected": -478.27984619140625, + "loss": 0.1441935896873474, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07508904486894608, + "rewards/margins": 2.2910215854644775, + "rewards/rejected": -2.3661105632781982, + "step": 490 + }, + { + "epoch": 2.7531380753138075, + "grad_norm": 1.5180883407592773, + "learning_rate": 4.732510288065844e-07, + "logits/chosen": -6.455023288726807, + "logits/rejected": -6.4715986251831055, + "logps/chosen": -376.63836669921875, + "logps/rejected": -393.05029296875, + "loss": 0.16589791774749757, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1081310510635376, + "rewards/margins": 2.047032356262207, + "rewards/rejected": -2.155163288116455, + "step": 495 + }, + { + "epoch": 2.781032078103208, + "grad_norm": 0.967851996421814, + "learning_rate": 4.2181069958847745e-07, + "logits/chosen": -6.565103054046631, + "logits/rejected": -6.433382987976074, + "logps/chosen": -457.95867919921875, + "logps/rejected": -495.66876220703125, + "loss": 0.15293949842453003, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0993056520819664, + "rewards/margins": 2.18215012550354, + "rewards/rejected": -2.2814559936523438, + "step": 500 + }, + { + "epoch": 2.808926080892608, + "grad_norm": 0.9596635103225708, + "learning_rate": 3.7037037037037036e-07, + "logits/chosen": -6.508576393127441, + "logits/rejected": -6.446290016174316, + "logps/chosen": -452.46881103515625, + "logps/rejected": -481.0126037597656, + "loss": 0.14948637485504152, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.044221777468919754, + "rewards/margins": 2.2208831310272217, + "rewards/rejected": -2.2651052474975586, + "step": 505 + }, + { + "epoch": 2.8368200836820083, + "grad_norm": 1.2698094844818115, + "learning_rate": 3.189300411522634e-07, + "logits/chosen": -6.4514312744140625, + "logits/rejected": -6.44903564453125, + "logps/chosen": -426.76202392578125, + "logps/rejected": -452.93023681640625, + "loss": 0.1897855281829834, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.3248792290687561, + "rewards/margins": 1.9335721731185913, + "rewards/rejected": -2.258451223373413, + "step": 510 + }, + { + "epoch": 2.864714086471409, + "grad_norm": 1.0263173580169678, + "learning_rate": 2.674897119341564e-07, + "logits/chosen": -6.51064395904541, + "logits/rejected": -6.531114101409912, + "logps/chosen": -448.24066162109375, + "logps/rejected": -500.0907287597656, + "loss": 0.1473880171775818, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.4316147267818451, + "rewards/margins": 2.161513090133667, + "rewards/rejected": -2.593127727508545, + "step": 515 + }, + { + "epoch": 2.892608089260809, + "grad_norm": 1.0177369117736816, + "learning_rate": 2.160493827160494e-07, + "logits/chosen": -6.5584306716918945, + "logits/rejected": -6.453854560852051, + "logps/chosen": -433.27886962890625, + "logps/rejected": -467.13787841796875, + "loss": 0.13629883527755737, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.10721828788518906, + "rewards/margins": 2.274982452392578, + "rewards/rejected": -2.3822007179260254, + "step": 520 + }, + { + "epoch": 2.920502092050209, + "grad_norm": 1.171775460243225, + "learning_rate": 1.6460905349794242e-07, + "logits/chosen": -6.684114933013916, + "logits/rejected": -6.569178581237793, + "logps/chosen": -355.95965576171875, + "logps/rejected": -424.7032165527344, + "loss": 0.19023069143295288, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.3019373416900635, + "rewards/margins": 1.8694591522216797, + "rewards/rejected": -2.171396493911743, + "step": 525 + }, + { + "epoch": 2.9483960948396097, + "grad_norm": 1.2015697956085205, + "learning_rate": 1.131687242798354e-07, + "logits/chosen": -6.493105411529541, + "logits/rejected": -6.49573278427124, + "logps/chosen": -463.3268127441406, + "logps/rejected": -463.3141174316406, + "loss": 0.19591643810272216, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -0.42680415511131287, + "rewards/margins": 1.9670032262802124, + "rewards/rejected": -2.3938076496124268, + "step": 530 + }, + { + "epoch": 2.97629009762901, + "grad_norm": 0.9612927436828613, + "learning_rate": 6.17283950617284e-08, + "logits/chosen": -6.6177263259887695, + "logits/rejected": -6.610577583312988, + "logps/chosen": -389.6258850097656, + "logps/rejected": -433.6327209472656, + "loss": 0.14655786752700806, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09456322342157364, + "rewards/margins": 2.2411980628967285, + "rewards/rejected": -2.1466352939605713, + "step": 535 + }, + { + "epoch": 3.0, + "grad_norm": 1.8610484600067139, + "learning_rate": 1.0288065843621401e-08, + "logits/chosen": -6.715764045715332, + "logits/rejected": -6.720335483551025, + "logps/chosen": -417.45428466796875, + "logps/rejected": -469.5900573730469, + "loss": 0.1421959638595581, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.144243061542511, + "rewards/margins": 2.29262638092041, + "rewards/rejected": -2.4368691444396973, + "step": 540 + } + ], + "logging_steps": 5, + "max_steps": 540, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 50, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 0.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/gemma-2b-dpo/checkpoint-540/training_args.bin b/gemma-2b-dpo/checkpoint-540/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..c02f6b48421cea8396dfcaf6d8319b0150c14d7b --- /dev/null +++ b/gemma-2b-dpo/checkpoint-540/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3be0616829fa305775b58136a03c46cbb233332c99572ecc66875666e4681dc9 +size 5688 diff --git a/gemma-2b-dpo/dpo_training_info.json b/gemma-2b-dpo/dpo_training_info.json new file mode 100644 index 0000000000000000000000000000000000000000..d746ac89acbf7b7055baa20d176f8650a38f185f --- /dev/null +++ b/gemma-2b-dpo/dpo_training_info.json @@ -0,0 +1,10 @@ +{ + "model_id": "google/gemma-2-2b-it", + "base_model": "google/gemma-2-2b-it", + "dpo_data": "data/distillation/dpo_training_600.jsonl", + "n_pairs": 1434, + "epochs": 3, + "learning_rate": 5e-06, + "beta": 0.1, + "timestamp": "2026-02-22T09:15:29.249618" +} \ No newline at end of file diff --git a/gemma-2b-dpo/training_args.bin b/gemma-2b-dpo/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..c02f6b48421cea8396dfcaf6d8319b0150c14d7b --- /dev/null +++ b/gemma-2b-dpo/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3be0616829fa305775b58136a03c46cbb233332c99572ecc66875666e4681dc9 +size 5688