diff --git a/.gitattributes b/.gitattributes index 1dba7c7d2e921db436f5052c6ff93540831aec56..befb7db6729e3a45701735498d8fa2123d7d9198 100644 --- a/.gitattributes +++ b/.gitattributes @@ -59,3 +59,8 @@ sft_devstral_24B_v2/wandb/run-20251226_180613-i1cmzyri/run-i1cmzyri.wandb filter sft_devstral_24B_v2/wandb/run-20251226_180702-oordmylf/run-oordmylf.wandb filter=lfs diff=lfs merge=lfs -text sft_devstral_24B_v2/wandb/run-20251226_180808-ny9q48hd/run-ny9q48hd.wandb filter=lfs diff=lfs merge=lfs -text sft_qwen_14B_v2/wandb/run-20251226_181544-upub1jan/run-upub1jan.wandb filter=lfs diff=lfs merge=lfs -text +dpo_qwen_14B/best_adapter/tokenizer.json filter=lfs diff=lfs merge=lfs -text +dpo_qwen_14B/checkpoint-100/tokenizer.json filter=lfs diff=lfs merge=lfs -text +dpo_qwen_14B/wandb/run-20251226_152332-r9hfat2g/run-r9hfat2g.wandb filter=lfs diff=lfs merge=lfs -text +dpo_qwen_14B/wandb/run-20251226_152936-r1nptay8/run-r1nptay8.wandb filter=lfs diff=lfs merge=lfs -text +dpo_qwen_14B/wandb/run-20251226_155650-wbzoafvt/run-wbzoafvt.wandb filter=lfs diff=lfs merge=lfs -text diff --git a/dpo_qwen_14B/README.md b/dpo_qwen_14B/README.md new file mode 100644 index 0000000000000000000000000000000000000000..6498ba2e748532d0156176225a09d4246836d51e --- /dev/null +++ b/dpo_qwen_14B/README.md @@ -0,0 +1,68 @@ +--- +library_name: transformers +model_name: dpo_run_14b_v1 +tags: +- generated_from_trainer +- trl +- dpo +licence: license +--- + +# Model Card for dpo_run_14b_v1 + +This model is a fine-tuned version of [None](https://huggingface.co/None). +It has been trained using [TRL](https://github.com/huggingface/trl). + +## Quick start + +```python +from transformers import pipeline + +question = "If you had a time machine, but could only go to the past or the future once and never return, which would you choose and why?" +generator = pipeline("text-generation", model="None", device="cuda") +output = generator([{"role": "user", "content": question}], max_new_tokens=128, return_full_text=False)[0] +print(output["generated_text"]) +``` + +## Training procedure + +[Visualize in Weights & Biases](https://wandb.ai/sirajuddin-shaik-007/dpo-training/runs/wbzoafvt) + + +This model was trained with DPO, a method introduced in [Direct Preference Optimization: Your Language Model is Secretly a Reward Model](https://huggingface.co/papers/2305.18290). + +### Framework versions + +- TRL: 0.26.2 +- Transformers: 5.0.0.dev0 +- Pytorch: 2.5.1+cu121 +- Datasets: 4.4.2 +- Tokenizers: 0.22.1 + +## Citations + +Cite DPO as: + +```bibtex +@inproceedings{rafailov2023direct, + title = {{Direct Preference Optimization: Your Language Model is Secretly a Reward Model}}, + author = {Rafael Rafailov and Archit Sharma and Eric Mitchell and Christopher D. Manning and Stefano Ermon and Chelsea Finn}, + year = 2023, + booktitle = {Advances in Neural Information Processing Systems 36: Annual Conference on Neural Information Processing Systems 2023, NeurIPS 2023, New Orleans, LA, USA, December 10 - 16, 2023}, + url = {http://papers.nips.cc/paper_files/paper/2023/hash/a85b405ed65c6477a4fe8302b5e06ce7-Abstract-Conference.html}, + editor = {Alice Oh and Tristan Naumann and Amir Globerson and Kate Saenko and Moritz Hardt and Sergey Levine}, +} +``` + +Cite TRL as: + +```bibtex +@misc{vonwerra2022trl, + title = {{TRL: Transformer Reinforcement Learning}}, + author = {Leandro von Werra and Younes Belkada and Lewis Tunstall and Edward Beeching and Tristan Thrush and Nathan Lambert and Shengyi Huang and Kashif Rasul and Quentin Gallou{\'e}dec}, + year = 2020, + journal = {GitHub repository}, + publisher = {GitHub}, + howpublished = {\url{https://github.com/huggingface/trl}} +} +``` \ No newline at end of file diff --git a/dpo_qwen_14B/best_adapter/README.md b/dpo_qwen_14B/best_adapter/README.md new file mode 100644 index 0000000000000000000000000000000000000000..aee08061c0257bf9157a4ed03b986a8bb6e55091 --- /dev/null +++ b/dpo_qwen_14B/best_adapter/README.md @@ -0,0 +1,209 @@ +--- +base_model: ../../Models/Qwen2.5-Coder-14B-CPT-SFT +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:../../Models/Qwen2.5-Coder-14B-CPT-SFT +- dpo +- lora +- transformers +- trl +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.0 \ No newline at end of file diff --git a/dpo_qwen_14B/best_adapter/adapter_config.json b/dpo_qwen_14B/best_adapter/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..2d5bce66a2e3008140afc9fe5840d5c249ea31bc --- /dev/null +++ b/dpo_qwen_14B/best_adapter/adapter_config.json @@ -0,0 +1,43 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": null, + "base_model_name_or_path": "../../Models/Qwen2.5-Coder-14B-CPT-SFT", + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.0", + "qalora_group_size": 16, + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "k_proj", + "o_proj", + "v_proj", + "q_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/dpo_qwen_14B/best_adapter/adapter_model.safetensors b/dpo_qwen_14B/best_adapter/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f98af50a550b683ac4c340b2e698a8d719d3d1f5 --- /dev/null +++ b/dpo_qwen_14B/best_adapter/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6315595d0613ab1a98a34db46bcd956ffbcca002ea96096ef585ffbd10b082c9 +size 100715016 diff --git a/dpo_qwen_14B/best_adapter/chat_template.jinja b/dpo_qwen_14B/best_adapter/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..28028c056af412405debd878cdda0171e35fa5d1 --- /dev/null +++ b/dpo_qwen_14B/best_adapter/chat_template.jinja @@ -0,0 +1,54 @@ +{%- if tools %} + {{- '<|im_start|>system\n' }} + {%- if messages[0]['role'] == 'system' %} + {{- messages[0]['content'] }} + {%- else %} + {{- 'You are a helpful assistant.' }} + {%- endif %} + {{- "\n\n# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within XML tags:\n" }} + {%- for tool in tools %} + {{- "\n" }} + {{- tool | tojson }} + {%- endfor %} + {{- "\n\n\nFor each function call, return a json object with function name and arguments within XML tags:\n\n{\"name\": , \"arguments\": }\n<|im_end|>\n" }} +{%- else %} + {%- if messages[0]['role'] == 'system' %} + {{- '<|im_start|>system\n' + messages[0]['content'] + '<|im_end|>\n' }} + {%- else %} + {{- '<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n' }} + {%- endif %} +{%- endif %} +{%- for message in messages %} + {%- if (message.role == "user") or (message.role == "system" and not loop.first) or (message.role == "assistant" and not message.tool_calls) %} + {{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>' + '\n' }} + {%- elif message.role == "assistant" %} + {{- '<|im_start|>' + message.role }} + {%- if message.content %} + {{- '\n' + message.content }} + {%- endif %} + {%- for tool_call in message.tool_calls %} + {%- if tool_call.function is defined %} + {%- set tool_call = tool_call.function %} + {%- endif %} + {{- '\n\n{"name": "' }} + {{- tool_call.name }} + {{- '", "arguments": ' }} + {{- tool_call.arguments | tojson }} + {{- '}\n' }} + {%- endfor %} + {{- '<|im_end|>\n' }} + {%- elif message.role == "tool" %} + {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != "tool") %} + {{- '<|im_start|>user' }} + {%- endif %} + {{- '\n\n' }} + {{- message.content }} + {{- '\n' }} + {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %} + {{- '<|im_end|>\n' }} + {%- endif %} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|im_start|>assistant\n' }} +{%- endif %} diff --git a/dpo_qwen_14B/best_adapter/optimizer.pt b/dpo_qwen_14B/best_adapter/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..6f10f51d6c800014f98f15e41726ecb3d284c116 --- /dev/null +++ b/dpo_qwen_14B/best_adapter/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:802de0809f197ada0a2f762d41b8a0c8e007ece14785be2ac75521db604c729b +size 201650194 diff --git a/dpo_qwen_14B/best_adapter/rng_state.pth b/dpo_qwen_14B/best_adapter/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..5f477ae319f7890dbe85f94681f64bf10e690d69 --- /dev/null +++ b/dpo_qwen_14B/best_adapter/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ecefbb3f17bb76b6655eb0157c98b5287c17fa4b4c72a6b9068b0823ce9fd18d +size 14244 diff --git a/dpo_qwen_14B/best_adapter/scheduler.pt b/dpo_qwen_14B/best_adapter/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..0c03a8545273144a1798e14d789e40ee65be98e6 --- /dev/null +++ b/dpo_qwen_14B/best_adapter/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f2d3d5485f7a1cfe5d5e69f9e55a45f72f0a8b17e757d0ca412c96a2d472fbf +size 1064 diff --git a/dpo_qwen_14B/best_adapter/tokenizer.json b/dpo_qwen_14B/best_adapter/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..34510ff0037cd50428af467a17ead5a96140a32c --- /dev/null +++ b/dpo_qwen_14B/best_adapter/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3fd169731d2cbde95e10bf356d66d5997fd885dd8dbb6fb4684da3f23b2585d8 +size 11421892 diff --git a/dpo_qwen_14B/best_adapter/tokenizer_config.json b/dpo_qwen_14B/best_adapter/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..217274ef8275420e4bf3b976f3948901cd3d176f --- /dev/null +++ b/dpo_qwen_14B/best_adapter/tokenizer_config.json @@ -0,0 +1,29 @@ +{ + "add_prefix_space": false, + "backend": "tokenizers", + "bos_token": null, + "clean_up_tokenization_spaces": false, + "eos_token": "<|endoftext|>", + "errors": "replace", + "extra_special_tokens": [ + "<|im_start|>", + "<|im_end|>", + "<|object_ref_start|>", + "<|object_ref_end|>", + "<|box_start|>", + "<|box_end|>", + "<|quad_start|>", + "<|quad_end|>", + "<|vision_start|>", + "<|vision_end|>", + "<|vision_pad|>", + "<|image_pad|>", + "<|video_pad|>" + ], + "is_local": true, + "model_max_length": 32768, + "pad_token": "<|endoftext|>", + "split_special_tokens": false, + "tokenizer_class": "Qwen2Tokenizer", + "unk_token": null +} diff --git a/dpo_qwen_14B/best_adapter/trainer_state.json b/dpo_qwen_14B/best_adapter/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..cbd752bd93688bcd5ef1580462b7ce1df7794a5a --- /dev/null +++ b/dpo_qwen_14B/best_adapter/trainer_state.json @@ -0,0 +1,857 @@ +{ + "best_global_step": 100, + "best_metric": 0.04428481683135033, + "best_model_checkpoint": "runs/dpo_run_14b_v1/checkpoint-100", + "epoch": 0.11678832116788321, + "eval_steps": 25, + "global_step": 100, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0023357664233576644, + "grad_norm": 1.242694616317749, + "learning_rate": 1.9379844961240311e-07, + "logits/chosen": 5.179401397705078, + "logits/rejected": 5.192930698394775, + "logps/chosen": -368.911865234375, + "logps/rejected": -398.83880615234375, + "loss": 0.6931473016738892, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.0, + "rewards/margins": 0.0, + "rewards/rejected": 0.0, + "step": 2 + }, + { + "epoch": 0.004671532846715329, + "grad_norm": 1.392399787902832, + "learning_rate": 5.813953488372093e-07, + "logits/chosen": 5.403897762298584, + "logits/rejected": 5.4565606117248535, + "logps/chosen": -338.43792724609375, + "logps/rejected": -367.03057861328125, + "loss": 0.6949559450149536, + "rewards/accuracies": 0.625, + "rewards/chosen": 0.004504585638642311, + "rewards/margins": -0.003222561441361904, + "rewards/rejected": 0.007727146148681641, + "step": 4 + }, + { + "epoch": 0.0070072992700729924, + "grad_norm": 1.066603183746338, + "learning_rate": 9.689922480620155e-07, + "logits/chosen": 5.291868209838867, + "logits/rejected": 5.328356742858887, + "logps/chosen": -362.3431701660156, + "logps/rejected": -387.5829772949219, + "loss": 0.689236581325531, + "rewards/accuracies": 0.5625, + "rewards/chosen": -0.0034066196531057358, + "rewards/margins": 0.008255671709775925, + "rewards/rejected": -0.01166229322552681, + "step": 6 + }, + { + "epoch": 0.009343065693430658, + "grad_norm": 1.0005714893341064, + "learning_rate": 1.3565891472868218e-06, + "logits/chosen": 5.323437690734863, + "logits/rejected": 5.410858631134033, + "logps/chosen": -379.9283447265625, + "logps/rejected": -389.0852355957031, + "loss": 0.6943775415420532, + "rewards/accuracies": 0.375, + "rewards/chosen": 0.014657974243164062, + "rewards/margins": -0.0012350091710686684, + "rewards/rejected": 0.015892982482910156, + "step": 8 + }, + { + "epoch": 0.01167883211678832, + "grad_norm": 1.2461222410202026, + "learning_rate": 1.744186046511628e-06, + "logits/chosen": 5.435908317565918, + "logits/rejected": 5.494542121887207, + "logps/chosen": -363.2003479003906, + "logps/rejected": -389.67376708984375, + "loss": 0.693260908126831, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.028497030958533287, + "rewards/margins": 0.00012636138126254082, + "rewards/rejected": -0.028623390942811966, + "step": 10 + }, + { + "epoch": 0.014014598540145985, + "grad_norm": 1.4030137062072754, + "learning_rate": 2.131782945736434e-06, + "logits/chosen": 5.3550801277160645, + "logits/rejected": 5.375768661499023, + "logps/chosen": -370.96429443359375, + "logps/rejected": -402.4786071777344, + "loss": 0.6882913112640381, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.01622028276324272, + "rewards/margins": 0.010086631402373314, + "rewards/rejected": 0.006133650429546833, + "step": 12 + }, + { + "epoch": 0.01635036496350365, + "grad_norm": 1.1157702207565308, + "learning_rate": 2.5193798449612402e-06, + "logits/chosen": 5.515308380126953, + "logits/rejected": 5.561104774475098, + "logps/chosen": -336.7254333496094, + "logps/rejected": -357.52203369140625, + "loss": 0.6896716356277466, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.017319394275546074, + "rewards/margins": 0.007328510750085115, + "rewards/rejected": -0.024647902697324753, + "step": 14 + }, + { + "epoch": 0.018686131386861315, + "grad_norm": 0.9470655918121338, + "learning_rate": 2.9069767441860468e-06, + "logits/chosen": 5.553088665008545, + "logits/rejected": 5.582851886749268, + "logps/chosen": -415.6842041015625, + "logps/rejected": -441.1054992675781, + "loss": 0.6904245018959045, + "rewards/accuracies": 0.5625, + "rewards/chosen": 0.03270244598388672, + "rewards/margins": 0.005826758686453104, + "rewards/rejected": 0.026875685900449753, + "step": 16 + }, + { + "epoch": 0.021021897810218976, + "grad_norm": 1.4397331476211548, + "learning_rate": 3.2945736434108533e-06, + "logits/chosen": 5.440742015838623, + "logits/rejected": 5.489529132843018, + "logps/chosen": -392.46221923828125, + "logps/rejected": -420.1712341308594, + "loss": 0.683630108833313, + "rewards/accuracies": 0.5625, + "rewards/chosen": 0.011020278558135033, + "rewards/margins": 0.01951923407614231, + "rewards/rejected": -0.008498954586684704, + "step": 18 + }, + { + "epoch": 0.02335766423357664, + "grad_norm": 1.5941083431243896, + "learning_rate": 3.6821705426356594e-06, + "logits/chosen": 5.318347930908203, + "logits/rejected": 5.397945404052734, + "logps/chosen": -345.2221374511719, + "logps/rejected": -365.9537048339844, + "loss": 0.6902388334274292, + "rewards/accuracies": 0.5625, + "rewards/chosen": 0.006536484230309725, + "rewards/margins": 0.006013393402099609, + "rewards/rejected": 0.0005230908282101154, + "step": 20 + }, + { + "epoch": 0.025693430656934305, + "grad_norm": 1.1363905668258667, + "learning_rate": 4.0697674418604655e-06, + "logits/chosen": 5.632981300354004, + "logits/rejected": 5.7265520095825195, + "logps/chosen": -347.9439697265625, + "logps/rejected": -370.65777587890625, + "loss": 0.691262423992157, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.011908342130482197, + "rewards/margins": 0.004538153763860464, + "rewards/rejected": 0.007370188366621733, + "step": 22 + }, + { + "epoch": 0.02802919708029197, + "grad_norm": 1.0684627294540405, + "learning_rate": 4.457364341085272e-06, + "logits/chosen": 5.35699987411499, + "logits/rejected": 5.405580520629883, + "logps/chosen": -347.1539001464844, + "logps/rejected": -377.6044921875, + "loss": 0.6769475936889648, + "rewards/accuracies": 0.875, + "rewards/chosen": 0.01244144607335329, + "rewards/margins": 0.03289356082677841, + "rewards/rejected": -0.020452119410037994, + "step": 24 + }, + { + "epoch": 0.029197080291970802, + "eval_logits/chosen": 5.295141220092773, + "eval_logits/rejected": 5.345211029052734, + "eval_logps/chosen": -370.1607666015625, + "eval_logps/rejected": -395.7251892089844, + "eval_loss": 0.6836819648742676, + "eval_rewards/accuracies": 0.665354311466217, + "eval_rewards/chosen": 0.024636391550302505, + "eval_rewards/margins": 0.019555427134037018, + "eval_rewards/rejected": 0.005080964416265488, + "eval_runtime": 454.4375, + "eval_samples_per_second": 1.677, + "eval_steps_per_second": 1.677, + "step": 25 + }, + { + "epoch": 0.030364963503649634, + "grad_norm": 1.592353105545044, + "learning_rate": 4.844961240310078e-06, + "logits/chosen": 5.157042026519775, + "logits/rejected": 5.244912147521973, + "logps/chosen": -387.54876708984375, + "logps/rejected": -412.0630187988281, + "loss": 0.6849788427352905, + "rewards/accuracies": 0.625, + "rewards/chosen": 0.026385309174656868, + "rewards/margins": 0.016966437920928, + "rewards/rejected": 0.009418869391083717, + "step": 26 + }, + { + "epoch": 0.0327007299270073, + "grad_norm": 1.3181558847427368, + "learning_rate": 5.232558139534884e-06, + "logits/chosen": 5.545513153076172, + "logits/rejected": 5.54400110244751, + "logps/chosen": -360.41650390625, + "logps/rejected": -391.2162170410156, + "loss": 0.675189733505249, + "rewards/accuracies": 0.8125, + "rewards/chosen": 0.045946408063173294, + "rewards/margins": 0.03675585240125656, + "rewards/rejected": 0.009190557524561882, + "step": 28 + }, + { + "epoch": 0.035036496350364967, + "grad_norm": 1.443650722503662, + "learning_rate": 5.620155038759691e-06, + "logits/chosen": 5.136168003082275, + "logits/rejected": 5.239327907562256, + "logps/chosen": -378.6293640136719, + "logps/rejected": -405.3665466308594, + "loss": 0.6752142310142517, + "rewards/accuracies": 0.8125, + "rewards/chosen": 0.04194517061114311, + "rewards/margins": 0.03668833151459694, + "rewards/rejected": 0.005256845150142908, + "step": 30 + }, + { + "epoch": 0.03737226277372263, + "grad_norm": 1.379568338394165, + "learning_rate": 6.007751937984497e-06, + "logits/chosen": 5.411487579345703, + "logits/rejected": 5.427243232727051, + "logps/chosen": -358.5367736816406, + "logps/rejected": -382.4181213378906, + "loss": 0.6700581312179565, + "rewards/accuracies": 0.875, + "rewards/chosen": 0.06658173352479935, + "rewards/margins": 0.047193337231874466, + "rewards/rejected": 0.019388392567634583, + "step": 32 + }, + { + "epoch": 0.039708029197080295, + "grad_norm": 1.3260451555252075, + "learning_rate": 6.395348837209303e-06, + "logits/chosen": 5.207217216491699, + "logits/rejected": 5.254848480224609, + "logps/chosen": -326.9423828125, + "logps/rejected": -346.52081298828125, + "loss": 0.6610866785049438, + "rewards/accuracies": 0.9375, + "rewards/chosen": 0.07038869708776474, + "rewards/margins": 0.06587495654821396, + "rewards/rejected": 0.0045137410052120686, + "step": 34 + }, + { + "epoch": 0.04204379562043795, + "grad_norm": 1.5776340961456299, + "learning_rate": 6.782945736434108e-06, + "logits/chosen": 5.550538063049316, + "logits/rejected": 5.6374335289001465, + "logps/chosen": -359.9613952636719, + "logps/rejected": -384.31683349609375, + "loss": 0.6281551718711853, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.11738375574350357, + "rewards/margins": 0.1363767683506012, + "rewards/rejected": -0.018992995843291283, + "step": 36 + }, + { + "epoch": 0.04437956204379562, + "grad_norm": 1.8589071035385132, + "learning_rate": 7.170542635658915e-06, + "logits/chosen": 5.39143180847168, + "logits/rejected": 5.412029266357422, + "logps/chosen": -325.8544616699219, + "logps/rejected": -351.9772644042969, + "loss": 0.6270830631256104, + "rewards/accuracies": 0.9375, + "rewards/chosen": 0.1617884635925293, + "rewards/margins": 0.1388537436723709, + "rewards/rejected": 0.022934721782803535, + "step": 38 + }, + { + "epoch": 0.04671532846715328, + "grad_norm": 1.3231571912765503, + "learning_rate": 7.558139534883721e-06, + "logits/chosen": 5.189720153808594, + "logits/rejected": 5.203127384185791, + "logps/chosen": -343.3839111328125, + "logps/rejected": -374.7848205566406, + "loss": 0.641180157661438, + "rewards/accuracies": 0.875, + "rewards/chosen": 0.15248623490333557, + "rewards/margins": 0.11158552765846252, + "rewards/rejected": 0.04090070724487305, + "step": 40 + }, + { + "epoch": 0.049051094890510946, + "grad_norm": 2.5331315994262695, + "learning_rate": 7.945736434108528e-06, + "logits/chosen": 5.420182228088379, + "logits/rejected": 5.45302677154541, + "logps/chosen": -341.813720703125, + "logps/rejected": -372.44952392578125, + "loss": 0.6093671321868896, + "rewards/accuracies": 0.9375, + "rewards/chosen": 0.2898235321044922, + "rewards/margins": 0.18158456683158875, + "rewards/rejected": 0.10823898762464523, + "step": 42 + }, + { + "epoch": 0.05138686131386861, + "grad_norm": 1.5247384309768677, + "learning_rate": 8.333333333333334e-06, + "logits/chosen": 5.383636951446533, + "logits/rejected": 5.397551536560059, + "logps/chosen": -354.49627685546875, + "logps/rejected": -376.88818359375, + "loss": 0.5815833210945129, + "rewards/accuracies": 0.8125, + "rewards/chosen": 0.32459571957588196, + "rewards/margins": 0.2510552406311035, + "rewards/rejected": 0.07354050129652023, + "step": 44 + }, + { + "epoch": 0.053722627737226275, + "grad_norm": 2.0814144611358643, + "learning_rate": 8.72093023255814e-06, + "logits/chosen": 5.269731044769287, + "logits/rejected": 5.287116050720215, + "logps/chosen": -331.1025390625, + "logps/rejected": -362.90118408203125, + "loss": 0.5269681215286255, + "rewards/accuracies": 0.9375, + "rewards/chosen": 0.6465227603912354, + "rewards/margins": 0.37582656741142273, + "rewards/rejected": 0.27069616317749023, + "step": 46 + }, + { + "epoch": 0.05605839416058394, + "grad_norm": 1.769063115119934, + "learning_rate": 9.108527131782946e-06, + "logits/chosen": 5.472540855407715, + "logits/rejected": 5.465417861938477, + "logps/chosen": -369.40283203125, + "logps/rejected": -400.18438720703125, + "loss": 0.5066201686859131, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6377636194229126, + "rewards/margins": 0.42650213837623596, + "rewards/rejected": 0.21126146614551544, + "step": 48 + }, + { + "epoch": 0.058394160583941604, + "grad_norm": 2.84169602394104, + "learning_rate": 9.496124031007753e-06, + "logits/chosen": 5.050387382507324, + "logits/rejected": 5.112288951873779, + "logps/chosen": -363.4556579589844, + "logps/rejected": -397.8169860839844, + "loss": 0.529259979724884, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7923164367675781, + "rewards/margins": 0.3787059783935547, + "rewards/rejected": 0.4136104881763458, + "step": 50 + }, + { + "epoch": 0.058394160583941604, + "eval_logits/chosen": 5.22359037399292, + "eval_logits/rejected": 5.286833763122559, + "eval_logps/chosen": -361.462890625, + "eval_logps/rejected": -392.5708312988281, + "eval_loss": 0.4610801041126251, + "eval_rewards/accuracies": 0.9619422554969788, + "eval_rewards/chosen": 0.8944254517555237, + "eval_rewards/margins": 0.5739086270332336, + "eval_rewards/rejected": 0.3205168545246124, + "eval_runtime": 454.5598, + "eval_samples_per_second": 1.676, + "eval_steps_per_second": 1.676, + "step": 50 + }, + { + "epoch": 0.06072992700729927, + "grad_norm": 1.6907895803451538, + "learning_rate": 9.883720930232558e-06, + "logits/chosen": 5.486469268798828, + "logits/rejected": 5.541717529296875, + "logps/chosen": -343.4534606933594, + "logps/rejected": -379.39508056640625, + "loss": 0.44602835178375244, + "rewards/accuracies": 0.9375, + "rewards/chosen": 0.9869746565818787, + "rewards/margins": 0.6056646108627319, + "rewards/rejected": 0.3813100755214691, + "step": 52 + }, + { + "epoch": 0.06306569343065693, + "grad_norm": 1.9458682537078857, + "learning_rate": 1.0271317829457365e-05, + "logits/chosen": 5.169528961181641, + "logits/rejected": 5.2688751220703125, + "logps/chosen": -379.5437316894531, + "logps/rejected": -401.5587463378906, + "loss": 0.43609702587127686, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7794930934906006, + "rewards/margins": 0.6265671253204346, + "rewards/rejected": 0.15292587876319885, + "step": 54 + }, + { + "epoch": 0.0654014598540146, + "grad_norm": 2.1266520023345947, + "learning_rate": 1.065891472868217e-05, + "logits/chosen": 5.097426414489746, + "logits/rejected": 5.15327262878418, + "logps/chosen": -378.0788269042969, + "logps/rejected": -413.27392578125, + "loss": 0.3928414583206177, + "rewards/accuracies": 0.9375, + "rewards/chosen": 1.274291753768921, + "rewards/margins": 0.7864217758178711, + "rewards/rejected": 0.4878700375556946, + "step": 56 + }, + { + "epoch": 0.06773722627737226, + "grad_norm": 1.5381489992141724, + "learning_rate": 1.1046511627906977e-05, + "logits/chosen": 5.138954162597656, + "logits/rejected": 5.20254373550415, + "logps/chosen": -372.93438720703125, + "logps/rejected": -401.8287658691406, + "loss": 0.35855019092559814, + "rewards/accuracies": 0.875, + "rewards/chosen": 1.2897911071777344, + "rewards/margins": 0.9354276061058044, + "rewards/rejected": 0.35436347126960754, + "step": 58 + }, + { + "epoch": 0.07007299270072993, + "grad_norm": 2.358330726623535, + "learning_rate": 1.1434108527131783e-05, + "logits/chosen": 5.071888446807861, + "logits/rejected": 5.187964916229248, + "logps/chosen": -360.984619140625, + "logps/rejected": -392.3192138671875, + "loss": 0.42801612615585327, + "rewards/accuracies": 0.875, + "rewards/chosen": 1.3823509216308594, + "rewards/margins": 0.729066014289856, + "rewards/rejected": 0.6532848477363586, + "step": 60 + }, + { + "epoch": 0.07240875912408759, + "grad_norm": 2.177586317062378, + "learning_rate": 1.182170542635659e-05, + "logits/chosen": 5.264093399047852, + "logits/rejected": 5.310842990875244, + "logps/chosen": -364.808349609375, + "logps/rejected": -401.0321044921875, + "loss": 0.31365492939949036, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6637591123580933, + "rewards/margins": 1.0887457132339478, + "rewards/rejected": 0.5750135183334351, + "step": 62 + }, + { + "epoch": 0.07474452554744526, + "grad_norm": 1.697789192199707, + "learning_rate": 1.2209302325581395e-05, + "logits/chosen": 5.191982269287109, + "logits/rejected": 5.261416912078857, + "logps/chosen": -359.8249816894531, + "logps/rejected": -397.2122497558594, + "loss": 0.3037749230861664, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6470392942428589, + "rewards/margins": 1.114844799041748, + "rewards/rejected": 0.5321945548057556, + "step": 64 + }, + { + "epoch": 0.07708029197080292, + "grad_norm": 1.3219914436340332, + "learning_rate": 1.2596899224806202e-05, + "logits/chosen": 5.293405532836914, + "logits/rejected": 5.3094048500061035, + "logps/chosen": -352.3752136230469, + "logps/rejected": -392.6779479980469, + "loss": 0.25026455521583557, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5671364068984985, + "rewards/margins": 1.4098074436187744, + "rewards/rejected": 0.15732917189598083, + "step": 66 + }, + { + "epoch": 0.07941605839416059, + "grad_norm": 1.8173967599868774, + "learning_rate": 1.2984496124031009e-05, + "logits/chosen": 5.025746822357178, + "logits/rejected": 5.114965438842773, + "logps/chosen": -319.99700927734375, + "logps/rejected": -364.115234375, + "loss": 0.3108353912830353, + "rewards/accuracies": 0.9375, + "rewards/chosen": 1.4788665771484375, + "rewards/margins": 1.2637410163879395, + "rewards/rejected": 0.2151254564523697, + "step": 68 + }, + { + "epoch": 0.08175182481751825, + "grad_norm": 1.0658400058746338, + "learning_rate": 1.3372093023255814e-05, + "logits/chosen": 4.945235729217529, + "logits/rejected": 4.959147930145264, + "logps/chosen": -383.84033203125, + "logps/rejected": -431.7752685546875, + "loss": 0.22991834580898285, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3950352668762207, + "rewards/margins": 1.4965243339538574, + "rewards/rejected": -0.1014888733625412, + "step": 70 + }, + { + "epoch": 0.0840875912408759, + "grad_norm": 1.0350896120071411, + "learning_rate": 1.375968992248062e-05, + "logits/chosen": 5.00426721572876, + "logits/rejected": 5.120238780975342, + "logps/chosen": -350.9471435546875, + "logps/rejected": -382.6837158203125, + "loss": 0.22603684663772583, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2978975772857666, + "rewards/margins": 1.644275426864624, + "rewards/rejected": -0.34637776017189026, + "step": 72 + }, + { + "epoch": 0.08642335766423358, + "grad_norm": 1.1595423221588135, + "learning_rate": 1.4147286821705426e-05, + "logits/chosen": 4.890130043029785, + "logits/rejected": 4.9504714012146, + "logps/chosen": -352.34967041015625, + "logps/rejected": -399.23028564453125, + "loss": 0.18921935558319092, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1984589099884033, + "rewards/margins": 1.7495291233062744, + "rewards/rejected": -0.5510700941085815, + "step": 74 + }, + { + "epoch": 0.08759124087591241, + "eval_logits/chosen": 4.930174827575684, + "eval_logits/rejected": 5.032296657562256, + "eval_logps/chosen": -359.19647216796875, + "eval_logps/rejected": -405.1120300292969, + "eval_loss": 0.16020436584949493, + "eval_rewards/accuracies": 0.9960629940032959, + "eval_rewards/chosen": 1.1210675239562988, + "eval_rewards/margins": 2.0546727180480957, + "eval_rewards/rejected": -0.9336051344871521, + "eval_runtime": 454.3435, + "eval_samples_per_second": 1.677, + "eval_steps_per_second": 1.677, + "step": 75 + }, + { + "epoch": 0.08875912408759123, + "grad_norm": 1.1433167457580566, + "learning_rate": 1.4534883720930233e-05, + "logits/chosen": 5.037275314331055, + "logits/rejected": 5.1315507888793945, + "logps/chosen": -313.110595703125, + "logps/rejected": -356.1000061035156, + "loss": 0.15998858213424683, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2128857374191284, + "rewards/margins": 2.0945115089416504, + "rewards/rejected": -0.8816256523132324, + "step": 76 + }, + { + "epoch": 0.0910948905109489, + "grad_norm": 0.9839214086532593, + "learning_rate": 1.4922480620155039e-05, + "logits/chosen": 4.817085266113281, + "logits/rejected": 4.874035835266113, + "logps/chosen": -366.2629089355469, + "logps/rejected": -405.7989196777344, + "loss": 0.1894684135913849, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0605502128601074, + "rewards/margins": 1.90762460231781, + "rewards/rejected": -0.8470743894577026, + "step": 78 + }, + { + "epoch": 0.09343065693430656, + "grad_norm": 0.9212782979011536, + "learning_rate": 1.5310077519379846e-05, + "logits/chosen": 5.046716690063477, + "logits/rejected": 5.157979965209961, + "logps/chosen": -348.0658264160156, + "logps/rejected": -395.23870849609375, + "loss": 0.15948188304901123, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.676516056060791, + "rewards/margins": 2.167430877685547, + "rewards/rejected": -1.4909145832061768, + "step": 80 + }, + { + "epoch": 0.09576642335766423, + "grad_norm": 0.9820688366889954, + "learning_rate": 1.569767441860465e-05, + "logits/chosen": 4.690741539001465, + "logits/rejected": 4.771791458129883, + "logps/chosen": -378.8666076660156, + "logps/rejected": -436.9100036621094, + "loss": 0.12085139006376266, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8719685077667236, + "rewards/margins": 2.646538257598877, + "rewards/rejected": -1.7745698690414429, + "step": 82 + }, + { + "epoch": 0.09810218978102189, + "grad_norm": 0.66785728931427, + "learning_rate": 1.608527131782946e-05, + "logits/chosen": 4.880465984344482, + "logits/rejected": 4.961792945861816, + "logps/chosen": -346.51214599609375, + "logps/rejected": -400.1110534667969, + "loss": 0.08720710873603821, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1337480545043945, + "rewards/margins": 2.903944253921509, + "rewards/rejected": -1.7701961994171143, + "step": 84 + }, + { + "epoch": 0.10043795620437956, + "grad_norm": 0.5760660767555237, + "learning_rate": 1.647286821705426e-05, + "logits/chosen": 4.464397430419922, + "logits/rejected": 4.680055618286133, + "logps/chosen": -341.7489318847656, + "logps/rejected": -398.322021484375, + "loss": 0.07942983508110046, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2459325790405273, + "rewards/margins": 3.0152552127838135, + "rewards/rejected": -1.7693227529525757, + "step": 86 + }, + { + "epoch": 0.10277372262773722, + "grad_norm": 1.6020294427871704, + "learning_rate": 1.686046511627907e-05, + "logits/chosen": 4.563863277435303, + "logits/rejected": 4.680974960327148, + "logps/chosen": -344.9147644042969, + "logps/rejected": -395.4453125, + "loss": 0.1258174479007721, + "rewards/accuracies": 0.9375, + "rewards/chosen": 1.0706769227981567, + "rewards/margins": 3.118717670440674, + "rewards/rejected": -2.0480403900146484, + "step": 88 + }, + { + "epoch": 0.10510948905109489, + "grad_norm": 0.46413859724998474, + "learning_rate": 1.7248062015503875e-05, + "logits/chosen": 4.4989237785339355, + "logits/rejected": 4.673248291015625, + "logps/chosen": -326.9678649902344, + "logps/rejected": -388.4164123535156, + "loss": 0.06663060188293457, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4128761291503906, + "rewards/margins": 3.760685920715332, + "rewards/rejected": -2.3478102684020996, + "step": 90 + }, + { + "epoch": 0.10744525547445255, + "grad_norm": 0.6699568629264832, + "learning_rate": 1.7635658914728684e-05, + "logits/chosen": 4.7294535636901855, + "logits/rejected": 4.813880920410156, + "logps/chosen": -362.7267150878906, + "logps/rejected": -439.2985534667969, + "loss": 0.04481709748506546, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.477597713470459, + "rewards/margins": 4.37883186340332, + "rewards/rejected": -2.9012341499328613, + "step": 92 + }, + { + "epoch": 0.10978102189781022, + "grad_norm": 0.4152977168560028, + "learning_rate": 1.802325581395349e-05, + "logits/chosen": 4.785149574279785, + "logits/rejected": 4.891542434692383, + "logps/chosen": -381.59246826171875, + "logps/rejected": -444.2817687988281, + "loss": 0.05632612109184265, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.71366286277771, + "rewards/margins": 3.4584720134735107, + "rewards/rejected": -2.744809150695801, + "step": 94 + }, + { + "epoch": 0.11211678832116788, + "grad_norm": 0.3152717649936676, + "learning_rate": 1.8410852713178295e-05, + "logits/chosen": 4.603940486907959, + "logits/rejected": 4.804995536804199, + "logps/chosen": -356.7286376953125, + "logps/rejected": -414.69635009765625, + "loss": 0.040920041501522064, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7566397190093994, + "rewards/margins": 4.020595550537109, + "rewards/rejected": -2.263956069946289, + "step": 96 + }, + { + "epoch": 0.11445255474452555, + "grad_norm": 0.37698569893836975, + "learning_rate": 1.8798449612403103e-05, + "logits/chosen": 4.558542728424072, + "logits/rejected": 4.690641403198242, + "logps/chosen": -339.794189453125, + "logps/rejected": -413.8865966796875, + "loss": 0.025794224813580513, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3867536783218384, + "rewards/margins": 4.6542744636535645, + "rewards/rejected": -3.2675204277038574, + "step": 98 + }, + { + "epoch": 0.11678832116788321, + "grad_norm": 0.15023073554039001, + "learning_rate": 1.918604651162791e-05, + "logits/chosen": 4.387497425079346, + "logits/rejected": 4.494588375091553, + "logps/chosen": -346.2568054199219, + "logps/rejected": -418.9315185546875, + "loss": 0.015155203640460968, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7938623428344727, + "rewards/margins": 4.942529201507568, + "rewards/rejected": -3.1486666202545166, + "step": 100 + }, + { + "epoch": 0.11678832116788321, + "eval_logits/chosen": 4.285891056060791, + "eval_logits/rejected": 4.425926208496094, + "eval_logps/chosen": -353.15850830078125, + "eval_logps/rejected": -424.4124755859375, + "eval_loss": 0.04428481683135033, + "eval_rewards/accuracies": 0.9921259880065918, + "eval_rewards/chosen": 1.7248634099960327, + "eval_rewards/margins": 4.588510513305664, + "eval_rewards/rejected": -2.863647222518921, + "eval_runtime": 454.7251, + "eval_samples_per_second": 1.676, + "eval_steps_per_second": 1.676, + "step": 100 + } + ], + "logging_steps": 2, + "max_steps": 2571, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 100, + "stateful_callbacks": { + "EarlyStoppingCallback": { + "args": { + "early_stopping_patience": 5, + "early_stopping_threshold": 0.001 + }, + "attributes": { + "early_stopping_patience_counter": 0 + } + }, + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 0.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/dpo_qwen_14B/best_adapter/training_args.bin b/dpo_qwen_14B/best_adapter/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..dd3891cc402abfdcd660f4a6f0aa91d707951765 --- /dev/null +++ b/dpo_qwen_14B/best_adapter/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:21875ef630d3e8f528dce67596a0d783fd5cf223e6e245a98026996d1f3d3ade +size 5752 diff --git a/dpo_qwen_14B/checkpoint-100/README.md b/dpo_qwen_14B/checkpoint-100/README.md new file mode 100644 index 0000000000000000000000000000000000000000..aee08061c0257bf9157a4ed03b986a8bb6e55091 --- /dev/null +++ b/dpo_qwen_14B/checkpoint-100/README.md @@ -0,0 +1,209 @@ +--- +base_model: ../../Models/Qwen2.5-Coder-14B-CPT-SFT +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:../../Models/Qwen2.5-Coder-14B-CPT-SFT +- dpo +- lora +- transformers +- trl +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.0 \ No newline at end of file diff --git a/dpo_qwen_14B/checkpoint-100/adapter_config.json b/dpo_qwen_14B/checkpoint-100/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..2d5bce66a2e3008140afc9fe5840d5c249ea31bc --- /dev/null +++ b/dpo_qwen_14B/checkpoint-100/adapter_config.json @@ -0,0 +1,43 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": null, + "base_model_name_or_path": "../../Models/Qwen2.5-Coder-14B-CPT-SFT", + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.0", + "qalora_group_size": 16, + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "k_proj", + "o_proj", + "v_proj", + "q_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/dpo_qwen_14B/checkpoint-100/adapter_model.safetensors b/dpo_qwen_14B/checkpoint-100/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f98af50a550b683ac4c340b2e698a8d719d3d1f5 --- /dev/null +++ b/dpo_qwen_14B/checkpoint-100/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6315595d0613ab1a98a34db46bcd956ffbcca002ea96096ef585ffbd10b082c9 +size 100715016 diff --git a/dpo_qwen_14B/checkpoint-100/chat_template.jinja b/dpo_qwen_14B/checkpoint-100/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..28028c056af412405debd878cdda0171e35fa5d1 --- /dev/null +++ b/dpo_qwen_14B/checkpoint-100/chat_template.jinja @@ -0,0 +1,54 @@ +{%- if tools %} + {{- '<|im_start|>system\n' }} + {%- if messages[0]['role'] == 'system' %} + {{- messages[0]['content'] }} + {%- else %} + {{- 'You are a helpful assistant.' }} + {%- endif %} + {{- "\n\n# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within XML tags:\n" }} + {%- for tool in tools %} + {{- "\n" }} + {{- tool | tojson }} + {%- endfor %} + {{- "\n\n\nFor each function call, return a json object with function name and arguments within XML tags:\n\n{\"name\": , \"arguments\": }\n<|im_end|>\n" }} +{%- else %} + {%- if messages[0]['role'] == 'system' %} + {{- '<|im_start|>system\n' + messages[0]['content'] + '<|im_end|>\n' }} + {%- else %} + {{- '<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n' }} + {%- endif %} +{%- endif %} +{%- for message in messages %} + {%- if (message.role == "user") or (message.role == "system" and not loop.first) or (message.role == "assistant" and not message.tool_calls) %} + {{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>' + '\n' }} + {%- elif message.role == "assistant" %} + {{- '<|im_start|>' + message.role }} + {%- if message.content %} + {{- '\n' + message.content }} + {%- endif %} + {%- for tool_call in message.tool_calls %} + {%- if tool_call.function is defined %} + {%- set tool_call = tool_call.function %} + {%- endif %} + {{- '\n\n{"name": "' }} + {{- tool_call.name }} + {{- '", "arguments": ' }} + {{- tool_call.arguments | tojson }} + {{- '}\n' }} + {%- endfor %} + {{- '<|im_end|>\n' }} + {%- elif message.role == "tool" %} + {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != "tool") %} + {{- '<|im_start|>user' }} + {%- endif %} + {{- '\n\n' }} + {{- message.content }} + {{- '\n' }} + {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %} + {{- '<|im_end|>\n' }} + {%- endif %} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|im_start|>assistant\n' }} +{%- endif %} diff --git a/dpo_qwen_14B/checkpoint-100/optimizer.pt b/dpo_qwen_14B/checkpoint-100/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..6f10f51d6c800014f98f15e41726ecb3d284c116 --- /dev/null +++ b/dpo_qwen_14B/checkpoint-100/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:802de0809f197ada0a2f762d41b8a0c8e007ece14785be2ac75521db604c729b +size 201650194 diff --git a/dpo_qwen_14B/checkpoint-100/rng_state.pth b/dpo_qwen_14B/checkpoint-100/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..5f477ae319f7890dbe85f94681f64bf10e690d69 --- /dev/null +++ b/dpo_qwen_14B/checkpoint-100/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ecefbb3f17bb76b6655eb0157c98b5287c17fa4b4c72a6b9068b0823ce9fd18d +size 14244 diff --git a/dpo_qwen_14B/checkpoint-100/scheduler.pt b/dpo_qwen_14B/checkpoint-100/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..0c03a8545273144a1798e14d789e40ee65be98e6 --- /dev/null +++ b/dpo_qwen_14B/checkpoint-100/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f2d3d5485f7a1cfe5d5e69f9e55a45f72f0a8b17e757d0ca412c96a2d472fbf +size 1064 diff --git a/dpo_qwen_14B/checkpoint-100/tokenizer.json b/dpo_qwen_14B/checkpoint-100/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..34510ff0037cd50428af467a17ead5a96140a32c --- /dev/null +++ b/dpo_qwen_14B/checkpoint-100/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3fd169731d2cbde95e10bf356d66d5997fd885dd8dbb6fb4684da3f23b2585d8 +size 11421892 diff --git a/dpo_qwen_14B/checkpoint-100/tokenizer_config.json b/dpo_qwen_14B/checkpoint-100/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..217274ef8275420e4bf3b976f3948901cd3d176f --- /dev/null +++ b/dpo_qwen_14B/checkpoint-100/tokenizer_config.json @@ -0,0 +1,29 @@ +{ + "add_prefix_space": false, + "backend": "tokenizers", + "bos_token": null, + "clean_up_tokenization_spaces": false, + "eos_token": "<|endoftext|>", + "errors": "replace", + "extra_special_tokens": [ + "<|im_start|>", + "<|im_end|>", + "<|object_ref_start|>", + "<|object_ref_end|>", + "<|box_start|>", + "<|box_end|>", + "<|quad_start|>", + "<|quad_end|>", + "<|vision_start|>", + "<|vision_end|>", + "<|vision_pad|>", + "<|image_pad|>", + "<|video_pad|>" + ], + "is_local": true, + "model_max_length": 32768, + "pad_token": "<|endoftext|>", + "split_special_tokens": false, + "tokenizer_class": "Qwen2Tokenizer", + "unk_token": null +} diff --git a/dpo_qwen_14B/checkpoint-100/trainer_state.json b/dpo_qwen_14B/checkpoint-100/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..cbd752bd93688bcd5ef1580462b7ce1df7794a5a --- /dev/null +++ b/dpo_qwen_14B/checkpoint-100/trainer_state.json @@ -0,0 +1,857 @@ +{ + "best_global_step": 100, + "best_metric": 0.04428481683135033, + "best_model_checkpoint": "runs/dpo_run_14b_v1/checkpoint-100", + "epoch": 0.11678832116788321, + "eval_steps": 25, + "global_step": 100, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0023357664233576644, + "grad_norm": 1.242694616317749, + "learning_rate": 1.9379844961240311e-07, + "logits/chosen": 5.179401397705078, + "logits/rejected": 5.192930698394775, + "logps/chosen": -368.911865234375, + "logps/rejected": -398.83880615234375, + "loss": 0.6931473016738892, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.0, + "rewards/margins": 0.0, + "rewards/rejected": 0.0, + "step": 2 + }, + { + "epoch": 0.004671532846715329, + "grad_norm": 1.392399787902832, + "learning_rate": 5.813953488372093e-07, + "logits/chosen": 5.403897762298584, + "logits/rejected": 5.4565606117248535, + "logps/chosen": -338.43792724609375, + "logps/rejected": -367.03057861328125, + "loss": 0.6949559450149536, + "rewards/accuracies": 0.625, + "rewards/chosen": 0.004504585638642311, + "rewards/margins": -0.003222561441361904, + "rewards/rejected": 0.007727146148681641, + "step": 4 + }, + { + "epoch": 0.0070072992700729924, + "grad_norm": 1.066603183746338, + "learning_rate": 9.689922480620155e-07, + "logits/chosen": 5.291868209838867, + "logits/rejected": 5.328356742858887, + "logps/chosen": -362.3431701660156, + "logps/rejected": -387.5829772949219, + "loss": 0.689236581325531, + "rewards/accuracies": 0.5625, + "rewards/chosen": -0.0034066196531057358, + "rewards/margins": 0.008255671709775925, + "rewards/rejected": -0.01166229322552681, + "step": 6 + }, + { + "epoch": 0.009343065693430658, + "grad_norm": 1.0005714893341064, + "learning_rate": 1.3565891472868218e-06, + "logits/chosen": 5.323437690734863, + "logits/rejected": 5.410858631134033, + "logps/chosen": -379.9283447265625, + "logps/rejected": -389.0852355957031, + "loss": 0.6943775415420532, + "rewards/accuracies": 0.375, + "rewards/chosen": 0.014657974243164062, + "rewards/margins": -0.0012350091710686684, + "rewards/rejected": 0.015892982482910156, + "step": 8 + }, + { + "epoch": 0.01167883211678832, + "grad_norm": 1.2461222410202026, + "learning_rate": 1.744186046511628e-06, + "logits/chosen": 5.435908317565918, + "logits/rejected": 5.494542121887207, + "logps/chosen": -363.2003479003906, + "logps/rejected": -389.67376708984375, + "loss": 0.693260908126831, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.028497030958533287, + "rewards/margins": 0.00012636138126254082, + "rewards/rejected": -0.028623390942811966, + "step": 10 + }, + { + "epoch": 0.014014598540145985, + "grad_norm": 1.4030137062072754, + "learning_rate": 2.131782945736434e-06, + "logits/chosen": 5.3550801277160645, + "logits/rejected": 5.375768661499023, + "logps/chosen": -370.96429443359375, + "logps/rejected": -402.4786071777344, + "loss": 0.6882913112640381, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.01622028276324272, + "rewards/margins": 0.010086631402373314, + "rewards/rejected": 0.006133650429546833, + "step": 12 + }, + { + "epoch": 0.01635036496350365, + "grad_norm": 1.1157702207565308, + "learning_rate": 2.5193798449612402e-06, + "logits/chosen": 5.515308380126953, + "logits/rejected": 5.561104774475098, + "logps/chosen": -336.7254333496094, + "logps/rejected": -357.52203369140625, + "loss": 0.6896716356277466, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.017319394275546074, + "rewards/margins": 0.007328510750085115, + "rewards/rejected": -0.024647902697324753, + "step": 14 + }, + { + "epoch": 0.018686131386861315, + "grad_norm": 0.9470655918121338, + "learning_rate": 2.9069767441860468e-06, + "logits/chosen": 5.553088665008545, + "logits/rejected": 5.582851886749268, + "logps/chosen": -415.6842041015625, + "logps/rejected": -441.1054992675781, + "loss": 0.6904245018959045, + "rewards/accuracies": 0.5625, + "rewards/chosen": 0.03270244598388672, + "rewards/margins": 0.005826758686453104, + "rewards/rejected": 0.026875685900449753, + "step": 16 + }, + { + "epoch": 0.021021897810218976, + "grad_norm": 1.4397331476211548, + "learning_rate": 3.2945736434108533e-06, + "logits/chosen": 5.440742015838623, + "logits/rejected": 5.489529132843018, + "logps/chosen": -392.46221923828125, + "logps/rejected": -420.1712341308594, + "loss": 0.683630108833313, + "rewards/accuracies": 0.5625, + "rewards/chosen": 0.011020278558135033, + "rewards/margins": 0.01951923407614231, + "rewards/rejected": -0.008498954586684704, + "step": 18 + }, + { + "epoch": 0.02335766423357664, + "grad_norm": 1.5941083431243896, + "learning_rate": 3.6821705426356594e-06, + "logits/chosen": 5.318347930908203, + "logits/rejected": 5.397945404052734, + "logps/chosen": -345.2221374511719, + "logps/rejected": -365.9537048339844, + "loss": 0.6902388334274292, + "rewards/accuracies": 0.5625, + "rewards/chosen": 0.006536484230309725, + "rewards/margins": 0.006013393402099609, + "rewards/rejected": 0.0005230908282101154, + "step": 20 + }, + { + "epoch": 0.025693430656934305, + "grad_norm": 1.1363905668258667, + "learning_rate": 4.0697674418604655e-06, + "logits/chosen": 5.632981300354004, + "logits/rejected": 5.7265520095825195, + "logps/chosen": -347.9439697265625, + "logps/rejected": -370.65777587890625, + "loss": 0.691262423992157, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.011908342130482197, + "rewards/margins": 0.004538153763860464, + "rewards/rejected": 0.007370188366621733, + "step": 22 + }, + { + "epoch": 0.02802919708029197, + "grad_norm": 1.0684627294540405, + "learning_rate": 4.457364341085272e-06, + "logits/chosen": 5.35699987411499, + "logits/rejected": 5.405580520629883, + "logps/chosen": -347.1539001464844, + "logps/rejected": -377.6044921875, + "loss": 0.6769475936889648, + "rewards/accuracies": 0.875, + "rewards/chosen": 0.01244144607335329, + "rewards/margins": 0.03289356082677841, + "rewards/rejected": -0.020452119410037994, + "step": 24 + }, + { + "epoch": 0.029197080291970802, + "eval_logits/chosen": 5.295141220092773, + "eval_logits/rejected": 5.345211029052734, + "eval_logps/chosen": -370.1607666015625, + "eval_logps/rejected": -395.7251892089844, + "eval_loss": 0.6836819648742676, + "eval_rewards/accuracies": 0.665354311466217, + "eval_rewards/chosen": 0.024636391550302505, + "eval_rewards/margins": 0.019555427134037018, + "eval_rewards/rejected": 0.005080964416265488, + "eval_runtime": 454.4375, + "eval_samples_per_second": 1.677, + "eval_steps_per_second": 1.677, + "step": 25 + }, + { + "epoch": 0.030364963503649634, + "grad_norm": 1.592353105545044, + "learning_rate": 4.844961240310078e-06, + "logits/chosen": 5.157042026519775, + "logits/rejected": 5.244912147521973, + "logps/chosen": -387.54876708984375, + "logps/rejected": -412.0630187988281, + "loss": 0.6849788427352905, + "rewards/accuracies": 0.625, + "rewards/chosen": 0.026385309174656868, + "rewards/margins": 0.016966437920928, + "rewards/rejected": 0.009418869391083717, + "step": 26 + }, + { + "epoch": 0.0327007299270073, + "grad_norm": 1.3181558847427368, + "learning_rate": 5.232558139534884e-06, + "logits/chosen": 5.545513153076172, + "logits/rejected": 5.54400110244751, + "logps/chosen": -360.41650390625, + "logps/rejected": -391.2162170410156, + "loss": 0.675189733505249, + "rewards/accuracies": 0.8125, + "rewards/chosen": 0.045946408063173294, + "rewards/margins": 0.03675585240125656, + "rewards/rejected": 0.009190557524561882, + "step": 28 + }, + { + "epoch": 0.035036496350364967, + "grad_norm": 1.443650722503662, + "learning_rate": 5.620155038759691e-06, + "logits/chosen": 5.136168003082275, + "logits/rejected": 5.239327907562256, + "logps/chosen": -378.6293640136719, + "logps/rejected": -405.3665466308594, + "loss": 0.6752142310142517, + "rewards/accuracies": 0.8125, + "rewards/chosen": 0.04194517061114311, + "rewards/margins": 0.03668833151459694, + "rewards/rejected": 0.005256845150142908, + "step": 30 + }, + { + "epoch": 0.03737226277372263, + "grad_norm": 1.379568338394165, + "learning_rate": 6.007751937984497e-06, + "logits/chosen": 5.411487579345703, + "logits/rejected": 5.427243232727051, + "logps/chosen": -358.5367736816406, + "logps/rejected": -382.4181213378906, + "loss": 0.6700581312179565, + "rewards/accuracies": 0.875, + "rewards/chosen": 0.06658173352479935, + "rewards/margins": 0.047193337231874466, + "rewards/rejected": 0.019388392567634583, + "step": 32 + }, + { + "epoch": 0.039708029197080295, + "grad_norm": 1.3260451555252075, + "learning_rate": 6.395348837209303e-06, + "logits/chosen": 5.207217216491699, + "logits/rejected": 5.254848480224609, + "logps/chosen": -326.9423828125, + "logps/rejected": -346.52081298828125, + "loss": 0.6610866785049438, + "rewards/accuracies": 0.9375, + "rewards/chosen": 0.07038869708776474, + "rewards/margins": 0.06587495654821396, + "rewards/rejected": 0.0045137410052120686, + "step": 34 + }, + { + "epoch": 0.04204379562043795, + "grad_norm": 1.5776340961456299, + "learning_rate": 6.782945736434108e-06, + "logits/chosen": 5.550538063049316, + "logits/rejected": 5.6374335289001465, + "logps/chosen": -359.9613952636719, + "logps/rejected": -384.31683349609375, + "loss": 0.6281551718711853, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.11738375574350357, + "rewards/margins": 0.1363767683506012, + "rewards/rejected": -0.018992995843291283, + "step": 36 + }, + { + "epoch": 0.04437956204379562, + "grad_norm": 1.8589071035385132, + "learning_rate": 7.170542635658915e-06, + "logits/chosen": 5.39143180847168, + "logits/rejected": 5.412029266357422, + "logps/chosen": -325.8544616699219, + "logps/rejected": -351.9772644042969, + "loss": 0.6270830631256104, + "rewards/accuracies": 0.9375, + "rewards/chosen": 0.1617884635925293, + "rewards/margins": 0.1388537436723709, + "rewards/rejected": 0.022934721782803535, + "step": 38 + }, + { + "epoch": 0.04671532846715328, + "grad_norm": 1.3231571912765503, + "learning_rate": 7.558139534883721e-06, + "logits/chosen": 5.189720153808594, + "logits/rejected": 5.203127384185791, + "logps/chosen": -343.3839111328125, + "logps/rejected": -374.7848205566406, + "loss": 0.641180157661438, + "rewards/accuracies": 0.875, + "rewards/chosen": 0.15248623490333557, + "rewards/margins": 0.11158552765846252, + "rewards/rejected": 0.04090070724487305, + "step": 40 + }, + { + "epoch": 0.049051094890510946, + "grad_norm": 2.5331315994262695, + "learning_rate": 7.945736434108528e-06, + "logits/chosen": 5.420182228088379, + "logits/rejected": 5.45302677154541, + "logps/chosen": -341.813720703125, + "logps/rejected": -372.44952392578125, + "loss": 0.6093671321868896, + "rewards/accuracies": 0.9375, + "rewards/chosen": 0.2898235321044922, + "rewards/margins": 0.18158456683158875, + "rewards/rejected": 0.10823898762464523, + "step": 42 + }, + { + "epoch": 0.05138686131386861, + "grad_norm": 1.5247384309768677, + "learning_rate": 8.333333333333334e-06, + "logits/chosen": 5.383636951446533, + "logits/rejected": 5.397551536560059, + "logps/chosen": -354.49627685546875, + "logps/rejected": -376.88818359375, + "loss": 0.5815833210945129, + "rewards/accuracies": 0.8125, + "rewards/chosen": 0.32459571957588196, + "rewards/margins": 0.2510552406311035, + "rewards/rejected": 0.07354050129652023, + "step": 44 + }, + { + "epoch": 0.053722627737226275, + "grad_norm": 2.0814144611358643, + "learning_rate": 8.72093023255814e-06, + "logits/chosen": 5.269731044769287, + "logits/rejected": 5.287116050720215, + "logps/chosen": -331.1025390625, + "logps/rejected": -362.90118408203125, + "loss": 0.5269681215286255, + "rewards/accuracies": 0.9375, + "rewards/chosen": 0.6465227603912354, + "rewards/margins": 0.37582656741142273, + "rewards/rejected": 0.27069616317749023, + "step": 46 + }, + { + "epoch": 0.05605839416058394, + "grad_norm": 1.769063115119934, + "learning_rate": 9.108527131782946e-06, + "logits/chosen": 5.472540855407715, + "logits/rejected": 5.465417861938477, + "logps/chosen": -369.40283203125, + "logps/rejected": -400.18438720703125, + "loss": 0.5066201686859131, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6377636194229126, + "rewards/margins": 0.42650213837623596, + "rewards/rejected": 0.21126146614551544, + "step": 48 + }, + { + "epoch": 0.058394160583941604, + "grad_norm": 2.84169602394104, + "learning_rate": 9.496124031007753e-06, + "logits/chosen": 5.050387382507324, + "logits/rejected": 5.112288951873779, + "logps/chosen": -363.4556579589844, + "logps/rejected": -397.8169860839844, + "loss": 0.529259979724884, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7923164367675781, + "rewards/margins": 0.3787059783935547, + "rewards/rejected": 0.4136104881763458, + "step": 50 + }, + { + "epoch": 0.058394160583941604, + "eval_logits/chosen": 5.22359037399292, + "eval_logits/rejected": 5.286833763122559, + "eval_logps/chosen": -361.462890625, + "eval_logps/rejected": -392.5708312988281, + "eval_loss": 0.4610801041126251, + "eval_rewards/accuracies": 0.9619422554969788, + "eval_rewards/chosen": 0.8944254517555237, + "eval_rewards/margins": 0.5739086270332336, + "eval_rewards/rejected": 0.3205168545246124, + "eval_runtime": 454.5598, + "eval_samples_per_second": 1.676, + "eval_steps_per_second": 1.676, + "step": 50 + }, + { + "epoch": 0.06072992700729927, + "grad_norm": 1.6907895803451538, + "learning_rate": 9.883720930232558e-06, + "logits/chosen": 5.486469268798828, + "logits/rejected": 5.541717529296875, + "logps/chosen": -343.4534606933594, + "logps/rejected": -379.39508056640625, + "loss": 0.44602835178375244, + "rewards/accuracies": 0.9375, + "rewards/chosen": 0.9869746565818787, + "rewards/margins": 0.6056646108627319, + "rewards/rejected": 0.3813100755214691, + "step": 52 + }, + { + "epoch": 0.06306569343065693, + "grad_norm": 1.9458682537078857, + "learning_rate": 1.0271317829457365e-05, + "logits/chosen": 5.169528961181641, + "logits/rejected": 5.2688751220703125, + "logps/chosen": -379.5437316894531, + "logps/rejected": -401.5587463378906, + "loss": 0.43609702587127686, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7794930934906006, + "rewards/margins": 0.6265671253204346, + "rewards/rejected": 0.15292587876319885, + "step": 54 + }, + { + "epoch": 0.0654014598540146, + "grad_norm": 2.1266520023345947, + "learning_rate": 1.065891472868217e-05, + "logits/chosen": 5.097426414489746, + "logits/rejected": 5.15327262878418, + "logps/chosen": -378.0788269042969, + "logps/rejected": -413.27392578125, + "loss": 0.3928414583206177, + "rewards/accuracies": 0.9375, + "rewards/chosen": 1.274291753768921, + "rewards/margins": 0.7864217758178711, + "rewards/rejected": 0.4878700375556946, + "step": 56 + }, + { + "epoch": 0.06773722627737226, + "grad_norm": 1.5381489992141724, + "learning_rate": 1.1046511627906977e-05, + "logits/chosen": 5.138954162597656, + "logits/rejected": 5.20254373550415, + "logps/chosen": -372.93438720703125, + "logps/rejected": -401.8287658691406, + "loss": 0.35855019092559814, + "rewards/accuracies": 0.875, + "rewards/chosen": 1.2897911071777344, + "rewards/margins": 0.9354276061058044, + "rewards/rejected": 0.35436347126960754, + "step": 58 + }, + { + "epoch": 0.07007299270072993, + "grad_norm": 2.358330726623535, + "learning_rate": 1.1434108527131783e-05, + "logits/chosen": 5.071888446807861, + "logits/rejected": 5.187964916229248, + "logps/chosen": -360.984619140625, + "logps/rejected": -392.3192138671875, + "loss": 0.42801612615585327, + "rewards/accuracies": 0.875, + "rewards/chosen": 1.3823509216308594, + "rewards/margins": 0.729066014289856, + "rewards/rejected": 0.6532848477363586, + "step": 60 + }, + { + "epoch": 0.07240875912408759, + "grad_norm": 2.177586317062378, + "learning_rate": 1.182170542635659e-05, + "logits/chosen": 5.264093399047852, + "logits/rejected": 5.310842990875244, + "logps/chosen": -364.808349609375, + "logps/rejected": -401.0321044921875, + "loss": 0.31365492939949036, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6637591123580933, + "rewards/margins": 1.0887457132339478, + "rewards/rejected": 0.5750135183334351, + "step": 62 + }, + { + "epoch": 0.07474452554744526, + "grad_norm": 1.697789192199707, + "learning_rate": 1.2209302325581395e-05, + "logits/chosen": 5.191982269287109, + "logits/rejected": 5.261416912078857, + "logps/chosen": -359.8249816894531, + "logps/rejected": -397.2122497558594, + "loss": 0.3037749230861664, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6470392942428589, + "rewards/margins": 1.114844799041748, + "rewards/rejected": 0.5321945548057556, + "step": 64 + }, + { + "epoch": 0.07708029197080292, + "grad_norm": 1.3219914436340332, + "learning_rate": 1.2596899224806202e-05, + "logits/chosen": 5.293405532836914, + "logits/rejected": 5.3094048500061035, + "logps/chosen": -352.3752136230469, + "logps/rejected": -392.6779479980469, + "loss": 0.25026455521583557, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5671364068984985, + "rewards/margins": 1.4098074436187744, + "rewards/rejected": 0.15732917189598083, + "step": 66 + }, + { + "epoch": 0.07941605839416059, + "grad_norm": 1.8173967599868774, + "learning_rate": 1.2984496124031009e-05, + "logits/chosen": 5.025746822357178, + "logits/rejected": 5.114965438842773, + "logps/chosen": -319.99700927734375, + "logps/rejected": -364.115234375, + "loss": 0.3108353912830353, + "rewards/accuracies": 0.9375, + "rewards/chosen": 1.4788665771484375, + "rewards/margins": 1.2637410163879395, + "rewards/rejected": 0.2151254564523697, + "step": 68 + }, + { + "epoch": 0.08175182481751825, + "grad_norm": 1.0658400058746338, + "learning_rate": 1.3372093023255814e-05, + "logits/chosen": 4.945235729217529, + "logits/rejected": 4.959147930145264, + "logps/chosen": -383.84033203125, + "logps/rejected": -431.7752685546875, + "loss": 0.22991834580898285, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3950352668762207, + "rewards/margins": 1.4965243339538574, + "rewards/rejected": -0.1014888733625412, + "step": 70 + }, + { + "epoch": 0.0840875912408759, + "grad_norm": 1.0350896120071411, + "learning_rate": 1.375968992248062e-05, + "logits/chosen": 5.00426721572876, + "logits/rejected": 5.120238780975342, + "logps/chosen": -350.9471435546875, + "logps/rejected": -382.6837158203125, + "loss": 0.22603684663772583, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2978975772857666, + "rewards/margins": 1.644275426864624, + "rewards/rejected": -0.34637776017189026, + "step": 72 + }, + { + "epoch": 0.08642335766423358, + "grad_norm": 1.1595423221588135, + "learning_rate": 1.4147286821705426e-05, + "logits/chosen": 4.890130043029785, + "logits/rejected": 4.9504714012146, + "logps/chosen": -352.34967041015625, + "logps/rejected": -399.23028564453125, + "loss": 0.18921935558319092, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1984589099884033, + "rewards/margins": 1.7495291233062744, + "rewards/rejected": -0.5510700941085815, + "step": 74 + }, + { + "epoch": 0.08759124087591241, + "eval_logits/chosen": 4.930174827575684, + "eval_logits/rejected": 5.032296657562256, + "eval_logps/chosen": -359.19647216796875, + "eval_logps/rejected": -405.1120300292969, + "eval_loss": 0.16020436584949493, + "eval_rewards/accuracies": 0.9960629940032959, + "eval_rewards/chosen": 1.1210675239562988, + "eval_rewards/margins": 2.0546727180480957, + "eval_rewards/rejected": -0.9336051344871521, + "eval_runtime": 454.3435, + "eval_samples_per_second": 1.677, + "eval_steps_per_second": 1.677, + "step": 75 + }, + { + "epoch": 0.08875912408759123, + "grad_norm": 1.1433167457580566, + "learning_rate": 1.4534883720930233e-05, + "logits/chosen": 5.037275314331055, + "logits/rejected": 5.1315507888793945, + "logps/chosen": -313.110595703125, + "logps/rejected": -356.1000061035156, + "loss": 0.15998858213424683, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2128857374191284, + "rewards/margins": 2.0945115089416504, + "rewards/rejected": -0.8816256523132324, + "step": 76 + }, + { + "epoch": 0.0910948905109489, + "grad_norm": 0.9839214086532593, + "learning_rate": 1.4922480620155039e-05, + "logits/chosen": 4.817085266113281, + "logits/rejected": 4.874035835266113, + "logps/chosen": -366.2629089355469, + "logps/rejected": -405.7989196777344, + "loss": 0.1894684135913849, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0605502128601074, + "rewards/margins": 1.90762460231781, + "rewards/rejected": -0.8470743894577026, + "step": 78 + }, + { + "epoch": 0.09343065693430656, + "grad_norm": 0.9212782979011536, + "learning_rate": 1.5310077519379846e-05, + "logits/chosen": 5.046716690063477, + "logits/rejected": 5.157979965209961, + "logps/chosen": -348.0658264160156, + "logps/rejected": -395.23870849609375, + "loss": 0.15948188304901123, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.676516056060791, + "rewards/margins": 2.167430877685547, + "rewards/rejected": -1.4909145832061768, + "step": 80 + }, + { + "epoch": 0.09576642335766423, + "grad_norm": 0.9820688366889954, + "learning_rate": 1.569767441860465e-05, + "logits/chosen": 4.690741539001465, + "logits/rejected": 4.771791458129883, + "logps/chosen": -378.8666076660156, + "logps/rejected": -436.9100036621094, + "loss": 0.12085139006376266, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8719685077667236, + "rewards/margins": 2.646538257598877, + "rewards/rejected": -1.7745698690414429, + "step": 82 + }, + { + "epoch": 0.09810218978102189, + "grad_norm": 0.66785728931427, + "learning_rate": 1.608527131782946e-05, + "logits/chosen": 4.880465984344482, + "logits/rejected": 4.961792945861816, + "logps/chosen": -346.51214599609375, + "logps/rejected": -400.1110534667969, + "loss": 0.08720710873603821, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1337480545043945, + "rewards/margins": 2.903944253921509, + "rewards/rejected": -1.7701961994171143, + "step": 84 + }, + { + "epoch": 0.10043795620437956, + "grad_norm": 0.5760660767555237, + "learning_rate": 1.647286821705426e-05, + "logits/chosen": 4.464397430419922, + "logits/rejected": 4.680055618286133, + "logps/chosen": -341.7489318847656, + "logps/rejected": -398.322021484375, + "loss": 0.07942983508110046, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2459325790405273, + "rewards/margins": 3.0152552127838135, + "rewards/rejected": -1.7693227529525757, + "step": 86 + }, + { + "epoch": 0.10277372262773722, + "grad_norm": 1.6020294427871704, + "learning_rate": 1.686046511627907e-05, + "logits/chosen": 4.563863277435303, + "logits/rejected": 4.680974960327148, + "logps/chosen": -344.9147644042969, + "logps/rejected": -395.4453125, + "loss": 0.1258174479007721, + "rewards/accuracies": 0.9375, + "rewards/chosen": 1.0706769227981567, + "rewards/margins": 3.118717670440674, + "rewards/rejected": -2.0480403900146484, + "step": 88 + }, + { + "epoch": 0.10510948905109489, + "grad_norm": 0.46413859724998474, + "learning_rate": 1.7248062015503875e-05, + "logits/chosen": 4.4989237785339355, + "logits/rejected": 4.673248291015625, + "logps/chosen": -326.9678649902344, + "logps/rejected": -388.4164123535156, + "loss": 0.06663060188293457, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4128761291503906, + "rewards/margins": 3.760685920715332, + "rewards/rejected": -2.3478102684020996, + "step": 90 + }, + { + "epoch": 0.10744525547445255, + "grad_norm": 0.6699568629264832, + "learning_rate": 1.7635658914728684e-05, + "logits/chosen": 4.7294535636901855, + "logits/rejected": 4.813880920410156, + "logps/chosen": -362.7267150878906, + "logps/rejected": -439.2985534667969, + "loss": 0.04481709748506546, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.477597713470459, + "rewards/margins": 4.37883186340332, + "rewards/rejected": -2.9012341499328613, + "step": 92 + }, + { + "epoch": 0.10978102189781022, + "grad_norm": 0.4152977168560028, + "learning_rate": 1.802325581395349e-05, + "logits/chosen": 4.785149574279785, + "logits/rejected": 4.891542434692383, + "logps/chosen": -381.59246826171875, + "logps/rejected": -444.2817687988281, + "loss": 0.05632612109184265, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.71366286277771, + "rewards/margins": 3.4584720134735107, + "rewards/rejected": -2.744809150695801, + "step": 94 + }, + { + "epoch": 0.11211678832116788, + "grad_norm": 0.3152717649936676, + "learning_rate": 1.8410852713178295e-05, + "logits/chosen": 4.603940486907959, + "logits/rejected": 4.804995536804199, + "logps/chosen": -356.7286376953125, + "logps/rejected": -414.69635009765625, + "loss": 0.040920041501522064, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7566397190093994, + "rewards/margins": 4.020595550537109, + "rewards/rejected": -2.263956069946289, + "step": 96 + }, + { + "epoch": 0.11445255474452555, + "grad_norm": 0.37698569893836975, + "learning_rate": 1.8798449612403103e-05, + "logits/chosen": 4.558542728424072, + "logits/rejected": 4.690641403198242, + "logps/chosen": -339.794189453125, + "logps/rejected": -413.8865966796875, + "loss": 0.025794224813580513, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3867536783218384, + "rewards/margins": 4.6542744636535645, + "rewards/rejected": -3.2675204277038574, + "step": 98 + }, + { + "epoch": 0.11678832116788321, + "grad_norm": 0.15023073554039001, + "learning_rate": 1.918604651162791e-05, + "logits/chosen": 4.387497425079346, + "logits/rejected": 4.494588375091553, + "logps/chosen": -346.2568054199219, + "logps/rejected": -418.9315185546875, + "loss": 0.015155203640460968, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7938623428344727, + "rewards/margins": 4.942529201507568, + "rewards/rejected": -3.1486666202545166, + "step": 100 + }, + { + "epoch": 0.11678832116788321, + "eval_logits/chosen": 4.285891056060791, + "eval_logits/rejected": 4.425926208496094, + "eval_logps/chosen": -353.15850830078125, + "eval_logps/rejected": -424.4124755859375, + "eval_loss": 0.04428481683135033, + "eval_rewards/accuracies": 0.9921259880065918, + "eval_rewards/chosen": 1.7248634099960327, + "eval_rewards/margins": 4.588510513305664, + "eval_rewards/rejected": -2.863647222518921, + "eval_runtime": 454.7251, + "eval_samples_per_second": 1.676, + "eval_steps_per_second": 1.676, + "step": 100 + } + ], + "logging_steps": 2, + "max_steps": 2571, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 100, + "stateful_callbacks": { + "EarlyStoppingCallback": { + "args": { + "early_stopping_patience": 5, + "early_stopping_threshold": 0.001 + }, + "attributes": { + "early_stopping_patience_counter": 0 + } + }, + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 0.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/dpo_qwen_14B/checkpoint-100/training_args.bin b/dpo_qwen_14B/checkpoint-100/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..dd3891cc402abfdcd660f4a6f0aa91d707951765 --- /dev/null +++ b/dpo_qwen_14B/checkpoint-100/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:21875ef630d3e8f528dce67596a0d783fd5cf223e6e245a98026996d1f3d3ade +size 5752 diff --git a/dpo_qwen_14B/config_resolved.yaml b/dpo_qwen_14B/config_resolved.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d41a2ec68bdc6fa075c7599791d43a706b31b82c --- /dev/null +++ b/dpo_qwen_14B/config_resolved.yaml @@ -0,0 +1,93 @@ +run: + run_dir: ./runs/dpo_run_14b_v1 + seed: 42 +wandb: + enabled: true + project: dpo-training + entity: null + name: null + tags: + - dpo-lora + - preference-optimization + notes: null +model: + repo_id: ../../Models/Qwen2.5-Coder-14B-CPT-SFT + revision: null + base_local_dir: base_model + trust_remote_code: true + tokenizer_use_fast: true + device_map: auto + torch_dtype: bfloat16 + use_4bit: false + bnb_4bit_quant_type: nf4 + bnb_4bit_use_double_quant: false + bnb_4bit_compute_dtype: bfloat16 + attn_implementation: null +data: + train_jsonl: dpo_pairs_generated.jsonl + eval_jsonl: null + eval_split_ratio: 0.1 + prompt_field: prompt + chosen_field: chosen + rejected_field: rejected + score_field: f1_score + format_type: chatml + system_prompt: "You are a Hyperswitch Rust code analyzer. Identify functions/structs\ + \ that need modification for a given task.\n\n## Output Format\n\n##OUTPUT\nExplain\ + \ the data flow and why each component must change:\n- Flow: [Input \u2192 Processing\ + \ \u2192 Output with arrows]\n- For each component: \"The [ComponentName] ([path])\ + \ must [action] because [reason]\u2014without this, [consequence]\"\n- Explain\ + \ coupling between components\n\n##SELECT\nmodify::crates/path/to/file.rs::impl::ComponentName\n\ + add::crates/another/file.rs::function::AnotherComponent\n\n\n## Rules\n\n\ + 1. Use full paths: `remove::crates/folder/file.rs::Type::Name`\n2. Use `::` for\ + \ nested items: `status::StructName::Type::Name`\n3. Always explain \"must change\ + \ because\" and \"without this\"\n3. Types of components: function, struct, enum,\ + \ impl, trait\n4. If there is extra information (e.g., enum variants), include\ + \ that too.\n5. Start with ##OUTPUT, end with ##SELECT, terminate with \n" + max_length: 2048 + shuffle: true + num_proc: 4 +peft: + enabled: true + r: 16 + lora_alpha: 32 + lora_dropout: 0.05 + bias: none + target_modules: auto +dpo: + beta: 0.1 + label_smoothing: 0.0 + loss_type: sigmoid + use_reference_model: true + reference_free: false +train: + num_train_epochs: 3 + per_device_train_batch_size: 1 + per_device_eval_batch_size: 1 + gradient_accumulation_steps: 8 + learning_rate: 5e-5 + weight_decay: 0.0 + warmup_ratio: 0.1 + lr_scheduler_type: cosine + optim: adamw_torch + max_grad_norm: 1.0 + gradient_checkpointing: true + logging_steps: 2 + save_strategy: steps + save_steps: 100 + save_total_limit: 10 + evaluation_strategy: steps + eval_steps: 25 + load_best_model_at_end: true + early_stopping: + enabled: true + patience: 5 + min_delta: 0.001 + metric: eval_loss + mode: min + resume_from_checkpoint: auto +merge: + enabled: true + merged_dtype: float16 + max_shard_size: 2GB + output_dir: ./merged_14b_dpo_lora diff --git a/dpo_qwen_14B/logs/eval.jsonl b/dpo_qwen_14B/logs/eval.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..5d44cdce696bae474d17239c2204cf54ea5589d4 --- /dev/null +++ b/dpo_qwen_14B/logs/eval.jsonl @@ -0,0 +1,5 @@ +{"ts": "2025-12-26T16:09:16", "event": "eval", "step": 25, "epoch": 0.029197080291970802, "eval_loss": 0.6836819648742676, "eval_runtime": 454.4375, "eval_samples_per_second": 1.677, "eval_steps_per_second": 1.677, "eval_rewards/chosen": 0.024636391550302505, "eval_rewards/rejected": 0.005080964416265488, "eval_rewards/accuracies": 0.665354311466217, "eval_rewards/margins": 0.019555427134037018, "eval_logps/chosen": -370.1607666015625, "eval_logps/rejected": -395.7251892089844, "eval_logits/chosen": 5.295141220092773, "eval_logits/rejected": 5.345211029052734} +{"ts": "2025-12-26T16:20:56", "event": "eval", "step": 50, "epoch": 0.058394160583941604, "eval_loss": 0.4610801041126251, "eval_runtime": 454.5598, "eval_samples_per_second": 1.676, "eval_steps_per_second": 1.676, "eval_rewards/chosen": 0.8944254517555237, "eval_rewards/rejected": 0.3205168545246124, "eval_rewards/accuracies": 0.9619422554969788, "eval_rewards/margins": 0.5739086270332336, "eval_logps/chosen": -361.462890625, "eval_logps/rejected": -392.5708312988281, "eval_logits/chosen": 5.22359037399292, "eval_logits/rejected": 5.286833763122559} +{"ts": "2025-12-26T16:32:39", "event": "eval", "step": 75, "epoch": 0.08759124087591241, "eval_loss": 0.16020436584949493, "eval_runtime": 454.3435, "eval_samples_per_second": 1.677, "eval_steps_per_second": 1.677, "eval_rewards/chosen": 1.1210675239562988, "eval_rewards/rejected": -0.9336051344871521, "eval_rewards/accuracies": 0.9960629940032959, "eval_rewards/margins": 2.0546727180480957, "eval_logps/chosen": -359.19647216796875, "eval_logps/rejected": -405.1120300292969, "eval_logits/chosen": 4.930174827575684, "eval_logits/rejected": 5.032296657562256} +{"ts": "2025-12-26T16:44:21", "event": "eval", "step": 100, "epoch": 0.11678832116788321, "eval_loss": 0.04428481683135033, "eval_runtime": 454.7251, "eval_samples_per_second": 1.676, "eval_steps_per_second": 1.676, "eval_rewards/chosen": 1.7248634099960327, "eval_rewards/rejected": -2.863647222518921, "eval_rewards/accuracies": 0.9921259880065918, "eval_rewards/margins": 4.588510513305664, "eval_logps/chosen": -353.15850830078125, "eval_logps/rejected": -424.4124755859375, "eval_logits/chosen": 4.285891056060791, "eval_logits/rejected": 4.425926208496094} +{"ts": "2025-12-26T16:56:05", "event": "eval", "step": 125, "epoch": 0.145985401459854, "eval_loss": 0.024107323959469795, "eval_runtime": 454.8045, "eval_samples_per_second": 1.675, "eval_steps_per_second": 1.675, "eval_rewards/chosen": 0.5319492816925049, "eval_rewards/rejected": -6.150709629058838, "eval_rewards/accuracies": 0.9934383034706116, "eval_rewards/margins": 6.682660102844238, "eval_logps/chosen": -365.087646484375, "eval_logps/rejected": -457.28314208984375, "eval_logits/chosen": 3.6694726943969727, "eval_logits/rejected": 3.8436598777770996} diff --git a/dpo_qwen_14B/logs/train.jsonl b/dpo_qwen_14B/logs/train.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..be18c6c5df0923c85a5951802a612febb33a291f --- /dev/null +++ b/dpo_qwen_14B/logs/train.jsonl @@ -0,0 +1,78 @@ +{"ts": "2025-12-26T15:24:36", "event": "train_log", "step": 2, "epoch": 0.0023357664233576644, "progress_pct": 0.08, "epoch_pct": 0.08, "eta": "07:30:29", "max_grad_norm": 1.0, "loss": 0.6931473016738892, "grad_norm": 1.2424817085266113, "learning_rate": 1.9379844961240311e-07, "rewards/chosen": 0.0, "rewards/rejected": 0.0, "rewards/accuracies": 0.0, "rewards/margins": 0.0, "logps/chosen": -368.911865234375, "logps/rejected": -398.83880615234375, "logits/chosen": 5.179401397705078, "logits/rejected": 5.192930698394775} +{"ts": "2025-12-26T15:24:56", "event": "train_log", "step": 4, "epoch": 0.004671532846715329, "progress_pct": 0.16, "epoch_pct": 0.16, "eta": "07:14:49", "max_grad_norm": 1.0, "loss": 0.693317174911499, "grad_norm": 1.3884541988372803, "learning_rate": 5.813953488372093e-07, "rewards/chosen": 0.022540951147675514, "rewards/rejected": 0.022656824439764023, "rewards/accuracies": 0.5, "rewards/margins": -0.00011587224435061216, "logps/chosen": -338.257568359375, "logps/rejected": -366.88128662109375, "logits/chosen": 5.405174255371094, "logits/rejected": 5.456291675567627} +{"ts": "2025-12-26T15:57:54", "event": "train_log", "step": 2, "epoch": 0.0023357664233576644, "progress_pct": 0.08, "epoch_pct": 0.08, "eta": "07:30:57", "max_grad_norm": 1.0, "loss": 0.6931473016738892, "grad_norm": 1.242694616317749, "learning_rate": 1.9379844961240311e-07, "rewards/chosen": 0.0, "rewards/rejected": 0.0, "rewards/accuracies": 0.0, "rewards/margins": 0.0, "logps/chosen": -368.911865234375, "logps/rejected": -398.83880615234375, "logits/chosen": 5.179401397705078, "logits/rejected": 5.192930698394775} +{"ts": "2025-12-26T15:58:14", "event": "train_log", "step": 4, "epoch": 0.004671532846715329, "progress_pct": 0.16, "epoch_pct": 0.16, "eta": "07:15:10", "max_grad_norm": 1.0, "loss": 0.6949559450149536, "grad_norm": 1.392399787902832, "learning_rate": 5.813953488372093e-07, "rewards/chosen": 0.004504585638642311, "rewards/rejected": 0.007727146148681641, "rewards/accuracies": 0.625, "rewards/margins": -0.003222561441361904, "logps/chosen": -338.43792724609375, "logps/rejected": -367.03057861328125, "logits/chosen": 5.403897762298584, "logits/rejected": 5.4565606117248535} +{"ts": "2025-12-26T15:58:34", "event": "train_log", "step": 6, "epoch": 0.0070072992700729924, "progress_pct": 0.23, "epoch_pct": 0.23, "eta": "07:13:22", "max_grad_norm": 1.0, "loss": 0.689236581325531, "grad_norm": 1.066603183746338, "learning_rate": 9.689922480620155e-07, "rewards/chosen": -0.0034066196531057358, "rewards/rejected": -0.01166229322552681, "rewards/accuracies": 0.5625, "rewards/margins": 0.008255671709775925, "logps/chosen": -362.3431701660156, "logps/rejected": -387.5829772949219, "logits/chosen": 5.291868209838867, "logits/rejected": 5.328356742858887} +{"ts": "2025-12-26T15:58:54", "event": "train_log", "step": 8, "epoch": 0.009343065693430658, "progress_pct": 0.31, "epoch_pct": 0.31, "eta": "07:08:27", "max_grad_norm": 1.0, "loss": 0.6943775415420532, "grad_norm": 1.0005714893341064, "learning_rate": 1.3565891472868218e-06, "rewards/chosen": 0.014657974243164062, "rewards/rejected": 0.015892982482910156, "rewards/accuracies": 0.375, "rewards/margins": -0.0012350091710686684, "logps/chosen": -379.9283447265625, "logps/rejected": -389.0852355957031, "logits/chosen": 5.323437690734863, "logits/rejected": 5.410858631134033} +{"ts": "2025-12-26T15:59:13", "event": "train_log", "step": 10, "epoch": 0.01167883211678832, "progress_pct": 0.39, "epoch_pct": 0.39, "eta": "07:07:21", "max_grad_norm": 1.0, "loss": 0.693260908126831, "grad_norm": 1.2461222410202026, "learning_rate": 1.744186046511628e-06, "rewards/chosen": -0.028497030958533287, "rewards/rejected": -0.028623390942811966, "rewards/accuracies": 0.625, "rewards/margins": 0.00012636138126254082, "logps/chosen": -363.2003479003906, "logps/rejected": -389.67376708984375, "logits/chosen": 5.435908317565918, "logits/rejected": 5.494542121887207} +{"ts": "2025-12-26T15:59:34", "event": "train_log", "step": 12, "epoch": 0.014014598540145985, "progress_pct": 0.47, "epoch_pct": 0.47, "eta": "07:08:38", "max_grad_norm": 1.0, "loss": 0.6882913112640381, "grad_norm": 1.4030137062072754, "learning_rate": 2.131782945736434e-06, "rewards/chosen": 0.01622028276324272, "rewards/rejected": 0.006133650429546833, "rewards/accuracies": 0.5, "rewards/margins": 0.010086631402373314, "logps/chosen": -370.96429443359375, "logps/rejected": -402.4786071777344, "logits/chosen": 5.3550801277160645, "logits/rejected": 5.375768661499023} +{"ts": "2025-12-26T15:59:52", "event": "train_log", "step": 14, "epoch": 0.01635036496350365, "progress_pct": 0.54, "epoch_pct": 0.55, "eta": "07:02:49", "max_grad_norm": 1.0, "loss": 0.6896716356277466, "grad_norm": 1.1157702207565308, "learning_rate": 2.5193798449612402e-06, "rewards/chosen": -0.017319394275546074, "rewards/rejected": -0.024647902697324753, "rewards/accuracies": 0.625, "rewards/margins": 0.007328510750085115, "logps/chosen": -336.7254333496094, "logps/rejected": -357.52203369140625, "logits/chosen": 5.515308380126953, "logits/rejected": 5.561104774475098} +{"ts": "2025-12-26T16:00:12", "event": "train_log", "step": 16, "epoch": 0.018686131386861315, "progress_pct": 0.62, "epoch_pct": 0.62, "eta": "07:03:25", "max_grad_norm": 1.0, "loss": 0.6904245018959045, "grad_norm": 0.9470655918121338, "learning_rate": 2.9069767441860468e-06, "rewards/chosen": 0.03270244598388672, "rewards/rejected": 0.026875685900449753, "rewards/accuracies": 0.5625, "rewards/margins": 0.005826758686453104, "logps/chosen": -415.6842041015625, "logps/rejected": -441.1054992675781, "logits/chosen": 5.553088665008545, "logits/rejected": 5.582851886749268} +{"ts": "2025-12-26T16:00:33", "event": "train_log", "step": 18, "epoch": 0.021021897810218976, "progress_pct": 0.7, "epoch_pct": 0.7, "eta": "07:04:54", "max_grad_norm": 1.0, "loss": 0.683630108833313, "grad_norm": 1.4397331476211548, "learning_rate": 3.2945736434108533e-06, "rewards/chosen": 0.011020278558135033, "rewards/rejected": -0.008498954586684704, "rewards/accuracies": 0.5625, "rewards/margins": 0.01951923407614231, "logps/chosen": -392.46221923828125, "logps/rejected": -420.1712341308594, "logits/chosen": 5.440742015838623, "logits/rejected": 5.489529132843018} +{"ts": "2025-12-26T16:00:52", "event": "train_log", "step": 20, "epoch": 0.02335766423357664, "progress_pct": 0.78, "epoch_pct": 0.78, "eta": "07:03:26", "max_grad_norm": 1.0, "loss": 0.6902388334274292, "grad_norm": 1.5941083431243896, "learning_rate": 3.6821705426356594e-06, "rewards/chosen": 0.006536484230309725, "rewards/rejected": 0.0005230908282101154, "rewards/accuracies": 0.5625, "rewards/margins": 0.006013393402099609, "logps/chosen": -345.2221374511719, "logps/rejected": -365.9537048339844, "logits/chosen": 5.318347930908203, "logits/rejected": 5.397945404052734} +{"ts": "2025-12-26T16:01:12", "event": "train_log", "step": 22, "epoch": 0.025693430656934305, "progress_pct": 0.86, "epoch_pct": 0.86, "eta": "07:03:12", "max_grad_norm": 1.0, "loss": 0.691262423992157, "grad_norm": 1.1363905668258667, "learning_rate": 4.0697674418604655e-06, "rewards/chosen": 0.011908342130482197, "rewards/rejected": 0.007370188366621733, "rewards/accuracies": 0.5, "rewards/margins": 0.004538153763860464, "logps/chosen": -347.9439697265625, "logps/rejected": -370.65777587890625, "logits/chosen": 5.632981300354004, "logits/rejected": 5.7265520095825195} +{"ts": "2025-12-26T16:01:32", "event": "train_log", "step": 24, "epoch": 0.02802919708029197, "progress_pct": 0.93, "epoch_pct": 0.93, "eta": "07:02:08", "max_grad_norm": 1.0, "loss": 0.6769475936889648, "grad_norm": 1.0684627294540405, "learning_rate": 4.457364341085272e-06, "rewards/chosen": 0.01244144607335329, "rewards/rejected": -0.020452119410037994, "rewards/accuracies": 0.875, "rewards/margins": 0.03289356082677841, "logps/chosen": -347.1539001464844, "logps/rejected": -377.6044921875, "logits/chosen": 5.35699987411499, "logits/rejected": 5.405580520629883} +{"ts": "2025-12-26T16:09:16", "event": "train_log", "step": 25, "epoch": 0.029197080291970802, "progress_pct": 0.97, "epoch_pct": 0.97, "eta": "19:53:32", "max_grad_norm": 1.0, "eval_loss": 0.6836819648742676, "eval_runtime": 454.4375, "eval_samples_per_second": 1.677, "eval_steps_per_second": 1.677, "eval_rewards/chosen": 0.024636391550302505, "eval_rewards/rejected": 0.005080964416265488, "eval_rewards/accuracies": 0.665354311466217, "eval_rewards/margins": 0.019555427134037018, "eval_logps/chosen": -370.1607666015625, "eval_logps/rejected": -395.7251892089844, "eval_logits/chosen": 5.295141220092773, "eval_logits/rejected": 5.345211029052734} +{"ts": "2025-12-26T16:09:27", "event": "train_log", "step": 26, "epoch": 0.030364963503649634, "progress_pct": 1.01, "epoch_pct": 1.01, "eta": "19:24:28", "max_grad_norm": 1.0, "loss": 0.6849788427352905, "grad_norm": 1.592353105545044, "learning_rate": 4.844961240310078e-06, "rewards/chosen": 0.026385309174656868, "rewards/rejected": 0.009418869391083717, "rewards/accuracies": 0.625, "rewards/margins": 0.016966437920928, "logps/chosen": -387.54876708984375, "logps/rejected": -412.0630187988281, "logits/chosen": 5.157042026519775, "logits/rejected": 5.244912147521973} +{"ts": "2025-12-26T16:09:47", "event": "train_log", "step": 28, "epoch": 0.0327007299270073, "progress_pct": 1.09, "epoch_pct": 1.09, "eta": "18:30:11", "max_grad_norm": 1.0, "loss": 0.675189733505249, "grad_norm": 1.3181558847427368, "learning_rate": 5.232558139534884e-06, "rewards/chosen": 0.045946408063173294, "rewards/rejected": 0.009190557524561882, "rewards/accuracies": 0.8125, "rewards/margins": 0.03675585240125656, "logps/chosen": -360.41650390625, "logps/rejected": -391.2162170410156, "logits/chosen": 5.545513153076172, "logits/rejected": 5.54400110244751} +{"ts": "2025-12-26T16:10:07", "event": "train_log", "step": 30, "epoch": 0.035036496350364967, "progress_pct": 1.17, "epoch_pct": 1.17, "eta": "17:43:26", "max_grad_norm": 1.0, "loss": 0.6752142310142517, "grad_norm": 1.443650722503662, "learning_rate": 5.620155038759691e-06, "rewards/chosen": 0.04194517061114311, "rewards/rejected": 0.005256845150142908, "rewards/accuracies": 0.8125, "rewards/margins": 0.03668833151459694, "logps/chosen": -378.6293640136719, "logps/rejected": -405.3665466308594, "logits/chosen": 5.136168003082275, "logits/rejected": 5.239327907562256} +{"ts": "2025-12-26T16:10:26", "event": "train_log", "step": 32, "epoch": 0.03737226277372263, "progress_pct": 1.24, "epoch_pct": 1.25, "eta": "17:01:56", "max_grad_norm": 1.0, "loss": 0.6700581312179565, "grad_norm": 1.379568338394165, "learning_rate": 6.007751937984497e-06, "rewards/chosen": 0.06658173352479935, "rewards/rejected": 0.019388392567634583, "rewards/accuracies": 0.875, "rewards/margins": 0.047193337231874466, "logps/chosen": -358.5367736816406, "logps/rejected": -382.4181213378906, "logits/chosen": 5.411487579345703, "logits/rejected": 5.427243232727051} +{"ts": "2025-12-26T16:10:44", "event": "train_log", "step": 34, "epoch": 0.039708029197080295, "progress_pct": 1.32, "epoch_pct": 1.32, "eta": "16:23:25", "max_grad_norm": 1.0, "loss": 0.6610866785049438, "grad_norm": 1.3260451555252075, "learning_rate": 6.395348837209303e-06, "rewards/chosen": 0.07038869708776474, "rewards/rejected": 0.0045137410052120686, "rewards/accuracies": 0.9375, "rewards/margins": 0.06587495654821396, "logps/chosen": -326.9423828125, "logps/rejected": -346.52081298828125, "logits/chosen": 5.207217216491699, "logits/rejected": 5.254848480224609} +{"ts": "2025-12-26T16:11:04", "event": "train_log", "step": 36, "epoch": 0.04204379562043795, "progress_pct": 1.4, "epoch_pct": 1.4, "eta": "15:51:44", "max_grad_norm": 1.0, "loss": 0.6281551718711853, "grad_norm": 1.5776340961456299, "learning_rate": 6.782945736434108e-06, "rewards/chosen": 0.11738375574350357, "rewards/rejected": -0.018992995843291283, "rewards/accuracies": 1.0, "rewards/margins": 0.1363767683506012, "logps/chosen": -359.9613952636719, "logps/rejected": -384.31683349609375, "logits/chosen": 5.550538063049316, "logits/rejected": 5.6374335289001465} +{"ts": "2025-12-26T16:11:23", "event": "train_log", "step": 38, "epoch": 0.04437956204379562, "progress_pct": 1.48, "epoch_pct": 1.48, "eta": "15:21:57", "max_grad_norm": 1.0, "loss": 0.6270830631256104, "grad_norm": 1.8589071035385132, "learning_rate": 7.170542635658915e-06, "rewards/chosen": 0.1617884635925293, "rewards/rejected": 0.022934721782803535, "rewards/accuracies": 0.9375, "rewards/margins": 0.1388537436723709, "logps/chosen": -325.8544616699219, "logps/rejected": -351.9772644042969, "logits/chosen": 5.39143180847168, "logits/rejected": 5.412029266357422} +{"ts": "2025-12-26T16:11:43", "event": "train_log", "step": 40, "epoch": 0.04671532846715328, "progress_pct": 1.56, "epoch_pct": 1.56, "eta": "14:56:25", "max_grad_norm": 1.0, "loss": 0.641180157661438, "grad_norm": 1.3231571912765503, "learning_rate": 7.558139534883721e-06, "rewards/chosen": 0.15248623490333557, "rewards/rejected": 0.04090070724487305, "rewards/accuracies": 0.875, "rewards/margins": 0.11158552765846252, "logps/chosen": -343.3839111328125, "logps/rejected": -374.7848205566406, "logits/chosen": 5.189720153808594, "logits/rejected": 5.203127384185791} +{"ts": "2025-12-26T16:12:02", "event": "train_log", "step": 42, "epoch": 0.049051094890510946, "progress_pct": 1.63, "epoch_pct": 1.64, "eta": "14:31:31", "max_grad_norm": 1.0, "loss": 0.6093671321868896, "grad_norm": 2.5331315994262695, "learning_rate": 7.945736434108528e-06, "rewards/chosen": 0.2898235321044922, "rewards/rejected": 0.10823898762464523, "rewards/accuracies": 0.9375, "rewards/margins": 0.18158456683158875, "logps/chosen": -341.813720703125, "logps/rejected": -372.44952392578125, "logits/chosen": 5.420182228088379, "logits/rejected": 5.45302677154541} +{"ts": "2025-12-26T16:12:20", "event": "train_log", "step": 44, "epoch": 0.05138686131386861, "progress_pct": 1.71, "epoch_pct": 1.71, "eta": "14:08:47", "max_grad_norm": 1.0, "loss": 0.5815833210945129, "grad_norm": 1.5247384309768677, "learning_rate": 8.333333333333334e-06, "rewards/chosen": 0.32459571957588196, "rewards/rejected": 0.07354050129652023, "rewards/accuracies": 0.8125, "rewards/margins": 0.2510552406311035, "logps/chosen": -354.49627685546875, "logps/rejected": -376.88818359375, "logits/chosen": 5.383636951446533, "logits/rejected": 5.397551536560059} +{"ts": "2025-12-26T16:12:40", "event": "train_log", "step": 46, "epoch": 0.053722627737226275, "progress_pct": 1.79, "epoch_pct": 1.79, "eta": "13:49:08", "max_grad_norm": 1.0, "loss": 0.5269681215286255, "grad_norm": 2.0814144611358643, "learning_rate": 8.72093023255814e-06, "rewards/chosen": 0.6465227603912354, "rewards/rejected": 0.27069616317749023, "rewards/accuracies": 0.9375, "rewards/margins": 0.37582656741142273, "logps/chosen": -331.1025390625, "logps/rejected": -362.90118408203125, "logits/chosen": 5.269731044769287, "logits/rejected": 5.287116050720215} +{"ts": "2025-12-26T16:12:59", "event": "train_log", "step": 48, "epoch": 0.05605839416058394, "progress_pct": 1.87, "epoch_pct": 1.87, "eta": "13:31:19", "max_grad_norm": 1.0, "loss": 0.5066201686859131, "grad_norm": 1.769063115119934, "learning_rate": 9.108527131782946e-06, "rewards/chosen": 0.6377636194229126, "rewards/rejected": 0.21126146614551544, "rewards/accuracies": 1.0, "rewards/margins": 0.42650213837623596, "logps/chosen": -369.40283203125, "logps/rejected": -400.18438720703125, "logits/chosen": 5.472540855407715, "logits/rejected": 5.465417861938477} +{"ts": "2025-12-26T16:13:21", "event": "train_log", "step": 50, "epoch": 0.058394160583941604, "progress_pct": 1.94, "epoch_pct": 1.95, "eta": "13:16:33", "max_grad_norm": 1.0, "loss": 0.529259979724884, "grad_norm": 2.84169602394104, "learning_rate": 9.496124031007753e-06, "rewards/chosen": 0.7923164367675781, "rewards/rejected": 0.4136104881763458, "rewards/accuracies": 1.0, "rewards/margins": 0.3787059783935547, "logps/chosen": -363.4556579589844, "logps/rejected": -397.8169860839844, "logits/chosen": 5.050387382507324, "logits/rejected": 5.112288951873779} +{"ts": "2025-12-26T16:20:56", "event": "train_log", "step": 50, "epoch": 0.058394160583941604, "progress_pct": 1.94, "epoch_pct": 1.95, "eta": "19:38:32", "max_grad_norm": 1.0, "eval_loss": 0.4610801041126251, "eval_runtime": 454.5598, "eval_samples_per_second": 1.676, "eval_steps_per_second": 1.676, "eval_rewards/chosen": 0.8944254517555237, "eval_rewards/rejected": 0.3205168545246124, "eval_rewards/accuracies": 0.9619422554969788, "eval_rewards/margins": 0.5739086270332336, "eval_logps/chosen": -361.462890625, "eval_logps/rejected": -392.5708312988281, "eval_logits/chosen": 5.22359037399292, "eval_logits/rejected": 5.286833763122559} +{"ts": "2025-12-26T16:21:15", "event": "train_log", "step": 52, "epoch": 0.06072992700729927, "progress_pct": 2.02, "epoch_pct": 2.02, "eta": "19:08:07", "max_grad_norm": 1.0, "loss": 0.44602835178375244, "grad_norm": 1.6907895803451538, "learning_rate": 9.883720930232558e-06, "rewards/chosen": 0.9869746565818787, "rewards/rejected": 0.3813100755214691, "rewards/accuracies": 0.9375, "rewards/margins": 0.6056646108627319, "logps/chosen": -343.4534606933594, "logps/rejected": -379.39508056640625, "logits/chosen": 5.486469268798828, "logits/rejected": 5.541717529296875} +{"ts": "2025-12-26T16:21:36", "event": "train_log", "step": 54, "epoch": 0.06306569343065693, "progress_pct": 2.1, "epoch_pct": 2.1, "eta": "18:41:01", "max_grad_norm": 1.0, "loss": 0.43609702587127686, "grad_norm": 1.9458682537078857, "learning_rate": 1.0271317829457365e-05, "rewards/chosen": 0.7794930934906006, "rewards/rejected": 0.15292587876319885, "rewards/accuracies": 1.0, "rewards/margins": 0.6265671253204346, "logps/chosen": -379.5437316894531, "logps/rejected": -401.5587463378906, "logits/chosen": 5.169528961181641, "logits/rejected": 5.2688751220703125} +{"ts": "2025-12-26T16:21:57", "event": "train_log", "step": 56, "epoch": 0.0654014598540146, "progress_pct": 2.18, "epoch_pct": 2.18, "eta": "18:15:44", "max_grad_norm": 1.0, "loss": 0.3928414583206177, "grad_norm": 2.1266520023345947, "learning_rate": 1.065891472868217e-05, "rewards/chosen": 1.274291753768921, "rewards/rejected": 0.4878700375556946, "rewards/accuracies": 0.9375, "rewards/margins": 0.7864217758178711, "logps/chosen": -378.0788269042969, "logps/rejected": -413.27392578125, "logits/chosen": 5.097426414489746, "logits/rejected": 5.15327262878418} +{"ts": "2025-12-26T16:22:18", "event": "train_log", "step": 58, "epoch": 0.06773722627737226, "progress_pct": 2.26, "epoch_pct": 2.26, "eta": "17:52:03", "max_grad_norm": 1.0, "loss": 0.35855019092559814, "grad_norm": 1.5381489992141724, "learning_rate": 1.1046511627906977e-05, "rewards/chosen": 1.2897911071777344, "rewards/rejected": 0.35436347126960754, "rewards/accuracies": 0.875, "rewards/margins": 0.9354276061058044, "logps/chosen": -372.93438720703125, "logps/rejected": -401.8287658691406, "logits/chosen": 5.138954162597656, "logits/rejected": 5.20254373550415} +{"ts": "2025-12-26T16:22:39", "event": "train_log", "step": 60, "epoch": 0.07007299270072993, "progress_pct": 2.33, "epoch_pct": 2.34, "eta": "17:29:56", "max_grad_norm": 1.0, "loss": 0.42801612615585327, "grad_norm": 2.358330726623535, "learning_rate": 1.1434108527131783e-05, "rewards/chosen": 1.3823509216308594, "rewards/rejected": 0.6532848477363586, "rewards/accuracies": 0.875, "rewards/margins": 0.729066014289856, "logps/chosen": -360.984619140625, "logps/rejected": -392.3192138671875, "logits/chosen": 5.071888446807861, "logits/rejected": 5.187964916229248} +{"ts": "2025-12-26T16:22:58", "event": "train_log", "step": 62, "epoch": 0.07240875912408759, "progress_pct": 2.41, "epoch_pct": 2.41, "eta": "17:08:40", "max_grad_norm": 1.0, "loss": 0.31365492939949036, "grad_norm": 2.177586317062378, "learning_rate": 1.182170542635659e-05, "rewards/chosen": 1.6637591123580933, "rewards/rejected": 0.5750135183334351, "rewards/accuracies": 1.0, "rewards/margins": 1.0887457132339478, "logps/chosen": -364.808349609375, "logps/rejected": -401.0321044921875, "logits/chosen": 5.264093399047852, "logits/rejected": 5.310842990875244} +{"ts": "2025-12-26T16:23:19", "event": "train_log", "step": 64, "epoch": 0.07474452554744526, "progress_pct": 2.49, "epoch_pct": 2.49, "eta": "16:48:50", "max_grad_norm": 1.0, "loss": 0.3037749230861664, "grad_norm": 1.697789192199707, "learning_rate": 1.2209302325581395e-05, "rewards/chosen": 1.6470392942428589, "rewards/rejected": 0.5321945548057556, "rewards/accuracies": 1.0, "rewards/margins": 1.114844799041748, "logps/chosen": -359.8249816894531, "logps/rejected": -397.2122497558594, "logits/chosen": 5.191982269287109, "logits/rejected": 5.261416912078857} +{"ts": "2025-12-26T16:23:38", "event": "train_log", "step": 66, "epoch": 0.07708029197080292, "progress_pct": 2.57, "epoch_pct": 2.57, "eta": "16:29:40", "max_grad_norm": 1.0, "loss": 0.25026455521583557, "grad_norm": 1.3219914436340332, "learning_rate": 1.2596899224806202e-05, "rewards/chosen": 1.5671364068984985, "rewards/rejected": 0.15732917189598083, "rewards/accuracies": 1.0, "rewards/margins": 1.4098074436187744, "logps/chosen": -352.3752136230469, "logps/rejected": -392.6779479980469, "logits/chosen": 5.293405532836914, "logits/rejected": 5.3094048500061035} +{"ts": "2025-12-26T16:23:57", "event": "train_log", "step": 68, "epoch": 0.07941605839416059, "progress_pct": 2.64, "epoch_pct": 2.65, "eta": "16:11:16", "max_grad_norm": 1.0, "loss": 0.3108353912830353, "grad_norm": 1.8173967599868774, "learning_rate": 1.2984496124031009e-05, "rewards/chosen": 1.4788665771484375, "rewards/rejected": 0.2151254564523697, "rewards/accuracies": 0.9375, "rewards/margins": 1.2637410163879395, "logps/chosen": -319.99700927734375, "logps/rejected": -364.115234375, "logits/chosen": 5.025746822357178, "logits/rejected": 5.114965438842773} +{"ts": "2025-12-26T16:24:16", "event": "train_log", "step": 70, "epoch": 0.08175182481751825, "progress_pct": 2.72, "epoch_pct": 2.73, "eta": "15:54:17", "max_grad_norm": 1.0, "loss": 0.22991834580898285, "grad_norm": 1.0658400058746338, "learning_rate": 1.3372093023255814e-05, "rewards/chosen": 1.3950352668762207, "rewards/rejected": -0.1014888733625412, "rewards/accuracies": 1.0, "rewards/margins": 1.4965243339538574, "logps/chosen": -383.84033203125, "logps/rejected": -431.7752685546875, "logits/chosen": 4.945235729217529, "logits/rejected": 4.959147930145264} +{"ts": "2025-12-26T16:24:35", "event": "train_log", "step": 72, "epoch": 0.0840875912408759, "progress_pct": 2.8, "epoch_pct": 2.8, "eta": "15:38:16", "max_grad_norm": 1.0, "loss": 0.22603684663772583, "grad_norm": 1.0350896120071411, "learning_rate": 1.375968992248062e-05, "rewards/chosen": 1.2978975772857666, "rewards/rejected": -0.34637776017189026, "rewards/accuracies": 1.0, "rewards/margins": 1.644275426864624, "logps/chosen": -350.9471435546875, "logps/rejected": -382.6837158203125, "logits/chosen": 5.00426721572876, "logits/rejected": 5.120238780975342} +{"ts": "2025-12-26T16:24:56", "event": "train_log", "step": 74, "epoch": 0.08642335766423358, "progress_pct": 2.88, "epoch_pct": 2.88, "eta": "15:23:40", "max_grad_norm": 1.0, "loss": 0.18921935558319092, "grad_norm": 1.1595423221588135, "learning_rate": 1.4147286821705426e-05, "rewards/chosen": 1.1984589099884033, "rewards/rejected": -0.5510700941085815, "rewards/accuracies": 1.0, "rewards/margins": 1.7495291233062744, "logps/chosen": -352.34967041015625, "logps/rejected": -399.23028564453125, "logits/chosen": 4.890130043029785, "logits/rejected": 4.9504714012146} +{"ts": "2025-12-26T16:32:39", "event": "train_log", "step": 75, "epoch": 0.08759124087591241, "progress_pct": 2.92, "epoch_pct": 2.92, "eta": "19:27:59", "max_grad_norm": 1.0, "eval_loss": 0.16020436584949493, "eval_runtime": 454.3435, "eval_samples_per_second": 1.677, "eval_steps_per_second": 1.677, "eval_rewards/chosen": 1.1210675239562988, "eval_rewards/rejected": -0.9336051344871521, "eval_rewards/accuracies": 0.9960629940032959, "eval_rewards/margins": 2.0546727180480957, "eval_logps/chosen": -359.19647216796875, "eval_logps/rejected": -405.1120300292969, "eval_logits/chosen": 4.930174827575684, "eval_logits/rejected": 5.032296657562256} +{"ts": "2025-12-26T16:32:49", "event": "train_log", "step": 76, "epoch": 0.08875912408759123, "progress_pct": 2.96, "epoch_pct": 2.96, "eta": "19:17:37", "max_grad_norm": 1.0, "loss": 0.15998858213424683, "grad_norm": 1.1433167457580566, "learning_rate": 1.4534883720930233e-05, "rewards/chosen": 1.2128857374191284, "rewards/rejected": -0.8816256523132324, "rewards/accuracies": 1.0, "rewards/margins": 2.0945115089416504, "logps/chosen": -313.110595703125, "logps/rejected": -356.1000061035156, "logits/chosen": 5.037275314331055, "logits/rejected": 5.1315507888793945} +{"ts": "2025-12-26T16:33:09", "event": "train_log", "step": 78, "epoch": 0.0910948905109489, "progress_pct": 3.03, "epoch_pct": 3.04, "eta": "18:57:48", "max_grad_norm": 1.0, "loss": 0.1894684135913849, "grad_norm": 0.9839214086532593, "learning_rate": 1.4922480620155039e-05, "rewards/chosen": 1.0605502128601074, "rewards/rejected": -0.8470743894577026, "rewards/accuracies": 1.0, "rewards/margins": 1.90762460231781, "logps/chosen": -366.2629089355469, "logps/rejected": -405.7989196777344, "logits/chosen": 4.817085266113281, "logits/rejected": 4.874035835266113} +{"ts": "2025-12-26T16:33:29", "event": "train_log", "step": 80, "epoch": 0.09343065693430656, "progress_pct": 3.11, "epoch_pct": 3.11, "eta": "18:38:40", "max_grad_norm": 1.0, "loss": 0.15948188304901123, "grad_norm": 0.9212782979011536, "learning_rate": 1.5310077519379846e-05, "rewards/chosen": 0.676516056060791, "rewards/rejected": -1.4909145832061768, "rewards/accuracies": 1.0, "rewards/margins": 2.167430877685547, "logps/chosen": -348.0658264160156, "logps/rejected": -395.23870849609375, "logits/chosen": 5.046716690063477, "logits/rejected": 5.157979965209961} +{"ts": "2025-12-26T16:33:49", "event": "train_log", "step": 82, "epoch": 0.09576642335766423, "progress_pct": 3.19, "epoch_pct": 3.19, "eta": "18:20:47", "max_grad_norm": 1.0, "loss": 0.12085139006376266, "grad_norm": 0.9820688366889954, "learning_rate": 1.569767441860465e-05, "rewards/chosen": 0.8719685077667236, "rewards/rejected": -1.7745698690414429, "rewards/accuracies": 1.0, "rewards/margins": 2.646538257598877, "logps/chosen": -378.8666076660156, "logps/rejected": -436.9100036621094, "logits/chosen": 4.690741539001465, "logits/rejected": 4.771791458129883} +{"ts": "2025-12-26T16:34:09", "event": "train_log", "step": 84, "epoch": 0.09810218978102189, "progress_pct": 3.27, "epoch_pct": 3.27, "eta": "18:03:33", "max_grad_norm": 1.0, "loss": 0.08720710873603821, "grad_norm": 0.66785728931427, "learning_rate": 1.608527131782946e-05, "rewards/chosen": 1.1337480545043945, "rewards/rejected": -1.7701961994171143, "rewards/accuracies": 1.0, "rewards/margins": 2.903944253921509, "logps/chosen": -346.51214599609375, "logps/rejected": -400.1110534667969, "logits/chosen": 4.880465984344482, "logits/rejected": 4.961792945861816} +{"ts": "2025-12-26T16:34:28", "event": "train_log", "step": 86, "epoch": 0.10043795620437956, "progress_pct": 3.35, "epoch_pct": 3.35, "eta": "17:46:40", "max_grad_norm": 1.0, "loss": 0.07942983508110046, "grad_norm": 0.5760660767555237, "learning_rate": 1.647286821705426e-05, "rewards/chosen": 1.2459325790405273, "rewards/rejected": -1.7693227529525757, "rewards/accuracies": 1.0, "rewards/margins": 3.0152552127838135, "logps/chosen": -341.7489318847656, "logps/rejected": -398.322021484375, "logits/chosen": 4.464397430419922, "logits/rejected": 4.680055618286133} +{"ts": "2025-12-26T16:34:48", "event": "train_log", "step": 88, "epoch": 0.10277372262773722, "progress_pct": 3.42, "epoch_pct": 3.43, "eta": "17:30:52", "max_grad_norm": 1.0, "loss": 0.1258174479007721, "grad_norm": 1.6020294427871704, "learning_rate": 1.686046511627907e-05, "rewards/chosen": 1.0706769227981567, "rewards/rejected": -2.0480403900146484, "rewards/accuracies": 0.9375, "rewards/margins": 3.118717670440674, "logps/chosen": -344.9147644042969, "logps/rejected": -395.4453125, "logits/chosen": 4.563863277435303, "logits/rejected": 4.680974960327148} +{"ts": "2025-12-26T16:35:06", "event": "train_log", "step": 90, "epoch": 0.10510948905109489, "progress_pct": 3.5, "epoch_pct": 3.5, "eta": "17:14:58", "max_grad_norm": 1.0, "loss": 0.06663060188293457, "grad_norm": 0.46413859724998474, "learning_rate": 1.7248062015503875e-05, "rewards/chosen": 1.4128761291503906, "rewards/rejected": -2.3478102684020996, "rewards/accuracies": 1.0, "rewards/margins": 3.760685920715332, "logps/chosen": -326.9678649902344, "logps/rejected": -388.4164123535156, "logits/chosen": 4.4989237785339355, "logits/rejected": 4.673248291015625} +{"ts": "2025-12-26T16:35:26", "event": "train_log", "step": 92, "epoch": 0.10744525547445255, "progress_pct": 3.58, "epoch_pct": 3.58, "eta": "17:00:34", "max_grad_norm": 1.0, "loss": 0.04481709748506546, "grad_norm": 0.6699568629264832, "learning_rate": 1.7635658914728684e-05, "rewards/chosen": 1.477597713470459, "rewards/rejected": -2.9012341499328613, "rewards/accuracies": 1.0, "rewards/margins": 4.37883186340332, "logps/chosen": -362.7267150878906, "logps/rejected": -439.2985534667969, "logits/chosen": 4.7294535636901855, "logits/rejected": 4.813880920410156} +{"ts": "2025-12-26T16:35:47", "event": "train_log", "step": 94, "epoch": 0.10978102189781022, "progress_pct": 3.66, "epoch_pct": 3.66, "eta": "16:47:17", "max_grad_norm": 1.0, "loss": 0.05632612109184265, "grad_norm": 0.4152977168560028, "learning_rate": 1.802325581395349e-05, "rewards/chosen": 0.71366286277771, "rewards/rejected": -2.744809150695801, "rewards/accuracies": 1.0, "rewards/margins": 3.4584720134735107, "logps/chosen": -381.59246826171875, "logps/rejected": -444.2817687988281, "logits/chosen": 4.785149574279785, "logits/rejected": 4.891542434692383} +{"ts": "2025-12-26T16:36:07", "event": "train_log", "step": 96, "epoch": 0.11211678832116788, "progress_pct": 3.73, "epoch_pct": 3.74, "eta": "16:34:09", "max_grad_norm": 1.0, "loss": 0.040920041501522064, "grad_norm": 0.3152717649936676, "learning_rate": 1.8410852713178295e-05, "rewards/chosen": 1.7566397190093994, "rewards/rejected": -2.263956069946289, "rewards/accuracies": 1.0, "rewards/margins": 4.020595550537109, "logps/chosen": -356.7286376953125, "logps/rejected": -414.69635009765625, "logits/chosen": 4.603940486907959, "logits/rejected": 4.804995536804199} +{"ts": "2025-12-26T16:36:26", "event": "train_log", "step": 98, "epoch": 0.11445255474452555, "progress_pct": 3.81, "epoch_pct": 3.82, "eta": "16:21:07", "max_grad_norm": 1.0, "loss": 0.025794224813580513, "grad_norm": 0.37698569893836975, "learning_rate": 1.8798449612403103e-05, "rewards/chosen": 1.3867536783218384, "rewards/rejected": -3.2675204277038574, "rewards/accuracies": 1.0, "rewards/margins": 4.6542744636535645, "logps/chosen": -339.794189453125, "logps/rejected": -413.8865966796875, "logits/chosen": 4.558542728424072, "logits/rejected": 4.690641403198242} +{"ts": "2025-12-26T16:36:46", "event": "train_log", "step": 100, "epoch": 0.11678832116788321, "progress_pct": 3.89, "epoch_pct": 3.89, "eta": "16:09:03", "max_grad_norm": 1.0, "loss": 0.015155203640460968, "grad_norm": 0.15023073554039001, "learning_rate": 1.918604651162791e-05, "rewards/chosen": 1.7938623428344727, "rewards/rejected": -3.1486666202545166, "rewards/accuracies": 1.0, "rewards/margins": 4.942529201507568, "logps/chosen": -346.2568054199219, "logps/rejected": -418.9315185546875, "logits/chosen": 4.387497425079346, "logits/rejected": 4.494588375091553} +{"ts": "2025-12-26T16:44:21", "event": "train_log", "step": 100, "epoch": 0.11678832116788321, "progress_pct": 3.89, "epoch_pct": 3.89, "eta": "19:16:19", "max_grad_norm": 1.0, "eval_loss": 0.04428481683135033, "eval_runtime": 454.7251, "eval_samples_per_second": 1.676, "eval_steps_per_second": 1.676, "eval_rewards/chosen": 1.7248634099960327, "eval_rewards/rejected": -2.863647222518921, "eval_rewards/accuracies": 0.9921259880065918, "eval_rewards/margins": 4.588510513305664, "eval_logps/chosen": -353.15850830078125, "eval_logps/rejected": -424.4124755859375, "eval_logits/chosen": 4.285891056060791, "eval_logits/rejected": 4.425926208496094} +{"ts": "2025-12-26T16:44:42", "event": "train_log", "step": 102, "epoch": 0.11912408759124088, "progress_pct": 3.97, "epoch_pct": 3.97, "eta": "19:01:01", "max_grad_norm": 1.0, "loss": 0.01589718647301197, "grad_norm": 0.21237261593341827, "learning_rate": 1.9573643410852714e-05, "rewards/chosen": 1.7697646617889404, "rewards/rejected": -3.025937557220459, "rewards/accuracies": 1.0, "rewards/margins": 4.79570198059082, "logps/chosen": -305.01165771484375, "logps/rejected": -384.8538818359375, "logits/chosen": 4.197369575500488, "logits/rejected": 4.352917671203613} +{"ts": "2025-12-26T16:45:01", "event": "train_log", "step": 104, "epoch": 0.12145985401459854, "progress_pct": 4.05, "epoch_pct": 4.05, "eta": "18:45:50", "max_grad_norm": 1.0, "loss": 0.038177840411663055, "grad_norm": 1.1960583925247192, "learning_rate": 1.996124031007752e-05, "rewards/chosen": 1.556309461593628, "rewards/rejected": -3.2670814990997314, "rewards/accuracies": 1.0, "rewards/margins": 4.823390960693359, "logps/chosen": -341.10675048828125, "logps/rejected": -417.59613037109375, "logits/chosen": 4.184627056121826, "logits/rejected": 4.280352592468262} +{"ts": "2025-12-26T16:45:21", "event": "train_log", "step": 106, "epoch": 0.12379562043795621, "progress_pct": 4.12, "epoch_pct": 4.13, "eta": "18:31:17", "max_grad_norm": 1.0, "loss": 0.056792374700307846, "grad_norm": 1.3021241426467896, "learning_rate": 2.0348837209302328e-05, "rewards/chosen": 1.6538318395614624, "rewards/rejected": -3.0760293006896973, "rewards/accuracies": 1.0, "rewards/margins": 4.729861736297607, "logps/chosen": -358.1336669921875, "logps/rejected": -426.8945617675781, "logits/chosen": 4.32430362701416, "logits/rejected": 4.451810359954834} +{"ts": "2025-12-26T16:45:41", "event": "train_log", "step": 108, "epoch": 0.12613138686131387, "progress_pct": 4.2, "epoch_pct": 4.2, "eta": "18:17:24", "max_grad_norm": 1.0, "loss": 0.07614695280790329, "grad_norm": 0.3007296025753021, "learning_rate": 2.0736434108527133e-05, "rewards/chosen": 1.4121378660202026, "rewards/rejected": -3.331850051879883, "rewards/accuracies": 0.9375, "rewards/margins": 4.743987560272217, "logps/chosen": -364.4995422363281, "logps/rejected": -434.4844055175781, "logits/chosen": 4.4918341636657715, "logits/rejected": 4.6333909034729} +{"ts": "2025-12-26T16:45:59", "event": "train_log", "step": 110, "epoch": 0.12846715328467154, "progress_pct": 4.28, "epoch_pct": 4.28, "eta": "18:03:33", "max_grad_norm": 1.0, "loss": 0.014600476250052452, "grad_norm": 0.42474085092544556, "learning_rate": 2.1124031007751938e-05, "rewards/chosen": 1.958223819732666, "rewards/rejected": -4.051264762878418, "rewards/accuracies": 1.0, "rewards/margins": 6.009488582611084, "logps/chosen": -306.4935607910156, "logps/rejected": -392.5444030761719, "logits/chosen": 3.857876777648926, "logits/rejected": 3.9678285121917725} +{"ts": "2025-12-26T16:46:19", "event": "train_log", "step": 112, "epoch": 0.1308029197080292, "progress_pct": 4.36, "epoch_pct": 4.36, "eta": "17:50:24", "max_grad_norm": 1.0, "loss": 0.010151136666536331, "grad_norm": 0.14177864789962769, "learning_rate": 2.1511627906976744e-05, "rewards/chosen": 2.196099281311035, "rewards/rejected": -3.75758695602417, "rewards/accuracies": 1.0, "rewards/margins": 5.953686237335205, "logps/chosen": -339.5606689453125, "logps/rejected": -425.51361083984375, "logits/chosen": 4.254065036773682, "logits/rejected": 4.352800369262695} +{"ts": "2025-12-26T16:46:40", "event": "train_log", "step": 114, "epoch": 0.13313868613138685, "progress_pct": 4.43, "epoch_pct": 4.44, "eta": "17:38:30", "max_grad_norm": 1.0, "loss": 0.011391772888600826, "grad_norm": 0.29438889026641846, "learning_rate": 2.1899224806201552e-05, "rewards/chosen": 1.9949897527694702, "rewards/rejected": -3.3917016983032227, "rewards/accuracies": 1.0, "rewards/margins": 5.386691093444824, "logps/chosen": -349.3886413574219, "logps/rejected": -431.79925537109375, "logits/chosen": 3.7171452045440674, "logits/rejected": 3.9224042892456055} +{"ts": "2025-12-26T16:47:00", "event": "train_log", "step": 116, "epoch": 0.13547445255474452, "progress_pct": 4.51, "epoch_pct": 4.52, "eta": "17:26:32", "max_grad_norm": 1.0, "loss": 0.024509863927960396, "grad_norm": 0.9541389346122742, "learning_rate": 2.2286821705426357e-05, "rewards/chosen": 1.8549680709838867, "rewards/rejected": -3.4747393131256104, "rewards/accuracies": 1.0, "rewards/margins": 5.329707622528076, "logps/chosen": -343.19482421875, "logps/rejected": -423.23565673828125, "logits/chosen": 3.5138039588928223, "logits/rejected": 3.7400965690612793} +{"ts": "2025-12-26T16:47:21", "event": "train_log", "step": 118, "epoch": 0.1378102189781022, "progress_pct": 4.59, "epoch_pct": 4.59, "eta": "17:15:03", "max_grad_norm": 1.0, "loss": 0.007583940401673317, "grad_norm": 0.45693957805633545, "learning_rate": 2.2674418604651163e-05, "rewards/chosen": 2.130192518234253, "rewards/rejected": -4.364559650421143, "rewards/accuracies": 1.0, "rewards/margins": 6.494752407073975, "logps/chosen": -382.1067810058594, "logps/rejected": -480.72265625, "logits/chosen": 3.9002795219421387, "logits/rejected": 3.9630849361419678} +{"ts": "2025-12-26T16:47:41", "event": "train_log", "step": 120, "epoch": 0.14014598540145987, "progress_pct": 4.67, "epoch_pct": 4.67, "eta": "17:03:44", "max_grad_norm": 1.0, "loss": 0.007748167496174574, "grad_norm": 0.20826944708824158, "learning_rate": 2.3062015503875968e-05, "rewards/chosen": 1.398924469947815, "rewards/rejected": -4.580015182495117, "rewards/accuracies": 1.0, "rewards/margins": 5.978940010070801, "logps/chosen": -355.2779541015625, "logps/rejected": -436.54022216796875, "logits/chosen": 3.7722253799438477, "logits/rejected": 3.939023494720459} +{"ts": "2025-12-26T16:48:01", "event": "train_log", "step": 122, "epoch": 0.1424817518248175, "progress_pct": 4.75, "epoch_pct": 4.75, "eta": "16:52:48", "max_grad_norm": 1.0, "loss": 0.014359460212290287, "grad_norm": 0.21926206350326538, "learning_rate": 2.3449612403100777e-05, "rewards/chosen": 1.1770455837249756, "rewards/rejected": -5.205078125, "rewards/accuracies": 1.0, "rewards/margins": 6.382123947143555, "logps/chosen": -327.1947326660156, "logps/rejected": -414.7738037109375, "logits/chosen": 3.656745672225952, "logits/rejected": 3.875434160232544} +{"ts": "2025-12-26T16:48:21", "event": "train_log", "step": 124, "epoch": 0.14481751824817518, "progress_pct": 4.82, "epoch_pct": 4.83, "eta": "16:42:21", "max_grad_norm": 1.0, "loss": 0.007621760480105877, "grad_norm": 0.03550998866558075, "learning_rate": 2.3837209302325582e-05, "rewards/chosen": 0.7802913188934326, "rewards/rejected": -6.6151628494262695, "rewards/accuracies": 1.0, "rewards/margins": 7.395453453063965, "logps/chosen": -369.8974304199219, "logps/rejected": -473.97283935546875, "logits/chosen": 3.659773826599121, "logits/rejected": 3.725044012069702} +{"ts": "2025-12-26T16:56:05", "event": "train_log", "step": 125, "epoch": 0.145985401459854, "progress_pct": 4.86, "epoch_pct": 4.87, "eta": "19:05:16", "max_grad_norm": 1.0, "eval_loss": 0.024107323959469795, "eval_runtime": 454.8045, "eval_samples_per_second": 1.675, "eval_steps_per_second": 1.675, "eval_rewards/chosen": 0.5319492816925049, "eval_rewards/rejected": -6.150709629058838, "eval_rewards/accuracies": 0.9934383034706116, "eval_rewards/margins": 6.682660102844238, "eval_logps/chosen": -365.087646484375, "eval_logps/rejected": -457.28314208984375, "eval_logits/chosen": 3.6694726943969727, "eval_logits/rejected": 3.8436598777770996} +{"ts": "2025-12-26T16:56:15", "event": "train_log", "step": 126, "epoch": 0.14715328467153285, "progress_pct": 4.9, "epoch_pct": 4.91, "eta": "18:59:00", "max_grad_norm": 1.0, "loss": 0.005531508009880781, "grad_norm": 0.21691419184207916, "learning_rate": 2.4224806201550387e-05, "rewards/chosen": 0.9075853824615479, "rewards/rejected": -7.027284622192383, "rewards/accuracies": 1.0, "rewards/margins": 7.934869289398193, "logps/chosen": -345.18023681640625, "logps/rejected": -454.6177978515625, "logits/chosen": 3.777791738510132, "logits/rejected": 3.7573630809783936} +{"ts": "2025-12-26T16:56:36", "event": "train_log", "step": 128, "epoch": 0.14948905109489052, "progress_pct": 4.98, "epoch_pct": 4.98, "eta": "18:46:50", "max_grad_norm": 1.0, "loss": 0.0008547124452888966, "grad_norm": 0.0514506921172142, "learning_rate": 2.4612403100775196e-05, "rewards/chosen": 1.0864180326461792, "rewards/rejected": -6.75621223449707, "rewards/accuracies": 1.0, "rewards/margins": 7.842630863189697, "logps/chosen": -376.30023193359375, "logps/rejected": -486.30615234375, "logits/chosen": 3.6862380504608154, "logits/rejected": 3.77681827545166} +{"ts": "2025-12-26T16:56:54", "event": "train_log", "step": 130, "epoch": 0.15182481751824817, "progress_pct": 5.06, "epoch_pct": 5.06, "eta": "18:34:28", "max_grad_norm": 1.0, "loss": 0.019211476668715477, "grad_norm": 1.0013993978500366, "learning_rate": 2.5e-05, "rewards/chosen": 0.7987843751907349, "rewards/rejected": -6.31362247467041, "rewards/accuracies": 1.0, "rewards/margins": 7.1124067306518555, "logps/chosen": -330.2728271484375, "logps/rejected": -420.1920166015625, "logits/chosen": 3.8558738231658936, "logits/rejected": 4.067385673522949} +{"ts": "2025-12-26T16:57:14", "event": "train_log", "step": 132, "epoch": 0.15416058394160584, "progress_pct": 5.13, "epoch_pct": 5.14, "eta": "18:22:39", "max_grad_norm": 1.0, "loss": 0.005051509942859411, "grad_norm": 0.2909312844276428, "learning_rate": 2.5387596899224806e-05, "rewards/chosen": 0.7269927859306335, "rewards/rejected": -6.733253479003906, "rewards/accuracies": 1.0, "rewards/margins": 7.4602460861206055, "logps/chosen": -346.632568359375, "logps/rejected": -448.4364318847656, "logits/chosen": 3.8564915657043457, "logits/rejected": 4.106588363647461} +{"ts": "2025-12-26T16:57:34", "event": "train_log", "step": 134, "epoch": 0.1564963503649635, "progress_pct": 5.21, "epoch_pct": 5.22, "eta": "18:11:24", "max_grad_norm": 1.0, "loss": 0.029044320806860924, "grad_norm": 0.10341063886880875, "learning_rate": 2.5775193798449615e-05, "rewards/chosen": -0.15517807006835938, "rewards/rejected": -7.108559608459473, "rewards/accuracies": 1.0, "rewards/margins": 6.953381538391113, "logps/chosen": -398.5205078125, "logps/rejected": -493.3382568359375, "logits/chosen": 3.6923415660858154, "logits/rejected": 3.8758797645568848} +{"ts": "2025-12-26T16:57:55", "event": "train_log", "step": 136, "epoch": 0.15883211678832118, "progress_pct": 5.29, "epoch_pct": 5.29, "eta": "18:00:50", "max_grad_norm": 1.0, "loss": 0.008300668559968472, "grad_norm": 0.40827327966690063, "learning_rate": 2.616279069767442e-05, "rewards/chosen": 0.3356212377548218, "rewards/rejected": -6.36344051361084, "rewards/accuracies": 1.0, "rewards/margins": 6.699062347412109, "logps/chosen": -420.5874328613281, "logps/rejected": -511.71661376953125, "logits/chosen": 3.701347827911377, "logits/rejected": 3.8854856491088867} +{"ts": "2025-12-26T16:58:15", "event": "train_log", "step": 138, "epoch": 0.16116788321167883, "progress_pct": 5.37, "epoch_pct": 5.37, "eta": "17:50:12", "max_grad_norm": 1.0, "loss": 0.010793081484735012, "grad_norm": 0.17690710723400116, "learning_rate": 2.655038759689923e-05, "rewards/chosen": 1.5296471118927002, "rewards/rejected": -5.799461364746094, "rewards/accuracies": 1.0, "rewards/margins": 7.329109191894531, "logps/chosen": -361.59417724609375, "logps/rejected": -455.0230407714844, "logits/chosen": 3.476500988006592, "logits/rejected": 3.6296494007110596} +{"ts": "2025-12-26T16:58:35", "event": "train_log", "step": 140, "epoch": 0.1635036496350365, "progress_pct": 5.45, "epoch_pct": 5.45, "eta": "17:39:44", "max_grad_norm": 1.0, "loss": 0.012777667492628098, "grad_norm": 0.15591435134410858, "learning_rate": 2.693798449612403e-05, "rewards/chosen": 1.2819935083389282, "rewards/rejected": -6.812654972076416, "rewards/accuracies": 1.0, "rewards/margins": 8.094648361206055, "logps/chosen": -379.35333251953125, "logps/rejected": -490.0003356933594, "logits/chosen": 3.440129518508911, "logits/rejected": 3.5673890113830566} +{"ts": "2025-12-26T16:58:56", "event": "train_log", "step": 142, "epoch": 0.16583941605839417, "progress_pct": 5.52, "epoch_pct": 5.53, "eta": "17:29:50", "max_grad_norm": 1.0, "loss": 0.007118214387446642, "grad_norm": 0.820688009262085, "learning_rate": 2.7325581395348836e-05, "rewards/chosen": 1.4685018062591553, "rewards/rejected": -6.558856964111328, "rewards/accuracies": 1.0, "rewards/margins": 8.027359008789062, "logps/chosen": -402.8253479003906, "logps/rejected": -506.32000732421875, "logits/chosen": 3.23529052734375, "logits/rejected": 3.393266201019287} diff --git a/dpo_qwen_14B/wandb/debug-internal.log b/dpo_qwen_14B/wandb/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..4997f9ce61d06da20afbeba3cfa2eb77964556df --- /dev/null +++ b/dpo_qwen_14B/wandb/debug-internal.log @@ -0,0 +1,11 @@ +{"time":"2025-12-26T15:56:50.297401502Z","level":"INFO","msg":"stream: starting","core version":"0.23.1"} +{"time":"2025-12-26T15:56:50.452320078Z","level":"INFO","msg":"stream: created new stream","id":"wbzoafvt"} +{"time":"2025-12-26T15:56:50.452494836Z","level":"INFO","msg":"handler: started","stream_id":"wbzoafvt"} +{"time":"2025-12-26T15:56:50.452572405Z","level":"INFO","msg":"stream: started","id":"wbzoafvt"} +{"time":"2025-12-26T15:56:50.452599156Z","level":"INFO","msg":"writer: started","stream_id":"wbzoafvt"} +{"time":"2025-12-26T15:56:50.452607804Z","level":"INFO","msg":"sender: started","stream_id":"wbzoafvt"} +{"time":"2025-12-26T16:59:00.070531235Z","level":"INFO","msg":"stream: closing","id":"wbzoafvt"} +{"time":"2025-12-26T16:59:00.346670237Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"} +{"time":"2025-12-26T16:59:00.473496131Z","level":"INFO","msg":"handler: closed","stream_id":"wbzoafvt"} +{"time":"2025-12-26T16:59:00.473589831Z","level":"INFO","msg":"sender: closed","stream_id":"wbzoafvt"} +{"time":"2025-12-26T16:59:00.473602236Z","level":"INFO","msg":"stream: closed","id":"wbzoafvt"} diff --git a/dpo_qwen_14B/wandb/debug.log b/dpo_qwen_14B/wandb/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..77992573ecd912291c2ced9226296badd871bb37 --- /dev/null +++ b/dpo_qwen_14B/wandb/debug.log @@ -0,0 +1,26 @@ +2025-12-26 15:56:50,017 INFO MainThread:148906 [wandb_setup.py:_flush():80] Current SDK version is 0.23.1 +2025-12-26 15:56:50,017 INFO MainThread:148906 [wandb_setup.py:_flush():80] Configure stats pid to 148906 +2025-12-26 15:56:50,017 INFO MainThread:148906 [wandb_setup.py:_flush():80] Loading settings from /root/.config/wandb/settings +2025-12-26 15:56:50,017 INFO MainThread:148906 [wandb_setup.py:_flush():80] Loading settings from /workspace/trainer-kit/DPO-14b/wandb/settings +2025-12-26 15:56:50,017 INFO MainThread:148906 [wandb_setup.py:_flush():80] Loading settings from environment variables +2025-12-26 15:56:50,017 INFO MainThread:148906 [wandb_init.py:setup_run_log_directory():714] Logging user logs to runs/dpo_run_14b_v1/wandb/run-20251226_155650-wbzoafvt/logs/debug.log +2025-12-26 15:56:50,017 INFO MainThread:148906 [wandb_init.py:setup_run_log_directory():715] Logging internal logs to runs/dpo_run_14b_v1/wandb/run-20251226_155650-wbzoafvt/logs/debug-internal.log +2025-12-26 15:56:50,017 INFO MainThread:148906 [wandb_init.py:init():841] calling init triggers +2025-12-26 15:56:50,017 INFO MainThread:148906 [wandb_init.py:init():846] wandb.init called with sweep_config: {} +config: {'model': {'repo_id': '../../Models/Qwen2.5-Coder-14B-CPT-SFT', 'revision': None, 'base_local_dir': 'base_model', 'trust_remote_code': True, 'tokenizer_use_fast': True, 'device_map': 'auto', 'torch_dtype': 'bfloat16', 'use_4bit': False, 'bnb_4bit_quant_type': 'nf4', 'bnb_4bit_use_double_quant': False, 'bnb_4bit_compute_dtype': 'bfloat16', 'attn_implementation': None}, 'data': {'train_jsonl': 'dpo_pairs_generated.jsonl', 'eval_jsonl': None, 'eval_split_ratio': 0.1, 'prompt_field': 'prompt', 'chosen_field': 'chosen', 'rejected_field': 'rejected', 'score_field': 'f1_score', 'format_type': 'chatml', 'system_prompt': 'You are a Hyperswitch Rust code analyzer. Identify functions/structs that need modification for a given task.\n\n## Output Format\n\n##OUTPUT\nExplain the data flow and why each component must change:\n- Flow: [Input → Processing → Output with arrows]\n- For each component: "The [ComponentName] ([path]) must [action] because [reason]—without this, [consequence]"\n- Explain coupling between components\n\n##SELECT\nmodify::crates/path/to/file.rs::impl::ComponentName\nadd::crates/another/file.rs::function::AnotherComponent\n\n\n## Rules\n\n1. Use full paths: `remove::crates/folder/file.rs::Type::Name`\n2. Use `::` for nested items: `status::StructName::Type::Name`\n3. Always explain "must change because" and "without this"\n3. Types of components: function, struct, enum, impl, trait\n4. If there is extra information (e.g., enum variants), include that too.\n5. Start with ##OUTPUT, end with ##SELECT, terminate with \n', 'max_length': 2048, 'shuffle': True, 'num_proc': 4}, 'peft': {'enabled': True, 'r': 16, 'lora_alpha': 32, 'lora_dropout': 0.05, 'bias': 'none', 'target_modules': 'auto'}, 'dpo': {'beta': 0.1, 'label_smoothing': 0.0, 'loss_type': 'sigmoid', 'use_reference_model': True, 'reference_free': False}, 'train': {'num_train_epochs': 3, 'per_device_train_batch_size': 1, 'per_device_eval_batch_size': 1, 'gradient_accumulation_steps': 8, 'learning_rate': '5e-5', 'weight_decay': 0.0, 'warmup_ratio': 0.1, 'lr_scheduler_type': 'cosine', 'optim': 'adamw_torch', 'max_grad_norm': 1.0, 'gradient_checkpointing': True, 'logging_steps': 2, 'save_strategy': 'steps', 'save_steps': 100, 'save_total_limit': 10, 'evaluation_strategy': 'steps', 'eval_steps': 25, 'load_best_model_at_end': True, 'early_stopping': {'enabled': True, 'patience': 5, 'min_delta': 0.001, 'metric': 'eval_loss', 'mode': 'min'}, 'resume_from_checkpoint': 'auto'}, 'run_dir': 'runs/dpo_run_14b_v1', '_wandb': {}} +2025-12-26 15:56:50,017 INFO MainThread:148906 [wandb_init.py:init():889] starting backend +2025-12-26 15:56:50,290 INFO MainThread:148906 [wandb_init.py:init():892] sending inform_init request +2025-12-26 15:56:50,295 INFO MainThread:148906 [wandb_init.py:init():900] backend started and connected +2025-12-26 15:56:50,297 INFO MainThread:148906 [wandb_init.py:init():970] updated telemetry +2025-12-26 15:56:50,297 INFO MainThread:148906 [wandb_init.py:init():994] communicating run to backend with 90.0 second timeout +2025-12-26 15:56:50,648 INFO MainThread:148906 [wandb_init.py:init():1041] starting run threads in backend +2025-12-26 15:56:50,757 INFO MainThread:148906 [wandb_run.py:_console_start():2521] atexit reg +2025-12-26 15:56:50,757 INFO MainThread:148906 [wandb_run.py:_redirect():2369] redirect: wrap_raw +2025-12-26 15:56:50,757 INFO MainThread:148906 [wandb_run.py:_redirect():2438] Wrapping output streams. +2025-12-26 15:56:50,757 INFO MainThread:148906 [wandb_run.py:_redirect():2461] Redirects installed. +2025-12-26 15:56:50,762 INFO MainThread:148906 [wandb_init.py:init():1081] run started, returning control to user process +2025-12-26 15:57:33,783 INFO MainThread:148906 [wandb_run.py:_config_callback():1396] config_cb None None {'peft_config': {'default': {'task_type': 'CAUSAL_LM', 'peft_type': 'LORA', 'auto_mapping': None, 'peft_version': '0.18.0', 'base_model_name_or_path': '../../Models/Qwen2.5-Coder-14B-CPT-SFT', 'revision': None, 'inference_mode': False, 'r': 16, 'target_modules': ['k_proj', 'o_proj', 'v_proj', 'q_proj'], 'exclude_modules': None, 'lora_alpha': 32, 'lora_dropout': 0.05, 'fan_in_fan_out': False, 'bias': 'none', 'use_rslora': False, 'modules_to_save': None, 'init_lora_weights': True, 'layers_to_transform': None, 'layers_pattern': None, 'rank_pattern': {}, 'alpha_pattern': {}, 'megatron_config': None, 'megatron_core': 'megatron.core', 'trainable_token_indices': None, 'loftq_config': {}, 'eva_config': None, 'corda_config': None, 'use_dora': False, 'alora_invocation_tokens': None, 'use_qalora': False, 'qalora_group_size': 16, 'layer_replication': None, 'runtime_config': {'ephemeral_gpu_offload': False}, 'lora_bias': False, 'target_parameters': None, 'arrow_config': None, 'ensure_weight_tying': False}}, 'vocab_size': 152064, 'max_position_embeddings': 32768, 'hidden_size': 5120, 'intermediate_size': 13824, 'num_hidden_layers': 48, 'num_attention_heads': 40, 'use_sliding_window': False, 'sliding_window': None, 'max_window_layers': 48, 'num_key_value_heads': 8, 'hidden_act': 'silu', 'initializer_range': 0.02, 'rms_norm_eps': 1e-06, 'use_cache': False, 'attention_dropout': 0.0, 'layer_types': ['full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention'], 'rope_parameters': {'rope_theta': 1000000.0, 'rope_type': 'default'}, 'return_dict': True, 'output_hidden_states': False, 'dtype': 'bfloat16', 'tie_word_embeddings': False, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'architectures': ['Qwen2ForCausalLM'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'task_specific_params': None, 'problem_type': None, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': None, 'pad_token_id': 151643, 'eos_token_id': 151643, 'sep_token_id': None, 'decoder_start_token_id': None, '_name_or_path': '../../Models/Qwen2.5-Coder-14B-CPT-SFT', 'transformers_version': '5.0.0.dev0', 'model_type': 'qwen2', 'output_attentions': False, 'output_dir': 'runs/dpo_run_14b_v1', 'do_train': False, 'do_eval': True, 'do_predict': False, 'eval_strategy': 'steps', 'prediction_loss_only': False, 'per_device_train_batch_size': 1, 'per_device_eval_batch_size': 1, 'gradient_accumulation_steps': 8, 'eval_accumulation_steps': None, 'eval_delay': 0, 'torch_empty_cache_steps': None, 'learning_rate': 5e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 3, 'max_steps': -1, 'lr_scheduler_type': 'cosine', 'lr_scheduler_kwargs': None, 'warmup_ratio': 0.1, 'warmup_steps': 0.1, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': None, 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 2, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 100, 'save_total_limit': 10, 'enable_jit_checkpoint': False, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'use_cpu': False, 'seed': 42, 'data_seed': None, 'bf16': True, 'fp16': False, 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': -1, 'ddp_backend': None, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 25, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'run_name': None, 'disable_tqdm': False, 'remove_unused_columns': False, 'label_names': None, 'load_best_model_at_end': True, 'metric_for_best_model': 'eval_loss', 'greater_is_better': False, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'parallelism_config': None, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['wandb'], 'project': 'huggingface', 'trackio_space_id': 'trackio', 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'push_to_hub': False, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '', 'hub_private_repo': None, 'hub_always_push': False, 'hub_revision': None, 'gradient_checkpointing': True, 'gradient_checkpointing_kwargs': None, 'include_for_metrics': [], 'eval_do_concat_batches': True, 'auto_find_batch_size': False, 'full_determinism': False, 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'include_num_input_tokens_seen': 'no', 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'liger_kernel_config': None, 'eval_use_gather_object': False, 'average_tokens_across_devices': True, 'model_init_kwargs': None, 'ref_model_init_kwargs': None, 'model_adapter_name': None, 'ref_adapter_name': None, 'force_use_ref_model': False, 'disable_dropout': True, 'use_logits_to_keep': False, 'dataset_num_proc': None, 'pad_token': '', 'label_pad_token_id': -100, 'max_prompt_length': 1024, 'max_completion_length': None, 'max_length': 2048, 'truncation_mode': 'keep_end', 'padding_free': False, 'precompute_ref_log_probs': False, 'precompute_ref_batch_size': None, 'tools': None, 'loss_type': 'sigmoid', 'use_liger_loss': None, 'base_model_attribute_name': 'model', 'beta': 0.1, 'f_divergence_type': 'reverse_kl', 'f_alpha_divergence_coef': 1.0, 'reference_free': False, 'label_smoothing': 0.0, 'use_weighting': False, 'rpo_alpha': None, 'ld_alpha': None, 'discopop_tau': 0.05, 'loss_weights': None, 'sync_ref_model': False, 'ref_model_mixup_alpha': 0.6, 'ref_model_sync_steps': 512, 'generate_during_eval': False} +2025-12-26 15:57:33,791 INFO MainThread:148906 [wandb_config.py:__setitem__():154] [no run ID] config set model/num_parameters = 14795199488 - > +2025-12-26 15:57:33,792 INFO MainThread:148906 [wandb_run.py:_config_callback():1396] config_cb model/num_parameters 14795199488 None +2025-12-26 16:59:00,070 INFO wandb-AsyncioManager-main:148906 [service_client.py:_forward_responses():80] Reached EOF. +2025-12-26 16:59:00,070 INFO wandb-AsyncioManager-main:148906 [mailbox.py:close():137] Closing mailbox, abandoning 1 handles. diff --git a/dpo_qwen_14B/wandb/run-20251226_152332-r9hfat2g/files/config.yaml b/dpo_qwen_14B/wandb/run-20251226_152332-r9hfat2g/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e7498bfade34f178d61751b3fa0096164dc7cfe2 --- /dev/null +++ b/dpo_qwen_14B/wandb/run-20251226_152332-r9hfat2g/files/config.yaml @@ -0,0 +1,661 @@ +_name_or_path: + value: ../../Models/Qwen2.5-Coder-14B-CPT-SFT +_wandb: + value: + cli_version: 0.23.1 + e: + ce8b9zq5sbh73okdbbvozze07ayjamtf: + args: + - --config + - config_dpo.yaml + codePath: run_dpo.py + codePathLocal: run_dpo.py + cpu_count: 12 + cpu_count_logical: 24 + cudaVersion: "13.0" + disk: + /: + total: "791251738624" + used: "314755911680" + email: shaiksirajuddin9949@gmail.com + executable: /workspace/llm_finetuning_env/bin/python + gpu: NVIDIA A100-SXM4-80GB + gpu_count: 2 + gpu_nvidia: + - architecture: Ampere + cudaCores: 6912 + memoryTotal: "85899345920" + name: NVIDIA A100-SXM4-80GB + uuid: GPU-989794b0-ec3b-13bf-db9f-3fbe341497ba + - architecture: Ampere + cudaCores: 6912 + memoryTotal: "85899345920" + name: NVIDIA A100-SXM4-80GB + uuid: GPU-3790aa64-60ef-9eac-b0b1-b278ee8c0d40 + host: a100-2gpu-shell-session-757d587799-mfdvv + memory: + total: "359047892992" + os: Linux-6.12.46+-x86_64-with-glibc2.35 + program: /workspace/trainer-kit/DPO-14b/run_dpo.py + python: CPython 3.10.12 + root: runs/dpo_run_14b_v1 + startedAt: "2025-12-26T15:23:32.328004Z" + writerId: ce8b9zq5sbh73okdbbvozze07ayjamtf + m: + - "1": train/global_step + "6": + - 3 + "7": [] + - "2": '*' + "5": 1 + "6": + - 1 + "7": [] + python_version: 3.10.12 + t: + "1": + - 1 + - 11 + - 41 + - 49 + - 51 + - 71 + - 84 + - 98 + "2": + - 1 + - 11 + - 41 + - 49 + - 51 + - 71 + - 84 + - 98 + "3": + - 7 + - 15 + - 16 + - 19 + - 66 + "4": 3.10.12 + "5": 0.23.1 + "6": 5.0.0.dev0 + "9": + "1": transformers_trainer + "12": 0.23.1 + "13": linux-x86_64 +accelerator_config: + value: + dispatch_batches: null + even_batches: true + gradient_accumulation_kwargs: null + non_blocking: false + split_batches: false + use_seedable_sampler: true +adam_beta1: + value: 0.9 +adam_beta2: + value: 0.999 +adam_epsilon: + value: 1e-08 +add_cross_attention: + value: false +architectures: + value: + - Qwen2ForCausalLM +attention_dropout: + value: 0 +auto_find_batch_size: + value: false +average_tokens_across_devices: + value: true +base_model_attribute_name: + value: model +batch_eval_metrics: + value: false +beta: + value: 0.1 +bf16: + value: true +bf16_full_eval: + value: false +bos_token_id: + value: null +chunk_size_feed_forward: + value: 0 +cross_attention_hidden_size: + value: null +data: + value: + chosen_field: chosen + eval_jsonl: null + eval_split_ratio: 0.1 + format_type: chatml + max_length: 2048 + num_proc: 4 + prompt_field: prompt + rejected_field: rejected + score_field: f1_score + shuffle: true + system_prompt: | + You are a Hyperswitch Rust code analyzer. Identify functions/structs that need modification for a given task. + + ## Output Format + + ##OUTPUT + Explain the data flow and why each component must change: + - Flow: [Input → Processing → Output with arrows] + - For each component: "The [ComponentName] ([path]) must [action] because [reason]—without this, [consequence]" + - Explain coupling between components + + ##SELECT + modify::crates/path/to/file.rs::impl::ComponentName + add::crates/another/file.rs::function::AnotherComponent + + + ## Rules + + 1. Use full paths: `remove::crates/folder/file.rs::Type::Name` + 2. Use `::` for nested items: `status::StructName::Type::Name` + 3. Always explain "must change because" and "without this" + 3. Types of components: function, struct, enum, impl, trait + 4. If there is extra information (e.g., enum variants), include that too. + 5. Start with ##OUTPUT, end with ##SELECT, terminate with + train_jsonl: dpo_pairs_generated.jsonl +data_seed: + value: null +dataloader_drop_last: + value: false +dataloader_num_workers: + value: 0 +dataloader_persistent_workers: + value: false +dataloader_pin_memory: + value: true +dataloader_prefetch_factor: + value: null +dataset_num_proc: + value: null +ddp_backend: + value: null +ddp_broadcast_buffers: + value: null +ddp_bucket_cap_mb: + value: null +ddp_find_unused_parameters: + value: null +ddp_timeout: + value: 1800 +debug: + value: [] +decoder_start_token_id: + value: null +deepspeed: + value: null +disable_dropout: + value: true +disable_tqdm: + value: false +discopop_tau: + value: 0.05 +do_eval: + value: true +do_predict: + value: false +do_train: + value: false +dpo: + value: + beta: 0.1 + label_smoothing: 0 + loss_type: sigmoid + reference_free: false + use_reference_model: true +dtype: + value: bfloat16 +enable_jit_checkpoint: + value: false +eos_token_id: + value: 151643 +eval_accumulation_steps: + value: null +eval_delay: + value: 0 +eval_do_concat_batches: + value: true +eval_on_start: + value: false +eval_steps: + value: 25 +eval_strategy: + value: steps +eval_use_gather_object: + value: false +f_alpha_divergence_coef: + value: 1 +f_divergence_type: + value: reverse_kl +finetuning_task: + value: null +force_use_ref_model: + value: false +fp16: + value: false +fp16_full_eval: + value: false +fsdp: + value: [] +fsdp_config: + value: + min_num_params: 0 + xla: false + xla_fsdp_grad_ckpt: false + xla_fsdp_v2: false +full_determinism: + value: false +generate_during_eval: + value: false +gradient_accumulation_steps: + value: 8 +gradient_checkpointing: + value: true +gradient_checkpointing_kwargs: + value: null +greater_is_better: + value: false +group_by_length: + value: false +hidden_act: + value: silu +hidden_size: + value: 5120 +hub_always_push: + value: false +hub_model_id: + value: null +hub_private_repo: + value: null +hub_revision: + value: null +hub_strategy: + value: every_save +hub_token: + value: +id2label: + value: + "0": LABEL_0 + "1": LABEL_1 +ignore_data_skip: + value: false +include_for_metrics: + value: [] +include_num_input_tokens_seen: + value: "no" +initializer_range: + value: 0.02 +intermediate_size: + value: 13824 +is_decoder: + value: false +is_encoder_decoder: + value: false +label_names: + value: null +label_pad_token_id: + value: -100 +label_smoothing: + value: 0 +label_smoothing_factor: + value: 0 +label2id: + value: + LABEL_0: 0 + LABEL_1: 1 +layer_types: + value: + - full_attention + - full_attention + - full_attention + - full_attention + - full_attention + - full_attention + - full_attention + - full_attention + - full_attention + - full_attention + - full_attention + - full_attention + - full_attention + - full_attention + - full_attention + - full_attention + - full_attention + - full_attention + - full_attention + - full_attention + - full_attention + - full_attention + - full_attention + - full_attention + - full_attention + - full_attention + - full_attention + - full_attention + - full_attention + - full_attention + - full_attention + - full_attention + - full_attention + - full_attention + - full_attention + - full_attention + - full_attention + - full_attention + - full_attention + - full_attention + - full_attention + - full_attention + - full_attention + - full_attention + - full_attention + - full_attention + - full_attention + - full_attention +ld_alpha: + value: null +learning_rate: + value: 5e-05 +length_column_name: + value: length +liger_kernel_config: + value: null +load_best_model_at_end: + value: true +local_rank: + value: -1 +log_level: + value: passive +log_level_replica: + value: warning +log_on_each_node: + value: true +logging_dir: + value: null +logging_first_step: + value: false +logging_nan_inf_filter: + value: true +logging_steps: + value: 2 +logging_strategy: + value: steps +loss_type: + value: sigmoid +loss_weights: + value: null +lr_scheduler_kwargs: + value: null +lr_scheduler_type: + value: cosine +max_completion_length: + value: null +max_grad_norm: + value: 1 +max_length: + value: 2048 +max_position_embeddings: + value: 32768 +max_prompt_length: + value: 1024 +max_steps: + value: -1 +max_window_layers: + value: 48 +metric_for_best_model: + value: eval_loss +model: + value: + attn_implementation: null + base_local_dir: base_model + bnb_4bit_compute_dtype: bfloat16 + bnb_4bit_quant_type: nf4 + bnb_4bit_use_double_quant: false + device_map: auto + repo_id: ../../Models/Qwen2.5-Coder-14B-CPT-SFT + revision: null + tokenizer_use_fast: true + torch_dtype: bfloat16 + trust_remote_code: true + use_4bit: false +model/num_parameters: + value: 14795199488 +model_adapter_name: + value: null +model_init_kwargs: + value: null +model_type: + value: qwen2 +neftune_noise_alpha: + value: null +num_attention_heads: + value: 40 +num_hidden_layers: + value: 48 +num_key_value_heads: + value: 8 +num_train_epochs: + value: 3 +optim: + value: adamw_torch +optim_args: + value: null +optim_target_modules: + value: null +output_attentions: + value: false +output_dir: + value: runs/dpo_run_14b_v1 +output_hidden_states: + value: false +pad_token: + value: +pad_token_id: + value: 151643 +padding_free: + value: false +parallelism_config: + value: null +peft: + value: + bias: none + enabled: true + lora_alpha: 32 + lora_dropout: 0.05 + r: 16 + target_modules: auto +peft_config: + value: + default: + alora_invocation_tokens: null + arrow_config: null + auto_mapping: null + base_model_name_or_path: ../../Models/Qwen2.5-Coder-14B-CPT-SFT + bias: none + corda_config: null + ensure_weight_tying: false + eva_config: null + exclude_modules: null + fan_in_fan_out: false + inference_mode: false + init_lora_weights: true + layer_replication: null + layers_pattern: null + layers_to_transform: null + lora_alpha: 32 + lora_bias: false + lora_dropout: 0.05 + megatron_config: null + megatron_core: megatron.core + modules_to_save: null + peft_type: LORA + peft_version: 0.18.0 + qalora_group_size: 16 + r: 16 + revision: null + runtime_config: + ephemeral_gpu_offload: false + target_modules: + - v_proj + - k_proj + - o_proj + - q_proj + target_parameters: null + task_type: CAUSAL_LM + trainable_token_indices: null + use_dora: false + use_qalora: false + use_rslora: false +per_device_eval_batch_size: + value: 1 +per_device_train_batch_size: + value: 1 +precompute_ref_batch_size: + value: null +precompute_ref_log_probs: + value: false +prediction_loss_only: + value: false +prefix: + value: null +problem_type: + value: null +project: + value: huggingface +push_to_hub: + value: false +ref_adapter_name: + value: null +ref_model_init_kwargs: + value: null +ref_model_mixup_alpha: + value: 0.6 +ref_model_sync_steps: + value: 512 +reference_free: + value: false +remove_unused_columns: + value: false +report_to: + value: + - wandb +restore_callback_states_from_checkpoint: + value: false +resume_from_checkpoint: + value: null +return_dict: + value: true +rms_norm_eps: + value: 1e-06 +rope_parameters: + value: + rope_theta: 1e+06 + rope_type: default +rpo_alpha: + value: null +run_dir: + value: runs/dpo_run_14b_v1 +run_name: + value: null +save_on_each_node: + value: false +save_only_model: + value: false +save_steps: + value: 100 +save_strategy: + value: steps +save_total_limit: + value: 10 +seed: + value: 42 +sep_token_id: + value: null +skip_memory_metrics: + value: true +sliding_window: + value: null +sync_ref_model: + value: false +task_specific_params: + value: null +tf32: + value: null +tie_word_embeddings: + value: false +tokenizer_class: + value: null +tools: + value: null +torch_compile: + value: false +torch_compile_backend: + value: null +torch_compile_mode: + value: null +torch_empty_cache_steps: + value: null +trackio_space_id: + value: trackio +train: + value: + early_stopping: + enabled: true + metric: eval_loss + min_delta: 0.001 + mode: min + patience: 5 + eval_steps: 25 + evaluation_strategy: steps + gradient_accumulation_steps: 8 + gradient_checkpointing: true + learning_rate: "5e-5" + load_best_model_at_end: true + logging_steps: 2 + lr_scheduler_type: cosine + max_grad_norm: 1 + num_train_epochs: 3 + optim: adamw_torch + per_device_eval_batch_size: 1 + per_device_train_batch_size: 1 + resume_from_checkpoint: auto + save_steps: 100 + save_strategy: steps + save_total_limit: 10 + warmup_ratio: 0.1 + weight_decay: 0 +transformers_version: + value: 5.0.0.dev0 +truncation_mode: + value: keep_end +use_cache: + value: false +use_cpu: + value: false +use_liger_kernel: + value: false +use_liger_loss: + value: null +use_logits_to_keep: + value: false +use_sliding_window: + value: false +use_weighting: + value: false +vocab_size: + value: 152064 +warmup_ratio: + value: 0.1 +warmup_steps: + value: 0.1 +weight_decay: + value: 0 diff --git a/dpo_qwen_14B/wandb/run-20251226_152332-r9hfat2g/files/output.log b/dpo_qwen_14B/wandb/run-20251226_152332-r9hfat2g/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..1454a77fdd1b4e54cd06e3a0a95b63f13311933d --- /dev/null +++ b/dpo_qwen_14B/wandb/run-20251226_152332-r9hfat2g/files/output.log @@ -0,0 +1,189 @@ +Wandb initialized: project='dpo-training', name='auto-generated' +`torch_dtype` is deprecated! Use `dtype` instead! +Loading weights: 100%|█████████████████████████████████| 579/579 [00:09<00:00, 61.71it/s, Materializing param=model.norm.weight] +Loading reference model (frozen copy)... +Loading weights: 100%|█████████████████████████████████| 579/579 [00:09<00:00, 61.41it/s, Materializing param=model.norm.weight] +Reference model loaded and frozen +2025-12-26 15:24:00,888 - INFO - HTTP Request: HEAD https://s3.amazonaws.com/datasets.huggingface.co/datasets/datasets/json/json.py "HTTP/1.1 200 OK" +2025-12-26 15:24:00,903 - INFO - Formatting train DPO data... +2025-12-26 15:24:03,288 - INFO - Train dataset after filtering: 6850 examples +2025-12-26 15:24:03,289 - INFO - train dataset validation passed: 6850 examples +2025-12-26 15:24:03,289 - INFO - Formatting eval DPO data... +2025-12-26 15:24:05,675 - INFO - Eval dataset after filtering: 762 examples +2025-12-26 15:24:05,675 - INFO - eval dataset validation passed: 762 examples +warmup_ratio is deprecated and will be removed in v5.2. Use `warmup_steps` instead. +Early stopping enabled: patience=5, min_delta=0.001 +2025-12-26 15:24:05,710 - INFO - DPO Training with beta=0.1, loss_type=sigmoid +warmup_ratio is deprecated and will be removed in v5.2. Use `warmup_steps` instead. +2025-12-26 15:24:15,316 - INFO - Starting DPO training... +The tokenizer has new PAD/BOS/EOS tokens that differ from the model config and generation config. The model config and generation config were aligned accordingly, being updated with the tokenizer's values. Updated tokens: {'bos_token_id': None, 'pad_token_id': 151643}. + 0%|▏ | 5/2571 [00:51<7:16:02, 10.20s/it]Traceback (most recent call last): +{'loss': '0.6931', 'grad_norm': '1.242', 'learning_rate': '1.938e-07', 'rewards/chosen': '0', 'rewards/rejected': '0', 'rewards/accuracies': '0', 'rewards/margins': '0', 'logps/chosen': '-368.9', 'logps/rejected': '-398.8', 'logits/chosen': '5.179', 'logits/rejected': '5.193', 'epoch': '0.002336'} +{'loss': '0.6933', 'grad_norm': '1.388', 'learning_rate': '5.814e-07', 'rewards/chosen': '0.02254', 'rewards/rejected': '0.02266', 'rewards/accuracies': '0.5', 'rewards/margins': '-0.0001159', 'logps/chosen': '-338.3', 'logps/rejected': '-366.9', 'logits/chosen': '5.405', 'logits/rejected': '5.456', 'epoch': '0.004672'} + File "/workspace/trainer-kit/DPO-14b/run_dpo.py", line 953, in + main() + File "/workspace/trainer-kit/DPO-14b/run_dpo.py", line 928, in main + trainer.train(resume_from_checkpoint=resume_from) + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/trainer.py", line 2168, in train + return inner_training_loop( + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/trainer.py", line 2535, in _inner_training_loop + tr_loss_step = self.training_step(model, inputs, num_items_in_batch) + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/trainer.py", line 3807, in training_step + loss = self.compute_loss(model, inputs, num_items_in_batch=num_items_in_batch) + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/trl/trainer/dpo_trainer.py", line 1810, in compute_loss + loss, metrics = self.get_batch_loss_metrics(model, inputs, train_eval="train") + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/trl/trainer/dpo_trainer.py", line 1733, in get_batch_loss_metrics + ref_chosen_logps, ref_rejected_logps = self.compute_ref_log_probs(batch) + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/trl/trainer/dpo_trainer.py", line 926, in compute_ref_log_probs + ref_model_output = self.concatenated_forward(self.ref_model, batch, is_ref_model=True) + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/trl/trainer/dpo_trainer.py", line 1600, in concatenated_forward + outputs = model(input_ids, **model_kwargs) + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1747, in _call_impl + return forward_call(*args, **kwargs) + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/accelerate/utils/operations.py", line 819, in forward + return model_forward(*args, **kwargs) + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/accelerate/utils/operations.py", line 807, in __call__ + return convert_to_fp32(self.model_forward(*args, **kwargs)) + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/torch/amp/autocast_mode.py", line 44, in decorate_autocast + return func(*args, **kwargs) + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/peft/peft_model.py", line 1923, in forward + return self.base_model( + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1747, in _call_impl + return forward_call(*args, **kwargs) + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/peft/tuners/tuners_utils.py", line 308, in forward + return self.model.forward(*args, **kwargs) + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/accelerate/hooks.py", line 175, in new_forward + output = module._old_forward(*args, **kwargs) + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/utils/generic.py", line 810, in wrapper + output = func(self, *args, **kwargs) + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/models/qwen2/modeling_qwen2.py", line 477, in forward + outputs: BaseModelOutputWithPast = self.model( + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1747, in _call_impl + return forward_call(*args, **kwargs) + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/utils/generic.py", line 965, in wrapper + outputs = func(self, *args, **kwargs) + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/models/qwen2/modeling_qwen2.py", line 412, in forward + hidden_states = decoder_layer( + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/modeling_layers.py", line 94, in __call__ + return super().__call__(*args, **kwargs) + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1747, in _call_impl + return forward_call(*args, **kwargs) + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/utils/generic.py", line 918, in wrapped_forward + output = orig_forward(*args, **kwargs) + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/accelerate/hooks.py", line 175, in new_forward + output = module._old_forward(*args, **kwargs) + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/models/qwen2/modeling_qwen2.py", line 300, in forward + hidden_states, _ = self.self_attn( + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1747, in _call_impl + return forward_call(*args, **kwargs) + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/accelerate/hooks.py", line 175, in new_forward + output = module._old_forward(*args, **kwargs) + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/models/qwen2/modeling_qwen2.py", line 222, in forward + value_states = self.v_proj(hidden_states).view(hidden_shape).transpose(1, 2) + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1747, in _call_impl + return forward_call(*args, **kwargs) + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/peft/tuners/lora/layer.py", line 807, in forward + result = result + lora_B(lora_A(dropout(x))) * scaling + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1747, in _call_impl + return forward_call(*args, **kwargs) + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/torch/nn/modules/linear.py", line 125, in forward + return F.linear(input, self.weight, self.bias) +KeyboardInterrupt +Traceback (most recent call last): + File "/workspace/trainer-kit/DPO-14b/run_dpo.py", line 953, in + main() + File "/workspace/trainer-kit/DPO-14b/run_dpo.py", line 928, in main + trainer.train(resume_from_checkpoint=resume_from) + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/trainer.py", line 2168, in train + return inner_training_loop( + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/trainer.py", line 2535, in _inner_training_loop + tr_loss_step = self.training_step(model, inputs, num_items_in_batch) + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/trainer.py", line 3807, in training_step + loss = self.compute_loss(model, inputs, num_items_in_batch=num_items_in_batch) + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/trl/trainer/dpo_trainer.py", line 1810, in compute_loss + loss, metrics = self.get_batch_loss_metrics(model, inputs, train_eval="train") + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/trl/trainer/dpo_trainer.py", line 1733, in get_batch_loss_metrics + ref_chosen_logps, ref_rejected_logps = self.compute_ref_log_probs(batch) + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/trl/trainer/dpo_trainer.py", line 926, in compute_ref_log_probs + ref_model_output = self.concatenated_forward(self.ref_model, batch, is_ref_model=True) + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/trl/trainer/dpo_trainer.py", line 1600, in concatenated_forward + outputs = model(input_ids, **model_kwargs) + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1747, in _call_impl + return forward_call(*args, **kwargs) + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/accelerate/utils/operations.py", line 819, in forward + return model_forward(*args, **kwargs) + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/accelerate/utils/operations.py", line 807, in __call__ + return convert_to_fp32(self.model_forward(*args, **kwargs)) + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/torch/amp/autocast_mode.py", line 44, in decorate_autocast + return func(*args, **kwargs) + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/peft/peft_model.py", line 1923, in forward + return self.base_model( + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1747, in _call_impl + return forward_call(*args, **kwargs) + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/peft/tuners/tuners_utils.py", line 308, in forward + return self.model.forward(*args, **kwargs) + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/accelerate/hooks.py", line 175, in new_forward + output = module._old_forward(*args, **kwargs) + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/utils/generic.py", line 810, in wrapper + output = func(self, *args, **kwargs) + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/models/qwen2/modeling_qwen2.py", line 477, in forward + outputs: BaseModelOutputWithPast = self.model( + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1747, in _call_impl + return forward_call(*args, **kwargs) + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/utils/generic.py", line 965, in wrapper + outputs = func(self, *args, **kwargs) + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/models/qwen2/modeling_qwen2.py", line 412, in forward + hidden_states = decoder_layer( + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/modeling_layers.py", line 94, in __call__ + return super().__call__(*args, **kwargs) + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1747, in _call_impl + return forward_call(*args, **kwargs) + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/utils/generic.py", line 918, in wrapped_forward + output = orig_forward(*args, **kwargs) + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/accelerate/hooks.py", line 175, in new_forward + output = module._old_forward(*args, **kwargs) + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/models/qwen2/modeling_qwen2.py", line 300, in forward + hidden_states, _ = self.self_attn( + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1747, in _call_impl + return forward_call(*args, **kwargs) + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/accelerate/hooks.py", line 175, in new_forward + output = module._old_forward(*args, **kwargs) + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/models/qwen2/modeling_qwen2.py", line 222, in forward + value_states = self.v_proj(hidden_states).view(hidden_shape).transpose(1, 2) + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1747, in _call_impl + return forward_call(*args, **kwargs) + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/peft/tuners/lora/layer.py", line 807, in forward + result = result + lora_B(lora_A(dropout(x))) * scaling + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1747, in _call_impl + return forward_call(*args, **kwargs) + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/torch/nn/modules/linear.py", line 125, in forward + return F.linear(input, self.weight, self.bias) +KeyboardInterrupt diff --git a/dpo_qwen_14B/wandb/run-20251226_152332-r9hfat2g/files/requirements.txt b/dpo_qwen_14B/wandb/run-20251226_152332-r9hfat2g/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..79a4241d8724f018c9bdfcd7c289f1f14578574b --- /dev/null +++ b/dpo_qwen_14B/wandb/run-20251226_152332-r9hfat2g/files/requirements.txt @@ -0,0 +1,104 @@ +exceptiongroup==1.3.1 +wheel==0.45.1 +python-dateutil==2.9.0.post0 +nvidia-ml-py==13.580.82 +huggingface_hub==1.2.3 +idna==3.11 +click==8.3.1 +numpy==2.2.6 +httpx==0.28.1 +tokenizers==0.22.1 +sympy==1.13.1 +yarl==1.22.0 +async-timeout==5.0.1 +datasets==4.4.2 +platformdirs==4.5.1 +nvidia-cuda-cupti-cu12==12.1.105 +nvidia-nvtx-cu12==12.1.105 +smmap==5.0.2 +accelerate==1.12.0 +requests==2.32.5 +aiohttp==3.13.2 +bitsandbytes==0.49.0 +nvidia-cublas-cu12==12.1.3.1 +mpmath==1.3.0 +torchaudio==2.5.1+cu121 +nvidia-cuda-runtime-cu12==12.1.105 +typing-inspection==0.4.2 +GitPython==3.1.45 +xxhash==3.6.0 +nvidia-cusolver-cu12==11.4.5.107 +pydantic_core==2.41.5 +six==1.17.0 +torchvision==0.20.1+cu121 +typing_extensions==4.15.0 +triton==3.1.0 +charset-normalizer==3.4.4 +nvitop==1.6.1 +wandb==0.23.1 +regex==2025.11.3 +pip==25.3 +nvidia-cusparse-cu12==12.1.0.106 +pytz==2025.2 +Jinja2==3.1.6 +psutil==7.2.0 +pillow==12.0.0 +packaging==25.0 +safetensors==0.7.0 +sentry-sdk==2.48.0 +gitdb==4.0.12 +httpcore==1.0.9 +setuptools==80.9.0 +nvidia-cufft-cu12==11.0.2.54 +anyio==4.12.0 +transformers==5.0.0.dev0 +pydantic==2.12.5 +fsspec==2025.10.0 +filelock==3.20.0 +PyYAML==6.0.3 +hf-xet==1.2.0 +nvidia-cudnn-cu12==9.1.0.70 +tqdm==4.67.1 +MarkupSafe==2.1.5 +attrs==25.4.0 +nvidia-cuda-nvrtc-cu12==12.1.105 +peft==0.18.0 +aiohappyeyeballs==2.6.1 +networkx==3.4.2 +nvidia-nvjitlink-cu12==12.9.86 +certifi==2025.11.12 +pyarrow==22.0.0 +dill==0.4.0 +protobuf==6.33.2 +aiosignal==1.4.0 +frozenlist==1.8.0 +urllib3==2.6.2 +propcache==0.4.1 +tzdata==2025.3 +pandas==2.3.3 +annotated-types==0.7.0 +shellingham==1.5.4 +nvidia-nccl-cu12==2.21.5 +multidict==6.7.0 +nvidia-curand-cu12==10.3.2.106 +trl==0.26.2 +torch==2.5.1+cu121 +h11==0.16.0 +multiprocess==0.70.18 +typer-slim==0.21.0 +wheel==0.45.1 +tomli==2.0.1 +autocommand==2.2.2 +jaraco.context==5.3.0 +zipp==3.19.2 +packaging==24.2 +inflect==7.3.1 +typing_extensions==4.12.2 +platformdirs==4.2.2 +jaraco.functools==4.0.1 +jaraco.collections==5.1.0 +jaraco.text==3.12.1 +backports.tarfile==1.2.0 +more-itertools==10.3.0 +importlib_metadata==8.0.0 +typeguard==4.3.0 diff --git a/dpo_qwen_14B/wandb/run-20251226_152332-r9hfat2g/files/wandb-metadata.json b/dpo_qwen_14B/wandb/run-20251226_152332-r9hfat2g/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..91ed5775004a6150f2c29c019b1b488c3235219c --- /dev/null +++ b/dpo_qwen_14B/wandb/run-20251226_152332-r9hfat2g/files/wandb-metadata.json @@ -0,0 +1,47 @@ +{ + "os": "Linux-6.12.46+-x86_64-with-glibc2.35", + "python": "CPython 3.10.12", + "startedAt": "2025-12-26T15:23:32.328004Z", + "args": [ + "--config", + "config_dpo.yaml" + ], + "program": "/workspace/trainer-kit/DPO-14b/run_dpo.py", + "codePath": "run_dpo.py", + "codePathLocal": "run_dpo.py", + "email": "shaiksirajuddin9949@gmail.com", + "root": "runs/dpo_run_14b_v1", + "host": "a100-2gpu-shell-session-757d587799-mfdvv", + "executable": "/workspace/llm_finetuning_env/bin/python", + "cpu_count": 12, + "cpu_count_logical": 24, + "gpu": "NVIDIA A100-SXM4-80GB", + "gpu_count": 2, + "disk": { + "/": { + "total": "791251738624", + "used": "314755911680" + } + }, + "memory": { + "total": "359047892992" + }, + "gpu_nvidia": [ + { + "name": "NVIDIA A100-SXM4-80GB", + "memoryTotal": "85899345920", + "cudaCores": 6912, + "architecture": "Ampere", + "uuid": "GPU-989794b0-ec3b-13bf-db9f-3fbe341497ba" + }, + { + "name": "NVIDIA A100-SXM4-80GB", + "memoryTotal": "85899345920", + "cudaCores": 6912, + "architecture": "Ampere", + "uuid": "GPU-3790aa64-60ef-9eac-b0b1-b278ee8c0d40" + } + ], + "cudaVersion": "13.0", + "writerId": "ce8b9zq5sbh73okdbbvozze07ayjamtf" +} \ No newline at end of file diff --git a/dpo_qwen_14B/wandb/run-20251226_152332-r9hfat2g/files/wandb-summary.json b/dpo_qwen_14B/wandb/run-20251226_152332-r9hfat2g/files/wandb-summary.json new file mode 100644 index 0000000000000000000000000000000000000000..eecb816fcd8d8160e2c0ccb694689facfda2f431 --- /dev/null +++ b/dpo_qwen_14B/wandb/run-20251226_152332-r9hfat2g/files/wandb-summary.json @@ -0,0 +1 @@ +{"_step":1,"train/logps/rejected":-366.88128662109375,"train/grad_norm":1.3884541988372803,"train/logps/chosen":-338.257568359375,"train/rewards/margins":-0.00011587224435061216,"train/logits/chosen":5.405174255371094,"_wandb":{"runtime":101},"train/loss":0.693317174911499,"train/global_step":4,"train/epoch":0.004671532846715329,"_timestamp":1.7667626963258417e+09,"train/rewards/chosen":0.022540951147675514,"train/logits/rejected":5.456291675567627,"train/rewards/accuracies":0.5,"train/rewards/rejected":0.022656824439764023,"_runtime":101,"train/learning_rate":5.813953488372093e-07} \ No newline at end of file diff --git a/dpo_qwen_14B/wandb/run-20251226_152332-r9hfat2g/logs/debug-core.log b/dpo_qwen_14B/wandb/run-20251226_152332-r9hfat2g/logs/debug-core.log new file mode 100644 index 0000000000000000000000000000000000000000..9785aee4ef61178b7eb88412a60c6b2ba43d0124 --- /dev/null +++ b/dpo_qwen_14B/wandb/run-20251226_152332-r9hfat2g/logs/debug-core.log @@ -0,0 +1,14 @@ +{"time":"2025-12-26T15:23:32.418743785Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpiwm5qcwf/port-134621.txt","pid":134621,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false} +{"time":"2025-12-26T15:23:32.419487782Z","level":"INFO","msg":"server: will exit if parent process dies","ppid":134621} +{"time":"2025-12-26T15:23:32.419441897Z","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-134621-134691-43401370/socket","Net":"unix"}} +{"time":"2025-12-26T15:23:32.60107271Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"} +{"time":"2025-12-26T15:23:32.607567183Z","level":"INFO","msg":"handleInformInit: received","streamId":"r9hfat2g","id":"1(@)"} +{"time":"2025-12-26T15:23:32.769941198Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"r9hfat2g","id":"1(@)"} +{"time":"2025-12-26T15:25:14.279920394Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"} +{"time":"2025-12-26T15:25:14.279987785Z","level":"INFO","msg":"connection: closing","id":"1(@)"} +{"time":"2025-12-26T15:25:14.280023071Z","level":"INFO","msg":"server is shutting down"} +{"time":"2025-12-26T15:25:14.280085895Z","level":"INFO","msg":"connection: closed successfully","id":"1(@)"} +{"time":"2025-12-26T15:25:14.280137634Z","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-134621-134691-43401370/socket","Net":"unix"}} +{"time":"2025-12-26T15:25:14.643871761Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"} +{"time":"2025-12-26T15:25:14.643905607Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"} +{"time":"2025-12-26T15:25:14.643922133Z","level":"INFO","msg":"server is closed"} diff --git a/dpo_qwen_14B/wandb/run-20251226_152332-r9hfat2g/logs/debug-internal.log b/dpo_qwen_14B/wandb/run-20251226_152332-r9hfat2g/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..eaaccd2f448c287ca25aaf303216806012e225fa --- /dev/null +++ b/dpo_qwen_14B/wandb/run-20251226_152332-r9hfat2g/logs/debug-internal.log @@ -0,0 +1,11 @@ +{"time":"2025-12-26T15:23:32.607728655Z","level":"INFO","msg":"stream: starting","core version":"0.23.1"} +{"time":"2025-12-26T15:23:32.769717362Z","level":"INFO","msg":"stream: created new stream","id":"r9hfat2g"} +{"time":"2025-12-26T15:23:32.769819803Z","level":"INFO","msg":"handler: started","stream_id":"r9hfat2g"} +{"time":"2025-12-26T15:23:32.76993207Z","level":"INFO","msg":"stream: started","id":"r9hfat2g"} +{"time":"2025-12-26T15:23:32.769980394Z","level":"INFO","msg":"sender: started","stream_id":"r9hfat2g"} +{"time":"2025-12-26T15:23:32.769979838Z","level":"INFO","msg":"writer: started","stream_id":"r9hfat2g"} +{"time":"2025-12-26T15:25:14.280016864Z","level":"INFO","msg":"stream: closing","id":"r9hfat2g"} +{"time":"2025-12-26T15:25:14.470499024Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"} +{"time":"2025-12-26T15:25:14.642982392Z","level":"INFO","msg":"handler: closed","stream_id":"r9hfat2g"} +{"time":"2025-12-26T15:25:14.643087783Z","level":"INFO","msg":"sender: closed","stream_id":"r9hfat2g"} +{"time":"2025-12-26T15:25:14.643101377Z","level":"INFO","msg":"stream: closed","id":"r9hfat2g"} diff --git a/dpo_qwen_14B/wandb/run-20251226_152332-r9hfat2g/logs/debug.log b/dpo_qwen_14B/wandb/run-20251226_152332-r9hfat2g/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..0add9fc8128f1e5a6e37d389bbdb89735b111e86 --- /dev/null +++ b/dpo_qwen_14B/wandb/run-20251226_152332-r9hfat2g/logs/debug.log @@ -0,0 +1,26 @@ +2025-12-26 15:23:32,329 INFO MainThread:134621 [wandb_setup.py:_flush():80] Current SDK version is 0.23.1 +2025-12-26 15:23:32,329 INFO MainThread:134621 [wandb_setup.py:_flush():80] Configure stats pid to 134621 +2025-12-26 15:23:32,329 INFO MainThread:134621 [wandb_setup.py:_flush():80] Loading settings from /root/.config/wandb/settings +2025-12-26 15:23:32,329 INFO MainThread:134621 [wandb_setup.py:_flush():80] Loading settings from /workspace/trainer-kit/DPO-14b/wandb/settings +2025-12-26 15:23:32,329 INFO MainThread:134621 [wandb_setup.py:_flush():80] Loading settings from environment variables +2025-12-26 15:23:32,329 INFO MainThread:134621 [wandb_init.py:setup_run_log_directory():714] Logging user logs to runs/dpo_run_14b_v1/wandb/run-20251226_152332-r9hfat2g/logs/debug.log +2025-12-26 15:23:32,330 INFO MainThread:134621 [wandb_init.py:setup_run_log_directory():715] Logging internal logs to runs/dpo_run_14b_v1/wandb/run-20251226_152332-r9hfat2g/logs/debug-internal.log +2025-12-26 15:23:32,330 INFO MainThread:134621 [wandb_init.py:init():841] calling init triggers +2025-12-26 15:23:32,330 INFO MainThread:134621 [wandb_init.py:init():846] wandb.init called with sweep_config: {} +config: {'model': {'repo_id': '../../Models/Qwen2.5-Coder-14B-CPT-SFT', 'revision': None, 'base_local_dir': 'base_model', 'trust_remote_code': True, 'tokenizer_use_fast': True, 'device_map': 'auto', 'torch_dtype': 'bfloat16', 'use_4bit': False, 'bnb_4bit_quant_type': 'nf4', 'bnb_4bit_use_double_quant': False, 'bnb_4bit_compute_dtype': 'bfloat16', 'attn_implementation': None}, 'data': {'train_jsonl': 'dpo_pairs_generated.jsonl', 'eval_jsonl': None, 'eval_split_ratio': 0.1, 'prompt_field': 'prompt', 'chosen_field': 'chosen', 'rejected_field': 'rejected', 'score_field': 'f1_score', 'format_type': 'chatml', 'system_prompt': 'You are a Hyperswitch Rust code analyzer. Identify functions/structs that need modification for a given task.\n\n## Output Format\n\n##OUTPUT\nExplain the data flow and why each component must change:\n- Flow: [Input → Processing → Output with arrows]\n- For each component: "The [ComponentName] ([path]) must [action] because [reason]—without this, [consequence]"\n- Explain coupling between components\n\n##SELECT\nmodify::crates/path/to/file.rs::impl::ComponentName\nadd::crates/another/file.rs::function::AnotherComponent\n\n\n## Rules\n\n1. Use full paths: `remove::crates/folder/file.rs::Type::Name`\n2. Use `::` for nested items: `status::StructName::Type::Name`\n3. Always explain "must change because" and "without this"\n3. Types of components: function, struct, enum, impl, trait\n4. If there is extra information (e.g., enum variants), include that too.\n5. Start with ##OUTPUT, end with ##SELECT, terminate with \n', 'max_length': 2048, 'shuffle': True, 'num_proc': 4}, 'peft': {'enabled': True, 'r': 16, 'lora_alpha': 32, 'lora_dropout': 0.05, 'bias': 'none', 'target_modules': 'auto'}, 'dpo': {'beta': 0.1, 'label_smoothing': 0.0, 'loss_type': 'sigmoid', 'use_reference_model': True, 'reference_free': False}, 'train': {'num_train_epochs': 3, 'per_device_train_batch_size': 1, 'per_device_eval_batch_size': 1, 'gradient_accumulation_steps': 8, 'learning_rate': '5e-5', 'weight_decay': 0.0, 'warmup_ratio': 0.1, 'lr_scheduler_type': 'cosine', 'optim': 'adamw_torch', 'max_grad_norm': 1.0, 'gradient_checkpointing': True, 'logging_steps': 2, 'save_strategy': 'steps', 'save_steps': 100, 'save_total_limit': 10, 'evaluation_strategy': 'steps', 'eval_steps': 25, 'load_best_model_at_end': True, 'early_stopping': {'enabled': True, 'patience': 5, 'min_delta': 0.001, 'metric': 'eval_loss', 'mode': 'min'}, 'resume_from_checkpoint': 'auto'}, 'run_dir': 'runs/dpo_run_14b_v1', '_wandb': {}} +2025-12-26 15:23:32,330 INFO MainThread:134621 [wandb_init.py:init():889] starting backend +2025-12-26 15:23:32,601 INFO MainThread:134621 [wandb_init.py:init():892] sending inform_init request +2025-12-26 15:23:32,605 INFO MainThread:134621 [wandb_init.py:init():900] backend started and connected +2025-12-26 15:23:32,607 INFO MainThread:134621 [wandb_init.py:init():970] updated telemetry +2025-12-26 15:23:32,608 INFO MainThread:134621 [wandb_init.py:init():994] communicating run to backend with 90.0 second timeout +2025-12-26 15:23:32,915 INFO MainThread:134621 [wandb_init.py:init():1041] starting run threads in backend +2025-12-26 15:23:33,025 INFO MainThread:134621 [wandb_run.py:_console_start():2521] atexit reg +2025-12-26 15:23:33,025 INFO MainThread:134621 [wandb_run.py:_redirect():2369] redirect: wrap_raw +2025-12-26 15:23:33,025 INFO MainThread:134621 [wandb_run.py:_redirect():2438] Wrapping output streams. +2025-12-26 15:23:33,025 INFO MainThread:134621 [wandb_run.py:_redirect():2461] Redirects installed. +2025-12-26 15:23:33,031 INFO MainThread:134621 [wandb_init.py:init():1081] run started, returning control to user process +2025-12-26 15:24:15,664 INFO MainThread:134621 [wandb_run.py:_config_callback():1396] config_cb None None {'peft_config': {'default': {'task_type': 'CAUSAL_LM', 'peft_type': 'LORA', 'auto_mapping': None, 'peft_version': '0.18.0', 'base_model_name_or_path': '../../Models/Qwen2.5-Coder-14B-CPT-SFT', 'revision': None, 'inference_mode': False, 'r': 16, 'target_modules': ['v_proj', 'k_proj', 'o_proj', 'q_proj'], 'exclude_modules': None, 'lora_alpha': 32, 'lora_dropout': 0.05, 'fan_in_fan_out': False, 'bias': 'none', 'use_rslora': False, 'modules_to_save': None, 'init_lora_weights': True, 'layers_to_transform': None, 'layers_pattern': None, 'rank_pattern': {}, 'alpha_pattern': {}, 'megatron_config': None, 'megatron_core': 'megatron.core', 'trainable_token_indices': None, 'loftq_config': {}, 'eva_config': None, 'corda_config': None, 'use_dora': False, 'alora_invocation_tokens': None, 'use_qalora': False, 'qalora_group_size': 16, 'layer_replication': None, 'runtime_config': {'ephemeral_gpu_offload': False}, 'lora_bias': False, 'target_parameters': None, 'arrow_config': None, 'ensure_weight_tying': False}}, 'vocab_size': 152064, 'max_position_embeddings': 32768, 'hidden_size': 5120, 'intermediate_size': 13824, 'num_hidden_layers': 48, 'num_attention_heads': 40, 'use_sliding_window': False, 'sliding_window': None, 'max_window_layers': 48, 'num_key_value_heads': 8, 'hidden_act': 'silu', 'initializer_range': 0.02, 'rms_norm_eps': 1e-06, 'use_cache': False, 'attention_dropout': 0.0, 'layer_types': ['full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention'], 'rope_parameters': {'rope_theta': 1000000.0, 'rope_type': 'default'}, 'return_dict': True, 'output_hidden_states': False, 'dtype': 'bfloat16', 'tie_word_embeddings': False, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'architectures': ['Qwen2ForCausalLM'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'task_specific_params': None, 'problem_type': None, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': None, 'pad_token_id': 151643, 'eos_token_id': 151643, 'sep_token_id': None, 'decoder_start_token_id': None, '_name_or_path': '../../Models/Qwen2.5-Coder-14B-CPT-SFT', 'transformers_version': '5.0.0.dev0', 'model_type': 'qwen2', 'output_attentions': False, 'output_dir': 'runs/dpo_run_14b_v1', 'do_train': False, 'do_eval': True, 'do_predict': False, 'eval_strategy': 'steps', 'prediction_loss_only': False, 'per_device_train_batch_size': 1, 'per_device_eval_batch_size': 1, 'gradient_accumulation_steps': 8, 'eval_accumulation_steps': None, 'eval_delay': 0, 'torch_empty_cache_steps': None, 'learning_rate': 5e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 3, 'max_steps': -1, 'lr_scheduler_type': 'cosine', 'lr_scheduler_kwargs': None, 'warmup_ratio': 0.1, 'warmup_steps': 0.1, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': None, 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 2, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 100, 'save_total_limit': 10, 'enable_jit_checkpoint': False, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'use_cpu': False, 'seed': 42, 'data_seed': None, 'bf16': True, 'fp16': False, 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': -1, 'ddp_backend': None, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 25, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'run_name': None, 'disable_tqdm': False, 'remove_unused_columns': False, 'label_names': None, 'load_best_model_at_end': True, 'metric_for_best_model': 'eval_loss', 'greater_is_better': False, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'parallelism_config': None, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['wandb'], 'project': 'huggingface', 'trackio_space_id': 'trackio', 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'push_to_hub': False, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '', 'hub_private_repo': None, 'hub_always_push': False, 'hub_revision': None, 'gradient_checkpointing': True, 'gradient_checkpointing_kwargs': None, 'include_for_metrics': [], 'eval_do_concat_batches': True, 'auto_find_batch_size': False, 'full_determinism': False, 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'include_num_input_tokens_seen': 'no', 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'liger_kernel_config': None, 'eval_use_gather_object': False, 'average_tokens_across_devices': True, 'model_init_kwargs': None, 'ref_model_init_kwargs': None, 'model_adapter_name': None, 'ref_adapter_name': None, 'force_use_ref_model': False, 'disable_dropout': True, 'use_logits_to_keep': False, 'dataset_num_proc': None, 'pad_token': '', 'label_pad_token_id': -100, 'max_prompt_length': 1024, 'max_completion_length': None, 'max_length': 2048, 'truncation_mode': 'keep_end', 'padding_free': False, 'precompute_ref_log_probs': False, 'precompute_ref_batch_size': None, 'tools': None, 'loss_type': 'sigmoid', 'use_liger_loss': None, 'base_model_attribute_name': 'model', 'beta': 0.1, 'f_divergence_type': 'reverse_kl', 'f_alpha_divergence_coef': 1.0, 'reference_free': False, 'label_smoothing': 0.0, 'use_weighting': False, 'rpo_alpha': None, 'ld_alpha': None, 'discopop_tau': 0.05, 'loss_weights': None, 'sync_ref_model': False, 'ref_model_mixup_alpha': 0.6, 'ref_model_sync_steps': 512, 'generate_during_eval': False} +2025-12-26 15:24:15,672 INFO MainThread:134621 [wandb_config.py:__setitem__():154] [no run ID] config set model/num_parameters = 14795199488 - > +2025-12-26 15:24:15,672 INFO MainThread:134621 [wandb_run.py:_config_callback():1396] config_cb model/num_parameters 14795199488 None +2025-12-26 15:25:14,280 INFO wandb-AsyncioManager-main:134621 [service_client.py:_forward_responses():80] Reached EOF. +2025-12-26 15:25:14,280 INFO wandb-AsyncioManager-main:134621 [mailbox.py:close():137] Closing mailbox, abandoning 1 handles. diff --git a/dpo_qwen_14B/wandb/run-20251226_152332-r9hfat2g/run-r9hfat2g.wandb b/dpo_qwen_14B/wandb/run-20251226_152332-r9hfat2g/run-r9hfat2g.wandb new file mode 100644 index 0000000000000000000000000000000000000000..5e9dafbf0ec5ae269e950363821bada3ef42422a --- /dev/null +++ b/dpo_qwen_14B/wandb/run-20251226_152332-r9hfat2g/run-r9hfat2g.wandb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:43e630f821a728a70660f513c24097fbebe6281e9ed349c81fbbf5c9ee24270a +size 515777 diff --git a/dpo_qwen_14B/wandb/run-20251226_152936-r1nptay8/files/config.yaml b/dpo_qwen_14B/wandb/run-20251226_152936-r1nptay8/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c76ed51a2000e653b43d1d83501af93e1d9fa76c --- /dev/null +++ b/dpo_qwen_14B/wandb/run-20251226_152936-r1nptay8/files/config.yaml @@ -0,0 +1,165 @@ +_wandb: + value: + cli_version: 0.23.1 + e: + 62bhwklrbfchpar5wzdaud7re7jdowat: + args: + - --config + - config_dpo.yaml + codePath: run_dpo.py + codePathLocal: run_dpo.py + cpu_count: 12 + cpu_count_logical: 24 + cudaVersion: "13.0" + disk: + /: + total: "791251738624" + used: "316563935232" + email: shaiksirajuddin9949@gmail.com + executable: /workspace/llm_finetuning_env/bin/python + gpu: NVIDIA A100-SXM4-80GB + gpu_count: 2 + gpu_nvidia: + - architecture: Ampere + cudaCores: 6912 + memoryTotal: "85899345920" + name: NVIDIA A100-SXM4-80GB + uuid: GPU-989794b0-ec3b-13bf-db9f-3fbe341497ba + - architecture: Ampere + cudaCores: 6912 + memoryTotal: "85899345920" + name: NVIDIA A100-SXM4-80GB + uuid: GPU-3790aa64-60ef-9eac-b0b1-b278ee8c0d40 + host: a100-2gpu-shell-session-757d587799-mfdvv + memory: + total: "359047892992" + os: Linux-6.12.46+-x86_64-with-glibc2.35 + program: /workspace/trainer-kit/DPO-14b/run_dpo.py + python: CPython 3.10.12 + root: runs/dpo_run_14b_v1 + startedAt: "2025-12-26T15:29:36.793485Z" + writerId: 62bhwklrbfchpar5wzdaud7re7jdowat + m: [] + python_version: 3.10.12 + t: + "1": + - 1 + - 11 + - 41 + - 49 + - 51 + - 71 + - 84 + - 98 + "2": + - 1 + - 11 + - 41 + - 49 + - 51 + - 71 + - 84 + - 98 + "3": + - 15 + - 16 + "4": 3.10.12 + "5": 0.23.1 + "6": 5.0.0.dev0 + "12": 0.23.1 + "13": linux-x86_64 +data: + value: + chosen_field: chosen + eval_jsonl: null + eval_split_ratio: 0.1 + format_type: chatml + max_length: 2048 + num_proc: 4 + prompt_field: prompt + rejected_field: rejected + score_field: f1_score + shuffle: true + system_prompt: | + You are a Hyperswitch Rust code analyzer. Identify functions/structs that need modification for a given task. + + ## Output Format + + ##OUTPUT + Explain the data flow and why each component must change: + - Flow: [Input → Processing → Output with arrows] + - For each component: "The [ComponentName] ([path]) must [action] because [reason]—without this, [consequence]" + - Explain coupling between components + + ##SELECT + modify::crates/path/to/file.rs::impl::ComponentName + add::crates/another/file.rs::function::AnotherComponent + + + ## Rules + + 1. Use full paths: `remove::crates/folder/file.rs::Type::Name` + 2. Use `::` for nested items: `status::StructName::Type::Name` + 3. Always explain "must change because" and "without this" + 3. Types of components: function, struct, enum, impl, trait + 4. If there is extra information (e.g., enum variants), include that too. + 5. Start with ##OUTPUT, end with ##SELECT, terminate with + train_jsonl: dpo_pairs_generated.jsonl +dpo: + value: + beta: 0.1 + label_smoothing: 0 + loss_type: sigmoid + reference_free: false + use_reference_model: true +model: + value: + attn_implementation: null + base_local_dir: base_model + bnb_4bit_compute_dtype: bfloat16 + bnb_4bit_quant_type: nf4 + bnb_4bit_use_double_quant: false + device_map: auto + repo_id: ../../Models/Qwen2.5-Coder-14B-CPT-SFT + revision: null + tokenizer_use_fast: true + torch_dtype: bfloat16 + trust_remote_code: true + use_4bit: false +peft: + value: + bias: none + enabled: true + lora_alpha: 32 + lora_dropout: 0.05 + r: 16 + target_modules: auto +run_dir: + value: runs/dpo_run_14b_v1 +train: + value: + early_stopping: + enabled: true + metric: eval_loss + min_delta: 0.001 + mode: min + patience: 5 + eval_steps: 25 + evaluation_strategy: steps + gradient_accumulation_steps: 8 + gradient_checkpointing: true + learning_rate: "5e-5" + load_best_model_at_end: true + logging_steps: 2 + lr_scheduler_type: cosine + max_grad_norm: 1 + num_train_epochs: 3 + optim: adamw_torch + per_device_eval_batch_size: 1 + per_device_train_batch_size: 1 + resume_from_checkpoint: auto + save_steps: 100 + save_strategy: steps + save_total_limit: 10 + warmup_ratio: 0.1 + weight_decay: 0 diff --git a/dpo_qwen_14B/wandb/run-20251226_152936-r1nptay8/files/output.log b/dpo_qwen_14B/wandb/run-20251226_152936-r1nptay8/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..61f84f11f099052e3c7febc15ea084fc65692745 --- /dev/null +++ b/dpo_qwen_14B/wandb/run-20251226_152936-r1nptay8/files/output.log @@ -0,0 +1,121 @@ +Wandb initialized: project='dpo-training', name='auto-generated' +`torch_dtype` is deprecated! Use `dtype` instead! +Loading weights: 100%|█████████| 579/579 [00:09<00:00, 61.06it/s, Materializing param=model.norm.weight] +Loading reference model (frozen copy)... +Loading weights: 100%|█████████| 579/579 [00:09<00:00, 62.49it/s, Materializing param=model.norm.weight] +Reference model loaded and frozen +2025-12-26 15:30:05,632 - INFO - HTTP Request: HEAD https://s3.amazonaws.com/datasets.huggingface.co/datasets/datasets/json/json.py "HTTP/1.1 200 OK" +2025-12-26 15:30:05,647 - INFO - Formatting train DPO data... +2025-12-26 15:30:07,996 - INFO - Train dataset after filtering: 6850 examples +2025-12-26 15:30:07,997 - INFO - train dataset validation passed: 6850 examples +2025-12-26 15:30:07,997 - INFO - Formatting eval DPO data... +2025-12-26 15:30:10,371 - INFO - Eval dataset after filtering: 762 examples +2025-12-26 15:30:10,372 - INFO - eval dataset validation passed: 762 examples +warmup_ratio is deprecated and will be removed in v5.2. Use `warmup_steps` instead. +Early stopping enabled: patience=5, min_delta=0.001 +2025-12-26 15:30:10,408 - INFO - DPO Training with beta=0.1, loss_type=sigmoid +warmup_ratio is deprecated and will be removed in v5.2. Use `warmup_steps` instead. +Parameter 'fn_kwargs'={'tokenizer': Qwen2Tokenizer(name_or_path='../../Models/Qwen2.5-Coder-14B-CPT-SFT', vocab_size=151643, model_max_length=32768, padding_side='right', truncation_side='right', special_tokens={'eos_token': '<|endoftext|>', 'pad_token': '<|endoftext|>'}, added_tokens_decoder={ + 151643: AddedToken("<|endoftext|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True), + 151644: AddedToken("<|im_start|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True), + 151645: AddedToken("<|im_end|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True), + 151646: AddedToken("<|object_ref_start|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True), + 151647: AddedToken("<|object_ref_end|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True), + 151648: AddedToken("<|box_start|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True), + 151649: AddedToken("<|box_end|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True), + 151650: AddedToken("<|quad_start|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True), + 151651: AddedToken("<|quad_end|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True), + 151652: AddedToken("<|vision_start|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True), + 151653: AddedToken("<|vision_end|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True), + 151654: AddedToken("<|vision_pad|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True), + 151655: AddedToken("<|image_pad|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True), + 151656: AddedToken("<|video_pad|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True), + 151657: AddedToken("", rstrip=False, lstrip=False, single_word=False, normalized=False, special=False), + 151658: AddedToken("", rstrip=False, lstrip=False, single_word=False, normalized=False, special=False), + 151659: AddedToken("<|fim_prefix|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=False), + 151660: AddedToken("<|fim_middle|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=False), + 151661: AddedToken("<|fim_suffix|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=False), + 151662: AddedToken("<|fim_pad|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=False), + 151663: AddedToken("<|repo_name|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=False), + 151664: AddedToken("<|file_sep|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=False), +} +), 'tools': None} of the transform datasets.arrow_dataset.Dataset._map_single couldn't be hashed properly, a random hash was used instead. Make sure your transforms and parameters are serializable with pickle or dill for the dataset fingerprinting and caching to work. If you reuse this transform, the caching mechanism will consider it to be different from the previous calls and recompute everything. This warning is only shown once. Subsequent hashing failures won't be shown. +2025-12-26 15:30:15,283 - WARNING - Parameter 'fn_kwargs'={'tokenizer': Qwen2Tokenizer(name_or_path='../../Models/Qwen2.5-Coder-14B-CPT-SFT', vocab_size=151643, model_max_length=32768, padding_side='right', truncation_side='right', special_tokens={'eos_token': '<|endoftext|>', 'pad_token': '<|endoftext|>'}, added_tokens_decoder={ + 151643: AddedToken("<|endoftext|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True), + 151644: AddedToken("<|im_start|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True), + 151645: AddedToken("<|im_end|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True), + 151646: AddedToken("<|object_ref_start|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True), + 151647: AddedToken("<|object_ref_end|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True), + 151648: AddedToken("<|box_start|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True), + 151649: AddedToken("<|box_end|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True), + 151650: AddedToken("<|quad_start|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True), + 151651: AddedToken("<|quad_end|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True), + 151652: AddedToken("<|vision_start|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True), + 151653: AddedToken("<|vision_end|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True), + 151654: AddedToken("<|vision_pad|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True), + 151655: AddedToken("<|image_pad|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True), + 151656: AddedToken("<|video_pad|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True), + 151657: AddedToken("", rstrip=False, lstrip=False, single_word=False, normalized=False, special=False), + 151658: AddedToken("", rstrip=False, lstrip=False, single_word=False, normalized=False, special=False), + 151659: AddedToken("<|fim_prefix|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=False), + 151660: AddedToken("<|fim_middle|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=False), + 151661: AddedToken("<|fim_suffix|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=False), + 151662: AddedToken("<|fim_pad|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=False), + 151663: AddedToken("<|repo_name|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=False), + 151664: AddedToken("<|file_sep|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=False), +} +), 'tools': None} of the transform datasets.arrow_dataset.Dataset._map_single couldn't be hashed properly, a random hash was used instead. Make sure your transforms and parameters are serializable with pickle or dill for the dataset fingerprinting and caching to work. If you reuse this transform, the caching mechanism will consider it to be different from the previous calls and recompute everything. This warning is only shown once. Subsequent hashing failures won't be shown. +Applying chat template to eval dataset: 100%|████████████████| 762/762 [00:00<00:00, 8054.02 examples/s] +Tokenizing eval dataset: 47%|███████████████▏ | 361/762 [00:01<00:01, 236.68 examples/s] +Traceback (most recent call last): + File "/workspace/trainer-kit/DPO-14b/run_dpo.py", line 953, in + main() + File "/workspace/trainer-kit/DPO-14b/run_dpo.py", line 909, in main + trainer = DPOTrainer( + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/trl/trainer/dpo_trainer.py", line 488, in __init__ + eval_dataset = self._prepare_dataset(eval_dataset, processing_class, args, "eval") + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/trl/trainer/dpo_trainer.py", line 654, in _prepare_dataset + dataset = dataset.map( + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/datasets/arrow_dataset.py", line 562, in wrapper + out: Union["Dataset", "DatasetDict"] = func(self, *args, **kwargs) + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/datasets/arrow_dataset.py", line 3341, in map + for rank, done, content in Dataset._map_single(**unprocessed_kwargs): + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/datasets/arrow_dataset.py", line 3673, in _map_single + for i, example in iter_outputs(shard_iterable): + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/datasets/arrow_dataset.py", line 3647, in iter_outputs + yield i, apply_function(example, i, offset=offset) + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/datasets/arrow_dataset.py", line 3570, in apply_function + processed_inputs = function(*fn_args, *additional_args, **fn_kwargs) + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/trl/trainer/dpo_trainer.py", line 714, in tokenize_row + rejected_input_ids = tokenizer(features["rejected"], add_special_tokens=False)["input_ids"] + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/tokenization_utils_base.py", line 2567, in __call__ + encodings = self._encode_plus( + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/tokenization_utils_tokenizers.py", line 833, in _encode_plus + encodings = self._tokenizer.encode_batch( +KeyboardInterrupt +Traceback (most recent call last): + File "/workspace/trainer-kit/DPO-14b/run_dpo.py", line 953, in + main() + File "/workspace/trainer-kit/DPO-14b/run_dpo.py", line 909, in main + trainer = DPOTrainer( + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/trl/trainer/dpo_trainer.py", line 488, in __init__ + eval_dataset = self._prepare_dataset(eval_dataset, processing_class, args, "eval") + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/trl/trainer/dpo_trainer.py", line 654, in _prepare_dataset + dataset = dataset.map( + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/datasets/arrow_dataset.py", line 562, in wrapper + out: Union["Dataset", "DatasetDict"] = func(self, *args, **kwargs) + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/datasets/arrow_dataset.py", line 3341, in map + for rank, done, content in Dataset._map_single(**unprocessed_kwargs): + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/datasets/arrow_dataset.py", line 3673, in _map_single + for i, example in iter_outputs(shard_iterable): + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/datasets/arrow_dataset.py", line 3647, in iter_outputs + yield i, apply_function(example, i, offset=offset) + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/datasets/arrow_dataset.py", line 3570, in apply_function + processed_inputs = function(*fn_args, *additional_args, **fn_kwargs) + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/trl/trainer/dpo_trainer.py", line 714, in tokenize_row + rejected_input_ids = tokenizer(features["rejected"], add_special_tokens=False)["input_ids"] + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/tokenization_utils_base.py", line 2567, in __call__ + encodings = self._encode_plus( + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/tokenization_utils_tokenizers.py", line 833, in _encode_plus + encodings = self._tokenizer.encode_batch( +KeyboardInterrupt diff --git a/dpo_qwen_14B/wandb/run-20251226_152936-r1nptay8/files/requirements.txt b/dpo_qwen_14B/wandb/run-20251226_152936-r1nptay8/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..79a4241d8724f018c9bdfcd7c289f1f14578574b --- /dev/null +++ b/dpo_qwen_14B/wandb/run-20251226_152936-r1nptay8/files/requirements.txt @@ -0,0 +1,104 @@ +exceptiongroup==1.3.1 +wheel==0.45.1 +python-dateutil==2.9.0.post0 +nvidia-ml-py==13.580.82 +huggingface_hub==1.2.3 +idna==3.11 +click==8.3.1 +numpy==2.2.6 +httpx==0.28.1 +tokenizers==0.22.1 +sympy==1.13.1 +yarl==1.22.0 +async-timeout==5.0.1 +datasets==4.4.2 +platformdirs==4.5.1 +nvidia-cuda-cupti-cu12==12.1.105 +nvidia-nvtx-cu12==12.1.105 +smmap==5.0.2 +accelerate==1.12.0 +requests==2.32.5 +aiohttp==3.13.2 +bitsandbytes==0.49.0 +nvidia-cublas-cu12==12.1.3.1 +mpmath==1.3.0 +torchaudio==2.5.1+cu121 +nvidia-cuda-runtime-cu12==12.1.105 +typing-inspection==0.4.2 +GitPython==3.1.45 +xxhash==3.6.0 +nvidia-cusolver-cu12==11.4.5.107 +pydantic_core==2.41.5 +six==1.17.0 +torchvision==0.20.1+cu121 +typing_extensions==4.15.0 +triton==3.1.0 +charset-normalizer==3.4.4 +nvitop==1.6.1 +wandb==0.23.1 +regex==2025.11.3 +pip==25.3 +nvidia-cusparse-cu12==12.1.0.106 +pytz==2025.2 +Jinja2==3.1.6 +psutil==7.2.0 +pillow==12.0.0 +packaging==25.0 +safetensors==0.7.0 +sentry-sdk==2.48.0 +gitdb==4.0.12 +httpcore==1.0.9 +setuptools==80.9.0 +nvidia-cufft-cu12==11.0.2.54 +anyio==4.12.0 +transformers==5.0.0.dev0 +pydantic==2.12.5 +fsspec==2025.10.0 +filelock==3.20.0 +PyYAML==6.0.3 +hf-xet==1.2.0 +nvidia-cudnn-cu12==9.1.0.70 +tqdm==4.67.1 +MarkupSafe==2.1.5 +attrs==25.4.0 +nvidia-cuda-nvrtc-cu12==12.1.105 +peft==0.18.0 +aiohappyeyeballs==2.6.1 +networkx==3.4.2 +nvidia-nvjitlink-cu12==12.9.86 +certifi==2025.11.12 +pyarrow==22.0.0 +dill==0.4.0 +protobuf==6.33.2 +aiosignal==1.4.0 +frozenlist==1.8.0 +urllib3==2.6.2 +propcache==0.4.1 +tzdata==2025.3 +pandas==2.3.3 +annotated-types==0.7.0 +shellingham==1.5.4 +nvidia-nccl-cu12==2.21.5 +multidict==6.7.0 +nvidia-curand-cu12==10.3.2.106 +trl==0.26.2 +torch==2.5.1+cu121 +h11==0.16.0 +multiprocess==0.70.18 +typer-slim==0.21.0 +wheel==0.45.1 +tomli==2.0.1 +autocommand==2.2.2 +jaraco.context==5.3.0 +zipp==3.19.2 +packaging==24.2 +inflect==7.3.1 +typing_extensions==4.12.2 +platformdirs==4.2.2 +jaraco.functools==4.0.1 +jaraco.collections==5.1.0 +jaraco.text==3.12.1 +backports.tarfile==1.2.0 +more-itertools==10.3.0 +importlib_metadata==8.0.0 +typeguard==4.3.0 diff --git a/dpo_qwen_14B/wandb/run-20251226_152936-r1nptay8/files/wandb-metadata.json b/dpo_qwen_14B/wandb/run-20251226_152936-r1nptay8/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..d20442dd859e50ad822177d9a90d9b63bcb70b67 --- /dev/null +++ b/dpo_qwen_14B/wandb/run-20251226_152936-r1nptay8/files/wandb-metadata.json @@ -0,0 +1,47 @@ +{ + "os": "Linux-6.12.46+-x86_64-with-glibc2.35", + "python": "CPython 3.10.12", + "startedAt": "2025-12-26T15:29:36.793485Z", + "args": [ + "--config", + "config_dpo.yaml" + ], + "program": "/workspace/trainer-kit/DPO-14b/run_dpo.py", + "codePath": "run_dpo.py", + "codePathLocal": "run_dpo.py", + "email": "shaiksirajuddin9949@gmail.com", + "root": "runs/dpo_run_14b_v1", + "host": "a100-2gpu-shell-session-757d587799-mfdvv", + "executable": "/workspace/llm_finetuning_env/bin/python", + "cpu_count": 12, + "cpu_count_logical": 24, + "gpu": "NVIDIA A100-SXM4-80GB", + "gpu_count": 2, + "disk": { + "/": { + "total": "791251738624", + "used": "316563935232" + } + }, + "memory": { + "total": "359047892992" + }, + "gpu_nvidia": [ + { + "name": "NVIDIA A100-SXM4-80GB", + "memoryTotal": "85899345920", + "cudaCores": 6912, + "architecture": "Ampere", + "uuid": "GPU-989794b0-ec3b-13bf-db9f-3fbe341497ba" + }, + { + "name": "NVIDIA A100-SXM4-80GB", + "memoryTotal": "85899345920", + "cudaCores": 6912, + "architecture": "Ampere", + "uuid": "GPU-3790aa64-60ef-9eac-b0b1-b278ee8c0d40" + } + ], + "cudaVersion": "13.0", + "writerId": "62bhwklrbfchpar5wzdaud7re7jdowat" +} \ No newline at end of file diff --git a/dpo_qwen_14B/wandb/run-20251226_152936-r1nptay8/files/wandb-summary.json b/dpo_qwen_14B/wandb/run-20251226_152936-r1nptay8/files/wandb-summary.json new file mode 100644 index 0000000000000000000000000000000000000000..27a3da6debdb8b89c8ee34c41b4fd70e72812d25 --- /dev/null +++ b/dpo_qwen_14B/wandb/run-20251226_152936-r1nptay8/files/wandb-summary.json @@ -0,0 +1 @@ +{"_wandb":{"runtime":41},"_runtime":41} \ No newline at end of file diff --git a/dpo_qwen_14B/wandb/run-20251226_152936-r1nptay8/logs/debug-core.log b/dpo_qwen_14B/wandb/run-20251226_152936-r1nptay8/logs/debug-core.log new file mode 100644 index 0000000000000000000000000000000000000000..455932b44f85a1a51630c43ac485fcb2bc9b4e97 --- /dev/null +++ b/dpo_qwen_14B/wandb/run-20251226_152936-r1nptay8/logs/debug-core.log @@ -0,0 +1,14 @@ +{"time":"2025-12-26T15:29:36.871855887Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmp2764fn9e/port-137205.txt","pid":137205,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false} +{"time":"2025-12-26T15:29:36.872449374Z","level":"INFO","msg":"server: will exit if parent process dies","ppid":137205} +{"time":"2025-12-26T15:29:36.872451526Z","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-137205-137352-1482738377/socket","Net":"unix"}} +{"time":"2025-12-26T15:29:37.058666689Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"} +{"time":"2025-12-26T15:29:37.064819569Z","level":"INFO","msg":"handleInformInit: received","streamId":"r1nptay8","id":"1(@)"} +{"time":"2025-12-26T15:29:37.216524061Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"r1nptay8","id":"1(@)"} +{"time":"2025-12-26T15:30:19.248432516Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"} +{"time":"2025-12-26T15:30:19.248506742Z","level":"INFO","msg":"connection: closing","id":"1(@)"} +{"time":"2025-12-26T15:30:19.24857928Z","level":"INFO","msg":"connection: closed successfully","id":"1(@)"} +{"time":"2025-12-26T15:30:19.248524342Z","level":"INFO","msg":"server is shutting down"} +{"time":"2025-12-26T15:30:19.248647813Z","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-137205-137352-1482738377/socket","Net":"unix"}} +{"time":"2025-12-26T15:30:19.549751743Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"} +{"time":"2025-12-26T15:30:19.549788501Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"} +{"time":"2025-12-26T15:30:19.549806198Z","level":"INFO","msg":"server is closed"} diff --git a/dpo_qwen_14B/wandb/run-20251226_152936-r1nptay8/logs/debug-internal.log b/dpo_qwen_14B/wandb/run-20251226_152936-r1nptay8/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..35b999fae2e657710a055999a217efaaa4339f16 --- /dev/null +++ b/dpo_qwen_14B/wandb/run-20251226_152936-r1nptay8/logs/debug-internal.log @@ -0,0 +1,11 @@ +{"time":"2025-12-26T15:29:37.064937062Z","level":"INFO","msg":"stream: starting","core version":"0.23.1"} +{"time":"2025-12-26T15:29:37.216325813Z","level":"INFO","msg":"stream: created new stream","id":"r1nptay8"} +{"time":"2025-12-26T15:29:37.216413019Z","level":"INFO","msg":"handler: started","stream_id":"r1nptay8"} +{"time":"2025-12-26T15:29:37.216515668Z","level":"INFO","msg":"stream: started","id":"r1nptay8"} +{"time":"2025-12-26T15:29:37.216542759Z","level":"INFO","msg":"writer: started","stream_id":"r1nptay8"} +{"time":"2025-12-26T15:29:37.216565747Z","level":"INFO","msg":"sender: started","stream_id":"r1nptay8"} +{"time":"2025-12-26T15:30:19.248508176Z","level":"INFO","msg":"stream: closing","id":"r1nptay8"} +{"time":"2025-12-26T15:30:19.441030263Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"} +{"time":"2025-12-26T15:30:19.548847282Z","level":"INFO","msg":"handler: closed","stream_id":"r1nptay8"} +{"time":"2025-12-26T15:30:19.548944003Z","level":"INFO","msg":"sender: closed","stream_id":"r1nptay8"} +{"time":"2025-12-26T15:30:19.54895272Z","level":"INFO","msg":"stream: closed","id":"r1nptay8"} diff --git a/dpo_qwen_14B/wandb/run-20251226_152936-r1nptay8/logs/debug.log b/dpo_qwen_14B/wandb/run-20251226_152936-r1nptay8/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..15bc5783aaadc25337e064fb9a91d958cb064e3d --- /dev/null +++ b/dpo_qwen_14B/wandb/run-20251226_152936-r1nptay8/logs/debug.log @@ -0,0 +1,23 @@ +2025-12-26 15:29:36,795 INFO MainThread:137205 [wandb_setup.py:_flush():80] Current SDK version is 0.23.1 +2025-12-26 15:29:36,795 INFO MainThread:137205 [wandb_setup.py:_flush():80] Configure stats pid to 137205 +2025-12-26 15:29:36,795 INFO MainThread:137205 [wandb_setup.py:_flush():80] Loading settings from /root/.config/wandb/settings +2025-12-26 15:29:36,795 INFO MainThread:137205 [wandb_setup.py:_flush():80] Loading settings from /workspace/trainer-kit/DPO-14b/wandb/settings +2025-12-26 15:29:36,795 INFO MainThread:137205 [wandb_setup.py:_flush():80] Loading settings from environment variables +2025-12-26 15:29:36,795 INFO MainThread:137205 [wandb_init.py:setup_run_log_directory():714] Logging user logs to runs/dpo_run_14b_v1/wandb/run-20251226_152936-r1nptay8/logs/debug.log +2025-12-26 15:29:36,795 INFO MainThread:137205 [wandb_init.py:setup_run_log_directory():715] Logging internal logs to runs/dpo_run_14b_v1/wandb/run-20251226_152936-r1nptay8/logs/debug-internal.log +2025-12-26 15:29:36,795 INFO MainThread:137205 [wandb_init.py:init():841] calling init triggers +2025-12-26 15:29:36,795 INFO MainThread:137205 [wandb_init.py:init():846] wandb.init called with sweep_config: {} +config: {'model': {'repo_id': '../../Models/Qwen2.5-Coder-14B-CPT-SFT', 'revision': None, 'base_local_dir': 'base_model', 'trust_remote_code': True, 'tokenizer_use_fast': True, 'device_map': 'auto', 'torch_dtype': 'bfloat16', 'use_4bit': False, 'bnb_4bit_quant_type': 'nf4', 'bnb_4bit_use_double_quant': False, 'bnb_4bit_compute_dtype': 'bfloat16', 'attn_implementation': None}, 'data': {'train_jsonl': 'dpo_pairs_generated.jsonl', 'eval_jsonl': None, 'eval_split_ratio': 0.1, 'prompt_field': 'prompt', 'chosen_field': 'chosen', 'rejected_field': 'rejected', 'score_field': 'f1_score', 'format_type': 'chatml', 'system_prompt': 'You are a Hyperswitch Rust code analyzer. Identify functions/structs that need modification for a given task.\n\n## Output Format\n\n##OUTPUT\nExplain the data flow and why each component must change:\n- Flow: [Input → Processing → Output with arrows]\n- For each component: "The [ComponentName] ([path]) must [action] because [reason]—without this, [consequence]"\n- Explain coupling between components\n\n##SELECT\nmodify::crates/path/to/file.rs::impl::ComponentName\nadd::crates/another/file.rs::function::AnotherComponent\n\n\n## Rules\n\n1. Use full paths: `remove::crates/folder/file.rs::Type::Name`\n2. Use `::` for nested items: `status::StructName::Type::Name`\n3. Always explain "must change because" and "without this"\n3. Types of components: function, struct, enum, impl, trait\n4. If there is extra information (e.g., enum variants), include that too.\n5. Start with ##OUTPUT, end with ##SELECT, terminate with \n', 'max_length': 2048, 'shuffle': True, 'num_proc': 4}, 'peft': {'enabled': True, 'r': 16, 'lora_alpha': 32, 'lora_dropout': 0.05, 'bias': 'none', 'target_modules': 'auto'}, 'dpo': {'beta': 0.1, 'label_smoothing': 0.0, 'loss_type': 'sigmoid', 'use_reference_model': True, 'reference_free': False}, 'train': {'num_train_epochs': 3, 'per_device_train_batch_size': 1, 'per_device_eval_batch_size': 1, 'gradient_accumulation_steps': 8, 'learning_rate': '5e-5', 'weight_decay': 0.0, 'warmup_ratio': 0.1, 'lr_scheduler_type': 'cosine', 'optim': 'adamw_torch', 'max_grad_norm': 1.0, 'gradient_checkpointing': True, 'logging_steps': 2, 'save_strategy': 'steps', 'save_steps': 100, 'save_total_limit': 10, 'evaluation_strategy': 'steps', 'eval_steps': 25, 'load_best_model_at_end': True, 'early_stopping': {'enabled': True, 'patience': 5, 'min_delta': 0.001, 'metric': 'eval_loss', 'mode': 'min'}, 'resume_from_checkpoint': 'auto'}, 'run_dir': 'runs/dpo_run_14b_v1', '_wandb': {}} +2025-12-26 15:29:36,795 INFO MainThread:137205 [wandb_init.py:init():889] starting backend +2025-12-26 15:29:37,058 INFO MainThread:137205 [wandb_init.py:init():892] sending inform_init request +2025-12-26 15:29:37,063 INFO MainThread:137205 [wandb_init.py:init():900] backend started and connected +2025-12-26 15:29:37,065 INFO MainThread:137205 [wandb_init.py:init():970] updated telemetry +2025-12-26 15:29:37,065 INFO MainThread:137205 [wandb_init.py:init():994] communicating run to backend with 90.0 second timeout +2025-12-26 15:29:37,469 INFO MainThread:137205 [wandb_init.py:init():1041] starting run threads in backend +2025-12-26 15:29:37,577 INFO MainThread:137205 [wandb_run.py:_console_start():2521] atexit reg +2025-12-26 15:29:37,578 INFO MainThread:137205 [wandb_run.py:_redirect():2369] redirect: wrap_raw +2025-12-26 15:29:37,578 INFO MainThread:137205 [wandb_run.py:_redirect():2438] Wrapping output streams. +2025-12-26 15:29:37,578 INFO MainThread:137205 [wandb_run.py:_redirect():2461] Redirects installed. +2025-12-26 15:29:37,582 INFO MainThread:137205 [wandb_init.py:init():1081] run started, returning control to user process +2025-12-26 15:30:19,248 INFO wandb-AsyncioManager-main:137205 [service_client.py:_forward_responses():80] Reached EOF. +2025-12-26 15:30:19,248 INFO wandb-AsyncioManager-main:137205 [mailbox.py:close():137] Closing mailbox, abandoning 1 handles. diff --git a/dpo_qwen_14B/wandb/run-20251226_152936-r1nptay8/run-r1nptay8.wandb b/dpo_qwen_14B/wandb/run-20251226_152936-r1nptay8/run-r1nptay8.wandb new file mode 100644 index 0000000000000000000000000000000000000000..ffef78edcd5df0bc12884d2c6307d0465afea46b --- /dev/null +++ b/dpo_qwen_14B/wandb/run-20251226_152936-r1nptay8/run-r1nptay8.wandb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bbc498b55a73f9c8a0d524a9f073f87ac74b98d8031b36455fd731caf2cff78f +size 403205 diff --git a/dpo_qwen_14B/wandb/run-20251226_155650-wbzoafvt/files/config.yaml b/dpo_qwen_14B/wandb/run-20251226_155650-wbzoafvt/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1668984ebf34ec782e674ed9a5357877ce5d054a --- /dev/null +++ b/dpo_qwen_14B/wandb/run-20251226_155650-wbzoafvt/files/config.yaml @@ -0,0 +1,661 @@ +_name_or_path: + value: ../../Models/Qwen2.5-Coder-14B-CPT-SFT +_wandb: + value: + cli_version: 0.23.1 + e: + afn1h9dtq29ul6sseazq0ojw1mqcn19i: + args: + - --config + - config_dpo.yaml + codePath: run_dpo.py + codePathLocal: run_dpo.py + cpu_count: 12 + cpu_count_logical: 24 + cudaVersion: "13.0" + disk: + /: + total: "791251738624" + used: "323290275840" + email: shaiksirajuddin9949@gmail.com + executable: /workspace/llm_finetuning_env/bin/python + gpu: NVIDIA A100-SXM4-80GB + gpu_count: 2 + gpu_nvidia: + - architecture: Ampere + cudaCores: 6912 + memoryTotal: "85899345920" + name: NVIDIA A100-SXM4-80GB + uuid: GPU-989794b0-ec3b-13bf-db9f-3fbe341497ba + - architecture: Ampere + cudaCores: 6912 + memoryTotal: "85899345920" + name: NVIDIA A100-SXM4-80GB + uuid: GPU-3790aa64-60ef-9eac-b0b1-b278ee8c0d40 + host: a100-2gpu-shell-session-757d587799-mfdvv + memory: + total: "359047892992" + os: Linux-6.12.46+-x86_64-with-glibc2.35 + program: /workspace/trainer-kit/DPO-14b/run_dpo.py + python: CPython 3.10.12 + root: runs/dpo_run_14b_v1 + startedAt: "2025-12-26T15:56:50.015524Z" + writerId: afn1h9dtq29ul6sseazq0ojw1mqcn19i + m: + - "1": train/global_step + "6": + - 3 + "7": [] + - "2": '*' + "5": 1 + "6": + - 1 + "7": [] + python_version: 3.10.12 + t: + "1": + - 1 + - 11 + - 41 + - 49 + - 51 + - 71 + - 84 + - 98 + "2": + - 1 + - 11 + - 41 + - 49 + - 51 + - 71 + - 84 + - 98 + "3": + - 7 + - 15 + - 16 + - 19 + - 66 + "4": 3.10.12 + "5": 0.23.1 + "6": 5.0.0.dev0 + "9": + "1": transformers_trainer + "12": 0.23.1 + "13": linux-x86_64 +accelerator_config: + value: + dispatch_batches: null + even_batches: true + gradient_accumulation_kwargs: null + non_blocking: false + split_batches: false + use_seedable_sampler: true +adam_beta1: + value: 0.9 +adam_beta2: + value: 0.999 +adam_epsilon: + value: 1e-08 +add_cross_attention: + value: false +architectures: + value: + - Qwen2ForCausalLM +attention_dropout: + value: 0 +auto_find_batch_size: + value: false +average_tokens_across_devices: + value: true +base_model_attribute_name: + value: model +batch_eval_metrics: + value: false +beta: + value: 0.1 +bf16: + value: true +bf16_full_eval: + value: false +bos_token_id: + value: null +chunk_size_feed_forward: + value: 0 +cross_attention_hidden_size: + value: null +data: + value: + chosen_field: chosen + eval_jsonl: null + eval_split_ratio: 0.1 + format_type: chatml + max_length: 2048 + num_proc: 4 + prompt_field: prompt + rejected_field: rejected + score_field: f1_score + shuffle: true + system_prompt: | + You are a Hyperswitch Rust code analyzer. Identify functions/structs that need modification for a given task. + + ## Output Format + + ##OUTPUT + Explain the data flow and why each component must change: + - Flow: [Input → Processing → Output with arrows] + - For each component: "The [ComponentName] ([path]) must [action] because [reason]—without this, [consequence]" + - Explain coupling between components + + ##SELECT + modify::crates/path/to/file.rs::impl::ComponentName + add::crates/another/file.rs::function::AnotherComponent + + + ## Rules + + 1. Use full paths: `remove::crates/folder/file.rs::Type::Name` + 2. Use `::` for nested items: `status::StructName::Type::Name` + 3. Always explain "must change because" and "without this" + 3. Types of components: function, struct, enum, impl, trait + 4. If there is extra information (e.g., enum variants), include that too. + 5. Start with ##OUTPUT, end with ##SELECT, terminate with + train_jsonl: dpo_pairs_generated.jsonl +data_seed: + value: null +dataloader_drop_last: + value: false +dataloader_num_workers: + value: 0 +dataloader_persistent_workers: + value: false +dataloader_pin_memory: + value: true +dataloader_prefetch_factor: + value: null +dataset_num_proc: + value: null +ddp_backend: + value: null +ddp_broadcast_buffers: + value: null +ddp_bucket_cap_mb: + value: null +ddp_find_unused_parameters: + value: null +ddp_timeout: + value: 1800 +debug: + value: [] +decoder_start_token_id: + value: null +deepspeed: + value: null +disable_dropout: + value: true +disable_tqdm: + value: false +discopop_tau: + value: 0.05 +do_eval: + value: true +do_predict: + value: false +do_train: + value: false +dpo: + value: + beta: 0.1 + label_smoothing: 0 + loss_type: sigmoid + reference_free: false + use_reference_model: true +dtype: + value: bfloat16 +enable_jit_checkpoint: + value: false +eos_token_id: + value: 151643 +eval_accumulation_steps: + value: null +eval_delay: + value: 0 +eval_do_concat_batches: + value: true +eval_on_start: + value: false +eval_steps: + value: 25 +eval_strategy: + value: steps +eval_use_gather_object: + value: false +f_alpha_divergence_coef: + value: 1 +f_divergence_type: + value: reverse_kl +finetuning_task: + value: null +force_use_ref_model: + value: false +fp16: + value: false +fp16_full_eval: + value: false +fsdp: + value: [] +fsdp_config: + value: + min_num_params: 0 + xla: false + xla_fsdp_grad_ckpt: false + xla_fsdp_v2: false +full_determinism: + value: false +generate_during_eval: + value: false +gradient_accumulation_steps: + value: 8 +gradient_checkpointing: + value: true +gradient_checkpointing_kwargs: + value: null +greater_is_better: + value: false +group_by_length: + value: false +hidden_act: + value: silu +hidden_size: + value: 5120 +hub_always_push: + value: false +hub_model_id: + value: null +hub_private_repo: + value: null +hub_revision: + value: null +hub_strategy: + value: every_save +hub_token: + value: +id2label: + value: + "0": LABEL_0 + "1": LABEL_1 +ignore_data_skip: + value: false +include_for_metrics: + value: [] +include_num_input_tokens_seen: + value: "no" +initializer_range: + value: 0.02 +intermediate_size: + value: 13824 +is_decoder: + value: false +is_encoder_decoder: + value: false +label_names: + value: null +label_pad_token_id: + value: -100 +label_smoothing: + value: 0 +label_smoothing_factor: + value: 0 +label2id: + value: + LABEL_0: 0 + LABEL_1: 1 +layer_types: + value: + - full_attention + - full_attention + - full_attention + - full_attention + - full_attention + - full_attention + - full_attention + - full_attention + - full_attention + - full_attention + - full_attention + - full_attention + - full_attention + - full_attention + - full_attention + - full_attention + - full_attention + - full_attention + - full_attention + - full_attention + - full_attention + - full_attention + - full_attention + - full_attention + - full_attention + - full_attention + - full_attention + - full_attention + - full_attention + - full_attention + - full_attention + - full_attention + - full_attention + - full_attention + - full_attention + - full_attention + - full_attention + - full_attention + - full_attention + - full_attention + - full_attention + - full_attention + - full_attention + - full_attention + - full_attention + - full_attention + - full_attention + - full_attention +ld_alpha: + value: null +learning_rate: + value: 5e-05 +length_column_name: + value: length +liger_kernel_config: + value: null +load_best_model_at_end: + value: true +local_rank: + value: -1 +log_level: + value: passive +log_level_replica: + value: warning +log_on_each_node: + value: true +logging_dir: + value: null +logging_first_step: + value: false +logging_nan_inf_filter: + value: true +logging_steps: + value: 2 +logging_strategy: + value: steps +loss_type: + value: sigmoid +loss_weights: + value: null +lr_scheduler_kwargs: + value: null +lr_scheduler_type: + value: cosine +max_completion_length: + value: null +max_grad_norm: + value: 1 +max_length: + value: 2048 +max_position_embeddings: + value: 32768 +max_prompt_length: + value: 1024 +max_steps: + value: -1 +max_window_layers: + value: 48 +metric_for_best_model: + value: eval_loss +model: + value: + attn_implementation: null + base_local_dir: base_model + bnb_4bit_compute_dtype: bfloat16 + bnb_4bit_quant_type: nf4 + bnb_4bit_use_double_quant: false + device_map: auto + repo_id: ../../Models/Qwen2.5-Coder-14B-CPT-SFT + revision: null + tokenizer_use_fast: true + torch_dtype: bfloat16 + trust_remote_code: true + use_4bit: false +model/num_parameters: + value: 14795199488 +model_adapter_name: + value: null +model_init_kwargs: + value: null +model_type: + value: qwen2 +neftune_noise_alpha: + value: null +num_attention_heads: + value: 40 +num_hidden_layers: + value: 48 +num_key_value_heads: + value: 8 +num_train_epochs: + value: 3 +optim: + value: adamw_torch +optim_args: + value: null +optim_target_modules: + value: null +output_attentions: + value: false +output_dir: + value: runs/dpo_run_14b_v1 +output_hidden_states: + value: false +pad_token: + value: +pad_token_id: + value: 151643 +padding_free: + value: false +parallelism_config: + value: null +peft: + value: + bias: none + enabled: true + lora_alpha: 32 + lora_dropout: 0.05 + r: 16 + target_modules: auto +peft_config: + value: + default: + alora_invocation_tokens: null + arrow_config: null + auto_mapping: null + base_model_name_or_path: ../../Models/Qwen2.5-Coder-14B-CPT-SFT + bias: none + corda_config: null + ensure_weight_tying: false + eva_config: null + exclude_modules: null + fan_in_fan_out: false + inference_mode: false + init_lora_weights: true + layer_replication: null + layers_pattern: null + layers_to_transform: null + lora_alpha: 32 + lora_bias: false + lora_dropout: 0.05 + megatron_config: null + megatron_core: megatron.core + modules_to_save: null + peft_type: LORA + peft_version: 0.18.0 + qalora_group_size: 16 + r: 16 + revision: null + runtime_config: + ephemeral_gpu_offload: false + target_modules: + - k_proj + - o_proj + - v_proj + - q_proj + target_parameters: null + task_type: CAUSAL_LM + trainable_token_indices: null + use_dora: false + use_qalora: false + use_rslora: false +per_device_eval_batch_size: + value: 1 +per_device_train_batch_size: + value: 1 +precompute_ref_batch_size: + value: null +precompute_ref_log_probs: + value: false +prediction_loss_only: + value: false +prefix: + value: null +problem_type: + value: null +project: + value: huggingface +push_to_hub: + value: false +ref_adapter_name: + value: null +ref_model_init_kwargs: + value: null +ref_model_mixup_alpha: + value: 0.6 +ref_model_sync_steps: + value: 512 +reference_free: + value: false +remove_unused_columns: + value: false +report_to: + value: + - wandb +restore_callback_states_from_checkpoint: + value: false +resume_from_checkpoint: + value: null +return_dict: + value: true +rms_norm_eps: + value: 1e-06 +rope_parameters: + value: + rope_theta: 1e+06 + rope_type: default +rpo_alpha: + value: null +run_dir: + value: runs/dpo_run_14b_v1 +run_name: + value: null +save_on_each_node: + value: false +save_only_model: + value: false +save_steps: + value: 100 +save_strategy: + value: steps +save_total_limit: + value: 10 +seed: + value: 42 +sep_token_id: + value: null +skip_memory_metrics: + value: true +sliding_window: + value: null +sync_ref_model: + value: false +task_specific_params: + value: null +tf32: + value: null +tie_word_embeddings: + value: false +tokenizer_class: + value: null +tools: + value: null +torch_compile: + value: false +torch_compile_backend: + value: null +torch_compile_mode: + value: null +torch_empty_cache_steps: + value: null +trackio_space_id: + value: trackio +train: + value: + early_stopping: + enabled: true + metric: eval_loss + min_delta: 0.001 + mode: min + patience: 5 + eval_steps: 25 + evaluation_strategy: steps + gradient_accumulation_steps: 8 + gradient_checkpointing: true + learning_rate: "5e-5" + load_best_model_at_end: true + logging_steps: 2 + lr_scheduler_type: cosine + max_grad_norm: 1 + num_train_epochs: 3 + optim: adamw_torch + per_device_eval_batch_size: 1 + per_device_train_batch_size: 1 + resume_from_checkpoint: auto + save_steps: 100 + save_strategy: steps + save_total_limit: 10 + warmup_ratio: 0.1 + weight_decay: 0 +transformers_version: + value: 5.0.0.dev0 +truncation_mode: + value: keep_end +use_cache: + value: false +use_cpu: + value: false +use_liger_kernel: + value: false +use_liger_loss: + value: null +use_logits_to_keep: + value: false +use_sliding_window: + value: false +use_weighting: + value: false +vocab_size: + value: 152064 +warmup_ratio: + value: 0.1 +warmup_steps: + value: 0.1 +weight_decay: + value: 0 diff --git a/dpo_qwen_14B/wandb/run-20251226_155650-wbzoafvt/files/output.log b/dpo_qwen_14B/wandb/run-20251226_155650-wbzoafvt/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..7973b262fbf51d26a29c3cf1b5cd88f031d70cd0 --- /dev/null +++ b/dpo_qwen_14B/wandb/run-20251226_155650-wbzoafvt/files/output.log @@ -0,0 +1,279 @@ +Wandb initialized: project='dpo-training', name='auto-generated' +`torch_dtype` is deprecated! Use `dtype` instead! +Loading weights: 100%|█████████| 579/579 [00:09<00:00, 60.71it/s, Materializing param=model.norm.weight] +Loading reference model (frozen copy)... +Loading weights: 100%|█████████| 579/579 [00:09<00:00, 60.20it/s, Materializing param=model.norm.weight] +Reference model loaded and frozen +2025-12-26 15:57:19,133 - INFO - HTTP Request: HEAD https://s3.amazonaws.com/datasets.huggingface.co/datasets/datasets/json/json.py "HTTP/1.1 200 OK" +2025-12-26 15:57:19,148 - INFO - Formatting train DPO data... +2025-12-26 15:57:21,512 - INFO - Train dataset after filtering: 6850 examples +2025-12-26 15:57:21,513 - INFO - train dataset validation passed: 6850 examples +2025-12-26 15:57:21,513 - INFO - Formatting eval DPO data... +2025-12-26 15:57:23,870 - INFO - Eval dataset after filtering: 762 examples +2025-12-26 15:57:23,871 - INFO - eval dataset validation passed: 762 examples +warmup_ratio is deprecated and will be removed in v5.2. Use `warmup_steps` instead. +Early stopping enabled: patience=5, min_delta=0.001 +2025-12-26 15:57:23,907 - INFO - DPO Training with beta=0.1, loss_type=sigmoid +warmup_ratio is deprecated and will be removed in v5.2. Use `warmup_steps` instead. +2025-12-26 15:57:33,435 - INFO - Starting DPO training... +The tokenizer has new PAD/BOS/EOS tokens that differ from the model config and generation config. The model config and generation config were aligned accordingly, being updated with the tokenizer's values. Updated tokens: {'bos_token_id': None, 'pad_token_id': 151643}. + +{'loss': '0.6931', 'grad_norm': '1.243', 'learning_rate': '1.938e-07', 'rewards/chosen': '0', 'rewards/rejected': '0', 'rewards/accuracies': '0', 'rewards/margins': '0', 'logps/chosen': '-368.9', 'logps/rejected': '-398.8', 'logits/chosen': '5.179', 'logits/rejected': '5.193', 'epoch': '0.002336'} +{'loss': '0.695', 'grad_norm': '1.392', 'learning_rate': '5.814e-07', 'rewards/chosen': '0.004505', 'rewards/rejected': '0.007727', 'rewards/accuracies': '0.625', 'rewards/margins': '-0.003223', 'logps/chosen': '-338.4', 'logps/rejected': '-367', 'logits/chosen': '5.404', 'logits/rejected': '5.457', 'epoch': '0.004672'} +{'loss': '0.6892', 'grad_norm': '1.067', 'learning_rate': '9.69e-07', 'rewards/chosen': '-0.003407', 'rewards/rejected': '-0.01166', 'rewards/accuracies': '0.5625', 'rewards/margins': '0.008256', 'logps/chosen': '-362.3', 'logps/rejected': '-387.6', 'logits/chosen': '5.292', 'logits/rejected': '5.328', 'epoch': '0.007007'} +{'loss': '0.6944', 'grad_norm': '1.001', 'learning_rate': '1.357e-06', 'rewards/chosen': '0.01466', 'rewards/rejected': '0.01589', 'rewards/accuracies': '0.375', 'rewards/margins': '-0.001235', 'logps/chosen': '-379.9', 'logps/rejected': '-389.1', 'logits/chosen': '5.323', 'logits/rejected': '5.411', 'epoch': '0.009343'} +{'loss': '0.6933', 'grad_norm': '1.246', 'learning_rate': '1.744e-06', 'rewards/chosen': '-0.0285', 'rewards/rejected': '-0.02862', 'rewards/accuracies': '0.625', 'rewards/margins': '0.0001264', 'logps/chosen': '-363.2', 'logps/rejected': '-389.7', 'logits/chosen': '5.436', 'logits/rejected': '5.495', 'epoch': '0.01168'} +{'loss': '0.6883', 'grad_norm': '1.403', 'learning_rate': '2.132e-06', 'rewards/chosen': '0.01622', 'rewards/rejected': '0.006134', 'rewards/accuracies': '0.5', 'rewards/margins': '0.01009', 'logps/chosen': '-371', 'logps/rejected': '-402.5', 'logits/chosen': '5.355', 'logits/rejected': '5.376', 'epoch': '0.01401'} +{'loss': '0.6897', 'grad_norm': '1.116', 'learning_rate': '2.519e-06', 'rewards/chosen': '-0.01732', 'rewards/rejected': '-0.02465', 'rewards/accuracies': '0.625', 'rewards/margins': '0.007329', 'logps/chosen': '-336.7', 'logps/rejected': '-357.5', 'logits/chosen': '5.515', 'logits/rejected': '5.561', 'epoch': '0.01635'} +{'loss': '0.6904', 'grad_norm': '0.9471', 'learning_rate': '2.907e-06', 'rewards/chosen': '0.0327', 'rewards/rejected': '0.02688', 'rewards/accuracies': '0.5625', 'rewards/margins': '0.005827', 'logps/chosen': '-415.7', 'logps/rejected': '-441.1', 'logits/chosen': '5.553', 'logits/rejected': '5.583', 'epoch': '0.01869'} +{'loss': '0.6836', 'grad_norm': '1.44', 'learning_rate': '3.295e-06', 'rewards/chosen': '0.01102', 'rewards/rejected': '-0.008499', 'rewards/accuracies': '0.5625', 'rewards/margins': '0.01952', 'logps/chosen': '-392.5', 'logps/rejected': '-420.2', 'logits/chosen': '5.441', 'logits/rejected': '5.49', 'epoch': '0.02102'} +{'loss': '0.6902', 'grad_norm': '1.594', 'learning_rate': '3.682e-06', 'rewards/chosen': '0.006536', 'rewards/rejected': '0.0005231', 'rewards/accuracies': '0.5625', 'rewards/margins': '0.006013', 'logps/chosen': '-345.2', 'logps/rejected': '-366', 'logits/chosen': '5.318', 'logits/rejected': '5.398', 'epoch': '0.02336'} +{'loss': '0.6913', 'grad_norm': '1.136', 'learning_rate': '4.07e-06', 'rewards/chosen': '0.01191', 'rewards/rejected': '0.00737', 'rewards/accuracies': '0.5', 'rewards/margins': '0.004538', 'logps/chosen': '-347.9', 'logps/rejected': '-370.7', 'logits/chosen': '5.633', 'logits/rejected': '5.727', 'epoch': '0.02569'} +{'loss': '0.6769', 'grad_norm': '1.068', 'learning_rate': '4.457e-06', 'rewards/chosen': '0.01244', 'rewards/rejected': '-0.02045', 'rewards/accuracies': '0.875', 'rewards/margins': '0.03289', 'logps/chosen': '-347.2', 'logps/rejected': '-377.6', 'logits/chosen': '5.357', 'logits/rejected': '5.406', 'epoch': '0.02803'} + +{'eval_loss': '0.6837', 'eval_runtime': '454.4', 'eval_samples_per_second': '1.677', 'eval_steps_per_second': '1.677', 'eval_rewards/chosen': '0.02464', 'eval_rewards/rejected': '0.005081', 'eval_rewards/accuracies': '0.6654', 'eval_rewards/margins': '0.01956', 'eval_logps/chosen': '-370.2', 'eval_logps/rejected': '-395.7', 'eval_logits/chosen': '5.295', 'eval_logits/rejected': '5.345', 'epoch': '0.0292'} +{'loss': '0.685', 'grad_norm': '1.592', 'learning_rate': '4.845e-06', 'rewards/chosen': '0.02639', 'rewards/rejected': '0.009419', 'rewards/accuracies': '0.625', 'rewards/margins': '0.01697', 'logps/chosen': '-387.5', 'logps/rejected': '-412.1', 'logits/chosen': '5.157', 'logits/rejected': '5.245', 'epoch': '0.03036'} +{'loss': '0.6752', 'grad_norm': '1.318', 'learning_rate': '5.233e-06', 'rewards/chosen': '0.04595', 'rewards/rejected': '0.009191', 'rewards/accuracies': '0.8125', 'rewards/margins': '0.03676', 'logps/chosen': '-360.4', 'logps/rejected': '-391.2', 'logits/chosen': '5.546', 'logits/rejected': '5.544', 'epoch': '0.0327'} +{'loss': '0.6752', 'grad_norm': '1.444', 'learning_rate': '5.62e-06', 'rewards/chosen': '0.04195', 'rewards/rejected': '0.005257', 'rewards/accuracies': '0.8125', 'rewards/margins': '0.03669', 'logps/chosen': '-378.6', 'logps/rejected': '-405.4', 'logits/chosen': '5.136', 'logits/rejected': '5.239', 'epoch': '0.03504'} +{'loss': '0.6701', 'grad_norm': '1.38', 'learning_rate': '6.008e-06', 'rewards/chosen': '0.06658', 'rewards/rejected': '0.01939', 'rewards/accuracies': '0.875', 'rewards/margins': '0.04719', 'logps/chosen': '-358.5', 'logps/rejected': '-382.4', 'logits/chosen': '5.411', 'logits/rejected': '5.427', 'epoch': '0.03737'} +{'loss': '0.6611', 'grad_norm': '1.326', 'learning_rate': '6.395e-06', 'rewards/chosen': '0.07039', 'rewards/rejected': '0.004514', 'rewards/accuracies': '0.9375', 'rewards/margins': '0.06587', 'logps/chosen': '-326.9', 'logps/rejected': '-346.5', 'logits/chosen': '5.207', 'logits/rejected': '5.255', 'epoch': '0.03971'} +{'loss': '0.6282', 'grad_norm': '1.578', 'learning_rate': '6.783e-06', 'rewards/chosen': '0.1174', 'rewards/rejected': '-0.01899', 'rewards/accuracies': '1', 'rewards/margins': '0.1364', 'logps/chosen': '-360', 'logps/rejected': '-384.3', 'logits/chosen': '5.551', 'logits/rejected': '5.637', 'epoch': '0.04204'} +{'loss': '0.6271', 'grad_norm': '1.859', 'learning_rate': '7.171e-06', 'rewards/chosen': '0.1618', 'rewards/rejected': '0.02293', 'rewards/accuracies': '0.9375', 'rewards/margins': '0.1389', 'logps/chosen': '-325.9', 'logps/rejected': '-352', 'logits/chosen': '5.391', 'logits/rejected': '5.412', 'epoch': '0.04438'} +{'loss': '0.6412', 'grad_norm': '1.323', 'learning_rate': '7.558e-06', 'rewards/chosen': '0.1525', 'rewards/rejected': '0.0409', 'rewards/accuracies': '0.875', 'rewards/margins': '0.1116', 'logps/chosen': '-343.4', 'logps/rejected': '-374.8', 'logits/chosen': '5.19', 'logits/rejected': '5.203', 'epoch': '0.04672'} +{'loss': '0.6094', 'grad_norm': '2.533', 'learning_rate': '7.946e-06', 'rewards/chosen': '0.2898', 'rewards/rejected': '0.1082', 'rewards/accuracies': '0.9375', 'rewards/margins': '0.1816', 'logps/chosen': '-341.8', 'logps/rejected': '-372.4', 'logits/chosen': '5.42', 'logits/rejected': '5.453', 'epoch': '0.04905'} +{'loss': '0.5816', 'grad_norm': '1.525', 'learning_rate': '8.333e-06', 'rewards/chosen': '0.3246', 'rewards/rejected': '0.07354', 'rewards/accuracies': '0.8125', 'rewards/margins': '0.2511', 'logps/chosen': '-354.5', 'logps/rejected': '-376.9', 'logits/chosen': '5.384', 'logits/rejected': '5.398', 'epoch': '0.05139'} +{'loss': '0.527', 'grad_norm': '2.081', 'learning_rate': '8.721e-06', 'rewards/chosen': '0.6465', 'rewards/rejected': '0.2707', 'rewards/accuracies': '0.9375', 'rewards/margins': '0.3758', 'logps/chosen': '-331.1', 'logps/rejected': '-362.9', 'logits/chosen': '5.27', 'logits/rejected': '5.287', 'epoch': '0.05372'} +{'loss': '0.5066', 'grad_norm': '1.769', 'learning_rate': '9.109e-06', 'rewards/chosen': '0.6378', 'rewards/rejected': '0.2113', 'rewards/accuracies': '1', 'rewards/margins': '0.4265', 'logps/chosen': '-369.4', 'logps/rejected': '-400.2', 'logits/chosen': '5.473', 'logits/rejected': '5.465', 'epoch': '0.05606'} +{'loss': '0.5293', 'grad_norm': '2.842', 'learning_rate': '9.496e-06', 'rewards/chosen': '0.7923', 'rewards/rejected': '0.4136', 'rewards/accuracies': '1', 'rewards/margins': '0.3787', 'logps/chosen': '-363.5', 'logps/rejected': '-397.8', 'logits/chosen': '5.05', 'logits/rejected': '5.112', 'epoch': '0.05839'} +{'eval_loss': '0.4611', 'eval_runtime': '454.6', 'eval_samples_per_second': '1.676', 'eval_steps_per_second': '1.676', 'eval_rewards/chosen': '0.8944', 'eval_rewards/rejected': '0.3205', 'eval_rewards/accuracies': '0.9619', 'eval_rewards/margins': '0.5739', 'eval_logps/chosen': '-361.5', 'eval_logps/rejected': '-392.6', 'eval_logits/chosen': '5.224', 'eval_logits/rejected': '5.287', 'epoch': '0.05839'} +{'loss': '0.446', 'grad_norm': '1.691', 'learning_rate': '9.884e-06', 'rewards/chosen': '0.987', 'rewards/rejected': '0.3813', 'rewards/accuracies': '0.9375', 'rewards/margins': '0.6057', 'logps/chosen': '-343.5', 'logps/rejected': '-379.4', 'logits/chosen': '5.486', 'logits/rejected': '5.542', 'epoch': '0.06073'} +{'loss': '0.4361', 'grad_norm': '1.946', 'learning_rate': '1.027e-05', 'rewards/chosen': '0.7795', 'rewards/rejected': '0.1529', 'rewards/accuracies': '1', 'rewards/margins': '0.6266', 'logps/chosen': '-379.5', 'logps/rejected': '-401.6', 'logits/chosen': '5.17', 'logits/rejected': '5.269', 'epoch': '0.06307'} +{'loss': '0.3928', 'grad_norm': '2.127', 'learning_rate': '1.066e-05', 'rewards/chosen': '1.274', 'rewards/rejected': '0.4879', 'rewards/accuracies': '0.9375', 'rewards/margins': '0.7864', 'logps/chosen': '-378.1', 'logps/rejected': '-413.3', 'logits/chosen': '5.097', 'logits/rejected': '5.153', 'epoch': '0.0654'} +{'loss': '0.3586', 'grad_norm': '1.538', 'learning_rate': '1.105e-05', 'rewards/chosen': '1.29', 'rewards/rejected': '0.3544', 'rewards/accuracies': '0.875', 'rewards/margins': '0.9354', 'logps/chosen': '-372.9', 'logps/rejected': '-401.8', 'logits/chosen': '5.139', 'logits/rejected': '5.203', 'epoch': '0.06774'} +{'loss': '0.428', 'grad_norm': '2.358', 'learning_rate': '1.143e-05', 'rewards/chosen': '1.382', 'rewards/rejected': '0.6533', 'rewards/accuracies': '0.875', 'rewards/margins': '0.7291', 'logps/chosen': '-361', 'logps/rejected': '-392.3', 'logits/chosen': '5.072', 'logits/rejected': '5.188', 'epoch': '0.07007'} +{'loss': '0.3137', 'grad_norm': '2.178', 'learning_rate': '1.182e-05', 'rewards/chosen': '1.664', 'rewards/rejected': '0.575', 'rewards/accuracies': '1', 'rewards/margins': '1.089', 'logps/chosen': '-364.8', 'logps/rejected': '-401', 'logits/chosen': '5.264', 'logits/rejected': '5.311', 'epoch': '0.07241'} +{'loss': '0.3038', 'grad_norm': '1.698', 'learning_rate': '1.221e-05', 'rewards/chosen': '1.647', 'rewards/rejected': '0.5322', 'rewards/accuracies': '1', 'rewards/margins': '1.115', 'logps/chosen': '-359.8', 'logps/rejected': '-397.2', 'logits/chosen': '5.192', 'logits/rejected': '5.261', 'epoch': '0.07474'} +{'loss': '0.2503', 'grad_norm': '1.322', 'learning_rate': '1.26e-05', 'rewards/chosen': '1.567', 'rewards/rejected': '0.1573', 'rewards/accuracies': '1', 'rewards/margins': '1.41', 'logps/chosen': '-352.4', 'logps/rejected': '-392.7', 'logits/chosen': '5.293', 'logits/rejected': '5.309', 'epoch': '0.07708'} +{'loss': '0.3108', 'grad_norm': '1.817', 'learning_rate': '1.298e-05', 'rewards/chosen': '1.479', 'rewards/rejected': '0.2151', 'rewards/accuracies': '0.9375', 'rewards/margins': '1.264', 'logps/chosen': '-320', 'logps/rejected': '-364.1', 'logits/chosen': '5.026', 'logits/rejected': '5.115', 'epoch': '0.07942'} +{'loss': '0.2299', 'grad_norm': '1.066', 'learning_rate': '1.337e-05', 'rewards/chosen': '1.395', 'rewards/rejected': '-0.1015', 'rewards/accuracies': '1', 'rewards/margins': '1.497', 'logps/chosen': '-383.8', 'logps/rejected': '-431.8', 'logits/chosen': '4.945', 'logits/rejected': '4.959', 'epoch': '0.08175'} +{'loss': '0.226', 'grad_norm': '1.035', 'learning_rate': '1.376e-05', 'rewards/chosen': '1.298', 'rewards/rejected': '-0.3464', 'rewards/accuracies': '1', 'rewards/margins': '1.644', 'logps/chosen': '-350.9', 'logps/rejected': '-382.7', 'logits/chosen': '5.004', 'logits/rejected': '5.12', 'epoch': '0.08409'} +{'loss': '0.1892', 'grad_norm': '1.16', 'learning_rate': '1.415e-05', 'rewards/chosen': '1.198', 'rewards/rejected': '-0.5511', 'rewards/accuracies': '1', 'rewards/margins': '1.75', 'logps/chosen': '-352.3', 'logps/rejected': '-399.2', 'logits/chosen': '4.89', 'logits/rejected': '4.95', 'epoch': '0.08642'} +{'eval_loss': '0.1602', 'eval_runtime': '454.3', 'eval_samples_per_second': '1.677', 'eval_steps_per_second': '1.677', 'eval_rewards/chosen': '1.121', 'eval_rewards/rejected': '-0.9336', 'eval_rewards/accuracies': '0.9961', 'eval_rewards/margins': '2.055', 'eval_logps/chosen': '-359.2', 'eval_logps/rejected': '-405.1', 'eval_logits/chosen': '4.93', 'eval_logits/rejected': '5.032', 'epoch': '0.08759'} +{'loss': '0.16', 'grad_norm': '1.143', 'learning_rate': '1.453e-05', 'rewards/chosen': '1.213', 'rewards/rejected': '-0.8816', 'rewards/accuracies': '1', 'rewards/margins': '2.095', 'logps/chosen': '-313.1', 'logps/rejected': '-356.1', 'logits/chosen': '5.037', 'logits/rejected': '5.132', 'epoch': '0.08876'} +{'loss': '0.1895', 'grad_norm': '0.9839', 'learning_rate': '1.492e-05', 'rewards/chosen': '1.061', 'rewards/rejected': '-0.8471', 'rewards/accuracies': '1', 'rewards/margins': '1.908', 'logps/chosen': '-366.3', 'logps/rejected': '-405.8', 'logits/chosen': '4.817', 'logits/rejected': '4.874', 'epoch': '0.09109'} +{'loss': '0.1595', 'grad_norm': '0.9213', 'learning_rate': '1.531e-05', 'rewards/chosen': '0.6765', 'rewards/rejected': '-1.491', 'rewards/accuracies': '1', 'rewards/margins': '2.167', 'logps/chosen': '-348.1', 'logps/rejected': '-395.2', 'logits/chosen': '5.047', 'logits/rejected': '5.158', 'epoch': '0.09343'} +{'loss': '0.1209', 'grad_norm': '0.9821', 'learning_rate': '1.57e-05', 'rewards/chosen': '0.872', 'rewards/rejected': '-1.775', 'rewards/accuracies': '1', 'rewards/margins': '2.647', 'logps/chosen': '-378.9', 'logps/rejected': '-436.9', 'logits/chosen': '4.691', 'logits/rejected': '4.772', 'epoch': '0.09577'} +{'loss': '0.08721', 'grad_norm': '0.6679', 'learning_rate': '1.609e-05', 'rewards/chosen': '1.134', 'rewards/rejected': '-1.77', 'rewards/accuracies': '1', 'rewards/margins': '2.904', 'logps/chosen': '-346.5', 'logps/rejected': '-400.1', 'logits/chosen': '4.88', 'logits/rejected': '4.962', 'epoch': '0.0981'} +{'loss': '0.07943', 'grad_norm': '0.5761', 'learning_rate': '1.647e-05', 'rewards/chosen': '1.246', 'rewards/rejected': '-1.769', 'rewards/accuracies': '1', 'rewards/margins': '3.015', 'logps/chosen': '-341.7', 'logps/rejected': '-398.3', 'logits/chosen': '4.464', 'logits/rejected': '4.68', 'epoch': '0.1004'} +{'loss': '0.1258', 'grad_norm': '1.602', 'learning_rate': '1.686e-05', 'rewards/chosen': '1.071', 'rewards/rejected': '-2.048', 'rewards/accuracies': '0.9375', 'rewards/margins': '3.119', 'logps/chosen': '-344.9', 'logps/rejected': '-395.4', 'logits/chosen': '4.564', 'logits/rejected': '4.681', 'epoch': '0.1028'} +{'loss': '0.06663', 'grad_norm': '0.4641', 'learning_rate': '1.725e-05', 'rewards/chosen': '1.413', 'rewards/rejected': '-2.348', 'rewards/accuracies': '1', 'rewards/margins': '3.761', 'logps/chosen': '-327', 'logps/rejected': '-388.4', 'logits/chosen': '4.499', 'logits/rejected': '4.673', 'epoch': '0.1051'} +{'loss': '0.04482', 'grad_norm': '0.67', 'learning_rate': '1.764e-05', 'rewards/chosen': '1.478', 'rewards/rejected': '-2.901', 'rewards/accuracies': '1', 'rewards/margins': '4.379', 'logps/chosen': '-362.7', 'logps/rejected': '-439.3', 'logits/chosen': '4.729', 'logits/rejected': '4.814', 'epoch': '0.1074'} +{'loss': '0.05633', 'grad_norm': '0.4153', 'learning_rate': '1.802e-05', 'rewards/chosen': '0.7137', 'rewards/rejected': '-2.745', 'rewards/accuracies': '1', 'rewards/margins': '3.458', 'logps/chosen': '-381.6', 'logps/rejected': '-444.3', 'logits/chosen': '4.785', 'logits/rejected': '4.892', 'epoch': '0.1098'} +{'loss': '0.04092', 'grad_norm': '0.3153', 'learning_rate': '1.841e-05', 'rewards/chosen': '1.757', 'rewards/rejected': '-2.264', 'rewards/accuracies': '1', 'rewards/margins': '4.021', 'logps/chosen': '-356.7', 'logps/rejected': '-414.7', 'logits/chosen': '4.604', 'logits/rejected': '4.805', 'epoch': '0.1121'} +{'loss': '0.02579', 'grad_norm': '0.377', 'learning_rate': '1.88e-05', 'rewards/chosen': '1.387', 'rewards/rejected': '-3.268', 'rewards/accuracies': '1', 'rewards/margins': '4.654', 'logps/chosen': '-339.8', 'logps/rejected': '-413.9', 'logits/chosen': '4.559', 'logits/rejected': '4.691', 'epoch': '0.1145'} +{'loss': '0.01516', 'grad_norm': '0.1502', 'learning_rate': '1.919e-05', 'rewards/chosen': '1.794', 'rewards/rejected': '-3.149', 'rewards/accuracies': '1', 'rewards/margins': '4.943', 'logps/chosen': '-346.3', 'logps/rejected': '-418.9', 'logits/chosen': '4.387', 'logits/rejected': '4.495', 'epoch': '0.1168'} +{'eval_loss': '0.04428', 'eval_runtime': '454.7', 'eval_samples_per_second': '1.676', 'eval_steps_per_second': '1.676', 'eval_rewards/chosen': '1.725', 'eval_rewards/rejected': '-2.864', 'eval_rewards/accuracies': '0.9921', 'eval_rewards/margins': '4.589', 'eval_logps/chosen': '-353.2', 'eval_logps/rejected': '-424.4', 'eval_logits/chosen': '4.286', 'eval_logits/rejected': '4.426', 'epoch': '0.1168'} +{'loss': '0.0159', 'grad_norm': '0.2124', 'learning_rate': '1.957e-05', 'rewards/chosen': '1.77', 'rewards/rejected': '-3.026', 'rewards/accuracies': '1', 'rewards/margins': '4.796', 'logps/chosen': '-305', 'logps/rejected': '-384.9', 'logits/chosen': '4.197', 'logits/rejected': '4.353', 'epoch': '0.1191'} +{'loss': '0.03818', 'grad_norm': '1.196', 'learning_rate': '1.996e-05', 'rewards/chosen': '1.556', 'rewards/rejected': '-3.267', 'rewards/accuracies': '1', 'rewards/margins': '4.823', 'logps/chosen': '-341.1', 'logps/rejected': '-417.6', 'logits/chosen': '4.185', 'logits/rejected': '4.28', 'epoch': '0.1215'} +{'loss': '0.05679', 'grad_norm': '1.302', 'learning_rate': '2.035e-05', 'rewards/chosen': '1.654', 'rewards/rejected': '-3.076', 'rewards/accuracies': '1', 'rewards/margins': '4.73', 'logps/chosen': '-358.1', 'logps/rejected': '-426.9', 'logits/chosen': '4.324', 'logits/rejected': '4.452', 'epoch': '0.1238'} +{'loss': '0.07615', 'grad_norm': '0.3007', 'learning_rate': '2.074e-05', 'rewards/chosen': '1.412', 'rewards/rejected': '-3.332', 'rewards/accuracies': '0.9375', 'rewards/margins': '4.744', 'logps/chosen': '-364.5', 'logps/rejected': '-434.5', 'logits/chosen': '4.492', 'logits/rejected': '4.633', 'epoch': '0.1261'} +{'loss': '0.0146', 'grad_norm': '0.4247', 'learning_rate': '2.112e-05', 'rewards/chosen': '1.958', 'rewards/rejected': '-4.051', 'rewards/accuracies': '1', 'rewards/margins': '6.009', 'logps/chosen': '-306.5', 'logps/rejected': '-392.5', 'logits/chosen': '3.858', 'logits/rejected': '3.968', 'epoch': '0.1285'} +{'loss': '0.01015', 'grad_norm': '0.1418', 'learning_rate': '2.151e-05', 'rewards/chosen': '2.196', 'rewards/rejected': '-3.758', 'rewards/accuracies': '1', 'rewards/margins': '5.954', 'logps/chosen': '-339.6', 'logps/rejected': '-425.5', 'logits/chosen': '4.254', 'logits/rejected': '4.353', 'epoch': '0.1308'} +{'loss': '0.01139', 'grad_norm': '0.2944', 'learning_rate': '2.19e-05', 'rewards/chosen': '1.995', 'rewards/rejected': '-3.392', 'rewards/accuracies': '1', 'rewards/margins': '5.387', 'logps/chosen': '-349.4', 'logps/rejected': '-431.8', 'logits/chosen': '3.717', 'logits/rejected': '3.922', 'epoch': '0.1331'} +{'loss': '0.02451', 'grad_norm': '0.9541', 'learning_rate': '2.229e-05', 'rewards/chosen': '1.855', 'rewards/rejected': '-3.475', 'rewards/accuracies': '1', 'rewards/margins': '5.33', 'logps/chosen': '-343.2', 'logps/rejected': '-423.2', 'logits/chosen': '3.514', 'logits/rejected': '3.74', 'epoch': '0.1355'} +{'loss': '0.007584', 'grad_norm': '0.4569', 'learning_rate': '2.267e-05', 'rewards/chosen': '2.13', 'rewards/rejected': '-4.365', 'rewards/accuracies': '1', 'rewards/margins': '6.495', 'logps/chosen': '-382.1', 'logps/rejected': '-480.7', 'logits/chosen': '3.9', 'logits/rejected': '3.963', 'epoch': '0.1378'} +{'loss': '0.007748', 'grad_norm': '0.2083', 'learning_rate': '2.306e-05', 'rewards/chosen': '1.399', 'rewards/rejected': '-4.58', 'rewards/accuracies': '1', 'rewards/margins': '5.979', 'logps/chosen': '-355.3', 'logps/rejected': '-436.5', 'logits/chosen': '3.772', 'logits/rejected': '3.939', 'epoch': '0.1401'} +{'loss': '0.01436', 'grad_norm': '0.2193', 'learning_rate': '2.345e-05', 'rewards/chosen': '1.177', 'rewards/rejected': '-5.205', 'rewards/accuracies': '1', 'rewards/margins': '6.382', 'logps/chosen': '-327.2', 'logps/rejected': '-414.8', 'logits/chosen': '3.657', 'logits/rejected': '3.875', 'epoch': '0.1425'} +{'loss': '0.007622', 'grad_norm': '0.03551', 'learning_rate': '2.384e-05', 'rewards/chosen': '0.7803', 'rewards/rejected': '-6.615', 'rewards/accuracies': '1', 'rewards/margins': '7.395', 'logps/chosen': '-369.9', 'logps/rejected': '-474', 'logits/chosen': '3.66', 'logits/rejected': '3.725', 'epoch': '0.1448'} +{'eval_loss': '0.02411', 'eval_runtime': '454.8', 'eval_samples_per_second': '1.675', 'eval_steps_per_second': '1.675', 'eval_rewards/chosen': '0.5319', 'eval_rewards/rejected': '-6.151', 'eval_rewards/accuracies': '0.9934', 'eval_rewards/margins': '6.683', 'eval_logps/chosen': '-365.1', 'eval_logps/rejected': '-457.3', 'eval_logits/chosen': '3.669', 'eval_logits/rejected': '3.844', 'epoch': '0.146'} +{'loss': '0.005532', 'grad_norm': '0.2169', 'learning_rate': '2.422e-05', 'rewards/chosen': '0.9076', 'rewards/rejected': '-7.027', 'rewards/accuracies': '1', 'rewards/margins': '7.935', 'logps/chosen': '-345.2', 'logps/rejected': '-454.6', 'logits/chosen': '3.778', 'logits/rejected': '3.757', 'epoch': '0.1472'} +{'loss': '0.0008547', 'grad_norm': '0.05145', 'learning_rate': '2.461e-05', 'rewards/chosen': '1.086', 'rewards/rejected': '-6.756', 'rewards/accuracies': '1', 'rewards/margins': '7.843', 'logps/chosen': '-376.3', 'logps/rejected': '-486.3', 'logits/chosen': '3.686', 'logits/rejected': '3.777', 'epoch': '0.1495'} +{'loss': '0.01921', 'grad_norm': '1.001', 'learning_rate': '2.5e-05', 'rewards/chosen': '0.7988', 'rewards/rejected': '-6.314', 'rewards/accuracies': '1', 'rewards/margins': '7.112', 'logps/chosen': '-330.3', 'logps/rejected': '-420.2', 'logits/chosen': '3.856', 'logits/rejected': '4.067', 'epoch': '0.1518'} +{'loss': '0.005052', 'grad_norm': '0.2909', 'learning_rate': '2.539e-05', 'rewards/chosen': '0.727', 'rewards/rejected': '-6.733', 'rewards/accuracies': '1', 'rewards/margins': '7.46', 'logps/chosen': '-346.6', 'logps/rejected': '-448.4', 'logits/chosen': '3.856', 'logits/rejected': '4.107', 'epoch': '0.1542'} +{'loss': '0.02904', 'grad_norm': '0.1034', 'learning_rate': '2.578e-05', 'rewards/chosen': '-0.1552', 'rewards/rejected': '-7.109', 'rewards/accuracies': '1', 'rewards/margins': '6.953', 'logps/chosen': '-398.5', 'logps/rejected': '-493.3', 'logits/chosen': '3.692', 'logits/rejected': '3.876', 'epoch': '0.1565'} +{'loss': '0.008301', 'grad_norm': '0.4083', 'learning_rate': '2.616e-05', 'rewards/chosen': '0.3356', 'rewards/rejected': '-6.363', 'rewards/accuracies': '1', 'rewards/margins': '6.699', 'logps/chosen': '-420.6', 'logps/rejected': '-511.7', 'logits/chosen': '3.701', 'logits/rejected': '3.885', 'epoch': '0.1588'} +{'loss': '0.01079', 'grad_norm': '0.1769', 'learning_rate': '2.655e-05', 'rewards/chosen': '1.53', 'rewards/rejected': '-5.799', 'rewards/accuracies': '1', 'rewards/margins': '7.329', 'logps/chosen': '-361.6', 'logps/rejected': '-455', 'logits/chosen': '3.477', 'logits/rejected': '3.63', 'epoch': '0.1612'} +{'loss': '0.01278', 'grad_norm': '0.1559', 'learning_rate': '2.694e-05', 'rewards/chosen': '1.282', 'rewards/rejected': '-6.813', 'rewards/accuracies': '1', 'rewards/margins': '8.095', 'logps/chosen': '-379.4', 'logps/rejected': '-490', 'logits/chosen': '3.44', 'logits/rejected': '3.567', 'epoch': '0.1635'} +{'loss': '0.007118', 'grad_norm': '0.8207', 'learning_rate': '2.733e-05', 'rewards/chosen': '1.469', 'rewards/rejected': '-6.559', 'rewards/accuracies': '1', 'rewards/margins': '8.027', 'logps/chosen': '-402.8', 'logps/rejected': '-506.3', 'logits/chosen': '3.235', 'logits/rejected': '3.393', 'epoch': '0.1658'} + main() + File "/workspace/trainer-kit/DPO-14b/run_dpo.py", line 928, in main + trainer.train(resume_from_checkpoint=resume_from) + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/trainer.py", line 2168, in train + return inner_training_loop( + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/trainer.py", line 2535, in _inner_training_loop + tr_loss_step = self.training_step(model, inputs, num_items_in_batch) + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/trainer.py", line 3807, in training_step + loss = self.compute_loss(model, inputs, num_items_in_batch=num_items_in_batch) + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/trl/trainer/dpo_trainer.py", line 1810, in compute_loss + loss, metrics = self.get_batch_loss_metrics(model, inputs, train_eval="train") + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/trl/trainer/dpo_trainer.py", line 1726, in get_batch_loss_metrics + model_output = self.concatenated_forward(model, batch) + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/trl/trainer/dpo_trainer.py", line 1600, in concatenated_forward + outputs = model(input_ids, **model_kwargs) + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1747, in _call_impl + return forward_call(*args, **kwargs) + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/accelerate/utils/operations.py", line 819, in forward + return model_forward(*args, **kwargs) + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/accelerate/utils/operations.py", line 807, in __call__ + return convert_to_fp32(self.model_forward(*args, **kwargs)) + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/torch/amp/autocast_mode.py", line 44, in decorate_autocast + return func(*args, **kwargs) + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/peft/peft_model.py", line 1923, in forward + return self.base_model( + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1747, in _call_impl + return forward_call(*args, **kwargs) + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/peft/tuners/tuners_utils.py", line 308, in forward + return self.model.forward(*args, **kwargs) + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/accelerate/hooks.py", line 175, in new_forward + output = module._old_forward(*args, **kwargs) + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/utils/generic.py", line 810, in wrapper + output = func(self, *args, **kwargs) + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/models/qwen2/modeling_qwen2.py", line 477, in forward + outputs: BaseModelOutputWithPast = self.model( + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1747, in _call_impl + return forward_call(*args, **kwargs) + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/utils/generic.py", line 965, in wrapper + outputs = func(self, *args, **kwargs) + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/models/qwen2/modeling_qwen2.py", line 412, in forward + hidden_states = decoder_layer( + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/modeling_layers.py", line 93, in __call__ + return self._gradient_checkpointing_func(partial(super().__call__, **kwargs), *args) + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/torch/_compile.py", line 32, in inner + return disable_fn(*args, **kwargs) + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/torch/_dynamo/eval_frame.py", line 632, in _fn + return fn(*args, **kwargs) + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/torch/utils/checkpoint.py", line 489, in checkpoint + return CheckpointFunction.apply(function, preserve, *args) + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/torch/autograd/function.py", line 575, in apply + return super().apply(*args, **kwargs) # type: ignore[misc] + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/torch/utils/checkpoint.py", line 264, in forward + outputs = run_function(*args) + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1747, in _call_impl + return forward_call(*args, **kwargs) + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/utils/generic.py", line 918, in wrapped_forward + output = orig_forward(*args, **kwargs) + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/accelerate/hooks.py", line 175, in new_forward + output = module._old_forward(*args, **kwargs) + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/models/qwen2/modeling_qwen2.py", line 300, in forward + hidden_states, _ = self.self_attn( + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1747, in _call_impl + return forward_call(*args, **kwargs) + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/accelerate/hooks.py", line 175, in new_forward + output = module._old_forward(*args, **kwargs) + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/models/qwen2/modeling_qwen2.py", line 220, in forward + query_states = self.q_proj(hidden_states).view(hidden_shape).transpose(1, 2) + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1747, in _call_impl + return forward_call(*args, **kwargs) + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/peft/tuners/lora/layer.py", line 793, in forward + result = self.base_layer(x, *args, **kwargs) + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1747, in _call_impl + return forward_call(*args, **kwargs) + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/accelerate/hooks.py", line 175, in new_forward + output = module._old_forward(*args, **kwargs) +KeyboardInterrupt +Traceback (most recent call last): + File "/workspace/trainer-kit/DPO-14b/run_dpo.py", line 953, in + main() + File "/workspace/trainer-kit/DPO-14b/run_dpo.py", line 928, in main + trainer.train(resume_from_checkpoint=resume_from) + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/trainer.py", line 2168, in train + return inner_training_loop( + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/trainer.py", line 2535, in _inner_training_loop + tr_loss_step = self.training_step(model, inputs, num_items_in_batch) + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/trainer.py", line 3807, in training_step + loss = self.compute_loss(model, inputs, num_items_in_batch=num_items_in_batch) + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/trl/trainer/dpo_trainer.py", line 1810, in compute_loss + loss, metrics = self.get_batch_loss_metrics(model, inputs, train_eval="train") + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/trl/trainer/dpo_trainer.py", line 1726, in get_batch_loss_metrics + model_output = self.concatenated_forward(model, batch) + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/trl/trainer/dpo_trainer.py", line 1600, in concatenated_forward + outputs = model(input_ids, **model_kwargs) + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1747, in _call_impl + return forward_call(*args, **kwargs) + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/accelerate/utils/operations.py", line 819, in forward + return model_forward(*args, **kwargs) + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/accelerate/utils/operations.py", line 807, in __call__ + return convert_to_fp32(self.model_forward(*args, **kwargs)) + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/torch/amp/autocast_mode.py", line 44, in decorate_autocast + return func(*args, **kwargs) + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/peft/peft_model.py", line 1923, in forward + return self.base_model( + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1747, in _call_impl + return forward_call(*args, **kwargs) + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/peft/tuners/tuners_utils.py", line 308, in forward + return self.model.forward(*args, **kwargs) + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/accelerate/hooks.py", line 175, in new_forward + output = module._old_forward(*args, **kwargs) + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/utils/generic.py", line 810, in wrapper + output = func(self, *args, **kwargs) + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/models/qwen2/modeling_qwen2.py", line 477, in forward + outputs: BaseModelOutputWithPast = self.model( + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1747, in _call_impl + return forward_call(*args, **kwargs) + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/utils/generic.py", line 965, in wrapper + outputs = func(self, *args, **kwargs) + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/models/qwen2/modeling_qwen2.py", line 412, in forward + hidden_states = decoder_layer( + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/modeling_layers.py", line 93, in __call__ + return self._gradient_checkpointing_func(partial(super().__call__, **kwargs), *args) + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/torch/_compile.py", line 32, in inner + return disable_fn(*args, **kwargs) + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/torch/_dynamo/eval_frame.py", line 632, in _fn + return fn(*args, **kwargs) + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/torch/utils/checkpoint.py", line 489, in checkpoint + return CheckpointFunction.apply(function, preserve, *args) + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/torch/autograd/function.py", line 575, in apply + return super().apply(*args, **kwargs) # type: ignore[misc] + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/torch/utils/checkpoint.py", line 264, in forward + outputs = run_function(*args) + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1747, in _call_impl + return forward_call(*args, **kwargs) + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/utils/generic.py", line 918, in wrapped_forward + output = orig_forward(*args, **kwargs) + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/accelerate/hooks.py", line 175, in new_forward + output = module._old_forward(*args, **kwargs) + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/models/qwen2/modeling_qwen2.py", line 300, in forward + hidden_states, _ = self.self_attn( + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1747, in _call_impl + return forward_call(*args, **kwargs) + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/accelerate/hooks.py", line 175, in new_forward + output = module._old_forward(*args, **kwargs) + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/models/qwen2/modeling_qwen2.py", line 220, in forward + query_states = self.q_proj(hidden_states).view(hidden_shape).transpose(1, 2) + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1747, in _call_impl + return forward_call(*args, **kwargs) + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/peft/tuners/lora/layer.py", line 793, in forward + result = self.base_layer(x, *args, **kwargs) + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1747, in _call_impl + return forward_call(*args, **kwargs) + File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/accelerate/hooks.py", line 175, in new_forward + output = module._old_forward(*args, **kwargs) +KeyboardInterrupt diff --git a/dpo_qwen_14B/wandb/run-20251226_155650-wbzoafvt/files/requirements.txt b/dpo_qwen_14B/wandb/run-20251226_155650-wbzoafvt/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..79a4241d8724f018c9bdfcd7c289f1f14578574b --- /dev/null +++ b/dpo_qwen_14B/wandb/run-20251226_155650-wbzoafvt/files/requirements.txt @@ -0,0 +1,104 @@ +exceptiongroup==1.3.1 +wheel==0.45.1 +python-dateutil==2.9.0.post0 +nvidia-ml-py==13.580.82 +huggingface_hub==1.2.3 +idna==3.11 +click==8.3.1 +numpy==2.2.6 +httpx==0.28.1 +tokenizers==0.22.1 +sympy==1.13.1 +yarl==1.22.0 +async-timeout==5.0.1 +datasets==4.4.2 +platformdirs==4.5.1 +nvidia-cuda-cupti-cu12==12.1.105 +nvidia-nvtx-cu12==12.1.105 +smmap==5.0.2 +accelerate==1.12.0 +requests==2.32.5 +aiohttp==3.13.2 +bitsandbytes==0.49.0 +nvidia-cublas-cu12==12.1.3.1 +mpmath==1.3.0 +torchaudio==2.5.1+cu121 +nvidia-cuda-runtime-cu12==12.1.105 +typing-inspection==0.4.2 +GitPython==3.1.45 +xxhash==3.6.0 +nvidia-cusolver-cu12==11.4.5.107 +pydantic_core==2.41.5 +six==1.17.0 +torchvision==0.20.1+cu121 +typing_extensions==4.15.0 +triton==3.1.0 +charset-normalizer==3.4.4 +nvitop==1.6.1 +wandb==0.23.1 +regex==2025.11.3 +pip==25.3 +nvidia-cusparse-cu12==12.1.0.106 +pytz==2025.2 +Jinja2==3.1.6 +psutil==7.2.0 +pillow==12.0.0 +packaging==25.0 +safetensors==0.7.0 +sentry-sdk==2.48.0 +gitdb==4.0.12 +httpcore==1.0.9 +setuptools==80.9.0 +nvidia-cufft-cu12==11.0.2.54 +anyio==4.12.0 +transformers==5.0.0.dev0 +pydantic==2.12.5 +fsspec==2025.10.0 +filelock==3.20.0 +PyYAML==6.0.3 +hf-xet==1.2.0 +nvidia-cudnn-cu12==9.1.0.70 +tqdm==4.67.1 +MarkupSafe==2.1.5 +attrs==25.4.0 +nvidia-cuda-nvrtc-cu12==12.1.105 +peft==0.18.0 +aiohappyeyeballs==2.6.1 +networkx==3.4.2 +nvidia-nvjitlink-cu12==12.9.86 +certifi==2025.11.12 +pyarrow==22.0.0 +dill==0.4.0 +protobuf==6.33.2 +aiosignal==1.4.0 +frozenlist==1.8.0 +urllib3==2.6.2 +propcache==0.4.1 +tzdata==2025.3 +pandas==2.3.3 +annotated-types==0.7.0 +shellingham==1.5.4 +nvidia-nccl-cu12==2.21.5 +multidict==6.7.0 +nvidia-curand-cu12==10.3.2.106 +trl==0.26.2 +torch==2.5.1+cu121 +h11==0.16.0 +multiprocess==0.70.18 +typer-slim==0.21.0 +wheel==0.45.1 +tomli==2.0.1 +autocommand==2.2.2 +jaraco.context==5.3.0 +zipp==3.19.2 +packaging==24.2 +inflect==7.3.1 +typing_extensions==4.12.2 +platformdirs==4.2.2 +jaraco.functools==4.0.1 +jaraco.collections==5.1.0 +jaraco.text==3.12.1 +backports.tarfile==1.2.0 +more-itertools==10.3.0 +importlib_metadata==8.0.0 +typeguard==4.3.0 diff --git a/dpo_qwen_14B/wandb/run-20251226_155650-wbzoafvt/files/wandb-metadata.json b/dpo_qwen_14B/wandb/run-20251226_155650-wbzoafvt/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..20b8f9d8999d072115b6971baab0288966441459 --- /dev/null +++ b/dpo_qwen_14B/wandb/run-20251226_155650-wbzoafvt/files/wandb-metadata.json @@ -0,0 +1,47 @@ +{ + "os": "Linux-6.12.46+-x86_64-with-glibc2.35", + "python": "CPython 3.10.12", + "startedAt": "2025-12-26T15:56:50.015524Z", + "args": [ + "--config", + "config_dpo.yaml" + ], + "program": "/workspace/trainer-kit/DPO-14b/run_dpo.py", + "codePath": "run_dpo.py", + "codePathLocal": "run_dpo.py", + "email": "shaiksirajuddin9949@gmail.com", + "root": "runs/dpo_run_14b_v1", + "host": "a100-2gpu-shell-session-757d587799-mfdvv", + "executable": "/workspace/llm_finetuning_env/bin/python", + "cpu_count": 12, + "cpu_count_logical": 24, + "gpu": "NVIDIA A100-SXM4-80GB", + "gpu_count": 2, + "disk": { + "/": { + "total": "791251738624", + "used": "323290275840" + } + }, + "memory": { + "total": "359047892992" + }, + "gpu_nvidia": [ + { + "name": "NVIDIA A100-SXM4-80GB", + "memoryTotal": "85899345920", + "cudaCores": 6912, + "architecture": "Ampere", + "uuid": "GPU-989794b0-ec3b-13bf-db9f-3fbe341497ba" + }, + { + "name": "NVIDIA A100-SXM4-80GB", + "memoryTotal": "85899345920", + "cudaCores": 6912, + "architecture": "Ampere", + "uuid": "GPU-3790aa64-60ef-9eac-b0b1-b278ee8c0d40" + } + ], + "cudaVersion": "13.0", + "writerId": "afn1h9dtq29ul6sseazq0ojw1mqcn19i" +} \ No newline at end of file diff --git a/dpo_qwen_14B/wandb/run-20251226_155650-wbzoafvt/files/wandb-summary.json b/dpo_qwen_14B/wandb/run-20251226_155650-wbzoafvt/files/wandb-summary.json new file mode 100644 index 0000000000000000000000000000000000000000..4cd6ed7c80091cf0744366fe454481e1bea77684 --- /dev/null +++ b/dpo_qwen_14B/wandb/run-20251226_155650-wbzoafvt/files/wandb-summary.json @@ -0,0 +1 @@ +{"train/logits/chosen":3.23529052734375,"eval/logits/chosen":3.6694726943969727,"train/logits/rejected":3.393266201019287,"train/learning_rate":2.7325581395348836e-05,"eval/rewards/rejected":-6.150709629058838,"eval/logits/rejected":3.8436598777770996,"eval/rewards/chosen":0.5319492816925049,"_timestamp":1.7667683362567813e+09,"train/rewards/rejected":-6.558856964111328,"eval/runtime":454.8045,"eval/rewards/accuracies":0.9934383034706116,"train/logps/chosen":-402.8253479003906,"train/rewards/accuracies":1,"_wandb":{"runtime":3729},"train/grad_norm":0.820688009262085,"eval/rewards/margins":6.682660102844238,"_runtime":3729,"eval/logps/rejected":-457.28314208984375,"train/rewards/chosen":1.4685018062591553,"eval/samples_per_second":1.675,"eval/steps_per_second":1.675,"_step":75,"train/rewards/margins":8.027359008789062,"eval/logps/chosen":-365.087646484375,"train/logps/rejected":-506.32000732421875,"train/global_step":142,"train/loss":0.007118214387446642,"eval/loss":0.024107323959469795,"train/epoch":0.16583941605839417} \ No newline at end of file diff --git a/dpo_qwen_14B/wandb/run-20251226_155650-wbzoafvt/logs/debug-core.log b/dpo_qwen_14B/wandb/run-20251226_155650-wbzoafvt/logs/debug-core.log new file mode 100644 index 0000000000000000000000000000000000000000..3d4643a6d481bcc439c7b9f7bf93e4c69382c747 --- /dev/null +++ b/dpo_qwen_14B/wandb/run-20251226_155650-wbzoafvt/logs/debug-core.log @@ -0,0 +1,14 @@ +{"time":"2025-12-26T15:56:50.109153388Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpa97nk_g5/port-148906.txt","pid":148906,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false} +{"time":"2025-12-26T15:56:50.110079679Z","level":"INFO","msg":"server: will exit if parent process dies","ppid":148906} +{"time":"2025-12-26T15:56:50.110081586Z","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-148906-148983-325621513/socket","Net":"unix"}} +{"time":"2025-12-26T15:56:50.290687433Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"} +{"time":"2025-12-26T15:56:50.297246166Z","level":"INFO","msg":"handleInformInit: received","streamId":"wbzoafvt","id":"1(@)"} +{"time":"2025-12-26T15:56:50.452581495Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"wbzoafvt","id":"1(@)"} +{"time":"2025-12-26T16:59:00.070455239Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"} +{"time":"2025-12-26T16:59:00.070533969Z","level":"INFO","msg":"connection: closing","id":"1(@)"} +{"time":"2025-12-26T16:59:00.0705585Z","level":"INFO","msg":"server is shutting down"} +{"time":"2025-12-26T16:59:00.070589863Z","level":"INFO","msg":"connection: closed successfully","id":"1(@)"} +{"time":"2025-12-26T16:59:00.070654266Z","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-148906-148983-325621513/socket","Net":"unix"}} +{"time":"2025-12-26T16:59:00.47438251Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"} +{"time":"2025-12-26T16:59:00.474428554Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"} +{"time":"2025-12-26T16:59:00.474451644Z","level":"INFO","msg":"server is closed"} diff --git a/dpo_qwen_14B/wandb/run-20251226_155650-wbzoafvt/logs/debug-internal.log b/dpo_qwen_14B/wandb/run-20251226_155650-wbzoafvt/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..4997f9ce61d06da20afbeba3cfa2eb77964556df --- /dev/null +++ b/dpo_qwen_14B/wandb/run-20251226_155650-wbzoafvt/logs/debug-internal.log @@ -0,0 +1,11 @@ +{"time":"2025-12-26T15:56:50.297401502Z","level":"INFO","msg":"stream: starting","core version":"0.23.1"} +{"time":"2025-12-26T15:56:50.452320078Z","level":"INFO","msg":"stream: created new stream","id":"wbzoafvt"} +{"time":"2025-12-26T15:56:50.452494836Z","level":"INFO","msg":"handler: started","stream_id":"wbzoafvt"} +{"time":"2025-12-26T15:56:50.452572405Z","level":"INFO","msg":"stream: started","id":"wbzoafvt"} +{"time":"2025-12-26T15:56:50.452599156Z","level":"INFO","msg":"writer: started","stream_id":"wbzoafvt"} +{"time":"2025-12-26T15:56:50.452607804Z","level":"INFO","msg":"sender: started","stream_id":"wbzoafvt"} +{"time":"2025-12-26T16:59:00.070531235Z","level":"INFO","msg":"stream: closing","id":"wbzoafvt"} +{"time":"2025-12-26T16:59:00.346670237Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"} +{"time":"2025-12-26T16:59:00.473496131Z","level":"INFO","msg":"handler: closed","stream_id":"wbzoafvt"} +{"time":"2025-12-26T16:59:00.473589831Z","level":"INFO","msg":"sender: closed","stream_id":"wbzoafvt"} +{"time":"2025-12-26T16:59:00.473602236Z","level":"INFO","msg":"stream: closed","id":"wbzoafvt"} diff --git a/dpo_qwen_14B/wandb/run-20251226_155650-wbzoafvt/logs/debug.log b/dpo_qwen_14B/wandb/run-20251226_155650-wbzoafvt/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..77992573ecd912291c2ced9226296badd871bb37 --- /dev/null +++ b/dpo_qwen_14B/wandb/run-20251226_155650-wbzoafvt/logs/debug.log @@ -0,0 +1,26 @@ +2025-12-26 15:56:50,017 INFO MainThread:148906 [wandb_setup.py:_flush():80] Current SDK version is 0.23.1 +2025-12-26 15:56:50,017 INFO MainThread:148906 [wandb_setup.py:_flush():80] Configure stats pid to 148906 +2025-12-26 15:56:50,017 INFO MainThread:148906 [wandb_setup.py:_flush():80] Loading settings from /root/.config/wandb/settings +2025-12-26 15:56:50,017 INFO MainThread:148906 [wandb_setup.py:_flush():80] Loading settings from /workspace/trainer-kit/DPO-14b/wandb/settings +2025-12-26 15:56:50,017 INFO MainThread:148906 [wandb_setup.py:_flush():80] Loading settings from environment variables +2025-12-26 15:56:50,017 INFO MainThread:148906 [wandb_init.py:setup_run_log_directory():714] Logging user logs to runs/dpo_run_14b_v1/wandb/run-20251226_155650-wbzoafvt/logs/debug.log +2025-12-26 15:56:50,017 INFO MainThread:148906 [wandb_init.py:setup_run_log_directory():715] Logging internal logs to runs/dpo_run_14b_v1/wandb/run-20251226_155650-wbzoafvt/logs/debug-internal.log +2025-12-26 15:56:50,017 INFO MainThread:148906 [wandb_init.py:init():841] calling init triggers +2025-12-26 15:56:50,017 INFO MainThread:148906 [wandb_init.py:init():846] wandb.init called with sweep_config: {} +config: {'model': {'repo_id': '../../Models/Qwen2.5-Coder-14B-CPT-SFT', 'revision': None, 'base_local_dir': 'base_model', 'trust_remote_code': True, 'tokenizer_use_fast': True, 'device_map': 'auto', 'torch_dtype': 'bfloat16', 'use_4bit': False, 'bnb_4bit_quant_type': 'nf4', 'bnb_4bit_use_double_quant': False, 'bnb_4bit_compute_dtype': 'bfloat16', 'attn_implementation': None}, 'data': {'train_jsonl': 'dpo_pairs_generated.jsonl', 'eval_jsonl': None, 'eval_split_ratio': 0.1, 'prompt_field': 'prompt', 'chosen_field': 'chosen', 'rejected_field': 'rejected', 'score_field': 'f1_score', 'format_type': 'chatml', 'system_prompt': 'You are a Hyperswitch Rust code analyzer. Identify functions/structs that need modification for a given task.\n\n## Output Format\n\n##OUTPUT\nExplain the data flow and why each component must change:\n- Flow: [Input → Processing → Output with arrows]\n- For each component: "The [ComponentName] ([path]) must [action] because [reason]—without this, [consequence]"\n- Explain coupling between components\n\n##SELECT\nmodify::crates/path/to/file.rs::impl::ComponentName\nadd::crates/another/file.rs::function::AnotherComponent\n\n\n## Rules\n\n1. Use full paths: `remove::crates/folder/file.rs::Type::Name`\n2. Use `::` for nested items: `status::StructName::Type::Name`\n3. Always explain "must change because" and "without this"\n3. Types of components: function, struct, enum, impl, trait\n4. If there is extra information (e.g., enum variants), include that too.\n5. Start with ##OUTPUT, end with ##SELECT, terminate with \n', 'max_length': 2048, 'shuffle': True, 'num_proc': 4}, 'peft': {'enabled': True, 'r': 16, 'lora_alpha': 32, 'lora_dropout': 0.05, 'bias': 'none', 'target_modules': 'auto'}, 'dpo': {'beta': 0.1, 'label_smoothing': 0.0, 'loss_type': 'sigmoid', 'use_reference_model': True, 'reference_free': False}, 'train': {'num_train_epochs': 3, 'per_device_train_batch_size': 1, 'per_device_eval_batch_size': 1, 'gradient_accumulation_steps': 8, 'learning_rate': '5e-5', 'weight_decay': 0.0, 'warmup_ratio': 0.1, 'lr_scheduler_type': 'cosine', 'optim': 'adamw_torch', 'max_grad_norm': 1.0, 'gradient_checkpointing': True, 'logging_steps': 2, 'save_strategy': 'steps', 'save_steps': 100, 'save_total_limit': 10, 'evaluation_strategy': 'steps', 'eval_steps': 25, 'load_best_model_at_end': True, 'early_stopping': {'enabled': True, 'patience': 5, 'min_delta': 0.001, 'metric': 'eval_loss', 'mode': 'min'}, 'resume_from_checkpoint': 'auto'}, 'run_dir': 'runs/dpo_run_14b_v1', '_wandb': {}} +2025-12-26 15:56:50,017 INFO MainThread:148906 [wandb_init.py:init():889] starting backend +2025-12-26 15:56:50,290 INFO MainThread:148906 [wandb_init.py:init():892] sending inform_init request +2025-12-26 15:56:50,295 INFO MainThread:148906 [wandb_init.py:init():900] backend started and connected +2025-12-26 15:56:50,297 INFO MainThread:148906 [wandb_init.py:init():970] updated telemetry +2025-12-26 15:56:50,297 INFO MainThread:148906 [wandb_init.py:init():994] communicating run to backend with 90.0 second timeout +2025-12-26 15:56:50,648 INFO MainThread:148906 [wandb_init.py:init():1041] starting run threads in backend +2025-12-26 15:56:50,757 INFO MainThread:148906 [wandb_run.py:_console_start():2521] atexit reg +2025-12-26 15:56:50,757 INFO MainThread:148906 [wandb_run.py:_redirect():2369] redirect: wrap_raw +2025-12-26 15:56:50,757 INFO MainThread:148906 [wandb_run.py:_redirect():2438] Wrapping output streams. +2025-12-26 15:56:50,757 INFO MainThread:148906 [wandb_run.py:_redirect():2461] Redirects installed. +2025-12-26 15:56:50,762 INFO MainThread:148906 [wandb_init.py:init():1081] run started, returning control to user process +2025-12-26 15:57:33,783 INFO MainThread:148906 [wandb_run.py:_config_callback():1396] config_cb None None {'peft_config': {'default': {'task_type': 'CAUSAL_LM', 'peft_type': 'LORA', 'auto_mapping': None, 'peft_version': '0.18.0', 'base_model_name_or_path': '../../Models/Qwen2.5-Coder-14B-CPT-SFT', 'revision': None, 'inference_mode': False, 'r': 16, 'target_modules': ['k_proj', 'o_proj', 'v_proj', 'q_proj'], 'exclude_modules': None, 'lora_alpha': 32, 'lora_dropout': 0.05, 'fan_in_fan_out': False, 'bias': 'none', 'use_rslora': False, 'modules_to_save': None, 'init_lora_weights': True, 'layers_to_transform': None, 'layers_pattern': None, 'rank_pattern': {}, 'alpha_pattern': {}, 'megatron_config': None, 'megatron_core': 'megatron.core', 'trainable_token_indices': None, 'loftq_config': {}, 'eva_config': None, 'corda_config': None, 'use_dora': False, 'alora_invocation_tokens': None, 'use_qalora': False, 'qalora_group_size': 16, 'layer_replication': None, 'runtime_config': {'ephemeral_gpu_offload': False}, 'lora_bias': False, 'target_parameters': None, 'arrow_config': None, 'ensure_weight_tying': False}}, 'vocab_size': 152064, 'max_position_embeddings': 32768, 'hidden_size': 5120, 'intermediate_size': 13824, 'num_hidden_layers': 48, 'num_attention_heads': 40, 'use_sliding_window': False, 'sliding_window': None, 'max_window_layers': 48, 'num_key_value_heads': 8, 'hidden_act': 'silu', 'initializer_range': 0.02, 'rms_norm_eps': 1e-06, 'use_cache': False, 'attention_dropout': 0.0, 'layer_types': ['full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention'], 'rope_parameters': {'rope_theta': 1000000.0, 'rope_type': 'default'}, 'return_dict': True, 'output_hidden_states': False, 'dtype': 'bfloat16', 'tie_word_embeddings': False, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'architectures': ['Qwen2ForCausalLM'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'task_specific_params': None, 'problem_type': None, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': None, 'pad_token_id': 151643, 'eos_token_id': 151643, 'sep_token_id': None, 'decoder_start_token_id': None, '_name_or_path': '../../Models/Qwen2.5-Coder-14B-CPT-SFT', 'transformers_version': '5.0.0.dev0', 'model_type': 'qwen2', 'output_attentions': False, 'output_dir': 'runs/dpo_run_14b_v1', 'do_train': False, 'do_eval': True, 'do_predict': False, 'eval_strategy': 'steps', 'prediction_loss_only': False, 'per_device_train_batch_size': 1, 'per_device_eval_batch_size': 1, 'gradient_accumulation_steps': 8, 'eval_accumulation_steps': None, 'eval_delay': 0, 'torch_empty_cache_steps': None, 'learning_rate': 5e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 3, 'max_steps': -1, 'lr_scheduler_type': 'cosine', 'lr_scheduler_kwargs': None, 'warmup_ratio': 0.1, 'warmup_steps': 0.1, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': None, 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 2, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 100, 'save_total_limit': 10, 'enable_jit_checkpoint': False, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'use_cpu': False, 'seed': 42, 'data_seed': None, 'bf16': True, 'fp16': False, 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': -1, 'ddp_backend': None, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 25, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'run_name': None, 'disable_tqdm': False, 'remove_unused_columns': False, 'label_names': None, 'load_best_model_at_end': True, 'metric_for_best_model': 'eval_loss', 'greater_is_better': False, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'parallelism_config': None, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['wandb'], 'project': 'huggingface', 'trackio_space_id': 'trackio', 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'push_to_hub': False, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '', 'hub_private_repo': None, 'hub_always_push': False, 'hub_revision': None, 'gradient_checkpointing': True, 'gradient_checkpointing_kwargs': None, 'include_for_metrics': [], 'eval_do_concat_batches': True, 'auto_find_batch_size': False, 'full_determinism': False, 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'include_num_input_tokens_seen': 'no', 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'liger_kernel_config': None, 'eval_use_gather_object': False, 'average_tokens_across_devices': True, 'model_init_kwargs': None, 'ref_model_init_kwargs': None, 'model_adapter_name': None, 'ref_adapter_name': None, 'force_use_ref_model': False, 'disable_dropout': True, 'use_logits_to_keep': False, 'dataset_num_proc': None, 'pad_token': '', 'label_pad_token_id': -100, 'max_prompt_length': 1024, 'max_completion_length': None, 'max_length': 2048, 'truncation_mode': 'keep_end', 'padding_free': False, 'precompute_ref_log_probs': False, 'precompute_ref_batch_size': None, 'tools': None, 'loss_type': 'sigmoid', 'use_liger_loss': None, 'base_model_attribute_name': 'model', 'beta': 0.1, 'f_divergence_type': 'reverse_kl', 'f_alpha_divergence_coef': 1.0, 'reference_free': False, 'label_smoothing': 0.0, 'use_weighting': False, 'rpo_alpha': None, 'ld_alpha': None, 'discopop_tau': 0.05, 'loss_weights': None, 'sync_ref_model': False, 'ref_model_mixup_alpha': 0.6, 'ref_model_sync_steps': 512, 'generate_during_eval': False} +2025-12-26 15:57:33,791 INFO MainThread:148906 [wandb_config.py:__setitem__():154] [no run ID] config set model/num_parameters = 14795199488 - > +2025-12-26 15:57:33,792 INFO MainThread:148906 [wandb_run.py:_config_callback():1396] config_cb model/num_parameters 14795199488 None +2025-12-26 16:59:00,070 INFO wandb-AsyncioManager-main:148906 [service_client.py:_forward_responses():80] Reached EOF. +2025-12-26 16:59:00,070 INFO wandb-AsyncioManager-main:148906 [mailbox.py:close():137] Closing mailbox, abandoning 1 handles. diff --git a/dpo_qwen_14B/wandb/run-20251226_155650-wbzoafvt/run-wbzoafvt.wandb b/dpo_qwen_14B/wandb/run-20251226_155650-wbzoafvt/run-wbzoafvt.wandb new file mode 100644 index 0000000000000000000000000000000000000000..35a7ce2ac0a8da7a40ce2c9df640b28ebcf29e5a --- /dev/null +++ b/dpo_qwen_14B/wandb/run-20251226_155650-wbzoafvt/run-wbzoafvt.wandb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:00679558234aafe800040b8c88fedf7b94f44f3b9a953dcb8eb28ed2f6af9ccb +size 2174075