diff --git a/.gitattributes b/.gitattributes
index a911605c01e1062d60c14a4b605ff21f5ad4478c..89b7f01b606126365229f43efe4ff182c78e8228 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -59,3 +59,13 @@ grpo_qwen_14b/best_adapter/tokenizer.json filter=lfs diff=lfs merge=lfs -text
 grpo_qwen_14b/checkpoints/checkpoint-400/tokenizer.json filter=lfs diff=lfs merge=lfs -text
 grpo_qwen_14b/checkpoints/checkpoint-500/tokenizer.json filter=lfs diff=lfs merge=lfs -text
 grpo_qwen_14b/wandb/run-20251227_194423-jz7bptqa/run-jz7bptqa.wandb filter=lfs diff=lfs merge=lfs -text
+dpo_run_24b_v1/wandb/run-20251226_153336-fb8js9es/run-fb8js9es.wandb filter=lfs diff=lfs merge=lfs -text
+dpo_run_24b_v1/wandb/run-20251226_153517-g5bybskm/run-g5bybskm.wandb filter=lfs diff=lfs merge=lfs -text
+dpo_run_24b_v1/wandb/run-20251226_153729-pmpxe28f/run-pmpxe28f.wandb filter=lfs diff=lfs merge=lfs -text
+dpo_run_24b_v1/wandb/run-20251226_153949-6fxdx0d2/run-6fxdx0d2.wandb filter=lfs diff=lfs merge=lfs -text
+dpo_run_24b_v1/wandb/run-20251226_154144-0ek9e5bk/run-0ek9e5bk.wandb filter=lfs diff=lfs merge=lfs -text
+dpo_run_24b_v1/wandb/run-20251226_154334-wvpf8qeo/run-wvpf8qeo.wandb filter=lfs diff=lfs merge=lfs -text
+dpo_run_24b_v1/wandb/run-20251226_154526-q26c0nv5/run-q26c0nv5.wandb filter=lfs diff=lfs merge=lfs -text
+dpo_run_24b_v1/wandb/run-20251226_154823-csl0hdpv/run-csl0hdpv.wandb filter=lfs diff=lfs merge=lfs -text
+dpo_run_24b_v1/wandb/run-20251226_155025-xzbi1gai/run-xzbi1gai.wandb filter=lfs diff=lfs merge=lfs -text
+dpo_run_24b_v1/wandb/run-20251226_155204-00msx40b/run-00msx40b.wandb filter=lfs diff=lfs merge=lfs -text
diff --git a/dpo_run_24b_v1/config_resolved.yaml b/dpo_run_24b_v1/config_resolved.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..1ac80c970dc422bfa3af17f169b3c967c8bc397a
--- /dev/null
+++ b/dpo_run_24b_v1/config_resolved.yaml
@@ -0,0 +1,93 @@
+run:  # run-level settings
+  run_dir: './runs/dpo_run_24b_v1'
+  seed: 42
+wandb:  # Weights & Biases logging options
+  enabled: true
+  project: 'dpo-training'
+  entity: null
+  name: null  # the bundled run output reports name='auto-generated' for this null
+  tags:
+    - 'dpo-lora'
+    - 'preference-optimization'
+  notes: null
+model:  # base checkpoint and loading options
+  repo_id: '../../Models/Devstral-Small-2-24B-HS-CPT-SFT'  # NOTE(review): bundled output.log shows AutoModelForCausalLM rejecting this checkpoint's Mistral3Config
+  revision: null
+  base_local_dir: 'base_model'
+  trust_remote_code: true
+  tokenizer_use_fast: true
+  device_map: 'auto'
+  torch_dtype: 'bfloat16'
+  use_4bit: false
+  bnb_4bit_quant_type: 'nf4'  # bnb_4bit_* keys: presumably only consulted when use_4bit is true - TODO confirm
+  bnb_4bit_use_double_quant: false
+  bnb_4bit_compute_dtype: 'bfloat16'
+  attn_implementation: null
+data:  # dataset location, record field names and prompt formatting
+  train_jsonl: dpo_pairs_generated.jsonl
+  eval_jsonl: null  # NOTE(review): presumably eval_split_ratio is used to carve an eval set when this is null - confirm
+  eval_split_ratio: 0.1
+  prompt_field: prompt
+  chosen_field: chosen
+  rejected_field: rejected
+  score_field: f1_score
+  format_type: chatml  # NOTE(review): system_prompt below numbers two rules "3." (1,2,3,3,4,5); left as-is - confirm it matches the prompt used in earlier training stages before renumbering
+  system_prompt: "You are a Hyperswitch Rust code analyzer. Identify functions/structs\
+    \ that need modification for a given task.\n\n## Output Format\n\n##OUTPUT\nExplain\
+    \ the data flow and why each component must change:\n- Flow: [Input \u2192 Processing\
+    \ \u2192 Output with arrows]\n- For each component: \"The [ComponentName] ([path])\
+    \ must [action] because [reason]\u2014without this, [consequence]\"\n- Explain\
+    \ coupling between components\n\n##SELECT\nmodify::crates/path/to/file.rs::impl::ComponentName\n\
+    add::crates/another/file.rs::function::AnotherComponent\n<EOS>\n\n## Rules\n\n\
+    1. Use full paths: `remove::crates/folder/file.rs::Type::Name`\n2. Use `::` for\
+    \ nested items: `status::StructName::Type::Name`\n3. Always explain \"must change\
+    \ because\" and \"without this\"\n3. Types of components: function, struct, enum,\
+    \ impl, trait\n4. If there is extra information (e.g., enum variants), include\
+    \ that too.\n5. Start with ##OUTPUT, end with ##SELECT, terminate with <EOS>\n"
+  max_length: 2048
+  shuffle: true
+  num_proc: 4
+peft:  # adapter settings
+  enabled: true
+  r: 16
+  lora_alpha: 32  # 2 x r
+  lora_dropout: 0.05
+  bias: 'none'  # the literal string "none", not a YAML null
+  target_modules: 'auto'
+dpo:  # preference-optimization loss settings
+  beta: 0.1
+  label_smoothing: 0.0
+  loss_type: 'sigmoid'
+  use_reference_model: true
+  reference_free: false  # NOTE(review): presumably must stay the opposite of use_reference_model - confirm in run_dpo.py
+train:  # optimizer, schedule, checkpointing and evaluation settings
+  num_train_epochs: 3
+  per_device_train_batch_size: 1
+  per_device_eval_batch_size: 1
+  gradient_accumulation_steps: 8
+  learning_rate: 5.0e-5  # the '.' is required: YAML 1.1 loaders (PyYAML) read a bare 5e-5 as the string '5e-5'
+  weight_decay: 0.0
+  warmup_ratio: 0.1
+  lr_scheduler_type: cosine
+  optim: adamw_torch
+  max_grad_norm: 1.0
+  gradient_checkpointing: true
+  logging_steps: 2
+  save_strategy: steps
+  save_steps: 100  # a multiple of eval_steps (25)
+  save_total_limit: 10
+  evaluation_strategy: steps  # NOTE(review): newer transformers name this argument eval_strategy - confirm run_dpo.py maps the key
+  eval_steps: 25
+  load_best_model_at_end: true
+  early_stopping:
+    enabled: true
+    patience: 5
+    min_delta: 0.001
+    metric: eval_loss
+    mode: min  # lower eval_loss counts as better
+  resume_from_checkpoint: auto
+merge:  # settings for writing the merged model
+  enabled: true
+  merged_dtype: float16  # NOTE(review): model.torch_dtype is bfloat16 - confirm a float16 export is intended
+  max_shard_size: 2GB
+  output_dir: ./merged_24b_dpo_lora  # was ./merged_14b_dpo_lora; this run's base model is the 24B checkpoint
diff --git a/dpo_run_24b_v1/wandb/debug-internal.log b/dpo_run_24b_v1/wandb/debug-internal.log
new file mode 100644
index 0000000000000000000000000000000000000000..5f36f167704caa7abd99a8abed599c6bacfb1641
--- /dev/null
+++ b/dpo_run_24b_v1/wandb/debug-internal.log
@@ -0,0 +1,11 @@
+{"time":"2025-12-26T15:52:04.520208686Z","level":"INFO","msg":"stream: starting","core version":"0.23.1"}
+{"time":"2025-12-26T15:52:04.677441728Z","level":"INFO","msg":"stream: created new stream","id":"00msx40b"}
+{"time":"2025-12-26T15:52:04.677538624Z","level":"INFO","msg":"handler: started","stream_id":"00msx40b"}
+{"time":"2025-12-26T15:52:04.677662488Z","level":"INFO","msg":"stream: started","id":"00msx40b"}
+{"time":"2025-12-26T15:52:04.677684998Z","level":"INFO","msg":"writer: started","stream_id":"00msx40b"}
+{"time":"2025-12-26T15:52:04.677696651Z","level":"INFO","msg":"sender: started","stream_id":"00msx40b"}
+{"time":"2025-12-26T15:52:45.509029743Z","level":"INFO","msg":"stream: closing","id":"00msx40b"}
+{"time":"2025-12-26T15:52:45.704898985Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
+{"time":"2025-12-26T15:52:45.822151941Z","level":"INFO","msg":"handler: closed","stream_id":"00msx40b"}
+{"time":"2025-12-26T15:52:45.822254749Z","level":"INFO","msg":"sender: closed","stream_id":"00msx40b"}
+{"time":"2025-12-26T15:52:45.822266001Z","level":"INFO","msg":"stream: closed","id":"00msx40b"}
diff --git a/dpo_run_24b_v1/wandb/debug.log b/dpo_run_24b_v1/wandb/debug.log
new file mode 100644
index 0000000000000000000000000000000000000000..997f3b39afc3e408f531cd3962917493439bd82e
--- /dev/null
+++ b/dpo_run_24b_v1/wandb/debug.log
@@ -0,0 +1,23 @@
+2025-12-26 15:52:04,246 INFO    MainThread:147336 [wandb_setup.py:_flush():80] Current SDK version is 0.23.1
+2025-12-26 15:52:04,246 INFO    MainThread:147336 [wandb_setup.py:_flush():80] Configure stats pid to 147336
+2025-12-26 15:52:04,246 INFO    MainThread:147336 [wandb_setup.py:_flush():80] Loading settings from /root/.config/wandb/settings
+2025-12-26 15:52:04,246 INFO    MainThread:147336 [wandb_setup.py:_flush():80] Loading settings from /workspace/trainer-kit/DPO/wandb/settings
+2025-12-26 15:52:04,246 INFO    MainThread:147336 [wandb_setup.py:_flush():80] Loading settings from environment variables
+2025-12-26 15:52:04,246 INFO    MainThread:147336 [wandb_init.py:setup_run_log_directory():714] Logging user logs to runs/dpo_run_24b_v1/wandb/run-20251226_155204-00msx40b/logs/debug.log
+2025-12-26 15:52:04,246 INFO    MainThread:147336 [wandb_init.py:setup_run_log_directory():715] Logging internal logs to runs/dpo_run_24b_v1/wandb/run-20251226_155204-00msx40b/logs/debug-internal.log
+2025-12-26 15:52:04,246 INFO    MainThread:147336 [wandb_init.py:init():841] calling init triggers
+2025-12-26 15:52:04,246 INFO    MainThread:147336 [wandb_init.py:init():846] wandb.init called with sweep_config: {}
+config: {'model': {'repo_id': '../../Models/Devstral-Small-2-24B-HS-CPT-SFT', 'revision': None, 'base_local_dir': 'base_model', 'trust_remote_code': True, 'tokenizer_use_fast': True, 'device_map': 'auto', 'torch_dtype': 'bfloat16', 'use_4bit': False, 'bnb_4bit_quant_type': 'nf4', 'bnb_4bit_use_double_quant': False, 'bnb_4bit_compute_dtype': 'bfloat16', 'attn_implementation': None}, 'data': {'train_jsonl': 'dpo_pairs_generated.jsonl', 'eval_jsonl': None, 'eval_split_ratio': 0.1, 'prompt_field': 'prompt', 'chosen_field': 'chosen', 'rejected_field': 'rejected', 'score_field': 'f1_score', 'format_type': 'chatml', 'system_prompt': 'You are a Hyperswitch Rust code analyzer. Identify functions/structs that need modification for a given task.\n\n## Output Format\n\n##OUTPUT\nExplain the data flow and why each component must change:\n- Flow: [Input → Processing → Output with arrows]\n- For each component: "The [ComponentName] ([path]) must [action] because [reason]—without this, [consequence]"\n- Explain coupling between components\n\n##SELECT\nmodify::crates/path/to/file.rs::impl::ComponentName\nadd::crates/another/file.rs::function::AnotherComponent\n<EOS>\n\n## Rules\n\n1. Use full paths: `remove::crates/folder/file.rs::Type::Name`\n2. Use `::` for nested items: `status::StructName::Type::Name`\n3. Always explain "must change because" and "without this"\n3. Types of components: function, struct, enum, impl, trait\n4. If there is extra information (e.g., enum variants), include that too.\n5. 
Start with ##OUTPUT, end with ##SELECT, terminate with <EOS>\n', 'max_length': 2048, 'shuffle': True, 'num_proc': 4}, 'peft': {'enabled': True, 'r': 16, 'lora_alpha': 32, 'lora_dropout': 0.05, 'bias': 'none', 'target_modules': 'auto'}, 'dpo': {'beta': 0.1, 'label_smoothing': 0.0, 'loss_type': 'sigmoid', 'use_reference_model': True, 'reference_free': False}, 'train': {'num_train_epochs': 3, 'per_device_train_batch_size': 1, 'per_device_eval_batch_size': 1, 'gradient_accumulation_steps': 8, 'learning_rate': '5e-5', 'weight_decay': 0.0, 'warmup_ratio': 0.1, 'lr_scheduler_type': 'cosine', 'optim': 'adamw_torch', 'max_grad_norm': 1.0, 'gradient_checkpointing': True, 'logging_steps': 2, 'save_strategy': 'steps', 'save_steps': 100, 'save_total_limit': 10, 'evaluation_strategy': 'steps', 'eval_steps': 25, 'load_best_model_at_end': True, 'early_stopping': {'enabled': True, 'patience': 5, 'min_delta': 0.001, 'metric': 'eval_loss', 'mode': 'min'}, 'resume_from_checkpoint': 'auto'}, 'run_dir': 'runs/dpo_run_24b_v1', '_wandb': {}}
+2025-12-26 15:52:04,246 INFO    MainThread:147336 [wandb_init.py:init():889] starting backend
+2025-12-26 15:52:04,513 INFO    MainThread:147336 [wandb_init.py:init():892] sending inform_init request
+2025-12-26 15:52:04,518 INFO    MainThread:147336 [wandb_init.py:init():900] backend started and connected
+2025-12-26 15:52:04,520 INFO    MainThread:147336 [wandb_init.py:init():970] updated telemetry
+2025-12-26 15:52:04,521 INFO    MainThread:147336 [wandb_init.py:init():994] communicating run to backend with 90.0 second timeout
+2025-12-26 15:52:04,863 INFO    MainThread:147336 [wandb_init.py:init():1041] starting run threads in backend
+2025-12-26 15:52:04,981 INFO    MainThread:147336 [wandb_run.py:_console_start():2521] atexit reg
+2025-12-26 15:52:04,981 INFO    MainThread:147336 [wandb_run.py:_redirect():2369] redirect: wrap_raw
+2025-12-26 15:52:04,981 INFO    MainThread:147336 [wandb_run.py:_redirect():2438] Wrapping output streams.
+2025-12-26 15:52:04,981 INFO    MainThread:147336 [wandb_run.py:_redirect():2461] Redirects installed.
+2025-12-26 15:52:04,987 INFO    MainThread:147336 [wandb_init.py:init():1081] run started, returning control to user process
+2025-12-26 15:52:45,509 INFO    wandb-AsyncioManager-main:147336 [service_client.py:_forward_responses():80] Reached EOF.
+2025-12-26 15:52:45,509 INFO    wandb-AsyncioManager-main:147336 [mailbox.py:close():137] Closing mailbox, abandoning 1 handles.
diff --git a/dpo_run_24b_v1/wandb/run-20251226_152629-ymohys1q/files/config.yaml b/dpo_run_24b_v1/wandb/run-20251226_152629-ymohys1q/files/config.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..a9507254e38baa9b8ed92b52880f29266d282927
--- /dev/null
+++ b/dpo_run_24b_v1/wandb/run-20251226_152629-ymohys1q/files/config.yaml
@@ -0,0 +1,165 @@
+_wandb:
+    value:
+        cli_version: 0.23.1
+        e:
+            05zaxpswkd8t9j7nmlszf02dmdnkyst5:
+                args:
+                    - --config
+                    - config_dpo.yaml
+                codePath: run_dpo.py
+                codePathLocal: run_dpo.py
+                cpu_count: 12
+                cpu_count_logical: 24
+                cudaVersion: "13.0"
+                disk:
+                    /:
+                        total: "791251738624"
+                        used: "315579105280"
+                email: shaiksirajuddin9949@gmail.com
+                executable: /workspace/llm_finetuning_env/bin/python
+                gpu: NVIDIA A100-SXM4-80GB
+                gpu_count: 2
+                gpu_nvidia:
+                    - architecture: Ampere
+                      cudaCores: 6912
+                      memoryTotal: "85899345920"
+                      name: NVIDIA A100-SXM4-80GB
+                      uuid: GPU-989794b0-ec3b-13bf-db9f-3fbe341497ba
+                    - architecture: Ampere
+                      cudaCores: 6912
+                      memoryTotal: "85899345920"
+                      name: NVIDIA A100-SXM4-80GB
+                      uuid: GPU-3790aa64-60ef-9eac-b0b1-b278ee8c0d40
+                host: a100-2gpu-shell-session-757d587799-mfdvv
+                memory:
+                    total: "359047892992"
+                os: Linux-6.12.46+-x86_64-with-glibc2.35
+                program: /workspace/trainer-kit/DPO/run_dpo.py
+                python: CPython 3.10.12
+                root: runs/dpo_run_24b_v1
+                startedAt: "2025-12-26T15:26:29.019842Z"
+                writerId: 05zaxpswkd8t9j7nmlszf02dmdnkyst5
+        m: []
+        python_version: 3.10.12
+        t:
+            "1":
+                - 1
+                - 11
+                - 41
+                - 49
+                - 51
+                - 71
+                - 84
+                - 98
+            "2":
+                - 1
+                - 11
+                - 41
+                - 49
+                - 51
+                - 71
+                - 84
+                - 98
+            "3":
+                - 15
+                - 16
+            "4": 3.10.12
+            "5": 0.23.1
+            "6": 5.0.0.dev0
+            "12": 0.23.1
+            "13": linux-x86_64
+data:
+    value:
+        chosen_field: chosen
+        eval_jsonl: null
+        eval_split_ratio: 0.1
+        format_type: chatml
+        max_length: 2048
+        num_proc: 4
+        prompt_field: prompt
+        rejected_field: rejected
+        score_field: f1_score
+        shuffle: true
+        system_prompt: |
+            You are a Hyperswitch Rust code analyzer. Identify functions/structs that need modification for a given task.
+
+            ## Output Format
+
+            ##OUTPUT
+            Explain the data flow and why each component must change:
+            - Flow: [Input → Processing → Output with arrows]
+            - For each component: "The [ComponentName] ([path]) must [action] because [reason]—without this, [consequence]"
+            - Explain coupling between components
+
+            ##SELECT
+            modify::crates/path/to/file.rs::impl::ComponentName
+            add::crates/another/file.rs::function::AnotherComponent
+            <EOS>
+
+            ## Rules
+
+            1. Use full paths: `remove::crates/folder/file.rs::Type::Name`
+            2. Use `::` for nested items: `status::StructName::Type::Name`
+            3. Always explain "must change because" and "without this"
+            3. Types of components: function, struct, enum, impl, trait
+            4. If there is extra information (e.g., enum variants), include that too.
+            5. Start with ##OUTPUT, end with ##SELECT, terminate with <EOS>
+        train_jsonl: dpo_pairs_generated.jsonl
+dpo:
+    value:
+        beta: 0.1
+        label_smoothing: 0
+        loss_type: sigmoid
+        reference_free: false
+        use_reference_model: true
+model:
+    value:
+        attn_implementation: null
+        base_local_dir: base_model
+        bnb_4bit_compute_dtype: bfloat16
+        bnb_4bit_quant_type: nf4
+        bnb_4bit_use_double_quant: false
+        device_map: auto
+        repo_id: ../../Models/Devstral-Small-2-24B-HS-CPT-SFT
+        revision: null
+        tokenizer_use_fast: true
+        torch_dtype: bfloat16
+        trust_remote_code: true
+        use_4bit: false
+peft:
+    value:
+        bias: none
+        enabled: true
+        lora_alpha: 32
+        lora_dropout: 0.05
+        r: 16
+        target_modules: auto
+run_dir:
+    value: runs/dpo_run_24b_v1
+train:
+    value:
+        early_stopping:
+            enabled: true
+            metric: eval_loss
+            min_delta: 0.001
+            mode: min
+            patience: 5
+        eval_steps: 25
+        evaluation_strategy: steps
+        gradient_accumulation_steps: 8
+        gradient_checkpointing: true
+        learning_rate: "5e-5"
+        load_best_model_at_end: true
+        logging_steps: 2
+        lr_scheduler_type: cosine
+        max_grad_norm: 1
+        num_train_epochs: 3
+        optim: adamw_torch
+        per_device_eval_batch_size: 1
+        per_device_train_batch_size: 1
+        resume_from_checkpoint: auto
+        save_steps: 100
+        save_strategy: steps
+        save_total_limit: 10
+        warmup_ratio: 0.1
+        weight_decay: 0
diff --git a/dpo_run_24b_v1/wandb/run-20251226_152629-ymohys1q/files/output.log b/dpo_run_24b_v1/wandb/run-20251226_152629-ymohys1q/files/output.log
new file mode 100644
index 0000000000000000000000000000000000000000..88692e5a75427e1cec7d1e6b5b15f1b1fa83593b
--- /dev/null
+++ b/dpo_run_24b_v1/wandb/run-20251226_152629-ymohys1q/files/output.log
@@ -0,0 +1,44 @@
+Wandb initialized: project='dpo-training', name='auto-generated'
+`torch_dtype` is deprecated! Use `dtype` instead!
+Traceback (most recent call last):
+  File "/workspace/trainer-kit/DPO/run_dpo.py", line 555, in load_base_model_and_tokenizer
+    model = AutoModelForCausalLM.from_pretrained(
+  File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/models/auto/auto_factory.py", line 376, in from_pretrained
+    raise ValueError(
+ValueError: Unrecognized configuration class <class 'transformers.models.mistral3.configuration_mistral3.Mistral3Config'> for this kind of AutoModel: AutoModelForCausalLM.
+Model type should be one of AfmoeConfig, ApertusConfig, ArceeConfig, AriaTextConfig, BambaConfig, BartConfig, BertConfig, BertGenerationConfig, BigBirdConfig, BigBirdPegasusConfig, BioGptConfig, BitNetConfig, BlenderbotConfig, BlenderbotSmallConfig, BloomConfig, BltConfig, CamembertConfig, LlamaConfig, CodeGenConfig, CohereConfig, Cohere2Config, CpmAntConfig, CTRLConfig, CwmConfig, Data2VecTextConfig, DbrxConfig, DeepseekV2Config, DeepseekV3Config, DiffLlamaConfig, DogeConfig, Dots1Config, ElectraConfig, Emu3Config, ErnieConfig, Ernie4_5Config, Ernie4_5_MoeConfig, Exaone4Config, FalconConfig, FalconH1Config, FalconMambaConfig, FlexOlmoConfig, FuyuConfig, GemmaConfig, Gemma2Config, Gemma3Config, Gemma3TextConfig, Gemma3nConfig, Gemma3nTextConfig, GitConfig, GlmConfig, Glm4Config, Glm4MoeConfig, GotOcr2Config, GPT2Config, GPT2Config, GPTBigCodeConfig, GPTNeoConfig, GPTNeoXConfig, GPTNeoXJapaneseConfig, GptOssConfig, GPTJConfig, GraniteConfig, GraniteMoeConfig, GraniteMoeHybridConfig, GraniteMoeSharedConfig, HeliumConfig, HunYuanDenseV1Config, HunYuanMoEV1Config, Jais2Config, JambaConfig, JetMoeConfig, Lfm2Config, Lfm2MoeConfig, LlamaConfig, Llama4Config, Llama4TextConfig, LongcatFlashConfig, MambaConfig, Mamba2Config, MarianConfig, MBartConfig, MegatronBertConfig, MiniMaxConfig, MinistralConfig, Ministral3Config, MistralConfig, MixtralConfig, MllamaConfig, ModernBertDecoderConfig, MoshiConfig, MptConfig, MusicgenConfig, MusicgenMelodyConfig, MvpConfig, NanoChatConfig, NemotronConfig, OlmoConfig, Olmo2Config, Olmo3Config, OlmoeConfig, OpenAIGPTConfig, OPTConfig, PegasusConfig, PersimmonConfig, PhiConfig, Phi3Config, Phi4MultimodalConfig, PhimoeConfig, PLBartConfig, ProphetNetConfig, Qwen2Config, Qwen2MoeConfig, Qwen3Config, Qwen3MoeConfig, Qwen3NextConfig, RecurrentGemmaConfig, ReformerConfig, RemBertConfig, RobertaConfig, RobertaPreLayerNormConfig, RoCBertConfig, RoFormerConfig, RwkvConfig, SeedOssConfig, SmolLM3Config, StableLmConfig, Starcoder2Config, TrOCRConfig, 
VaultGemmaConfig, WhisperConfig, XGLMConfig, XLMConfig, XLMRobertaConfig, XLMRobertaXLConfig, XLNetConfig, xLSTMConfig, XmodConfig, ZambaConfig, Zamba2Config.
+
+During handling of the above exception, another exception occurred:
+
+Traceback (most recent call last):
+  File "/workspace/trainer-kit/DPO/run_dpo.py", line 953, in <module>
+    main()
+  File "/workspace/trainer-kit/DPO/run_dpo.py", line 744, in main
+    model, tokenizer = load_base_model_and_tokenizer(cfg, base_dir)
+  File "/workspace/trainer-kit/DPO/run_dpo.py", line 568, in load_base_model_and_tokenizer
+    model = AutoModelForCausalLM.from_pretrained(
+  File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/models/auto/auto_factory.py", line 376, in from_pretrained
+    raise ValueError(
+ValueError: Unrecognized configuration class <class 'transformers.models.mistral3.configuration_mistral3.Mistral3Config'> for this kind of AutoModel: AutoModelForCausalLM.
+Model type should be one of AfmoeConfig, ApertusConfig, ArceeConfig, AriaTextConfig, BambaConfig, BartConfig, BertConfig, BertGenerationConfig, BigBirdConfig, BigBirdPegasusConfig, BioGptConfig, BitNetConfig, BlenderbotConfig, BlenderbotSmallConfig, BloomConfig, BltConfig, CamembertConfig, LlamaConfig, CodeGenConfig, CohereConfig, Cohere2Config, CpmAntConfig, CTRLConfig, CwmConfig, Data2VecTextConfig, DbrxConfig, DeepseekV2Config, DeepseekV3Config, DiffLlamaConfig, DogeConfig, Dots1Config, ElectraConfig, Emu3Config, ErnieConfig, Ernie4_5Config, Ernie4_5_MoeConfig, Exaone4Config, FalconConfig, FalconH1Config, FalconMambaConfig, FlexOlmoConfig, FuyuConfig, GemmaConfig, Gemma2Config, Gemma3Config, Gemma3TextConfig, Gemma3nConfig, Gemma3nTextConfig, GitConfig, GlmConfig, Glm4Config, Glm4MoeConfig, GotOcr2Config, GPT2Config, GPT2Config, GPTBigCodeConfig, GPTNeoConfig, GPTNeoXConfig, GPTNeoXJapaneseConfig, GptOssConfig, GPTJConfig, GraniteConfig, GraniteMoeConfig, GraniteMoeHybridConfig, GraniteMoeSharedConfig, HeliumConfig, HunYuanDenseV1Config, HunYuanMoEV1Config, Jais2Config, JambaConfig, JetMoeConfig, Lfm2Config, Lfm2MoeConfig, LlamaConfig, Llama4Config, Llama4TextConfig, LongcatFlashConfig, MambaConfig, Mamba2Config, MarianConfig, MBartConfig, MegatronBertConfig, MiniMaxConfig, MinistralConfig, Ministral3Config, MistralConfig, MixtralConfig, MllamaConfig, ModernBertDecoderConfig, MoshiConfig, MptConfig, MusicgenConfig, MusicgenMelodyConfig, MvpConfig, NanoChatConfig, NemotronConfig, OlmoConfig, Olmo2Config, Olmo3Config, OlmoeConfig, OpenAIGPTConfig, OPTConfig, PegasusConfig, PersimmonConfig, PhiConfig, Phi3Config, Phi4MultimodalConfig, PhimoeConfig, PLBartConfig, ProphetNetConfig, Qwen2Config, Qwen2MoeConfig, Qwen3Config, Qwen3MoeConfig, Qwen3NextConfig, RecurrentGemmaConfig, ReformerConfig, RemBertConfig, RobertaConfig, RobertaPreLayerNormConfig, RoCBertConfig, RoFormerConfig, RwkvConfig, SeedOssConfig, SmolLM3Config, StableLmConfig, Starcoder2Config, TrOCRConfig, 
VaultGemmaConfig, WhisperConfig, XGLMConfig, XLMConfig, XLMRobertaConfig, XLMRobertaXLConfig, XLNetConfig, xLSTMConfig, XmodConfig, ZambaConfig, Zamba2Config.
+Traceback (most recent call last):
+  File "/workspace/trainer-kit/DPO/run_dpo.py", line 555, in load_base_model_and_tokenizer
+    model = AutoModelForCausalLM.from_pretrained(
+  File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/models/auto/auto_factory.py", line 376, in from_pretrained
+    raise ValueError(
+ValueError: Unrecognized configuration class <class 'transformers.models.mistral3.configuration_mistral3.Mistral3Config'> for this kind of AutoModel: AutoModelForCausalLM.
+Model type should be one of AfmoeConfig, ApertusConfig, ArceeConfig, AriaTextConfig, BambaConfig, BartConfig, BertConfig, BertGenerationConfig, BigBirdConfig, BigBirdPegasusConfig, BioGptConfig, BitNetConfig, BlenderbotConfig, BlenderbotSmallConfig, BloomConfig, BltConfig, CamembertConfig, LlamaConfig, CodeGenConfig, CohereConfig, Cohere2Config, CpmAntConfig, CTRLConfig, CwmConfig, Data2VecTextConfig, DbrxConfig, DeepseekV2Config, DeepseekV3Config, DiffLlamaConfig, DogeConfig, Dots1Config, ElectraConfig, Emu3Config, ErnieConfig, Ernie4_5Config, Ernie4_5_MoeConfig, Exaone4Config, FalconConfig, FalconH1Config, FalconMambaConfig, FlexOlmoConfig, FuyuConfig, GemmaConfig, Gemma2Config, Gemma3Config, Gemma3TextConfig, Gemma3nConfig, Gemma3nTextConfig, GitConfig, GlmConfig, Glm4Config, Glm4MoeConfig, GotOcr2Config, GPT2Config, GPT2Config, GPTBigCodeConfig, GPTNeoConfig, GPTNeoXConfig, GPTNeoXJapaneseConfig, GptOssConfig, GPTJConfig, GraniteConfig, GraniteMoeConfig, GraniteMoeHybridConfig, GraniteMoeSharedConfig, HeliumConfig, HunYuanDenseV1Config, HunYuanMoEV1Config, Jais2Config, JambaConfig, JetMoeConfig, Lfm2Config, Lfm2MoeConfig, LlamaConfig, Llama4Config, Llama4TextConfig, LongcatFlashConfig, MambaConfig, Mamba2Config, MarianConfig, MBartConfig, MegatronBertConfig, MiniMaxConfig, MinistralConfig, Ministral3Config, MistralConfig, MixtralConfig, MllamaConfig, ModernBertDecoderConfig, MoshiConfig, MptConfig, MusicgenConfig, MusicgenMelodyConfig, MvpConfig, NanoChatConfig, NemotronConfig, OlmoConfig, Olmo2Config, Olmo3Config, OlmoeConfig, OpenAIGPTConfig, OPTConfig, PegasusConfig, PersimmonConfig, PhiConfig, Phi3Config, Phi4MultimodalConfig, PhimoeConfig, PLBartConfig, ProphetNetConfig, Qwen2Config, Qwen2MoeConfig, Qwen3Config, Qwen3MoeConfig, Qwen3NextConfig, RecurrentGemmaConfig, ReformerConfig, RemBertConfig, RobertaConfig, RobertaPreLayerNormConfig, RoCBertConfig, RoFormerConfig, RwkvConfig, SeedOssConfig, SmolLM3Config, StableLmConfig, Starcoder2Config, TrOCRConfig, 
VaultGemmaConfig, WhisperConfig, XGLMConfig, XLMConfig, XLMRobertaConfig, XLMRobertaXLConfig, XLNetConfig, xLSTMConfig, XmodConfig, ZambaConfig, Zamba2Config.
+
+During handling of the above exception, another exception occurred:
+
+Traceback (most recent call last):
+  File "/workspace/trainer-kit/DPO/run_dpo.py", line 953, in <module>
+    main()
+  File "/workspace/trainer-kit/DPO/run_dpo.py", line 744, in main
+    model, tokenizer = load_base_model_and_tokenizer(cfg, base_dir)
+  File "/workspace/trainer-kit/DPO/run_dpo.py", line 568, in load_base_model_and_tokenizer
+    model = AutoModelForCausalLM.from_pretrained(
+  File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/models/auto/auto_factory.py", line 376, in from_pretrained
+    raise ValueError(
+ValueError: Unrecognized configuration class <class 'transformers.models.mistral3.configuration_mistral3.Mistral3Config'> for this kind of AutoModel: AutoModelForCausalLM.
+Model type should be one of AfmoeConfig, ApertusConfig, ArceeConfig, AriaTextConfig, BambaConfig, BartConfig, BertConfig, BertGenerationConfig, BigBirdConfig, BigBirdPegasusConfig, BioGptConfig, BitNetConfig, BlenderbotConfig, BlenderbotSmallConfig, BloomConfig, BltConfig, CamembertConfig, LlamaConfig, CodeGenConfig, CohereConfig, Cohere2Config, CpmAntConfig, CTRLConfig, CwmConfig, Data2VecTextConfig, DbrxConfig, DeepseekV2Config, DeepseekV3Config, DiffLlamaConfig, DogeConfig, Dots1Config, ElectraConfig, Emu3Config, ErnieConfig, Ernie4_5Config, Ernie4_5_MoeConfig, Exaone4Config, FalconConfig, FalconH1Config, FalconMambaConfig, FlexOlmoConfig, FuyuConfig, GemmaConfig, Gemma2Config, Gemma3Config, Gemma3TextConfig, Gemma3nConfig, Gemma3nTextConfig, GitConfig, GlmConfig, Glm4Config, Glm4MoeConfig, GotOcr2Config, GPT2Config, GPT2Config, GPTBigCodeConfig, GPTNeoConfig, GPTNeoXConfig, GPTNeoXJapaneseConfig, GptOssConfig, GPTJConfig, GraniteConfig, GraniteMoeConfig, GraniteMoeHybridConfig, GraniteMoeSharedConfig, HeliumConfig, HunYuanDenseV1Config, HunYuanMoEV1Config, Jais2Config, JambaConfig, JetMoeConfig, Lfm2Config, Lfm2MoeConfig, LlamaConfig, Llama4Config, Llama4TextConfig, LongcatFlashConfig, MambaConfig, Mamba2Config, MarianConfig, MBartConfig, MegatronBertConfig, MiniMaxConfig, MinistralConfig, Ministral3Config, MistralConfig, MixtralConfig, MllamaConfig, ModernBertDecoderConfig, MoshiConfig, MptConfig, MusicgenConfig, MusicgenMelodyConfig, MvpConfig, NanoChatConfig, NemotronConfig, OlmoConfig, Olmo2Config, Olmo3Config, OlmoeConfig, OpenAIGPTConfig, OPTConfig, PegasusConfig, PersimmonConfig, PhiConfig, Phi3Config, Phi4MultimodalConfig, PhimoeConfig, PLBartConfig, ProphetNetConfig, Qwen2Config, Qwen2MoeConfig, Qwen3Config, Qwen3MoeConfig, Qwen3NextConfig, RecurrentGemmaConfig, ReformerConfig, RemBertConfig, RobertaConfig, RobertaPreLayerNormConfig, RoCBertConfig, RoFormerConfig, RwkvConfig, SeedOssConfig, SmolLM3Config, StableLmConfig, Starcoder2Config, TrOCRConfig, 
VaultGemmaConfig, WhisperConfig, XGLMConfig, XLMConfig, XLMRobertaConfig, XLMRobertaXLConfig, XLNetConfig, xLSTMConfig, XmodConfig, ZambaConfig, Zamba2Config.
diff --git a/dpo_run_24b_v1/wandb/run-20251226_152629-ymohys1q/files/requirements.txt b/dpo_run_24b_v1/wandb/run-20251226_152629-ymohys1q/files/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..79a4241d8724f018c9bdfcd7c289f1f14578574b
--- /dev/null
+++ b/dpo_run_24b_v1/wandb/run-20251226_152629-ymohys1q/files/requirements.txt
@@ -0,0 +1,104 @@
+exceptiongroup==1.3.1
+wheel==0.45.1
+python-dateutil==2.9.0.post0
+nvidia-ml-py==13.580.82
+huggingface_hub==1.2.3
+idna==3.11
+click==8.3.1
+numpy==2.2.6
+httpx==0.28.1
+tokenizers==0.22.1
+sympy==1.13.1
+yarl==1.22.0
+async-timeout==5.0.1
+datasets==4.4.2
+platformdirs==4.5.1
+nvidia-cuda-cupti-cu12==12.1.105
+nvidia-nvtx-cu12==12.1.105
+smmap==5.0.2
+accelerate==1.12.0
+requests==2.32.5
+aiohttp==3.13.2
+bitsandbytes==0.49.0
+nvidia-cublas-cu12==12.1.3.1
+mpmath==1.3.0
+torchaudio==2.5.1+cu121
+nvidia-cuda-runtime-cu12==12.1.105
+typing-inspection==0.4.2
+GitPython==3.1.45
+xxhash==3.6.0
+nvidia-cusolver-cu12==11.4.5.107
+pydantic_core==2.41.5
+six==1.17.0
+torchvision==0.20.1+cu121
+typing_extensions==4.15.0
+triton==3.1.0
+charset-normalizer==3.4.4
+nvitop==1.6.1
+wandb==0.23.1
+regex==2025.11.3
+pip==25.3
+nvidia-cusparse-cu12==12.1.0.106
+pytz==2025.2
+Jinja2==3.1.6
+psutil==7.2.0
+pillow==12.0.0
+packaging==25.0
+safetensors==0.7.0
+sentry-sdk==2.48.0
+gitdb==4.0.12
+httpcore==1.0.9
+setuptools==80.9.0
+nvidia-cufft-cu12==11.0.2.54
+anyio==4.12.0
+transformers==5.0.0.dev0
+pydantic==2.12.5
+fsspec==2025.10.0
+filelock==3.20.0
+PyYAML==6.0.3
+hf-xet==1.2.0
+nvidia-cudnn-cu12==9.1.0.70
+tqdm==4.67.1
+MarkupSafe==2.1.5
+attrs==25.4.0
+nvidia-cuda-nvrtc-cu12==12.1.105
+peft==0.18.0
+aiohappyeyeballs==2.6.1
+networkx==3.4.2
+nvidia-nvjitlink-cu12==12.9.86
+certifi==2025.11.12
+pyarrow==22.0.0
+dill==0.4.0
+protobuf==6.33.2
+aiosignal==1.4.0
+frozenlist==1.8.0
+urllib3==2.6.2
+propcache==0.4.1
+tzdata==2025.3
+pandas==2.3.3
+annotated-types==0.7.0
+shellingham==1.5.4
+nvidia-nccl-cu12==2.21.5
+multidict==6.7.0
+nvidia-curand-cu12==10.3.2.106
+trl==0.26.2
+torch==2.5.1+cu121
+h11==0.16.0
+multiprocess==0.70.18
+typer-slim==0.21.0
+wheel==0.45.1
+tomli==2.0.1
+autocommand==2.2.2
+jaraco.context==5.3.0
+zipp==3.19.2
+packaging==24.2
+inflect==7.3.1
+typing_extensions==4.12.2
+platformdirs==4.2.2
+jaraco.functools==4.0.1
+jaraco.collections==5.1.0
+jaraco.text==3.12.1
+backports.tarfile==1.2.0
+more-itertools==10.3.0
+importlib_metadata==8.0.0
+typeguard==4.3.0
diff --git a/dpo_run_24b_v1/wandb/run-20251226_152629-ymohys1q/files/wandb-metadata.json b/dpo_run_24b_v1/wandb/run-20251226_152629-ymohys1q/files/wandb-metadata.json
new file mode 100644
index 0000000000000000000000000000000000000000..9cc0ad33192a83f6a1cb4f3f6e7acb01ed66f2b8
--- /dev/null
+++ b/dpo_run_24b_v1/wandb/run-20251226_152629-ymohys1q/files/wandb-metadata.json
@@ -0,0 +1,47 @@
+{
+  "os":  "Linux-6.12.46+-x86_64-with-glibc2.35",
+  "python":  "CPython 3.10.12",
+  "startedAt":  "2025-12-26T15:26:29.019842Z",
+  "args":  [
+    "--config",
+    "config_dpo.yaml"
+  ],
+  "program":  "/workspace/trainer-kit/DPO/run_dpo.py",
+  "codePath":  "run_dpo.py",
+  "codePathLocal":  "run_dpo.py",
+  "email":  "shaiksirajuddin9949@gmail.com",
+  "root":  "runs/dpo_run_24b_v1",
+  "host":  "a100-2gpu-shell-session-757d587799-mfdvv",
+  "executable":  "/workspace/llm_finetuning_env/bin/python",
+  "cpu_count":  12,
+  "cpu_count_logical":  24,
+  "gpu":  "NVIDIA A100-SXM4-80GB",
+  "gpu_count":  2,
+  "disk":  {
+    "/":  {
+      "total":  "791251738624",
+      "used":  "315579105280"
+    }
+  },
+  "memory":  {
+    "total":  "359047892992"
+  },
+  "gpu_nvidia":  [
+    {
+      "name":  "NVIDIA A100-SXM4-80GB",
+      "memoryTotal":  "85899345920",
+      "cudaCores":  6912,
+      "architecture":  "Ampere",
+      "uuid":  "GPU-989794b0-ec3b-13bf-db9f-3fbe341497ba"
+    },
+    {
+      "name":  "NVIDIA A100-SXM4-80GB",
+      "memoryTotal":  "85899345920",
+      "cudaCores":  6912,
+      "architecture":  "Ampere",
+      "uuid":  "GPU-3790aa64-60ef-9eac-b0b1-b278ee8c0d40"
+    }
+  ],
+  "cudaVersion":  "13.0",
+  "writerId":  "05zaxpswkd8t9j7nmlszf02dmdnkyst5"
+}
\ No newline at end of file
diff --git a/dpo_run_24b_v1/wandb/run-20251226_152629-ymohys1q/files/wandb-summary.json b/dpo_run_24b_v1/wandb/run-20251226_152629-ymohys1q/files/wandb-summary.json
new file mode 100644
index 0000000000000000000000000000000000000000..8afb95f49483c85658a334253ad61c5e4b5851ef
--- /dev/null
+++ b/dpo_run_24b_v1/wandb/run-20251226_152629-ymohys1q/files/wandb-summary.json
@@ -0,0 +1 @@
+{"_wandb":{"runtime":2},"_runtime":2}
\ No newline at end of file
diff --git a/dpo_run_24b_v1/wandb/run-20251226_152629-ymohys1q/logs/debug-core.log b/dpo_run_24b_v1/wandb/run-20251226_152629-ymohys1q/logs/debug-core.log
new file mode 100644
index 0000000000000000000000000000000000000000..f773509e3e2013824e6ac8bad0e8b68865352cd8
--- /dev/null
+++ b/dpo_run_24b_v1/wandb/run-20251226_152629-ymohys1q/logs/debug-core.log
@@ -0,0 +1,14 @@
+{"time":"2025-12-26T15:26:29.104421101Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpap_k1_i4/port-135896.txt","pid":135896,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false}
+{"time":"2025-12-26T15:26:29.105138524Z","level":"INFO","msg":"server: will exit if parent process dies","ppid":135896}
+{"time":"2025-12-26T15:26:29.105108641Z","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-135896-135976-1788857957/socket","Net":"unix"}}
+{"time":"2025-12-26T15:26:29.287192608Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"}
+{"time":"2025-12-26T15:26:29.293378709Z","level":"INFO","msg":"handleInformInit: received","streamId":"ymohys1q","id":"1(@)"}
+{"time":"2025-12-26T15:26:29.445681764Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"ymohys1q","id":"1(@)"}
+{"time":"2025-12-26T15:26:32.2673862Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"}
+{"time":"2025-12-26T15:26:32.267537637Z","level":"INFO","msg":"server is shutting down"}
+{"time":"2025-12-26T15:26:32.26754914Z","level":"INFO","msg":"connection: closing","id":"1(@)"}
+{"time":"2025-12-26T15:26:32.267646825Z","level":"INFO","msg":"connection: closed successfully","id":"1(@)"}
+{"time":"2025-12-26T15:26:32.267756746Z","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-135896-135976-1788857957/socket","Net":"unix"}}
+{"time":"2025-12-26T15:26:32.914746116Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"}
+{"time":"2025-12-26T15:26:32.914789004Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"}
+{"time":"2025-12-26T15:26:32.914805725Z","level":"INFO","msg":"server is closed"}
diff --git a/dpo_run_24b_v1/wandb/run-20251226_152629-ymohys1q/logs/debug-internal.log b/dpo_run_24b_v1/wandb/run-20251226_152629-ymohys1q/logs/debug-internal.log
new file mode 100644
index 0000000000000000000000000000000000000000..b080e7301195b05bc63027b8b0ae9ed54ca3596e
--- /dev/null
+++ b/dpo_run_24b_v1/wandb/run-20251226_152629-ymohys1q/logs/debug-internal.log
@@ -0,0 +1,11 @@
+{"time":"2025-12-26T15:26:29.293501638Z","level":"INFO","msg":"stream: starting","core version":"0.23.1"}
+{"time":"2025-12-26T15:26:29.445455498Z","level":"INFO","msg":"stream: created new stream","id":"ymohys1q"}
+{"time":"2025-12-26T15:26:29.445544858Z","level":"INFO","msg":"handler: started","stream_id":"ymohys1q"}
+{"time":"2025-12-26T15:26:29.445672321Z","level":"INFO","msg":"stream: started","id":"ymohys1q"}
+{"time":"2025-12-26T15:26:29.445699121Z","level":"INFO","msg":"writer: started","stream_id":"ymohys1q"}
+{"time":"2025-12-26T15:26:29.445775229Z","level":"INFO","msg":"sender: started","stream_id":"ymohys1q"}
+{"time":"2025-12-26T15:26:32.267559653Z","level":"INFO","msg":"stream: closing","id":"ymohys1q"}
+{"time":"2025-12-26T15:26:32.786868259Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
+{"time":"2025-12-26T15:26:32.913988671Z","level":"INFO","msg":"handler: closed","stream_id":"ymohys1q"}
+{"time":"2025-12-26T15:26:32.914088362Z","level":"INFO","msg":"sender: closed","stream_id":"ymohys1q"}
+{"time":"2025-12-26T15:26:32.914111382Z","level":"INFO","msg":"stream: closed","id":"ymohys1q"}
diff --git a/dpo_run_24b_v1/wandb/run-20251226_152629-ymohys1q/logs/debug.log b/dpo_run_24b_v1/wandb/run-20251226_152629-ymohys1q/logs/debug.log
new file mode 100644
index 0000000000000000000000000000000000000000..ec922430519b35152df790c8e48ff0bd33989abc
--- /dev/null
+++ b/dpo_run_24b_v1/wandb/run-20251226_152629-ymohys1q/logs/debug.log
@@ -0,0 +1,23 @@
+2025-12-26 15:26:29,021 INFO    MainThread:135896 [wandb_setup.py:_flush():80] Current SDK version is 0.23.1
+2025-12-26 15:26:29,021 INFO    MainThread:135896 [wandb_setup.py:_flush():80] Configure stats pid to 135896
+2025-12-26 15:26:29,021 INFO    MainThread:135896 [wandb_setup.py:_flush():80] Loading settings from /root/.config/wandb/settings
+2025-12-26 15:26:29,021 INFO    MainThread:135896 [wandb_setup.py:_flush():80] Loading settings from /workspace/trainer-kit/DPO/wandb/settings
+2025-12-26 15:26:29,021 INFO    MainThread:135896 [wandb_setup.py:_flush():80] Loading settings from environment variables
+2025-12-26 15:26:29,021 INFO    MainThread:135896 [wandb_init.py:setup_run_log_directory():714] Logging user logs to runs/dpo_run_24b_v1/wandb/run-20251226_152629-ymohys1q/logs/debug.log
+2025-12-26 15:26:29,021 INFO    MainThread:135896 [wandb_init.py:setup_run_log_directory():715] Logging internal logs to runs/dpo_run_24b_v1/wandb/run-20251226_152629-ymohys1q/logs/debug-internal.log
+2025-12-26 15:26:29,021 INFO    MainThread:135896 [wandb_init.py:init():841] calling init triggers
+2025-12-26 15:26:29,021 INFO    MainThread:135896 [wandb_init.py:init():846] wandb.init called with sweep_config: {}
+config: {'model': {'repo_id': '../../Models/Devstral-Small-2-24B-HS-CPT-SFT', 'revision': None, 'base_local_dir': 'base_model', 'trust_remote_code': True, 'tokenizer_use_fast': True, 'device_map': 'auto', 'torch_dtype': 'bfloat16', 'use_4bit': False, 'bnb_4bit_quant_type': 'nf4', 'bnb_4bit_use_double_quant': False, 'bnb_4bit_compute_dtype': 'bfloat16', 'attn_implementation': None}, 'data': {'train_jsonl': 'dpo_pairs_generated.jsonl', 'eval_jsonl': None, 'eval_split_ratio': 0.1, 'prompt_field': 'prompt', 'chosen_field': 'chosen', 'rejected_field': 'rejected', 'score_field': 'f1_score', 'format_type': 'chatml', 'system_prompt': 'You are a Hyperswitch Rust code analyzer. Identify functions/structs that need modification for a given task.\n\n## Output Format\n\n##OUTPUT\nExplain the data flow and why each component must change:\n- Flow: [Input → Processing → Output with arrows]\n- For each component: "The [ComponentName] ([path]) must [action] because [reason]—without this, [consequence]"\n- Explain coupling between components\n\n##SELECT\nmodify::crates/path/to/file.rs::impl::ComponentName\nadd::crates/another/file.rs::function::AnotherComponent\n<EOS>\n\n## Rules\n\n1. Use full paths: `remove::crates/folder/file.rs::Type::Name`\n2. Use `::` for nested items: `status::StructName::Type::Name`\n3. Always explain "must change because" and "without this"\n3. Types of components: function, struct, enum, impl, trait\n4. If there is extra information (e.g., enum variants), include that too.\n5. 
Start with ##OUTPUT, end with ##SELECT, terminate with <EOS>\n', 'max_length': 2048, 'shuffle': True, 'num_proc': 4}, 'peft': {'enabled': True, 'r': 16, 'lora_alpha': 32, 'lora_dropout': 0.05, 'bias': 'none', 'target_modules': 'auto'}, 'dpo': {'beta': 0.1, 'label_smoothing': 0.0, 'loss_type': 'sigmoid', 'use_reference_model': True, 'reference_free': False}, 'train': {'num_train_epochs': 3, 'per_device_train_batch_size': 1, 'per_device_eval_batch_size': 1, 'gradient_accumulation_steps': 8, 'learning_rate': '5e-5', 'weight_decay': 0.0, 'warmup_ratio': 0.1, 'lr_scheduler_type': 'cosine', 'optim': 'adamw_torch', 'max_grad_norm': 1.0, 'gradient_checkpointing': True, 'logging_steps': 2, 'save_strategy': 'steps', 'save_steps': 100, 'save_total_limit': 10, 'evaluation_strategy': 'steps', 'eval_steps': 25, 'load_best_model_at_end': True, 'early_stopping': {'enabled': True, 'patience': 5, 'min_delta': 0.001, 'metric': 'eval_loss', 'mode': 'min'}, 'resume_from_checkpoint': 'auto'}, 'run_dir': 'runs/dpo_run_24b_v1', '_wandb': {}}
+2025-12-26 15:26:29,021 INFO    MainThread:135896 [wandb_init.py:init():889] starting backend
+2025-12-26 15:26:29,287 INFO    MainThread:135896 [wandb_init.py:init():892] sending inform_init request
+2025-12-26 15:26:29,291 INFO    MainThread:135896 [wandb_init.py:init():900] backend started and connected
+2025-12-26 15:26:29,293 INFO    MainThread:135896 [wandb_init.py:init():970] updated telemetry
+2025-12-26 15:26:29,294 INFO    MainThread:135896 [wandb_init.py:init():994] communicating run to backend with 90.0 second timeout
+2025-12-26 15:26:29,634 INFO    MainThread:135896 [wandb_init.py:init():1041] starting run threads in backend
+2025-12-26 15:26:29,742 INFO    MainThread:135896 [wandb_run.py:_console_start():2521] atexit reg
+2025-12-26 15:26:29,743 INFO    MainThread:135896 [wandb_run.py:_redirect():2369] redirect: wrap_raw
+2025-12-26 15:26:29,743 INFO    MainThread:135896 [wandb_run.py:_redirect():2438] Wrapping output streams.
+2025-12-26 15:26:29,743 INFO    MainThread:135896 [wandb_run.py:_redirect():2461] Redirects installed.
+2025-12-26 15:26:29,748 INFO    MainThread:135896 [wandb_init.py:init():1081] run started, returning control to user process
+2025-12-26 15:26:32,267 INFO    wandb-AsyncioManager-main:135896 [service_client.py:_forward_responses():80] Reached EOF.
+2025-12-26 15:26:32,267 INFO    wandb-AsyncioManager-main:135896 [mailbox.py:close():137] Closing mailbox, abandoning 1 handles.
diff --git a/dpo_run_24b_v1/wandb/run-20251226_152629-ymohys1q/run-ymohys1q.wandb b/dpo_run_24b_v1/wandb/run-20251226_152629-ymohys1q/run-ymohys1q.wandb
new file mode 100644
index 0000000000000000000000000000000000000000..7a0d4b4a114654d25ec4c219c34f87b156d84b68
Binary files /dev/null and b/dpo_run_24b_v1/wandb/run-20251226_152629-ymohys1q/run-ymohys1q.wandb differ
diff --git a/dpo_run_24b_v1/wandb/run-20251226_152902-spwc3b4a/files/config.yaml b/dpo_run_24b_v1/wandb/run-20251226_152902-spwc3b4a/files/config.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..7766f751f3ec53171f1336b34f0b48e53a7a5121
--- /dev/null
+++ b/dpo_run_24b_v1/wandb/run-20251226_152902-spwc3b4a/files/config.yaml
@@ -0,0 +1,165 @@
+_wandb:
+    value:
+        cli_version: 0.23.1
+        e:
+            3md0aqv809sr0p87xotdtgert1x2be48:
+                args:
+                    - --config
+                    - config_dpo.yaml
+                codePath: run_dpo.py
+                codePathLocal: run_dpo.py
+                cpu_count: 12
+                cpu_count_logical: 24
+                cudaVersion: "13.0"
+                disk:
+                    /:
+                        total: "791251738624"
+                        used: "316388069376"
+                email: shaiksirajuddin9949@gmail.com
+                executable: /workspace/llm_finetuning_env/bin/python
+                gpu: NVIDIA A100-SXM4-80GB
+                gpu_count: 2
+                gpu_nvidia:
+                    - architecture: Ampere
+                      cudaCores: 6912
+                      memoryTotal: "85899345920"
+                      name: NVIDIA A100-SXM4-80GB
+                      uuid: GPU-989794b0-ec3b-13bf-db9f-3fbe341497ba
+                    - architecture: Ampere
+                      cudaCores: 6912
+                      memoryTotal: "85899345920"
+                      name: NVIDIA A100-SXM4-80GB
+                      uuid: GPU-3790aa64-60ef-9eac-b0b1-b278ee8c0d40
+                host: a100-2gpu-shell-session-757d587799-mfdvv
+                memory:
+                    total: "359047892992"
+                os: Linux-6.12.46+-x86_64-with-glibc2.35
+                program: /workspace/trainer-kit/DPO/run_dpo.py
+                python: CPython 3.10.12
+                root: runs/dpo_run_24b_v1
+                startedAt: "2025-12-26T15:29:02.292056Z"
+                writerId: 3md0aqv809sr0p87xotdtgert1x2be48
+        m: []
+        python_version: 3.10.12
+        t:
+            "1":
+                - 1
+                - 11
+                - 41
+                - 49
+                - 51
+                - 71
+                - 84
+                - 98
+            "2":
+                - 1
+                - 11
+                - 41
+                - 49
+                - 51
+                - 71
+                - 84
+                - 98
+            "3":
+                - 15
+                - 16
+            "4": 3.10.12
+            "5": 0.23.1
+            "6": 5.0.0.dev0
+            "12": 0.23.1
+            "13": linux-x86_64
+data:
+    value:
+        chosen_field: chosen
+        eval_jsonl: null
+        eval_split_ratio: 0.1
+        format_type: chatml
+        max_length: 2048
+        num_proc: 4
+        prompt_field: prompt
+        rejected_field: rejected
+        score_field: f1_score
+        shuffle: true
+        system_prompt: |
+            You are a Hyperswitch Rust code analyzer. Identify functions/structs that need modification for a given task.
+
+            ## Output Format
+
+            ##OUTPUT
+            Explain the data flow and why each component must change:
+            - Flow: [Input → Processing → Output with arrows]
+            - For each component: "The [ComponentName] ([path]) must [action] because [reason]—without this, [consequence]"
+            - Explain coupling between components
+
+            ##SELECT
+            modify::crates/path/to/file.rs::impl::ComponentName
+            add::crates/another/file.rs::function::AnotherComponent
+            <EOS>
+
+            ## Rules
+
+            1. Use full paths: `remove::crates/folder/file.rs::Type::Name`
+            2. Use `::` for nested items: `status::StructName::Type::Name`
+            3. Always explain "must change because" and "without this"
+            3. Types of components: function, struct, enum, impl, trait
+            4. If there is extra information (e.g., enum variants), include that too.
+            5. Start with ##OUTPUT, end with ##SELECT, terminate with <EOS>
+        train_jsonl: dpo_pairs_generated.jsonl
+dpo:
+    value:
+        beta: 0.1
+        label_smoothing: 0
+        loss_type: sigmoid
+        reference_free: false
+        use_reference_model: true
+model:
+    value:
+        attn_implementation: null
+        base_local_dir: base_model
+        bnb_4bit_compute_dtype: bfloat16
+        bnb_4bit_quant_type: nf4
+        bnb_4bit_use_double_quant: false
+        device_map: auto
+        repo_id: ../../Models/Devstral-Small-2-24B-HS-CPT-SFT
+        revision: null
+        tokenizer_use_fast: true
+        torch_dtype: bfloat16
+        trust_remote_code: true
+        use_4bit: false
+peft:
+    value:
+        bias: none
+        enabled: true
+        lora_alpha: 32
+        lora_dropout: 0.05
+        r: 16
+        target_modules: auto
+run_dir:
+    value: runs/dpo_run_24b_v1
+train:
+    value:
+        early_stopping:
+            enabled: true
+            metric: eval_loss
+            min_delta: 0.001
+            mode: min
+            patience: 5
+        eval_steps: 25
+        evaluation_strategy: steps
+        gradient_accumulation_steps: 8
+        gradient_checkpointing: true
+        learning_rate: "5e-5"
+        load_best_model_at_end: true
+        logging_steps: 2
+        lr_scheduler_type: cosine
+        max_grad_norm: 1
+        num_train_epochs: 3
+        optim: adamw_torch
+        per_device_eval_batch_size: 1
+        per_device_train_batch_size: 1
+        resume_from_checkpoint: auto
+        save_steps: 100
+        save_strategy: steps
+        save_total_limit: 10
+        warmup_ratio: 0.1
+        weight_decay: 0
diff --git a/dpo_run_24b_v1/wandb/run-20251226_152902-spwc3b4a/files/output.log b/dpo_run_24b_v1/wandb/run-20251226_152902-spwc3b4a/files/output.log
new file mode 100644
index 0000000000000000000000000000000000000000..de126ba5d610a05c20ebd7045c26cf43b05dc55d
--- /dev/null
+++ b/dpo_run_24b_v1/wandb/run-20251226_152902-spwc3b4a/files/output.log
@@ -0,0 +1,44 @@
+Wandb initialized: project='dpo-training', name='auto-generated'
+2025-12-26 15:29:05,363 - INFO - Loading model config from ../../Models/Devstral-Small-2-24B-HS-CPT-SFT with trust_remote_code=True
+Traceback (most recent call last):
+  File "/workspace/trainer-kit/DPO/run_dpo.py", line 560, in load_base_model_and_tokenizer
+    model = AutoModelForCausalLM.from_pretrained(
+  File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/models/auto/auto_factory.py", line 376, in from_pretrained
+    raise ValueError(
+ValueError: Unrecognized configuration class <class 'transformers.models.mistral3.configuration_mistral3.Mistral3Config'> for this kind of AutoModel: AutoModelForCausalLM.
+Model type should be one of AfmoeConfig, ApertusConfig, ArceeConfig, AriaTextConfig, BambaConfig, BartConfig, BertConfig, BertGenerationConfig, BigBirdConfig, BigBirdPegasusConfig, BioGptConfig, BitNetConfig, BlenderbotConfig, BlenderbotSmallConfig, BloomConfig, BltConfig, CamembertConfig, LlamaConfig, CodeGenConfig, CohereConfig, Cohere2Config, CpmAntConfig, CTRLConfig, CwmConfig, Data2VecTextConfig, DbrxConfig, DeepseekV2Config, DeepseekV3Config, DiffLlamaConfig, DogeConfig, Dots1Config, ElectraConfig, Emu3Config, ErnieConfig, Ernie4_5Config, Ernie4_5_MoeConfig, Exaone4Config, FalconConfig, FalconH1Config, FalconMambaConfig, FlexOlmoConfig, FuyuConfig, GemmaConfig, Gemma2Config, Gemma3Config, Gemma3TextConfig, Gemma3nConfig, Gemma3nTextConfig, GitConfig, GlmConfig, Glm4Config, Glm4MoeConfig, GotOcr2Config, GPT2Config, GPT2Config, GPTBigCodeConfig, GPTNeoConfig, GPTNeoXConfig, GPTNeoXJapaneseConfig, GptOssConfig, GPTJConfig, GraniteConfig, GraniteMoeConfig, GraniteMoeHybridConfig, GraniteMoeSharedConfig, HeliumConfig, HunYuanDenseV1Config, HunYuanMoEV1Config, Jais2Config, JambaConfig, JetMoeConfig, Lfm2Config, Lfm2MoeConfig, LlamaConfig, Llama4Config, Llama4TextConfig, LongcatFlashConfig, MambaConfig, Mamba2Config, MarianConfig, MBartConfig, MegatronBertConfig, MiniMaxConfig, MinistralConfig, Ministral3Config, MistralConfig, MixtralConfig, MllamaConfig, ModernBertDecoderConfig, MoshiConfig, MptConfig, MusicgenConfig, MusicgenMelodyConfig, MvpConfig, NanoChatConfig, NemotronConfig, OlmoConfig, Olmo2Config, Olmo3Config, OlmoeConfig, OpenAIGPTConfig, OPTConfig, PegasusConfig, PersimmonConfig, PhiConfig, Phi3Config, Phi4MultimodalConfig, PhimoeConfig, PLBartConfig, ProphetNetConfig, Qwen2Config, Qwen2MoeConfig, Qwen3Config, Qwen3MoeConfig, Qwen3NextConfig, RecurrentGemmaConfig, ReformerConfig, RemBertConfig, RobertaConfig, RobertaPreLayerNormConfig, RoCBertConfig, RoFormerConfig, RwkvConfig, SeedOssConfig, SmolLM3Config, StableLmConfig, Starcoder2Config, TrOCRConfig, 
VaultGemmaConfig, WhisperConfig, XGLMConfig, XLMConfig, XLMRobertaConfig, XLMRobertaXLConfig, XLNetConfig, xLSTMConfig, XmodConfig, ZambaConfig, Zamba2Config.
+
+During handling of the above exception, another exception occurred:
+
+Traceback (most recent call last):
+  File "/workspace/trainer-kit/DPO/run_dpo.py", line 960, in <module>
+    main()
+  File "/workspace/trainer-kit/DPO/run_dpo.py", line 751, in main
+    model, tokenizer = load_base_model_and_tokenizer(cfg, base_dir)
+  File "/workspace/trainer-kit/DPO/run_dpo.py", line 574, in load_base_model_and_tokenizer
+    model = AutoModelForCausalLM.from_pretrained(
+  File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/models/auto/auto_factory.py", line 376, in from_pretrained
+    raise ValueError(
+ValueError: Unrecognized configuration class <class 'transformers.models.mistral3.configuration_mistral3.Mistral3Config'> for this kind of AutoModel: AutoModelForCausalLM.
+Model type should be one of AfmoeConfig, ApertusConfig, ArceeConfig, AriaTextConfig, BambaConfig, BartConfig, BertConfig, BertGenerationConfig, BigBirdConfig, BigBirdPegasusConfig, BioGptConfig, BitNetConfig, BlenderbotConfig, BlenderbotSmallConfig, BloomConfig, BltConfig, CamembertConfig, LlamaConfig, CodeGenConfig, CohereConfig, Cohere2Config, CpmAntConfig, CTRLConfig, CwmConfig, Data2VecTextConfig, DbrxConfig, DeepseekV2Config, DeepseekV3Config, DiffLlamaConfig, DogeConfig, Dots1Config, ElectraConfig, Emu3Config, ErnieConfig, Ernie4_5Config, Ernie4_5_MoeConfig, Exaone4Config, FalconConfig, FalconH1Config, FalconMambaConfig, FlexOlmoConfig, FuyuConfig, GemmaConfig, Gemma2Config, Gemma3Config, Gemma3TextConfig, Gemma3nConfig, Gemma3nTextConfig, GitConfig, GlmConfig, Glm4Config, Glm4MoeConfig, GotOcr2Config, GPT2Config, GPT2Config, GPTBigCodeConfig, GPTNeoConfig, GPTNeoXConfig, GPTNeoXJapaneseConfig, GptOssConfig, GPTJConfig, GraniteConfig, GraniteMoeConfig, GraniteMoeHybridConfig, GraniteMoeSharedConfig, HeliumConfig, HunYuanDenseV1Config, HunYuanMoEV1Config, Jais2Config, JambaConfig, JetMoeConfig, Lfm2Config, Lfm2MoeConfig, LlamaConfig, Llama4Config, Llama4TextConfig, LongcatFlashConfig, MambaConfig, Mamba2Config, MarianConfig, MBartConfig, MegatronBertConfig, MiniMaxConfig, MinistralConfig, Ministral3Config, MistralConfig, MixtralConfig, MllamaConfig, ModernBertDecoderConfig, MoshiConfig, MptConfig, MusicgenConfig, MusicgenMelodyConfig, MvpConfig, NanoChatConfig, NemotronConfig, OlmoConfig, Olmo2Config, Olmo3Config, OlmoeConfig, OpenAIGPTConfig, OPTConfig, PegasusConfig, PersimmonConfig, PhiConfig, Phi3Config, Phi4MultimodalConfig, PhimoeConfig, PLBartConfig, ProphetNetConfig, Qwen2Config, Qwen2MoeConfig, Qwen3Config, Qwen3MoeConfig, Qwen3NextConfig, RecurrentGemmaConfig, ReformerConfig, RemBertConfig, RobertaConfig, RobertaPreLayerNormConfig, RoCBertConfig, RoFormerConfig, RwkvConfig, SeedOssConfig, SmolLM3Config, StableLmConfig, Starcoder2Config, TrOCRConfig, 
VaultGemmaConfig, WhisperConfig, XGLMConfig, XLMConfig, XLMRobertaConfig, XLMRobertaXLConfig, XLNetConfig, xLSTMConfig, XmodConfig, ZambaConfig, Zamba2Config.
+Traceback (most recent call last):
+  File "/workspace/trainer-kit/DPO/run_dpo.py", line 560, in load_base_model_and_tokenizer
+    model = AutoModelForCausalLM.from_pretrained(
+  File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/models/auto/auto_factory.py", line 376, in from_pretrained
+    raise ValueError(
+ValueError: Unrecognized configuration class <class 'transformers.models.mistral3.configuration_mistral3.Mistral3Config'> for this kind of AutoModel: AutoModelForCausalLM.
+Model type should be one of AfmoeConfig, ApertusConfig, ArceeConfig, AriaTextConfig, BambaConfig, BartConfig, BertConfig, BertGenerationConfig, BigBirdConfig, BigBirdPegasusConfig, BioGptConfig, BitNetConfig, BlenderbotConfig, BlenderbotSmallConfig, BloomConfig, BltConfig, CamembertConfig, LlamaConfig, CodeGenConfig, CohereConfig, Cohere2Config, CpmAntConfig, CTRLConfig, CwmConfig, Data2VecTextConfig, DbrxConfig, DeepseekV2Config, DeepseekV3Config, DiffLlamaConfig, DogeConfig, Dots1Config, ElectraConfig, Emu3Config, ErnieConfig, Ernie4_5Config, Ernie4_5_MoeConfig, Exaone4Config, FalconConfig, FalconH1Config, FalconMambaConfig, FlexOlmoConfig, FuyuConfig, GemmaConfig, Gemma2Config, Gemma3Config, Gemma3TextConfig, Gemma3nConfig, Gemma3nTextConfig, GitConfig, GlmConfig, Glm4Config, Glm4MoeConfig, GotOcr2Config, GPT2Config, GPT2Config, GPTBigCodeConfig, GPTNeoConfig, GPTNeoXConfig, GPTNeoXJapaneseConfig, GptOssConfig, GPTJConfig, GraniteConfig, GraniteMoeConfig, GraniteMoeHybridConfig, GraniteMoeSharedConfig, HeliumConfig, HunYuanDenseV1Config, HunYuanMoEV1Config, Jais2Config, JambaConfig, JetMoeConfig, Lfm2Config, Lfm2MoeConfig, LlamaConfig, Llama4Config, Llama4TextConfig, LongcatFlashConfig, MambaConfig, Mamba2Config, MarianConfig, MBartConfig, MegatronBertConfig, MiniMaxConfig, MinistralConfig, Ministral3Config, MistralConfig, MixtralConfig, MllamaConfig, ModernBertDecoderConfig, MoshiConfig, MptConfig, MusicgenConfig, MusicgenMelodyConfig, MvpConfig, NanoChatConfig, NemotronConfig, OlmoConfig, Olmo2Config, Olmo3Config, OlmoeConfig, OpenAIGPTConfig, OPTConfig, PegasusConfig, PersimmonConfig, PhiConfig, Phi3Config, Phi4MultimodalConfig, PhimoeConfig, PLBartConfig, ProphetNetConfig, Qwen2Config, Qwen2MoeConfig, Qwen3Config, Qwen3MoeConfig, Qwen3NextConfig, RecurrentGemmaConfig, ReformerConfig, RemBertConfig, RobertaConfig, RobertaPreLayerNormConfig, RoCBertConfig, RoFormerConfig, RwkvConfig, SeedOssConfig, SmolLM3Config, StableLmConfig, Starcoder2Config, TrOCRConfig, 
VaultGemmaConfig, WhisperConfig, XGLMConfig, XLMConfig, XLMRobertaConfig, XLMRobertaXLConfig, XLNetConfig, xLSTMConfig, XmodConfig, ZambaConfig, Zamba2Config.
+
+During handling of the above exception, another exception occurred:
+
+Traceback (most recent call last):
+  File "/workspace/trainer-kit/DPO/run_dpo.py", line 960, in <module>
+    main()
+  File "/workspace/trainer-kit/DPO/run_dpo.py", line 751, in main
+    model, tokenizer = load_base_model_and_tokenizer(cfg, base_dir)
+  File "/workspace/trainer-kit/DPO/run_dpo.py", line 574, in load_base_model_and_tokenizer
+    model = AutoModelForCausalLM.from_pretrained(
+  File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/models/auto/auto_factory.py", line 376, in from_pretrained
+    raise ValueError(
+ValueError: Unrecognized configuration class <class 'transformers.models.mistral3.configuration_mistral3.Mistral3Config'> for this kind of AutoModel: AutoModelForCausalLM.
+Model type should be one of AfmoeConfig, ApertusConfig, ArceeConfig, AriaTextConfig, BambaConfig, BartConfig, BertConfig, BertGenerationConfig, BigBirdConfig, BigBirdPegasusConfig, BioGptConfig, BitNetConfig, BlenderbotConfig, BlenderbotSmallConfig, BloomConfig, BltConfig, CamembertConfig, LlamaConfig, CodeGenConfig, CohereConfig, Cohere2Config, CpmAntConfig, CTRLConfig, CwmConfig, Data2VecTextConfig, DbrxConfig, DeepseekV2Config, DeepseekV3Config, DiffLlamaConfig, DogeConfig, Dots1Config, ElectraConfig, Emu3Config, ErnieConfig, Ernie4_5Config, Ernie4_5_MoeConfig, Exaone4Config, FalconConfig, FalconH1Config, FalconMambaConfig, FlexOlmoConfig, FuyuConfig, GemmaConfig, Gemma2Config, Gemma3Config, Gemma3TextConfig, Gemma3nConfig, Gemma3nTextConfig, GitConfig, GlmConfig, Glm4Config, Glm4MoeConfig, GotOcr2Config, GPT2Config, GPT2Config, GPTBigCodeConfig, GPTNeoConfig, GPTNeoXConfig, GPTNeoXJapaneseConfig, GptOssConfig, GPTJConfig, GraniteConfig, GraniteMoeConfig, GraniteMoeHybridConfig, GraniteMoeSharedConfig, HeliumConfig, HunYuanDenseV1Config, HunYuanMoEV1Config, Jais2Config, JambaConfig, JetMoeConfig, Lfm2Config, Lfm2MoeConfig, LlamaConfig, Llama4Config, Llama4TextConfig, LongcatFlashConfig, MambaConfig, Mamba2Config, MarianConfig, MBartConfig, MegatronBertConfig, MiniMaxConfig, MinistralConfig, Ministral3Config, MistralConfig, MixtralConfig, MllamaConfig, ModernBertDecoderConfig, MoshiConfig, MptConfig, MusicgenConfig, MusicgenMelodyConfig, MvpConfig, NanoChatConfig, NemotronConfig, OlmoConfig, Olmo2Config, Olmo3Config, OlmoeConfig, OpenAIGPTConfig, OPTConfig, PegasusConfig, PersimmonConfig, PhiConfig, Phi3Config, Phi4MultimodalConfig, PhimoeConfig, PLBartConfig, ProphetNetConfig, Qwen2Config, Qwen2MoeConfig, Qwen3Config, Qwen3MoeConfig, Qwen3NextConfig, RecurrentGemmaConfig, ReformerConfig, RemBertConfig, RobertaConfig, RobertaPreLayerNormConfig, RoCBertConfig, RoFormerConfig, RwkvConfig, SeedOssConfig, SmolLM3Config, StableLmConfig, Starcoder2Config, TrOCRConfig, 
VaultGemmaConfig, WhisperConfig, XGLMConfig, XLMConfig, XLMRobertaConfig, XLMRobertaXLConfig, XLNetConfig, xLSTMConfig, XmodConfig, ZambaConfig, Zamba2Config.
diff --git a/dpo_run_24b_v1/wandb/run-20251226_152902-spwc3b4a/files/requirements.txt b/dpo_run_24b_v1/wandb/run-20251226_152902-spwc3b4a/files/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..79a4241d8724f018c9bdfcd7c289f1f14578574b
--- /dev/null
+++ b/dpo_run_24b_v1/wandb/run-20251226_152902-spwc3b4a/files/requirements.txt
@@ -0,0 +1,104 @@
+exceptiongroup==1.3.1
+wheel==0.45.1
+python-dateutil==2.9.0.post0
+nvidia-ml-py==13.580.82
+huggingface_hub==1.2.3
+idna==3.11
+click==8.3.1
+numpy==2.2.6
+httpx==0.28.1
+tokenizers==0.22.1
+sympy==1.13.1
+yarl==1.22.0
+async-timeout==5.0.1
+datasets==4.4.2
+platformdirs==4.5.1
+nvidia-cuda-cupti-cu12==12.1.105
+nvidia-nvtx-cu12==12.1.105
+smmap==5.0.2
+accelerate==1.12.0
+requests==2.32.5
+aiohttp==3.13.2
+bitsandbytes==0.49.0
+nvidia-cublas-cu12==12.1.3.1
+mpmath==1.3.0
+torchaudio==2.5.1+cu121
+nvidia-cuda-runtime-cu12==12.1.105
+typing-inspection==0.4.2
+GitPython==3.1.45
+xxhash==3.6.0
+nvidia-cusolver-cu12==11.4.5.107
+pydantic_core==2.41.5
+six==1.17.0
+torchvision==0.20.1+cu121
+typing_extensions==4.15.0
+triton==3.1.0
+charset-normalizer==3.4.4
+nvitop==1.6.1
+wandb==0.23.1
+regex==2025.11.3
+pip==25.3
+nvidia-cusparse-cu12==12.1.0.106
+pytz==2025.2
+Jinja2==3.1.6
+psutil==7.2.0
+pillow==12.0.0
+packaging==25.0
+safetensors==0.7.0
+sentry-sdk==2.48.0
+gitdb==4.0.12
+httpcore==1.0.9
+setuptools==80.9.0
+nvidia-cufft-cu12==11.0.2.54
+anyio==4.12.0
+transformers==5.0.0.dev0
+pydantic==2.12.5
+fsspec==2025.10.0
+filelock==3.20.0
+PyYAML==6.0.3
+hf-xet==1.2.0
+nvidia-cudnn-cu12==9.1.0.70
+tqdm==4.67.1
+MarkupSafe==2.1.5
+attrs==25.4.0
+nvidia-cuda-nvrtc-cu12==12.1.105
+peft==0.18.0
+aiohappyeyeballs==2.6.1
+networkx==3.4.2
+nvidia-nvjitlink-cu12==12.9.86
+certifi==2025.11.12
+pyarrow==22.0.0
+dill==0.4.0
+protobuf==6.33.2
+aiosignal==1.4.0
+frozenlist==1.8.0
+urllib3==2.6.2
+propcache==0.4.1
+tzdata==2025.3
+pandas==2.3.3
+annotated-types==0.7.0
+shellingham==1.5.4
+nvidia-nccl-cu12==2.21.5
+multidict==6.7.0
+nvidia-curand-cu12==10.3.2.106
+trl==0.26.2
+torch==2.5.1+cu121
+h11==0.16.0
+multiprocess==0.70.18
+typer-slim==0.21.0
+wheel==0.45.1
+tomli==2.0.1
+autocommand==2.2.2
+jaraco.context==5.3.0
+zipp==3.19.2
+packaging==24.2
+inflect==7.3.1
+typing_extensions==4.12.2
+platformdirs==4.2.2
+jaraco.functools==4.0.1
+jaraco.collections==5.1.0
+jaraco.text==3.12.1
+backports.tarfile==1.2.0
+more-itertools==10.3.0
+importlib_metadata==8.0.0
+typeguard==4.3.0
diff --git a/dpo_run_24b_v1/wandb/run-20251226_152902-spwc3b4a/files/wandb-metadata.json b/dpo_run_24b_v1/wandb/run-20251226_152902-spwc3b4a/files/wandb-metadata.json
new file mode 100644
index 0000000000000000000000000000000000000000..4048f09113fb110aa799801a3a7566e877a32769
--- /dev/null
+++ b/dpo_run_24b_v1/wandb/run-20251226_152902-spwc3b4a/files/wandb-metadata.json
@@ -0,0 +1,47 @@
+{
+  "os":  "Linux-6.12.46+-x86_64-with-glibc2.35",
+  "python":  "CPython 3.10.12",
+  "startedAt":  "2025-12-26T15:29:02.292056Z",
+  "args":  [
+    "--config",
+    "config_dpo.yaml"
+  ],
+  "program":  "/workspace/trainer-kit/DPO/run_dpo.py",
+  "codePath":  "run_dpo.py",
+  "codePathLocal":  "run_dpo.py",
+  "email":  "shaiksirajuddin9949@gmail.com",
+  "root":  "runs/dpo_run_24b_v1",
+  "host":  "a100-2gpu-shell-session-757d587799-mfdvv",
+  "executable":  "/workspace/llm_finetuning_env/bin/python",
+  "cpu_count":  12,
+  "cpu_count_logical":  24,
+  "gpu":  "NVIDIA A100-SXM4-80GB",
+  "gpu_count":  2,
+  "disk":  {
+    "/":  {
+      "total":  "791251738624",
+      "used":  "316388069376"
+    }
+  },
+  "memory":  {
+    "total":  "359047892992"
+  },
+  "gpu_nvidia":  [
+    {
+      "name":  "NVIDIA A100-SXM4-80GB",
+      "memoryTotal":  "85899345920",
+      "cudaCores":  6912,
+      "architecture":  "Ampere",
+      "uuid":  "GPU-989794b0-ec3b-13bf-db9f-3fbe341497ba"
+    },
+    {
+      "name":  "NVIDIA A100-SXM4-80GB",
+      "memoryTotal":  "85899345920",
+      "cudaCores":  6912,
+      "architecture":  "Ampere",
+      "uuid":  "GPU-3790aa64-60ef-9eac-b0b1-b278ee8c0d40"
+    }
+  ],
+  "cudaVersion":  "13.0",
+  "writerId":  "3md0aqv809sr0p87xotdtgert1x2be48"
+}
\ No newline at end of file
diff --git a/dpo_run_24b_v1/wandb/run-20251226_152902-spwc3b4a/files/wandb-summary.json b/dpo_run_24b_v1/wandb/run-20251226_152902-spwc3b4a/files/wandb-summary.json
new file mode 100644
index 0000000000000000000000000000000000000000..8afb95f49483c85658a334253ad61c5e4b5851ef
--- /dev/null
+++ b/dpo_run_24b_v1/wandb/run-20251226_152902-spwc3b4a/files/wandb-summary.json
@@ -0,0 +1 @@
+{"_wandb":{"runtime":2},"_runtime":2}
\ No newline at end of file
diff --git a/dpo_run_24b_v1/wandb/run-20251226_152902-spwc3b4a/logs/debug-core.log b/dpo_run_24b_v1/wandb/run-20251226_152902-spwc3b4a/logs/debug-core.log
new file mode 100644
index 0000000000000000000000000000000000000000..d3becc2ee998818bb650a348b113d8fe037538de
--- /dev/null
+++ b/dpo_run_24b_v1/wandb/run-20251226_152902-spwc3b4a/logs/debug-core.log
@@ -0,0 +1,14 @@
+{"time":"2025-12-26T15:29:02.378098877Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmph5oh7hv8/port-136944.txt","pid":136944,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false}
+{"time":"2025-12-26T15:29:02.378909563Z","level":"INFO","msg":"server: will exit if parent process dies","ppid":136944}
+{"time":"2025-12-26T15:29:02.378811163Z","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-136944-137024-1203613879/socket","Net":"unix"}}
+{"time":"2025-12-26T15:29:02.55980819Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"}
+{"time":"2025-12-26T15:29:02.56654094Z","level":"INFO","msg":"handleInformInit: received","streamId":"spwc3b4a","id":"1(@)"}
+{"time":"2025-12-26T15:29:02.726927644Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"spwc3b4a","id":"1(@)"}
+{"time":"2025-12-26T15:29:05.56156698Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"}
+{"time":"2025-12-26T15:29:05.56167497Z","level":"INFO","msg":"connection: closing","id":"1(@)"}
+{"time":"2025-12-26T15:29:05.561719256Z","level":"INFO","msg":"server is shutting down"}
+{"time":"2025-12-26T15:29:05.561768332Z","level":"INFO","msg":"connection: closed successfully","id":"1(@)"}
+{"time":"2025-12-26T15:29:05.561970735Z","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-136944-137024-1203613879/socket","Net":"unix"}}
+{"time":"2025-12-26T15:29:05.964901945Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"}
+{"time":"2025-12-26T15:29:05.964947772Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"}
+{"time":"2025-12-26T15:29:05.964970982Z","level":"INFO","msg":"server is closed"}
diff --git a/dpo_run_24b_v1/wandb/run-20251226_152902-spwc3b4a/logs/debug-internal.log b/dpo_run_24b_v1/wandb/run-20251226_152902-spwc3b4a/logs/debug-internal.log
new file mode 100644
index 0000000000000000000000000000000000000000..f0a7bbf78d48af17bbb2d345a4a02ef2fb8199c2
--- /dev/null
+++ b/dpo_run_24b_v1/wandb/run-20251226_152902-spwc3b4a/logs/debug-internal.log
@@ -0,0 +1,11 @@
+{"time":"2025-12-26T15:29:02.566705931Z","level":"INFO","msg":"stream: starting","core version":"0.23.1"}
+{"time":"2025-12-26T15:29:02.726675423Z","level":"INFO","msg":"stream: created new stream","id":"spwc3b4a"}
+{"time":"2025-12-26T15:29:02.726786973Z","level":"INFO","msg":"handler: started","stream_id":"spwc3b4a"}
+{"time":"2025-12-26T15:29:02.726918023Z","level":"INFO","msg":"stream: started","id":"spwc3b4a"}
+{"time":"2025-12-26T15:29:02.726947115Z","level":"INFO","msg":"writer: started","stream_id":"spwc3b4a"}
+{"time":"2025-12-26T15:29:02.726982198Z","level":"INFO","msg":"sender: started","stream_id":"spwc3b4a"}
+{"time":"2025-12-26T15:29:05.56168481Z","level":"INFO","msg":"stream: closing","id":"spwc3b4a"}
+{"time":"2025-12-26T15:29:05.886288482Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
+{"time":"2025-12-26T15:29:05.963872485Z","level":"INFO","msg":"handler: closed","stream_id":"spwc3b4a"}
+{"time":"2025-12-26T15:29:05.96397042Z","level":"INFO","msg":"sender: closed","stream_id":"spwc3b4a"}
+{"time":"2025-12-26T15:29:05.963978351Z","level":"INFO","msg":"stream: closed","id":"spwc3b4a"}
diff --git a/dpo_run_24b_v1/wandb/run-20251226_152902-spwc3b4a/logs/debug.log b/dpo_run_24b_v1/wandb/run-20251226_152902-spwc3b4a/logs/debug.log
new file mode 100644
index 0000000000000000000000000000000000000000..379f8b531a1e4e31ab6e592aeb583a2974d17c5a
--- /dev/null
+++ b/dpo_run_24b_v1/wandb/run-20251226_152902-spwc3b4a/logs/debug.log
@@ -0,0 +1,23 @@
+2025-12-26 15:29:02,293 INFO    MainThread:136944 [wandb_setup.py:_flush():80] Current SDK version is 0.23.1
+2025-12-26 15:29:02,293 INFO    MainThread:136944 [wandb_setup.py:_flush():80] Configure stats pid to 136944
+2025-12-26 15:29:02,293 INFO    MainThread:136944 [wandb_setup.py:_flush():80] Loading settings from /root/.config/wandb/settings
+2025-12-26 15:29:02,293 INFO    MainThread:136944 [wandb_setup.py:_flush():80] Loading settings from /workspace/trainer-kit/DPO/wandb/settings
+2025-12-26 15:29:02,293 INFO    MainThread:136944 [wandb_setup.py:_flush():80] Loading settings from environment variables
+2025-12-26 15:29:02,293 INFO    MainThread:136944 [wandb_init.py:setup_run_log_directory():714] Logging user logs to runs/dpo_run_24b_v1/wandb/run-20251226_152902-spwc3b4a/logs/debug.log
+2025-12-26 15:29:02,294 INFO    MainThread:136944 [wandb_init.py:setup_run_log_directory():715] Logging internal logs to runs/dpo_run_24b_v1/wandb/run-20251226_152902-spwc3b4a/logs/debug-internal.log
+2025-12-26 15:29:02,294 INFO    MainThread:136944 [wandb_init.py:init():841] calling init triggers
+2025-12-26 15:29:02,294 INFO    MainThread:136944 [wandb_init.py:init():846] wandb.init called with sweep_config: {}
+config: {'model': {'repo_id': '../../Models/Devstral-Small-2-24B-HS-CPT-SFT', 'revision': None, 'base_local_dir': 'base_model', 'trust_remote_code': True, 'tokenizer_use_fast': True, 'device_map': 'auto', 'torch_dtype': 'bfloat16', 'use_4bit': False, 'bnb_4bit_quant_type': 'nf4', 'bnb_4bit_use_double_quant': False, 'bnb_4bit_compute_dtype': 'bfloat16', 'attn_implementation': None}, 'data': {'train_jsonl': 'dpo_pairs_generated.jsonl', 'eval_jsonl': None, 'eval_split_ratio': 0.1, 'prompt_field': 'prompt', 'chosen_field': 'chosen', 'rejected_field': 'rejected', 'score_field': 'f1_score', 'format_type': 'chatml', 'system_prompt': 'You are a Hyperswitch Rust code analyzer. Identify functions/structs that need modification for a given task.\n\n## Output Format\n\n##OUTPUT\nExplain the data flow and why each component must change:\n- Flow: [Input → Processing → Output with arrows]\n- For each component: "The [ComponentName] ([path]) must [action] because [reason]—without this, [consequence]"\n- Explain coupling between components\n\n##SELECT\nmodify::crates/path/to/file.rs::impl::ComponentName\nadd::crates/another/file.rs::function::AnotherComponent\n<EOS>\n\n## Rules\n\n1. Use full paths: `remove::crates/folder/file.rs::Type::Name`\n2. Use `::` for nested items: `status::StructName::Type::Name`\n3. Always explain "must change because" and "without this"\n3. Types of components: function, struct, enum, impl, trait\n4. If there is extra information (e.g., enum variants), include that too.\n5. 
Start with ##OUTPUT, end with ##SELECT, terminate with <EOS>\n', 'max_length': 2048, 'shuffle': True, 'num_proc': 4}, 'peft': {'enabled': True, 'r': 16, 'lora_alpha': 32, 'lora_dropout': 0.05, 'bias': 'none', 'target_modules': 'auto'}, 'dpo': {'beta': 0.1, 'label_smoothing': 0.0, 'loss_type': 'sigmoid', 'use_reference_model': True, 'reference_free': False}, 'train': {'num_train_epochs': 3, 'per_device_train_batch_size': 1, 'per_device_eval_batch_size': 1, 'gradient_accumulation_steps': 8, 'learning_rate': '5e-5', 'weight_decay': 0.0, 'warmup_ratio': 0.1, 'lr_scheduler_type': 'cosine', 'optim': 'adamw_torch', 'max_grad_norm': 1.0, 'gradient_checkpointing': True, 'logging_steps': 2, 'save_strategy': 'steps', 'save_steps': 100, 'save_total_limit': 10, 'evaluation_strategy': 'steps', 'eval_steps': 25, 'load_best_model_at_end': True, 'early_stopping': {'enabled': True, 'patience': 5, 'min_delta': 0.001, 'metric': 'eval_loss', 'mode': 'min'}, 'resume_from_checkpoint': 'auto'}, 'run_dir': 'runs/dpo_run_24b_v1', '_wandb': {}}
+2025-12-26 15:29:02,294 INFO    MainThread:136944 [wandb_init.py:init():889] starting backend
+2025-12-26 15:29:02,560 INFO    MainThread:136944 [wandb_init.py:init():892] sending inform_init request
+2025-12-26 15:29:02,564 INFO    MainThread:136944 [wandb_init.py:init():900] backend started and connected
+2025-12-26 15:29:02,566 INFO    MainThread:136944 [wandb_init.py:init():970] updated telemetry
+2025-12-26 15:29:02,567 INFO    MainThread:136944 [wandb_init.py:init():994] communicating run to backend with 90.0 second timeout
+2025-12-26 15:29:02,898 INFO    MainThread:136944 [wandb_init.py:init():1041] starting run threads in backend
+2025-12-26 15:29:03,007 INFO    MainThread:136944 [wandb_run.py:_console_start():2521] atexit reg
+2025-12-26 15:29:03,007 INFO    MainThread:136944 [wandb_run.py:_redirect():2369] redirect: wrap_raw
+2025-12-26 15:29:03,007 INFO    MainThread:136944 [wandb_run.py:_redirect():2438] Wrapping output streams.
+2025-12-26 15:29:03,007 INFO    MainThread:136944 [wandb_run.py:_redirect():2461] Redirects installed.
+2025-12-26 15:29:03,012 INFO    MainThread:136944 [wandb_init.py:init():1081] run started, returning control to user process
+2025-12-26 15:29:05,561 INFO    wandb-AsyncioManager-main:136944 [service_client.py:_forward_responses():80] Reached EOF.
+2025-12-26 15:29:05,561 INFO    wandb-AsyncioManager-main:136944 [mailbox.py:close():137] Closing mailbox, abandoning 1 handles.
diff --git a/dpo_run_24b_v1/wandb/run-20251226_152902-spwc3b4a/run-spwc3b4a.wandb b/dpo_run_24b_v1/wandb/run-20251226_152902-spwc3b4a/run-spwc3b4a.wandb
new file mode 100644
index 0000000000000000000000000000000000000000..10e83c5cbee20de6a2b975f02fa11f7e5dcaf827
Binary files /dev/null and b/dpo_run_24b_v1/wandb/run-20251226_152902-spwc3b4a/run-spwc3b4a.wandb differ
diff --git a/dpo_run_24b_v1/wandb/run-20251226_153052-uo02exvi/files/config.yaml b/dpo_run_24b_v1/wandb/run-20251226_153052-uo02exvi/files/config.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..95129805353f048ab83603466a4dccbb8c8b695d
--- /dev/null
+++ b/dpo_run_24b_v1/wandb/run-20251226_153052-uo02exvi/files/config.yaml
@@ -0,0 +1,165 @@
+_wandb:
+    value:
+        cli_version: 0.23.1
+        e:
+            b8zcal9au539h6npajcoix9vzmhwciop:
+                args:
+                    - --config
+                    - config_dpo.yaml
+                codePath: run_dpo.py
+                codePathLocal: run_dpo.py
+                cpu_count: 12
+                cpu_count_logical: 24
+                cudaVersion: "13.0"
+                disk:
+                    /:
+                        total: "791251738624"
+                        used: "317141377024"
+                email: shaiksirajuddin9949@gmail.com
+                executable: /workspace/llm_finetuning_env/bin/python
+                gpu: NVIDIA A100-SXM4-80GB
+                gpu_count: 2
+                gpu_nvidia:
+                    - architecture: Ampere
+                      cudaCores: 6912
+                      memoryTotal: "85899345920"
+                      name: NVIDIA A100-SXM4-80GB
+                      uuid: GPU-989794b0-ec3b-13bf-db9f-3fbe341497ba
+                    - architecture: Ampere
+                      cudaCores: 6912
+                      memoryTotal: "85899345920"
+                      name: NVIDIA A100-SXM4-80GB
+                      uuid: GPU-3790aa64-60ef-9eac-b0b1-b278ee8c0d40
+                host: a100-2gpu-shell-session-757d587799-mfdvv
+                memory:
+                    total: "359047892992"
+                os: Linux-6.12.46+-x86_64-with-glibc2.35
+                program: /workspace/trainer-kit/DPO/run_dpo.py
+                python: CPython 3.10.12
+                root: runs/dpo_run_24b_v1
+                startedAt: "2025-12-26T15:30:52.545366Z"
+                writerId: b8zcal9au539h6npajcoix9vzmhwciop
+        m: []
+        python_version: 3.10.12
+        t:
+            "1":
+                - 1
+                - 11
+                - 41
+                - 49
+                - 51
+                - 71
+                - 84
+                - 98
+            "2":
+                - 1
+                - 11
+                - 41
+                - 49
+                - 51
+                - 71
+                - 84
+                - 98
+            "3":
+                - 15
+                - 16
+            "4": 3.10.12
+            "5": 0.23.1
+            "6": 5.0.0.dev0
+            "12": 0.23.1
+            "13": linux-x86_64
+data:
+    value:
+        chosen_field: chosen
+        eval_jsonl: null
+        eval_split_ratio: 0.1
+        format_type: chatml
+        max_length: 2048
+        num_proc: 4
+        prompt_field: prompt
+        rejected_field: rejected
+        score_field: f1_score
+        shuffle: true
+        system_prompt: |
+            You are a Hyperswitch Rust code analyzer. Identify functions/structs that need modification for a given task.
+
+            ## Output Format
+
+            ##OUTPUT
+            Explain the data flow and why each component must change:
+            - Flow: [Input → Processing → Output with arrows]
+            - For each component: "The [ComponentName] ([path]) must [action] because [reason]—without this, [consequence]"
+            - Explain coupling between components
+
+            ##SELECT
+            modify::crates/path/to/file.rs::impl::ComponentName
+            add::crates/another/file.rs::function::AnotherComponent
+            <EOS>
+
+            ## Rules
+
+            1. Use full paths: `remove::crates/folder/file.rs::Type::Name`
+            2. Use `::` for nested items: `status::StructName::Type::Name`
+            3. Always explain "must change because" and "without this"
+            3. Types of components: function, struct, enum, impl, trait
+            4. If there is extra information (e.g., enum variants), include that too.
+            5. Start with ##OUTPUT, end with ##SELECT, terminate with <EOS>
+        train_jsonl: dpo_pairs_generated.jsonl
+dpo:
+    value:
+        beta: 0.1
+        label_smoothing: 0
+        loss_type: sigmoid
+        reference_free: false
+        use_reference_model: true
+model:
+    value:
+        attn_implementation: null
+        base_local_dir: base_model
+        bnb_4bit_compute_dtype: bfloat16
+        bnb_4bit_quant_type: nf4
+        bnb_4bit_use_double_quant: false
+        device_map: auto
+        repo_id: ../../Models/Devstral-Small-2-24B-HS-CPT-SFT
+        revision: null
+        tokenizer_use_fast: true
+        torch_dtype: bfloat16
+        trust_remote_code: true
+        use_4bit: false
+peft:
+    value:
+        bias: none
+        enabled: true
+        lora_alpha: 32
+        lora_dropout: 0.05
+        r: 16
+        target_modules: auto
+run_dir:
+    value: runs/dpo_run_24b_v1
+train:
+    value:
+        early_stopping:
+            enabled: true
+            metric: eval_loss
+            min_delta: 0.001
+            mode: min
+            patience: 5
+        eval_steps: 25
+        evaluation_strategy: steps
+        gradient_accumulation_steps: 8
+        gradient_checkpointing: true
+        learning_rate: "5e-5"
+        load_best_model_at_end: true
+        logging_steps: 2
+        lr_scheduler_type: cosine
+        max_grad_norm: 1
+        num_train_epochs: 3
+        optim: adamw_torch
+        per_device_eval_batch_size: 1
+        per_device_train_batch_size: 1
+        resume_from_checkpoint: auto
+        save_steps: 100
+        save_strategy: steps
+        save_total_limit: 10
+        warmup_ratio: 0.1
+        weight_decay: 0
diff --git a/dpo_run_24b_v1/wandb/run-20251226_153052-uo02exvi/files/output.log b/dpo_run_24b_v1/wandb/run-20251226_153052-uo02exvi/files/output.log
new file mode 100644
index 0000000000000000000000000000000000000000..921fb42c88c055c74b21561b4c0f5dc417d739b1
--- /dev/null
+++ b/dpo_run_24b_v1/wandb/run-20251226_153052-uo02exvi/files/output.log
@@ -0,0 +1,44 @@
+Wandb initialized: project='dpo-training', name='auto-generated'
+2025-12-26 15:30:55,534 - INFO - Loading model config from ../../Models/Devstral-Small-2-24B-HS-CPT-SFT with trust_remote_code=True
+Traceback (most recent call last):
+  File "/workspace/trainer-kit/DPO/run_dpo.py", line 560, in load_base_model_and_tokenizer
+    model = AutoModelForCausalLM.from_pretrained(
+  File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/models/auto/auto_factory.py", line 376, in from_pretrained
+    raise ValueError(
+ValueError: Unrecognized configuration class <class 'transformers.models.mistral3.configuration_mistral3.Mistral3Config'> for this kind of AutoModel: AutoModelForCausalLM.
+Model type should be one of AfmoeConfig, ApertusConfig, ArceeConfig, AriaTextConfig, BambaConfig, BartConfig, BertConfig, BertGenerationConfig, BigBirdConfig, BigBirdPegasusConfig, BioGptConfig, BitNetConfig, BlenderbotConfig, BlenderbotSmallConfig, BloomConfig, BltConfig, CamembertConfig, LlamaConfig, CodeGenConfig, CohereConfig, Cohere2Config, CpmAntConfig, CTRLConfig, CwmConfig, Data2VecTextConfig, DbrxConfig, DeepseekV2Config, DeepseekV3Config, DiffLlamaConfig, DogeConfig, Dots1Config, ElectraConfig, Emu3Config, ErnieConfig, Ernie4_5Config, Ernie4_5_MoeConfig, Exaone4Config, FalconConfig, FalconH1Config, FalconMambaConfig, FlexOlmoConfig, FuyuConfig, GemmaConfig, Gemma2Config, Gemma3Config, Gemma3TextConfig, Gemma3nConfig, Gemma3nTextConfig, GitConfig, GlmConfig, Glm4Config, Glm4MoeConfig, GotOcr2Config, GPT2Config, GPT2Config, GPTBigCodeConfig, GPTNeoConfig, GPTNeoXConfig, GPTNeoXJapaneseConfig, GptOssConfig, GPTJConfig, GraniteConfig, GraniteMoeConfig, GraniteMoeHybridConfig, GraniteMoeSharedConfig, HeliumConfig, HunYuanDenseV1Config, HunYuanMoEV1Config, Jais2Config, JambaConfig, JetMoeConfig, Lfm2Config, Lfm2MoeConfig, LlamaConfig, Llama4Config, Llama4TextConfig, LongcatFlashConfig, MambaConfig, Mamba2Config, MarianConfig, MBartConfig, MegatronBertConfig, MiniMaxConfig, MinistralConfig, Ministral3Config, MistralConfig, MixtralConfig, MllamaConfig, ModernBertDecoderConfig, MoshiConfig, MptConfig, MusicgenConfig, MusicgenMelodyConfig, MvpConfig, NanoChatConfig, NemotronConfig, OlmoConfig, Olmo2Config, Olmo3Config, OlmoeConfig, OpenAIGPTConfig, OPTConfig, PegasusConfig, PersimmonConfig, PhiConfig, Phi3Config, Phi4MultimodalConfig, PhimoeConfig, PLBartConfig, ProphetNetConfig, Qwen2Config, Qwen2MoeConfig, Qwen3Config, Qwen3MoeConfig, Qwen3NextConfig, RecurrentGemmaConfig, ReformerConfig, RemBertConfig, RobertaConfig, RobertaPreLayerNormConfig, RoCBertConfig, RoFormerConfig, RwkvConfig, SeedOssConfig, SmolLM3Config, StableLmConfig, Starcoder2Config, TrOCRConfig, 
VaultGemmaConfig, WhisperConfig, XGLMConfig, XLMConfig, XLMRobertaConfig, XLMRobertaXLConfig, XLNetConfig, xLSTMConfig, XmodConfig, ZambaConfig, Zamba2Config.
+
+During handling of the above exception, another exception occurred:
+
+Traceback (most recent call last):
+  File "/workspace/trainer-kit/DPO/run_dpo.py", line 960, in <module>
+    main()
+  File "/workspace/trainer-kit/DPO/run_dpo.py", line 751, in main
+    model, tokenizer = load_base_model_and_tokenizer(cfg, base_dir)
+  File "/workspace/trainer-kit/DPO/run_dpo.py", line 574, in load_base_model_and_tokenizer
+    model = AutoModelForCausalLM.from_pretrained(
+  File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/models/auto/auto_factory.py", line 376, in from_pretrained
+    raise ValueError(
+ValueError: Unrecognized configuration class <class 'transformers.models.mistral3.configuration_mistral3.Mistral3Config'> for this kind of AutoModel: AutoModelForCausalLM.
+Model type should be one of AfmoeConfig, ApertusConfig, ArceeConfig, AriaTextConfig, BambaConfig, BartConfig, BertConfig, BertGenerationConfig, BigBirdConfig, BigBirdPegasusConfig, BioGptConfig, BitNetConfig, BlenderbotConfig, BlenderbotSmallConfig, BloomConfig, BltConfig, CamembertConfig, LlamaConfig, CodeGenConfig, CohereConfig, Cohere2Config, CpmAntConfig, CTRLConfig, CwmConfig, Data2VecTextConfig, DbrxConfig, DeepseekV2Config, DeepseekV3Config, DiffLlamaConfig, DogeConfig, Dots1Config, ElectraConfig, Emu3Config, ErnieConfig, Ernie4_5Config, Ernie4_5_MoeConfig, Exaone4Config, FalconConfig, FalconH1Config, FalconMambaConfig, FlexOlmoConfig, FuyuConfig, GemmaConfig, Gemma2Config, Gemma3Config, Gemma3TextConfig, Gemma3nConfig, Gemma3nTextConfig, GitConfig, GlmConfig, Glm4Config, Glm4MoeConfig, GotOcr2Config, GPT2Config, GPT2Config, GPTBigCodeConfig, GPTNeoConfig, GPTNeoXConfig, GPTNeoXJapaneseConfig, GptOssConfig, GPTJConfig, GraniteConfig, GraniteMoeConfig, GraniteMoeHybridConfig, GraniteMoeSharedConfig, HeliumConfig, HunYuanDenseV1Config, HunYuanMoEV1Config, Jais2Config, JambaConfig, JetMoeConfig, Lfm2Config, Lfm2MoeConfig, LlamaConfig, Llama4Config, Llama4TextConfig, LongcatFlashConfig, MambaConfig, Mamba2Config, MarianConfig, MBartConfig, MegatronBertConfig, MiniMaxConfig, MinistralConfig, Ministral3Config, MistralConfig, MixtralConfig, MllamaConfig, ModernBertDecoderConfig, MoshiConfig, MptConfig, MusicgenConfig, MusicgenMelodyConfig, MvpConfig, NanoChatConfig, NemotronConfig, OlmoConfig, Olmo2Config, Olmo3Config, OlmoeConfig, OpenAIGPTConfig, OPTConfig, PegasusConfig, PersimmonConfig, PhiConfig, Phi3Config, Phi4MultimodalConfig, PhimoeConfig, PLBartConfig, ProphetNetConfig, Qwen2Config, Qwen2MoeConfig, Qwen3Config, Qwen3MoeConfig, Qwen3NextConfig, RecurrentGemmaConfig, ReformerConfig, RemBertConfig, RobertaConfig, RobertaPreLayerNormConfig, RoCBertConfig, RoFormerConfig, RwkvConfig, SeedOssConfig, SmolLM3Config, StableLmConfig, Starcoder2Config, TrOCRConfig, 
VaultGemmaConfig, WhisperConfig, XGLMConfig, XLMConfig, XLMRobertaConfig, XLMRobertaXLConfig, XLNetConfig, xLSTMConfig, XmodConfig, ZambaConfig, Zamba2Config.
+Traceback (most recent call last):
+  File "/workspace/trainer-kit/DPO/run_dpo.py", line 560, in load_base_model_and_tokenizer
+    model = AutoModelForCausalLM.from_pretrained(
+  File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/models/auto/auto_factory.py", line 376, in from_pretrained
+    raise ValueError(
+ValueError: Unrecognized configuration class <class 'transformers.models.mistral3.configuration_mistral3.Mistral3Config'> for this kind of AutoModel: AutoModelForCausalLM.
+Model type should be one of AfmoeConfig, ApertusConfig, ArceeConfig, AriaTextConfig, BambaConfig, BartConfig, BertConfig, BertGenerationConfig, BigBirdConfig, BigBirdPegasusConfig, BioGptConfig, BitNetConfig, BlenderbotConfig, BlenderbotSmallConfig, BloomConfig, BltConfig, CamembertConfig, LlamaConfig, CodeGenConfig, CohereConfig, Cohere2Config, CpmAntConfig, CTRLConfig, CwmConfig, Data2VecTextConfig, DbrxConfig, DeepseekV2Config, DeepseekV3Config, DiffLlamaConfig, DogeConfig, Dots1Config, ElectraConfig, Emu3Config, ErnieConfig, Ernie4_5Config, Ernie4_5_MoeConfig, Exaone4Config, FalconConfig, FalconH1Config, FalconMambaConfig, FlexOlmoConfig, FuyuConfig, GemmaConfig, Gemma2Config, Gemma3Config, Gemma3TextConfig, Gemma3nConfig, Gemma3nTextConfig, GitConfig, GlmConfig, Glm4Config, Glm4MoeConfig, GotOcr2Config, GPT2Config, GPT2Config, GPTBigCodeConfig, GPTNeoConfig, GPTNeoXConfig, GPTNeoXJapaneseConfig, GptOssConfig, GPTJConfig, GraniteConfig, GraniteMoeConfig, GraniteMoeHybridConfig, GraniteMoeSharedConfig, HeliumConfig, HunYuanDenseV1Config, HunYuanMoEV1Config, Jais2Config, JambaConfig, JetMoeConfig, Lfm2Config, Lfm2MoeConfig, LlamaConfig, Llama4Config, Llama4TextConfig, LongcatFlashConfig, MambaConfig, Mamba2Config, MarianConfig, MBartConfig, MegatronBertConfig, MiniMaxConfig, MinistralConfig, Ministral3Config, MistralConfig, MixtralConfig, MllamaConfig, ModernBertDecoderConfig, MoshiConfig, MptConfig, MusicgenConfig, MusicgenMelodyConfig, MvpConfig, NanoChatConfig, NemotronConfig, OlmoConfig, Olmo2Config, Olmo3Config, OlmoeConfig, OpenAIGPTConfig, OPTConfig, PegasusConfig, PersimmonConfig, PhiConfig, Phi3Config, Phi4MultimodalConfig, PhimoeConfig, PLBartConfig, ProphetNetConfig, Qwen2Config, Qwen2MoeConfig, Qwen3Config, Qwen3MoeConfig, Qwen3NextConfig, RecurrentGemmaConfig, ReformerConfig, RemBertConfig, RobertaConfig, RobertaPreLayerNormConfig, RoCBertConfig, RoFormerConfig, RwkvConfig, SeedOssConfig, SmolLM3Config, StableLmConfig, Starcoder2Config, TrOCRConfig, 
VaultGemmaConfig, WhisperConfig, XGLMConfig, XLMConfig, XLMRobertaConfig, XLMRobertaXLConfig, XLNetConfig, xLSTMConfig, XmodConfig, ZambaConfig, Zamba2Config.
+
+During handling of the above exception, another exception occurred:
+
+Traceback (most recent call last):
+  File "/workspace/trainer-kit/DPO/run_dpo.py", line 960, in <module>
+    main()
+  File "/workspace/trainer-kit/DPO/run_dpo.py", line 751, in main
+    model, tokenizer = load_base_model_and_tokenizer(cfg, base_dir)
+  File "/workspace/trainer-kit/DPO/run_dpo.py", line 574, in load_base_model_and_tokenizer
+    model = AutoModelForCausalLM.from_pretrained(
+  File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/models/auto/auto_factory.py", line 376, in from_pretrained
+    raise ValueError(
+ValueError: Unrecognized configuration class <class 'transformers.models.mistral3.configuration_mistral3.Mistral3Config'> for this kind of AutoModel: AutoModelForCausalLM.
+Model type should be one of AfmoeConfig, ApertusConfig, ArceeConfig, AriaTextConfig, BambaConfig, BartConfig, BertConfig, BertGenerationConfig, BigBirdConfig, BigBirdPegasusConfig, BioGptConfig, BitNetConfig, BlenderbotConfig, BlenderbotSmallConfig, BloomConfig, BltConfig, CamembertConfig, LlamaConfig, CodeGenConfig, CohereConfig, Cohere2Config, CpmAntConfig, CTRLConfig, CwmConfig, Data2VecTextConfig, DbrxConfig, DeepseekV2Config, DeepseekV3Config, DiffLlamaConfig, DogeConfig, Dots1Config, ElectraConfig, Emu3Config, ErnieConfig, Ernie4_5Config, Ernie4_5_MoeConfig, Exaone4Config, FalconConfig, FalconH1Config, FalconMambaConfig, FlexOlmoConfig, FuyuConfig, GemmaConfig, Gemma2Config, Gemma3Config, Gemma3TextConfig, Gemma3nConfig, Gemma3nTextConfig, GitConfig, GlmConfig, Glm4Config, Glm4MoeConfig, GotOcr2Config, GPT2Config, GPT2Config, GPTBigCodeConfig, GPTNeoConfig, GPTNeoXConfig, GPTNeoXJapaneseConfig, GptOssConfig, GPTJConfig, GraniteConfig, GraniteMoeConfig, GraniteMoeHybridConfig, GraniteMoeSharedConfig, HeliumConfig, HunYuanDenseV1Config, HunYuanMoEV1Config, Jais2Config, JambaConfig, JetMoeConfig, Lfm2Config, Lfm2MoeConfig, LlamaConfig, Llama4Config, Llama4TextConfig, LongcatFlashConfig, MambaConfig, Mamba2Config, MarianConfig, MBartConfig, MegatronBertConfig, MiniMaxConfig, MinistralConfig, Ministral3Config, MistralConfig, MixtralConfig, MllamaConfig, ModernBertDecoderConfig, MoshiConfig, MptConfig, MusicgenConfig, MusicgenMelodyConfig, MvpConfig, NanoChatConfig, NemotronConfig, OlmoConfig, Olmo2Config, Olmo3Config, OlmoeConfig, OpenAIGPTConfig, OPTConfig, PegasusConfig, PersimmonConfig, PhiConfig, Phi3Config, Phi4MultimodalConfig, PhimoeConfig, PLBartConfig, ProphetNetConfig, Qwen2Config, Qwen2MoeConfig, Qwen3Config, Qwen3MoeConfig, Qwen3NextConfig, RecurrentGemmaConfig, ReformerConfig, RemBertConfig, RobertaConfig, RobertaPreLayerNormConfig, RoCBertConfig, RoFormerConfig, RwkvConfig, SeedOssConfig, SmolLM3Config, StableLmConfig, Starcoder2Config, TrOCRConfig, 
VaultGemmaConfig, WhisperConfig, XGLMConfig, XLMConfig, XLMRobertaConfig, XLMRobertaXLConfig, XLNetConfig, xLSTMConfig, XmodConfig, ZambaConfig, Zamba2Config.
diff --git a/dpo_run_24b_v1/wandb/run-20251226_153052-uo02exvi/files/requirements.txt b/dpo_run_24b_v1/wandb/run-20251226_153052-uo02exvi/files/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..79a4241d8724f018c9bdfcd7c289f1f14578574b
--- /dev/null
+++ b/dpo_run_24b_v1/wandb/run-20251226_153052-uo02exvi/files/requirements.txt
@@ -0,0 +1,104 @@
+exceptiongroup==1.3.1
+wheel==0.45.1
+python-dateutil==2.9.0.post0
+nvidia-ml-py==13.580.82
+huggingface_hub==1.2.3
+idna==3.11
+click==8.3.1
+numpy==2.2.6
+httpx==0.28.1
+tokenizers==0.22.1
+sympy==1.13.1
+yarl==1.22.0
+async-timeout==5.0.1
+datasets==4.4.2
+platformdirs==4.5.1
+nvidia-cuda-cupti-cu12==12.1.105
+nvidia-nvtx-cu12==12.1.105
+smmap==5.0.2
+accelerate==1.12.0
+requests==2.32.5
+aiohttp==3.13.2
+bitsandbytes==0.49.0
+nvidia-cublas-cu12==12.1.3.1
+mpmath==1.3.0
+torchaudio==2.5.1+cu121
+nvidia-cuda-runtime-cu12==12.1.105
+typing-inspection==0.4.2
+GitPython==3.1.45
+xxhash==3.6.0
+nvidia-cusolver-cu12==11.4.5.107
+pydantic_core==2.41.5
+six==1.17.0
+torchvision==0.20.1+cu121
+typing_extensions==4.15.0
+triton==3.1.0
+charset-normalizer==3.4.4
+nvitop==1.6.1
+wandb==0.23.1
+regex==2025.11.3
+pip==25.3
+nvidia-cusparse-cu12==12.1.0.106
+pytz==2025.2
+Jinja2==3.1.6
+psutil==7.2.0
+pillow==12.0.0
+packaging==25.0
+safetensors==0.7.0
+sentry-sdk==2.48.0
+gitdb==4.0.12
+httpcore==1.0.9
+setuptools==80.9.0
+nvidia-cufft-cu12==11.0.2.54
+anyio==4.12.0
+transformers==5.0.0.dev0
+pydantic==2.12.5
+fsspec==2025.10.0
+filelock==3.20.0
+PyYAML==6.0.3
+hf-xet==1.2.0
+nvidia-cudnn-cu12==9.1.0.70
+tqdm==4.67.1
+MarkupSafe==2.1.5
+attrs==25.4.0
+nvidia-cuda-nvrtc-cu12==12.1.105
+peft==0.18.0
+aiohappyeyeballs==2.6.1
+networkx==3.4.2
+nvidia-nvjitlink-cu12==12.9.86
+certifi==2025.11.12
+pyarrow==22.0.0
+dill==0.4.0
+protobuf==6.33.2
+aiosignal==1.4.0
+frozenlist==1.8.0
+urllib3==2.6.2
+propcache==0.4.1
+tzdata==2025.3
+pandas==2.3.3
+annotated-types==0.7.0
+shellingham==1.5.4
+nvidia-nccl-cu12==2.21.5
+multidict==6.7.0
+nvidia-curand-cu12==10.3.2.106
+trl==0.26.2
+torch==2.5.1+cu121
+h11==0.16.0
+multiprocess==0.70.18
+typer-slim==0.21.0
+wheel==0.45.1
+tomli==2.0.1
+autocommand==2.2.2
+jaraco.context==5.3.0
+zipp==3.19.2
+packaging==24.2
+inflect==7.3.1
+typing_extensions==4.12.2
+platformdirs==4.2.2
+jaraco.functools==4.0.1
+jaraco.collections==5.1.0
+jaraco.text==3.12.1
+backports.tarfile==1.2.0
+more-itertools==10.3.0
+importlib_metadata==8.0.0
+typeguard==4.3.0
diff --git a/dpo_run_24b_v1/wandb/run-20251226_153052-uo02exvi/files/wandb-metadata.json b/dpo_run_24b_v1/wandb/run-20251226_153052-uo02exvi/files/wandb-metadata.json
new file mode 100644
index 0000000000000000000000000000000000000000..3fa20e8409d5994ac4abb8f797769a1468ad64a3
--- /dev/null
+++ b/dpo_run_24b_v1/wandb/run-20251226_153052-uo02exvi/files/wandb-metadata.json
@@ -0,0 +1,47 @@
+{
+  "os":  "Linux-6.12.46+-x86_64-with-glibc2.35",
+  "python":  "CPython 3.10.12",
+  "startedAt":  "2025-12-26T15:30:52.545366Z",
+  "args":  [
+    "--config",
+    "config_dpo.yaml"
+  ],
+  "program":  "/workspace/trainer-kit/DPO/run_dpo.py",
+  "codePath":  "run_dpo.py",
+  "codePathLocal":  "run_dpo.py",
+  "email":  "shaiksirajuddin9949@gmail.com",
+  "root":  "runs/dpo_run_24b_v1",
+  "host":  "a100-2gpu-shell-session-757d587799-mfdvv",
+  "executable":  "/workspace/llm_finetuning_env/bin/python",
+  "cpu_count":  12,
+  "cpu_count_logical":  24,
+  "gpu":  "NVIDIA A100-SXM4-80GB",
+  "gpu_count":  2,
+  "disk":  {
+    "/":  {
+      "total":  "791251738624",
+      "used":  "317141377024"
+    }
+  },
+  "memory":  {
+    "total":  "359047892992"
+  },
+  "gpu_nvidia":  [
+    {
+      "name":  "NVIDIA A100-SXM4-80GB",
+      "memoryTotal":  "85899345920",
+      "cudaCores":  6912,
+      "architecture":  "Ampere",
+      "uuid":  "GPU-989794b0-ec3b-13bf-db9f-3fbe341497ba"
+    },
+    {
+      "name":  "NVIDIA A100-SXM4-80GB",
+      "memoryTotal":  "85899345920",
+      "cudaCores":  6912,
+      "architecture":  "Ampere",
+      "uuid":  "GPU-3790aa64-60ef-9eac-b0b1-b278ee8c0d40"
+    }
+  ],
+  "cudaVersion":  "13.0",
+  "writerId":  "b8zcal9au539h6npajcoix9vzmhwciop"
+}
\ No newline at end of file
diff --git a/dpo_run_24b_v1/wandb/run-20251226_153052-uo02exvi/files/wandb-summary.json b/dpo_run_24b_v1/wandb/run-20251226_153052-uo02exvi/files/wandb-summary.json
new file mode 100644
index 0000000000000000000000000000000000000000..8afb95f49483c85658a334253ad61c5e4b5851ef
--- /dev/null
+++ b/dpo_run_24b_v1/wandb/run-20251226_153052-uo02exvi/files/wandb-summary.json
@@ -0,0 +1 @@
+{"_wandb":{"runtime":2},"_runtime":2}
\ No newline at end of file
diff --git a/dpo_run_24b_v1/wandb/run-20251226_153052-uo02exvi/logs/debug-core.log b/dpo_run_24b_v1/wandb/run-20251226_153052-uo02exvi/logs/debug-core.log
new file mode 100644
index 0000000000000000000000000000000000000000..76208a6e01077c4faa9ae324a7da5d43b1e734d0
--- /dev/null
+++ b/dpo_run_24b_v1/wandb/run-20251226_153052-uo02exvi/logs/debug-core.log
@@ -0,0 +1,14 @@
+{"time":"2025-12-26T15:30:52.629789303Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpvr96ff7k/port-138039.txt","pid":138039,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false}
+{"time":"2025-12-26T15:30:52.630401044Z","level":"INFO","msg":"server: will exit if parent process dies","ppid":138039}
+{"time":"2025-12-26T15:30:52.630403718Z","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-138039-138113-2700074418/socket","Net":"unix"}}
+{"time":"2025-12-26T15:30:52.815186583Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"}
+{"time":"2025-12-26T15:30:52.821468691Z","level":"INFO","msg":"handleInformInit: received","streamId":"uo02exvi","id":"1(@)"}
+{"time":"2025-12-26T15:30:52.972985125Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"uo02exvi","id":"1(@)"}
+{"time":"2025-12-26T15:30:55.736694152Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"}
+{"time":"2025-12-26T15:30:55.736777163Z","level":"INFO","msg":"connection: closing","id":"1(@)"}
+{"time":"2025-12-26T15:30:55.736810022Z","level":"INFO","msg":"server is shutting down"}
+{"time":"2025-12-26T15:30:55.736839825Z","level":"INFO","msg":"connection: closed successfully","id":"1(@)"}
+{"time":"2025-12-26T15:30:55.736962241Z","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-138039-138113-2700074418/socket","Net":"unix"}}
+{"time":"2025-12-26T15:30:56.144618955Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"}
+{"time":"2025-12-26T15:30:56.144657804Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"}
+{"time":"2025-12-26T15:30:56.144687588Z","level":"INFO","msg":"server is closed"}
diff --git a/dpo_run_24b_v1/wandb/run-20251226_153052-uo02exvi/logs/debug-internal.log b/dpo_run_24b_v1/wandb/run-20251226_153052-uo02exvi/logs/debug-internal.log
new file mode 100644
index 0000000000000000000000000000000000000000..0378adb39b62895a61ad222f33e4fd39713b0ef1
--- /dev/null
+++ b/dpo_run_24b_v1/wandb/run-20251226_153052-uo02exvi/logs/debug-internal.log
@@ -0,0 +1,11 @@
+{"time":"2025-12-26T15:30:52.821600318Z","level":"INFO","msg":"stream: starting","core version":"0.23.1"}
+{"time":"2025-12-26T15:30:52.972768524Z","level":"INFO","msg":"stream: created new stream","id":"uo02exvi"}
+{"time":"2025-12-26T15:30:52.972861145Z","level":"INFO","msg":"handler: started","stream_id":"uo02exvi"}
+{"time":"2025-12-26T15:30:52.972976097Z","level":"INFO","msg":"stream: started","id":"uo02exvi"}
+{"time":"2025-12-26T15:30:52.973005528Z","level":"INFO","msg":"writer: started","stream_id":"uo02exvi"}
+{"time":"2025-12-26T15:30:52.973011764Z","level":"INFO","msg":"sender: started","stream_id":"uo02exvi"}
+{"time":"2025-12-26T15:30:55.736754973Z","level":"INFO","msg":"stream: closing","id":"uo02exvi"}
+{"time":"2025-12-26T15:30:56.052989718Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
+{"time":"2025-12-26T15:30:56.143693278Z","level":"INFO","msg":"handler: closed","stream_id":"uo02exvi"}
+{"time":"2025-12-26T15:30:56.143802165Z","level":"INFO","msg":"sender: closed","stream_id":"uo02exvi"}
+{"time":"2025-12-26T15:30:56.143823041Z","level":"INFO","msg":"stream: closed","id":"uo02exvi"}
diff --git a/dpo_run_24b_v1/wandb/run-20251226_153052-uo02exvi/logs/debug.log b/dpo_run_24b_v1/wandb/run-20251226_153052-uo02exvi/logs/debug.log
new file mode 100644
index 0000000000000000000000000000000000000000..4e701decea3f96e6fe7219f33f6b1e58c0c921a6
--- /dev/null
+++ b/dpo_run_24b_v1/wandb/run-20251226_153052-uo02exvi/logs/debug.log
@@ -0,0 +1,23 @@
+2025-12-26 15:30:52,546 INFO    MainThread:138039 [wandb_setup.py:_flush():80] Current SDK version is 0.23.1
+2025-12-26 15:30:52,546 INFO    MainThread:138039 [wandb_setup.py:_flush():80] Configure stats pid to 138039
+2025-12-26 15:30:52,547 INFO    MainThread:138039 [wandb_setup.py:_flush():80] Loading settings from /root/.config/wandb/settings
+2025-12-26 15:30:52,547 INFO    MainThread:138039 [wandb_setup.py:_flush():80] Loading settings from /workspace/trainer-kit/DPO/wandb/settings
+2025-12-26 15:30:52,547 INFO    MainThread:138039 [wandb_setup.py:_flush():80] Loading settings from environment variables
+2025-12-26 15:30:52,547 INFO    MainThread:138039 [wandb_init.py:setup_run_log_directory():714] Logging user logs to runs/dpo_run_24b_v1/wandb/run-20251226_153052-uo02exvi/logs/debug.log
+2025-12-26 15:30:52,547 INFO    MainThread:138039 [wandb_init.py:setup_run_log_directory():715] Logging internal logs to runs/dpo_run_24b_v1/wandb/run-20251226_153052-uo02exvi/logs/debug-internal.log
+2025-12-26 15:30:52,547 INFO    MainThread:138039 [wandb_init.py:init():841] calling init triggers
+2025-12-26 15:30:52,547 INFO    MainThread:138039 [wandb_init.py:init():846] wandb.init called with sweep_config: {}
+config: {'model': {'repo_id': '../../Models/Devstral-Small-2-24B-HS-CPT-SFT', 'revision': None, 'base_local_dir': 'base_model', 'trust_remote_code': True, 'tokenizer_use_fast': True, 'device_map': 'auto', 'torch_dtype': 'bfloat16', 'use_4bit': False, 'bnb_4bit_quant_type': 'nf4', 'bnb_4bit_use_double_quant': False, 'bnb_4bit_compute_dtype': 'bfloat16', 'attn_implementation': None}, 'data': {'train_jsonl': 'dpo_pairs_generated.jsonl', 'eval_jsonl': None, 'eval_split_ratio': 0.1, 'prompt_field': 'prompt', 'chosen_field': 'chosen', 'rejected_field': 'rejected', 'score_field': 'f1_score', 'format_type': 'chatml', 'system_prompt': 'You are a Hyperswitch Rust code analyzer. Identify functions/structs that need modification for a given task.\n\n## Output Format\n\n##OUTPUT\nExplain the data flow and why each component must change:\n- Flow: [Input → Processing → Output with arrows]\n- For each component: "The [ComponentName] ([path]) must [action] because [reason]—without this, [consequence]"\n- Explain coupling between components\n\n##SELECT\nmodify::crates/path/to/file.rs::impl::ComponentName\nadd::crates/another/file.rs::function::AnotherComponent\n<EOS>\n\n## Rules\n\n1. Use full paths: `remove::crates/folder/file.rs::Type::Name`\n2. Use `::` for nested items: `status::StructName::Type::Name`\n3. Always explain "must change because" and "without this"\n3. Types of components: function, struct, enum, impl, trait\n4. If there is extra information (e.g., enum variants), include that too.\n5. 
Start with ##OUTPUT, end with ##SELECT, terminate with <EOS>\n', 'max_length': 2048, 'shuffle': True, 'num_proc': 4}, 'peft': {'enabled': True, 'r': 16, 'lora_alpha': 32, 'lora_dropout': 0.05, 'bias': 'none', 'target_modules': 'auto'}, 'dpo': {'beta': 0.1, 'label_smoothing': 0.0, 'loss_type': 'sigmoid', 'use_reference_model': True, 'reference_free': False}, 'train': {'num_train_epochs': 3, 'per_device_train_batch_size': 1, 'per_device_eval_batch_size': 1, 'gradient_accumulation_steps': 8, 'learning_rate': '5e-5', 'weight_decay': 0.0, 'warmup_ratio': 0.1, 'lr_scheduler_type': 'cosine', 'optim': 'adamw_torch', 'max_grad_norm': 1.0, 'gradient_checkpointing': True, 'logging_steps': 2, 'save_strategy': 'steps', 'save_steps': 100, 'save_total_limit': 10, 'evaluation_strategy': 'steps', 'eval_steps': 25, 'load_best_model_at_end': True, 'early_stopping': {'enabled': True, 'patience': 5, 'min_delta': 0.001, 'metric': 'eval_loss', 'mode': 'min'}, 'resume_from_checkpoint': 'auto'}, 'run_dir': 'runs/dpo_run_24b_v1', '_wandb': {}}
+2025-12-26 15:30:52,547 INFO    MainThread:138039 [wandb_init.py:init():889] starting backend
+2025-12-26 15:30:52,815 INFO    MainThread:138039 [wandb_init.py:init():892] sending inform_init request
+2025-12-26 15:30:52,819 INFO    MainThread:138039 [wandb_init.py:init():900] backend started and connected
+2025-12-26 15:30:52,821 INFO    MainThread:138039 [wandb_init.py:init():970] updated telemetry
+2025-12-26 15:30:52,822 INFO    MainThread:138039 [wandb_init.py:init():994] communicating run to backend with 90.0 second timeout
+2025-12-26 15:30:53,141 INFO    MainThread:138039 [wandb_init.py:init():1041] starting run threads in backend
+2025-12-26 15:30:53,249 INFO    MainThread:138039 [wandb_run.py:_console_start():2521] atexit reg
+2025-12-26 15:30:53,249 INFO    MainThread:138039 [wandb_run.py:_redirect():2369] redirect: wrap_raw
+2025-12-26 15:30:53,250 INFO    MainThread:138039 [wandb_run.py:_redirect():2438] Wrapping output streams.
+2025-12-26 15:30:53,250 INFO    MainThread:138039 [wandb_run.py:_redirect():2461] Redirects installed.
+2025-12-26 15:30:53,255 INFO    MainThread:138039 [wandb_init.py:init():1081] run started, returning control to user process
+2025-12-26 15:30:55,736 INFO    wandb-AsyncioManager-main:138039 [service_client.py:_forward_responses():80] Reached EOF.
+2025-12-26 15:30:55,736 INFO    wandb-AsyncioManager-main:138039 [mailbox.py:close():137] Closing mailbox, abandoning 1 handles.
diff --git a/dpo_run_24b_v1/wandb/run-20251226_153052-uo02exvi/run-uo02exvi.wandb b/dpo_run_24b_v1/wandb/run-20251226_153052-uo02exvi/run-uo02exvi.wandb
new file mode 100644
index 0000000000000000000000000000000000000000..fa858d24125cb5f1875f060925f98c885ae4431e
Binary files /dev/null and b/dpo_run_24b_v1/wandb/run-20251226_153052-uo02exvi/run-uo02exvi.wandb differ
diff --git a/dpo_run_24b_v1/wandb/run-20251226_153152-wxs32uu8/files/config.yaml b/dpo_run_24b_v1/wandb/run-20251226_153152-wxs32uu8/files/config.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..3449fe27c5329b9ce97311e3a0cbc7616ac1989d
--- /dev/null
+++ b/dpo_run_24b_v1/wandb/run-20251226_153152-wxs32uu8/files/config.yaml
@@ -0,0 +1,165 @@
+_wandb:
+    value:
+        cli_version: 0.23.1
+        e:
+            3kcue4vill6aqei124wdyde6gjjhpcn4:
+                args:
+                    - --config
+                    - config_dpo.yaml
+                codePath: run_dpo.py
+                codePathLocal: run_dpo.py
+                cpu_count: 12
+                cpu_count_logical: 24
+                cudaVersion: "13.0"
+                disk:
+                    /:
+                        total: "791251738624"
+                        used: "317655887872"
+                email: shaiksirajuddin9949@gmail.com
+                executable: /workspace/llm_finetuning_env/bin/python
+                gpu: NVIDIA A100-SXM4-80GB
+                gpu_count: 2
+                gpu_nvidia:
+                    - architecture: Ampere
+                      cudaCores: 6912
+                      memoryTotal: "85899345920"
+                      name: NVIDIA A100-SXM4-80GB
+                      uuid: GPU-989794b0-ec3b-13bf-db9f-3fbe341497ba
+                    - architecture: Ampere
+                      cudaCores: 6912
+                      memoryTotal: "85899345920"
+                      name: NVIDIA A100-SXM4-80GB
+                      uuid: GPU-3790aa64-60ef-9eac-b0b1-b278ee8c0d40
+                host: a100-2gpu-shell-session-757d587799-mfdvv
+                memory:
+                    total: "359047892992"
+                os: Linux-6.12.46+-x86_64-with-glibc2.35
+                program: /workspace/trainer-kit/DPO/run_dpo.py
+                python: CPython 3.10.12
+                root: runs/dpo_run_24b_v1
+                startedAt: "2025-12-26T15:31:52.735494Z"
+                writerId: 3kcue4vill6aqei124wdyde6gjjhpcn4
+        m: []
+        python_version: 3.10.12
+        t:
+            "1":
+                - 1
+                - 11
+                - 41
+                - 49
+                - 51
+                - 71
+                - 84
+                - 98
+            "2":
+                - 1
+                - 11
+                - 41
+                - 49
+                - 51
+                - 71
+                - 84
+                - 98
+            "3":
+                - 15
+                - 16
+            "4": 3.10.12
+            "5": 0.23.1
+            "6": 5.0.0.dev0
+            "12": 0.23.1
+            "13": linux-x86_64
+data:
+    value:
+        chosen_field: chosen
+        eval_jsonl: null
+        eval_split_ratio: 0.1
+        format_type: chatml
+        max_length: 2048
+        num_proc: 4
+        prompt_field: prompt
+        rejected_field: rejected
+        score_field: f1_score
+        shuffle: true
+        system_prompt: |
+            You are a Hyperswitch Rust code analyzer. Identify functions/structs that need modification for a given task.
+
+            ## Output Format
+
+            ##OUTPUT
+            Explain the data flow and why each component must change:
+            - Flow: [Input → Processing → Output with arrows]
+            - For each component: "The [ComponentName] ([path]) must [action] because [reason]—without this, [consequence]"
+            - Explain coupling between components
+
+            ##SELECT
+            modify::crates/path/to/file.rs::impl::ComponentName
+            add::crates/another/file.rs::function::AnotherComponent
+            <EOS>
+
+            ## Rules
+
+            1. Use full paths: `remove::crates/folder/file.rs::Type::Name`
+            2. Use `::` for nested items: `status::StructName::Type::Name`
+            3. Always explain "must change because" and "without this"
+            3. Types of components: function, struct, enum, impl, trait
+            4. If there is extra information (e.g., enum variants), include that too.
+            5. Start with ##OUTPUT, end with ##SELECT, terminate with <EOS>
+        train_jsonl: dpo_pairs_generated.jsonl
+dpo:
+    value:
+        beta: 0.1
+        label_smoothing: 0
+        loss_type: sigmoid
+        reference_free: false
+        use_reference_model: true
+model:
+    value:
+        attn_implementation: null
+        base_local_dir: base_model
+        bnb_4bit_compute_dtype: bfloat16
+        bnb_4bit_quant_type: nf4
+        bnb_4bit_use_double_quant: false
+        device_map: auto
+        repo_id: ../../Models/Devstral-Small-2-24B-HS-CPT-SFT
+        revision: null
+        tokenizer_use_fast: true
+        torch_dtype: bfloat16
+        trust_remote_code: true
+        use_4bit: false
+peft:
+    value:
+        bias: none
+        enabled: true
+        lora_alpha: 32
+        lora_dropout: 0.05
+        r: 16
+        target_modules: auto
+run_dir:
+    value: runs/dpo_run_24b_v1
+train:
+    value:
+        early_stopping:
+            enabled: true
+            metric: eval_loss
+            min_delta: 0.001
+            mode: min
+            patience: 5
+        eval_steps: 25
+        evaluation_strategy: steps
+        gradient_accumulation_steps: 8
+        gradient_checkpointing: true
+        learning_rate: "5e-5"
+        load_best_model_at_end: true
+        logging_steps: 2
+        lr_scheduler_type: cosine
+        max_grad_norm: 1
+        num_train_epochs: 3
+        optim: adamw_torch
+        per_device_eval_batch_size: 1
+        per_device_train_batch_size: 1
+        resume_from_checkpoint: auto
+        save_steps: 100
+        save_strategy: steps
+        save_total_limit: 10
+        warmup_ratio: 0.1
+        weight_decay: 0
diff --git a/dpo_run_24b_v1/wandb/run-20251226_153152-wxs32uu8/files/output.log b/dpo_run_24b_v1/wandb/run-20251226_153152-wxs32uu8/files/output.log
new file mode 100644
index 0000000000000000000000000000000000000000..8eabab5aa036e5a4ccc25d2189497d59502edfa6
--- /dev/null
+++ b/dpo_run_24b_v1/wandb/run-20251226_153152-wxs32uu8/files/output.log
@@ -0,0 +1,45 @@
+Wandb initialized: project='dpo-training', name='auto-generated'
+2025-12-26 15:31:55,903 - INFO - Loading model from ../../Models/Devstral-Small-2-24B-HS-CPT-SFT with trust_remote_code=True
+`torch_dtype` is deprecated! Use `dtype` instead!
+Traceback (most recent call last):
+  File "/workspace/trainer-kit/DPO/run_dpo.py", line 559, in load_base_model_and_tokenizer
+    model = AutoModelForCausalLM.from_pretrained(
+  File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/models/auto/auto_factory.py", line 376, in from_pretrained
+    raise ValueError(
+ValueError: Unrecognized configuration class <class 'transformers.models.mistral3.configuration_mistral3.Mistral3Config'> for this kind of AutoModel: AutoModelForCausalLM.
+Model type should be one of AfmoeConfig, ApertusConfig, ArceeConfig, AriaTextConfig, BambaConfig, BartConfig, BertConfig, BertGenerationConfig, BigBirdConfig, BigBirdPegasusConfig, BioGptConfig, BitNetConfig, BlenderbotConfig, BlenderbotSmallConfig, BloomConfig, BltConfig, CamembertConfig, LlamaConfig, CodeGenConfig, CohereConfig, Cohere2Config, CpmAntConfig, CTRLConfig, CwmConfig, Data2VecTextConfig, DbrxConfig, DeepseekV2Config, DeepseekV3Config, DiffLlamaConfig, DogeConfig, Dots1Config, ElectraConfig, Emu3Config, ErnieConfig, Ernie4_5Config, Ernie4_5_MoeConfig, Exaone4Config, FalconConfig, FalconH1Config, FalconMambaConfig, FlexOlmoConfig, FuyuConfig, GemmaConfig, Gemma2Config, Gemma3Config, Gemma3TextConfig, Gemma3nConfig, Gemma3nTextConfig, GitConfig, GlmConfig, Glm4Config, Glm4MoeConfig, GotOcr2Config, GPT2Config, GPT2Config, GPTBigCodeConfig, GPTNeoConfig, GPTNeoXConfig, GPTNeoXJapaneseConfig, GptOssConfig, GPTJConfig, GraniteConfig, GraniteMoeConfig, GraniteMoeHybridConfig, GraniteMoeSharedConfig, HeliumConfig, HunYuanDenseV1Config, HunYuanMoEV1Config, Jais2Config, JambaConfig, JetMoeConfig, Lfm2Config, Lfm2MoeConfig, LlamaConfig, Llama4Config, Llama4TextConfig, LongcatFlashConfig, MambaConfig, Mamba2Config, MarianConfig, MBartConfig, MegatronBertConfig, MiniMaxConfig, MinistralConfig, Ministral3Config, MistralConfig, MixtralConfig, MllamaConfig, ModernBertDecoderConfig, MoshiConfig, MptConfig, MusicgenConfig, MusicgenMelodyConfig, MvpConfig, NanoChatConfig, NemotronConfig, OlmoConfig, Olmo2Config, Olmo3Config, OlmoeConfig, OpenAIGPTConfig, OPTConfig, PegasusConfig, PersimmonConfig, PhiConfig, Phi3Config, Phi4MultimodalConfig, PhimoeConfig, PLBartConfig, ProphetNetConfig, Qwen2Config, Qwen2MoeConfig, Qwen3Config, Qwen3MoeConfig, Qwen3NextConfig, RecurrentGemmaConfig, ReformerConfig, RemBertConfig, RobertaConfig, RobertaPreLayerNormConfig, RoCBertConfig, RoFormerConfig, RwkvConfig, SeedOssConfig, SmolLM3Config, StableLmConfig, Starcoder2Config, TrOCRConfig, 
VaultGemmaConfig, WhisperConfig, XGLMConfig, XLMConfig, XLMRobertaConfig, XLMRobertaXLConfig, XLNetConfig, xLSTMConfig, XmodConfig, ZambaConfig, Zamba2Config.
+
+During handling of the above exception, another exception occurred:
+
+Traceback (most recent call last):
+  File "/workspace/trainer-kit/DPO/run_dpo.py", line 957, in <module>
+    main()
+  File "/workspace/trainer-kit/DPO/run_dpo.py", line 748, in main
+    model, tokenizer = load_base_model_and_tokenizer(cfg, base_dir)
+  File "/workspace/trainer-kit/DPO/run_dpo.py", line 572, in load_base_model_and_tokenizer
+    model = AutoModelForCausalLM.from_pretrained(
+  File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/models/auto/auto_factory.py", line 376, in from_pretrained
+    raise ValueError(
+ValueError: Unrecognized configuration class <class 'transformers.models.mistral3.configuration_mistral3.Mistral3Config'> for this kind of AutoModel: AutoModelForCausalLM.
+Model type should be one of AfmoeConfig, ApertusConfig, ArceeConfig, AriaTextConfig, BambaConfig, BartConfig, BertConfig, BertGenerationConfig, BigBirdConfig, BigBirdPegasusConfig, BioGptConfig, BitNetConfig, BlenderbotConfig, BlenderbotSmallConfig, BloomConfig, BltConfig, CamembertConfig, LlamaConfig, CodeGenConfig, CohereConfig, Cohere2Config, CpmAntConfig, CTRLConfig, CwmConfig, Data2VecTextConfig, DbrxConfig, DeepseekV2Config, DeepseekV3Config, DiffLlamaConfig, DogeConfig, Dots1Config, ElectraConfig, Emu3Config, ErnieConfig, Ernie4_5Config, Ernie4_5_MoeConfig, Exaone4Config, FalconConfig, FalconH1Config, FalconMambaConfig, FlexOlmoConfig, FuyuConfig, GemmaConfig, Gemma2Config, Gemma3Config, Gemma3TextConfig, Gemma3nConfig, Gemma3nTextConfig, GitConfig, GlmConfig, Glm4Config, Glm4MoeConfig, GotOcr2Config, GPT2Config, GPT2Config, GPTBigCodeConfig, GPTNeoConfig, GPTNeoXConfig, GPTNeoXJapaneseConfig, GptOssConfig, GPTJConfig, GraniteConfig, GraniteMoeConfig, GraniteMoeHybridConfig, GraniteMoeSharedConfig, HeliumConfig, HunYuanDenseV1Config, HunYuanMoEV1Config, Jais2Config, JambaConfig, JetMoeConfig, Lfm2Config, Lfm2MoeConfig, LlamaConfig, Llama4Config, Llama4TextConfig, LongcatFlashConfig, MambaConfig, Mamba2Config, MarianConfig, MBartConfig, MegatronBertConfig, MiniMaxConfig, MinistralConfig, Ministral3Config, MistralConfig, MixtralConfig, MllamaConfig, ModernBertDecoderConfig, MoshiConfig, MptConfig, MusicgenConfig, MusicgenMelodyConfig, MvpConfig, NanoChatConfig, NemotronConfig, OlmoConfig, Olmo2Config, Olmo3Config, OlmoeConfig, OpenAIGPTConfig, OPTConfig, PegasusConfig, PersimmonConfig, PhiConfig, Phi3Config, Phi4MultimodalConfig, PhimoeConfig, PLBartConfig, ProphetNetConfig, Qwen2Config, Qwen2MoeConfig, Qwen3Config, Qwen3MoeConfig, Qwen3NextConfig, RecurrentGemmaConfig, ReformerConfig, RemBertConfig, RobertaConfig, RobertaPreLayerNormConfig, RoCBertConfig, RoFormerConfig, RwkvConfig, SeedOssConfig, SmolLM3Config, StableLmConfig, Starcoder2Config, TrOCRConfig, 
VaultGemmaConfig, WhisperConfig, XGLMConfig, XLMConfig, XLMRobertaConfig, XLMRobertaXLConfig, XLNetConfig, xLSTMConfig, XmodConfig, ZambaConfig, Zamba2Config.
+Traceback (most recent call last):
+  File "/workspace/trainer-kit/DPO/run_dpo.py", line 559, in load_base_model_and_tokenizer
+    model = AutoModelForCausalLM.from_pretrained(
+  File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/models/auto/auto_factory.py", line 376, in from_pretrained
+    raise ValueError(
+ValueError: Unrecognized configuration class <class 'transformers.models.mistral3.configuration_mistral3.Mistral3Config'> for this kind of AutoModel: AutoModelForCausalLM.
+Model type should be one of AfmoeConfig, ApertusConfig, ArceeConfig, AriaTextConfig, BambaConfig, BartConfig, BertConfig, BertGenerationConfig, BigBirdConfig, BigBirdPegasusConfig, BioGptConfig, BitNetConfig, BlenderbotConfig, BlenderbotSmallConfig, BloomConfig, BltConfig, CamembertConfig, LlamaConfig, CodeGenConfig, CohereConfig, Cohere2Config, CpmAntConfig, CTRLConfig, CwmConfig, Data2VecTextConfig, DbrxConfig, DeepseekV2Config, DeepseekV3Config, DiffLlamaConfig, DogeConfig, Dots1Config, ElectraConfig, Emu3Config, ErnieConfig, Ernie4_5Config, Ernie4_5_MoeConfig, Exaone4Config, FalconConfig, FalconH1Config, FalconMambaConfig, FlexOlmoConfig, FuyuConfig, GemmaConfig, Gemma2Config, Gemma3Config, Gemma3TextConfig, Gemma3nConfig, Gemma3nTextConfig, GitConfig, GlmConfig, Glm4Config, Glm4MoeConfig, GotOcr2Config, GPT2Config, GPT2Config, GPTBigCodeConfig, GPTNeoConfig, GPTNeoXConfig, GPTNeoXJapaneseConfig, GptOssConfig, GPTJConfig, GraniteConfig, GraniteMoeConfig, GraniteMoeHybridConfig, GraniteMoeSharedConfig, HeliumConfig, HunYuanDenseV1Config, HunYuanMoEV1Config, Jais2Config, JambaConfig, JetMoeConfig, Lfm2Config, Lfm2MoeConfig, LlamaConfig, Llama4Config, Llama4TextConfig, LongcatFlashConfig, MambaConfig, Mamba2Config, MarianConfig, MBartConfig, MegatronBertConfig, MiniMaxConfig, MinistralConfig, Ministral3Config, MistralConfig, MixtralConfig, MllamaConfig, ModernBertDecoderConfig, MoshiConfig, MptConfig, MusicgenConfig, MusicgenMelodyConfig, MvpConfig, NanoChatConfig, NemotronConfig, OlmoConfig, Olmo2Config, Olmo3Config, OlmoeConfig, OpenAIGPTConfig, OPTConfig, PegasusConfig, PersimmonConfig, PhiConfig, Phi3Config, Phi4MultimodalConfig, PhimoeConfig, PLBartConfig, ProphetNetConfig, Qwen2Config, Qwen2MoeConfig, Qwen3Config, Qwen3MoeConfig, Qwen3NextConfig, RecurrentGemmaConfig, ReformerConfig, RemBertConfig, RobertaConfig, RobertaPreLayerNormConfig, RoCBertConfig, RoFormerConfig, RwkvConfig, SeedOssConfig, SmolLM3Config, StableLmConfig, Starcoder2Config, TrOCRConfig, 
VaultGemmaConfig, WhisperConfig, XGLMConfig, XLMConfig, XLMRobertaConfig, XLMRobertaXLConfig, XLNetConfig, xLSTMConfig, XmodConfig, ZambaConfig, Zamba2Config.
+
+During handling of the above exception, another exception occurred:
+
+Traceback (most recent call last):
+  File "/workspace/trainer-kit/DPO/run_dpo.py", line 957, in <module>
+    main()
+  File "/workspace/trainer-kit/DPO/run_dpo.py", line 748, in main
+    model, tokenizer = load_base_model_and_tokenizer(cfg, base_dir)
+  File "/workspace/trainer-kit/DPO/run_dpo.py", line 572, in load_base_model_and_tokenizer
+    model = AutoModelForCausalLM.from_pretrained(
+  File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/models/auto/auto_factory.py", line 376, in from_pretrained
+    raise ValueError(
+ValueError: Unrecognized configuration class <class 'transformers.models.mistral3.configuration_mistral3.Mistral3Config'> for this kind of AutoModel: AutoModelForCausalLM.
+Model type should be one of AfmoeConfig, ApertusConfig, ArceeConfig, AriaTextConfig, BambaConfig, BartConfig, BertConfig, BertGenerationConfig, BigBirdConfig, BigBirdPegasusConfig, BioGptConfig, BitNetConfig, BlenderbotConfig, BlenderbotSmallConfig, BloomConfig, BltConfig, CamembertConfig, LlamaConfig, CodeGenConfig, CohereConfig, Cohere2Config, CpmAntConfig, CTRLConfig, CwmConfig, Data2VecTextConfig, DbrxConfig, DeepseekV2Config, DeepseekV3Config, DiffLlamaConfig, DogeConfig, Dots1Config, ElectraConfig, Emu3Config, ErnieConfig, Ernie4_5Config, Ernie4_5_MoeConfig, Exaone4Config, FalconConfig, FalconH1Config, FalconMambaConfig, FlexOlmoConfig, FuyuConfig, GemmaConfig, Gemma2Config, Gemma3Config, Gemma3TextConfig, Gemma3nConfig, Gemma3nTextConfig, GitConfig, GlmConfig, Glm4Config, Glm4MoeConfig, GotOcr2Config, GPT2Config, GPT2Config, GPTBigCodeConfig, GPTNeoConfig, GPTNeoXConfig, GPTNeoXJapaneseConfig, GptOssConfig, GPTJConfig, GraniteConfig, GraniteMoeConfig, GraniteMoeHybridConfig, GraniteMoeSharedConfig, HeliumConfig, HunYuanDenseV1Config, HunYuanMoEV1Config, Jais2Config, JambaConfig, JetMoeConfig, Lfm2Config, Lfm2MoeConfig, LlamaConfig, Llama4Config, Llama4TextConfig, LongcatFlashConfig, MambaConfig, Mamba2Config, MarianConfig, MBartConfig, MegatronBertConfig, MiniMaxConfig, MinistralConfig, Ministral3Config, MistralConfig, MixtralConfig, MllamaConfig, ModernBertDecoderConfig, MoshiConfig, MptConfig, MusicgenConfig, MusicgenMelodyConfig, MvpConfig, NanoChatConfig, NemotronConfig, OlmoConfig, Olmo2Config, Olmo3Config, OlmoeConfig, OpenAIGPTConfig, OPTConfig, PegasusConfig, PersimmonConfig, PhiConfig, Phi3Config, Phi4MultimodalConfig, PhimoeConfig, PLBartConfig, ProphetNetConfig, Qwen2Config, Qwen2MoeConfig, Qwen3Config, Qwen3MoeConfig, Qwen3NextConfig, RecurrentGemmaConfig, ReformerConfig, RemBertConfig, RobertaConfig, RobertaPreLayerNormConfig, RoCBertConfig, RoFormerConfig, RwkvConfig, SeedOssConfig, SmolLM3Config, StableLmConfig, Starcoder2Config, TrOCRConfig, 
VaultGemmaConfig, WhisperConfig, XGLMConfig, XLMConfig, XLMRobertaConfig, XLMRobertaXLConfig, XLNetConfig, xLSTMConfig, XmodConfig, ZambaConfig, Zamba2Config.
diff --git a/dpo_run_24b_v1/wandb/run-20251226_153152-wxs32uu8/files/requirements.txt b/dpo_run_24b_v1/wandb/run-20251226_153152-wxs32uu8/files/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..79a4241d8724f018c9bdfcd7c289f1f14578574b
--- /dev/null
+++ b/dpo_run_24b_v1/wandb/run-20251226_153152-wxs32uu8/files/requirements.txt
@@ -0,0 +1,104 @@
+exceptiongroup==1.3.1
+wheel==0.45.1
+python-dateutil==2.9.0.post0
+nvidia-ml-py==13.580.82
+huggingface_hub==1.2.3
+idna==3.11
+click==8.3.1
+numpy==2.2.6
+httpx==0.28.1
+tokenizers==0.22.1
+sympy==1.13.1
+yarl==1.22.0
+async-timeout==5.0.1
+datasets==4.4.2
+platformdirs==4.5.1
+nvidia-cuda-cupti-cu12==12.1.105
+nvidia-nvtx-cu12==12.1.105
+smmap==5.0.2
+accelerate==1.12.0
+requests==2.32.5
+aiohttp==3.13.2
+bitsandbytes==0.49.0
+nvidia-cublas-cu12==12.1.3.1
+mpmath==1.3.0
+torchaudio==2.5.1+cu121
+nvidia-cuda-runtime-cu12==12.1.105
+typing-inspection==0.4.2
+GitPython==3.1.45
+xxhash==3.6.0
+nvidia-cusolver-cu12==11.4.5.107
+pydantic_core==2.41.5
+six==1.17.0
+torchvision==0.20.1+cu121
+typing_extensions==4.15.0
+triton==3.1.0
+charset-normalizer==3.4.4
+nvitop==1.6.1
+wandb==0.23.1
+regex==2025.11.3
+pip==25.3
+nvidia-cusparse-cu12==12.1.0.106
+pytz==2025.2
+Jinja2==3.1.6
+psutil==7.2.0
+pillow==12.0.0
+packaging==25.0
+safetensors==0.7.0
+sentry-sdk==2.48.0
+gitdb==4.0.12
+httpcore==1.0.9
+setuptools==80.9.0
+nvidia-cufft-cu12==11.0.2.54
+anyio==4.12.0
+transformers==5.0.0.dev0
+pydantic==2.12.5
+fsspec==2025.10.0
+filelock==3.20.0
+PyYAML==6.0.3
+hf-xet==1.2.0
+nvidia-cudnn-cu12==9.1.0.70
+tqdm==4.67.1
+MarkupSafe==2.1.5
+attrs==25.4.0
+nvidia-cuda-nvrtc-cu12==12.1.105
+peft==0.18.0
+aiohappyeyeballs==2.6.1
+networkx==3.4.2
+nvidia-nvjitlink-cu12==12.9.86
+certifi==2025.11.12
+pyarrow==22.0.0
+dill==0.4.0
+protobuf==6.33.2
+aiosignal==1.4.0
+frozenlist==1.8.0
+urllib3==2.6.2
+propcache==0.4.1
+tzdata==2025.3
+pandas==2.3.3
+annotated-types==0.7.0
+shellingham==1.5.4
+nvidia-nccl-cu12==2.21.5
+multidict==6.7.0
+nvidia-curand-cu12==10.3.2.106
+trl==0.26.2
+torch==2.5.1+cu121
+h11==0.16.0
+multiprocess==0.70.18
+typer-slim==0.21.0
+wheel==0.45.1
+tomli==2.0.1
+autocommand==2.2.2
+jaraco.context==5.3.0
+zipp==3.19.2
+packaging==24.2
+inflect==7.3.1
+typing_extensions==4.12.2
+platformdirs==4.2.2
+jaraco.functools==4.0.1
+jaraco.collections==5.1.0
+jaraco.text==3.12.1
+backports.tarfile==1.2.0
+more-itertools==10.3.0
+importlib_metadata==8.0.0
+typeguard==4.3.0
diff --git a/dpo_run_24b_v1/wandb/run-20251226_153152-wxs32uu8/files/wandb-metadata.json b/dpo_run_24b_v1/wandb/run-20251226_153152-wxs32uu8/files/wandb-metadata.json
new file mode 100644
index 0000000000000000000000000000000000000000..6ba85c7a282bbcc9575cac8491ebaf054d29b037
--- /dev/null
+++ b/dpo_run_24b_v1/wandb/run-20251226_153152-wxs32uu8/files/wandb-metadata.json
@@ -0,0 +1,47 @@
+{
+  "os":  "Linux-6.12.46+-x86_64-with-glibc2.35",
+  "python":  "CPython 3.10.12",
+  "startedAt":  "2025-12-26T15:31:52.735494Z",
+  "args":  [
+    "--config",
+    "config_dpo.yaml"
+  ],
+  "program":  "/workspace/trainer-kit/DPO/run_dpo.py",
+  "codePath":  "run_dpo.py",
+  "codePathLocal":  "run_dpo.py",
+  "email":  "shaiksirajuddin9949@gmail.com",
+  "root":  "runs/dpo_run_24b_v1",
+  "host":  "a100-2gpu-shell-session-757d587799-mfdvv",
+  "executable":  "/workspace/llm_finetuning_env/bin/python",
+  "cpu_count":  12,
+  "cpu_count_logical":  24,
+  "gpu":  "NVIDIA A100-SXM4-80GB",
+  "gpu_count":  2,
+  "disk":  {
+    "/":  {
+      "total":  "791251738624",
+      "used":  "317655887872"
+    }
+  },
+  "memory":  {
+    "total":  "359047892992"
+  },
+  "gpu_nvidia":  [
+    {
+      "name":  "NVIDIA A100-SXM4-80GB",
+      "memoryTotal":  "85899345920",
+      "cudaCores":  6912,
+      "architecture":  "Ampere",
+      "uuid":  "GPU-989794b0-ec3b-13bf-db9f-3fbe341497ba"
+    },
+    {
+      "name":  "NVIDIA A100-SXM4-80GB",
+      "memoryTotal":  "85899345920",
+      "cudaCores":  6912,
+      "architecture":  "Ampere",
+      "uuid":  "GPU-3790aa64-60ef-9eac-b0b1-b278ee8c0d40"
+    }
+  ],
+  "cudaVersion":  "13.0",
+  "writerId":  "3kcue4vill6aqei124wdyde6gjjhpcn4"
+}
\ No newline at end of file
diff --git a/dpo_run_24b_v1/wandb/run-20251226_153152-wxs32uu8/files/wandb-summary.json b/dpo_run_24b_v1/wandb/run-20251226_153152-wxs32uu8/files/wandb-summary.json
new file mode 100644
index 0000000000000000000000000000000000000000..8afb95f49483c85658a334253ad61c5e4b5851ef
--- /dev/null
+++ b/dpo_run_24b_v1/wandb/run-20251226_153152-wxs32uu8/files/wandb-summary.json
@@ -0,0 +1 @@
+{"_wandb":{"runtime":2},"_runtime":2}
\ No newline at end of file
diff --git a/dpo_run_24b_v1/wandb/run-20251226_153152-wxs32uu8/logs/debug-core.log b/dpo_run_24b_v1/wandb/run-20251226_153152-wxs32uu8/logs/debug-core.log
new file mode 100644
index 0000000000000000000000000000000000000000..f12a0452f74987b968d837590d74c67e8d8381c7
--- /dev/null
+++ b/dpo_run_24b_v1/wandb/run-20251226_153152-wxs32uu8/logs/debug-core.log
@@ -0,0 +1,14 @@
+{"time":"2025-12-26T15:31:52.82802196Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpqkio4efp/port-138465.txt","pid":138465,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false}
+{"time":"2025-12-26T15:31:52.828849471Z","level":"INFO","msg":"server: will exit if parent process dies","ppid":138465}
+{"time":"2025-12-26T15:31:52.828808724Z","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-138465-138539-4238173929/socket","Net":"unix"}}
+{"time":"2025-12-26T15:31:53.011057052Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"}
+{"time":"2025-12-26T15:31:53.017383764Z","level":"INFO","msg":"handleInformInit: received","streamId":"wxs32uu8","id":"1(@)"}
+{"time":"2025-12-26T15:31:53.176090883Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"wxs32uu8","id":"1(@)"}
+{"time":"2025-12-26T15:31:56.103528174Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"}
+{"time":"2025-12-26T15:31:56.103608964Z","level":"INFO","msg":"connection: closing","id":"1(@)"}
+{"time":"2025-12-26T15:31:56.103654139Z","level":"INFO","msg":"server is shutting down"}
+{"time":"2025-12-26T15:31:56.103666347Z","level":"INFO","msg":"connection: closed successfully","id":"1(@)"}
+{"time":"2025-12-26T15:31:56.103767972Z","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-138465-138539-4238173929/socket","Net":"unix"}}
+{"time":"2025-12-26T15:31:56.539431271Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"}
+{"time":"2025-12-26T15:31:56.539481282Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"}
+{"time":"2025-12-26T15:31:56.539506784Z","level":"INFO","msg":"server is closed"}
diff --git a/dpo_run_24b_v1/wandb/run-20251226_153152-wxs32uu8/logs/debug-internal.log b/dpo_run_24b_v1/wandb/run-20251226_153152-wxs32uu8/logs/debug-internal.log
new file mode 100644
index 0000000000000000000000000000000000000000..8ee17b3b9014e69e26cc48cdf4c36433010069ad
--- /dev/null
+++ b/dpo_run_24b_v1/wandb/run-20251226_153152-wxs32uu8/logs/debug-internal.log
@@ -0,0 +1,11 @@
+{"time":"2025-12-26T15:31:53.017531144Z","level":"INFO","msg":"stream: starting","core version":"0.23.1"}
+{"time":"2025-12-26T15:31:53.175841849Z","level":"INFO","msg":"stream: created new stream","id":"wxs32uu8"}
+{"time":"2025-12-26T15:31:53.175922557Z","level":"INFO","msg":"handler: started","stream_id":"wxs32uu8"}
+{"time":"2025-12-26T15:31:53.176076095Z","level":"INFO","msg":"stream: started","id":"wxs32uu8"}
+{"time":"2025-12-26T15:31:53.176100332Z","level":"INFO","msg":"writer: started","stream_id":"wxs32uu8"}
+{"time":"2025-12-26T15:31:53.176110819Z","level":"INFO","msg":"sender: started","stream_id":"wxs32uu8"}
+{"time":"2025-12-26T15:31:56.103613554Z","level":"INFO","msg":"stream: closing","id":"wxs32uu8"}
+{"time":"2025-12-26T15:31:56.401489323Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
+{"time":"2025-12-26T15:31:56.538246836Z","level":"INFO","msg":"handler: closed","stream_id":"wxs32uu8"}
+{"time":"2025-12-26T15:31:56.538337251Z","level":"INFO","msg":"sender: closed","stream_id":"wxs32uu8"}
+{"time":"2025-12-26T15:31:56.538352318Z","level":"INFO","msg":"stream: closed","id":"wxs32uu8"}
diff --git a/dpo_run_24b_v1/wandb/run-20251226_153152-wxs32uu8/logs/debug.log b/dpo_run_24b_v1/wandb/run-20251226_153152-wxs32uu8/logs/debug.log
new file mode 100644
index 0000000000000000000000000000000000000000..a372bff419740281f1702364af9ed749fca71034
--- /dev/null
+++ b/dpo_run_24b_v1/wandb/run-20251226_153152-wxs32uu8/logs/debug.log
@@ -0,0 +1,23 @@
+2025-12-26 15:31:52,737 INFO    MainThread:138465 [wandb_setup.py:_flush():80] Current SDK version is 0.23.1
+2025-12-26 15:31:52,737 INFO    MainThread:138465 [wandb_setup.py:_flush():80] Configure stats pid to 138465
+2025-12-26 15:31:52,737 INFO    MainThread:138465 [wandb_setup.py:_flush():80] Loading settings from /root/.config/wandb/settings
+2025-12-26 15:31:52,737 INFO    MainThread:138465 [wandb_setup.py:_flush():80] Loading settings from /workspace/trainer-kit/DPO/wandb/settings
+2025-12-26 15:31:52,737 INFO    MainThread:138465 [wandb_setup.py:_flush():80] Loading settings from environment variables
+2025-12-26 15:31:52,737 INFO    MainThread:138465 [wandb_init.py:setup_run_log_directory():714] Logging user logs to runs/dpo_run_24b_v1/wandb/run-20251226_153152-wxs32uu8/logs/debug.log
+2025-12-26 15:31:52,737 INFO    MainThread:138465 [wandb_init.py:setup_run_log_directory():715] Logging internal logs to runs/dpo_run_24b_v1/wandb/run-20251226_153152-wxs32uu8/logs/debug-internal.log
+2025-12-26 15:31:52,737 INFO    MainThread:138465 [wandb_init.py:init():841] calling init triggers
+2025-12-26 15:31:52,737 INFO    MainThread:138465 [wandb_init.py:init():846] wandb.init called with sweep_config: {}
+config: {'model': {'repo_id': '../../Models/Devstral-Small-2-24B-HS-CPT-SFT', 'revision': None, 'base_local_dir': 'base_model', 'trust_remote_code': True, 'tokenizer_use_fast': True, 'device_map': 'auto', 'torch_dtype': 'bfloat16', 'use_4bit': False, 'bnb_4bit_quant_type': 'nf4', 'bnb_4bit_use_double_quant': False, 'bnb_4bit_compute_dtype': 'bfloat16', 'attn_implementation': None}, 'data': {'train_jsonl': 'dpo_pairs_generated.jsonl', 'eval_jsonl': None, 'eval_split_ratio': 0.1, 'prompt_field': 'prompt', 'chosen_field': 'chosen', 'rejected_field': 'rejected', 'score_field': 'f1_score', 'format_type': 'chatml', 'system_prompt': 'You are a Hyperswitch Rust code analyzer. Identify functions/structs that need modification for a given task.\n\n## Output Format\n\n##OUTPUT\nExplain the data flow and why each component must change:\n- Flow: [Input → Processing → Output with arrows]\n- For each component: "The [ComponentName] ([path]) must [action] because [reason]—without this, [consequence]"\n- Explain coupling between components\n\n##SELECT\nmodify::crates/path/to/file.rs::impl::ComponentName\nadd::crates/another/file.rs::function::AnotherComponent\n<EOS>\n\n## Rules\n\n1. Use full paths: `remove::crates/folder/file.rs::Type::Name`\n2. Use `::` for nested items: `status::StructName::Type::Name`\n3. Always explain "must change because" and "without this"\n3. Types of components: function, struct, enum, impl, trait\n4. If there is extra information (e.g., enum variants), include that too.\n5. 
Start with ##OUTPUT, end with ##SELECT, terminate with <EOS>\n', 'max_length': 2048, 'shuffle': True, 'num_proc': 4}, 'peft': {'enabled': True, 'r': 16, 'lora_alpha': 32, 'lora_dropout': 0.05, 'bias': 'none', 'target_modules': 'auto'}, 'dpo': {'beta': 0.1, 'label_smoothing': 0.0, 'loss_type': 'sigmoid', 'use_reference_model': True, 'reference_free': False}, 'train': {'num_train_epochs': 3, 'per_device_train_batch_size': 1, 'per_device_eval_batch_size': 1, 'gradient_accumulation_steps': 8, 'learning_rate': '5e-5', 'weight_decay': 0.0, 'warmup_ratio': 0.1, 'lr_scheduler_type': 'cosine', 'optim': 'adamw_torch', 'max_grad_norm': 1.0, 'gradient_checkpointing': True, 'logging_steps': 2, 'save_strategy': 'steps', 'save_steps': 100, 'save_total_limit': 10, 'evaluation_strategy': 'steps', 'eval_steps': 25, 'load_best_model_at_end': True, 'early_stopping': {'enabled': True, 'patience': 5, 'min_delta': 0.001, 'metric': 'eval_loss', 'mode': 'min'}, 'resume_from_checkpoint': 'auto'}, 'run_dir': 'runs/dpo_run_24b_v1', '_wandb': {}}
+2025-12-26 15:31:52,737 INFO    MainThread:138465 [wandb_init.py:init():889] starting backend
+2025-12-26 15:31:53,010 INFO    MainThread:138465 [wandb_init.py:init():892] sending inform_init request
+2025-12-26 15:31:53,015 INFO    MainThread:138465 [wandb_init.py:init():900] backend started and connected
+2025-12-26 15:31:53,017 INFO    MainThread:138465 [wandb_init.py:init():970] updated telemetry
+2025-12-26 15:31:53,018 INFO    MainThread:138465 [wandb_init.py:init():994] communicating run to backend with 90.0 second timeout
+2025-12-26 15:31:53,479 INFO    MainThread:138465 [wandb_init.py:init():1041] starting run threads in backend
+2025-12-26 15:31:53,592 INFO    MainThread:138465 [wandb_run.py:_console_start():2521] atexit reg
+2025-12-26 15:31:53,592 INFO    MainThread:138465 [wandb_run.py:_redirect():2369] redirect: wrap_raw
+2025-12-26 15:31:53,592 INFO    MainThread:138465 [wandb_run.py:_redirect():2438] Wrapping output streams.
+2025-12-26 15:31:53,592 INFO    MainThread:138465 [wandb_run.py:_redirect():2461] Redirects installed.
+2025-12-26 15:31:53,597 INFO    MainThread:138465 [wandb_init.py:init():1081] run started, returning control to user process
+2025-12-26 15:31:56,103 INFO    wandb-AsyncioManager-main:138465 [service_client.py:_forward_responses():80] Reached EOF.
+2025-12-26 15:31:56,103 INFO    wandb-AsyncioManager-main:138465 [mailbox.py:close():137] Closing mailbox, abandoning 1 handles.
diff --git a/dpo_run_24b_v1/wandb/run-20251226_153152-wxs32uu8/run-wxs32uu8.wandb b/dpo_run_24b_v1/wandb/run-20251226_153152-wxs32uu8/run-wxs32uu8.wandb
new file mode 100644
index 0000000000000000000000000000000000000000..2bfadbc337e23f70099d89bc43df9a553caa0285
Binary files /dev/null and b/dpo_run_24b_v1/wandb/run-20251226_153152-wxs32uu8/run-wxs32uu8.wandb differ
diff --git a/dpo_run_24b_v1/wandb/run-20251226_153336-fb8js9es/files/config.yaml b/dpo_run_24b_v1/wandb/run-20251226_153336-fb8js9es/files/config.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..81570a3a062112657a376e1f83859627aeb7a412
--- /dev/null
+++ b/dpo_run_24b_v1/wandb/run-20251226_153336-fb8js9es/files/config.yaml
@@ -0,0 +1,165 @@
+_wandb:
+    value:
+        cli_version: 0.23.1
+        e:
+            yg288fsgj0ia8gpi3lsxxd4iicxacpwh:
+                args:
+                    - --config
+                    - config_dpo.yaml
+                codePath: run_dpo.py
+                codePathLocal: run_dpo.py
+                cpu_count: 12
+                cpu_count_logical: 24
+                cudaVersion: "13.0"
+                disk:
+                    /:
+                        total: "791251738624"
+                        used: "318370951168"
+                email: shaiksirajuddin9949@gmail.com
+                executable: /workspace/llm_finetuning_env/bin/python
+                gpu: NVIDIA A100-SXM4-80GB
+                gpu_count: 2
+                gpu_nvidia:
+                    - architecture: Ampere
+                      cudaCores: 6912
+                      memoryTotal: "85899345920"
+                      name: NVIDIA A100-SXM4-80GB
+                      uuid: GPU-989794b0-ec3b-13bf-db9f-3fbe341497ba
+                    - architecture: Ampere
+                      cudaCores: 6912
+                      memoryTotal: "85899345920"
+                      name: NVIDIA A100-SXM4-80GB
+                      uuid: GPU-3790aa64-60ef-9eac-b0b1-b278ee8c0d40
+                host: a100-2gpu-shell-session-757d587799-mfdvv
+                memory:
+                    total: "359047892992"
+                os: Linux-6.12.46+-x86_64-with-glibc2.35
+                program: /workspace/trainer-kit/DPO/run_dpo.py
+                python: CPython 3.10.12
+                root: runs/dpo_run_24b_v1
+                startedAt: "2025-12-26T15:33:36.434359Z"
+                writerId: yg288fsgj0ia8gpi3lsxxd4iicxacpwh
+        m: []
+        python_version: 3.10.12
+        t:
+            "1":
+                - 1
+                - 11
+                - 41
+                - 49
+                - 51
+                - 71
+                - 84
+                - 98
+            "2":
+                - 1
+                - 11
+                - 41
+                - 49
+                - 51
+                - 71
+                - 84
+                - 98
+            "3":
+                - 15
+                - 16
+            "4": 3.10.12
+            "5": 0.23.1
+            "6": 5.0.0.dev0
+            "12": 0.23.1
+            "13": linux-x86_64
+data:
+    value:
+        chosen_field: chosen
+        eval_jsonl: null
+        eval_split_ratio: 0.1
+        format_type: chatml
+        max_length: 2048
+        num_proc: 4
+        prompt_field: prompt
+        rejected_field: rejected
+        score_field: f1_score
+        shuffle: true
+        system_prompt: |
+            You are a Hyperswitch Rust code analyzer. Identify functions/structs that need modification for a given task.
+
+            ## Output Format
+
+            ##OUTPUT
+            Explain the data flow and why each component must change:
+            - Flow: [Input → Processing → Output with arrows]
+            - For each component: "The [ComponentName] ([path]) must [action] because [reason]—without this, [consequence]"
+            - Explain coupling between components
+
+            ##SELECT
+            modify::crates/path/to/file.rs::impl::ComponentName
+            add::crates/another/file.rs::function::AnotherComponent
+            <EOS>
+
+            ## Rules
+
+            1. Use full paths: `remove::crates/folder/file.rs::Type::Name`
+            2. Use `::` for nested items: `status::StructName::Type::Name`
+            3. Always explain "must change because" and "without this"
+            3. Types of components: function, struct, enum, impl, trait
+            4. If there is extra information (e.g., enum variants), include that too.
+            5. Start with ##OUTPUT, end with ##SELECT, terminate with <EOS>
+        train_jsonl: dpo_pairs_generated.jsonl
+dpo:
+    value:
+        beta: 0.1
+        label_smoothing: 0
+        loss_type: sigmoid
+        reference_free: false
+        use_reference_model: true
+model:
+    value:
+        attn_implementation: null
+        base_local_dir: base_model
+        bnb_4bit_compute_dtype: bfloat16
+        bnb_4bit_quant_type: nf4
+        bnb_4bit_use_double_quant: false
+        device_map: auto
+        repo_id: ../../Models/Devstral-Small-2-24B-HS-CPT-SFT
+        revision: null
+        tokenizer_use_fast: true
+        torch_dtype: bfloat16
+        trust_remote_code: true
+        use_4bit: false
+peft:
+    value:
+        bias: none
+        enabled: true
+        lora_alpha: 32
+        lora_dropout: 0.05
+        r: 16
+        target_modules: auto
+run_dir:
+    value: runs/dpo_run_24b_v1
+train:
+    value:
+        early_stopping:
+            enabled: true
+            metric: eval_loss
+            min_delta: 0.001
+            mode: min
+            patience: 5
+        eval_steps: 25
+        evaluation_strategy: steps
+        gradient_accumulation_steps: 8
+        gradient_checkpointing: true
+        learning_rate: "5e-5"
+        load_best_model_at_end: true
+        logging_steps: 2
+        lr_scheduler_type: cosine
+        max_grad_norm: 1
+        num_train_epochs: 3
+        optim: adamw_torch
+        per_device_eval_batch_size: 1
+        per_device_train_batch_size: 1
+        resume_from_checkpoint: auto
+        save_steps: 100
+        save_strategy: steps
+        save_total_limit: 10
+        warmup_ratio: 0.1
+        weight_decay: 0
diff --git a/dpo_run_24b_v1/wandb/run-20251226_153336-fb8js9es/files/output.log b/dpo_run_24b_v1/wandb/run-20251226_153336-fb8js9es/files/output.log
new file mode 100644
index 0000000000000000000000000000000000000000..f569f57dac71383af06c679e4422dbd6cc2bf211
--- /dev/null
+++ b/dpo_run_24b_v1/wandb/run-20251226_153336-fb8js9es/files/output.log
@@ -0,0 +1,76 @@
+Wandb initialized: project='dpo-training', name='auto-generated'
+2025-12-26 15:33:39,399 - INFO - Detected Mistral3 model architecture, loading with specific class
+Loading weights: 100%|█| 585/585 [00:13<00:00, 41.90it/s, Materializing param=model.vision_tower.transfo
+2025-12-26 15:33:57,664 - INFO - Ensuring all parameters are materialized...
+Loading reference model (frozen copy)...
+2025-12-26 15:34:02,022 - INFO - Detected Mistral3 model architecture, loading with specific class
+Loading weights: 100%|█| 585/585 [00:13<00:00, 42.74it/s, Materializing param=model.vision_tower.transfo
+2025-12-26 15:34:17,738 - INFO - Ensuring all parameters are materialized...
+Reference model loaded and frozen
+2025-12-26 15:34:18,661 - INFO - HTTP Request: HEAD https://s3.amazonaws.com/datasets.huggingface.co/datasets/datasets/json/json.py "HTTP/1.1 200 OK"
+Generating train split: 7612 examples [00:00, 76349.43 examples/s]
+2025-12-26 15:34:18,785 - INFO - Formatting train DPO data...
+Formatting train DPO data (num_proc=4): 100%|██████████████| 6850/6850 [00:02<00:00, 2809.98 examples/s]
+Filter: 100%|█████████████████████████████████████████████| 6850/6850 [00:00<00:00, 58478.08 examples/s]
+2025-12-26 15:34:21,607 - INFO - Train dataset after filtering: 6850 examples
+2025-12-26 15:34:21,608 - INFO - train dataset validation passed: 6850 examples
+2025-12-26 15:34:21,608 - INFO - Formatting eval DPO data...
+Formatting eval DPO data (num_proc=4): 100%|██████████████████| 762/762 [00:02<00:00, 332.47 examples/s]
+Filter: 100%|███████████████████████████████████████████████| 762/762 [00:00<00:00, 36813.75 examples/s]
+2025-12-26 15:34:24,198 - INFO - Eval dataset after filtering: 762 examples
+2025-12-26 15:34:24,199 - INFO - eval dataset validation passed: 762 examples
+warmup_ratio is deprecated and will be removed in v5.2. Use `warmup_steps` instead.
+Early stopping enabled: patience=5, min_delta=0.001
+2025-12-26 15:34:24,239 - INFO - DPO Training with beta=0.1, loss_type=sigmoid
+warmup_ratio is deprecated and will be removed in v5.2. Use `warmup_steps` instead.
+Extracting prompt in train dataset: 100%|██████████████████| 6850/6850 [00:01<00:00, 5396.61 examples/s]
+Applying chat template to train dataset: 100%|█████████████| 6850/6850 [00:00<00:00, 8653.30 examples/s]
+Tokenizing train dataset:   0%|                                         | 0/6850 [00:00<?, ? examples/s]
+Traceback (most recent call last):
+  File "/workspace/trainer-kit/DPO/run_dpo.py", line 1011, in <module>
+    main()
+  File "/workspace/trainer-kit/DPO/run_dpo.py", line 967, in main
+    trainer = DPOTrainer(
+  File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/trl/trainer/dpo_trainer.py", line 480, in __init__
+    train_dataset = self._prepare_dataset(train_dataset, processing_class, args, "train")
+  File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/trl/trainer/dpo_trainer.py", line 654, in _prepare_dataset
+    dataset = dataset.map(
+  File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/datasets/arrow_dataset.py", line 562, in wrapper
+    out: Union["Dataset", "DatasetDict"] = func(self, *args, **kwargs)
+  File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/datasets/arrow_dataset.py", line 3341, in map
+    for rank, done, content in Dataset._map_single(**unprocessed_kwargs):
+  File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/datasets/arrow_dataset.py", line 3673, in _map_single
+    for i, example in iter_outputs(shard_iterable):
+  File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/datasets/arrow_dataset.py", line 3647, in iter_outputs
+    yield i, apply_function(example, i, offset=offset)
+  File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/datasets/arrow_dataset.py", line 3570, in apply_function
+    processed_inputs = function(*fn_args, *additional_args, **fn_kwargs)
+  File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/trl/trainer/dpo_trainer.py", line 749, in process_row
+    processor, tokenizer = processing_class, processing_class.tokenizer  # the processing class is a processor
+  File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/tokenization_utils_base.py", line 1331, in __getattr__
+    raise AttributeError(f"{self.__class__.__name__} has no attribute {key}")
+AttributeError: TokenizersBackend has no attribute tokenizer. Did you mean: '_tokenizer'?
+Traceback (most recent call last):
+  File "/workspace/trainer-kit/DPO/run_dpo.py", line 1011, in <module>
+    main()
+  File "/workspace/trainer-kit/DPO/run_dpo.py", line 967, in main
+    trainer = DPOTrainer(
+  File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/trl/trainer/dpo_trainer.py", line 480, in __init__
+    train_dataset = self._prepare_dataset(train_dataset, processing_class, args, "train")
+  File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/trl/trainer/dpo_trainer.py", line 654, in _prepare_dataset
+    dataset = dataset.map(
+  File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/datasets/arrow_dataset.py", line 562, in wrapper
+    out: Union["Dataset", "DatasetDict"] = func(self, *args, **kwargs)
+  File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/datasets/arrow_dataset.py", line 3341, in map
+    for rank, done, content in Dataset._map_single(**unprocessed_kwargs):
+  File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/datasets/arrow_dataset.py", line 3673, in _map_single
+    for i, example in iter_outputs(shard_iterable):
+  File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/datasets/arrow_dataset.py", line 3647, in iter_outputs
+    yield i, apply_function(example, i, offset=offset)
+  File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/datasets/arrow_dataset.py", line 3570, in apply_function
+    processed_inputs = function(*fn_args, *additional_args, **fn_kwargs)
+  File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/trl/trainer/dpo_trainer.py", line 749, in process_row
+    processor, tokenizer = processing_class, processing_class.tokenizer  # the processing class is a processor
+  File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/tokenization_utils_base.py", line 1331, in __getattr__
+    raise AttributeError(f"{self.__class__.__name__} has no attribute {key}")
+AttributeError: TokenizersBackend has no attribute tokenizer. Did you mean: '_tokenizer'?
diff --git a/dpo_run_24b_v1/wandb/run-20251226_153336-fb8js9es/files/requirements.txt b/dpo_run_24b_v1/wandb/run-20251226_153336-fb8js9es/files/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..79a4241d8724f018c9bdfcd7c289f1f14578574b
--- /dev/null
+++ b/dpo_run_24b_v1/wandb/run-20251226_153336-fb8js9es/files/requirements.txt
@@ -0,0 +1,104 @@
+exceptiongroup==1.3.1
+wheel==0.45.1
+python-dateutil==2.9.0.post0
+nvidia-ml-py==13.580.82
+huggingface_hub==1.2.3
+idna==3.11
+click==8.3.1
+numpy==2.2.6
+httpx==0.28.1
+tokenizers==0.22.1
+sympy==1.13.1
+yarl==1.22.0
+async-timeout==5.0.1
+datasets==4.4.2
+platformdirs==4.5.1
+nvidia-cuda-cupti-cu12==12.1.105
+nvidia-nvtx-cu12==12.1.105
+smmap==5.0.2
+accelerate==1.12.0
+requests==2.32.5
+aiohttp==3.13.2
+bitsandbytes==0.49.0
+nvidia-cublas-cu12==12.1.3.1
+mpmath==1.3.0
+torchaudio==2.5.1+cu121
+nvidia-cuda-runtime-cu12==12.1.105
+typing-inspection==0.4.2
+GitPython==3.1.45
+xxhash==3.6.0
+nvidia-cusolver-cu12==11.4.5.107
+pydantic_core==2.41.5
+six==1.17.0
+torchvision==0.20.1+cu121
+typing_extensions==4.15.0
+triton==3.1.0
+charset-normalizer==3.4.4
+nvitop==1.6.1
+wandb==0.23.1
+regex==2025.11.3
+pip==25.3
+nvidia-cusparse-cu12==12.1.0.106
+pytz==2025.2
+Jinja2==3.1.6
+psutil==7.2.0
+pillow==12.0.0
+packaging==25.0
+safetensors==0.7.0
+sentry-sdk==2.48.0
+gitdb==4.0.12
+httpcore==1.0.9
+setuptools==80.9.0
+nvidia-cufft-cu12==11.0.2.54
+anyio==4.12.0
+transformers==5.0.0.dev0
+pydantic==2.12.5
+fsspec==2025.10.0
+filelock==3.20.0
+PyYAML==6.0.3
+hf-xet==1.2.0
+nvidia-cudnn-cu12==9.1.0.70
+tqdm==4.67.1
+MarkupSafe==2.1.5
+attrs==25.4.0
+nvidia-cuda-nvrtc-cu12==12.1.105
+peft==0.18.0
+aiohappyeyeballs==2.6.1
+networkx==3.4.2
+nvidia-nvjitlink-cu12==12.9.86
+certifi==2025.11.12
+pyarrow==22.0.0
+dill==0.4.0
+protobuf==6.33.2
+aiosignal==1.4.0
+frozenlist==1.8.0
+urllib3==2.6.2
+propcache==0.4.1
+tzdata==2025.3
+pandas==2.3.3
+annotated-types==0.7.0
+shellingham==1.5.4
+nvidia-nccl-cu12==2.21.5
+multidict==6.7.0
+nvidia-curand-cu12==10.3.2.106
+trl==0.26.2
+torch==2.5.1+cu121
+h11==0.16.0
+multiprocess==0.70.18
+typer-slim==0.21.0
+wheel==0.45.1
+tomli==2.0.1
+autocommand==2.2.2
+jaraco.context==5.3.0
+zipp==3.19.2
+packaging==24.2
+inflect==7.3.1
+typing_extensions==4.12.2
+platformdirs==4.2.2
+jaraco.functools==4.0.1
+jaraco.collections==5.1.0
+jaraco.text==3.12.1
+backports.tarfile==1.2.0
+more-itertools==10.3.0
+importlib_metadata==8.0.0
+typeguard==4.3.0
diff --git a/dpo_run_24b_v1/wandb/run-20251226_153336-fb8js9es/files/wandb-metadata.json b/dpo_run_24b_v1/wandb/run-20251226_153336-fb8js9es/files/wandb-metadata.json
new file mode 100644
index 0000000000000000000000000000000000000000..ff42988e78c2962d170699052790956677f84f05
--- /dev/null
+++ b/dpo_run_24b_v1/wandb/run-20251226_153336-fb8js9es/files/wandb-metadata.json
@@ -0,0 +1,47 @@
+{
+  "os":  "Linux-6.12.46+-x86_64-with-glibc2.35",
+  "python":  "CPython 3.10.12",
+  "startedAt":  "2025-12-26T15:33:36.434359Z",
+  "args":  [
+    "--config",
+    "config_dpo.yaml"
+  ],
+  "program":  "/workspace/trainer-kit/DPO/run_dpo.py",
+  "codePath":  "run_dpo.py",
+  "codePathLocal":  "run_dpo.py",
+  "email":  "shaiksirajuddin9949@gmail.com",
+  "root":  "runs/dpo_run_24b_v1",
+  "host":  "a100-2gpu-shell-session-757d587799-mfdvv",
+  "executable":  "/workspace/llm_finetuning_env/bin/python",
+  "cpu_count":  12,
+  "cpu_count_logical":  24,
+  "gpu":  "NVIDIA A100-SXM4-80GB",
+  "gpu_count":  2,
+  "disk":  {
+    "/":  {
+      "total":  "791251738624",
+      "used":  "318370951168"
+    }
+  },
+  "memory":  {
+    "total":  "359047892992"
+  },
+  "gpu_nvidia":  [
+    {
+      "name":  "NVIDIA A100-SXM4-80GB",
+      "memoryTotal":  "85899345920",
+      "cudaCores":  6912,
+      "architecture":  "Ampere",
+      "uuid":  "GPU-989794b0-ec3b-13bf-db9f-3fbe341497ba"
+    },
+    {
+      "name":  "NVIDIA A100-SXM4-80GB",
+      "memoryTotal":  "85899345920",
+      "cudaCores":  6912,
+      "architecture":  "Ampere",
+      "uuid":  "GPU-3790aa64-60ef-9eac-b0b1-b278ee8c0d40"
+    }
+  ],
+  "cudaVersion":  "13.0",
+  "writerId":  "yg288fsgj0ia8gpi3lsxxd4iicxacpwh"
+}
\ No newline at end of file
diff --git a/dpo_run_24b_v1/wandb/run-20251226_153336-fb8js9es/files/wandb-summary.json b/dpo_run_24b_v1/wandb/run-20251226_153336-fb8js9es/files/wandb-summary.json
new file mode 100644
index 0000000000000000000000000000000000000000..6522728d094978444863b741099b8461af7feae2
--- /dev/null
+++ b/dpo_run_24b_v1/wandb/run-20251226_153336-fb8js9es/files/wandb-summary.json
@@ -0,0 +1 @@
+{"_runtime":49,"_wandb":{"runtime":49}}
\ No newline at end of file
diff --git a/dpo_run_24b_v1/wandb/run-20251226_153336-fb8js9es/logs/debug-core.log b/dpo_run_24b_v1/wandb/run-20251226_153336-fb8js9es/logs/debug-core.log
new file mode 100644
index 0000000000000000000000000000000000000000..e571410198ebc8ee0fb32fc08c3b6c6ace154a0b
--- /dev/null
+++ b/dpo_run_24b_v1/wandb/run-20251226_153336-fb8js9es/logs/debug-core.log
@@ -0,0 +1,14 @@
+{"time":"2025-12-26T15:33:36.517152278Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmptn5phprh/port-138967.txt","pid":138967,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false}
+{"time":"2025-12-26T15:33:36.517842446Z","level":"INFO","msg":"server: will exit if parent process dies","ppid":138967}
+{"time":"2025-12-26T15:33:36.517839815Z","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-138967-139051-1880805868/socket","Net":"unix"}}
+{"time":"2025-12-26T15:33:36.700377241Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"}
+{"time":"2025-12-26T15:33:36.707152235Z","level":"INFO","msg":"handleInformInit: received","streamId":"fb8js9es","id":"1(@)"}
+{"time":"2025-12-26T15:33:36.860726066Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"fb8js9es","id":"1(@)"}
+{"time":"2025-12-26T15:34:26.830462396Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"}
+{"time":"2025-12-26T15:34:26.830539704Z","level":"INFO","msg":"server is shutting down"}
+{"time":"2025-12-26T15:34:26.830534975Z","level":"INFO","msg":"connection: closing","id":"1(@)"}
+{"time":"2025-12-26T15:34:26.830623895Z","level":"INFO","msg":"connection: closed successfully","id":"1(@)"}
+{"time":"2025-12-26T15:34:26.830653617Z","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-138967-139051-1880805868/socket","Net":"unix"}}
+{"time":"2025-12-26T15:34:27.129538377Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"}
+{"time":"2025-12-26T15:34:27.129564503Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"}
+{"time":"2025-12-26T15:34:27.129572669Z","level":"INFO","msg":"server is closed"}
diff --git a/dpo_run_24b_v1/wandb/run-20251226_153336-fb8js9es/logs/debug-internal.log b/dpo_run_24b_v1/wandb/run-20251226_153336-fb8js9es/logs/debug-internal.log
new file mode 100644
index 0000000000000000000000000000000000000000..59cdec62f212ddb312defd844ab5469abfa99f68
--- /dev/null
+++ b/dpo_run_24b_v1/wandb/run-20251226_153336-fb8js9es/logs/debug-internal.log
@@ -0,0 +1,11 @@
+{"time":"2025-12-26T15:33:36.707301381Z","level":"INFO","msg":"stream: starting","core version":"0.23.1"}
+{"time":"2025-12-26T15:33:36.860475041Z","level":"INFO","msg":"stream: created new stream","id":"fb8js9es"}
+{"time":"2025-12-26T15:33:36.860568007Z","level":"INFO","msg":"handler: started","stream_id":"fb8js9es"}
+{"time":"2025-12-26T15:33:36.860715948Z","level":"INFO","msg":"stream: started","id":"fb8js9es"}
+{"time":"2025-12-26T15:33:36.860739836Z","level":"INFO","msg":"writer: started","stream_id":"fb8js9es"}
+{"time":"2025-12-26T15:33:36.860764109Z","level":"INFO","msg":"sender: started","stream_id":"fb8js9es"}
+{"time":"2025-12-26T15:34:26.830537011Z","level":"INFO","msg":"stream: closing","id":"fb8js9es"}
+{"time":"2025-12-26T15:34:27.027238553Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
+{"time":"2025-12-26T15:34:27.128722564Z","level":"INFO","msg":"handler: closed","stream_id":"fb8js9es"}
+{"time":"2025-12-26T15:34:27.12882072Z","level":"INFO","msg":"sender: closed","stream_id":"fb8js9es"}
+{"time":"2025-12-26T15:34:27.128830906Z","level":"INFO","msg":"stream: closed","id":"fb8js9es"}
diff --git a/dpo_run_24b_v1/wandb/run-20251226_153336-fb8js9es/logs/debug.log b/dpo_run_24b_v1/wandb/run-20251226_153336-fb8js9es/logs/debug.log
new file mode 100644
index 0000000000000000000000000000000000000000..102181d27f2a812272ddc5466d77612c363e32e5
--- /dev/null
+++ b/dpo_run_24b_v1/wandb/run-20251226_153336-fb8js9es/logs/debug.log
@@ -0,0 +1,23 @@
+2025-12-26 15:33:36,435 INFO    MainThread:138967 [wandb_setup.py:_flush():80] Current SDK version is 0.23.1
+2025-12-26 15:33:36,436 INFO    MainThread:138967 [wandb_setup.py:_flush():80] Configure stats pid to 138967
+2025-12-26 15:33:36,436 INFO    MainThread:138967 [wandb_setup.py:_flush():80] Loading settings from /root/.config/wandb/settings
+2025-12-26 15:33:36,436 INFO    MainThread:138967 [wandb_setup.py:_flush():80] Loading settings from /workspace/trainer-kit/DPO/wandb/settings
+2025-12-26 15:33:36,436 INFO    MainThread:138967 [wandb_setup.py:_flush():80] Loading settings from environment variables
+2025-12-26 15:33:36,436 INFO    MainThread:138967 [wandb_init.py:setup_run_log_directory():714] Logging user logs to runs/dpo_run_24b_v1/wandb/run-20251226_153336-fb8js9es/logs/debug.log
+2025-12-26 15:33:36,436 INFO    MainThread:138967 [wandb_init.py:setup_run_log_directory():715] Logging internal logs to runs/dpo_run_24b_v1/wandb/run-20251226_153336-fb8js9es/logs/debug-internal.log
+2025-12-26 15:33:36,436 INFO    MainThread:138967 [wandb_init.py:init():841] calling init triggers
+2025-12-26 15:33:36,436 INFO    MainThread:138967 [wandb_init.py:init():846] wandb.init called with sweep_config: {}
+config: {'model': {'repo_id': '../../Models/Devstral-Small-2-24B-HS-CPT-SFT', 'revision': None, 'base_local_dir': 'base_model', 'trust_remote_code': True, 'tokenizer_use_fast': True, 'device_map': 'auto', 'torch_dtype': 'bfloat16', 'use_4bit': False, 'bnb_4bit_quant_type': 'nf4', 'bnb_4bit_use_double_quant': False, 'bnb_4bit_compute_dtype': 'bfloat16', 'attn_implementation': None}, 'data': {'train_jsonl': 'dpo_pairs_generated.jsonl', 'eval_jsonl': None, 'eval_split_ratio': 0.1, 'prompt_field': 'prompt', 'chosen_field': 'chosen', 'rejected_field': 'rejected', 'score_field': 'f1_score', 'format_type': 'chatml', 'system_prompt': 'You are a Hyperswitch Rust code analyzer. Identify functions/structs that need modification for a given task.\n\n## Output Format\n\n##OUTPUT\nExplain the data flow and why each component must change:\n- Flow: [Input → Processing → Output with arrows]\n- For each component: "The [ComponentName] ([path]) must [action] because [reason]—without this, [consequence]"\n- Explain coupling between components\n\n##SELECT\nmodify::crates/path/to/file.rs::impl::ComponentName\nadd::crates/another/file.rs::function::AnotherComponent\n<EOS>\n\n## Rules\n\n1. Use full paths: `remove::crates/folder/file.rs::Type::Name`\n2. Use `::` for nested items: `status::StructName::Type::Name`\n3. Always explain "must change because" and "without this"\n3. Types of components: function, struct, enum, impl, trait\n4. If there is extra information (e.g., enum variants), include that too.\n5. 
Start with ##OUTPUT, end with ##SELECT, terminate with <EOS>\n', 'max_length': 2048, 'shuffle': True, 'num_proc': 4}, 'peft': {'enabled': True, 'r': 16, 'lora_alpha': 32, 'lora_dropout': 0.05, 'bias': 'none', 'target_modules': 'auto'}, 'dpo': {'beta': 0.1, 'label_smoothing': 0.0, 'loss_type': 'sigmoid', 'use_reference_model': True, 'reference_free': False}, 'train': {'num_train_epochs': 3, 'per_device_train_batch_size': 1, 'per_device_eval_batch_size': 1, 'gradient_accumulation_steps': 8, 'learning_rate': '5e-5', 'weight_decay': 0.0, 'warmup_ratio': 0.1, 'lr_scheduler_type': 'cosine', 'optim': 'adamw_torch', 'max_grad_norm': 1.0, 'gradient_checkpointing': True, 'logging_steps': 2, 'save_strategy': 'steps', 'save_steps': 100, 'save_total_limit': 10, 'evaluation_strategy': 'steps', 'eval_steps': 25, 'load_best_model_at_end': True, 'early_stopping': {'enabled': True, 'patience': 5, 'min_delta': 0.001, 'metric': 'eval_loss', 'mode': 'min'}, 'resume_from_checkpoint': 'auto'}, 'run_dir': 'runs/dpo_run_24b_v1', '_wandb': {}}
+2025-12-26 15:33:36,436 INFO    MainThread:138967 [wandb_init.py:init():889] starting backend
+2025-12-26 15:33:36,700 INFO    MainThread:138967 [wandb_init.py:init():892] sending inform_init request
+2025-12-26 15:33:36,705 INFO    MainThread:138967 [wandb_init.py:init():900] backend started and connected
+2025-12-26 15:33:36,708 INFO    MainThread:138967 [wandb_init.py:init():970] updated telemetry
+2025-12-26 15:33:36,709 INFO    MainThread:138967 [wandb_init.py:init():994] communicating run to backend with 90.0 second timeout
+2025-12-26 15:33:36,984 INFO    MainThread:138967 [wandb_init.py:init():1041] starting run threads in backend
+2025-12-26 15:33:37,095 INFO    MainThread:138967 [wandb_run.py:_console_start():2521] atexit reg
+2025-12-26 15:33:37,095 INFO    MainThread:138967 [wandb_run.py:_redirect():2369] redirect: wrap_raw
+2025-12-26 15:33:37,095 INFO    MainThread:138967 [wandb_run.py:_redirect():2438] Wrapping output streams.
+2025-12-26 15:33:37,095 INFO    MainThread:138967 [wandb_run.py:_redirect():2461] Redirects installed.
+2025-12-26 15:33:37,101 INFO    MainThread:138967 [wandb_init.py:init():1081] run started, returning control to user process
+2025-12-26 15:34:26,830 INFO    wandb-AsyncioManager-main:138967 [service_client.py:_forward_responses():80] Reached EOF.
+2025-12-26 15:34:26,830 INFO    wandb-AsyncioManager-main:138967 [mailbox.py:close():137] Closing mailbox, abandoning 1 handles.
diff --git a/dpo_run_24b_v1/wandb/run-20251226_153336-fb8js9es/run-fb8js9es.wandb b/dpo_run_24b_v1/wandb/run-20251226_153336-fb8js9es/run-fb8js9es.wandb
new file mode 100644
index 0000000000000000000000000000000000000000..9188ea19432d982013d7d9d14ff9cf3ba890e917
--- /dev/null
+++ b/dpo_run_24b_v1/wandb/run-20251226_153336-fb8js9es/run-fb8js9es.wandb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:47b1b5cb59559e68f367989a74a0cbd677bc17c04c539a4bc2448b00f2fcb402
+size 410520
diff --git a/dpo_run_24b_v1/wandb/run-20251226_153517-g5bybskm/files/config.yaml b/dpo_run_24b_v1/wandb/run-20251226_153517-g5bybskm/files/config.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..07ca14d9954dd18bb4c3fc7015f7999b530bd00d
--- /dev/null
+++ b/dpo_run_24b_v1/wandb/run-20251226_153517-g5bybskm/files/config.yaml
@@ -0,0 +1,165 @@
+_wandb:
+    value:
+        cli_version: 0.23.1
+        e:
+            47sn84x90gu1yvzd0dzhhujsd7q0za53:
+                args:
+                    - --config
+                    - config_dpo.yaml
+                codePath: run_dpo.py
+                codePathLocal: run_dpo.py
+                cpu_count: 12
+                cpu_count_logical: 24
+                cudaVersion: "13.0"
+                disk:
+                    /:
+                        total: "791251738624"
+                        used: "319211442176"
+                email: shaiksirajuddin9949@gmail.com
+                executable: /workspace/llm_finetuning_env/bin/python
+                gpu: NVIDIA A100-SXM4-80GB
+                gpu_count: 2
+                gpu_nvidia:
+                    - architecture: Ampere
+                      cudaCores: 6912
+                      memoryTotal: "85899345920"
+                      name: NVIDIA A100-SXM4-80GB
+                      uuid: GPU-989794b0-ec3b-13bf-db9f-3fbe341497ba
+                    - architecture: Ampere
+                      cudaCores: 6912
+                      memoryTotal: "85899345920"
+                      name: NVIDIA A100-SXM4-80GB
+                      uuid: GPU-3790aa64-60ef-9eac-b0b1-b278ee8c0d40
+                host: a100-2gpu-shell-session-757d587799-mfdvv
+                memory:
+                    total: "359047892992"
+                os: Linux-6.12.46+-x86_64-with-glibc2.35
+                program: /workspace/trainer-kit/DPO/run_dpo.py
+                python: CPython 3.10.12
+                root: runs/dpo_run_24b_v1
+                startedAt: "2025-12-26T15:35:17.840098Z"
+                writerId: 47sn84x90gu1yvzd0dzhhujsd7q0za53
+        m: []
+        python_version: 3.10.12
+        t:
+            "1":
+                - 1
+                - 11
+                - 41
+                - 49
+                - 51
+                - 71
+                - 84
+                - 98
+            "2":
+                - 1
+                - 11
+                - 41
+                - 49
+                - 51
+                - 71
+                - 84
+                - 98
+            "3":
+                - 15
+                - 16
+            "4": 3.10.12
+            "5": 0.23.1
+            "6": 5.0.0.dev0
+            "12": 0.23.1
+            "13": linux-x86_64
+data:
+    value:
+        chosen_field: chosen
+        eval_jsonl: null
+        eval_split_ratio: 0.1
+        format_type: chatml
+        max_length: 2048
+        num_proc: 4
+        prompt_field: prompt
+        rejected_field: rejected
+        score_field: f1_score
+        shuffle: true
+        system_prompt: |
+            You are a Hyperswitch Rust code analyzer. Identify functions/structs that need modification for a given task.
+
+            ## Output Format
+
+            ##OUTPUT
+            Explain the data flow and why each component must change:
+            - Flow: [Input → Processing → Output with arrows]
+            - For each component: "The [ComponentName] ([path]) must [action] because [reason]—without this, [consequence]"
+            - Explain coupling between components
+
+            ##SELECT
+            modify::crates/path/to/file.rs::impl::ComponentName
+            add::crates/another/file.rs::function::AnotherComponent
+            <EOS>
+
+            ## Rules
+
+            1. Use full paths: `remove::crates/folder/file.rs::Type::Name`
+            2. Use `::` for nested items: `status::StructName::Type::Name`
+            3. Always explain "must change because" and "without this"
+            3. Types of components: function, struct, enum, impl, trait
+            4. If there is extra information (e.g., enum variants), include that too.
+            5. Start with ##OUTPUT, end with ##SELECT, terminate with <EOS>
+        train_jsonl: dpo_pairs_generated.jsonl
+dpo:
+    value:
+        beta: 0.1
+        label_smoothing: 0
+        loss_type: sigmoid
+        reference_free: false
+        use_reference_model: true
+model:
+    value:
+        attn_implementation: null
+        base_local_dir: base_model
+        bnb_4bit_compute_dtype: bfloat16
+        bnb_4bit_quant_type: nf4
+        bnb_4bit_use_double_quant: false
+        device_map: auto
+        repo_id: ../../Models/Devstral-Small-2-24B-HS-CPT-SFT
+        revision: null
+        tokenizer_use_fast: true
+        torch_dtype: bfloat16
+        trust_remote_code: true
+        use_4bit: false
+peft:
+    value:
+        bias: none
+        enabled: true
+        lora_alpha: 32
+        lora_dropout: 0.05
+        r: 16
+        target_modules: auto
+run_dir:
+    value: runs/dpo_run_24b_v1
+train:
+    value:
+        early_stopping:
+            enabled: true
+            metric: eval_loss
+            min_delta: 0.001
+            mode: min
+            patience: 5
+        eval_steps: 25
+        evaluation_strategy: steps
+        gradient_accumulation_steps: 8
+        gradient_checkpointing: true
+        learning_rate: "5e-5"
+        load_best_model_at_end: true
+        logging_steps: 2
+        lr_scheduler_type: cosine
+        max_grad_norm: 1
+        num_train_epochs: 3
+        optim: adamw_torch
+        per_device_eval_batch_size: 1
+        per_device_train_batch_size: 1
+        resume_from_checkpoint: auto
+        save_steps: 100
+        save_strategy: steps
+        save_total_limit: 10
+        warmup_ratio: 0.1
+        weight_decay: 0
diff --git a/dpo_run_24b_v1/wandb/run-20251226_153517-g5bybskm/files/output.log b/dpo_run_24b_v1/wandb/run-20251226_153517-g5bybskm/files/output.log
new file mode 100644
index 0000000000000000000000000000000000000000..ba1ae601d685e7cd4f277f3b60e2d26ed0099343
--- /dev/null
+++ b/dpo_run_24b_v1/wandb/run-20251226_153517-g5bybskm/files/output.log
@@ -0,0 +1,32 @@
+Wandb initialized: project='dpo-training', name='auto-generated'
+2025-12-26 15:35:21,001 - INFO - Detected Mistral3 model architecture, loading with specific class
+Loading weights: 100%|█| 585/585 [00:13<00:00, 42.15it/s, Materializing param=model.vision_tower.transfo
+2025-12-26 15:35:36,966 - INFO - Ensuring all parameters are materialized...
+Loading reference model (frozen copy)...
+2025-12-26 15:35:41,441 - INFO - Detected Mistral3 model architecture, loading with specific class
+Loading weights: 100%|█| 585/585 [00:13<00:00, 43.35it/s, Materializing param=model.vision_tower.transfo
+2025-12-26 15:35:56,856 - INFO - Ensuring all parameters are materialized...
+Reference model loaded and frozen
+2025-12-26 15:35:57,883 - INFO - HTTP Request: HEAD https://s3.amazonaws.com/datasets.huggingface.co/datasets/datasets/json/json.py "HTTP/1.1 200 OK"
+2025-12-26 15:35:57,897 - INFO - Formatting train DPO data...
+2025-12-26 15:35:58,172 - INFO - Train dataset after filtering: 6850 examples
+2025-12-26 15:35:58,173 - INFO - train dataset validation passed: 6850 examples
+2025-12-26 15:35:58,174 - INFO - Formatting eval DPO data...
+2025-12-26 15:35:58,443 - INFO - Eval dataset after filtering: 762 examples
+2025-12-26 15:35:58,444 - INFO - eval dataset validation passed: 762 examples
+warmup_ratio is deprecated and will be removed in v5.2. Use `warmup_steps` instead.
+Early stopping enabled: patience=5, min_delta=0.001
+2025-12-26 15:35:58,482 - INFO - DPO Training with beta=0.1, loss_type=sigmoid
+warmup_ratio is deprecated and will be removed in v5.2. Use `warmup_steps` instead.
+Traceback (most recent call last):
+  File "/workspace/trainer-kit/DPO/run_dpo.py", line 1011, in <module>
+    main()
+  File "/workspace/trainer-kit/DPO/run_dpo.py", line 967, in main
+    trainer = DPOTrainer(
+TypeError: DPOTrainer.__init__() got an unexpected keyword argument 'tokenizer'
+Traceback (most recent call last):
+  File "/workspace/trainer-kit/DPO/run_dpo.py", line 1011, in <module>
+    main()
+  File "/workspace/trainer-kit/DPO/run_dpo.py", line 967, in main
+    trainer = DPOTrainer(
+TypeError: DPOTrainer.__init__() got an unexpected keyword argument 'tokenizer'
diff --git a/dpo_run_24b_v1/wandb/run-20251226_153517-g5bybskm/files/requirements.txt b/dpo_run_24b_v1/wandb/run-20251226_153517-g5bybskm/files/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..79a4241d8724f018c9bdfcd7c289f1f14578574b
--- /dev/null
+++ b/dpo_run_24b_v1/wandb/run-20251226_153517-g5bybskm/files/requirements.txt
@@ -0,0 +1,104 @@
+exceptiongroup==1.3.1
+wheel==0.45.1
+python-dateutil==2.9.0.post0
+nvidia-ml-py==13.580.82
+huggingface_hub==1.2.3
+idna==3.11
+click==8.3.1
+numpy==2.2.6
+httpx==0.28.1
+tokenizers==0.22.1
+sympy==1.13.1
+yarl==1.22.0
+async-timeout==5.0.1
+datasets==4.4.2
+platformdirs==4.5.1
+nvidia-cuda-cupti-cu12==12.1.105
+nvidia-nvtx-cu12==12.1.105
+smmap==5.0.2
+accelerate==1.12.0
+requests==2.32.5
+aiohttp==3.13.2
+bitsandbytes==0.49.0
+nvidia-cublas-cu12==12.1.3.1
+mpmath==1.3.0
+torchaudio==2.5.1+cu121
+nvidia-cuda-runtime-cu12==12.1.105
+typing-inspection==0.4.2
+GitPython==3.1.45
+xxhash==3.6.0
+nvidia-cusolver-cu12==11.4.5.107
+pydantic_core==2.41.5
+six==1.17.0
+torchvision==0.20.1+cu121
+typing_extensions==4.15.0
+triton==3.1.0
+charset-normalizer==3.4.4
+nvitop==1.6.1
+wandb==0.23.1
+regex==2025.11.3
+pip==25.3
+nvidia-cusparse-cu12==12.1.0.106
+pytz==2025.2
+Jinja2==3.1.6
+psutil==7.2.0
+pillow==12.0.0
+packaging==25.0
+safetensors==0.7.0
+sentry-sdk==2.48.0
+gitdb==4.0.12
+httpcore==1.0.9
+setuptools==80.9.0
+nvidia-cufft-cu12==11.0.2.54
+anyio==4.12.0
+transformers==5.0.0.dev0
+pydantic==2.12.5
+fsspec==2025.10.0
+filelock==3.20.0
+PyYAML==6.0.3
+hf-xet==1.2.0
+nvidia-cudnn-cu12==9.1.0.70
+tqdm==4.67.1
+MarkupSafe==2.1.5
+attrs==25.4.0
+nvidia-cuda-nvrtc-cu12==12.1.105
+peft==0.18.0
+aiohappyeyeballs==2.6.1
+networkx==3.4.2
+nvidia-nvjitlink-cu12==12.9.86
+certifi==2025.11.12
+pyarrow==22.0.0
+dill==0.4.0
+protobuf==6.33.2
+aiosignal==1.4.0
+frozenlist==1.8.0
+urllib3==2.6.2
+propcache==0.4.1
+tzdata==2025.3
+pandas==2.3.3
+annotated-types==0.7.0
+shellingham==1.5.4
+nvidia-nccl-cu12==2.21.5
+multidict==6.7.0
+nvidia-curand-cu12==10.3.2.106
+trl==0.26.2
+torch==2.5.1+cu121
+h11==0.16.0
+multiprocess==0.70.18
+typer-slim==0.21.0
+wheel==0.45.1
+tomli==2.0.1
+autocommand==2.2.2
+jaraco.context==5.3.0
+zipp==3.19.2
+packaging==24.2
+inflect==7.3.1
+typing_extensions==4.12.2
+platformdirs==4.2.2
+jaraco.functools==4.0.1
+jaraco.collections==5.1.0
+jaraco.text==3.12.1
+backports.tarfile==1.2.0
+more-itertools==10.3.0
+importlib_metadata==8.0.0
+typeguard==4.3.0
diff --git a/dpo_run_24b_v1/wandb/run-20251226_153517-g5bybskm/files/wandb-metadata.json b/dpo_run_24b_v1/wandb/run-20251226_153517-g5bybskm/files/wandb-metadata.json
new file mode 100644
index 0000000000000000000000000000000000000000..1f86daa21dacffefb46289f1bd6345d525ec36f9
--- /dev/null
+++ b/dpo_run_24b_v1/wandb/run-20251226_153517-g5bybskm/files/wandb-metadata.json
@@ -0,0 +1,47 @@
+{
+  "os":  "Linux-6.12.46+-x86_64-with-glibc2.35",
+  "python":  "CPython 3.10.12",
+  "startedAt":  "2025-12-26T15:35:17.840098Z",
+  "args":  [
+    "--config",
+    "config_dpo.yaml"
+  ],
+  "program":  "/workspace/trainer-kit/DPO/run_dpo.py",
+  "codePath":  "run_dpo.py",
+  "codePathLocal":  "run_dpo.py",
+  "email":  "shaiksirajuddin9949@gmail.com",
+  "root":  "runs/dpo_run_24b_v1",
+  "host":  "a100-2gpu-shell-session-757d587799-mfdvv",
+  "executable":  "/workspace/llm_finetuning_env/bin/python",
+  "cpu_count":  12,
+  "cpu_count_logical":  24,
+  "gpu":  "NVIDIA A100-SXM4-80GB",
+  "gpu_count":  2,
+  "disk":  {
+    "/":  {
+      "total":  "791251738624",
+      "used":  "319211442176"
+    }
+  },
+  "memory":  {
+    "total":  "359047892992"
+  },
+  "gpu_nvidia":  [
+    {
+      "name":  "NVIDIA A100-SXM4-80GB",
+      "memoryTotal":  "85899345920",
+      "cudaCores":  6912,
+      "architecture":  "Ampere",
+      "uuid":  "GPU-989794b0-ec3b-13bf-db9f-3fbe341497ba"
+    },
+    {
+      "name":  "NVIDIA A100-SXM4-80GB",
+      "memoryTotal":  "85899345920",
+      "cudaCores":  6912,
+      "architecture":  "Ampere",
+      "uuid":  "GPU-3790aa64-60ef-9eac-b0b1-b278ee8c0d40"
+    }
+  ],
+  "cudaVersion":  "13.0",
+  "writerId":  "47sn84x90gu1yvzd0dzhhujsd7q0za53"
+}
\ No newline at end of file
diff --git a/dpo_run_24b_v1/wandb/run-20251226_153517-g5bybskm/files/wandb-summary.json b/dpo_run_24b_v1/wandb/run-20251226_153517-g5bybskm/files/wandb-summary.json
new file mode 100644
index 0000000000000000000000000000000000000000..7b7f6bbcb78630a18aad85b8e448b0042f42c288
--- /dev/null
+++ b/dpo_run_24b_v1/wandb/run-20251226_153517-g5bybskm/files/wandb-summary.json
@@ -0,0 +1 @@
+{"_wandb":{"runtime":39},"_runtime":39}
\ No newline at end of file
diff --git a/dpo_run_24b_v1/wandb/run-20251226_153517-g5bybskm/logs/debug-core.log b/dpo_run_24b_v1/wandb/run-20251226_153517-g5bybskm/logs/debug-core.log
new file mode 100644
index 0000000000000000000000000000000000000000..dc8e9063129f3199bcb8e4fffc045224f2c5df4e
--- /dev/null
+++ b/dpo_run_24b_v1/wandb/run-20251226_153517-g5bybskm/logs/debug-core.log
@@ -0,0 +1,14 @@
+{"time":"2025-12-26T15:35:17.923515456Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmp801gi0v4/port-140009.txt","pid":140009,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false}
+{"time":"2025-12-26T15:35:17.924118644Z","level":"INFO","msg":"server: will exit if parent process dies","ppid":140009}
+{"time":"2025-12-26T15:35:17.924098664Z","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-140009-140090-3920473864/socket","Net":"unix"}}
+{"time":"2025-12-26T15:35:18.106708115Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"}
+{"time":"2025-12-26T15:35:18.11375188Z","level":"INFO","msg":"handleInformInit: received","streamId":"g5bybskm","id":"1(@)"}
+{"time":"2025-12-26T15:35:18.2719386Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"g5bybskm","id":"1(@)"}
+{"time":"2025-12-26T15:35:58.515524095Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"}
+{"time":"2025-12-26T15:35:58.515582071Z","level":"INFO","msg":"connection: closing","id":"1(@)"}
+{"time":"2025-12-26T15:35:58.515610065Z","level":"INFO","msg":"server is shutting down"}
+{"time":"2025-12-26T15:35:58.515665551Z","level":"INFO","msg":"connection: closed successfully","id":"1(@)"}
+{"time":"2025-12-26T15:35:58.515768631Z","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-140009-140090-3920473864/socket","Net":"unix"}}
+{"time":"2025-12-26T15:35:58.815511252Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"}
+{"time":"2025-12-26T15:35:58.815545738Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"}
+{"time":"2025-12-26T15:35:58.815561066Z","level":"INFO","msg":"server is closed"}
diff --git a/dpo_run_24b_v1/wandb/run-20251226_153517-g5bybskm/logs/debug-internal.log b/dpo_run_24b_v1/wandb/run-20251226_153517-g5bybskm/logs/debug-internal.log
new file mode 100644
index 0000000000000000000000000000000000000000..365ff9f1504e6620cc74136d49aef9903915294c
--- /dev/null
+++ b/dpo_run_24b_v1/wandb/run-20251226_153517-g5bybskm/logs/debug-internal.log
@@ -0,0 +1,11 @@
+{"time":"2025-12-26T15:35:18.113875845Z","level":"INFO","msg":"stream: starting","core version":"0.23.1"}
+{"time":"2025-12-26T15:35:18.27171914Z","level":"INFO","msg":"stream: created new stream","id":"g5bybskm"}
+{"time":"2025-12-26T15:35:18.271814736Z","level":"INFO","msg":"handler: started","stream_id":"g5bybskm"}
+{"time":"2025-12-26T15:35:18.271929571Z","level":"INFO","msg":"stream: started","id":"g5bybskm"}
+{"time":"2025-12-26T15:35:18.271959653Z","level":"INFO","msg":"writer: started","stream_id":"g5bybskm"}
+{"time":"2025-12-26T15:35:18.271964288Z","level":"INFO","msg":"sender: started","stream_id":"g5bybskm"}
+{"time":"2025-12-26T15:35:58.515614932Z","level":"INFO","msg":"stream: closing","id":"g5bybskm"}
+{"time":"2025-12-26T15:35:58.727534601Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
+{"time":"2025-12-26T15:35:58.814678316Z","level":"INFO","msg":"handler: closed","stream_id":"g5bybskm"}
+{"time":"2025-12-26T15:35:58.814772432Z","level":"INFO","msg":"sender: closed","stream_id":"g5bybskm"}
+{"time":"2025-12-26T15:35:58.814786446Z","level":"INFO","msg":"stream: closed","id":"g5bybskm"}
diff --git a/dpo_run_24b_v1/wandb/run-20251226_153517-g5bybskm/logs/debug.log b/dpo_run_24b_v1/wandb/run-20251226_153517-g5bybskm/logs/debug.log
new file mode 100644
index 0000000000000000000000000000000000000000..d486c987f04342d891e0ae6364f564a1b0ca3c63
--- /dev/null
+++ b/dpo_run_24b_v1/wandb/run-20251226_153517-g5bybskm/logs/debug.log
@@ -0,0 +1,23 @@
+2025-12-26 15:35:17,841 INFO    MainThread:140009 [wandb_setup.py:_flush():80] Current SDK version is 0.23.1
+2025-12-26 15:35:17,841 INFO    MainThread:140009 [wandb_setup.py:_flush():80] Configure stats pid to 140009
+2025-12-26 15:35:17,841 INFO    MainThread:140009 [wandb_setup.py:_flush():80] Loading settings from /root/.config/wandb/settings
+2025-12-26 15:35:17,841 INFO    MainThread:140009 [wandb_setup.py:_flush():80] Loading settings from /workspace/trainer-kit/DPO/wandb/settings
+2025-12-26 15:35:17,841 INFO    MainThread:140009 [wandb_setup.py:_flush():80] Loading settings from environment variables
+2025-12-26 15:35:17,842 INFO    MainThread:140009 [wandb_init.py:setup_run_log_directory():714] Logging user logs to runs/dpo_run_24b_v1/wandb/run-20251226_153517-g5bybskm/logs/debug.log
+2025-12-26 15:35:17,842 INFO    MainThread:140009 [wandb_init.py:setup_run_log_directory():715] Logging internal logs to runs/dpo_run_24b_v1/wandb/run-20251226_153517-g5bybskm/logs/debug-internal.log
+2025-12-26 15:35:17,842 INFO    MainThread:140009 [wandb_init.py:init():841] calling init triggers
+2025-12-26 15:35:17,842 INFO    MainThread:140009 [wandb_init.py:init():846] wandb.init called with sweep_config: {}
+config: {'model': {'repo_id': '../../Models/Devstral-Small-2-24B-HS-CPT-SFT', 'revision': None, 'base_local_dir': 'base_model', 'trust_remote_code': True, 'tokenizer_use_fast': True, 'device_map': 'auto', 'torch_dtype': 'bfloat16', 'use_4bit': False, 'bnb_4bit_quant_type': 'nf4', 'bnb_4bit_use_double_quant': False, 'bnb_4bit_compute_dtype': 'bfloat16', 'attn_implementation': None}, 'data': {'train_jsonl': 'dpo_pairs_generated.jsonl', 'eval_jsonl': None, 'eval_split_ratio': 0.1, 'prompt_field': 'prompt', 'chosen_field': 'chosen', 'rejected_field': 'rejected', 'score_field': 'f1_score', 'format_type': 'chatml', 'system_prompt': 'You are a Hyperswitch Rust code analyzer. Identify functions/structs that need modification for a given task.\n\n## Output Format\n\n##OUTPUT\nExplain the data flow and why each component must change:\n- Flow: [Input → Processing → Output with arrows]\n- For each component: "The [ComponentName] ([path]) must [action] because [reason]—without this, [consequence]"\n- Explain coupling between components\n\n##SELECT\nmodify::crates/path/to/file.rs::impl::ComponentName\nadd::crates/another/file.rs::function::AnotherComponent\n<EOS>\n\n## Rules\n\n1. Use full paths: `remove::crates/folder/file.rs::Type::Name`\n2. Use `::` for nested items: `status::StructName::Type::Name`\n3. Always explain "must change because" and "without this"\n3. Types of components: function, struct, enum, impl, trait\n4. If there is extra information (e.g., enum variants), include that too.\n5. 
Start with ##OUTPUT, end with ##SELECT, terminate with <EOS>\n', 'max_length': 2048, 'shuffle': True, 'num_proc': 4}, 'peft': {'enabled': True, 'r': 16, 'lora_alpha': 32, 'lora_dropout': 0.05, 'bias': 'none', 'target_modules': 'auto'}, 'dpo': {'beta': 0.1, 'label_smoothing': 0.0, 'loss_type': 'sigmoid', 'use_reference_model': True, 'reference_free': False}, 'train': {'num_train_epochs': 3, 'per_device_train_batch_size': 1, 'per_device_eval_batch_size': 1, 'gradient_accumulation_steps': 8, 'learning_rate': '5e-5', 'weight_decay': 0.0, 'warmup_ratio': 0.1, 'lr_scheduler_type': 'cosine', 'optim': 'adamw_torch', 'max_grad_norm': 1.0, 'gradient_checkpointing': True, 'logging_steps': 2, 'save_strategy': 'steps', 'save_steps': 100, 'save_total_limit': 10, 'evaluation_strategy': 'steps', 'eval_steps': 25, 'load_best_model_at_end': True, 'early_stopping': {'enabled': True, 'patience': 5, 'min_delta': 0.001, 'metric': 'eval_loss', 'mode': 'min'}, 'resume_from_checkpoint': 'auto'}, 'run_dir': 'runs/dpo_run_24b_v1', '_wandb': {}}
+2025-12-26 15:35:17,842 INFO    MainThread:140009 [wandb_init.py:init():889] starting backend
+2025-12-26 15:35:18,106 INFO    MainThread:140009 [wandb_init.py:init():892] sending inform_init request
+2025-12-26 15:35:18,111 INFO    MainThread:140009 [wandb_init.py:init():900] backend started and connected
+2025-12-26 15:35:18,114 INFO    MainThread:140009 [wandb_init.py:init():970] updated telemetry
+2025-12-26 15:35:18,116 INFO    MainThread:140009 [wandb_init.py:init():994] communicating run to backend with 90.0 second timeout
+2025-12-26 15:35:18,546 INFO    MainThread:140009 [wandb_init.py:init():1041] starting run threads in backend
+2025-12-26 15:35:18,657 INFO    MainThread:140009 [wandb_run.py:_console_start():2521] atexit reg
+2025-12-26 15:35:18,657 INFO    MainThread:140009 [wandb_run.py:_redirect():2369] redirect: wrap_raw
+2025-12-26 15:35:18,657 INFO    MainThread:140009 [wandb_run.py:_redirect():2438] Wrapping output streams.
+2025-12-26 15:35:18,657 INFO    MainThread:140009 [wandb_run.py:_redirect():2461] Redirects installed.
+2025-12-26 15:35:18,662 INFO    MainThread:140009 [wandb_init.py:init():1081] run started, returning control to user process
+2025-12-26 15:35:58,515 INFO    wandb-AsyncioManager-main:140009 [service_client.py:_forward_responses():80] Reached EOF.
+2025-12-26 15:35:58,515 INFO    wandb-AsyncioManager-main:140009 [mailbox.py:close():137] Closing mailbox, abandoning 1 handles.
diff --git a/dpo_run_24b_v1/wandb/run-20251226_153517-g5bybskm/run-g5bybskm.wandb b/dpo_run_24b_v1/wandb/run-20251226_153517-g5bybskm/run-g5bybskm.wandb
new file mode 100644
index 0000000000000000000000000000000000000000..9c9eb10e84dfd70de15fb29ad533e7f1eeac89e3
--- /dev/null
+++ b/dpo_run_24b_v1/wandb/run-20251226_153517-g5bybskm/run-g5bybskm.wandb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5843f6a30ba69561e141413303149b3bf17dd043eaab242758e56f85e2a8eb9c
+size 396939
diff --git a/dpo_run_24b_v1/wandb/run-20251226_153729-pmpxe28f/files/config.yaml b/dpo_run_24b_v1/wandb/run-20251226_153729-pmpxe28f/files/config.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c7470e5e85db5212e2bb8aed7081a95a3c985998
--- /dev/null
+++ b/dpo_run_24b_v1/wandb/run-20251226_153729-pmpxe28f/files/config.yaml
@@ -0,0 +1,165 @@
+_wandb:
+    value:
+        cli_version: 0.23.1
+        e:
+            wbhk79mzrraduaf9zrubl5urrc81gfow:
+                args:
+                    - --config
+                    - config_dpo.yaml
+                codePath: run_dpo.py
+                codePathLocal: run_dpo.py
+                cpu_count: 12
+                cpu_count_logical: 24
+                cudaVersion: "13.0"
+                disk:
+                    /:
+                        total: "791251738624"
+                        used: "319811207168"
+                email: shaiksirajuddin9949@gmail.com
+                executable: /workspace/llm_finetuning_env/bin/python
+                gpu: NVIDIA A100-SXM4-80GB
+                gpu_count: 2
+                gpu_nvidia:
+                    - architecture: Ampere
+                      cudaCores: 6912
+                      memoryTotal: "85899345920"
+                      name: NVIDIA A100-SXM4-80GB
+                      uuid: GPU-989794b0-ec3b-13bf-db9f-3fbe341497ba
+                    - architecture: Ampere
+                      cudaCores: 6912
+                      memoryTotal: "85899345920"
+                      name: NVIDIA A100-SXM4-80GB
+                      uuid: GPU-3790aa64-60ef-9eac-b0b1-b278ee8c0d40
+                host: a100-2gpu-shell-session-757d587799-mfdvv
+                memory:
+                    total: "359047892992"
+                os: Linux-6.12.46+-x86_64-with-glibc2.35
+                program: /workspace/trainer-kit/DPO/run_dpo.py
+                python: CPython 3.10.12
+                root: runs/dpo_run_24b_v1
+                startedAt: "2025-12-26T15:37:29.335954Z"
+                writerId: wbhk79mzrraduaf9zrubl5urrc81gfow
+        m: []
+        python_version: 3.10.12
+        t:
+            "1":
+                - 1
+                - 11
+                - 41
+                - 49
+                - 51
+                - 71
+                - 84
+                - 98
+            "2":
+                - 1
+                - 11
+                - 41
+                - 49
+                - 51
+                - 71
+                - 84
+                - 98
+            "3":
+                - 15
+                - 16
+            "4": 3.10.12
+            "5": 0.23.1
+            "6": 5.0.0.dev0
+            "12": 0.23.1
+            "13": linux-x86_64
+data:
+    value:
+        chosen_field: chosen
+        eval_jsonl: null
+        eval_split_ratio: 0.1
+        format_type: chatml
+        max_length: 2048
+        num_proc: 4
+        prompt_field: prompt
+        rejected_field: rejected
+        score_field: f1_score
+        shuffle: true
+        system_prompt: |
+            You are a Hyperswitch Rust code analyzer. Identify functions/structs that need modification for a given task.
+
+            ## Output Format
+
+            ##OUTPUT
+            Explain the data flow and why each component must change:
+            - Flow: [Input → Processing → Output with arrows]
+            - For each component: "The [ComponentName] ([path]) must [action] because [reason]—without this, [consequence]"
+            - Explain coupling between components
+
+            ##SELECT
+            modify::crates/path/to/file.rs::impl::ComponentName
+            add::crates/another/file.rs::function::AnotherComponent
+            <EOS>
+
+            ## Rules
+
+            1. Use full paths: `remove::crates/folder/file.rs::Type::Name`
+            2. Use `::` for nested items: `status::StructName::Type::Name`
+            3. Always explain "must change because" and "without this"
+            3. Types of components: function, struct, enum, impl, trait
+            4. If there is extra information (e.g., enum variants), include that too.
+            5. Start with ##OUTPUT, end with ##SELECT, terminate with <EOS>
+        train_jsonl: dpo_pairs_generated.jsonl
+dpo:
+    value:
+        beta: 0.1
+        label_smoothing: 0
+        loss_type: sigmoid
+        reference_free: false
+        use_reference_model: true
+model:
+    value:
+        attn_implementation: null
+        base_local_dir: base_model
+        bnb_4bit_compute_dtype: bfloat16
+        bnb_4bit_quant_type: nf4
+        bnb_4bit_use_double_quant: false
+        device_map: auto
+        repo_id: ../../Models/Devstral-Small-2-24B-HS-CPT-SFT
+        revision: null
+        tokenizer_use_fast: true
+        torch_dtype: bfloat16
+        trust_remote_code: true
+        use_4bit: false
+peft:
+    value:
+        bias: none
+        enabled: true
+        lora_alpha: 32
+        lora_dropout: 0.05
+        r: 16
+        target_modules: auto
+run_dir:
+    value: runs/dpo_run_24b_v1
+train:
+    value:
+        early_stopping:
+            enabled: true
+            metric: eval_loss
+            min_delta: 0.001
+            mode: min
+            patience: 5
+        eval_steps: 25
+        evaluation_strategy: steps
+        gradient_accumulation_steps: 8
+        gradient_checkpointing: true
+        learning_rate: "5e-5"
+        load_best_model_at_end: true
+        logging_steps: 2
+        lr_scheduler_type: cosine
+        max_grad_norm: 1
+        num_train_epochs: 3
+        optim: adamw_torch
+        per_device_eval_batch_size: 1
+        per_device_train_batch_size: 1
+        resume_from_checkpoint: auto
+        save_steps: 100
+        save_strategy: steps
+        save_total_limit: 10
+        warmup_ratio: 0.1
+        weight_decay: 0
diff --git a/dpo_run_24b_v1/wandb/run-20251226_153729-pmpxe28f/files/output.log b/dpo_run_24b_v1/wandb/run-20251226_153729-pmpxe28f/files/output.log
new file mode 100644
index 0000000000000000000000000000000000000000..d08621e5df2aa2e268613870a8f0833c6ccc0feb
--- /dev/null
+++ b/dpo_run_24b_v1/wandb/run-20251226_153729-pmpxe28f/files/output.log
@@ -0,0 +1,60 @@
+Wandb initialized: project='dpo-training', name='auto-generated'
+2025-12-26 15:37:32,370 - INFO - Detected Mistral3 model architecture, loading with specific class
+Loading weights: 100%|█| 585/585 [00:13<00:00, 42.99it/s, Materializing param=model.vision_tower.transfo
+2025-12-26 15:37:48,062 - INFO - Ensuring all parameters are materialized...
+Loading reference model (frozen copy)...
+2025-12-26 15:37:52,674 - INFO - Detected Mistral3 model architecture, loading with specific class
+Loading weights: 100%|█| 585/585 [00:14<00:00, 41.01it/s, Materializing param=model.vision_tower.transfo
+2025-12-26 15:38:08,372 - INFO - Ensuring all parameters are materialized...
+Reference model loaded and frozen
+2025-12-26 15:38:09,309 - INFO - HTTP Request: HEAD https://s3.amazonaws.com/datasets.huggingface.co/datasets/datasets/json/json.py "HTTP/1.1 200 OK"
+2025-12-26 15:38:09,323 - INFO - Formatting train DPO data...
+2025-12-26 15:38:09,590 - INFO - Train dataset after filtering: 6850 examples
+2025-12-26 15:38:09,591 - INFO - train dataset validation passed: 6850 examples
+2025-12-26 15:38:09,591 - INFO - Formatting eval DPO data...
+2025-12-26 15:38:09,883 - INFO - Eval dataset after filtering: 762 examples
+2025-12-26 15:38:09,884 - INFO - eval dataset validation passed: 762 examples
+warmup_ratio is deprecated and will be removed in v5.2. Use `warmup_steps` instead.
+Early stopping enabled: patience=5, min_delta=0.001
+2025-12-26 15:38:09,925 - INFO - DPO Training with beta=0.1, loss_type=sigmoid
+warmup_ratio is deprecated and will be removed in v5.2. Use `warmup_steps` instead.
+Traceback (most recent call last):
+  File "/workspace/trainer-kit/DPO/run_dpo.py", line 1012, in <module>
+    main()
+  File "/workspace/trainer-kit/DPO/run_dpo.py", line 969, in main
+    trainer = DPOTrainer(
+  File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/trl/trainer/dpo_trainer.py", line 330, in __init__
+    processing_class = AutoProcessor.from_pretrained(model_id)
+  File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/models/auto/processing_auto.py", line 395, in from_pretrained
+    return processor_class.from_pretrained(
+  File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/processing_utils.py", line 1413, in from_pretrained
+    args = cls._get_arguments_from_pretrained(pretrained_model_name_or_path, processor_dict, **kwargs)
+  File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/processing_utils.py", line 1524, in _get_arguments_from_pretrained
+    sub_processor = auto_processor_class.from_pretrained(
+  File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/models/auto/image_processing_auto.py", line 504, in from_pretrained
+    raise initial_exception
+  File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/models/auto/image_processing_auto.py", line 486, in from_pretrained
+    config_dict, _ = ImageProcessingMixin.get_image_processor_dict(
+  File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/image_processing_base.py", line 334, in get_image_processor_dict
+    raise OSError(
+OSError: Can't load image processor for '../../Models/Devstral-Small-2-24B-HS-CPT-SFT'. If you were trying to load it from 'https://huggingface.co/models', make sure you don't have a local directory with the same name. Otherwise, make sure '../../Models/Devstral-Small-2-24B-HS-CPT-SFT' is the correct path to a directory containing a preprocessor_config.json file
+Traceback (most recent call last):
+  File "/workspace/trainer-kit/DPO/run_dpo.py", line 1012, in <module>
+    main()
+  File "/workspace/trainer-kit/DPO/run_dpo.py", line 969, in main
+    trainer = DPOTrainer(
+  File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/trl/trainer/dpo_trainer.py", line 330, in __init__
+    processing_class = AutoProcessor.from_pretrained(model_id)
+  File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/models/auto/processing_auto.py", line 395, in from_pretrained
+    return processor_class.from_pretrained(
+  File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/processing_utils.py", line 1413, in from_pretrained
+    args = cls._get_arguments_from_pretrained(pretrained_model_name_or_path, processor_dict, **kwargs)
+  File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/processing_utils.py", line 1524, in _get_arguments_from_pretrained
+    sub_processor = auto_processor_class.from_pretrained(
+  File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/models/auto/image_processing_auto.py", line 504, in from_pretrained
+    raise initial_exception
+  File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/models/auto/image_processing_auto.py", line 486, in from_pretrained
+    config_dict, _ = ImageProcessingMixin.get_image_processor_dict(
+  File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/image_processing_base.py", line 334, in get_image_processor_dict
+    raise OSError(
+OSError: Can't load image processor for '../../Models/Devstral-Small-2-24B-HS-CPT-SFT'. If you were trying to load it from 'https://huggingface.co/models', make sure you don't have a local directory with the same name. Otherwise, make sure '../../Models/Devstral-Small-2-24B-HS-CPT-SFT' is the correct path to a directory containing a preprocessor_config.json file
diff --git a/dpo_run_24b_v1/wandb/run-20251226_153729-pmpxe28f/files/requirements.txt b/dpo_run_24b_v1/wandb/run-20251226_153729-pmpxe28f/files/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..79a4241d8724f018c9bdfcd7c289f1f14578574b
--- /dev/null
+++ b/dpo_run_24b_v1/wandb/run-20251226_153729-pmpxe28f/files/requirements.txt
@@ -0,0 +1,104 @@
+exceptiongroup==1.3.1
+wheel==0.45.1
+python-dateutil==2.9.0.post0
+nvidia-ml-py==13.580.82
+huggingface_hub==1.2.3
+idna==3.11
+click==8.3.1
+numpy==2.2.6
+httpx==0.28.1
+tokenizers==0.22.1
+sympy==1.13.1
+yarl==1.22.0
+async-timeout==5.0.1
+datasets==4.4.2
+platformdirs==4.5.1
+nvidia-cuda-cupti-cu12==12.1.105
+nvidia-nvtx-cu12==12.1.105
+smmap==5.0.2
+accelerate==1.12.0
+requests==2.32.5
+aiohttp==3.13.2
+bitsandbytes==0.49.0
+nvidia-cublas-cu12==12.1.3.1
+mpmath==1.3.0
+torchaudio==2.5.1+cu121
+nvidia-cuda-runtime-cu12==12.1.105
+typing-inspection==0.4.2
+GitPython==3.1.45
+xxhash==3.6.0
+nvidia-cusolver-cu12==11.4.5.107
+pydantic_core==2.41.5
+six==1.17.0
+torchvision==0.20.1+cu121
+typing_extensions==4.15.0
+triton==3.1.0
+charset-normalizer==3.4.4
+nvitop==1.6.1
+wandb==0.23.1
+regex==2025.11.3
+pip==25.3
+nvidia-cusparse-cu12==12.1.0.106
+pytz==2025.2
+Jinja2==3.1.6
+psutil==7.2.0
+pillow==12.0.0
+packaging==25.0
+safetensors==0.7.0
+sentry-sdk==2.48.0
+gitdb==4.0.12
+httpcore==1.0.9
+setuptools==80.9.0
+nvidia-cufft-cu12==11.0.2.54
+anyio==4.12.0
+transformers==5.0.0.dev0
+pydantic==2.12.5
+fsspec==2025.10.0
+filelock==3.20.0
+PyYAML==6.0.3
+hf-xet==1.2.0
+nvidia-cudnn-cu12==9.1.0.70
+tqdm==4.67.1
+MarkupSafe==2.1.5
+attrs==25.4.0
+nvidia-cuda-nvrtc-cu12==12.1.105
+peft==0.18.0
+aiohappyeyeballs==2.6.1
+networkx==3.4.2
+nvidia-nvjitlink-cu12==12.9.86
+certifi==2025.11.12
+pyarrow==22.0.0
+dill==0.4.0
+protobuf==6.33.2
+aiosignal==1.4.0
+frozenlist==1.8.0
+urllib3==2.6.2
+propcache==0.4.1
+tzdata==2025.3
+pandas==2.3.3
+annotated-types==0.7.0
+shellingham==1.5.4
+nvidia-nccl-cu12==2.21.5
+multidict==6.7.0
+nvidia-curand-cu12==10.3.2.106
+trl==0.26.2
+torch==2.5.1+cu121
+h11==0.16.0
+multiprocess==0.70.18
+typer-slim==0.21.0
+wheel==0.45.1
+tomli==2.0.1
+autocommand==2.2.2
+jaraco.context==5.3.0
+zipp==3.19.2
+packaging==24.2
+inflect==7.3.1
+typing_extensions==4.12.2
+platformdirs==4.2.2
+jaraco.functools==4.0.1
+jaraco.collections==5.1.0
+jaraco.text==3.12.1
+backports.tarfile==1.2.0
+more-itertools==10.3.0
+importlib_metadata==8.0.0
+typeguard==4.3.0
diff --git a/dpo_run_24b_v1/wandb/run-20251226_153729-pmpxe28f/files/wandb-metadata.json b/dpo_run_24b_v1/wandb/run-20251226_153729-pmpxe28f/files/wandb-metadata.json
new file mode 100644
index 0000000000000000000000000000000000000000..591cf31961060edc67b4e49bdebe05d13fa4b51a
--- /dev/null
+++ b/dpo_run_24b_v1/wandb/run-20251226_153729-pmpxe28f/files/wandb-metadata.json
@@ -0,0 +1,47 @@
+{
+  "os":  "Linux-6.12.46+-x86_64-with-glibc2.35",
+  "python":  "CPython 3.10.12",
+  "startedAt":  "2025-12-26T15:37:29.335954Z",
+  "args":  [
+    "--config",
+    "config_dpo.yaml"
+  ],
+  "program":  "/workspace/trainer-kit/DPO/run_dpo.py",
+  "codePath":  "run_dpo.py",
+  "codePathLocal":  "run_dpo.py",
+  "email":  "shaiksirajuddin9949@gmail.com",
+  "root":  "runs/dpo_run_24b_v1",
+  "host":  "a100-2gpu-shell-session-757d587799-mfdvv",
+  "executable":  "/workspace/llm_finetuning_env/bin/python",
+  "cpu_count":  12,
+  "cpu_count_logical":  24,
+  "gpu":  "NVIDIA A100-SXM4-80GB",
+  "gpu_count":  2,
+  "disk":  {
+    "/":  {
+      "total":  "791251738624",
+      "used":  "319811207168"
+    }
+  },
+  "memory":  {
+    "total":  "359047892992"
+  },
+  "gpu_nvidia":  [
+    {
+      "name":  "NVIDIA A100-SXM4-80GB",
+      "memoryTotal":  "85899345920",
+      "cudaCores":  6912,
+      "architecture":  "Ampere",
+      "uuid":  "GPU-989794b0-ec3b-13bf-db9f-3fbe341497ba"
+    },
+    {
+      "name":  "NVIDIA A100-SXM4-80GB",
+      "memoryTotal":  "85899345920",
+      "cudaCores":  6912,
+      "architecture":  "Ampere",
+      "uuid":  "GPU-3790aa64-60ef-9eac-b0b1-b278ee8c0d40"
+    }
+  ],
+  "cudaVersion":  "13.0",
+  "writerId":  "wbhk79mzrraduaf9zrubl5urrc81gfow"
+}
\ No newline at end of file
diff --git a/dpo_run_24b_v1/wandb/run-20251226_153729-pmpxe28f/files/wandb-summary.json b/dpo_run_24b_v1/wandb/run-20251226_153729-pmpxe28f/files/wandb-summary.json
new file mode 100644
index 0000000000000000000000000000000000000000..7b7f6bbcb78630a18aad85b8e448b0042f42c288
--- /dev/null
+++ b/dpo_run_24b_v1/wandb/run-20251226_153729-pmpxe28f/files/wandb-summary.json
@@ -0,0 +1 @@
+{"_wandb":{"runtime":39},"_runtime":39}
\ No newline at end of file
diff --git a/dpo_run_24b_v1/wandb/run-20251226_153729-pmpxe28f/logs/debug-core.log b/dpo_run_24b_v1/wandb/run-20251226_153729-pmpxe28f/logs/debug-core.log
new file mode 100644
index 0000000000000000000000000000000000000000..06c822aeaf4bcba29092f9505c43afdd6aeecc8d
--- /dev/null
+++ b/dpo_run_24b_v1/wandb/run-20251226_153729-pmpxe28f/logs/debug-core.log
@@ -0,0 +1,14 @@
+{"time":"2025-12-26T15:37:29.435465391Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpykepxwwx/port-141038.txt","pid":141038,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false}
+{"time":"2025-12-26T15:37:29.43610374Z","level":"INFO","msg":"server: will exit if parent process dies","ppid":141038}
+{"time":"2025-12-26T15:37:29.43609418Z","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-141038-141113-2012905546/socket","Net":"unix"}}
+{"time":"2025-12-26T15:37:29.618357151Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"}
+{"time":"2025-12-26T15:37:29.624453312Z","level":"INFO","msg":"handleInformInit: received","streamId":"pmpxe28f","id":"1(@)"}
+{"time":"2025-12-26T15:37:29.778916194Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"pmpxe28f","id":"1(@)"}
+{"time":"2025-12-26T15:38:09.967379626Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"}
+{"time":"2025-12-26T15:38:09.967454483Z","level":"INFO","msg":"server is shutting down"}
+{"time":"2025-12-26T15:38:09.967459452Z","level":"INFO","msg":"connection: closing","id":"1(@)"}
+{"time":"2025-12-26T15:38:09.96756713Z","level":"INFO","msg":"connection: closed successfully","id":"1(@)"}
+{"time":"2025-12-26T15:38:09.967598647Z","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-141038-141113-2012905546/socket","Net":"unix"}}
+{"time":"2025-12-26T15:38:10.262343094Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"}
+{"time":"2025-12-26T15:38:10.262376426Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"}
+{"time":"2025-12-26T15:38:10.262392957Z","level":"INFO","msg":"server is closed"}
diff --git a/dpo_run_24b_v1/wandb/run-20251226_153729-pmpxe28f/logs/debug-internal.log b/dpo_run_24b_v1/wandb/run-20251226_153729-pmpxe28f/logs/debug-internal.log
new file mode 100644
index 0000000000000000000000000000000000000000..d615c5c6dda7ec22f00101d44211d3ecfd9c45ac
--- /dev/null
+++ b/dpo_run_24b_v1/wandb/run-20251226_153729-pmpxe28f/logs/debug-internal.log
@@ -0,0 +1,11 @@
+{"time":"2025-12-26T15:37:29.624602058Z","level":"INFO","msg":"stream: starting","core version":"0.23.1"}
+{"time":"2025-12-26T15:37:29.778695552Z","level":"INFO","msg":"stream: created new stream","id":"pmpxe28f"}
+{"time":"2025-12-26T15:37:29.778776133Z","level":"INFO","msg":"handler: started","stream_id":"pmpxe28f"}
+{"time":"2025-12-26T15:37:29.778906838Z","level":"INFO","msg":"stream: started","id":"pmpxe28f"}
+{"time":"2025-12-26T15:37:29.778928819Z","level":"INFO","msg":"writer: started","stream_id":"pmpxe28f"}
+{"time":"2025-12-26T15:37:29.778955571Z","level":"INFO","msg":"sender: started","stream_id":"pmpxe28f"}
+{"time":"2025-12-26T15:38:09.967470419Z","level":"INFO","msg":"stream: closing","id":"pmpxe28f"}
+{"time":"2025-12-26T15:38:10.180310736Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
+{"time":"2025-12-26T15:38:10.261514362Z","level":"INFO","msg":"handler: closed","stream_id":"pmpxe28f"}
+{"time":"2025-12-26T15:38:10.261605124Z","level":"INFO","msg":"sender: closed","stream_id":"pmpxe28f"}
+{"time":"2025-12-26T15:38:10.26161384Z","level":"INFO","msg":"stream: closed","id":"pmpxe28f"}
diff --git a/dpo_run_24b_v1/wandb/run-20251226_153729-pmpxe28f/logs/debug.log b/dpo_run_24b_v1/wandb/run-20251226_153729-pmpxe28f/logs/debug.log
new file mode 100644
index 0000000000000000000000000000000000000000..f9dd52a87db6e651ba6b5a419540602563487957
--- /dev/null
+++ b/dpo_run_24b_v1/wandb/run-20251226_153729-pmpxe28f/logs/debug.log
@@ -0,0 +1,23 @@
+2025-12-26 15:37:29,338 INFO    MainThread:141038 [wandb_setup.py:_flush():80] Current SDK version is 0.23.1
+2025-12-26 15:37:29,338 INFO    MainThread:141038 [wandb_setup.py:_flush():80] Configure stats pid to 141038
+2025-12-26 15:37:29,339 INFO    MainThread:141038 [wandb_setup.py:_flush():80] Loading settings from /root/.config/wandb/settings
+2025-12-26 15:37:29,339 INFO    MainThread:141038 [wandb_setup.py:_flush():80] Loading settings from /workspace/trainer-kit/DPO/wandb/settings
+2025-12-26 15:37:29,339 INFO    MainThread:141038 [wandb_setup.py:_flush():80] Loading settings from environment variables
+2025-12-26 15:37:29,339 INFO    MainThread:141038 [wandb_init.py:setup_run_log_directory():714] Logging user logs to runs/dpo_run_24b_v1/wandb/run-20251226_153729-pmpxe28f/logs/debug.log
+2025-12-26 15:37:29,339 INFO    MainThread:141038 [wandb_init.py:setup_run_log_directory():715] Logging internal logs to runs/dpo_run_24b_v1/wandb/run-20251226_153729-pmpxe28f/logs/debug-internal.log
+2025-12-26 15:37:29,339 INFO    MainThread:141038 [wandb_init.py:init():841] calling init triggers
+2025-12-26 15:37:29,339 INFO    MainThread:141038 [wandb_init.py:init():846] wandb.init called with sweep_config: {}
+config: {'model': {'repo_id': '../../Models/Devstral-Small-2-24B-HS-CPT-SFT', 'revision': None, 'base_local_dir': 'base_model', 'trust_remote_code': True, 'tokenizer_use_fast': True, 'device_map': 'auto', 'torch_dtype': 'bfloat16', 'use_4bit': False, 'bnb_4bit_quant_type': 'nf4', 'bnb_4bit_use_double_quant': False, 'bnb_4bit_compute_dtype': 'bfloat16', 'attn_implementation': None}, 'data': {'train_jsonl': 'dpo_pairs_generated.jsonl', 'eval_jsonl': None, 'eval_split_ratio': 0.1, 'prompt_field': 'prompt', 'chosen_field': 'chosen', 'rejected_field': 'rejected', 'score_field': 'f1_score', 'format_type': 'chatml', 'system_prompt': 'You are a Hyperswitch Rust code analyzer. Identify functions/structs that need modification for a given task.\n\n## Output Format\n\n##OUTPUT\nExplain the data flow and why each component must change:\n- Flow: [Input → Processing → Output with arrows]\n- For each component: "The [ComponentName] ([path]) must [action] because [reason]—without this, [consequence]"\n- Explain coupling between components\n\n##SELECT\nmodify::crates/path/to/file.rs::impl::ComponentName\nadd::crates/another/file.rs::function::AnotherComponent\n<EOS>\n\n## Rules\n\n1. Use full paths: `remove::crates/folder/file.rs::Type::Name`\n2. Use `::` for nested items: `status::StructName::Type::Name`\n3. Always explain "must change because" and "without this"\n3. Types of components: function, struct, enum, impl, trait\n4. If there is extra information (e.g., enum variants), include that too.\n5. 
Start with ##OUTPUT, end with ##SELECT, terminate with <EOS>\n', 'max_length': 2048, 'shuffle': True, 'num_proc': 4}, 'peft': {'enabled': True, 'r': 16, 'lora_alpha': 32, 'lora_dropout': 0.05, 'bias': 'none', 'target_modules': 'auto'}, 'dpo': {'beta': 0.1, 'label_smoothing': 0.0, 'loss_type': 'sigmoid', 'use_reference_model': True, 'reference_free': False}, 'train': {'num_train_epochs': 3, 'per_device_train_batch_size': 1, 'per_device_eval_batch_size': 1, 'gradient_accumulation_steps': 8, 'learning_rate': '5e-5', 'weight_decay': 0.0, 'warmup_ratio': 0.1, 'lr_scheduler_type': 'cosine', 'optim': 'adamw_torch', 'max_grad_norm': 1.0, 'gradient_checkpointing': True, 'logging_steps': 2, 'save_strategy': 'steps', 'save_steps': 100, 'save_total_limit': 10, 'evaluation_strategy': 'steps', 'eval_steps': 25, 'load_best_model_at_end': True, 'early_stopping': {'enabled': True, 'patience': 5, 'min_delta': 0.001, 'metric': 'eval_loss', 'mode': 'min'}, 'resume_from_checkpoint': 'auto'}, 'run_dir': 'runs/dpo_run_24b_v1', '_wandb': {}}
+2025-12-26 15:37:29,339 INFO    MainThread:141038 [wandb_init.py:init():889] starting backend
+2025-12-26 15:37:29,618 INFO    MainThread:141038 [wandb_init.py:init():892] sending inform_init request
+2025-12-26 15:37:29,622 INFO    MainThread:141038 [wandb_init.py:init():900] backend started and connected
+2025-12-26 15:37:29,624 INFO    MainThread:141038 [wandb_init.py:init():970] updated telemetry
+2025-12-26 15:37:29,625 INFO    MainThread:141038 [wandb_init.py:init():994] communicating run to backend with 90.0 second timeout
+2025-12-26 15:37:30,001 INFO    MainThread:141038 [wandb_init.py:init():1041] starting run threads in backend
+2025-12-26 15:37:30,134 INFO    MainThread:141038 [wandb_run.py:_console_start():2521] atexit reg
+2025-12-26 15:37:30,134 INFO    MainThread:141038 [wandb_run.py:_redirect():2369] redirect: wrap_raw
+2025-12-26 15:37:30,134 INFO    MainThread:141038 [wandb_run.py:_redirect():2438] Wrapping output streams.
+2025-12-26 15:37:30,135 INFO    MainThread:141038 [wandb_run.py:_redirect():2461] Redirects installed.
+2025-12-26 15:37:30,141 INFO    MainThread:141038 [wandb_init.py:init():1081] run started, returning control to user process
+2025-12-26 15:38:09,966 INFO    wandb-AsyncioManager-main:141038 [service_client.py:_forward_responses():80] Reached EOF.
+2025-12-26 15:38:09,966 INFO    wandb-AsyncioManager-main:141038 [mailbox.py:close():137] Closing mailbox, abandoning 1 handles.
diff --git a/dpo_run_24b_v1/wandb/run-20251226_153729-pmpxe28f/run-pmpxe28f.wandb b/dpo_run_24b_v1/wandb/run-20251226_153729-pmpxe28f/run-pmpxe28f.wandb
new file mode 100644
index 0000000000000000000000000000000000000000..98f3176f8d89406ab363fbc66ab811c7aef67258
--- /dev/null
+++ b/dpo_run_24b_v1/wandb/run-20251226_153729-pmpxe28f/run-pmpxe28f.wandb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4f2e35e066dc4960f90181279a27714188df96ffd07e8a028d05d6424efe3e02
+size 401067
diff --git a/dpo_run_24b_v1/wandb/run-20251226_153949-6fxdx0d2/files/config.yaml b/dpo_run_24b_v1/wandb/run-20251226_153949-6fxdx0d2/files/config.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..da30122085835a5a991ba74aa721d04ee3a6c79d
--- /dev/null
+++ b/dpo_run_24b_v1/wandb/run-20251226_153949-6fxdx0d2/files/config.yaml
@@ -0,0 +1,165 @@
+_wandb:
+    value:
+        cli_version: 0.23.1
+        e:
+            jlai83u305o14a736lgdag3ufvfafe3v:
+                args:
+                    - --config
+                    - config_dpo.yaml
+                codePath: run_dpo.py
+                codePathLocal: run_dpo.py
+                cpu_count: 12
+                cpu_count_logical: 24
+                cudaVersion: "13.0"
+                disk:
+                    /:
+                        total: "791251738624"
+                        used: "320268279808"
+                email: shaiksirajuddin9949@gmail.com
+                executable: /workspace/llm_finetuning_env/bin/python
+                gpu: NVIDIA A100-SXM4-80GB
+                gpu_count: 2
+                gpu_nvidia:
+                    - architecture: Ampere
+                      cudaCores: 6912
+                      memoryTotal: "85899345920"
+                      name: NVIDIA A100-SXM4-80GB
+                      uuid: GPU-989794b0-ec3b-13bf-db9f-3fbe341497ba
+                    - architecture: Ampere
+                      cudaCores: 6912
+                      memoryTotal: "85899345920"
+                      name: NVIDIA A100-SXM4-80GB
+                      uuid: GPU-3790aa64-60ef-9eac-b0b1-b278ee8c0d40
+                host: a100-2gpu-shell-session-757d587799-mfdvv
+                memory:
+                    total: "359047892992"
+                os: Linux-6.12.46+-x86_64-with-glibc2.35
+                program: /workspace/trainer-kit/DPO/run_dpo.py
+                python: CPython 3.10.12
+                root: runs/dpo_run_24b_v1
+                startedAt: "2025-12-26T15:39:49.837948Z"
+                writerId: jlai83u305o14a736lgdag3ufvfafe3v
+        m: []
+        python_version: 3.10.12
+        t:
+            "1":
+                - 1
+                - 11
+                - 41
+                - 49
+                - 51
+                - 71
+                - 84
+                - 98
+            "2":
+                - 1
+                - 11
+                - 41
+                - 49
+                - 51
+                - 71
+                - 84
+                - 98
+            "3":
+                - 15
+                - 16
+            "4": 3.10.12
+            "5": 0.23.1
+            "6": 5.0.0.dev0
+            "12": 0.23.1
+            "13": linux-x86_64
+data:
+    value:
+        chosen_field: chosen
+        eval_jsonl: null
+        eval_split_ratio: 0.1
+        format_type: chatml
+        max_length: 2048
+        num_proc: 4
+        prompt_field: prompt
+        rejected_field: rejected
+        score_field: f1_score
+        shuffle: true
+        system_prompt: |
+            You are a Hyperswitch Rust code analyzer. Identify functions/structs that need modification for a given task.
+
+            ## Output Format
+
+            ##OUTPUT
+            Explain the data flow and why each component must change:
+            - Flow: [Input → Processing → Output with arrows]
+            - For each component: "The [ComponentName] ([path]) must [action] because [reason]—without this, [consequence]"
+            - Explain coupling between components
+
+            ##SELECT
+            modify::crates/path/to/file.rs::impl::ComponentName
+            add::crates/another/file.rs::function::AnotherComponent
+            <EOS>
+
+            ## Rules
+
+            1. Use full paths: `remove::crates/folder/file.rs::Type::Name`
+            2. Use `::` for nested items: `status::StructName::Type::Name`
+            3. Always explain "must change because" and "without this"
+            3. Types of components: function, struct, enum, impl, trait
+            4. If there is extra information (e.g., enum variants), include that too.
+            5. Start with ##OUTPUT, end with ##SELECT, terminate with <EOS>
+        train_jsonl: dpo_pairs_generated.jsonl
+dpo:
+    value:
+        beta: 0.1
+        label_smoothing: 0
+        loss_type: sigmoid
+        reference_free: false
+        use_reference_model: true
+model:
+    value:
+        attn_implementation: null
+        base_local_dir: base_model
+        bnb_4bit_compute_dtype: bfloat16
+        bnb_4bit_quant_type: nf4
+        bnb_4bit_use_double_quant: false
+        device_map: auto
+        repo_id: ../../Models/Devstral-Small-2-24B-HS-CPT-SFT
+        revision: null
+        tokenizer_use_fast: true
+        torch_dtype: bfloat16
+        trust_remote_code: true
+        use_4bit: false
+peft:
+    value:
+        bias: none
+        enabled: true
+        lora_alpha: 32
+        lora_dropout: 0.05
+        r: 16
+        target_modules: auto
+run_dir:
+    value: runs/dpo_run_24b_v1
+train:
+    value:
+        early_stopping:
+            enabled: true
+            metric: eval_loss
+            min_delta: 0.001
+            mode: min
+            patience: 5
+        eval_steps: 25
+        evaluation_strategy: steps
+        gradient_accumulation_steps: 8
+        gradient_checkpointing: true
+        learning_rate: "5e-5"
+        load_best_model_at_end: true
+        logging_steps: 2
+        lr_scheduler_type: cosine
+        max_grad_norm: 1
+        num_train_epochs: 3
+        optim: adamw_torch
+        per_device_eval_batch_size: 1
+        per_device_train_batch_size: 1
+        resume_from_checkpoint: auto
+        save_steps: 100
+        save_strategy: steps
+        save_total_limit: 10
+        warmup_ratio: 0.1
+        weight_decay: 0
diff --git a/dpo_run_24b_v1/wandb/run-20251226_153949-6fxdx0d2/files/output.log b/dpo_run_24b_v1/wandb/run-20251226_153949-6fxdx0d2/files/output.log
new file mode 100644
index 0000000000000000000000000000000000000000..6a10ac78cb734f5adfdb5fa48935f4f64a866b21
--- /dev/null
+++ b/dpo_run_24b_v1/wandb/run-20251226_153949-6fxdx0d2/files/output.log
@@ -0,0 +1,32 @@
+Wandb initialized: project='dpo-training', name='auto-generated'
+2025-12-26 15:39:53,081 - INFO - Detected Mistral3 model architecture, loading with specific class
+Loading weights: 100%|█| 585/585 [00:14<00:00, 40.93it/s, Materializing param=model.vision_tower.transfo
+2025-12-26 15:40:09,258 - INFO - Ensuring all parameters are materialized...
+Loading reference model (frozen copy)...
+2025-12-26 15:40:14,109 - INFO - Detected Mistral3 model architecture, loading with specific class
+Loading weights: 100%|█| 585/585 [00:14<00:00, 41.53it/s, Materializing param=model.vision_tower.transfo
+2025-12-26 15:40:29,809 - INFO - Ensuring all parameters are materialized...
+Reference model loaded and frozen
+2025-12-26 15:40:30,750 - INFO - HTTP Request: HEAD https://s3.amazonaws.com/datasets.huggingface.co/datasets/datasets/json/json.py "HTTP/1.1 200 OK"
+2025-12-26 15:40:30,765 - INFO - Formatting train DPO data...
+2025-12-26 15:40:31,059 - INFO - Train dataset after filtering: 6850 examples
+2025-12-26 15:40:31,061 - INFO - train dataset validation passed: 6850 examples
+2025-12-26 15:40:31,061 - INFO - Formatting eval DPO data...
+2025-12-26 15:40:31,328 - INFO - Eval dataset after filtering: 762 examples
+2025-12-26 15:40:31,328 - INFO - eval dataset validation passed: 762 examples
+warmup_ratio is deprecated and will be removed in v5.2. Use `warmup_steps` instead.
+Early stopping enabled: patience=5, min_delta=0.001
+2025-12-26 15:40:31,362 - INFO - DPO Training with beta=0.1, loss_type=sigmoid
+warmup_ratio is deprecated and will be removed in v5.2. Use `warmup_steps` instead.
+Traceback (most recent call last):
+  File "/workspace/trainer-kit/DPO/run_dpo.py", line 1014, in <module>
+    main()
+  File "/workspace/trainer-kit/DPO/run_dpo.py", line 970, in main
+    trainer = DPOTrainer(
+TypeError: DPOTrainer.__init__() got an unexpected keyword argument 'tokenizer'
+Traceback (most recent call last):
+  File "/workspace/trainer-kit/DPO/run_dpo.py", line 1014, in <module>
+    main()
+  File "/workspace/trainer-kit/DPO/run_dpo.py", line 970, in main
+    trainer = DPOTrainer(
+TypeError: DPOTrainer.__init__() got an unexpected keyword argument 'tokenizer'
diff --git a/dpo_run_24b_v1/wandb/run-20251226_153949-6fxdx0d2/files/requirements.txt b/dpo_run_24b_v1/wandb/run-20251226_153949-6fxdx0d2/files/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..79a4241d8724f018c9bdfcd7c289f1f14578574b
--- /dev/null
+++ b/dpo_run_24b_v1/wandb/run-20251226_153949-6fxdx0d2/files/requirements.txt
@@ -0,0 +1,104 @@
+exceptiongroup==1.3.1
+wheel==0.45.1
+python-dateutil==2.9.0.post0
+nvidia-ml-py==13.580.82
+huggingface_hub==1.2.3
+idna==3.11
+click==8.3.1
+numpy==2.2.6
+httpx==0.28.1
+tokenizers==0.22.1
+sympy==1.13.1
+yarl==1.22.0
+async-timeout==5.0.1
+datasets==4.4.2
+platformdirs==4.5.1
+nvidia-cuda-cupti-cu12==12.1.105
+nvidia-nvtx-cu12==12.1.105
+smmap==5.0.2
+accelerate==1.12.0
+requests==2.32.5
+aiohttp==3.13.2
+bitsandbytes==0.49.0
+nvidia-cublas-cu12==12.1.3.1
+mpmath==1.3.0
+torchaudio==2.5.1+cu121
+nvidia-cuda-runtime-cu12==12.1.105
+typing-inspection==0.4.2
+GitPython==3.1.45
+xxhash==3.6.0
+nvidia-cusolver-cu12==11.4.5.107
+pydantic_core==2.41.5
+six==1.17.0
+torchvision==0.20.1+cu121
+typing_extensions==4.15.0
+triton==3.1.0
+charset-normalizer==3.4.4
+nvitop==1.6.1
+wandb==0.23.1
+regex==2025.11.3
+pip==25.3
+nvidia-cusparse-cu12==12.1.0.106
+pytz==2025.2
+Jinja2==3.1.6
+psutil==7.2.0
+pillow==12.0.0
+packaging==25.0
+safetensors==0.7.0
+sentry-sdk==2.48.0
+gitdb==4.0.12
+httpcore==1.0.9
+setuptools==80.9.0
+nvidia-cufft-cu12==11.0.2.54
+anyio==4.12.0
+transformers==5.0.0.dev0
+pydantic==2.12.5
+fsspec==2025.10.0
+filelock==3.20.0
+PyYAML==6.0.3
+hf-xet==1.2.0
+nvidia-cudnn-cu12==9.1.0.70
+tqdm==4.67.1
+MarkupSafe==2.1.5
+attrs==25.4.0
+nvidia-cuda-nvrtc-cu12==12.1.105
+peft==0.18.0
+aiohappyeyeballs==2.6.1
+networkx==3.4.2
+nvidia-nvjitlink-cu12==12.9.86
+certifi==2025.11.12
+pyarrow==22.0.0
+dill==0.4.0
+protobuf==6.33.2
+aiosignal==1.4.0
+frozenlist==1.8.0
+urllib3==2.6.2
+propcache==0.4.1
+tzdata==2025.3
+pandas==2.3.3
+annotated-types==0.7.0
+shellingham==1.5.4
+nvidia-nccl-cu12==2.21.5
+multidict==6.7.0
+nvidia-curand-cu12==10.3.2.106
+trl==0.26.2
+torch==2.5.1+cu121
+h11==0.16.0
+multiprocess==0.70.18
+typer-slim==0.21.0
+wheel==0.45.1
+tomli==2.0.1
+autocommand==2.2.2
+jaraco.context==5.3.0
+zipp==3.19.2
+packaging==24.2
+inflect==7.3.1
+typing_extensions==4.12.2
+platformdirs==4.2.2
+jaraco.functools==4.0.1
+jaraco.collections==5.1.0
+jaraco.text==3.12.1
+backports.tarfile==1.2.0
+more-itertools==10.3.0
+importlib_metadata==8.0.0
+typeguard==4.3.0
diff --git a/dpo_run_24b_v1/wandb/run-20251226_153949-6fxdx0d2/files/wandb-metadata.json b/dpo_run_24b_v1/wandb/run-20251226_153949-6fxdx0d2/files/wandb-metadata.json
new file mode 100644
index 0000000000000000000000000000000000000000..31961508f64009d7463210906b88f2968c382478
--- /dev/null
+++ b/dpo_run_24b_v1/wandb/run-20251226_153949-6fxdx0d2/files/wandb-metadata.json
@@ -0,0 +1,47 @@
+{
+  "os":  "Linux-6.12.46+-x86_64-with-glibc2.35",
+  "python":  "CPython 3.10.12",
+  "startedAt":  "2025-12-26T15:39:49.837948Z",
+  "args":  [
+    "--config",
+    "config_dpo.yaml"
+  ],
+  "program":  "/workspace/trainer-kit/DPO/run_dpo.py",
+  "codePath":  "run_dpo.py",
+  "codePathLocal":  "run_dpo.py",
+  "email":  "shaiksirajuddin9949@gmail.com",
+  "root":  "runs/dpo_run_24b_v1",
+  "host":  "a100-2gpu-shell-session-757d587799-mfdvv",
+  "executable":  "/workspace/llm_finetuning_env/bin/python",
+  "cpu_count":  12,
+  "cpu_count_logical":  24,
+  "gpu":  "NVIDIA A100-SXM4-80GB",
+  "gpu_count":  2,
+  "disk":  {
+    "/":  {
+      "total":  "791251738624",
+      "used":  "320268279808"
+    }
+  },
+  "memory":  {
+    "total":  "359047892992"
+  },
+  "gpu_nvidia":  [
+    {
+      "name":  "NVIDIA A100-SXM4-80GB",
+      "memoryTotal":  "85899345920",
+      "cudaCores":  6912,
+      "architecture":  "Ampere",
+      "uuid":  "GPU-989794b0-ec3b-13bf-db9f-3fbe341497ba"
+    },
+    {
+      "name":  "NVIDIA A100-SXM4-80GB",
+      "memoryTotal":  "85899345920",
+      "cudaCores":  6912,
+      "architecture":  "Ampere",
+      "uuid":  "GPU-3790aa64-60ef-9eac-b0b1-b278ee8c0d40"
+    }
+  ],
+  "cudaVersion":  "13.0",
+  "writerId":  "jlai83u305o14a736lgdag3ufvfafe3v"
+}
\ No newline at end of file
diff --git a/dpo_run_24b_v1/wandb/run-20251226_153949-6fxdx0d2/files/wandb-summary.json b/dpo_run_24b_v1/wandb/run-20251226_153949-6fxdx0d2/files/wandb-summary.json
new file mode 100644
index 0000000000000000000000000000000000000000..f2b7ff96efd576179c1af4bf6b36aeadbff8186d
--- /dev/null
+++ b/dpo_run_24b_v1/wandb/run-20251226_153949-6fxdx0d2/files/wandb-summary.json
@@ -0,0 +1 @@
+{"_wandb":{"runtime":40},"_runtime":40}
\ No newline at end of file
diff --git a/dpo_run_24b_v1/wandb/run-20251226_153949-6fxdx0d2/logs/debug-core.log b/dpo_run_24b_v1/wandb/run-20251226_153949-6fxdx0d2/logs/debug-core.log
new file mode 100644
index 0000000000000000000000000000000000000000..57f3e34ecc0e542eb33d9d9e2a8788cca4b61cd4
--- /dev/null
+++ b/dpo_run_24b_v1/wandb/run-20251226_153949-6fxdx0d2/logs/debug-core.log
@@ -0,0 +1,14 @@
+{"time":"2025-12-26T15:39:49.923087261Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmp5nmmvfbz/port-142016.txt","pid":142016,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false}
+{"time":"2025-12-26T15:39:49.923704522Z","level":"INFO","msg":"server: will exit if parent process dies","ppid":142016}
+{"time":"2025-12-26T15:39:49.923709686Z","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-142016-142091-3798965062/socket","Net":"unix"}}
+{"time":"2025-12-26T15:39:50.106742816Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"}
+{"time":"2025-12-26T15:39:50.113453163Z","level":"INFO","msg":"handleInformInit: received","streamId":"6fxdx0d2","id":"1(@)"}
+{"time":"2025-12-26T15:39:50.267606364Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"6fxdx0d2","id":"1(@)"}
+{"time":"2025-12-26T15:40:31.414054083Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"}
+{"time":"2025-12-26T15:40:31.414139331Z","level":"INFO","msg":"server is shutting down"}
+{"time":"2025-12-26T15:40:31.414122375Z","level":"INFO","msg":"connection: closing","id":"1(@)"}
+{"time":"2025-12-26T15:40:31.414264776Z","level":"INFO","msg":"connection: closed successfully","id":"1(@)"}
+{"time":"2025-12-26T15:40:31.414261796Z","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-142016-142091-3798965062/socket","Net":"unix"}}
+{"time":"2025-12-26T15:40:31.799961744Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"}
+{"time":"2025-12-26T15:40:31.799997574Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"}
+{"time":"2025-12-26T15:40:31.800012854Z","level":"INFO","msg":"server is closed"}
diff --git a/dpo_run_24b_v1/wandb/run-20251226_153949-6fxdx0d2/logs/debug-internal.log b/dpo_run_24b_v1/wandb/run-20251226_153949-6fxdx0d2/logs/debug-internal.log
new file mode 100644
index 0000000000000000000000000000000000000000..011e875369909cdc9b8f8c8be965d269b7dde963
--- /dev/null
+++ b/dpo_run_24b_v1/wandb/run-20251226_153949-6fxdx0d2/logs/debug-internal.log
@@ -0,0 +1,11 @@
+{"time":"2025-12-26T15:39:50.113601485Z","level":"INFO","msg":"stream: starting","core version":"0.23.1"}
+{"time":"2025-12-26T15:39:50.267379008Z","level":"INFO","msg":"stream: created new stream","id":"6fxdx0d2"}
+{"time":"2025-12-26T15:39:50.267508186Z","level":"INFO","msg":"handler: started","stream_id":"6fxdx0d2"}
+{"time":"2025-12-26T15:39:50.267597058Z","level":"INFO","msg":"stream: started","id":"6fxdx0d2"}
+{"time":"2025-12-26T15:39:50.267647332Z","level":"INFO","msg":"writer: started","stream_id":"6fxdx0d2"}
+{"time":"2025-12-26T15:39:50.267644841Z","level":"INFO","msg":"sender: started","stream_id":"6fxdx0d2"}
+{"time":"2025-12-26T15:40:31.414142508Z","level":"INFO","msg":"stream: closing","id":"6fxdx0d2"}
+{"time":"2025-12-26T15:40:31.62032167Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
+{"time":"2025-12-26T15:40:31.799141715Z","level":"INFO","msg":"handler: closed","stream_id":"6fxdx0d2"}
+{"time":"2025-12-26T15:40:31.799247169Z","level":"INFO","msg":"sender: closed","stream_id":"6fxdx0d2"}
+{"time":"2025-12-26T15:40:31.799262172Z","level":"INFO","msg":"stream: closed","id":"6fxdx0d2"}
diff --git a/dpo_run_24b_v1/wandb/run-20251226_153949-6fxdx0d2/logs/debug.log b/dpo_run_24b_v1/wandb/run-20251226_153949-6fxdx0d2/logs/debug.log
new file mode 100644
index 0000000000000000000000000000000000000000..a9b83d3b044d9688618f5ac3642f967eba58bf16
--- /dev/null
+++ b/dpo_run_24b_v1/wandb/run-20251226_153949-6fxdx0d2/logs/debug.log
@@ -0,0 +1,23 @@
+2025-12-26 15:39:49,839 INFO    MainThread:142016 [wandb_setup.py:_flush():80] Current SDK version is 0.23.1
+2025-12-26 15:39:49,839 INFO    MainThread:142016 [wandb_setup.py:_flush():80] Configure stats pid to 142016
+2025-12-26 15:39:49,839 INFO    MainThread:142016 [wandb_setup.py:_flush():80] Loading settings from /root/.config/wandb/settings
+2025-12-26 15:39:49,839 INFO    MainThread:142016 [wandb_setup.py:_flush():80] Loading settings from /workspace/trainer-kit/DPO/wandb/settings
+2025-12-26 15:39:49,839 INFO    MainThread:142016 [wandb_setup.py:_flush():80] Loading settings from environment variables
+2025-12-26 15:39:49,839 INFO    MainThread:142016 [wandb_init.py:setup_run_log_directory():714] Logging user logs to runs/dpo_run_24b_v1/wandb/run-20251226_153949-6fxdx0d2/logs/debug.log
+2025-12-26 15:39:49,839 INFO    MainThread:142016 [wandb_init.py:setup_run_log_directory():715] Logging internal logs to runs/dpo_run_24b_v1/wandb/run-20251226_153949-6fxdx0d2/logs/debug-internal.log
+2025-12-26 15:39:49,839 INFO    MainThread:142016 [wandb_init.py:init():841] calling init triggers
+2025-12-26 15:39:49,840 INFO    MainThread:142016 [wandb_init.py:init():846] wandb.init called with sweep_config: {}
+config: {'model': {'repo_id': '../../Models/Devstral-Small-2-24B-HS-CPT-SFT', 'revision': None, 'base_local_dir': 'base_model', 'trust_remote_code': True, 'tokenizer_use_fast': True, 'device_map': 'auto', 'torch_dtype': 'bfloat16', 'use_4bit': False, 'bnb_4bit_quant_type': 'nf4', 'bnb_4bit_use_double_quant': False, 'bnb_4bit_compute_dtype': 'bfloat16', 'attn_implementation': None}, 'data': {'train_jsonl': 'dpo_pairs_generated.jsonl', 'eval_jsonl': None, 'eval_split_ratio': 0.1, 'prompt_field': 'prompt', 'chosen_field': 'chosen', 'rejected_field': 'rejected', 'score_field': 'f1_score', 'format_type': 'chatml', 'system_prompt': 'You are a Hyperswitch Rust code analyzer. Identify functions/structs that need modification for a given task.\n\n## Output Format\n\n##OUTPUT\nExplain the data flow and why each component must change:\n- Flow: [Input → Processing → Output with arrows]\n- For each component: "The [ComponentName] ([path]) must [action] because [reason]—without this, [consequence]"\n- Explain coupling between components\n\n##SELECT\nmodify::crates/path/to/file.rs::impl::ComponentName\nadd::crates/another/file.rs::function::AnotherComponent\n<EOS>\n\n## Rules\n\n1. Use full paths: `remove::crates/folder/file.rs::Type::Name`\n2. Use `::` for nested items: `status::StructName::Type::Name`\n3. Always explain "must change because" and "without this"\n3. Types of components: function, struct, enum, impl, trait\n4. If there is extra information (e.g., enum variants), include that too.\n5. 
Start with ##OUTPUT, end with ##SELECT, terminate with <EOS>\n', 'max_length': 2048, 'shuffle': True, 'num_proc': 4}, 'peft': {'enabled': True, 'r': 16, 'lora_alpha': 32, 'lora_dropout': 0.05, 'bias': 'none', 'target_modules': 'auto'}, 'dpo': {'beta': 0.1, 'label_smoothing': 0.0, 'loss_type': 'sigmoid', 'use_reference_model': True, 'reference_free': False}, 'train': {'num_train_epochs': 3, 'per_device_train_batch_size': 1, 'per_device_eval_batch_size': 1, 'gradient_accumulation_steps': 8, 'learning_rate': '5e-5', 'weight_decay': 0.0, 'warmup_ratio': 0.1, 'lr_scheduler_type': 'cosine', 'optim': 'adamw_torch', 'max_grad_norm': 1.0, 'gradient_checkpointing': True, 'logging_steps': 2, 'save_strategy': 'steps', 'save_steps': 100, 'save_total_limit': 10, 'evaluation_strategy': 'steps', 'eval_steps': 25, 'load_best_model_at_end': True, 'early_stopping': {'enabled': True, 'patience': 5, 'min_delta': 0.001, 'metric': 'eval_loss', 'mode': 'min'}, 'resume_from_checkpoint': 'auto'}, 'run_dir': 'runs/dpo_run_24b_v1', '_wandb': {}}
+2025-12-26 15:39:49,840 INFO    MainThread:142016 [wandb_init.py:init():889] starting backend
+2025-12-26 15:39:50,106 INFO    MainThread:142016 [wandb_init.py:init():892] sending inform_init request
+2025-12-26 15:39:50,111 INFO    MainThread:142016 [wandb_init.py:init():900] backend started and connected
+2025-12-26 15:39:50,113 INFO    MainThread:142016 [wandb_init.py:init():970] updated telemetry
+2025-12-26 15:39:50,114 INFO    MainThread:142016 [wandb_init.py:init():994] communicating run to backend with 90.0 second timeout
+2025-12-26 15:39:50,578 INFO    MainThread:142016 [wandb_init.py:init():1041] starting run threads in backend
+2025-12-26 15:39:50,690 INFO    MainThread:142016 [wandb_run.py:_console_start():2521] atexit reg
+2025-12-26 15:39:50,690 INFO    MainThread:142016 [wandb_run.py:_redirect():2369] redirect: wrap_raw
+2025-12-26 15:39:50,690 INFO    MainThread:142016 [wandb_run.py:_redirect():2438] Wrapping output streams.
+2025-12-26 15:39:50,690 INFO    MainThread:142016 [wandb_run.py:_redirect():2461] Redirects installed.
+2025-12-26 15:39:50,695 INFO    MainThread:142016 [wandb_init.py:init():1081] run started, returning control to user process
+2025-12-26 15:40:31,414 INFO    wandb-AsyncioManager-main:142016 [service_client.py:_forward_responses():80] Reached EOF.
+2025-12-26 15:40:31,414 INFO    wandb-AsyncioManager-main:142016 [mailbox.py:close():137] Closing mailbox, abandoning 1 handles.
diff --git a/dpo_run_24b_v1/wandb/run-20251226_153949-6fxdx0d2/run-6fxdx0d2.wandb b/dpo_run_24b_v1/wandb/run-20251226_153949-6fxdx0d2/run-6fxdx0d2.wandb
new file mode 100644
index 0000000000000000000000000000000000000000..0ae5eaacacf0ab630946e161db1cf4fd52c7efd4
--- /dev/null
+++ b/dpo_run_24b_v1/wandb/run-20251226_153949-6fxdx0d2/run-6fxdx0d2.wandb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5b2e2c757425096b313df3831776a61a8aa79a7ee282052070cc36f3132f3f7a
+size 397289
diff --git a/dpo_run_24b_v1/wandb/run-20251226_154144-0ek9e5bk/files/config.yaml b/dpo_run_24b_v1/wandb/run-20251226_154144-0ek9e5bk/files/config.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..d4377497e16a58444abca37aa1429283d0759389
--- /dev/null
+++ b/dpo_run_24b_v1/wandb/run-20251226_154144-0ek9e5bk/files/config.yaml
@@ -0,0 +1,165 @@
+_wandb:
+    value:
+        cli_version: 0.23.1
+        e:
+            90vjlmu5p6jkl5ssnhu2u08q2ctox6p8:
+                args:
+                    - --config
+                    - config_dpo.yaml
+                codePath: run_dpo.py
+                codePathLocal: run_dpo.py
+                cpu_count: 12
+                cpu_count_logical: 24
+                cudaVersion: "13.0"
+                disk:
+                    /:
+                        total: "791251738624"
+                        used: "320575160320"
+                email: shaiksirajuddin9949@gmail.com
+                executable: /workspace/llm_finetuning_env/bin/python
+                gpu: NVIDIA A100-SXM4-80GB
+                gpu_count: 2
+                gpu_nvidia:
+                    - architecture: Ampere
+                      cudaCores: 6912
+                      memoryTotal: "85899345920"
+                      name: NVIDIA A100-SXM4-80GB
+                      uuid: GPU-989794b0-ec3b-13bf-db9f-3fbe341497ba
+                    - architecture: Ampere
+                      cudaCores: 6912
+                      memoryTotal: "85899345920"
+                      name: NVIDIA A100-SXM4-80GB
+                      uuid: GPU-3790aa64-60ef-9eac-b0b1-b278ee8c0d40
+                host: a100-2gpu-shell-session-757d587799-mfdvv
+                memory:
+                    total: "359047892992"
+                os: Linux-6.12.46+-x86_64-with-glibc2.35
+                program: /workspace/trainer-kit/DPO/run_dpo.py
+                python: CPython 3.10.12
+                root: runs/dpo_run_24b_v1
+                startedAt: "2025-12-26T15:41:44.545917Z"
+                writerId: 90vjlmu5p6jkl5ssnhu2u08q2ctox6p8
+        m: []
+        python_version: 3.10.12
+        t:
+            "1":
+                - 1
+                - 11
+                - 41
+                - 49
+                - 51
+                - 71
+                - 84
+                - 98
+            "2":
+                - 1
+                - 11
+                - 41
+                - 49
+                - 51
+                - 71
+                - 84
+                - 98
+            "3":
+                - 15
+                - 16
+            "4": 3.10.12
+            "5": 0.23.1
+            "6": 5.0.0.dev0
+            "12": 0.23.1
+            "13": linux-x86_64
+data:
+    value:
+        chosen_field: chosen
+        eval_jsonl: null
+        eval_split_ratio: 0.1
+        format_type: chatml
+        max_length: 2048
+        num_proc: 4
+        prompt_field: prompt
+        rejected_field: rejected
+        score_field: f1_score
+        shuffle: true
+        system_prompt: |
+            You are a Hyperswitch Rust code analyzer. Identify functions/structs that need modification for a given task.
+
+            ## Output Format
+
+            ##OUTPUT
+            Explain the data flow and why each component must change:
+            - Flow: [Input → Processing → Output with arrows]
+            - For each component: "The [ComponentName] ([path]) must [action] because [reason]—without this, [consequence]"
+            - Explain coupling between components
+
+            ##SELECT
+            modify::crates/path/to/file.rs::impl::ComponentName
+            add::crates/another/file.rs::function::AnotherComponent
+            <EOS>
+
+            ## Rules
+
+            1. Use full paths: `remove::crates/folder/file.rs::Type::Name`
+            2. Use `::` for nested items: `status::StructName::Type::Name`
+            3. Always explain "must change because" and "without this"
+            3. Types of components: function, struct, enum, impl, trait
+            4. If there is extra information (e.g., enum variants), include that too.
+            5. Start with ##OUTPUT, end with ##SELECT, terminate with <EOS>
+        train_jsonl: dpo_pairs_generated.jsonl
+dpo:
+    value:
+        beta: 0.1
+        label_smoothing: 0
+        loss_type: sigmoid
+        reference_free: false
+        use_reference_model: true
+model:
+    value:
+        attn_implementation: null
+        base_local_dir: base_model
+        bnb_4bit_compute_dtype: bfloat16
+        bnb_4bit_quant_type: nf4
+        bnb_4bit_use_double_quant: false
+        device_map: auto
+        repo_id: ../../Models/Devstral-Small-2-24B-HS-CPT-SFT
+        revision: null
+        tokenizer_use_fast: true
+        torch_dtype: bfloat16
+        trust_remote_code: true
+        use_4bit: false
+peft:
+    value:
+        bias: none
+        enabled: true
+        lora_alpha: 32
+        lora_dropout: 0.05
+        r: 16
+        target_modules: auto
+run_dir:
+    value: runs/dpo_run_24b_v1
+train:
+    value:
+        early_stopping:
+            enabled: true
+            metric: eval_loss
+            min_delta: 0.001
+            mode: min
+            patience: 5
+        eval_steps: 25
+        evaluation_strategy: steps
+        gradient_accumulation_steps: 8
+        gradient_checkpointing: true
+        learning_rate: "5e-5"
+        load_best_model_at_end: true
+        logging_steps: 2
+        lr_scheduler_type: cosine
+        max_grad_norm: 1
+        num_train_epochs: 3
+        optim: adamw_torch
+        per_device_eval_batch_size: 1
+        per_device_train_batch_size: 1
+        resume_from_checkpoint: auto
+        save_steps: 100
+        save_strategy: steps
+        save_total_limit: 10
+        warmup_ratio: 0.1
+        weight_decay: 0
diff --git a/dpo_run_24b_v1/wandb/run-20251226_154144-0ek9e5bk/files/output.log b/dpo_run_24b_v1/wandb/run-20251226_154144-0ek9e5bk/files/output.log
new file mode 100644
index 0000000000000000000000000000000000000000..f23ca0e5f86ffe22568e6094d1ba740837cca5d3
--- /dev/null
+++ b/dpo_run_24b_v1/wandb/run-20251226_154144-0ek9e5bk/files/output.log
@@ -0,0 +1,70 @@
+Wandb initialized: project='dpo-training', name='auto-generated'
+2025-12-26 15:41:47,613 - INFO - Detected Mistral3 model architecture, loading with specific class
+Loading weights: 100%|█| 585/585 [00:13<00:00, 41.88it/s, Materializing param=model.vision_tower.transfo
+2025-12-26 15:42:03,564 - INFO - Ensuring all parameters are materialized...
+Loading reference model (frozen copy)...
+2025-12-26 15:42:08,322 - INFO - Detected Mistral3 model architecture, loading with specific class
+Loading weights: 100%|█| 585/585 [00:14<00:00, 41.47it/s, Materializing param=model.vision_tower.transfo
+2025-12-26 15:42:24,045 - INFO - Ensuring all parameters are materialized...
+Reference model loaded and frozen
+2025-12-26 15:42:24,990 - INFO - HTTP Request: HEAD https://s3.amazonaws.com/datasets.huggingface.co/datasets/datasets/json/json.py "HTTP/1.1 200 OK"
+2025-12-26 15:42:25,005 - INFO - Formatting train DPO data...
+2025-12-26 15:42:25,312 - INFO - Train dataset after filtering: 6850 examples
+2025-12-26 15:42:25,313 - INFO - train dataset validation passed: 6850 examples
+2025-12-26 15:42:25,313 - INFO - Formatting eval DPO data...
+2025-12-26 15:42:25,576 - INFO - Eval dataset after filtering: 762 examples
+2025-12-26 15:42:25,577 - INFO - eval dataset validation passed: 762 examples
+warmup_ratio is deprecated and will be removed in v5.2. Use `warmup_steps` instead.
+Early stopping enabled: patience=5, min_delta=0.001
+2025-12-26 15:42:25,625 - INFO - DPO Training with beta=0.1, loss_type=sigmoid
+warmup_ratio is deprecated and will be removed in v5.2. Use `warmup_steps` instead.
+2025-12-26 15:42:25,654 - WARNING - You passed `model_init_kwargs` to the `DPOConfig`, but your model is already instantiated. The `model_init_kwargs` will be ignored.
+Tokenizing train dataset:   0%|                                         | 0/6850 [00:00<?, ? examples/s]
+Traceback (most recent call last):
+  File "/workspace/trainer-kit/DPO/run_dpo.py", line 1014, in <module>
+    main()
+  File "/workspace/trainer-kit/DPO/run_dpo.py", line 970, in main
+    trainer = DPOTrainer(
+  File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/trl/trainer/dpo_trainer.py", line 480, in __init__
+    train_dataset = self._prepare_dataset(train_dataset, processing_class, args, "train")
+  File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/trl/trainer/dpo_trainer.py", line 654, in _prepare_dataset
+    dataset = dataset.map(
+  File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/datasets/arrow_dataset.py", line 562, in wrapper
+    out: Union["Dataset", "DatasetDict"] = func(self, *args, **kwargs)
+  File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/datasets/arrow_dataset.py", line 3341, in map
+    for rank, done, content in Dataset._map_single(**unprocessed_kwargs):
+  File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/datasets/arrow_dataset.py", line 3673, in _map_single
+    for i, example in iter_outputs(shard_iterable):
+  File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/datasets/arrow_dataset.py", line 3647, in iter_outputs
+    yield i, apply_function(example, i, offset=offset)
+  File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/datasets/arrow_dataset.py", line 3570, in apply_function
+    processed_inputs = function(*fn_args, *additional_args, **fn_kwargs)
+  File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/trl/trainer/dpo_trainer.py", line 749, in process_row
+    processor, tokenizer = processing_class, processing_class.tokenizer  # the processing class is a processor
+  File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/tokenization_utils_base.py", line 1331, in __getattr__
+    raise AttributeError(f"{self.__class__.__name__} has no attribute {key}")
+AttributeError: TokenizersBackend has no attribute tokenizer. Did you mean: '_tokenizer'?
+Traceback (most recent call last):
+  File "/workspace/trainer-kit/DPO/run_dpo.py", line 1014, in <module>
+    main()
+  File "/workspace/trainer-kit/DPO/run_dpo.py", line 970, in main
+    trainer = DPOTrainer(
+  File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/trl/trainer/dpo_trainer.py", line 480, in __init__
+    train_dataset = self._prepare_dataset(train_dataset, processing_class, args, "train")
+  File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/trl/trainer/dpo_trainer.py", line 654, in _prepare_dataset
+    dataset = dataset.map(
+  File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/datasets/arrow_dataset.py", line 562, in wrapper
+    out: Union["Dataset", "DatasetDict"] = func(self, *args, **kwargs)
+  File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/datasets/arrow_dataset.py", line 3341, in map
+    for rank, done, content in Dataset._map_single(**unprocessed_kwargs):
+  File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/datasets/arrow_dataset.py", line 3673, in _map_single
+    for i, example in iter_outputs(shard_iterable):
+  File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/datasets/arrow_dataset.py", line 3647, in iter_outputs
+    yield i, apply_function(example, i, offset=offset)
+  File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/datasets/arrow_dataset.py", line 3570, in apply_function
+    processed_inputs = function(*fn_args, *additional_args, **fn_kwargs)
+  File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/trl/trainer/dpo_trainer.py", line 749, in process_row
+    processor, tokenizer = processing_class, processing_class.tokenizer  # the processing class is a processor
+  File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/tokenization_utils_base.py", line 1331, in __getattr__
+    raise AttributeError(f"{self.__class__.__name__} has no attribute {key}")
+AttributeError: TokenizersBackend has no attribute tokenizer. Did you mean: '_tokenizer'?
diff --git a/dpo_run_24b_v1/wandb/run-20251226_154144-0ek9e5bk/files/requirements.txt b/dpo_run_24b_v1/wandb/run-20251226_154144-0ek9e5bk/files/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..79a4241d8724f018c9bdfcd7c289f1f14578574b
--- /dev/null
+++ b/dpo_run_24b_v1/wandb/run-20251226_154144-0ek9e5bk/files/requirements.txt
@@ -0,0 +1,104 @@
+exceptiongroup==1.3.1
+wheel==0.45.1
+python-dateutil==2.9.0.post0
+nvidia-ml-py==13.580.82
+huggingface_hub==1.2.3
+idna==3.11
+click==8.3.1
+numpy==2.2.6
+httpx==0.28.1
+tokenizers==0.22.1
+sympy==1.13.1
+yarl==1.22.0
+async-timeout==5.0.1
+datasets==4.4.2
+platformdirs==4.5.1
+nvidia-cuda-cupti-cu12==12.1.105
+nvidia-nvtx-cu12==12.1.105
+smmap==5.0.2
+accelerate==1.12.0
+requests==2.32.5
+aiohttp==3.13.2
+bitsandbytes==0.49.0
+nvidia-cublas-cu12==12.1.3.1
+mpmath==1.3.0
+torchaudio==2.5.1+cu121
+nvidia-cuda-runtime-cu12==12.1.105
+typing-inspection==0.4.2
+GitPython==3.1.45
+xxhash==3.6.0
+nvidia-cusolver-cu12==11.4.5.107
+pydantic_core==2.41.5
+six==1.17.0
+torchvision==0.20.1+cu121
+typing_extensions==4.15.0
+triton==3.1.0
+charset-normalizer==3.4.4
+nvitop==1.6.1
+wandb==0.23.1
+regex==2025.11.3
+pip==25.3
+nvidia-cusparse-cu12==12.1.0.106
+pytz==2025.2
+Jinja2==3.1.6
+psutil==7.2.0
+pillow==12.0.0
+packaging==25.0
+safetensors==0.7.0
+sentry-sdk==2.48.0
+gitdb==4.0.12
+httpcore==1.0.9
+setuptools==80.9.0
+nvidia-cufft-cu12==11.0.2.54
+anyio==4.12.0
+transformers==5.0.0.dev0
+pydantic==2.12.5
+fsspec==2025.10.0
+filelock==3.20.0
+PyYAML==6.0.3
+hf-xet==1.2.0
+nvidia-cudnn-cu12==9.1.0.70
+tqdm==4.67.1
+MarkupSafe==2.1.5
+attrs==25.4.0
+nvidia-cuda-nvrtc-cu12==12.1.105
+peft==0.18.0
+aiohappyeyeballs==2.6.1
+networkx==3.4.2
+nvidia-nvjitlink-cu12==12.9.86
+certifi==2025.11.12
+pyarrow==22.0.0
+dill==0.4.0
+protobuf==6.33.2
+aiosignal==1.4.0
+frozenlist==1.8.0
+urllib3==2.6.2
+propcache==0.4.1
+tzdata==2025.3
+pandas==2.3.3
+annotated-types==0.7.0
+shellingham==1.5.4
+nvidia-nccl-cu12==2.21.5
+multidict==6.7.0
+nvidia-curand-cu12==10.3.2.106
+trl==0.26.2
+torch==2.5.1+cu121
+h11==0.16.0
+multiprocess==0.70.18
+typer-slim==0.21.0
+wheel==0.45.1
+tomli==2.0.1
+autocommand==2.2.2
+jaraco.context==5.3.0
+zipp==3.19.2
+packaging==24.2
+inflect==7.3.1
+typing_extensions==4.12.2
+platformdirs==4.2.2
+jaraco.functools==4.0.1
+jaraco.collections==5.1.0
+jaraco.text==3.12.1
+backports.tarfile==1.2.0
+more-itertools==10.3.0
+importlib_metadata==8.0.0
+typeguard==4.3.0
diff --git a/dpo_run_24b_v1/wandb/run-20251226_154144-0ek9e5bk/files/wandb-metadata.json b/dpo_run_24b_v1/wandb/run-20251226_154144-0ek9e5bk/files/wandb-metadata.json
new file mode 100644
index 0000000000000000000000000000000000000000..15456d0d9c90728465354da9d49542b880b6c035
--- /dev/null
+++ b/dpo_run_24b_v1/wandb/run-20251226_154144-0ek9e5bk/files/wandb-metadata.json
@@ -0,0 +1,47 @@
+{
+  "os":  "Linux-6.12.46+-x86_64-with-glibc2.35",
+  "python":  "CPython 3.10.12",
+  "startedAt":  "2025-12-26T15:41:44.545917Z",
+  "args":  [
+    "--config",
+    "config_dpo.yaml"
+  ],
+  "program":  "/workspace/trainer-kit/DPO/run_dpo.py",
+  "codePath":  "run_dpo.py",
+  "codePathLocal":  "run_dpo.py",
+  "email":  "shaiksirajuddin9949@gmail.com",
+  "root":  "runs/dpo_run_24b_v1",
+  "host":  "a100-2gpu-shell-session-757d587799-mfdvv",
+  "executable":  "/workspace/llm_finetuning_env/bin/python",
+  "cpu_count":  12,
+  "cpu_count_logical":  24,
+  "gpu":  "NVIDIA A100-SXM4-80GB",
+  "gpu_count":  2,
+  "disk":  {
+    "/":  {
+      "total":  "791251738624",
+      "used":  "320575160320"
+    }
+  },
+  "memory":  {
+    "total":  "359047892992"
+  },
+  "gpu_nvidia":  [
+    {
+      "name":  "NVIDIA A100-SXM4-80GB",
+      "memoryTotal":  "85899345920",
+      "cudaCores":  6912,
+      "architecture":  "Ampere",
+      "uuid":  "GPU-989794b0-ec3b-13bf-db9f-3fbe341497ba"
+    },
+    {
+      "name":  "NVIDIA A100-SXM4-80GB",
+      "memoryTotal":  "85899345920",
+      "cudaCores":  6912,
+      "architecture":  "Ampere",
+      "uuid":  "GPU-3790aa64-60ef-9eac-b0b1-b278ee8c0d40"
+    }
+  ],
+  "cudaVersion":  "13.0",
+  "writerId":  "90vjlmu5p6jkl5ssnhu2u08q2ctox6p8"
+}
\ No newline at end of file
diff --git a/dpo_run_24b_v1/wandb/run-20251226_154144-0ek9e5bk/files/wandb-summary.json b/dpo_run_24b_v1/wandb/run-20251226_154144-0ek9e5bk/files/wandb-summary.json
new file mode 100644
index 0000000000000000000000000000000000000000..f2b7ff96efd576179c1af4bf6b36aeadbff8186d
--- /dev/null
+++ b/dpo_run_24b_v1/wandb/run-20251226_154144-0ek9e5bk/files/wandb-summary.json
@@ -0,0 +1 @@
+{"_wandb":{"runtime":40},"_runtime":40}
\ No newline at end of file
diff --git a/dpo_run_24b_v1/wandb/run-20251226_154144-0ek9e5bk/logs/debug-core.log b/dpo_run_24b_v1/wandb/run-20251226_154144-0ek9e5bk/logs/debug-core.log
new file mode 100644
index 0000000000000000000000000000000000000000..0e4894c1504e0fd663852ccd8828560cc50b5986
--- /dev/null
+++ b/dpo_run_24b_v1/wandb/run-20251226_154144-0ek9e5bk/logs/debug-core.log
@@ -0,0 +1,14 @@
+{"time":"2025-12-26T15:41:44.628744027Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpj7kxuaiq/port-142861.txt","pid":142861,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false}
+{"time":"2025-12-26T15:41:44.629406111Z","level":"INFO","msg":"server: will exit if parent process dies","ppid":142861}
+{"time":"2025-12-26T15:41:44.629403167Z","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-142861-142936-3246350941/socket","Net":"unix"}}
+{"time":"2025-12-26T15:41:44.812450159Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"}
+{"time":"2025-12-26T15:41:44.819182469Z","level":"INFO","msg":"handleInformInit: received","streamId":"0ek9e5bk","id":"1(@)"}
+{"time":"2025-12-26T15:41:44.972334322Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"0ek9e5bk","id":"1(@)"}
+{"time":"2025-12-26T15:42:26.1721421Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"}
+{"time":"2025-12-26T15:42:26.172243369Z","level":"INFO","msg":"server is shutting down"}
+{"time":"2025-12-26T15:42:26.172235238Z","level":"INFO","msg":"connection: closing","id":"1(@)"}
+{"time":"2025-12-26T15:42:26.172318006Z","level":"INFO","msg":"connection: closed successfully","id":"1(@)"}
+{"time":"2025-12-26T15:42:26.172348821Z","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-142861-142936-3246350941/socket","Net":"unix"}}
+{"time":"2025-12-26T15:42:26.614135794Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"}
+{"time":"2025-12-26T15:42:26.614163212Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"}
+{"time":"2025-12-26T15:42:26.61417638Z","level":"INFO","msg":"server is closed"}
diff --git a/dpo_run_24b_v1/wandb/run-20251226_154144-0ek9e5bk/logs/debug-internal.log b/dpo_run_24b_v1/wandb/run-20251226_154144-0ek9e5bk/logs/debug-internal.log
new file mode 100644
index 0000000000000000000000000000000000000000..9d1fd10f7f2efe5f747f438a58611cdec33e388d
--- /dev/null
+++ b/dpo_run_24b_v1/wandb/run-20251226_154144-0ek9e5bk/logs/debug-internal.log
@@ -0,0 +1,11 @@
+{"time":"2025-12-26T15:41:44.819366235Z","level":"INFO","msg":"stream: starting","core version":"0.23.1"}
+{"time":"2025-12-26T15:41:44.972082208Z","level":"INFO","msg":"stream: created new stream","id":"0ek9e5bk"}
+{"time":"2025-12-26T15:41:44.972180687Z","level":"INFO","msg":"handler: started","stream_id":"0ek9e5bk"}
+{"time":"2025-12-26T15:41:44.972323175Z","level":"INFO","msg":"stream: started","id":"0ek9e5bk"}
+{"time":"2025-12-26T15:41:44.972371443Z","level":"INFO","msg":"sender: started","stream_id":"0ek9e5bk"}
+{"time":"2025-12-26T15:41:44.972369905Z","level":"INFO","msg":"writer: started","stream_id":"0ek9e5bk"}
+{"time":"2025-12-26T15:42:26.17225508Z","level":"INFO","msg":"stream: closing","id":"0ek9e5bk"}
+{"time":"2025-12-26T15:42:26.374173287Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
+{"time":"2025-12-26T15:42:26.613328003Z","level":"INFO","msg":"handler: closed","stream_id":"0ek9e5bk"}
+{"time":"2025-12-26T15:42:26.613433477Z","level":"INFO","msg":"sender: closed","stream_id":"0ek9e5bk"}
+{"time":"2025-12-26T15:42:26.613442172Z","level":"INFO","msg":"stream: closed","id":"0ek9e5bk"}
diff --git a/dpo_run_24b_v1/wandb/run-20251226_154144-0ek9e5bk/logs/debug.log b/dpo_run_24b_v1/wandb/run-20251226_154144-0ek9e5bk/logs/debug.log
new file mode 100644
index 0000000000000000000000000000000000000000..631ad2b58f1b385cbb9bfbb679ab1dc18ff8b2c7
--- /dev/null
+++ b/dpo_run_24b_v1/wandb/run-20251226_154144-0ek9e5bk/logs/debug.log
@@ -0,0 +1,23 @@
+2025-12-26 15:41:44,547 INFO    MainThread:142861 [wandb_setup.py:_flush():80] Current SDK version is 0.23.1
+2025-12-26 15:41:44,547 INFO    MainThread:142861 [wandb_setup.py:_flush():80] Configure stats pid to 142861
+2025-12-26 15:41:44,547 INFO    MainThread:142861 [wandb_setup.py:_flush():80] Loading settings from /root/.config/wandb/settings
+2025-12-26 15:41:44,547 INFO    MainThread:142861 [wandb_setup.py:_flush():80] Loading settings from /workspace/trainer-kit/DPO/wandb/settings
+2025-12-26 15:41:44,547 INFO    MainThread:142861 [wandb_setup.py:_flush():80] Loading settings from environment variables
+2025-12-26 15:41:44,547 INFO    MainThread:142861 [wandb_init.py:setup_run_log_directory():714] Logging user logs to runs/dpo_run_24b_v1/wandb/run-20251226_154144-0ek9e5bk/logs/debug.log
+2025-12-26 15:41:44,547 INFO    MainThread:142861 [wandb_init.py:setup_run_log_directory():715] Logging internal logs to runs/dpo_run_24b_v1/wandb/run-20251226_154144-0ek9e5bk/logs/debug-internal.log
+2025-12-26 15:41:44,547 INFO    MainThread:142861 [wandb_init.py:init():841] calling init triggers
+2025-12-26 15:41:44,547 INFO    MainThread:142861 [wandb_init.py:init():846] wandb.init called with sweep_config: {}
+config: {'model': {'repo_id': '../../Models/Devstral-Small-2-24B-HS-CPT-SFT', 'revision': None, 'base_local_dir': 'base_model', 'trust_remote_code': True, 'tokenizer_use_fast': True, 'device_map': 'auto', 'torch_dtype': 'bfloat16', 'use_4bit': False, 'bnb_4bit_quant_type': 'nf4', 'bnb_4bit_use_double_quant': False, 'bnb_4bit_compute_dtype': 'bfloat16', 'attn_implementation': None}, 'data': {'train_jsonl': 'dpo_pairs_generated.jsonl', 'eval_jsonl': None, 'eval_split_ratio': 0.1, 'prompt_field': 'prompt', 'chosen_field': 'chosen', 'rejected_field': 'rejected', 'score_field': 'f1_score', 'format_type': 'chatml', 'system_prompt': 'You are a Hyperswitch Rust code analyzer. Identify functions/structs that need modification for a given task.\n\n## Output Format\n\n##OUTPUT\nExplain the data flow and why each component must change:\n- Flow: [Input → Processing → Output with arrows]\n- For each component: "The [ComponentName] ([path]) must [action] because [reason]—without this, [consequence]"\n- Explain coupling between components\n\n##SELECT\nmodify::crates/path/to/file.rs::impl::ComponentName\nadd::crates/another/file.rs::function::AnotherComponent\n<EOS>\n\n## Rules\n\n1. Use full paths: `remove::crates/folder/file.rs::Type::Name`\n2. Use `::` for nested items: `status::StructName::Type::Name`\n3. Always explain "must change because" and "without this"\n3. Types of components: function, struct, enum, impl, trait\n4. If there is extra information (e.g., enum variants), include that too.\n5. 
Start with ##OUTPUT, end with ##SELECT, terminate with <EOS>\n', 'max_length': 2048, 'shuffle': True, 'num_proc': 4}, 'peft': {'enabled': True, 'r': 16, 'lora_alpha': 32, 'lora_dropout': 0.05, 'bias': 'none', 'target_modules': 'auto'}, 'dpo': {'beta': 0.1, 'label_smoothing': 0.0, 'loss_type': 'sigmoid', 'use_reference_model': True, 'reference_free': False}, 'train': {'num_train_epochs': 3, 'per_device_train_batch_size': 1, 'per_device_eval_batch_size': 1, 'gradient_accumulation_steps': 8, 'learning_rate': '5e-5', 'weight_decay': 0.0, 'warmup_ratio': 0.1, 'lr_scheduler_type': 'cosine', 'optim': 'adamw_torch', 'max_grad_norm': 1.0, 'gradient_checkpointing': True, 'logging_steps': 2, 'save_strategy': 'steps', 'save_steps': 100, 'save_total_limit': 10, 'evaluation_strategy': 'steps', 'eval_steps': 25, 'load_best_model_at_end': True, 'early_stopping': {'enabled': True, 'patience': 5, 'min_delta': 0.001, 'metric': 'eval_loss', 'mode': 'min'}, 'resume_from_checkpoint': 'auto'}, 'run_dir': 'runs/dpo_run_24b_v1', '_wandb': {}}
+2025-12-26 15:41:44,548 INFO    MainThread:142861 [wandb_init.py:init():889] starting backend
+2025-12-26 15:41:44,812 INFO    MainThread:142861 [wandb_init.py:init():892] sending inform_init request
+2025-12-26 15:41:44,817 INFO    MainThread:142861 [wandb_init.py:init():900] backend started and connected
+2025-12-26 15:41:44,820 INFO    MainThread:142861 [wandb_init.py:init():970] updated telemetry
+2025-12-26 15:41:44,821 INFO    MainThread:142861 [wandb_init.py:init():994] communicating run to backend with 90.0 second timeout
+2025-12-26 15:41:45,176 INFO    MainThread:142861 [wandb_init.py:init():1041] starting run threads in backend
+2025-12-26 15:41:45,288 INFO    MainThread:142861 [wandb_run.py:_console_start():2521] atexit reg
+2025-12-26 15:41:45,288 INFO    MainThread:142861 [wandb_run.py:_redirect():2369] redirect: wrap_raw
+2025-12-26 15:41:45,288 INFO    MainThread:142861 [wandb_run.py:_redirect():2438] Wrapping output streams.
+2025-12-26 15:41:45,288 INFO    MainThread:142861 [wandb_run.py:_redirect():2461] Redirects installed.
+2025-12-26 15:41:45,294 INFO    MainThread:142861 [wandb_init.py:init():1081] run started, returning control to user process
+2025-12-26 15:42:26,172 INFO    wandb-AsyncioManager-main:142861 [service_client.py:_forward_responses():80] Reached EOF.
+2025-12-26 15:42:26,172 INFO    wandb-AsyncioManager-main:142861 [mailbox.py:close():137] Closing mailbox, abandoning 1 handles.
diff --git a/dpo_run_24b_v1/wandb/run-20251226_154144-0ek9e5bk/run-0ek9e5bk.wandb b/dpo_run_24b_v1/wandb/run-20251226_154144-0ek9e5bk/run-0ek9e5bk.wandb
new file mode 100644
index 0000000000000000000000000000000000000000..75f47623f04568c83799e895325e295886c01f0e
--- /dev/null
+++ b/dpo_run_24b_v1/wandb/run-20251226_154144-0ek9e5bk/run-0ek9e5bk.wandb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8938253e58a76c1d30b64439aa1861a08938d86fd84ea6e322d3660d8b5ce41d
+size 402242
diff --git a/dpo_run_24b_v1/wandb/run-20251226_154334-wvpf8qeo/files/config.yaml b/dpo_run_24b_v1/wandb/run-20251226_154334-wvpf8qeo/files/config.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..7d19af2958205e9d72484700f37d37ae5486abf2
--- /dev/null
+++ b/dpo_run_24b_v1/wandb/run-20251226_154334-wvpf8qeo/files/config.yaml
@@ -0,0 +1,165 @@
+_wandb:
+    value:
+        cli_version: 0.23.1
+        e:
+            gp5fp03pdijn3jiuqim6z206ffjhw528:
+                args:
+                    - --config
+                    - config_dpo.yaml
+                codePath: run_dpo.py
+                codePathLocal: run_dpo.py
+                cpu_count: 12
+                cpu_count_logical: 24
+                cudaVersion: "13.0"
+                disk:
+                    /:
+                        total: "791251738624"
+                        used: "320952397824"
+                email: shaiksirajuddin9949@gmail.com
+                executable: /workspace/llm_finetuning_env/bin/python
+                gpu: NVIDIA A100-SXM4-80GB
+                gpu_count: 2
+                gpu_nvidia:
+                    - architecture: Ampere
+                      cudaCores: 6912
+                      memoryTotal: "85899345920"
+                      name: NVIDIA A100-SXM4-80GB
+                      uuid: GPU-989794b0-ec3b-13bf-db9f-3fbe341497ba
+                    - architecture: Ampere
+                      cudaCores: 6912
+                      memoryTotal: "85899345920"
+                      name: NVIDIA A100-SXM4-80GB
+                      uuid: GPU-3790aa64-60ef-9eac-b0b1-b278ee8c0d40
+                host: a100-2gpu-shell-session-757d587799-mfdvv
+                memory:
+                    total: "359047892992"
+                os: Linux-6.12.46+-x86_64-with-glibc2.35
+                program: /workspace/trainer-kit/DPO/run_dpo.py
+                python: CPython 3.10.12
+                root: runs/dpo_run_24b_v1
+                startedAt: "2025-12-26T15:43:34.263133Z"
+                writerId: gp5fp03pdijn3jiuqim6z206ffjhw528
+        m: []
+        python_version: 3.10.12
+        t:
+            "1":
+                - 1
+                - 11
+                - 41
+                - 49
+                - 51
+                - 71
+                - 84
+                - 98
+            "2":
+                - 1
+                - 11
+                - 41
+                - 49
+                - 51
+                - 71
+                - 84
+                - 98
+            "3":
+                - 15
+                - 16
+            "4": 3.10.12
+            "5": 0.23.1
+            "6": 5.0.0.dev0
+            "12": 0.23.1
+            "13": linux-x86_64
+data:
+    value:
+        chosen_field: chosen
+        eval_jsonl: null
+        eval_split_ratio: 0.1
+        format_type: chatml
+        max_length: 2048
+        num_proc: 4
+        prompt_field: prompt
+        rejected_field: rejected
+        score_field: f1_score
+        shuffle: true
+        system_prompt: |
+            You are a Hyperswitch Rust code analyzer. Identify functions/structs that need modification for a given task.
+
+            ## Output Format
+
+            ##OUTPUT
+            Explain the data flow and why each component must change:
+            - Flow: [Input → Processing → Output with arrows]
+            - For each component: "The [ComponentName] ([path]) must [action] because [reason]—without this, [consequence]"
+            - Explain coupling between components
+
+            ##SELECT
+            modify::crates/path/to/file.rs::impl::ComponentName
+            add::crates/another/file.rs::function::AnotherComponent
+            <EOS>
+
+            ## Rules
+
+            1. Use full paths: `remove::crates/folder/file.rs::Type::Name`
+            2. Use `::` for nested items: `status::StructName::Type::Name`
+            3. Always explain "must change because" and "without this"
+            3. Types of components: function, struct, enum, impl, trait
+            4. If there is extra information (e.g., enum variants), include that too.
+            5. Start with ##OUTPUT, end with ##SELECT, terminate with <EOS>
+        train_jsonl: dpo_pairs_generated.jsonl
+dpo:
+    value:
+        beta: 0.1
+        label_smoothing: 0
+        loss_type: sigmoid
+        reference_free: false
+        use_reference_model: true
+model:
+    value:
+        attn_implementation: null
+        base_local_dir: base_model
+        bnb_4bit_compute_dtype: bfloat16
+        bnb_4bit_quant_type: nf4
+        bnb_4bit_use_double_quant: false
+        device_map: auto
+        repo_id: ../../Models/Devstral-Small-2-24B-HS-CPT-SFT
+        revision: null
+        tokenizer_use_fast: true
+        torch_dtype: bfloat16
+        trust_remote_code: true
+        use_4bit: false
+peft:
+    value:
+        bias: none
+        enabled: true
+        lora_alpha: 32
+        lora_dropout: 0.05
+        r: 16
+        target_modules: auto
+run_dir:
+    value: runs/dpo_run_24b_v1
+train:
+    value:
+        early_stopping:
+            enabled: true
+            metric: eval_loss
+            min_delta: 0.001
+            mode: min
+            patience: 5
+        eval_steps: 25
+        evaluation_strategy: steps
+        gradient_accumulation_steps: 8
+        gradient_checkpointing: true
+        learning_rate: "5e-5"
+        load_best_model_at_end: true
+        logging_steps: 2
+        lr_scheduler_type: cosine
+        max_grad_norm: 1
+        num_train_epochs: 3
+        optim: adamw_torch
+        per_device_eval_batch_size: 1
+        per_device_train_batch_size: 1
+        resume_from_checkpoint: auto
+        save_steps: 100
+        save_strategy: steps
+        save_total_limit: 10
+        warmup_ratio: 0.1
+        weight_decay: 0
diff --git a/dpo_run_24b_v1/wandb/run-20251226_154334-wvpf8qeo/files/output.log b/dpo_run_24b_v1/wandb/run-20251226_154334-wvpf8qeo/files/output.log
new file mode 100644
index 0000000000000000000000000000000000000000..03f792567a5c553904f34a01bb7c0b0170b0629b
--- /dev/null
+++ b/dpo_run_24b_v1/wandb/run-20251226_154334-wvpf8qeo/files/output.log
@@ -0,0 +1,76 @@
+Wandb initialized: project='dpo-training', name='auto-generated'
+2025-12-26 15:43:37,394 - INFO - Detected Mistral3 model architecture, loading with specific class
+Loading weights: 100%|█| 585/585 [00:14<00:00, 41.72it/s, Materializing param=model.vision_tower.transfo
+2025-12-26 15:43:53,518 - INFO - Ensuring all parameters are materialized...
+Loading reference model (frozen copy)...
+2025-12-26 15:43:58,010 - INFO - Detected Mistral3 model architecture, loading with specific class
+Loading weights: 100%|█| 585/585 [00:13<00:00, 42.23it/s, Materializing param=model.vision_tower.transfo
+2025-12-26 15:44:13,776 - INFO - Ensuring all parameters are materialized...
+Reference model loaded and frozen
+2025-12-26 15:44:14,735 - INFO - HTTP Request: HEAD https://s3.amazonaws.com/datasets.huggingface.co/datasets/datasets/json/json.py "HTTP/1.1 200 OK"
+2025-12-26 15:44:14,749 - INFO - Formatting train DPO data...
+Formatting train DPO data (num_proc=4): 100%|██████████████| 6850/6850 [00:02<00:00, 2959.63 examples/s]
+Filter: 100%|█████████████████████████████████████████████| 6850/6850 [00:00<00:00, 73824.98 examples/s]
+2025-12-26 15:44:17,443 - INFO - Train dataset after filtering: 6850 examples
+2025-12-26 15:44:17,443 - INFO - train dataset validation passed: 6850 examples
+2025-12-26 15:44:17,443 - INFO - Formatting eval DPO data...
+Formatting eval DPO data (num_proc=4): 100%|██████████████████| 762/762 [00:02<00:00, 340.26 examples/s]
+Filter: 100%|███████████████████████████████████████████████| 762/762 [00:00<00:00, 38630.55 examples/s]
+2025-12-26 15:44:19,997 - INFO - Eval dataset after filtering: 762 examples
+2025-12-26 15:44:19,997 - INFO - eval dataset validation passed: 762 examples
+warmup_ratio is deprecated and will be removed in v5.2. Use `warmup_steps` instead.
+Early stopping enabled: patience=5, min_delta=0.001
+2025-12-26 15:44:20,048 - INFO - DPO Training with beta=0.1, loss_type=sigmoid
+warmup_ratio is deprecated and will be removed in v5.2. Use `warmup_steps` instead.
+2025-12-26 15:44:20,083 - WARNING - You passed `model_init_kwargs` to the `DPOConfig`, but your model is already instantiated. The `model_init_kwargs` will be ignored.
+Extracting prompt in train dataset: 100%|██████████████████| 6850/6850 [00:01<00:00, 5581.69 examples/s]
+Applying chat template to train dataset: 100%|████████████| 6850/6850 [00:00<00:00, 10111.56 examples/s]
+Tokenizing train dataset:   0%|                                         | 0/6850 [00:00<?, ? examples/s]
+Traceback (most recent call last):
+  File "/workspace/trainer-kit/DPO/run_dpo.py", line 982, in <module>
+    main()
+  File "/workspace/trainer-kit/DPO/run_dpo.py", line 938, in main
+    trainer = DPOTrainer(
+  File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/trl/trainer/dpo_trainer.py", line 480, in __init__
+    train_dataset = self._prepare_dataset(train_dataset, processing_class, args, "train")
+  File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/trl/trainer/dpo_trainer.py", line 654, in _prepare_dataset
+    dataset = dataset.map(
+  File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/datasets/arrow_dataset.py", line 562, in wrapper
+    out: Union["Dataset", "DatasetDict"] = func(self, *args, **kwargs)
+  File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/datasets/arrow_dataset.py", line 3341, in map
+    for rank, done, content in Dataset._map_single(**unprocessed_kwargs):
+  File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/datasets/arrow_dataset.py", line 3673, in _map_single
+    for i, example in iter_outputs(shard_iterable):
+  File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/datasets/arrow_dataset.py", line 3647, in iter_outputs
+    yield i, apply_function(example, i, offset=offset)
+  File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/datasets/arrow_dataset.py", line 3570, in apply_function
+    processed_inputs = function(*fn_args, *additional_args, **fn_kwargs)
+  File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/trl/trainer/dpo_trainer.py", line 749, in process_row
+    processor, tokenizer = processing_class, processing_class.tokenizer  # the processing class is a processor
+  File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/tokenization_utils_base.py", line 1331, in __getattr__
+    raise AttributeError(f"{self.__class__.__name__} has no attribute {key}")
+AttributeError: TokenizersBackend has no attribute tokenizer. Did you mean: '_tokenizer'?
+Traceback (most recent call last):
+  File "/workspace/trainer-kit/DPO/run_dpo.py", line 982, in <module>
+    main()
+  File "/workspace/trainer-kit/DPO/run_dpo.py", line 938, in main
+    trainer = DPOTrainer(
+  File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/trl/trainer/dpo_trainer.py", line 480, in __init__
+    train_dataset = self._prepare_dataset(train_dataset, processing_class, args, "train")
+  File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/trl/trainer/dpo_trainer.py", line 654, in _prepare_dataset
+    dataset = dataset.map(
+  File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/datasets/arrow_dataset.py", line 562, in wrapper
+    out: Union["Dataset", "DatasetDict"] = func(self, *args, **kwargs)
+  File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/datasets/arrow_dataset.py", line 3341, in map
+    for rank, done, content in Dataset._map_single(**unprocessed_kwargs):
+  File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/datasets/arrow_dataset.py", line 3673, in _map_single
+    for i, example in iter_outputs(shard_iterable):
+  File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/datasets/arrow_dataset.py", line 3647, in iter_outputs
+    yield i, apply_function(example, i, offset=offset)
+  File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/datasets/arrow_dataset.py", line 3570, in apply_function
+    processed_inputs = function(*fn_args, *additional_args, **fn_kwargs)
+  File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/trl/trainer/dpo_trainer.py", line 749, in process_row
+    processor, tokenizer = processing_class, processing_class.tokenizer  # the processing class is a processor
+  File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/tokenization_utils_base.py", line 1331, in __getattr__
+    raise AttributeError(f"{self.__class__.__name__} has no attribute {key}")
+AttributeError: TokenizersBackend has no attribute tokenizer. Did you mean: '_tokenizer'?
diff --git a/dpo_run_24b_v1/wandb/run-20251226_154334-wvpf8qeo/files/requirements.txt b/dpo_run_24b_v1/wandb/run-20251226_154334-wvpf8qeo/files/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..79a4241d8724f018c9bdfcd7c289f1f14578574b
--- /dev/null
+++ b/dpo_run_24b_v1/wandb/run-20251226_154334-wvpf8qeo/files/requirements.txt
@@ -0,0 +1,104 @@
+exceptiongroup==1.3.1
+wheel==0.45.1
+python-dateutil==2.9.0.post0
+nvidia-ml-py==13.580.82
+huggingface_hub==1.2.3
+idna==3.11
+click==8.3.1
+numpy==2.2.6
+httpx==0.28.1
+tokenizers==0.22.1
+sympy==1.13.1
+yarl==1.22.0
+async-timeout==5.0.1
+datasets==4.4.2
+platformdirs==4.5.1
+nvidia-cuda-cupti-cu12==12.1.105
+nvidia-nvtx-cu12==12.1.105
+smmap==5.0.2
+accelerate==1.12.0
+requests==2.32.5
+aiohttp==3.13.2
+bitsandbytes==0.49.0
+nvidia-cublas-cu12==12.1.3.1
+mpmath==1.3.0
+torchaudio==2.5.1+cu121
+nvidia-cuda-runtime-cu12==12.1.105
+typing-inspection==0.4.2
+GitPython==3.1.45
+xxhash==3.6.0
+nvidia-cusolver-cu12==11.4.5.107
+pydantic_core==2.41.5
+six==1.17.0
+torchvision==0.20.1+cu121
+typing_extensions==4.15.0
+triton==3.1.0
+charset-normalizer==3.4.4
+nvitop==1.6.1
+wandb==0.23.1
+regex==2025.11.3
+pip==25.3
+nvidia-cusparse-cu12==12.1.0.106
+pytz==2025.2
+Jinja2==3.1.6
+psutil==7.2.0
+pillow==12.0.0
+packaging==25.0
+safetensors==0.7.0
+sentry-sdk==2.48.0
+gitdb==4.0.12
+httpcore==1.0.9
+setuptools==80.9.0
+nvidia-cufft-cu12==11.0.2.54
+anyio==4.12.0
+transformers==5.0.0.dev0
+pydantic==2.12.5
+fsspec==2025.10.0
+filelock==3.20.0
+PyYAML==6.0.3
+hf-xet==1.2.0
+nvidia-cudnn-cu12==9.1.0.70
+tqdm==4.67.1
+MarkupSafe==2.1.5
+attrs==25.4.0
+nvidia-cuda-nvrtc-cu12==12.1.105
+peft==0.18.0
+aiohappyeyeballs==2.6.1
+networkx==3.4.2
+nvidia-nvjitlink-cu12==12.9.86
+certifi==2025.11.12
+pyarrow==22.0.0
+dill==0.4.0
+protobuf==6.33.2
+aiosignal==1.4.0
+frozenlist==1.8.0
+urllib3==2.6.2
+propcache==0.4.1
+tzdata==2025.3
+pandas==2.3.3
+annotated-types==0.7.0
+shellingham==1.5.4
+nvidia-nccl-cu12==2.21.5
+multidict==6.7.0
+nvidia-curand-cu12==10.3.2.106
+trl==0.26.2
+torch==2.5.1+cu121
+h11==0.16.0
+multiprocess==0.70.18
+typer-slim==0.21.0
+wheel==0.45.1
+tomli==2.0.1
+autocommand==2.2.2
+jaraco.context==5.3.0
+zipp==3.19.2
+packaging==24.2
+inflect==7.3.1
+typing_extensions==4.12.2
+platformdirs==4.2.2
+jaraco.functools==4.0.1
+jaraco.collections==5.1.0
+jaraco.text==3.12.1
+backports.tarfile==1.2.0
+more-itertools==10.3.0
+importlib_metadata==8.0.0
+typeguard==4.3.0
diff --git a/dpo_run_24b_v1/wandb/run-20251226_154334-wvpf8qeo/files/wandb-metadata.json b/dpo_run_24b_v1/wandb/run-20251226_154334-wvpf8qeo/files/wandb-metadata.json
new file mode 100644
index 0000000000000000000000000000000000000000..c64e89223fa4cfcd669a93a1c407d7aa806df76a
--- /dev/null
+++ b/dpo_run_24b_v1/wandb/run-20251226_154334-wvpf8qeo/files/wandb-metadata.json
@@ -0,0 +1,47 @@
+{
+  "os":  "Linux-6.12.46+-x86_64-with-glibc2.35",
+  "python":  "CPython 3.10.12",
+  "startedAt":  "2025-12-26T15:43:34.263133Z",
+  "args":  [
+    "--config",
+    "config_dpo.yaml"
+  ],
+  "program":  "/workspace/trainer-kit/DPO/run_dpo.py",
+  "codePath":  "run_dpo.py",
+  "codePathLocal":  "run_dpo.py",
+  "email":  "shaiksirajuddin9949@gmail.com",
+  "root":  "runs/dpo_run_24b_v1",
+  "host":  "a100-2gpu-shell-session-757d587799-mfdvv",
+  "executable":  "/workspace/llm_finetuning_env/bin/python",
+  "cpu_count":  12,
+  "cpu_count_logical":  24,
+  "gpu":  "NVIDIA A100-SXM4-80GB",
+  "gpu_count":  2,
+  "disk":  {
+    "/":  {
+      "total":  "791251738624",
+      "used":  "320952397824"
+    }
+  },
+  "memory":  {
+    "total":  "359047892992"
+  },
+  "gpu_nvidia":  [
+    {
+      "name":  "NVIDIA A100-SXM4-80GB",
+      "memoryTotal":  "85899345920",
+      "cudaCores":  6912,
+      "architecture":  "Ampere",
+      "uuid":  "GPU-989794b0-ec3b-13bf-db9f-3fbe341497ba"
+    },
+    {
+      "name":  "NVIDIA A100-SXM4-80GB",
+      "memoryTotal":  "85899345920",
+      "cudaCores":  6912,
+      "architecture":  "Ampere",
+      "uuid":  "GPU-3790aa64-60ef-9eac-b0b1-b278ee8c0d40"
+    }
+  ],
+  "cudaVersion":  "13.0",
+  "writerId":  "gp5fp03pdijn3jiuqim6z206ffjhw528"
+}
\ No newline at end of file
diff --git a/dpo_run_24b_v1/wandb/run-20251226_154334-wvpf8qeo/files/wandb-summary.json b/dpo_run_24b_v1/wandb/run-20251226_154334-wvpf8qeo/files/wandb-summary.json
new file mode 100644
index 0000000000000000000000000000000000000000..f9e4843abb5d7f611d55c94e912ecc0e50067f04
--- /dev/null
+++ b/dpo_run_24b_v1/wandb/run-20251226_154334-wvpf8qeo/files/wandb-summary.json
@@ -0,0 +1 @@
+{"_wandb":{"runtime":47},"_runtime":47}
\ No newline at end of file
diff --git a/dpo_run_24b_v1/wandb/run-20251226_154334-wvpf8qeo/logs/debug-core.log b/dpo_run_24b_v1/wandb/run-20251226_154334-wvpf8qeo/logs/debug-core.log
new file mode 100644
index 0000000000000000000000000000000000000000..ae8843a8a1c54531f7bf940f08432b1ea7ba9982
--- /dev/null
+++ b/dpo_run_24b_v1/wandb/run-20251226_154334-wvpf8qeo/logs/debug-core.log
@@ -0,0 +1,14 @@
+{"time":"2025-12-26T15:43:34.345866904Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpxciva727/port-143688.txt","pid":143688,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false}
+{"time":"2025-12-26T15:43:34.346499227Z","level":"INFO","msg":"server: will exit if parent process dies","ppid":143688}
+{"time":"2025-12-26T15:43:34.346496679Z","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-143688-143757-2344502487/socket","Net":"unix"}}
+{"time":"2025-12-26T15:43:34.529672189Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"}
+{"time":"2025-12-26T15:43:34.53598054Z","level":"INFO","msg":"handleInformInit: received","streamId":"wvpf8qeo","id":"1(@)"}
+{"time":"2025-12-26T15:43:34.6939677Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"wvpf8qeo","id":"1(@)"}
+{"time":"2025-12-26T15:44:22.518902242Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"}
+{"time":"2025-12-26T15:44:22.518956245Z","level":"INFO","msg":"server is shutting down"}
+{"time":"2025-12-26T15:44:22.518957575Z","level":"INFO","msg":"connection: closing","id":"1(@)"}
+{"time":"2025-12-26T15:44:22.519030346Z","level":"INFO","msg":"connection: closed successfully","id":"1(@)"}
+{"time":"2025-12-26T15:44:22.519077171Z","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-143688-143757-2344502487/socket","Net":"unix"}}
+{"time":"2025-12-26T15:44:22.811047629Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"}
+{"time":"2025-12-26T15:44:22.811075648Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"}
+{"time":"2025-12-26T15:44:22.811088161Z","level":"INFO","msg":"server is closed"}
diff --git a/dpo_run_24b_v1/wandb/run-20251226_154334-wvpf8qeo/logs/debug-internal.log b/dpo_run_24b_v1/wandb/run-20251226_154334-wvpf8qeo/logs/debug-internal.log
new file mode 100644
index 0000000000000000000000000000000000000000..3f2f0e330718d551242bc555d1d00f803594a39a
--- /dev/null
+++ b/dpo_run_24b_v1/wandb/run-20251226_154334-wvpf8qeo/logs/debug-internal.log
@@ -0,0 +1,11 @@
+{"time":"2025-12-26T15:43:34.536104499Z","level":"INFO","msg":"stream: starting","core version":"0.23.1"}
+{"time":"2025-12-26T15:43:34.693746904Z","level":"INFO","msg":"stream: created new stream","id":"wvpf8qeo"}
+{"time":"2025-12-26T15:43:34.69390992Z","level":"INFO","msg":"handler: started","stream_id":"wvpf8qeo"}
+{"time":"2025-12-26T15:43:34.693958863Z","level":"INFO","msg":"stream: started","id":"wvpf8qeo"}
+{"time":"2025-12-26T15:43:34.693967546Z","level":"INFO","msg":"writer: started","stream_id":"wvpf8qeo"}
+{"time":"2025-12-26T15:43:34.693987517Z","level":"INFO","msg":"sender: started","stream_id":"wvpf8qeo"}
+{"time":"2025-12-26T15:44:22.518976106Z","level":"INFO","msg":"stream: closing","id":"wvpf8qeo"}
+{"time":"2025-12-26T15:44:22.733597908Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
+{"time":"2025-12-26T15:44:22.810066814Z","level":"INFO","msg":"handler: closed","stream_id":"wvpf8qeo"}
+{"time":"2025-12-26T15:44:22.810166548Z","level":"INFO","msg":"sender: closed","stream_id":"wvpf8qeo"}
+{"time":"2025-12-26T15:44:22.810180457Z","level":"INFO","msg":"stream: closed","id":"wvpf8qeo"}
diff --git a/dpo_run_24b_v1/wandb/run-20251226_154334-wvpf8qeo/logs/debug.log b/dpo_run_24b_v1/wandb/run-20251226_154334-wvpf8qeo/logs/debug.log
new file mode 100644
index 0000000000000000000000000000000000000000..91f61326d3ca752d84d2991676429ac33521ecae
--- /dev/null
+++ b/dpo_run_24b_v1/wandb/run-20251226_154334-wvpf8qeo/logs/debug.log
@@ -0,0 +1,23 @@
+2025-12-26 15:43:34,264 INFO    MainThread:143688 [wandb_setup.py:_flush():80] Current SDK version is 0.23.1
+2025-12-26 15:43:34,264 INFO    MainThread:143688 [wandb_setup.py:_flush():80] Configure stats pid to 143688
+2025-12-26 15:43:34,264 INFO    MainThread:143688 [wandb_setup.py:_flush():80] Loading settings from /root/.config/wandb/settings
+2025-12-26 15:43:34,264 INFO    MainThread:143688 [wandb_setup.py:_flush():80] Loading settings from /workspace/trainer-kit/DPO/wandb/settings
+2025-12-26 15:43:34,264 INFO    MainThread:143688 [wandb_setup.py:_flush():80] Loading settings from environment variables
+2025-12-26 15:43:34,265 INFO    MainThread:143688 [wandb_init.py:setup_run_log_directory():714] Logging user logs to runs/dpo_run_24b_v1/wandb/run-20251226_154334-wvpf8qeo/logs/debug.log
+2025-12-26 15:43:34,265 INFO    MainThread:143688 [wandb_init.py:setup_run_log_directory():715] Logging internal logs to runs/dpo_run_24b_v1/wandb/run-20251226_154334-wvpf8qeo/logs/debug-internal.log
+2025-12-26 15:43:34,265 INFO    MainThread:143688 [wandb_init.py:init():841] calling init triggers
+2025-12-26 15:43:34,265 INFO    MainThread:143688 [wandb_init.py:init():846] wandb.init called with sweep_config: {}
+config: {'model': {'repo_id': '../../Models/Devstral-Small-2-24B-HS-CPT-SFT', 'revision': None, 'base_local_dir': 'base_model', 'trust_remote_code': True, 'tokenizer_use_fast': True, 'device_map': 'auto', 'torch_dtype': 'bfloat16', 'use_4bit': False, 'bnb_4bit_quant_type': 'nf4', 'bnb_4bit_use_double_quant': False, 'bnb_4bit_compute_dtype': 'bfloat16', 'attn_implementation': None}, 'data': {'train_jsonl': 'dpo_pairs_generated.jsonl', 'eval_jsonl': None, 'eval_split_ratio': 0.1, 'prompt_field': 'prompt', 'chosen_field': 'chosen', 'rejected_field': 'rejected', 'score_field': 'f1_score', 'format_type': 'chatml', 'system_prompt': 'You are a Hyperswitch Rust code analyzer. Identify functions/structs that need modification for a given task.\n\n## Output Format\n\n##OUTPUT\nExplain the data flow and why each component must change:\n- Flow: [Input → Processing → Output with arrows]\n- For each component: "The [ComponentName] ([path]) must [action] because [reason]—without this, [consequence]"\n- Explain coupling between components\n\n##SELECT\nmodify::crates/path/to/file.rs::impl::ComponentName\nadd::crates/another/file.rs::function::AnotherComponent\n<EOS>\n\n## Rules\n\n1. Use full paths: `remove::crates/folder/file.rs::Type::Name`\n2. Use `::` for nested items: `status::StructName::Type::Name`\n3. Always explain "must change because" and "without this"\n3. Types of components: function, struct, enum, impl, trait\n4. If there is extra information (e.g., enum variants), include that too.\n5. 
Start with ##OUTPUT, end with ##SELECT, terminate with <EOS>\n', 'max_length': 2048, 'shuffle': True, 'num_proc': 4}, 'peft': {'enabled': True, 'r': 16, 'lora_alpha': 32, 'lora_dropout': 0.05, 'bias': 'none', 'target_modules': 'auto'}, 'dpo': {'beta': 0.1, 'label_smoothing': 0.0, 'loss_type': 'sigmoid', 'use_reference_model': True, 'reference_free': False}, 'train': {'num_train_epochs': 3, 'per_device_train_batch_size': 1, 'per_device_eval_batch_size': 1, 'gradient_accumulation_steps': 8, 'learning_rate': '5e-5', 'weight_decay': 0.0, 'warmup_ratio': 0.1, 'lr_scheduler_type': 'cosine', 'optim': 'adamw_torch', 'max_grad_norm': 1.0, 'gradient_checkpointing': True, 'logging_steps': 2, 'save_strategy': 'steps', 'save_steps': 100, 'save_total_limit': 10, 'evaluation_strategy': 'steps', 'eval_steps': 25, 'load_best_model_at_end': True, 'early_stopping': {'enabled': True, 'patience': 5, 'min_delta': 0.001, 'metric': 'eval_loss', 'mode': 'min'}, 'resume_from_checkpoint': 'auto'}, 'run_dir': 'runs/dpo_run_24b_v1', '_wandb': {}}
+2025-12-26 15:43:34,265 INFO    MainThread:143688 [wandb_init.py:init():889] starting backend
+2025-12-26 15:43:34,529 INFO    MainThread:143688 [wandb_init.py:init():892] sending inform_init request
+2025-12-26 15:43:34,534 INFO    MainThread:143688 [wandb_init.py:init():900] backend started and connected
+2025-12-26 15:43:34,536 INFO    MainThread:143688 [wandb_init.py:init():970] updated telemetry
+2025-12-26 15:43:34,536 INFO    MainThread:143688 [wandb_init.py:init():994] communicating run to backend with 90.0 second timeout
+2025-12-26 15:43:34,921 INFO    MainThread:143688 [wandb_init.py:init():1041] starting run threads in backend
+2025-12-26 15:43:35,030 INFO    MainThread:143688 [wandb_run.py:_console_start():2521] atexit reg
+2025-12-26 15:43:35,030 INFO    MainThread:143688 [wandb_run.py:_redirect():2369] redirect: wrap_raw
+2025-12-26 15:43:35,030 INFO    MainThread:143688 [wandb_run.py:_redirect():2438] Wrapping output streams.
+2025-12-26 15:43:35,030 INFO    MainThread:143688 [wandb_run.py:_redirect():2461] Redirects installed.
+2025-12-26 15:43:35,035 INFO    MainThread:143688 [wandb_init.py:init():1081] run started, returning control to user process
+2025-12-26 15:44:22,519 INFO    wandb-AsyncioManager-main:143688 [service_client.py:_forward_responses():80] Reached EOF.
+2025-12-26 15:44:22,519 INFO    wandb-AsyncioManager-main:143688 [mailbox.py:close():137] Closing mailbox, abandoning 1 handles.
diff --git a/dpo_run_24b_v1/wandb/run-20251226_154334-wvpf8qeo/run-wvpf8qeo.wandb b/dpo_run_24b_v1/wandb/run-20251226_154334-wvpf8qeo/run-wvpf8qeo.wandb
new file mode 100644
index 0000000000000000000000000000000000000000..c07164525670532984651851de60afefc2dea425
--- /dev/null
+++ b/dpo_run_24b_v1/wandb/run-20251226_154334-wvpf8qeo/run-wvpf8qeo.wandb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:10e2bb7fb88709bacde40c47ecf3d6c2060d9e6e9c962a03cd940152ca13a7e1
+size 410631
diff --git a/dpo_run_24b_v1/wandb/run-20251226_154526-q26c0nv5/files/config.yaml b/dpo_run_24b_v1/wandb/run-20251226_154526-q26c0nv5/files/config.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..1bfa9e380e3ac3cfa813f197d54b16c96731ac5e
--- /dev/null
+++ b/dpo_run_24b_v1/wandb/run-20251226_154526-q26c0nv5/files/config.yaml
@@ -0,0 +1,165 @@
+_wandb:
+    value:
+        cli_version: 0.23.1
+        e:
+            vkwg4w421cdzqcahglfwssvuu5b1q8gu:
+                args:
+                    - --config
+                    - config_dpo.yaml
+                codePath: run_dpo.py
+                codePathLocal: run_dpo.py
+                cpu_count: 12
+                cpu_count_logical: 24
+                cudaVersion: "13.0"
+                disk:
+                    /:
+                        total: "791251738624"
+                        used: "321426280448"
+                email: shaiksirajuddin9949@gmail.com
+                executable: /workspace/llm_finetuning_env/bin/python
+                gpu: NVIDIA A100-SXM4-80GB
+                gpu_count: 2
+                gpu_nvidia:
+                    - architecture: Ampere
+                      cudaCores: 6912
+                      memoryTotal: "85899345920"
+                      name: NVIDIA A100-SXM4-80GB
+                      uuid: GPU-989794b0-ec3b-13bf-db9f-3fbe341497ba
+                    - architecture: Ampere
+                      cudaCores: 6912
+                      memoryTotal: "85899345920"
+                      name: NVIDIA A100-SXM4-80GB
+                      uuid: GPU-3790aa64-60ef-9eac-b0b1-b278ee8c0d40
+                host: a100-2gpu-shell-session-757d587799-mfdvv
+                memory:
+                    total: "359047892992"
+                os: Linux-6.12.46+-x86_64-with-glibc2.35
+                program: /workspace/trainer-kit/DPO/run_dpo.py
+                python: CPython 3.10.12
+                root: runs/dpo_run_24b_v1
+                startedAt: "2025-12-26T15:45:26.602783Z"
+                writerId: vkwg4w421cdzqcahglfwssvuu5b1q8gu
+        m: []
+        python_version: 3.10.12
+        t:
+            "1":
+                - 1
+                - 11
+                - 41
+                - 49
+                - 51
+                - 71
+                - 84
+                - 98
+            "2":
+                - 1
+                - 11
+                - 41
+                - 49
+                - 51
+                - 71
+                - 84
+                - 98
+            "3":
+                - 15
+                - 16
+            "4": 3.10.12
+            "5": 0.23.1
+            "6": 5.0.0.dev0
+            "12": 0.23.1
+            "13": linux-x86_64
+data:
+    value:
+        chosen_field: chosen
+        eval_jsonl: null
+        eval_split_ratio: 0.1
+        format_type: chatml
+        max_length: 2048
+        num_proc: 4
+        prompt_field: prompt
+        rejected_field: rejected
+        score_field: f1_score
+        shuffle: true
+        system_prompt: |
+            You are a Hyperswitch Rust code analyzer. Identify functions/structs that need modification for a given task.
+
+            ## Output Format
+
+            ##OUTPUT
+            Explain the data flow and why each component must change:
+            - Flow: [Input → Processing → Output with arrows]
+            - For each component: "The [ComponentName] ([path]) must [action] because [reason]—without this, [consequence]"
+            - Explain coupling between components
+
+            ##SELECT
+            modify::crates/path/to/file.rs::impl::ComponentName
+            add::crates/another/file.rs::function::AnotherComponent
+            <EOS>
+
+            ## Rules
+
+            1. Use full paths: `remove::crates/folder/file.rs::Type::Name`
+            2. Use `::` for nested items: `status::StructName::Type::Name`
+            3. Always explain "must change because" and "without this"
+            3. Types of components: function, struct, enum, impl, trait
+            4. If there is extra information (e.g., enum variants), include that too.
+            5. Start with ##OUTPUT, end with ##SELECT, terminate with <EOS>
+        train_jsonl: dpo_pairs_generated.jsonl
+dpo:
+    value:
+        beta: 0.1
+        label_smoothing: 0
+        loss_type: sigmoid
+        reference_free: false
+        use_reference_model: true
+model:
+    value:
+        attn_implementation: null
+        base_local_dir: base_model
+        bnb_4bit_compute_dtype: bfloat16
+        bnb_4bit_quant_type: nf4
+        bnb_4bit_use_double_quant: false
+        device_map: auto
+        repo_id: ../../Models/Devstral-Small-2-24B-HS-CPT-SFT
+        revision: null
+        tokenizer_use_fast: true
+        torch_dtype: bfloat16
+        trust_remote_code: true
+        use_4bit: false
+peft:
+    value:
+        bias: none
+        enabled: true
+        lora_alpha: 32
+        lora_dropout: 0.05
+        r: 16
+        target_modules: auto
+run_dir:
+    value: runs/dpo_run_24b_v1
+train:
+    value:
+        early_stopping:
+            enabled: true
+            metric: eval_loss
+            min_delta: 0.001
+            mode: min
+            patience: 5
+        eval_steps: 25
+        evaluation_strategy: steps
+        gradient_accumulation_steps: 8
+        gradient_checkpointing: true
+        learning_rate: "5e-5"
+        load_best_model_at_end: true
+        logging_steps: 2
+        lr_scheduler_type: cosine
+        max_grad_norm: 1
+        num_train_epochs: 3
+        optim: adamw_torch
+        per_device_eval_batch_size: 1
+        per_device_train_batch_size: 1
+        resume_from_checkpoint: auto
+        save_steps: 100
+        save_strategy: steps
+        save_total_limit: 10
+        warmup_ratio: 0.1
+        weight_decay: 0
diff --git a/dpo_run_24b_v1/wandb/run-20251226_154526-q26c0nv5/files/output.log b/dpo_run_24b_v1/wandb/run-20251226_154526-q26c0nv5/files/output.log
new file mode 100644
index 0000000000000000000000000000000000000000..432d64504d908eb1f1cbc97d0807e70b27e15f3b
--- /dev/null
+++ b/dpo_run_24b_v1/wandb/run-20251226_154526-q26c0nv5/files/output.log
@@ -0,0 +1,70 @@
+Wandb initialized: project='dpo-training', name='auto-generated'
+2025-12-26 15:45:29,678 - INFO - Detected Mistral3 model architecture, loading with specific class
+Loading weights: 100%|█| 585/585 [00:14<00:00, 41.60it/s, Materializing param=model.vision_tower.transfo
+2025-12-26 15:45:45,661 - INFO - Ensuring all parameters are materialized...
+Loading reference model (frozen copy)...
+2025-12-26 15:45:50,175 - INFO - Detected Mistral3 model architecture, loading with specific class
+Loading weights: 100%|█| 585/585 [00:14<00:00, 41.22it/s, Materializing param=model.vision_tower.transfo
+2025-12-26 15:46:06,041 - INFO - Ensuring all parameters are materialized...
+Reference model loaded and frozen
+2025-12-26 15:46:06,985 - INFO - HTTP Request: HEAD https://s3.amazonaws.com/datasets.huggingface.co/datasets/datasets/json/json.py "HTTP/1.1 200 OK"
+2025-12-26 15:46:07,000 - INFO - Formatting train DPO data...
+2025-12-26 15:46:07,281 - INFO - Train dataset after filtering: 6850 examples
+2025-12-26 15:46:07,282 - INFO - train dataset validation passed: 6850 examples
+2025-12-26 15:46:07,283 - INFO - Formatting eval DPO data...
+2025-12-26 15:46:07,581 - INFO - Eval dataset after filtering: 762 examples
+2025-12-26 15:46:07,582 - INFO - eval dataset validation passed: 762 examples
+warmup_ratio is deprecated and will be removed in v5.2. Use `warmup_steps` instead.
+Early stopping enabled: patience=5, min_delta=0.001
+2025-12-26 15:46:07,618 - INFO - DPO Training with beta=0.1, loss_type=sigmoid
+warmup_ratio is deprecated and will be removed in v5.2. Use `warmup_steps` instead.
+2025-12-26 15:46:07,649 - WARNING - You passed `model_init_kwargs` to the `DPOConfig`, but your model is already instantiated. The `model_init_kwargs` will be ignored.
+Tokenizing train dataset:   0%|                                         | 0/6850 [00:00<?, ? examples/s]
+Traceback (most recent call last):
+  File "/workspace/trainer-kit/DPO/run_dpo.py", line 982, in <module>
+    main()
+  File "/workspace/trainer-kit/DPO/run_dpo.py", line 938, in main
+    trainer = DPOTrainer(
+  File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/trl/trainer/dpo_trainer.py", line 480, in __init__
+    train_dataset = self._prepare_dataset(train_dataset, processing_class, args, "train")
+  File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/trl/trainer/dpo_trainer.py", line 654, in _prepare_dataset
+    dataset = dataset.map(
+  File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/datasets/arrow_dataset.py", line 562, in wrapper
+    out: Union["Dataset", "DatasetDict"] = func(self, *args, **kwargs)
+  File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/datasets/arrow_dataset.py", line 3341, in map
+    for rank, done, content in Dataset._map_single(**unprocessed_kwargs):
+  File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/datasets/arrow_dataset.py", line 3673, in _map_single
+    for i, example in iter_outputs(shard_iterable):
+  File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/datasets/arrow_dataset.py", line 3647, in iter_outputs
+    yield i, apply_function(example, i, offset=offset)
+  File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/datasets/arrow_dataset.py", line 3570, in apply_function
+    processed_inputs = function(*fn_args, *additional_args, **fn_kwargs)
+  File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/trl/trainer/dpo_trainer.py", line 749, in process_row
+    processor, tokenizer = processing_class, processing_class.tokenizer  # the processing class is a processor
+  File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/tokenization_utils_base.py", line 1331, in __getattr__
+    raise AttributeError(f"{self.__class__.__name__} has no attribute {key}")
+AttributeError: TokenizersBackend has no attribute tokenizer. Did you mean: '_tokenizer'?
+Traceback (most recent call last):
+  File "/workspace/trainer-kit/DPO/run_dpo.py", line 982, in <module>
+    main()
+  File "/workspace/trainer-kit/DPO/run_dpo.py", line 938, in main
+    trainer = DPOTrainer(
+  File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/trl/trainer/dpo_trainer.py", line 480, in __init__
+    train_dataset = self._prepare_dataset(train_dataset, processing_class, args, "train")
+  File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/trl/trainer/dpo_trainer.py", line 654, in _prepare_dataset
+    dataset = dataset.map(
+  File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/datasets/arrow_dataset.py", line 562, in wrapper
+    out: Union["Dataset", "DatasetDict"] = func(self, *args, **kwargs)
+  File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/datasets/arrow_dataset.py", line 3341, in map
+    for rank, done, content in Dataset._map_single(**unprocessed_kwargs):
+  File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/datasets/arrow_dataset.py", line 3673, in _map_single
+    for i, example in iter_outputs(shard_iterable):
+  File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/datasets/arrow_dataset.py", line 3647, in iter_outputs
+    yield i, apply_function(example, i, offset=offset)
+  File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/datasets/arrow_dataset.py", line 3570, in apply_function
+    processed_inputs = function(*fn_args, *additional_args, **fn_kwargs)
+  File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/trl/trainer/dpo_trainer.py", line 749, in process_row
+    processor, tokenizer = processing_class, processing_class.tokenizer  # the processing class is a processor
+  File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/tokenization_utils_base.py", line 1331, in __getattr__
+    raise AttributeError(f"{self.__class__.__name__} has no attribute {key}")
+AttributeError: TokenizersBackend has no attribute tokenizer. Did you mean: '_tokenizer'?
diff --git a/dpo_run_24b_v1/wandb/run-20251226_154526-q26c0nv5/files/requirements.txt b/dpo_run_24b_v1/wandb/run-20251226_154526-q26c0nv5/files/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..79a4241d8724f018c9bdfcd7c289f1f14578574b
--- /dev/null
+++ b/dpo_run_24b_v1/wandb/run-20251226_154526-q26c0nv5/files/requirements.txt
@@ -0,0 +1,104 @@
+exceptiongroup==1.3.1
+wheel==0.45.1
+python-dateutil==2.9.0.post0
+nvidia-ml-py==13.580.82
+huggingface_hub==1.2.3
+idna==3.11
+click==8.3.1
+numpy==2.2.6
+httpx==0.28.1
+tokenizers==0.22.1
+sympy==1.13.1
+yarl==1.22.0
+async-timeout==5.0.1
+datasets==4.4.2
+platformdirs==4.5.1
+nvidia-cuda-cupti-cu12==12.1.105
+nvidia-nvtx-cu12==12.1.105
+smmap==5.0.2
+accelerate==1.12.0
+requests==2.32.5
+aiohttp==3.13.2
+bitsandbytes==0.49.0
+nvidia-cublas-cu12==12.1.3.1
+mpmath==1.3.0
+torchaudio==2.5.1+cu121
+nvidia-cuda-runtime-cu12==12.1.105
+typing-inspection==0.4.2
+GitPython==3.1.45
+xxhash==3.6.0
+nvidia-cusolver-cu12==11.4.5.107
+pydantic_core==2.41.5
+six==1.17.0
+torchvision==0.20.1+cu121
+typing_extensions==4.15.0
+triton==3.1.0
+charset-normalizer==3.4.4
+nvitop==1.6.1
+wandb==0.23.1
+regex==2025.11.3
+pip==25.3
+nvidia-cusparse-cu12==12.1.0.106
+pytz==2025.2
+Jinja2==3.1.6
+psutil==7.2.0
+pillow==12.0.0
+packaging==25.0
+safetensors==0.7.0
+sentry-sdk==2.48.0
+gitdb==4.0.12
+httpcore==1.0.9
+setuptools==80.9.0
+nvidia-cufft-cu12==11.0.2.54
+anyio==4.12.0
+transformers==5.0.0.dev0
+pydantic==2.12.5
+fsspec==2025.10.0
+filelock==3.20.0
+PyYAML==6.0.3
+hf-xet==1.2.0
+nvidia-cudnn-cu12==9.1.0.70
+tqdm==4.67.1
+MarkupSafe==2.1.5
+attrs==25.4.0
+nvidia-cuda-nvrtc-cu12==12.1.105
+peft==0.18.0
+aiohappyeyeballs==2.6.1
+networkx==3.4.2
+nvidia-nvjitlink-cu12==12.9.86
+certifi==2025.11.12
+pyarrow==22.0.0
+dill==0.4.0
+protobuf==6.33.2
+aiosignal==1.4.0
+frozenlist==1.8.0
+urllib3==2.6.2
+propcache==0.4.1
+tzdata==2025.3
+pandas==2.3.3
+annotated-types==0.7.0
+shellingham==1.5.4
+nvidia-nccl-cu12==2.21.5
+multidict==6.7.0
+nvidia-curand-cu12==10.3.2.106
+trl==0.26.2
+torch==2.5.1+cu121
+h11==0.16.0
+multiprocess==0.70.18
+typer-slim==0.21.0
+wheel==0.45.1
+tomli==2.0.1
+autocommand==2.2.2
+jaraco.context==5.3.0
+zipp==3.19.2
+packaging==24.2
+inflect==7.3.1
+typing_extensions==4.12.2
+platformdirs==4.2.2
+jaraco.functools==4.0.1
+jaraco.collections==5.1.0
+jaraco.text==3.12.1
+backports.tarfile==1.2.0
+more-itertools==10.3.0
+importlib_metadata==8.0.0
+typeguard==4.3.0
diff --git a/dpo_run_24b_v1/wandb/run-20251226_154526-q26c0nv5/files/wandb-metadata.json b/dpo_run_24b_v1/wandb/run-20251226_154526-q26c0nv5/files/wandb-metadata.json
new file mode 100644
index 0000000000000000000000000000000000000000..50d693aef5905cd18fa627fad87b18f680bfc4ae
--- /dev/null
+++ b/dpo_run_24b_v1/wandb/run-20251226_154526-q26c0nv5/files/wandb-metadata.json
@@ -0,0 +1,47 @@
+{
+  "os":  "Linux-6.12.46+-x86_64-with-glibc2.35",
+  "python":  "CPython 3.10.12",
+  "startedAt":  "2025-12-26T15:45:26.602783Z",
+  "args":  [
+    "--config",
+    "config_dpo.yaml"
+  ],
+  "program":  "/workspace/trainer-kit/DPO/run_dpo.py",
+  "codePath":  "run_dpo.py",
+  "codePathLocal":  "run_dpo.py",
+  "email":  "shaiksirajuddin9949@gmail.com",
+  "root":  "runs/dpo_run_24b_v1",
+  "host":  "a100-2gpu-shell-session-757d587799-mfdvv",
+  "executable":  "/workspace/llm_finetuning_env/bin/python",
+  "cpu_count":  12,
+  "cpu_count_logical":  24,
+  "gpu":  "NVIDIA A100-SXM4-80GB",
+  "gpu_count":  2,
+  "disk":  {
+    "/":  {
+      "total":  "791251738624",
+      "used":  "321426280448"
+    }
+  },
+  "memory":  {
+    "total":  "359047892992"
+  },
+  "gpu_nvidia":  [
+    {
+      "name":  "NVIDIA A100-SXM4-80GB",
+      "memoryTotal":  "85899345920",
+      "cudaCores":  6912,
+      "architecture":  "Ampere",
+      "uuid":  "GPU-989794b0-ec3b-13bf-db9f-3fbe341497ba"
+    },
+    {
+      "name":  "NVIDIA A100-SXM4-80GB",
+      "memoryTotal":  "85899345920",
+      "cudaCores":  6912,
+      "architecture":  "Ampere",
+      "uuid":  "GPU-3790aa64-60ef-9eac-b0b1-b278ee8c0d40"
+    }
+  ],
+  "cudaVersion":  "13.0",
+  "writerId":  "vkwg4w421cdzqcahglfwssvuu5b1q8gu"
+}
\ No newline at end of file
diff --git a/dpo_run_24b_v1/wandb/run-20251226_154526-q26c0nv5/files/wandb-summary.json b/dpo_run_24b_v1/wandb/run-20251226_154526-q26c0nv5/files/wandb-summary.json
new file mode 100644
index 0000000000000000000000000000000000000000..27a3da6debdb8b89c8ee34c41b4fd70e72812d25
--- /dev/null
+++ b/dpo_run_24b_v1/wandb/run-20251226_154526-q26c0nv5/files/wandb-summary.json
@@ -0,0 +1 @@
+{"_wandb":{"runtime":41},"_runtime":41}
\ No newline at end of file
diff --git a/dpo_run_24b_v1/wandb/run-20251226_154526-q26c0nv5/logs/debug-core.log b/dpo_run_24b_v1/wandb/run-20251226_154526-q26c0nv5/logs/debug-core.log
new file mode 100644
index 0000000000000000000000000000000000000000..bc9779a9d0bf3680d0067cf70856f68288916b1e
--- /dev/null
+++ b/dpo_run_24b_v1/wandb/run-20251226_154526-q26c0nv5/logs/debug-core.log
@@ -0,0 +1,14 @@
+{"time":"2025-12-26T15:45:26.68663644Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmprvq9b9d8/port-144557.txt","pid":144557,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false}
+{"time":"2025-12-26T15:45:26.68728745Z","level":"INFO","msg":"server: will exit if parent process dies","ppid":144557}
+{"time":"2025-12-26T15:45:26.687283169Z","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-144557-144641-1616249439/socket","Net":"unix"}}
+{"time":"2025-12-26T15:45:26.870074437Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"}
+{"time":"2025-12-26T15:45:26.87639256Z","level":"INFO","msg":"handleInformInit: received","streamId":"q26c0nv5","id":"1(@)"}
+{"time":"2025-12-26T15:45:27.033575027Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"q26c0nv5","id":"1(@)"}
+{"time":"2025-12-26T15:46:08.203733372Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"}
+{"time":"2025-12-26T15:46:08.204036586Z","level":"INFO","msg":"server is shutting down"}
+{"time":"2025-12-26T15:46:08.204026427Z","level":"INFO","msg":"connection: closing","id":"1(@)"}
+{"time":"2025-12-26T15:46:08.204171571Z","level":"INFO","msg":"connection: closed successfully","id":"1(@)"}
+{"time":"2025-12-26T15:46:08.204232653Z","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-144557-144641-1616249439/socket","Net":"unix"}}
+{"time":"2025-12-26T15:46:08.56635696Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"}
+{"time":"2025-12-26T15:46:08.566388264Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"}
+{"time":"2025-12-26T15:46:08.566398442Z","level":"INFO","msg":"server is closed"}
diff --git a/dpo_run_24b_v1/wandb/run-20251226_154526-q26c0nv5/logs/debug-internal.log b/dpo_run_24b_v1/wandb/run-20251226_154526-q26c0nv5/logs/debug-internal.log
new file mode 100644
index 0000000000000000000000000000000000000000..ec3dc3d43f8ff9e0c68fafcea73c3b31e2c9f14a
--- /dev/null
+++ b/dpo_run_24b_v1/wandb/run-20251226_154526-q26c0nv5/logs/debug-internal.log
@@ -0,0 +1,11 @@
+{"time":"2025-12-26T15:45:26.876521289Z","level":"INFO","msg":"stream: starting","core version":"0.23.1"}
+{"time":"2025-12-26T15:45:27.033345393Z","level":"INFO","msg":"stream: created new stream","id":"q26c0nv5"}
+{"time":"2025-12-26T15:45:27.033435831Z","level":"INFO","msg":"handler: started","stream_id":"q26c0nv5"}
+{"time":"2025-12-26T15:45:27.033566246Z","level":"INFO","msg":"stream: started","id":"q26c0nv5"}
+{"time":"2025-12-26T15:45:27.03360032Z","level":"INFO","msg":"writer: started","stream_id":"q26c0nv5"}
+{"time":"2025-12-26T15:45:27.033604326Z","level":"INFO","msg":"sender: started","stream_id":"q26c0nv5"}
+{"time":"2025-12-26T15:46:08.204026775Z","level":"INFO","msg":"stream: closing","id":"q26c0nv5"}
+{"time":"2025-12-26T15:46:08.409443264Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
+{"time":"2025-12-26T15:46:08.565432375Z","level":"INFO","msg":"handler: closed","stream_id":"q26c0nv5"}
+{"time":"2025-12-26T15:46:08.565532154Z","level":"INFO","msg":"sender: closed","stream_id":"q26c0nv5"}
+{"time":"2025-12-26T15:46:08.565543641Z","level":"INFO","msg":"stream: closed","id":"q26c0nv5"}
diff --git a/dpo_run_24b_v1/wandb/run-20251226_154526-q26c0nv5/logs/debug.log b/dpo_run_24b_v1/wandb/run-20251226_154526-q26c0nv5/logs/debug.log
new file mode 100644
index 0000000000000000000000000000000000000000..42ac3d7dfd5783621d9cd1c00da0578e5269141a
--- /dev/null
+++ b/dpo_run_24b_v1/wandb/run-20251226_154526-q26c0nv5/logs/debug.log
@@ -0,0 +1,23 @@
+2025-12-26 15:45:26,604 INFO    MainThread:144557 [wandb_setup.py:_flush():80] Current SDK version is 0.23.1
+2025-12-26 15:45:26,604 INFO    MainThread:144557 [wandb_setup.py:_flush():80] Configure stats pid to 144557
+2025-12-26 15:45:26,604 INFO    MainThread:144557 [wandb_setup.py:_flush():80] Loading settings from /root/.config/wandb/settings
+2025-12-26 15:45:26,604 INFO    MainThread:144557 [wandb_setup.py:_flush():80] Loading settings from /workspace/trainer-kit/DPO/wandb/settings
+2025-12-26 15:45:26,604 INFO    MainThread:144557 [wandb_setup.py:_flush():80] Loading settings from environment variables
+2025-12-26 15:45:26,604 INFO    MainThread:144557 [wandb_init.py:setup_run_log_directory():714] Logging user logs to runs/dpo_run_24b_v1/wandb/run-20251226_154526-q26c0nv5/logs/debug.log
+2025-12-26 15:45:26,604 INFO    MainThread:144557 [wandb_init.py:setup_run_log_directory():715] Logging internal logs to runs/dpo_run_24b_v1/wandb/run-20251226_154526-q26c0nv5/logs/debug-internal.log
+2025-12-26 15:45:26,604 INFO    MainThread:144557 [wandb_init.py:init():841] calling init triggers
+2025-12-26 15:45:26,604 INFO    MainThread:144557 [wandb_init.py:init():846] wandb.init called with sweep_config: {}
+config: {'model': {'repo_id': '../../Models/Devstral-Small-2-24B-HS-CPT-SFT', 'revision': None, 'base_local_dir': 'base_model', 'trust_remote_code': True, 'tokenizer_use_fast': True, 'device_map': 'auto', 'torch_dtype': 'bfloat16', 'use_4bit': False, 'bnb_4bit_quant_type': 'nf4', 'bnb_4bit_use_double_quant': False, 'bnb_4bit_compute_dtype': 'bfloat16', 'attn_implementation': None}, 'data': {'train_jsonl': 'dpo_pairs_generated.jsonl', 'eval_jsonl': None, 'eval_split_ratio': 0.1, 'prompt_field': 'prompt', 'chosen_field': 'chosen', 'rejected_field': 'rejected', 'score_field': 'f1_score', 'format_type': 'chatml', 'system_prompt': 'You are a Hyperswitch Rust code analyzer. Identify functions/structs that need modification for a given task.\n\n## Output Format\n\n##OUTPUT\nExplain the data flow and why each component must change:\n- Flow: [Input → Processing → Output with arrows]\n- For each component: "The [ComponentName] ([path]) must [action] because [reason]—without this, [consequence]"\n- Explain coupling between components\n\n##SELECT\nmodify::crates/path/to/file.rs::impl::ComponentName\nadd::crates/another/file.rs::function::AnotherComponent\n<EOS>\n\n## Rules\n\n1. Use full paths: `remove::crates/folder/file.rs::Type::Name`\n2. Use `::` for nested items: `status::StructName::Type::Name`\n3. Always explain "must change because" and "without this"\n3. Types of components: function, struct, enum, impl, trait\n4. If there is extra information (e.g., enum variants), include that too.\n5. 
Start with ##OUTPUT, end with ##SELECT, terminate with <EOS>\n', 'max_length': 2048, 'shuffle': True, 'num_proc': 4}, 'peft': {'enabled': True, 'r': 16, 'lora_alpha': 32, 'lora_dropout': 0.05, 'bias': 'none', 'target_modules': 'auto'}, 'dpo': {'beta': 0.1, 'label_smoothing': 0.0, 'loss_type': 'sigmoid', 'use_reference_model': True, 'reference_free': False}, 'train': {'num_train_epochs': 3, 'per_device_train_batch_size': 1, 'per_device_eval_batch_size': 1, 'gradient_accumulation_steps': 8, 'learning_rate': '5e-5', 'weight_decay': 0.0, 'warmup_ratio': 0.1, 'lr_scheduler_type': 'cosine', 'optim': 'adamw_torch', 'max_grad_norm': 1.0, 'gradient_checkpointing': True, 'logging_steps': 2, 'save_strategy': 'steps', 'save_steps': 100, 'save_total_limit': 10, 'evaluation_strategy': 'steps', 'eval_steps': 25, 'load_best_model_at_end': True, 'early_stopping': {'enabled': True, 'patience': 5, 'min_delta': 0.001, 'metric': 'eval_loss', 'mode': 'min'}, 'resume_from_checkpoint': 'auto'}, 'run_dir': 'runs/dpo_run_24b_v1', '_wandb': {}}
+2025-12-26 15:45:26,604 INFO    MainThread:144557 [wandb_init.py:init():889] starting backend
+2025-12-26 15:45:26,870 INFO    MainThread:144557 [wandb_init.py:init():892] sending inform_init request
+2025-12-26 15:45:26,874 INFO    MainThread:144557 [wandb_init.py:init():900] backend started and connected
+2025-12-26 15:45:26,876 INFO    MainThread:144557 [wandb_init.py:init():970] updated telemetry
+2025-12-26 15:45:26,877 INFO    MainThread:144557 [wandb_init.py:init():994] communicating run to backend with 90.0 second timeout
+2025-12-26 15:45:27,194 INFO    MainThread:144557 [wandb_init.py:init():1041] starting run threads in backend
+2025-12-26 15:45:27,306 INFO    MainThread:144557 [wandb_run.py:_console_start():2521] atexit reg
+2025-12-26 15:45:27,306 INFO    MainThread:144557 [wandb_run.py:_redirect():2369] redirect: wrap_raw
+2025-12-26 15:45:27,306 INFO    MainThread:144557 [wandb_run.py:_redirect():2438] Wrapping output streams.
+2025-12-26 15:45:27,306 INFO    MainThread:144557 [wandb_run.py:_redirect():2461] Redirects installed.
+2025-12-26 15:45:27,311 INFO    MainThread:144557 [wandb_init.py:init():1081] run started, returning control to user process
+2025-12-26 15:46:08,203 INFO    wandb-AsyncioManager-main:144557 [service_client.py:_forward_responses():80] Reached EOF.
+2025-12-26 15:46:08,203 INFO    wandb-AsyncioManager-main:144557 [mailbox.py:close():137] Closing mailbox, abandoning 1 handles.
diff --git a/dpo_run_24b_v1/wandb/run-20251226_154526-q26c0nv5/run-q26c0nv5.wandb b/dpo_run_24b_v1/wandb/run-20251226_154526-q26c0nv5/run-q26c0nv5.wandb
new file mode 100644
index 0000000000000000000000000000000000000000..771bd8a152f6b812d015fc55a8868db56ecf5d69
--- /dev/null
+++ b/dpo_run_24b_v1/wandb/run-20251226_154526-q26c0nv5/run-q26c0nv5.wandb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1a07e0af88656e431ab6cb7240eaaa2675f2756481cbd83829b392186e283bcc
+size 402762
diff --git a/dpo_run_24b_v1/wandb/run-20251226_154823-csl0hdpv/files/config.yaml b/dpo_run_24b_v1/wandb/run-20251226_154823-csl0hdpv/files/config.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..745d0568a8e09e68fa7288b4622083a18578f6e2
--- /dev/null
+++ b/dpo_run_24b_v1/wandb/run-20251226_154823-csl0hdpv/files/config.yaml
@@ -0,0 +1,165 @@
+_wandb:
+    value:
+        cli_version: 0.23.1
+        e:
+            3ji02qitqhf5x4smg5nhdhm1umg80vpy:
+                args:
+                    - --config
+                    - config_dpo.yaml
+                codePath: run_dpo.py
+                codePathLocal: run_dpo.py
+                cpu_count: 12
+                cpu_count_logical: 24
+                cudaVersion: "13.0"
+                disk:
+                    /:
+                        total: "791251738624"
+                        used: "321918668800"
+                email: shaiksirajuddin9949@gmail.com
+                executable: /workspace/llm_finetuning_env/bin/python
+                gpu: NVIDIA A100-SXM4-80GB
+                gpu_count: 2
+                gpu_nvidia:
+                    - architecture: Ampere
+                      cudaCores: 6912
+                      memoryTotal: "85899345920"
+                      name: NVIDIA A100-SXM4-80GB
+                      uuid: GPU-989794b0-ec3b-13bf-db9f-3fbe341497ba
+                    - architecture: Ampere
+                      cudaCores: 6912
+                      memoryTotal: "85899345920"
+                      name: NVIDIA A100-SXM4-80GB
+                      uuid: GPU-3790aa64-60ef-9eac-b0b1-b278ee8c0d40
+                host: a100-2gpu-shell-session-757d587799-mfdvv
+                memory:
+                    total: "359047892992"
+                os: Linux-6.12.46+-x86_64-with-glibc2.35
+                program: /workspace/trainer-kit/DPO/run_dpo.py
+                python: CPython 3.10.12
+                root: runs/dpo_run_24b_v1
+                startedAt: "2025-12-26T15:48:23.847355Z"
+                writerId: 3ji02qitqhf5x4smg5nhdhm1umg80vpy
+        m: []
+        python_version: 3.10.12
+        t:
+            "1":
+                - 1
+                - 11
+                - 41
+                - 49
+                - 51
+                - 71
+                - 84
+                - 98
+            "2":
+                - 1
+                - 11
+                - 41
+                - 49
+                - 51
+                - 71
+                - 84
+                - 98
+            "3":
+                - 15
+                - 16
+            "4": 3.10.12
+            "5": 0.23.1
+            "6": 5.0.0.dev0
+            "12": 0.23.1
+            "13": linux-x86_64
+data:
+    value:
+        chosen_field: chosen
+        eval_jsonl: null
+        eval_split_ratio: 0.1
+        format_type: chatml
+        max_length: 2048
+        num_proc: 4
+        prompt_field: prompt
+        rejected_field: rejected
+        score_field: f1_score
+        shuffle: true
+        system_prompt: |
+            You are a Hyperswitch Rust code analyzer. Identify functions/structs that need modification for a given task.
+
+            ## Output Format
+
+            ##OUTPUT
+            Explain the data flow and why each component must change:
+            - Flow: [Input → Processing → Output with arrows]
+            - For each component: "The [ComponentName] ([path]) must [action] because [reason]—without this, [consequence]"
+            - Explain coupling between components
+
+            ##SELECT
+            modify::crates/path/to/file.rs::impl::ComponentName
+            add::crates/another/file.rs::function::AnotherComponent
+            <EOS>
+
+            ## Rules
+
+            1. Use full paths: `remove::crates/folder/file.rs::Type::Name`
+            2. Use `::` for nested items: `status::StructName::Type::Name`
+            3. Always explain "must change because" and "without this"
+            3. Types of components: function, struct, enum, impl, trait
+            4. If there is extra information (e.g., enum variants), include that too.
+            5. Start with ##OUTPUT, end with ##SELECT, terminate with <EOS>
+        train_jsonl: dpo_pairs_generated.jsonl
+dpo:
+    value:
+        beta: 0.1
+        label_smoothing: 0
+        loss_type: sigmoid
+        reference_free: false
+        use_reference_model: true
+model:
+    value:
+        attn_implementation: null
+        base_local_dir: base_model
+        bnb_4bit_compute_dtype: bfloat16
+        bnb_4bit_quant_type: nf4
+        bnb_4bit_use_double_quant: false
+        device_map: auto
+        repo_id: ../../Models/Devstral-Small-2-24B-HS-CPT-SFT
+        revision: null
+        tokenizer_use_fast: true
+        torch_dtype: bfloat16
+        trust_remote_code: true
+        use_4bit: false
+peft:
+    value:
+        bias: none
+        enabled: true
+        lora_alpha: 32
+        lora_dropout: 0.05
+        r: 16
+        target_modules: auto
+run_dir:
+    value: runs/dpo_run_24b_v1
+train:
+    value:
+        early_stopping:
+            enabled: true
+            metric: eval_loss
+            min_delta: 0.001
+            mode: min
+            patience: 5
+        eval_steps: 25
+        evaluation_strategy: steps
+        gradient_accumulation_steps: 8
+        gradient_checkpointing: true
+        learning_rate: "5e-5"
+        load_best_model_at_end: true
+        logging_steps: 2
+        lr_scheduler_type: cosine
+        max_grad_norm: 1
+        num_train_epochs: 3
+        optim: adamw_torch
+        per_device_eval_batch_size: 1
+        per_device_train_batch_size: 1
+        resume_from_checkpoint: auto
+        save_steps: 100
+        save_strategy: steps
+        save_total_limit: 10
+        warmup_ratio: 0.1
+        weight_decay: 0
diff --git a/dpo_run_24b_v1/wandb/run-20251226_154823-csl0hdpv/files/output.log b/dpo_run_24b_v1/wandb/run-20251226_154823-csl0hdpv/files/output.log
new file mode 100644
index 0000000000000000000000000000000000000000..a7d6158bf224fb4a4f868c9a397945021a844dc2
--- /dev/null
+++ b/dpo_run_24b_v1/wandb/run-20251226_154823-csl0hdpv/files/output.log
@@ -0,0 +1,61 @@
+Wandb initialized: project='dpo-training', name='auto-generated'
+2025-12-26 15:48:27,098 - INFO - Detected Mistral3 model architecture, loading with specific class
+Loading weights: 100%|█| 585/585 [00:14<00:00, 41.44it/s, Materializing param=model.vision_tower.transfo
+2025-12-26 15:48:43,194 - INFO - Ensuring all parameters are materialized...
+Loading reference model (frozen copy)...
+2025-12-26 15:48:47,700 - INFO - Detected Mistral3 model architecture, loading with specific class
+Loading weights: 100%|█| 585/585 [00:13<00:00, 42.16it/s, Materializing param=model.vision_tower.transfo
+2025-12-26 15:49:03,374 - INFO - Ensuring all parameters are materialized...
+Reference model loaded and frozen
+2025-12-26 15:49:04,423 - INFO - HTTP Request: HEAD https://s3.amazonaws.com/datasets.huggingface.co/datasets/datasets/json/json.py "HTTP/1.1 200 OK"
+2025-12-26 15:49:04,439 - INFO - Formatting train DPO data...
+2025-12-26 15:49:04,752 - INFO - Train dataset after filtering: 6850 examples
+2025-12-26 15:49:04,753 - INFO - train dataset validation passed: 6850 examples
+2025-12-26 15:49:04,753 - INFO - Formatting eval DPO data...
+2025-12-26 15:49:05,027 - INFO - Eval dataset after filtering: 762 examples
+2025-12-26 15:49:05,027 - INFO - eval dataset validation passed: 762 examples
+warmup_ratio is deprecated and will be removed in v5.2. Use `warmup_steps` instead.
+Early stopping enabled: patience=5, min_delta=0.001
+2025-12-26 15:49:05,064 - INFO - DPO Training with beta=0.1, loss_type=sigmoid
+warmup_ratio is deprecated and will be removed in v5.2. Use `warmup_steps` instead.
+2025-12-26 15:49:05,093 - WARNING - You passed `model_init_kwargs` to the `DPOConfig`, but your model is already instantiated. The `model_init_kwargs` will be ignored.
+Traceback (most recent call last):
+  File "/workspace/trainer-kit/DPO/run_dpo.py", line 980, in <module>
+    main()
+  File "/workspace/trainer-kit/DPO/run_dpo.py", line 937, in main
+    trainer = DPOTrainer(
+  File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/trl/trainer/dpo_trainer.py", line 330, in __init__
+    processing_class = AutoProcessor.from_pretrained(model_id)
+  File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/models/auto/processing_auto.py", line 395, in from_pretrained
+    return processor_class.from_pretrained(
+  File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/processing_utils.py", line 1413, in from_pretrained
+    args = cls._get_arguments_from_pretrained(pretrained_model_name_or_path, processor_dict, **kwargs)
+  File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/processing_utils.py", line 1524, in _get_arguments_from_pretrained
+    sub_processor = auto_processor_class.from_pretrained(
+  File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/models/auto/image_processing_auto.py", line 504, in from_pretrained
+    raise initial_exception
+  File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/models/auto/image_processing_auto.py", line 486, in from_pretrained
+    config_dict, _ = ImageProcessingMixin.get_image_processor_dict(
+  File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/image_processing_base.py", line 334, in get_image_processor_dict
+    raise OSError(
+OSError: Can't load image processor for '../../Models/Devstral-Small-2-24B-HS-CPT-SFT'. If you were trying to load it from 'https://huggingface.co/models', make sure you don't have a local directory with the same name. Otherwise, make sure '../../Models/Devstral-Small-2-24B-HS-CPT-SFT' is the correct path to a directory containing a preprocessor_config.json file
+Traceback (most recent call last):
+  File "/workspace/trainer-kit/DPO/run_dpo.py", line 980, in <module>
+    main()
+  File "/workspace/trainer-kit/DPO/run_dpo.py", line 937, in main
+    trainer = DPOTrainer(
+  File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/trl/trainer/dpo_trainer.py", line 330, in __init__
+    processing_class = AutoProcessor.from_pretrained(model_id)
+  File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/models/auto/processing_auto.py", line 395, in from_pretrained
+    return processor_class.from_pretrained(
+  File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/processing_utils.py", line 1413, in from_pretrained
+    args = cls._get_arguments_from_pretrained(pretrained_model_name_or_path, processor_dict, **kwargs)
+  File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/processing_utils.py", line 1524, in _get_arguments_from_pretrained
+    sub_processor = auto_processor_class.from_pretrained(
+  File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/models/auto/image_processing_auto.py", line 504, in from_pretrained
+    raise initial_exception
+  File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/models/auto/image_processing_auto.py", line 486, in from_pretrained
+    config_dict, _ = ImageProcessingMixin.get_image_processor_dict(
+  File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/image_processing_base.py", line 334, in get_image_processor_dict
+    raise OSError(
+OSError: Can't load image processor for '../../Models/Devstral-Small-2-24B-HS-CPT-SFT'. If you were trying to load it from 'https://huggingface.co/models', make sure you don't have a local directory with the same name. Otherwise, make sure '../../Models/Devstral-Small-2-24B-HS-CPT-SFT' is the correct path to a directory containing a preprocessor_config.json file
diff --git a/dpo_run_24b_v1/wandb/run-20251226_154823-csl0hdpv/files/requirements.txt b/dpo_run_24b_v1/wandb/run-20251226_154823-csl0hdpv/files/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..79a4241d8724f018c9bdfcd7c289f1f14578574b
--- /dev/null
+++ b/dpo_run_24b_v1/wandb/run-20251226_154823-csl0hdpv/files/requirements.txt
@@ -0,0 +1,104 @@
+exceptiongroup==1.3.1
+wheel==0.45.1
+python-dateutil==2.9.0.post0
+nvidia-ml-py==13.580.82
+huggingface_hub==1.2.3
+idna==3.11
+click==8.3.1
+numpy==2.2.6
+httpx==0.28.1
+tokenizers==0.22.1
+sympy==1.13.1
+yarl==1.22.0
+async-timeout==5.0.1
+datasets==4.4.2
+platformdirs==4.5.1
+nvidia-cuda-cupti-cu12==12.1.105
+nvidia-nvtx-cu12==12.1.105
+smmap==5.0.2
+accelerate==1.12.0
+requests==2.32.5
+aiohttp==3.13.2
+bitsandbytes==0.49.0
+nvidia-cublas-cu12==12.1.3.1
+mpmath==1.3.0
+torchaudio==2.5.1+cu121
+nvidia-cuda-runtime-cu12==12.1.105
+typing-inspection==0.4.2
+GitPython==3.1.45
+xxhash==3.6.0
+nvidia-cusolver-cu12==11.4.5.107
+pydantic_core==2.41.5
+six==1.17.0
+torchvision==0.20.1+cu121
+typing_extensions==4.15.0
+triton==3.1.0
+charset-normalizer==3.4.4
+nvitop==1.6.1
+wandb==0.23.1
+regex==2025.11.3
+pip==25.3
+nvidia-cusparse-cu12==12.1.0.106
+pytz==2025.2
+Jinja2==3.1.6
+psutil==7.2.0
+pillow==12.0.0
+packaging==25.0
+safetensors==0.7.0
+sentry-sdk==2.48.0
+gitdb==4.0.12
+httpcore==1.0.9
+setuptools==80.9.0
+nvidia-cufft-cu12==11.0.2.54
+anyio==4.12.0
+transformers==5.0.0.dev0
+pydantic==2.12.5
+fsspec==2025.10.0
+filelock==3.20.0
+PyYAML==6.0.3
+hf-xet==1.2.0
+nvidia-cudnn-cu12==9.1.0.70
+tqdm==4.67.1
+MarkupSafe==2.1.5
+attrs==25.4.0
+nvidia-cuda-nvrtc-cu12==12.1.105
+peft==0.18.0
+aiohappyeyeballs==2.6.1
+networkx==3.4.2
+nvidia-nvjitlink-cu12==12.9.86
+certifi==2025.11.12
+pyarrow==22.0.0
+dill==0.4.0
+protobuf==6.33.2
+aiosignal==1.4.0
+frozenlist==1.8.0
+urllib3==2.6.2
+propcache==0.4.1
+tzdata==2025.3
+pandas==2.3.3
+annotated-types==0.7.0
+shellingham==1.5.4
+nvidia-nccl-cu12==2.21.5
+multidict==6.7.0
+nvidia-curand-cu12==10.3.2.106
+trl==0.26.2
+torch==2.5.1+cu121
+h11==0.16.0
+multiprocess==0.70.18
+typer-slim==0.21.0
+wheel==0.45.1
+tomli==2.0.1
+autocommand==2.2.2
+jaraco.context==5.3.0
+zipp==3.19.2
+packaging==24.2
+inflect==7.3.1
+typing_extensions==4.12.2
+platformdirs==4.2.2
+jaraco.functools==4.0.1
+jaraco.collections==5.1.0
+jaraco.text==3.12.1
+backports.tarfile==1.2.0
+more-itertools==10.3.0
+importlib_metadata==8.0.0
+typeguard==4.3.0
diff --git a/dpo_run_24b_v1/wandb/run-20251226_154823-csl0hdpv/files/wandb-metadata.json b/dpo_run_24b_v1/wandb/run-20251226_154823-csl0hdpv/files/wandb-metadata.json
new file mode 100644
index 0000000000000000000000000000000000000000..dda3573301391a086531f9e0c74a10be6d22b033
--- /dev/null
+++ b/dpo_run_24b_v1/wandb/run-20251226_154823-csl0hdpv/files/wandb-metadata.json
@@ -0,0 +1,47 @@
+{
+  "os":  "Linux-6.12.46+-x86_64-with-glibc2.35",
+  "python":  "CPython 3.10.12",
+  "startedAt":  "2025-12-26T15:48:23.847355Z",
+  "args":  [
+    "--config",
+    "config_dpo.yaml"
+  ],
+  "program":  "/workspace/trainer-kit/DPO/run_dpo.py",
+  "codePath":  "run_dpo.py",
+  "codePathLocal":  "run_dpo.py",
+  "email":  "shaiksirajuddin9949@gmail.com",
+  "root":  "runs/dpo_run_24b_v1",
+  "host":  "a100-2gpu-shell-session-757d587799-mfdvv",
+  "executable":  "/workspace/llm_finetuning_env/bin/python",
+  "cpu_count":  12,
+  "cpu_count_logical":  24,
+  "gpu":  "NVIDIA A100-SXM4-80GB",
+  "gpu_count":  2,
+  "disk":  {
+    "/":  {
+      "total":  "791251738624",
+      "used":  "321918668800"
+    }
+  },
+  "memory":  {
+    "total":  "359047892992"
+  },
+  "gpu_nvidia":  [
+    {
+      "name":  "NVIDIA A100-SXM4-80GB",
+      "memoryTotal":  "85899345920",
+      "cudaCores":  6912,
+      "architecture":  "Ampere",
+      "uuid":  "GPU-989794b0-ec3b-13bf-db9f-3fbe341497ba"
+    },
+    {
+      "name":  "NVIDIA A100-SXM4-80GB",
+      "memoryTotal":  "85899345920",
+      "cudaCores":  6912,
+      "architecture":  "Ampere",
+      "uuid":  "GPU-3790aa64-60ef-9eac-b0b1-b278ee8c0d40"
+    }
+  ],
+  "cudaVersion":  "13.0",
+  "writerId":  "3ji02qitqhf5x4smg5nhdhm1umg80vpy"
+}
\ No newline at end of file
diff --git a/dpo_run_24b_v1/wandb/run-20251226_154823-csl0hdpv/files/wandb-summary.json b/dpo_run_24b_v1/wandb/run-20251226_154823-csl0hdpv/files/wandb-summary.json
new file mode 100644
index 0000000000000000000000000000000000000000..f2b7ff96efd576179c1af4bf6b36aeadbff8186d
--- /dev/null
+++ b/dpo_run_24b_v1/wandb/run-20251226_154823-csl0hdpv/files/wandb-summary.json
@@ -0,0 +1 @@
+{"_wandb":{"runtime":40},"_runtime":40}
\ No newline at end of file
diff --git a/dpo_run_24b_v1/wandb/run-20251226_154823-csl0hdpv/logs/debug-core.log b/dpo_run_24b_v1/wandb/run-20251226_154823-csl0hdpv/logs/debug-core.log
new file mode 100644
index 0000000000000000000000000000000000000000..ae9f328b8d3e0cdd581f8cb819699c08951a5f0f
--- /dev/null
+++ b/dpo_run_24b_v1/wandb/run-20251226_154823-csl0hdpv/logs/debug-core.log
@@ -0,0 +1,14 @@
+{"time":"2025-12-26T15:48:23.937318911Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpggytnynn/port-145706.txt","pid":145706,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false}
+{"time":"2025-12-26T15:48:23.938035931Z","level":"INFO","msg":"server: will exit if parent process dies","ppid":145706}
+{"time":"2025-12-26T15:48:23.938023027Z","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-145706-145763-3440564257/socket","Net":"unix"}}
+{"time":"2025-12-26T15:48:24.119099105Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"}
+{"time":"2025-12-26T15:48:24.125585319Z","level":"INFO","msg":"handleInformInit: received","streamId":"csl0hdpv","id":"1(@)"}
+{"time":"2025-12-26T15:48:24.282664117Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"csl0hdpv","id":"1(@)"}
+{"time":"2025-12-26T15:49:05.103655255Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"}
+{"time":"2025-12-26T15:49:05.104161534Z","level":"INFO","msg":"connection: closing","id":"1(@)"}
+{"time":"2025-12-26T15:49:05.104286779Z","level":"INFO","msg":"connection: closed successfully","id":"1(@)"}
+{"time":"2025-12-26T15:49:05.104189677Z","level":"INFO","msg":"server is shutting down"}
+{"time":"2025-12-26T15:49:05.104500474Z","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-145706-145763-3440564257/socket","Net":"unix"}}
+{"time":"2025-12-26T15:49:05.41489096Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"}
+{"time":"2025-12-26T15:49:05.414925712Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"}
+{"time":"2025-12-26T15:49:05.414935603Z","level":"INFO","msg":"server is closed"}
diff --git a/dpo_run_24b_v1/wandb/run-20251226_154823-csl0hdpv/logs/debug-internal.log b/dpo_run_24b_v1/wandb/run-20251226_154823-csl0hdpv/logs/debug-internal.log
new file mode 100644
index 0000000000000000000000000000000000000000..8658087a31e4ba8ae147c9aae4166b4b881619fc
--- /dev/null
+++ b/dpo_run_24b_v1/wandb/run-20251226_154823-csl0hdpv/logs/debug-internal.log
@@ -0,0 +1,11 @@
+{"time":"2025-12-26T15:48:24.125714482Z","level":"INFO","msg":"stream: starting","core version":"0.23.1"}
+{"time":"2025-12-26T15:48:24.282329868Z","level":"INFO","msg":"stream: created new stream","id":"csl0hdpv"}
+{"time":"2025-12-26T15:48:24.282441971Z","level":"INFO","msg":"handler: started","stream_id":"csl0hdpv"}
+{"time":"2025-12-26T15:48:24.282651355Z","level":"INFO","msg":"stream: started","id":"csl0hdpv"}
+{"time":"2025-12-26T15:48:24.282683713Z","level":"INFO","msg":"writer: started","stream_id":"csl0hdpv"}
+{"time":"2025-12-26T15:48:24.282719634Z","level":"INFO","msg":"sender: started","stream_id":"csl0hdpv"}
+{"time":"2025-12-26T15:49:05.104099415Z","level":"INFO","msg":"stream: closing","id":"csl0hdpv"}
+{"time":"2025-12-26T15:49:05.302585562Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
+{"time":"2025-12-26T15:49:05.414098773Z","level":"INFO","msg":"handler: closed","stream_id":"csl0hdpv"}
+{"time":"2025-12-26T15:49:05.414179255Z","level":"INFO","msg":"sender: closed","stream_id":"csl0hdpv"}
+{"time":"2025-12-26T15:49:05.414188341Z","level":"INFO","msg":"stream: closed","id":"csl0hdpv"}
diff --git a/dpo_run_24b_v1/wandb/run-20251226_154823-csl0hdpv/logs/debug.log b/dpo_run_24b_v1/wandb/run-20251226_154823-csl0hdpv/logs/debug.log
new file mode 100644
index 0000000000000000000000000000000000000000..e8d06bb8c0a06d1d7c7b530140416a36543085e5
--- /dev/null
+++ b/dpo_run_24b_v1/wandb/run-20251226_154823-csl0hdpv/logs/debug.log
@@ -0,0 +1,23 @@
+2025-12-26 15:48:23,848 INFO    MainThread:145706 [wandb_setup.py:_flush():80] Current SDK version is 0.23.1
+2025-12-26 15:48:23,849 INFO    MainThread:145706 [wandb_setup.py:_flush():80] Configure stats pid to 145706
+2025-12-26 15:48:23,849 INFO    MainThread:145706 [wandb_setup.py:_flush():80] Loading settings from /root/.config/wandb/settings
+2025-12-26 15:48:23,849 INFO    MainThread:145706 [wandb_setup.py:_flush():80] Loading settings from /workspace/trainer-kit/DPO/wandb/settings
+2025-12-26 15:48:23,849 INFO    MainThread:145706 [wandb_setup.py:_flush():80] Loading settings from environment variables
+2025-12-26 15:48:23,849 INFO    MainThread:145706 [wandb_init.py:setup_run_log_directory():714] Logging user logs to runs/dpo_run_24b_v1/wandb/run-20251226_154823-csl0hdpv/logs/debug.log
+2025-12-26 15:48:23,849 INFO    MainThread:145706 [wandb_init.py:setup_run_log_directory():715] Logging internal logs to runs/dpo_run_24b_v1/wandb/run-20251226_154823-csl0hdpv/logs/debug-internal.log
+2025-12-26 15:48:23,849 INFO    MainThread:145706 [wandb_init.py:init():841] calling init triggers
+2025-12-26 15:48:23,849 INFO    MainThread:145706 [wandb_init.py:init():846] wandb.init called with sweep_config: {}
+config: {'model': {'repo_id': '../../Models/Devstral-Small-2-24B-HS-CPT-SFT', 'revision': None, 'base_local_dir': 'base_model', 'trust_remote_code': True, 'tokenizer_use_fast': True, 'device_map': 'auto', 'torch_dtype': 'bfloat16', 'use_4bit': False, 'bnb_4bit_quant_type': 'nf4', 'bnb_4bit_use_double_quant': False, 'bnb_4bit_compute_dtype': 'bfloat16', 'attn_implementation': None}, 'data': {'train_jsonl': 'dpo_pairs_generated.jsonl', 'eval_jsonl': None, 'eval_split_ratio': 0.1, 'prompt_field': 'prompt', 'chosen_field': 'chosen', 'rejected_field': 'rejected', 'score_field': 'f1_score', 'format_type': 'chatml', 'system_prompt': 'You are a Hyperswitch Rust code analyzer. Identify functions/structs that need modification for a given task.\n\n## Output Format\n\n##OUTPUT\nExplain the data flow and why each component must change:\n- Flow: [Input → Processing → Output with arrows]\n- For each component: "The [ComponentName] ([path]) must [action] because [reason]—without this, [consequence]"\n- Explain coupling between components\n\n##SELECT\nmodify::crates/path/to/file.rs::impl::ComponentName\nadd::crates/another/file.rs::function::AnotherComponent\n<EOS>\n\n## Rules\n\n1. Use full paths: `remove::crates/folder/file.rs::Type::Name`\n2. Use `::` for nested items: `status::StructName::Type::Name`\n3. Always explain "must change because" and "without this"\n3. Types of components: function, struct, enum, impl, trait\n4. If there is extra information (e.g., enum variants), include that too.\n5. 
Start with ##OUTPUT, end with ##SELECT, terminate with <EOS>\n', 'max_length': 2048, 'shuffle': True, 'num_proc': 4}, 'peft': {'enabled': True, 'r': 16, 'lora_alpha': 32, 'lora_dropout': 0.05, 'bias': 'none', 'target_modules': 'auto'}, 'dpo': {'beta': 0.1, 'label_smoothing': 0.0, 'loss_type': 'sigmoid', 'use_reference_model': True, 'reference_free': False}, 'train': {'num_train_epochs': 3, 'per_device_train_batch_size': 1, 'per_device_eval_batch_size': 1, 'gradient_accumulation_steps': 8, 'learning_rate': '5e-5', 'weight_decay': 0.0, 'warmup_ratio': 0.1, 'lr_scheduler_type': 'cosine', 'optim': 'adamw_torch', 'max_grad_norm': 1.0, 'gradient_checkpointing': True, 'logging_steps': 2, 'save_strategy': 'steps', 'save_steps': 100, 'save_total_limit': 10, 'evaluation_strategy': 'steps', 'eval_steps': 25, 'load_best_model_at_end': True, 'early_stopping': {'enabled': True, 'patience': 5, 'min_delta': 0.001, 'metric': 'eval_loss', 'mode': 'min'}, 'resume_from_checkpoint': 'auto'}, 'run_dir': 'runs/dpo_run_24b_v1', '_wandb': {}}
+2025-12-26 15:48:23,849 INFO    MainThread:145706 [wandb_init.py:init():889] starting backend
+2025-12-26 15:48:24,119 INFO    MainThread:145706 [wandb_init.py:init():892] sending inform_init request
+2025-12-26 15:48:24,123 INFO    MainThread:145706 [wandb_init.py:init():900] backend started and connected
+2025-12-26 15:48:24,125 INFO    MainThread:145706 [wandb_init.py:init():970] updated telemetry
+2025-12-26 15:48:24,126 INFO    MainThread:145706 [wandb_init.py:init():994] communicating run to backend with 90.0 second timeout
+2025-12-26 15:48:24,633 INFO    MainThread:145706 [wandb_init.py:init():1041] starting run threads in backend
+2025-12-26 15:48:24,746 INFO    MainThread:145706 [wandb_run.py:_console_start():2521] atexit reg
+2025-12-26 15:48:24,746 INFO    MainThread:145706 [wandb_run.py:_redirect():2369] redirect: wrap_raw
+2025-12-26 15:48:24,746 INFO    MainThread:145706 [wandb_run.py:_redirect():2438] Wrapping output streams.
+2025-12-26 15:48:24,746 INFO    MainThread:145706 [wandb_run.py:_redirect():2461] Redirects installed.
+2025-12-26 15:48:24,751 INFO    MainThread:145706 [wandb_init.py:init():1081] run started, returning control to user process
+2025-12-26 15:49:05,103 INFO    wandb-AsyncioManager-main:145706 [service_client.py:_forward_responses():80] Reached EOF.
+2025-12-26 15:49:05,103 INFO    wandb-AsyncioManager-main:145706 [mailbox.py:close():137] Closing mailbox, abandoning 1 handles.
diff --git a/dpo_run_24b_v1/wandb/run-20251226_154823-csl0hdpv/run-csl0hdpv.wandb b/dpo_run_24b_v1/wandb/run-20251226_154823-csl0hdpv/run-csl0hdpv.wandb
new file mode 100644
index 0000000000000000000000000000000000000000..06f24622b8e4666f19f2161450b84133e2b8bffb
--- /dev/null
+++ b/dpo_run_24b_v1/wandb/run-20251226_154823-csl0hdpv/run-csl0hdpv.wandb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ddcf923b86423dce44cd8c77ff1df1c56fcdfaec68a724d8dea8e970e146216b
+size 402336
diff --git a/dpo_run_24b_v1/wandb/run-20251226_155025-xzbi1gai/files/config.yaml b/dpo_run_24b_v1/wandb/run-20251226_155025-xzbi1gai/files/config.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..f7e1f376c8ab0da981842c1c85122b5b4966dad9
--- /dev/null
+++ b/dpo_run_24b_v1/wandb/run-20251226_155025-xzbi1gai/files/config.yaml
@@ -0,0 +1,165 @@
+_wandb:
+    value:
+        cli_version: 0.23.1
+        e:
+            ameihbcwlgcy98oe1dp5374w3z3zxy65:
+                args:
+                    - --config
+                    - config_dpo.yaml
+                codePath: run_dpo.py
+                codePathLocal: run_dpo.py
+                cpu_count: 12
+                cpu_count_logical: 24
+                cudaVersion: "13.0"
+                disk:
+                    /:
+                        total: "791251738624"
+                        used: "322263662592"
+                email: shaiksirajuddin9949@gmail.com
+                executable: /workspace/llm_finetuning_env/bin/python
+                gpu: NVIDIA A100-SXM4-80GB
+                gpu_count: 2
+                gpu_nvidia:
+                    - architecture: Ampere
+                      cudaCores: 6912
+                      memoryTotal: "85899345920"
+                      name: NVIDIA A100-SXM4-80GB
+                      uuid: GPU-989794b0-ec3b-13bf-db9f-3fbe341497ba
+                    - architecture: Ampere
+                      cudaCores: 6912
+                      memoryTotal: "85899345920"
+                      name: NVIDIA A100-SXM4-80GB
+                      uuid: GPU-3790aa64-60ef-9eac-b0b1-b278ee8c0d40
+                host: a100-2gpu-shell-session-757d587799-mfdvv
+                memory:
+                    total: "359047892992"
+                os: Linux-6.12.46+-x86_64-with-glibc2.35
+                program: /workspace/trainer-kit/DPO/run_dpo.py
+                python: CPython 3.10.12
+                root: runs/dpo_run_24b_v1
+                startedAt: "2025-12-26T15:50:25.807997Z"
+                writerId: ameihbcwlgcy98oe1dp5374w3z3zxy65
+        m: []
+        python_version: 3.10.12
+        t:
+            "1":
+                - 1
+                - 11
+                - 41
+                - 49
+                - 51
+                - 71
+                - 84
+                - 98
+            "2":
+                - 1
+                - 11
+                - 41
+                - 49
+                - 51
+                - 71
+                - 84
+                - 98
+            "3":
+                - 15
+                - 16
+            "4": 3.10.12
+            "5": 0.23.1
+            "6": 5.0.0.dev0
+            "12": 0.23.1
+            "13": linux-x86_64
+data:
+    value:
+        chosen_field: chosen
+        eval_jsonl: null
+        eval_split_ratio: 0.1
+        format_type: chatml
+        max_length: 2048
+        num_proc: 4
+        prompt_field: prompt
+        rejected_field: rejected
+        score_field: f1_score
+        shuffle: true
+        system_prompt: |
+            You are a Hyperswitch Rust code analyzer. Identify functions/structs that need modification for a given task.
+
+            ## Output Format
+
+            ##OUTPUT
+            Explain the data flow and why each component must change:
+            - Flow: [Input → Processing → Output with arrows]
+            - For each component: "The [ComponentName] ([path]) must [action] because [reason]—without this, [consequence]"
+            - Explain coupling between components
+
+            ##SELECT
+            modify::crates/path/to/file.rs::impl::ComponentName
+            add::crates/another/file.rs::function::AnotherComponent
+            <EOS>
+
+            ## Rules
+
+            1. Use full paths: `remove::crates/folder/file.rs::Type::Name`
+            2. Use `::` for nested items: `status::StructName::Type::Name`
+            3. Always explain "must change because" and "without this"
+            3. Types of components: function, struct, enum, impl, trait
+            4. If there is extra information (e.g., enum variants), include that too.
+            5. Start with ##OUTPUT, end with ##SELECT, terminate with <EOS>
+        train_jsonl: dpo_pairs_generated.jsonl
+dpo:
+    value:
+        beta: 0.1
+        label_smoothing: 0
+        loss_type: sigmoid
+        reference_free: false
+        use_reference_model: true
+model:
+    value:
+        attn_implementation: null
+        base_local_dir: base_model
+        bnb_4bit_compute_dtype: bfloat16
+        bnb_4bit_quant_type: nf4
+        bnb_4bit_use_double_quant: false
+        device_map: auto
+        repo_id: ../../Models/Devstral-Small-2-24B-HS-CPT-SFT
+        revision: null
+        tokenizer_use_fast: true
+        torch_dtype: bfloat16
+        trust_remote_code: true
+        use_4bit: false
+peft:
+    value:
+        bias: none
+        enabled: true
+        lora_alpha: 32
+        lora_dropout: 0.05
+        r: 16
+        target_modules: auto
+run_dir:
+    value: runs/dpo_run_24b_v1
+train:
+    value:
+        early_stopping:
+            enabled: true
+            metric: eval_loss
+            min_delta: 0.001
+            mode: min
+            patience: 5
+        eval_steps: 25
+        evaluation_strategy: steps
+        gradient_accumulation_steps: 8
+        gradient_checkpointing: true
+        learning_rate: "5e-5"
+        load_best_model_at_end: true
+        logging_steps: 2
+        lr_scheduler_type: cosine
+        max_grad_norm: 1
+        num_train_epochs: 3
+        optim: adamw_torch
+        per_device_eval_batch_size: 1
+        per_device_train_batch_size: 1
+        resume_from_checkpoint: auto
+        save_steps: 100
+        save_strategy: steps
+        save_total_limit: 10
+        warmup_ratio: 0.1
+        weight_decay: 0
diff --git a/dpo_run_24b_v1/wandb/run-20251226_155025-xzbi1gai/files/output.log b/dpo_run_24b_v1/wandb/run-20251226_155025-xzbi1gai/files/output.log
new file mode 100644
index 0000000000000000000000000000000000000000..f0d75156a085ddaa5068457aafb3721d81f945df
--- /dev/null
+++ b/dpo_run_24b_v1/wandb/run-20251226_155025-xzbi1gai/files/output.log
@@ -0,0 +1,70 @@
+Wandb initialized: project='dpo-training', name='auto-generated'
+2025-12-26 15:50:29,039 - INFO - Detected Mistral3 model architecture, loading with specific class
+Loading weights: 100%|█| 585/585 [00:14<00:00, 41.61it/s, Materializing param=model.vision_tower.transfo
+2025-12-26 15:50:45,063 - INFO - Ensuring all parameters are materialized...
+Loading reference model (frozen copy)...
+2025-12-26 15:50:49,556 - INFO - Detected Mistral3 model architecture, loading with specific class
+Loading weights: 100%|█| 585/585 [00:13<00:00, 42.90it/s, Materializing param=model.vision_tower.transfo
+2025-12-26 15:51:05,069 - INFO - Ensuring all parameters are materialized...
+Reference model loaded and frozen
+2025-12-26 15:51:06,031 - INFO - HTTP Request: HEAD https://s3.amazonaws.com/datasets.huggingface.co/datasets/datasets/json/json.py "HTTP/1.1 200 OK"
+2025-12-26 15:51:06,046 - INFO - Formatting train DPO data...
+2025-12-26 15:51:06,324 - INFO - Train dataset after filtering: 6850 examples
+2025-12-26 15:51:06,324 - INFO - train dataset validation passed: 6850 examples
+2025-12-26 15:51:06,325 - INFO - Formatting eval DPO data...
+2025-12-26 15:51:06,578 - INFO - Eval dataset after filtering: 762 examples
+2025-12-26 15:51:06,579 - INFO - eval dataset validation passed: 762 examples
+warmup_ratio is deprecated and will be removed in v5.2. Use `warmup_steps` instead.
+Early stopping enabled: patience=5, min_delta=0.001
+2025-12-26 15:51:06,614 - INFO - DPO Training with beta=0.1, loss_type=sigmoid
+warmup_ratio is deprecated and will be removed in v5.2. Use `warmup_steps` instead.
+2025-12-26 15:51:06,644 - WARNING - You passed `model_init_kwargs` to the `DPOConfig`, but your model is already instantiated. The `model_init_kwargs` will be ignored.
+Tokenizing train dataset:   0%|                                         | 0/6850 [00:00<?, ? examples/s]
+Traceback (most recent call last):
+  File "/workspace/trainer-kit/DPO/run_dpo.py", line 981, in <module>
+    main()
+  File "/workspace/trainer-kit/DPO/run_dpo.py", line 937, in main
+    trainer = DPOTrainer(
+  File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/trl/trainer/dpo_trainer.py", line 480, in __init__
+    train_dataset = self._prepare_dataset(train_dataset, processing_class, args, "train")
+  File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/trl/trainer/dpo_trainer.py", line 654, in _prepare_dataset
+    dataset = dataset.map(
+  File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/datasets/arrow_dataset.py", line 562, in wrapper
+    out: Union["Dataset", "DatasetDict"] = func(self, *args, **kwargs)
+  File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/datasets/arrow_dataset.py", line 3341, in map
+    for rank, done, content in Dataset._map_single(**unprocessed_kwargs):
+  File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/datasets/arrow_dataset.py", line 3673, in _map_single
+    for i, example in iter_outputs(shard_iterable):
+  File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/datasets/arrow_dataset.py", line 3647, in iter_outputs
+    yield i, apply_function(example, i, offset=offset)
+  File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/datasets/arrow_dataset.py", line 3570, in apply_function
+    processed_inputs = function(*fn_args, *additional_args, **fn_kwargs)
+  File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/trl/trainer/dpo_trainer.py", line 749, in process_row
+    processor, tokenizer = processing_class, processing_class.tokenizer  # the processing class is a processor
+  File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/tokenization_utils_base.py", line 1331, in __getattr__
+    raise AttributeError(f"{self.__class__.__name__} has no attribute {key}")
+AttributeError: TokenizersBackend has no attribute tokenizer. Did you mean: '_tokenizer'?
+Traceback (most recent call last):
+  File "/workspace/trainer-kit/DPO/run_dpo.py", line 981, in <module>
+    main()
+  File "/workspace/trainer-kit/DPO/run_dpo.py", line 937, in main
+    trainer = DPOTrainer(
+  File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/trl/trainer/dpo_trainer.py", line 480, in __init__
+    train_dataset = self._prepare_dataset(train_dataset, processing_class, args, "train")
+  File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/trl/trainer/dpo_trainer.py", line 654, in _prepare_dataset
+    dataset = dataset.map(
+  File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/datasets/arrow_dataset.py", line 562, in wrapper
+    out: Union["Dataset", "DatasetDict"] = func(self, *args, **kwargs)
+  File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/datasets/arrow_dataset.py", line 3341, in map
+    for rank, done, content in Dataset._map_single(**unprocessed_kwargs):
+  File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/datasets/arrow_dataset.py", line 3673, in _map_single
+    for i, example in iter_outputs(shard_iterable):
+  File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/datasets/arrow_dataset.py", line 3647, in iter_outputs
+    yield i, apply_function(example, i, offset=offset)
+  File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/datasets/arrow_dataset.py", line 3570, in apply_function
+    processed_inputs = function(*fn_args, *additional_args, **fn_kwargs)
+  File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/trl/trainer/dpo_trainer.py", line 749, in process_row
+    processor, tokenizer = processing_class, processing_class.tokenizer  # the processing class is a processor
+  File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/tokenization_utils_base.py", line 1331, in __getattr__
+    raise AttributeError(f"{self.__class__.__name__} has no attribute {key}")
+AttributeError: TokenizersBackend has no attribute tokenizer. Did you mean: '_tokenizer'?
diff --git a/dpo_run_24b_v1/wandb/run-20251226_155025-xzbi1gai/files/requirements.txt b/dpo_run_24b_v1/wandb/run-20251226_155025-xzbi1gai/files/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..79a4241d8724f018c9bdfcd7c289f1f14578574b
--- /dev/null
+++ b/dpo_run_24b_v1/wandb/run-20251226_155025-xzbi1gai/files/requirements.txt
@@ -0,0 +1,104 @@
+exceptiongroup==1.3.1
+wheel==0.45.1
+python-dateutil==2.9.0.post0
+nvidia-ml-py==13.580.82
+huggingface_hub==1.2.3
+idna==3.11
+click==8.3.1
+numpy==2.2.6
+httpx==0.28.1
+tokenizers==0.22.1
+sympy==1.13.1
+yarl==1.22.0
+async-timeout==5.0.1
+datasets==4.4.2
+platformdirs==4.5.1
+nvidia-cuda-cupti-cu12==12.1.105
+nvidia-nvtx-cu12==12.1.105
+smmap==5.0.2
+accelerate==1.12.0
+requests==2.32.5
+aiohttp==3.13.2
+bitsandbytes==0.49.0
+nvidia-cublas-cu12==12.1.3.1
+mpmath==1.3.0
+torchaudio==2.5.1+cu121
+nvidia-cuda-runtime-cu12==12.1.105
+typing-inspection==0.4.2
+GitPython==3.1.45
+xxhash==3.6.0
+nvidia-cusolver-cu12==11.4.5.107
+pydantic_core==2.41.5
+six==1.17.0
+torchvision==0.20.1+cu121
+typing_extensions==4.15.0
+triton==3.1.0
+charset-normalizer==3.4.4
+nvitop==1.6.1
+wandb==0.23.1
+regex==2025.11.3
+pip==25.3
+nvidia-cusparse-cu12==12.1.0.106
+pytz==2025.2
+Jinja2==3.1.6
+psutil==7.2.0
+pillow==12.0.0
+packaging==25.0
+safetensors==0.7.0
+sentry-sdk==2.48.0
+gitdb==4.0.12
+httpcore==1.0.9
+setuptools==80.9.0
+nvidia-cufft-cu12==11.0.2.54
+anyio==4.12.0
+transformers==5.0.0.dev0
+pydantic==2.12.5
+fsspec==2025.10.0
+filelock==3.20.0
+PyYAML==6.0.3
+hf-xet==1.2.0
+nvidia-cudnn-cu12==9.1.0.70
+tqdm==4.67.1
+MarkupSafe==2.1.5
+attrs==25.4.0
+nvidia-cuda-nvrtc-cu12==12.1.105
+peft==0.18.0
+aiohappyeyeballs==2.6.1
+networkx==3.4.2
+nvidia-nvjitlink-cu12==12.9.86
+certifi==2025.11.12
+pyarrow==22.0.0
+dill==0.4.0
+protobuf==6.33.2
+aiosignal==1.4.0
+frozenlist==1.8.0
+urllib3==2.6.2
+propcache==0.4.1
+tzdata==2025.3
+pandas==2.3.3
+annotated-types==0.7.0
+shellingham==1.5.4
+nvidia-nccl-cu12==2.21.5
+multidict==6.7.0
+nvidia-curand-cu12==10.3.2.106
+trl==0.26.2
+torch==2.5.1+cu121
+h11==0.16.0
+multiprocess==0.70.18
+typer-slim==0.21.0
+wheel==0.45.1
+tomli==2.0.1
+autocommand==2.2.2
+jaraco.context==5.3.0
+zipp==3.19.2
+packaging==24.2
+inflect==7.3.1
+typing_extensions==4.12.2
+platformdirs==4.2.2
+jaraco.functools==4.0.1
+jaraco.collections==5.1.0
+jaraco.text==3.12.1
+backports.tarfile==1.2.0
+more-itertools==10.3.0
+importlib_metadata==8.0.0
+typeguard==4.3.0
diff --git a/dpo_run_24b_v1/wandb/run-20251226_155025-xzbi1gai/files/wandb-metadata.json b/dpo_run_24b_v1/wandb/run-20251226_155025-xzbi1gai/files/wandb-metadata.json
new file mode 100644
index 0000000000000000000000000000000000000000..8951d1c557f596001aedbf9ff0fb1848611335be
--- /dev/null
+++ b/dpo_run_24b_v1/wandb/run-20251226_155025-xzbi1gai/files/wandb-metadata.json
@@ -0,0 +1,47 @@
+{
+  "os":  "Linux-6.12.46+-x86_64-with-glibc2.35",
+  "python":  "CPython 3.10.12",
+  "startedAt":  "2025-12-26T15:50:25.807997Z",
+  "args":  [
+    "--config",
+    "config_dpo.yaml"
+  ],
+  "program":  "/workspace/trainer-kit/DPO/run_dpo.py",
+  "codePath":  "run_dpo.py",
+  "codePathLocal":  "run_dpo.py",
+  "email":  "shaiksirajuddin9949@gmail.com",
+  "root":  "runs/dpo_run_24b_v1",
+  "host":  "a100-2gpu-shell-session-757d587799-mfdvv",
+  "executable":  "/workspace/llm_finetuning_env/bin/python",
+  "cpu_count":  12,
+  "cpu_count_logical":  24,
+  "gpu":  "NVIDIA A100-SXM4-80GB",
+  "gpu_count":  2,
+  "disk":  {
+    "/":  {
+      "total":  "791251738624",
+      "used":  "322263662592"
+    }
+  },
+  "memory":  {
+    "total":  "359047892992"
+  },
+  "gpu_nvidia":  [
+    {
+      "name":  "NVIDIA A100-SXM4-80GB",
+      "memoryTotal":  "85899345920",
+      "cudaCores":  6912,
+      "architecture":  "Ampere",
+      "uuid":  "GPU-989794b0-ec3b-13bf-db9f-3fbe341497ba"
+    },
+    {
+      "name":  "NVIDIA A100-SXM4-80GB",
+      "memoryTotal":  "85899345920",
+      "cudaCores":  6912,
+      "architecture":  "Ampere",
+      "uuid":  "GPU-3790aa64-60ef-9eac-b0b1-b278ee8c0d40"
+    }
+  ],
+  "cudaVersion":  "13.0",
+  "writerId":  "ameihbcwlgcy98oe1dp5374w3z3zxy65"
+}
\ No newline at end of file
diff --git a/dpo_run_24b_v1/wandb/run-20251226_155025-xzbi1gai/files/wandb-summary.json b/dpo_run_24b_v1/wandb/run-20251226_155025-xzbi1gai/files/wandb-summary.json
new file mode 100644
index 0000000000000000000000000000000000000000..8323111ffe74c507a370d240910b88c091062665
--- /dev/null
+++ b/dpo_run_24b_v1/wandb/run-20251226_155025-xzbi1gai/files/wandb-summary.json
@@ -0,0 +1 @@
+{"_runtime":40,"_wandb":{"runtime":40}}
\ No newline at end of file
diff --git a/dpo_run_24b_v1/wandb/run-20251226_155025-xzbi1gai/logs/debug-core.log b/dpo_run_24b_v1/wandb/run-20251226_155025-xzbi1gai/logs/debug-core.log
new file mode 100644
index 0000000000000000000000000000000000000000..613ca903bc43ac3b5376a69196c04c583cad347b
--- /dev/null
+++ b/dpo_run_24b_v1/wandb/run-20251226_155025-xzbi1gai/logs/debug-core.log
@@ -0,0 +1,14 @@
+{"time":"2025-12-26T15:50:25.898004094Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmphdfrtc1r/port-146588.txt","pid":146588,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false}
+{"time":"2025-12-26T15:50:25.898720751Z","level":"INFO","msg":"server: will exit if parent process dies","ppid":146588}
+{"time":"2025-12-26T15:50:25.89871642Z","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-146588-146667-1489497856/socket","Net":"unix"}}
+{"time":"2025-12-26T15:50:26.080304648Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"}
+{"time":"2025-12-26T15:50:26.086964394Z","level":"INFO","msg":"handleInformInit: received","streamId":"xzbi1gai","id":"1(@)"}
+{"time":"2025-12-26T15:50:26.260493848Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"xzbi1gai","id":"1(@)"}
+{"time":"2025-12-26T15:51:07.191002727Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"}
+{"time":"2025-12-26T15:51:07.191079395Z","level":"INFO","msg":"connection: closing","id":"1(@)"}
+{"time":"2025-12-26T15:51:07.191096732Z","level":"INFO","msg":"server is shutting down"}
+{"time":"2025-12-26T15:51:07.19114494Z","level":"INFO","msg":"connection: closed successfully","id":"1(@)"}
+{"time":"2025-12-26T15:51:07.191300523Z","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-146588-146667-1489497856/socket","Net":"unix"}}
+{"time":"2025-12-26T15:51:07.493519957Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"}
+{"time":"2025-12-26T15:51:07.493558655Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"}
+{"time":"2025-12-26T15:51:07.493571377Z","level":"INFO","msg":"server is closed"}
diff --git a/dpo_run_24b_v1/wandb/run-20251226_155025-xzbi1gai/logs/debug-internal.log b/dpo_run_24b_v1/wandb/run-20251226_155025-xzbi1gai/logs/debug-internal.log
new file mode 100644
index 0000000000000000000000000000000000000000..6a7296a434c0d2d34480e5d3c90f3a5933b3329a
--- /dev/null
+++ b/dpo_run_24b_v1/wandb/run-20251226_155025-xzbi1gai/logs/debug-internal.log
@@ -0,0 +1,11 @@
+{"time":"2025-12-26T15:50:26.087097028Z","level":"INFO","msg":"stream: starting","core version":"0.23.1"}
+{"time":"2025-12-26T15:50:26.260270879Z","level":"INFO","msg":"stream: created new stream","id":"xzbi1gai"}
+{"time":"2025-12-26T15:50:26.260348997Z","level":"INFO","msg":"handler: started","stream_id":"xzbi1gai"}
+{"time":"2025-12-26T15:50:26.260484421Z","level":"INFO","msg":"stream: started","id":"xzbi1gai"}
+{"time":"2025-12-26T15:50:26.260505711Z","level":"INFO","msg":"writer: started","stream_id":"xzbi1gai"}
+{"time":"2025-12-26T15:50:26.260529722Z","level":"INFO","msg":"sender: started","stream_id":"xzbi1gai"}
+{"time":"2025-12-26T15:51:07.191085313Z","level":"INFO","msg":"stream: closing","id":"xzbi1gai"}
+{"time":"2025-12-26T15:51:07.38941105Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
+{"time":"2025-12-26T15:51:07.492694909Z","level":"INFO","msg":"handler: closed","stream_id":"xzbi1gai"}
+{"time":"2025-12-26T15:51:07.492797828Z","level":"INFO","msg":"sender: closed","stream_id":"xzbi1gai"}
+{"time":"2025-12-26T15:51:07.492811559Z","level":"INFO","msg":"stream: closed","id":"xzbi1gai"}
diff --git a/dpo_run_24b_v1/wandb/run-20251226_155025-xzbi1gai/logs/debug.log b/dpo_run_24b_v1/wandb/run-20251226_155025-xzbi1gai/logs/debug.log
new file mode 100644
index 0000000000000000000000000000000000000000..8c37b4008b11f888ad2b4834196dcb35004bba34
--- /dev/null
+++ b/dpo_run_24b_v1/wandb/run-20251226_155025-xzbi1gai/logs/debug.log
@@ -0,0 +1,23 @@
+2025-12-26 15:50:25,809 INFO    MainThread:146588 [wandb_setup.py:_flush():80] Current SDK version is 0.23.1
+2025-12-26 15:50:25,809 INFO    MainThread:146588 [wandb_setup.py:_flush():80] Configure stats pid to 146588
+2025-12-26 15:50:25,809 INFO    MainThread:146588 [wandb_setup.py:_flush():80] Loading settings from /root/.config/wandb/settings
+2025-12-26 15:50:25,809 INFO    MainThread:146588 [wandb_setup.py:_flush():80] Loading settings from /workspace/trainer-kit/DPO/wandb/settings
+2025-12-26 15:50:25,809 INFO    MainThread:146588 [wandb_setup.py:_flush():80] Loading settings from environment variables
+2025-12-26 15:50:25,809 INFO    MainThread:146588 [wandb_init.py:setup_run_log_directory():714] Logging user logs to runs/dpo_run_24b_v1/wandb/run-20251226_155025-xzbi1gai/logs/debug.log
+2025-12-26 15:50:25,809 INFO    MainThread:146588 [wandb_init.py:setup_run_log_directory():715] Logging internal logs to runs/dpo_run_24b_v1/wandb/run-20251226_155025-xzbi1gai/logs/debug-internal.log
+2025-12-26 15:50:25,810 INFO    MainThread:146588 [wandb_init.py:init():841] calling init triggers
+2025-12-26 15:50:25,810 INFO    MainThread:146588 [wandb_init.py:init():846] wandb.init called with sweep_config: {}
+config: {'model': {'repo_id': '../../Models/Devstral-Small-2-24B-HS-CPT-SFT', 'revision': None, 'base_local_dir': 'base_model', 'trust_remote_code': True, 'tokenizer_use_fast': True, 'device_map': 'auto', 'torch_dtype': 'bfloat16', 'use_4bit': False, 'bnb_4bit_quant_type': 'nf4', 'bnb_4bit_use_double_quant': False, 'bnb_4bit_compute_dtype': 'bfloat16', 'attn_implementation': None}, 'data': {'train_jsonl': 'dpo_pairs_generated.jsonl', 'eval_jsonl': None, 'eval_split_ratio': 0.1, 'prompt_field': 'prompt', 'chosen_field': 'chosen', 'rejected_field': 'rejected', 'score_field': 'f1_score', 'format_type': 'chatml', 'system_prompt': 'You are a Hyperswitch Rust code analyzer. Identify functions/structs that need modification for a given task.\n\n## Output Format\n\n##OUTPUT\nExplain the data flow and why each component must change:\n- Flow: [Input → Processing → Output with arrows]\n- For each component: "The [ComponentName] ([path]) must [action] because [reason]—without this, [consequence]"\n- Explain coupling between components\n\n##SELECT\nmodify::crates/path/to/file.rs::impl::ComponentName\nadd::crates/another/file.rs::function::AnotherComponent\n<EOS>\n\n## Rules\n\n1. Use full paths: `remove::crates/folder/file.rs::Type::Name`\n2. Use `::` for nested items: `status::StructName::Type::Name`\n3. Always explain "must change because" and "without this"\n3. Types of components: function, struct, enum, impl, trait\n4. If there is extra information (e.g., enum variants), include that too.\n5. 
Start with ##OUTPUT, end with ##SELECT, terminate with <EOS>\n', 'max_length': 2048, 'shuffle': True, 'num_proc': 4}, 'peft': {'enabled': True, 'r': 16, 'lora_alpha': 32, 'lora_dropout': 0.05, 'bias': 'none', 'target_modules': 'auto'}, 'dpo': {'beta': 0.1, 'label_smoothing': 0.0, 'loss_type': 'sigmoid', 'use_reference_model': True, 'reference_free': False}, 'train': {'num_train_epochs': 3, 'per_device_train_batch_size': 1, 'per_device_eval_batch_size': 1, 'gradient_accumulation_steps': 8, 'learning_rate': '5e-5', 'weight_decay': 0.0, 'warmup_ratio': 0.1, 'lr_scheduler_type': 'cosine', 'optim': 'adamw_torch', 'max_grad_norm': 1.0, 'gradient_checkpointing': True, 'logging_steps': 2, 'save_strategy': 'steps', 'save_steps': 100, 'save_total_limit': 10, 'evaluation_strategy': 'steps', 'eval_steps': 25, 'load_best_model_at_end': True, 'early_stopping': {'enabled': True, 'patience': 5, 'min_delta': 0.001, 'metric': 'eval_loss', 'mode': 'min'}, 'resume_from_checkpoint': 'auto'}, 'run_dir': 'runs/dpo_run_24b_v1', '_wandb': {}}
+2025-12-26 15:50:25,810 INFO    MainThread:146588 [wandb_init.py:init():889] starting backend
+2025-12-26 15:50:26,080 INFO    MainThread:146588 [wandb_init.py:init():892] sending inform_init request
+2025-12-26 15:50:26,085 INFO    MainThread:146588 [wandb_init.py:init():900] backend started and connected
+2025-12-26 15:50:26,087 INFO    MainThread:146588 [wandb_init.py:init():970] updated telemetry
+2025-12-26 15:50:26,087 INFO    MainThread:146588 [wandb_init.py:init():994] communicating run to backend with 90.0 second timeout
+2025-12-26 15:50:26,527 INFO    MainThread:146588 [wandb_init.py:init():1041] starting run threads in backend
+2025-12-26 15:50:26,634 INFO    MainThread:146588 [wandb_run.py:_console_start():2521] atexit reg
+2025-12-26 15:50:26,634 INFO    MainThread:146588 [wandb_run.py:_redirect():2369] redirect: wrap_raw
+2025-12-26 15:50:26,634 INFO    MainThread:146588 [wandb_run.py:_redirect():2438] Wrapping output streams.
+2025-12-26 15:50:26,634 INFO    MainThread:146588 [wandb_run.py:_redirect():2461] Redirects installed.
+2025-12-26 15:50:26,639 INFO    MainThread:146588 [wandb_init.py:init():1081] run started, returning control to user process
+2025-12-26 15:51:07,191 INFO    wandb-AsyncioManager-main:146588 [service_client.py:_forward_responses():80] Reached EOF.
+2025-12-26 15:51:07,191 INFO    wandb-AsyncioManager-main:146588 [mailbox.py:close():137] Closing mailbox, abandoning 1 handles.
diff --git a/dpo_run_24b_v1/wandb/run-20251226_155025-xzbi1gai/run-xzbi1gai.wandb b/dpo_run_24b_v1/wandb/run-20251226_155025-xzbi1gai/run-xzbi1gai.wandb
new file mode 100644
index 0000000000000000000000000000000000000000..06a717650f1f648efc12e85931d56d877f6e49bf
--- /dev/null
+++ b/dpo_run_24b_v1/wandb/run-20251226_155025-xzbi1gai/run-xzbi1gai.wandb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:acf1f75c8f6f6945a7a0f6dc9504b3334f24638c6bcbcb7da4deb77ebae4fb55
+size 402565
diff --git a/dpo_run_24b_v1/wandb/run-20251226_155204-00msx40b/files/config.yaml b/dpo_run_24b_v1/wandb/run-20251226_155204-00msx40b/files/config.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..ed459e4e2445dde0c36f36ff474975642145259a
--- /dev/null
+++ b/dpo_run_24b_v1/wandb/run-20251226_155204-00msx40b/files/config.yaml
@@ -0,0 +1,165 @@
+_wandb:
+    value:
+        cli_version: 0.23.1
+        e:
+            sjavwrxaai633ke1bkz4snjioedvdxbv:
+                args:
+                    - --config
+                    - config_dpo.yaml
+                codePath: run_dpo.py
+                codePathLocal: run_dpo.py
+                cpu_count: 12
+                cpu_count_logical: 24
+                cudaVersion: "13.0"
+                disk:
+                    /:
+                        total: "791251738624"
+                        used: "322491088896"
+                email: shaiksirajuddin9949@gmail.com
+                executable: /workspace/llm_finetuning_env/bin/python
+                gpu: NVIDIA A100-SXM4-80GB
+                gpu_count: 2
+                gpu_nvidia:
+                    - architecture: Ampere
+                      cudaCores: 6912
+                      memoryTotal: "85899345920"
+                      name: NVIDIA A100-SXM4-80GB
+                      uuid: GPU-989794b0-ec3b-13bf-db9f-3fbe341497ba
+                    - architecture: Ampere
+                      cudaCores: 6912
+                      memoryTotal: "85899345920"
+                      name: NVIDIA A100-SXM4-80GB
+                      uuid: GPU-3790aa64-60ef-9eac-b0b1-b278ee8c0d40
+                host: a100-2gpu-shell-session-757d587799-mfdvv
+                memory:
+                    total: "359047892992"
+                os: Linux-6.12.46+-x86_64-with-glibc2.35
+                program: /workspace/trainer-kit/DPO/run_dpo.py
+                python: CPython 3.10.12
+                root: runs/dpo_run_24b_v1
+                startedAt: "2025-12-26T15:52:04.244333Z"
+                writerId: sjavwrxaai633ke1bkz4snjioedvdxbv
+        m: []
+        python_version: 3.10.12
+        t:
+            "1":
+                - 1
+                - 11
+                - 41
+                - 49
+                - 51
+                - 71
+                - 84
+                - 98
+            "2":
+                - 1
+                - 11
+                - 41
+                - 49
+                - 51
+                - 71
+                - 84
+                - 98
+            "3":
+                - 15
+                - 16
+            "4": 3.10.12
+            "5": 0.23.1
+            "6": 5.0.0.dev0
+            "12": 0.23.1
+            "13": linux-x86_64
+data:
+    value:
+        chosen_field: chosen
+        eval_jsonl: null
+        eval_split_ratio: 0.1
+        format_type: chatml
+        max_length: 2048
+        num_proc: 4
+        prompt_field: prompt
+        rejected_field: rejected
+        score_field: f1_score
+        shuffle: true
+        system_prompt: |
+            You are a Hyperswitch Rust code analyzer. Identify functions/structs that need modification for a given task.
+
+            ## Output Format
+
+            ##OUTPUT
+            Explain the data flow and why each component must change:
+            - Flow: [Input → Processing → Output with arrows]
+            - For each component: "The [ComponentName] ([path]) must [action] because [reason]—without this, [consequence]"
+            - Explain coupling between components
+
+            ##SELECT
+            modify::crates/path/to/file.rs::impl::ComponentName
+            add::crates/another/file.rs::function::AnotherComponent
+            <EOS>
+
+            ## Rules
+
+            1. Use full paths: `remove::crates/folder/file.rs::Type::Name`
+            2. Use `::` for nested items: `status::StructName::Type::Name`
+            3. Always explain "must change because" and "without this"
+            3. Types of components: function, struct, enum, impl, trait
+            4. If there is extra information (e.g., enum variants), include that too.
+            5. Start with ##OUTPUT, end with ##SELECT, terminate with <EOS>
+        train_jsonl: dpo_pairs_generated.jsonl
+dpo:
+    value:
+        beta: 0.1
+        label_smoothing: 0
+        loss_type: sigmoid
+        reference_free: false
+        use_reference_model: true
+model:
+    value:
+        attn_implementation: null
+        base_local_dir: base_model
+        bnb_4bit_compute_dtype: bfloat16
+        bnb_4bit_quant_type: nf4
+        bnb_4bit_use_double_quant: false
+        device_map: auto
+        repo_id: ../../Models/Devstral-Small-2-24B-HS-CPT-SFT
+        revision: null
+        tokenizer_use_fast: true
+        torch_dtype: bfloat16
+        trust_remote_code: true
+        use_4bit: false
+peft:
+    value:
+        bias: none
+        enabled: true
+        lora_alpha: 32
+        lora_dropout: 0.05
+        r: 16
+        target_modules: auto
+run_dir:
+    value: runs/dpo_run_24b_v1
+train:
+    value:
+        early_stopping:
+            enabled: true
+            metric: eval_loss
+            min_delta: 0.001
+            mode: min
+            patience: 5
+        eval_steps: 25
+        evaluation_strategy: steps
+        gradient_accumulation_steps: 8
+        gradient_checkpointing: true
+        learning_rate: "5e-5"
+        load_best_model_at_end: true
+        logging_steps: 2
+        lr_scheduler_type: cosine
+        max_grad_norm: 1
+        num_train_epochs: 3
+        optim: adamw_torch
+        per_device_eval_batch_size: 1
+        per_device_train_batch_size: 1
+        resume_from_checkpoint: auto
+        save_steps: 100
+        save_strategy: steps
+        save_total_limit: 10
+        warmup_ratio: 0.1
+        weight_decay: 0
diff --git a/dpo_run_24b_v1/wandb/run-20251226_155204-00msx40b/files/output.log b/dpo_run_24b_v1/wandb/run-20251226_155204-00msx40b/files/output.log
new file mode 100644
index 0000000000000000000000000000000000000000..a1a13ecf250b0b4c9612969d19f5d0daa8984889
--- /dev/null
+++ b/dpo_run_24b_v1/wandb/run-20251226_155204-00msx40b/files/output.log
@@ -0,0 +1,61 @@
+Wandb initialized: project='dpo-training', name='auto-generated'
+2025-12-26 15:52:07,378 - INFO - Detected Mistral3 model architecture, loading with specific class
+Loading weights: 100%|█| 585/585 [00:14<00:00, 40.85it/s, Materializing param=model.vision_tower.transfo
+2025-12-26 15:52:23,613 - INFO - Ensuring all parameters are materialized...
+Loading reference model (frozen copy)...
+2025-12-26 15:52:28,208 - INFO - Detected Mistral3 model architecture, loading with specific class
+Loading weights: 100%|█| 585/585 [00:13<00:00, 42.68it/s, Materializing param=model.vision_tower.transfo
+2025-12-26 15:52:43,800 - INFO - Ensuring all parameters are materialized...
+Reference model loaded and frozen
+2025-12-26 15:52:44,876 - INFO - HTTP Request: HEAD https://s3.amazonaws.com/datasets.huggingface.co/datasets/datasets/json/json.py "HTTP/1.1 200 OK"
+2025-12-26 15:52:44,891 - INFO - Formatting train DPO data...
+2025-12-26 15:52:45,166 - INFO - Train dataset after filtering: 6850 examples
+2025-12-26 15:52:45,167 - INFO - train dataset validation passed: 6850 examples
+2025-12-26 15:52:45,167 - INFO - Formatting eval DPO data...
+2025-12-26 15:52:45,419 - INFO - Eval dataset after filtering: 762 examples
+2025-12-26 15:52:45,419 - INFO - eval dataset validation passed: 762 examples
+warmup_ratio is deprecated and will be removed in v5.2. Use `warmup_steps` instead.
+Early stopping enabled: patience=5, min_delta=0.001
+2025-12-26 15:52:45,470 - INFO - DPO Training with beta=0.1, loss_type=sigmoid
+warmup_ratio is deprecated and will be removed in v5.2. Use `warmup_steps` instead.
+2025-12-26 15:52:45,498 - WARNING - You passed `model_init_kwargs` to the `DPOConfig`, but your model is already instantiated. The `model_init_kwargs` will be ignored.
+Traceback (most recent call last):
+  File "/workspace/trainer-kit/DPO/run_dpo.py", line 981, in <module>
+    main()
+  File "/workspace/trainer-kit/DPO/run_dpo.py", line 938, in main
+    trainer = DPOTrainer(
+  File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/trl/trainer/dpo_trainer.py", line 330, in __init__
+    processing_class = AutoProcessor.from_pretrained(model_id)
+  File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/models/auto/processing_auto.py", line 395, in from_pretrained
+    return processor_class.from_pretrained(
+  File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/processing_utils.py", line 1413, in from_pretrained
+    args = cls._get_arguments_from_pretrained(pretrained_model_name_or_path, processor_dict, **kwargs)
+  File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/processing_utils.py", line 1524, in _get_arguments_from_pretrained
+    sub_processor = auto_processor_class.from_pretrained(
+  File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/models/auto/image_processing_auto.py", line 504, in from_pretrained
+    raise initial_exception
+  File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/models/auto/image_processing_auto.py", line 486, in from_pretrained
+    config_dict, _ = ImageProcessingMixin.get_image_processor_dict(
+  File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/image_processing_base.py", line 334, in get_image_processor_dict
+    raise OSError(
+OSError: Can't load image processor for '../../Models/Devstral-Small-2-24B-HS-CPT-SFT'. If you were trying to load it from 'https://huggingface.co/models', make sure you don't have a local directory with the same name. Otherwise, make sure '../../Models/Devstral-Small-2-24B-HS-CPT-SFT' is the correct path to a directory containing a preprocessor_config.json file
+Traceback (most recent call last):
+  File "/workspace/trainer-kit/DPO/run_dpo.py", line 981, in <module>
+    main()
+  File "/workspace/trainer-kit/DPO/run_dpo.py", line 938, in main
+    trainer = DPOTrainer(
+  File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/trl/trainer/dpo_trainer.py", line 330, in __init__
+    processing_class = AutoProcessor.from_pretrained(model_id)
+  File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/models/auto/processing_auto.py", line 395, in from_pretrained
+    return processor_class.from_pretrained(
+  File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/processing_utils.py", line 1413, in from_pretrained
+    args = cls._get_arguments_from_pretrained(pretrained_model_name_or_path, processor_dict, **kwargs)
+  File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/processing_utils.py", line 1524, in _get_arguments_from_pretrained
+    sub_processor = auto_processor_class.from_pretrained(
+  File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/models/auto/image_processing_auto.py", line 504, in from_pretrained
+    raise initial_exception
+  File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/models/auto/image_processing_auto.py", line 486, in from_pretrained
+    config_dict, _ = ImageProcessingMixin.get_image_processor_dict(
+  File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/image_processing_base.py", line 334, in get_image_processor_dict
+    raise OSError(
+OSError: Can't load image processor for '../../Models/Devstral-Small-2-24B-HS-CPT-SFT'. If you were trying to load it from 'https://huggingface.co/models', make sure you don't have a local directory with the same name. Otherwise, make sure '../../Models/Devstral-Small-2-24B-HS-CPT-SFT' is the correct path to a directory containing a preprocessor_config.json file
diff --git a/dpo_run_24b_v1/wandb/run-20251226_155204-00msx40b/files/requirements.txt b/dpo_run_24b_v1/wandb/run-20251226_155204-00msx40b/files/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..79a4241d8724f018c9bdfcd7c289f1f14578574b
--- /dev/null
+++ b/dpo_run_24b_v1/wandb/run-20251226_155204-00msx40b/files/requirements.txt
@@ -0,0 +1,104 @@
+exceptiongroup==1.3.1
+wheel==0.45.1
+python-dateutil==2.9.0.post0
+nvidia-ml-py==13.580.82
+huggingface_hub==1.2.3
+idna==3.11
+click==8.3.1
+numpy==2.2.6
+httpx==0.28.1
+tokenizers==0.22.1
+sympy==1.13.1
+yarl==1.22.0
+async-timeout==5.0.1
+datasets==4.4.2
+platformdirs==4.5.1
+nvidia-cuda-cupti-cu12==12.1.105
+nvidia-nvtx-cu12==12.1.105
+smmap==5.0.2
+accelerate==1.12.0
+requests==2.32.5
+aiohttp==3.13.2
+bitsandbytes==0.49.0
+nvidia-cublas-cu12==12.1.3.1
+mpmath==1.3.0
+torchaudio==2.5.1+cu121
+nvidia-cuda-runtime-cu12==12.1.105
+typing-inspection==0.4.2
+GitPython==3.1.45
+xxhash==3.6.0
+nvidia-cusolver-cu12==11.4.5.107
+pydantic_core==2.41.5
+six==1.17.0
+torchvision==0.20.1+cu121
+typing_extensions==4.15.0
+triton==3.1.0
+charset-normalizer==3.4.4
+nvitop==1.6.1
+wandb==0.23.1
+regex==2025.11.3
+pip==25.3
+nvidia-cusparse-cu12==12.1.0.106
+pytz==2025.2
+Jinja2==3.1.6
+psutil==7.2.0
+pillow==12.0.0
+packaging==25.0
+safetensors==0.7.0
+sentry-sdk==2.48.0
+gitdb==4.0.12
+httpcore==1.0.9
+setuptools==80.9.0
+nvidia-cufft-cu12==11.0.2.54
+anyio==4.12.0
+transformers==5.0.0.dev0
+pydantic==2.12.5
+fsspec==2025.10.0
+filelock==3.20.0
+PyYAML==6.0.3
+hf-xet==1.2.0
+nvidia-cudnn-cu12==9.1.0.70
+tqdm==4.67.1
+MarkupSafe==2.1.5
+attrs==25.4.0
+nvidia-cuda-nvrtc-cu12==12.1.105
+peft==0.18.0
+aiohappyeyeballs==2.6.1
+networkx==3.4.2
+nvidia-nvjitlink-cu12==12.9.86
+certifi==2025.11.12
+pyarrow==22.0.0
+dill==0.4.0
+protobuf==6.33.2
+aiosignal==1.4.0
+frozenlist==1.8.0
+urllib3==2.6.2
+propcache==0.4.1
+tzdata==2025.3
+pandas==2.3.3
+annotated-types==0.7.0
+shellingham==1.5.4
+nvidia-nccl-cu12==2.21.5
+multidict==6.7.0
+nvidia-curand-cu12==10.3.2.106
+trl==0.26.2
+torch==2.5.1+cu121
+h11==0.16.0
+multiprocess==0.70.18
+typer-slim==0.21.0
+wheel==0.45.1
+tomli==2.0.1
+autocommand==2.2.2
+jaraco.context==5.3.0
+zipp==3.19.2
+packaging==24.2
+inflect==7.3.1
+typing_extensions==4.12.2
+platformdirs==4.2.2
+jaraco.functools==4.0.1
+jaraco.collections==5.1.0
+jaraco.text==3.12.1
+backports.tarfile==1.2.0
+more-itertools==10.3.0
+importlib_metadata==8.0.0
+typeguard==4.3.0
diff --git a/dpo_run_24b_v1/wandb/run-20251226_155204-00msx40b/files/wandb-metadata.json b/dpo_run_24b_v1/wandb/run-20251226_155204-00msx40b/files/wandb-metadata.json
new file mode 100644
index 0000000000000000000000000000000000000000..9ee3e409b3503c09332fd2e61cc43f9255f95baa
--- /dev/null
+++ b/dpo_run_24b_v1/wandb/run-20251226_155204-00msx40b/files/wandb-metadata.json
@@ -0,0 +1,47 @@
+{
+  "os":  "Linux-6.12.46+-x86_64-with-glibc2.35",
+  "python":  "CPython 3.10.12",
+  "startedAt":  "2025-12-26T15:52:04.244333Z",
+  "args":  [
+    "--config",
+    "config_dpo.yaml"
+  ],
+  "program":  "/workspace/trainer-kit/DPO/run_dpo.py",
+  "codePath":  "run_dpo.py",
+  "codePathLocal":  "run_dpo.py",
+  "email":  "shaiksirajuddin9949@gmail.com",
+  "root":  "runs/dpo_run_24b_v1",
+  "host":  "a100-2gpu-shell-session-757d587799-mfdvv",
+  "executable":  "/workspace/llm_finetuning_env/bin/python",
+  "cpu_count":  12,
+  "cpu_count_logical":  24,
+  "gpu":  "NVIDIA A100-SXM4-80GB",
+  "gpu_count":  2,
+  "disk":  {
+    "/":  {
+      "total":  "791251738624",
+      "used":  "322491088896"
+    }
+  },
+  "memory":  {
+    "total":  "359047892992"
+  },
+  "gpu_nvidia":  [
+    {
+      "name":  "NVIDIA A100-SXM4-80GB",
+      "memoryTotal":  "85899345920",
+      "cudaCores":  6912,
+      "architecture":  "Ampere",
+      "uuid":  "GPU-989794b0-ec3b-13bf-db9f-3fbe341497ba"
+    },
+    {
+      "name":  "NVIDIA A100-SXM4-80GB",
+      "memoryTotal":  "85899345920",
+      "cudaCores":  6912,
+      "architecture":  "Ampere",
+      "uuid":  "GPU-3790aa64-60ef-9eac-b0b1-b278ee8c0d40"
+    }
+  ],
+  "cudaVersion":  "13.0",
+  "writerId":  "sjavwrxaai633ke1bkz4snjioedvdxbv"
+}
\ No newline at end of file
diff --git a/dpo_run_24b_v1/wandb/run-20251226_155204-00msx40b/files/wandb-summary.json b/dpo_run_24b_v1/wandb/run-20251226_155204-00msx40b/files/wandb-summary.json
new file mode 100644
index 0000000000000000000000000000000000000000..f2b7ff96efd576179c1af4bf6b36aeadbff8186d
--- /dev/null
+++ b/dpo_run_24b_v1/wandb/run-20251226_155204-00msx40b/files/wandb-summary.json
@@ -0,0 +1 @@
+{"_wandb":{"runtime":40},"_runtime":40}
\ No newline at end of file
diff --git a/dpo_run_24b_v1/wandb/run-20251226_155204-00msx40b/logs/debug-core.log b/dpo_run_24b_v1/wandb/run-20251226_155204-00msx40b/logs/debug-core.log
new file mode 100644
index 0000000000000000000000000000000000000000..6ab90f11167978348542ba1569d49f2499030747
--- /dev/null
+++ b/dpo_run_24b_v1/wandb/run-20251226_155204-00msx40b/logs/debug-core.log
@@ -0,0 +1,14 @@
+{"time":"2025-12-26T15:52:04.330407339Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpo_8110fk/port-147336.txt","pid":147336,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false}
+{"time":"2025-12-26T15:52:04.331009824Z","level":"INFO","msg":"server: will exit if parent process dies","ppid":147336}
+{"time":"2025-12-26T15:52:04.331007282Z","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-147336-147405-2752087646/socket","Net":"unix"}}
+{"time":"2025-12-26T15:52:04.513826014Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"}
+{"time":"2025-12-26T15:52:04.520093766Z","level":"INFO","msg":"handleInformInit: received","streamId":"00msx40b","id":"1(@)"}
+{"time":"2025-12-26T15:52:04.677671614Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"00msx40b","id":"1(@)"}
+{"time":"2025-12-26T15:52:45.508962371Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"}
+{"time":"2025-12-26T15:52:45.509021787Z","level":"INFO","msg":"connection: closing","id":"1(@)"}
+{"time":"2025-12-26T15:52:45.509073387Z","level":"INFO","msg":"server is shutting down"}
+{"time":"2025-12-26T15:52:45.509093381Z","level":"INFO","msg":"connection: closed successfully","id":"1(@)"}
+{"time":"2025-12-26T15:52:45.509238591Z","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-147336-147405-2752087646/socket","Net":"unix"}}
+{"time":"2025-12-26T15:52:45.822991281Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"}
+{"time":"2025-12-26T15:52:45.823016099Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"}
+{"time":"2025-12-26T15:52:45.823028271Z","level":"INFO","msg":"server is closed"}
diff --git a/dpo_run_24b_v1/wandb/run-20251226_155204-00msx40b/logs/debug-internal.log b/dpo_run_24b_v1/wandb/run-20251226_155204-00msx40b/logs/debug-internal.log
new file mode 100644
index 0000000000000000000000000000000000000000..5f36f167704caa7abd99a8abed599c6bacfb1641
--- /dev/null
+++ b/dpo_run_24b_v1/wandb/run-20251226_155204-00msx40b/logs/debug-internal.log
@@ -0,0 +1,11 @@
+{"time":"2025-12-26T15:52:04.520208686Z","level":"INFO","msg":"stream: starting","core version":"0.23.1"}
+{"time":"2025-12-26T15:52:04.677441728Z","level":"INFO","msg":"stream: created new stream","id":"00msx40b"}
+{"time":"2025-12-26T15:52:04.677538624Z","level":"INFO","msg":"handler: started","stream_id":"00msx40b"}
+{"time":"2025-12-26T15:52:04.677662488Z","level":"INFO","msg":"stream: started","id":"00msx40b"}
+{"time":"2025-12-26T15:52:04.677684998Z","level":"INFO","msg":"writer: started","stream_id":"00msx40b"}
+{"time":"2025-12-26T15:52:04.677696651Z","level":"INFO","msg":"sender: started","stream_id":"00msx40b"}
+{"time":"2025-12-26T15:52:45.509029743Z","level":"INFO","msg":"stream: closing","id":"00msx40b"}
+{"time":"2025-12-26T15:52:45.704898985Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
+{"time":"2025-12-26T15:52:45.822151941Z","level":"INFO","msg":"handler: closed","stream_id":"00msx40b"}
+{"time":"2025-12-26T15:52:45.822254749Z","level":"INFO","msg":"sender: closed","stream_id":"00msx40b"}
+{"time":"2025-12-26T15:52:45.822266001Z","level":"INFO","msg":"stream: closed","id":"00msx40b"}
diff --git a/dpo_run_24b_v1/wandb/run-20251226_155204-00msx40b/logs/debug.log b/dpo_run_24b_v1/wandb/run-20251226_155204-00msx40b/logs/debug.log
new file mode 100644
index 0000000000000000000000000000000000000000..997f3b39afc3e408f531cd3962917493439bd82e
--- /dev/null
+++ b/dpo_run_24b_v1/wandb/run-20251226_155204-00msx40b/logs/debug.log
@@ -0,0 +1,23 @@
+2025-12-26 15:52:04,246 INFO    MainThread:147336 [wandb_setup.py:_flush():80] Current SDK version is 0.23.1
+2025-12-26 15:52:04,246 INFO    MainThread:147336 [wandb_setup.py:_flush():80] Configure stats pid to 147336
+2025-12-26 15:52:04,246 INFO    MainThread:147336 [wandb_setup.py:_flush():80] Loading settings from /root/.config/wandb/settings
+2025-12-26 15:52:04,246 INFO    MainThread:147336 [wandb_setup.py:_flush():80] Loading settings from /workspace/trainer-kit/DPO/wandb/settings
+2025-12-26 15:52:04,246 INFO    MainThread:147336 [wandb_setup.py:_flush():80] Loading settings from environment variables
+2025-12-26 15:52:04,246 INFO    MainThread:147336 [wandb_init.py:setup_run_log_directory():714] Logging user logs to runs/dpo_run_24b_v1/wandb/run-20251226_155204-00msx40b/logs/debug.log
+2025-12-26 15:52:04,246 INFO    MainThread:147336 [wandb_init.py:setup_run_log_directory():715] Logging internal logs to runs/dpo_run_24b_v1/wandb/run-20251226_155204-00msx40b/logs/debug-internal.log
+2025-12-26 15:52:04,246 INFO    MainThread:147336 [wandb_init.py:init():841] calling init triggers
+2025-12-26 15:52:04,246 INFO    MainThread:147336 [wandb_init.py:init():846] wandb.init called with sweep_config: {}
+config: {'model': {'repo_id': '../../Models/Devstral-Small-2-24B-HS-CPT-SFT', 'revision': None, 'base_local_dir': 'base_model', 'trust_remote_code': True, 'tokenizer_use_fast': True, 'device_map': 'auto', 'torch_dtype': 'bfloat16', 'use_4bit': False, 'bnb_4bit_quant_type': 'nf4', 'bnb_4bit_use_double_quant': False, 'bnb_4bit_compute_dtype': 'bfloat16', 'attn_implementation': None}, 'data': {'train_jsonl': 'dpo_pairs_generated.jsonl', 'eval_jsonl': None, 'eval_split_ratio': 0.1, 'prompt_field': 'prompt', 'chosen_field': 'chosen', 'rejected_field': 'rejected', 'score_field': 'f1_score', 'format_type': 'chatml', 'system_prompt': 'You are a Hyperswitch Rust code analyzer. Identify functions/structs that need modification for a given task.\n\n## Output Format\n\n##OUTPUT\nExplain the data flow and why each component must change:\n- Flow: [Input → Processing → Output with arrows]\n- For each component: "The [ComponentName] ([path]) must [action] because [reason]—without this, [consequence]"\n- Explain coupling between components\n\n##SELECT\nmodify::crates/path/to/file.rs::impl::ComponentName\nadd::crates/another/file.rs::function::AnotherComponent\n<EOS>\n\n## Rules\n\n1. Use full paths: `remove::crates/folder/file.rs::Type::Name`\n2. Use `::` for nested items: `status::StructName::Type::Name`\n3. Always explain "must change because" and "without this"\n3. Types of components: function, struct, enum, impl, trait\n4. If there is extra information (e.g., enum variants), include that too.\n5. 
Start with ##OUTPUT, end with ##SELECT, terminate with <EOS>\n', 'max_length': 2048, 'shuffle': True, 'num_proc': 4}, 'peft': {'enabled': True, 'r': 16, 'lora_alpha': 32, 'lora_dropout': 0.05, 'bias': 'none', 'target_modules': 'auto'}, 'dpo': {'beta': 0.1, 'label_smoothing': 0.0, 'loss_type': 'sigmoid', 'use_reference_model': True, 'reference_free': False}, 'train': {'num_train_epochs': 3, 'per_device_train_batch_size': 1, 'per_device_eval_batch_size': 1, 'gradient_accumulation_steps': 8, 'learning_rate': '5e-5', 'weight_decay': 0.0, 'warmup_ratio': 0.1, 'lr_scheduler_type': 'cosine', 'optim': 'adamw_torch', 'max_grad_norm': 1.0, 'gradient_checkpointing': True, 'logging_steps': 2, 'save_strategy': 'steps', 'save_steps': 100, 'save_total_limit': 10, 'evaluation_strategy': 'steps', 'eval_steps': 25, 'load_best_model_at_end': True, 'early_stopping': {'enabled': True, 'patience': 5, 'min_delta': 0.001, 'metric': 'eval_loss', 'mode': 'min'}, 'resume_from_checkpoint': 'auto'}, 'run_dir': 'runs/dpo_run_24b_v1', '_wandb': {}}
+2025-12-26 15:52:04,246 INFO    MainThread:147336 [wandb_init.py:init():889] starting backend
+2025-12-26 15:52:04,513 INFO    MainThread:147336 [wandb_init.py:init():892] sending inform_init request
+2025-12-26 15:52:04,518 INFO    MainThread:147336 [wandb_init.py:init():900] backend started and connected
+2025-12-26 15:52:04,520 INFO    MainThread:147336 [wandb_init.py:init():970] updated telemetry
+2025-12-26 15:52:04,521 INFO    MainThread:147336 [wandb_init.py:init():994] communicating run to backend with 90.0 second timeout
+2025-12-26 15:52:04,863 INFO    MainThread:147336 [wandb_init.py:init():1041] starting run threads in backend
+2025-12-26 15:52:04,981 INFO    MainThread:147336 [wandb_run.py:_console_start():2521] atexit reg
+2025-12-26 15:52:04,981 INFO    MainThread:147336 [wandb_run.py:_redirect():2369] redirect: wrap_raw
+2025-12-26 15:52:04,981 INFO    MainThread:147336 [wandb_run.py:_redirect():2438] Wrapping output streams.
+2025-12-26 15:52:04,981 INFO    MainThread:147336 [wandb_run.py:_redirect():2461] Redirects installed.
+2025-12-26 15:52:04,987 INFO    MainThread:147336 [wandb_init.py:init():1081] run started, returning control to user process
+2025-12-26 15:52:45,509 INFO    wandb-AsyncioManager-main:147336 [service_client.py:_forward_responses():80] Reached EOF.
+2025-12-26 15:52:45,509 INFO    wandb-AsyncioManager-main:147336 [mailbox.py:close():137] Closing mailbox, abandoning 1 handles.
diff --git a/dpo_run_24b_v1/wandb/run-20251226_155204-00msx40b/run-00msx40b.wandb b/dpo_run_24b_v1/wandb/run-20251226_155204-00msx40b/run-00msx40b.wandb
new file mode 100644
index 0000000000000000000000000000000000000000..4e71ed39868c52f663622387354b9375048d983f
--- /dev/null
+++ b/dpo_run_24b_v1/wandb/run-20251226_155204-00msx40b/run-00msx40b.wandb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0d9ddb9b46044f6b63eeca0854b01878b951da3fa7deb9395cf897d34e893057
+size 401545