SirajRLX commited on
Commit
f2bce21
·
verified ·
1 Parent(s): 6d55b8f

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +10 -0
  2. dpo_run_24b_v1/config_resolved.yaml +93 -0
  3. dpo_run_24b_v1/wandb/debug-internal.log +11 -0
  4. dpo_run_24b_v1/wandb/debug.log +23 -0
  5. dpo_run_24b_v1/wandb/run-20251226_152629-ymohys1q/files/config.yaml +165 -0
  6. dpo_run_24b_v1/wandb/run-20251226_152629-ymohys1q/files/output.log +44 -0
  7. dpo_run_24b_v1/wandb/run-20251226_152629-ymohys1q/files/requirements.txt +104 -0
  8. dpo_run_24b_v1/wandb/run-20251226_152629-ymohys1q/files/wandb-metadata.json +47 -0
  9. dpo_run_24b_v1/wandb/run-20251226_152629-ymohys1q/files/wandb-summary.json +1 -0
  10. dpo_run_24b_v1/wandb/run-20251226_152629-ymohys1q/logs/debug-core.log +14 -0
  11. dpo_run_24b_v1/wandb/run-20251226_152629-ymohys1q/logs/debug-internal.log +11 -0
  12. dpo_run_24b_v1/wandb/run-20251226_152629-ymohys1q/logs/debug.log +23 -0
  13. dpo_run_24b_v1/wandb/run-20251226_152629-ymohys1q/run-ymohys1q.wandb +0 -0
  14. dpo_run_24b_v1/wandb/run-20251226_152902-spwc3b4a/files/config.yaml +165 -0
  15. dpo_run_24b_v1/wandb/run-20251226_152902-spwc3b4a/files/output.log +44 -0
  16. dpo_run_24b_v1/wandb/run-20251226_152902-spwc3b4a/files/requirements.txt +104 -0
  17. dpo_run_24b_v1/wandb/run-20251226_152902-spwc3b4a/files/wandb-metadata.json +47 -0
  18. dpo_run_24b_v1/wandb/run-20251226_152902-spwc3b4a/files/wandb-summary.json +1 -0
  19. dpo_run_24b_v1/wandb/run-20251226_152902-spwc3b4a/logs/debug-core.log +14 -0
  20. dpo_run_24b_v1/wandb/run-20251226_152902-spwc3b4a/logs/debug-internal.log +11 -0
  21. dpo_run_24b_v1/wandb/run-20251226_152902-spwc3b4a/logs/debug.log +23 -0
  22. dpo_run_24b_v1/wandb/run-20251226_152902-spwc3b4a/run-spwc3b4a.wandb +0 -0
  23. dpo_run_24b_v1/wandb/run-20251226_153052-uo02exvi/files/config.yaml +165 -0
  24. dpo_run_24b_v1/wandb/run-20251226_153052-uo02exvi/files/output.log +44 -0
  25. dpo_run_24b_v1/wandb/run-20251226_153052-uo02exvi/files/requirements.txt +104 -0
  26. dpo_run_24b_v1/wandb/run-20251226_153052-uo02exvi/files/wandb-metadata.json +47 -0
  27. dpo_run_24b_v1/wandb/run-20251226_153052-uo02exvi/files/wandb-summary.json +1 -0
  28. dpo_run_24b_v1/wandb/run-20251226_153052-uo02exvi/logs/debug-core.log +14 -0
  29. dpo_run_24b_v1/wandb/run-20251226_153052-uo02exvi/logs/debug-internal.log +11 -0
  30. dpo_run_24b_v1/wandb/run-20251226_153052-uo02exvi/logs/debug.log +23 -0
  31. dpo_run_24b_v1/wandb/run-20251226_153052-uo02exvi/run-uo02exvi.wandb +0 -0
  32. dpo_run_24b_v1/wandb/run-20251226_153152-wxs32uu8/files/config.yaml +165 -0
  33. dpo_run_24b_v1/wandb/run-20251226_153152-wxs32uu8/files/output.log +45 -0
  34. dpo_run_24b_v1/wandb/run-20251226_153152-wxs32uu8/files/requirements.txt +104 -0
  35. dpo_run_24b_v1/wandb/run-20251226_153152-wxs32uu8/files/wandb-metadata.json +47 -0
  36. dpo_run_24b_v1/wandb/run-20251226_153152-wxs32uu8/files/wandb-summary.json +1 -0
  37. dpo_run_24b_v1/wandb/run-20251226_153152-wxs32uu8/logs/debug-core.log +14 -0
  38. dpo_run_24b_v1/wandb/run-20251226_153152-wxs32uu8/logs/debug-internal.log +11 -0
  39. dpo_run_24b_v1/wandb/run-20251226_153152-wxs32uu8/logs/debug.log +23 -0
  40. dpo_run_24b_v1/wandb/run-20251226_153152-wxs32uu8/run-wxs32uu8.wandb +0 -0
  41. dpo_run_24b_v1/wandb/run-20251226_153336-fb8js9es/files/config.yaml +165 -0
  42. dpo_run_24b_v1/wandb/run-20251226_153336-fb8js9es/files/output.log +76 -0
  43. dpo_run_24b_v1/wandb/run-20251226_153336-fb8js9es/files/requirements.txt +104 -0
  44. dpo_run_24b_v1/wandb/run-20251226_153336-fb8js9es/files/wandb-metadata.json +47 -0
  45. dpo_run_24b_v1/wandb/run-20251226_153336-fb8js9es/files/wandb-summary.json +1 -0
  46. dpo_run_24b_v1/wandb/run-20251226_153336-fb8js9es/logs/debug-core.log +14 -0
  47. dpo_run_24b_v1/wandb/run-20251226_153336-fb8js9es/logs/debug-internal.log +11 -0
  48. dpo_run_24b_v1/wandb/run-20251226_153336-fb8js9es/logs/debug.log +23 -0
  49. dpo_run_24b_v1/wandb/run-20251226_153336-fb8js9es/run-fb8js9es.wandb +3 -0
  50. dpo_run_24b_v1/wandb/run-20251226_153517-g5bybskm/files/config.yaml +165 -0
.gitattributes CHANGED
@@ -59,3 +59,13 @@ grpo_qwen_14b/best_adapter/tokenizer.json filter=lfs diff=lfs merge=lfs -text
59
  grpo_qwen_14b/checkpoints/checkpoint-400/tokenizer.json filter=lfs diff=lfs merge=lfs -text
60
  grpo_qwen_14b/checkpoints/checkpoint-500/tokenizer.json filter=lfs diff=lfs merge=lfs -text
61
  grpo_qwen_14b/wandb/run-20251227_194423-jz7bptqa/run-jz7bptqa.wandb filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
59
  grpo_qwen_14b/checkpoints/checkpoint-400/tokenizer.json filter=lfs diff=lfs merge=lfs -text
60
  grpo_qwen_14b/checkpoints/checkpoint-500/tokenizer.json filter=lfs diff=lfs merge=lfs -text
61
  grpo_qwen_14b/wandb/run-20251227_194423-jz7bptqa/run-jz7bptqa.wandb filter=lfs diff=lfs merge=lfs -text
62
+ dpo_run_24b_v1/wandb/run-20251226_153336-fb8js9es/run-fb8js9es.wandb filter=lfs diff=lfs merge=lfs -text
63
+ dpo_run_24b_v1/wandb/run-20251226_153517-g5bybskm/run-g5bybskm.wandb filter=lfs diff=lfs merge=lfs -text
64
+ dpo_run_24b_v1/wandb/run-20251226_153729-pmpxe28f/run-pmpxe28f.wandb filter=lfs diff=lfs merge=lfs -text
65
+ dpo_run_24b_v1/wandb/run-20251226_153949-6fxdx0d2/run-6fxdx0d2.wandb filter=lfs diff=lfs merge=lfs -text
66
+ dpo_run_24b_v1/wandb/run-20251226_154144-0ek9e5bk/run-0ek9e5bk.wandb filter=lfs diff=lfs merge=lfs -text
67
+ dpo_run_24b_v1/wandb/run-20251226_154334-wvpf8qeo/run-wvpf8qeo.wandb filter=lfs diff=lfs merge=lfs -text
68
+ dpo_run_24b_v1/wandb/run-20251226_154526-q26c0nv5/run-q26c0nv5.wandb filter=lfs diff=lfs merge=lfs -text
69
+ dpo_run_24b_v1/wandb/run-20251226_154823-csl0hdpv/run-csl0hdpv.wandb filter=lfs diff=lfs merge=lfs -text
70
+ dpo_run_24b_v1/wandb/run-20251226_155025-xzbi1gai/run-xzbi1gai.wandb filter=lfs diff=lfs merge=lfs -text
71
+ dpo_run_24b_v1/wandb/run-20251226_155204-00msx40b/run-00msx40b.wandb filter=lfs diff=lfs merge=lfs -text
dpo_run_24b_v1/config_resolved.yaml ADDED
@@ -0,0 +1,93 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ run:
2
+ run_dir: ./runs/dpo_run_24b_v1
3
+ seed: 42
4
+ wandb:
5
+ enabled: true
6
+ project: dpo-training
7
+ entity: null
8
+ name: null
9
+ tags:
10
+ - dpo-lora
11
+ - preference-optimization
12
+ notes: null
13
+ model:
14
+ repo_id: ../../Models/Devstral-Small-2-24B-HS-CPT-SFT
15
+ revision: null
16
+ base_local_dir: base_model
17
+ trust_remote_code: true
18
+ tokenizer_use_fast: true
19
+ device_map: auto
20
+ torch_dtype: bfloat16
21
+ use_4bit: false
22
+ bnb_4bit_quant_type: nf4
23
+ bnb_4bit_use_double_quant: false
24
+ bnb_4bit_compute_dtype: bfloat16
25
+ attn_implementation: null
26
+ data:
27
+ train_jsonl: dpo_pairs_generated.jsonl
28
+ eval_jsonl: null
29
+ eval_split_ratio: 0.1
30
+ prompt_field: prompt
31
+ chosen_field: chosen
32
+ rejected_field: rejected
33
+ score_field: f1_score
34
+ format_type: chatml
35
+ system_prompt: "You are a Hyperswitch Rust code analyzer. Identify functions/structs\
36
+ \ that need modification for a given task.\n\n## Output Format\n\n##OUTPUT\nExplain\
37
+ \ the data flow and why each component must change:\n- Flow: [Input \u2192 Processing\
38
+ \ \u2192 Output with arrows]\n- For each component: \"The [ComponentName] ([path])\
39
+ \ must [action] because [reason]\u2014without this, [consequence]\"\n- Explain\
40
+ \ coupling between components\n\n##SELECT\nmodify::crates/path/to/file.rs::impl::ComponentName\n\
41
+ add::crates/another/file.rs::function::AnotherComponent\n<EOS>\n\n## Rules\n\n\
42
+ 1. Use full paths: `remove::crates/folder/file.rs::Type::Name`\n2. Use `::` for\
43
+ \ nested items: `status::StructName::Type::Name`\n3. Always explain \"must change\
44
+ \ because\" and \"without this\"\n3. Types of components: function, struct, enum,\
45
+ \ impl, trait\n4. If there is extra information (e.g., enum variants), include\
46
+ \ that too.\n5. Start with ##OUTPUT, end with ##SELECT, terminate with <EOS>\n"
47
+ max_length: 2048
48
+ shuffle: true
49
+ num_proc: 4
50
+ peft:
51
+ enabled: true
52
+ r: 16
53
+ lora_alpha: 32
54
+ lora_dropout: 0.05
55
+ bias: none
56
+ target_modules: auto
57
+ dpo:
58
+ beta: 0.1
59
+ label_smoothing: 0.0
60
+ loss_type: sigmoid
61
+ use_reference_model: true
62
+ reference_free: false
63
+ train:
64
+ num_train_epochs: 3
65
+ per_device_train_batch_size: 1
66
+ per_device_eval_batch_size: 1
67
+ gradient_accumulation_steps: 8
68
+ learning_rate: 5e-5
69
+ weight_decay: 0.0
70
+ warmup_ratio: 0.1
71
+ lr_scheduler_type: cosine
72
+ optim: adamw_torch
73
+ max_grad_norm: 1.0
74
+ gradient_checkpointing: true
75
+ logging_steps: 2
76
+ save_strategy: steps
77
+ save_steps: 100
78
+ save_total_limit: 10
79
+ evaluation_strategy: steps
80
+ eval_steps: 25
81
+ load_best_model_at_end: true
82
+ early_stopping:
83
+ enabled: true
84
+ patience: 5
85
+ min_delta: 0.001
86
+ metric: eval_loss
87
+ mode: min
88
+ resume_from_checkpoint: auto
89
+ merge:
90
+ enabled: true
91
+ merged_dtype: float16
92
+ max_shard_size: 2GB
93
+ output_dir: ./merged_14b_dpo_lora
dpo_run_24b_v1/wandb/debug-internal.log ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"time":"2025-12-26T15:52:04.520208686Z","level":"INFO","msg":"stream: starting","core version":"0.23.1"}
2
+ {"time":"2025-12-26T15:52:04.677441728Z","level":"INFO","msg":"stream: created new stream","id":"00msx40b"}
3
+ {"time":"2025-12-26T15:52:04.677538624Z","level":"INFO","msg":"handler: started","stream_id":"00msx40b"}
4
+ {"time":"2025-12-26T15:52:04.677662488Z","level":"INFO","msg":"stream: started","id":"00msx40b"}
5
+ {"time":"2025-12-26T15:52:04.677684998Z","level":"INFO","msg":"writer: started","stream_id":"00msx40b"}
6
+ {"time":"2025-12-26T15:52:04.677696651Z","level":"INFO","msg":"sender: started","stream_id":"00msx40b"}
7
+ {"time":"2025-12-26T15:52:45.509029743Z","level":"INFO","msg":"stream: closing","id":"00msx40b"}
8
+ {"time":"2025-12-26T15:52:45.704898985Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
9
+ {"time":"2025-12-26T15:52:45.822151941Z","level":"INFO","msg":"handler: closed","stream_id":"00msx40b"}
10
+ {"time":"2025-12-26T15:52:45.822254749Z","level":"INFO","msg":"sender: closed","stream_id":"00msx40b"}
11
+ {"time":"2025-12-26T15:52:45.822266001Z","level":"INFO","msg":"stream: closed","id":"00msx40b"}
dpo_run_24b_v1/wandb/debug.log ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2025-12-26 15:52:04,246 INFO MainThread:147336 [wandb_setup.py:_flush():80] Current SDK version is 0.23.1
2
+ 2025-12-26 15:52:04,246 INFO MainThread:147336 [wandb_setup.py:_flush():80] Configure stats pid to 147336
3
+ 2025-12-26 15:52:04,246 INFO MainThread:147336 [wandb_setup.py:_flush():80] Loading settings from /root/.config/wandb/settings
4
+ 2025-12-26 15:52:04,246 INFO MainThread:147336 [wandb_setup.py:_flush():80] Loading settings from /workspace/trainer-kit/DPO/wandb/settings
5
+ 2025-12-26 15:52:04,246 INFO MainThread:147336 [wandb_setup.py:_flush():80] Loading settings from environment variables
6
+ 2025-12-26 15:52:04,246 INFO MainThread:147336 [wandb_init.py:setup_run_log_directory():714] Logging user logs to runs/dpo_run_24b_v1/wandb/run-20251226_155204-00msx40b/logs/debug.log
7
+ 2025-12-26 15:52:04,246 INFO MainThread:147336 [wandb_init.py:setup_run_log_directory():715] Logging internal logs to runs/dpo_run_24b_v1/wandb/run-20251226_155204-00msx40b/logs/debug-internal.log
8
+ 2025-12-26 15:52:04,246 INFO MainThread:147336 [wandb_init.py:init():841] calling init triggers
9
+ 2025-12-26 15:52:04,246 INFO MainThread:147336 [wandb_init.py:init():846] wandb.init called with sweep_config: {}
10
+ config: {'model': {'repo_id': '../../Models/Devstral-Small-2-24B-HS-CPT-SFT', 'revision': None, 'base_local_dir': 'base_model', 'trust_remote_code': True, 'tokenizer_use_fast': True, 'device_map': 'auto', 'torch_dtype': 'bfloat16', 'use_4bit': False, 'bnb_4bit_quant_type': 'nf4', 'bnb_4bit_use_double_quant': False, 'bnb_4bit_compute_dtype': 'bfloat16', 'attn_implementation': None}, 'data': {'train_jsonl': 'dpo_pairs_generated.jsonl', 'eval_jsonl': None, 'eval_split_ratio': 0.1, 'prompt_field': 'prompt', 'chosen_field': 'chosen', 'rejected_field': 'rejected', 'score_field': 'f1_score', 'format_type': 'chatml', 'system_prompt': 'You are a Hyperswitch Rust code analyzer. Identify functions/structs that need modification for a given task.\n\n## Output Format\n\n##OUTPUT\nExplain the data flow and why each component must change:\n- Flow: [Input → Processing → Output with arrows]\n- For each component: "The [ComponentName] ([path]) must [action] because [reason]—without this, [consequence]"\n- Explain coupling between components\n\n##SELECT\nmodify::crates/path/to/file.rs::impl::ComponentName\nadd::crates/another/file.rs::function::AnotherComponent\n<EOS>\n\n## Rules\n\n1. Use full paths: `remove::crates/folder/file.rs::Type::Name`\n2. Use `::` for nested items: `status::StructName::Type::Name`\n3. Always explain "must change because" and "without this"\n3. Types of components: function, struct, enum, impl, trait\n4. If there is extra information (e.g., enum variants), include that too.\n5. Start with ##OUTPUT, end with ##SELECT, terminate with <EOS>\n', 'max_length': 2048, 'shuffle': True, 'num_proc': 4}, 'peft': {'enabled': True, 'r': 16, 'lora_alpha': 32, 'lora_dropout': 0.05, 'bias': 'none', 'target_modules': 'auto'}, 'dpo': {'beta': 0.1, 'label_smoothing': 0.0, 'loss_type': 'sigmoid', 'use_reference_model': True, 'reference_free': False}, 'train': {'num_train_epochs': 3, 'per_device_train_batch_size': 1, 'per_device_eval_batch_size': 1, 'gradient_accumulation_steps': 8, 'learning_rate': '5e-5', 'weight_decay': 0.0, 'warmup_ratio': 0.1, 'lr_scheduler_type': 'cosine', 'optim': 'adamw_torch', 'max_grad_norm': 1.0, 'gradient_checkpointing': True, 'logging_steps': 2, 'save_strategy': 'steps', 'save_steps': 100, 'save_total_limit': 10, 'evaluation_strategy': 'steps', 'eval_steps': 25, 'load_best_model_at_end': True, 'early_stopping': {'enabled': True, 'patience': 5, 'min_delta': 0.001, 'metric': 'eval_loss', 'mode': 'min'}, 'resume_from_checkpoint': 'auto'}, 'run_dir': 'runs/dpo_run_24b_v1', '_wandb': {}}
11
+ 2025-12-26 15:52:04,246 INFO MainThread:147336 [wandb_init.py:init():889] starting backend
12
+ 2025-12-26 15:52:04,513 INFO MainThread:147336 [wandb_init.py:init():892] sending inform_init request
13
+ 2025-12-26 15:52:04,518 INFO MainThread:147336 [wandb_init.py:init():900] backend started and connected
14
+ 2025-12-26 15:52:04,520 INFO MainThread:147336 [wandb_init.py:init():970] updated telemetry
15
+ 2025-12-26 15:52:04,521 INFO MainThread:147336 [wandb_init.py:init():994] communicating run to backend with 90.0 second timeout
16
+ 2025-12-26 15:52:04,863 INFO MainThread:147336 [wandb_init.py:init():1041] starting run threads in backend
17
+ 2025-12-26 15:52:04,981 INFO MainThread:147336 [wandb_run.py:_console_start():2521] atexit reg
18
+ 2025-12-26 15:52:04,981 INFO MainThread:147336 [wandb_run.py:_redirect():2369] redirect: wrap_raw
19
+ 2025-12-26 15:52:04,981 INFO MainThread:147336 [wandb_run.py:_redirect():2438] Wrapping output streams.
20
+ 2025-12-26 15:52:04,981 INFO MainThread:147336 [wandb_run.py:_redirect():2461] Redirects installed.
21
+ 2025-12-26 15:52:04,987 INFO MainThread:147336 [wandb_init.py:init():1081] run started, returning control to user process
22
+ 2025-12-26 15:52:45,509 INFO wandb-AsyncioManager-main:147336 [service_client.py:_forward_responses():80] Reached EOF.
23
+ 2025-12-26 15:52:45,509 INFO wandb-AsyncioManager-main:147336 [mailbox.py:close():137] Closing mailbox, abandoning 1 handles.
dpo_run_24b_v1/wandb/run-20251226_152629-ymohys1q/files/config.yaml ADDED
@@ -0,0 +1,165 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _wandb:
2
+ value:
3
+ cli_version: 0.23.1
4
+ e:
5
+ 05zaxpswkd8t9j7nmlszf02dmdnkyst5:
6
+ args:
7
+ - --config
8
+ - config_dpo.yaml
9
+ codePath: run_dpo.py
10
+ codePathLocal: run_dpo.py
11
+ cpu_count: 12
12
+ cpu_count_logical: 24
13
+ cudaVersion: "13.0"
14
+ disk:
15
+ /:
16
+ total: "791251738624"
17
+ used: "315579105280"
18
+ email: shaiksirajuddin9949@gmail.com
19
+ executable: /workspace/llm_finetuning_env/bin/python
20
+ gpu: NVIDIA A100-SXM4-80GB
21
+ gpu_count: 2
22
+ gpu_nvidia:
23
+ - architecture: Ampere
24
+ cudaCores: 6912
25
+ memoryTotal: "85899345920"
26
+ name: NVIDIA A100-SXM4-80GB
27
+ uuid: GPU-989794b0-ec3b-13bf-db9f-3fbe341497ba
28
+ - architecture: Ampere
29
+ cudaCores: 6912
30
+ memoryTotal: "85899345920"
31
+ name: NVIDIA A100-SXM4-80GB
32
+ uuid: GPU-3790aa64-60ef-9eac-b0b1-b278ee8c0d40
33
+ host: a100-2gpu-shell-session-757d587799-mfdvv
34
+ memory:
35
+ total: "359047892992"
36
+ os: Linux-6.12.46+-x86_64-with-glibc2.35
37
+ program: /workspace/trainer-kit/DPO/run_dpo.py
38
+ python: CPython 3.10.12
39
+ root: runs/dpo_run_24b_v1
40
+ startedAt: "2025-12-26T15:26:29.019842Z"
41
+ writerId: 05zaxpswkd8t9j7nmlszf02dmdnkyst5
42
+ m: []
43
+ python_version: 3.10.12
44
+ t:
45
+ "1":
46
+ - 1
47
+ - 11
48
+ - 41
49
+ - 49
50
+ - 51
51
+ - 71
52
+ - 84
53
+ - 98
54
+ "2":
55
+ - 1
56
+ - 11
57
+ - 41
58
+ - 49
59
+ - 51
60
+ - 71
61
+ - 84
62
+ - 98
63
+ "3":
64
+ - 15
65
+ - 16
66
+ "4": 3.10.12
67
+ "5": 0.23.1
68
+ "6": 5.0.0.dev0
69
+ "12": 0.23.1
70
+ "13": linux-x86_64
71
+ data:
72
+ value:
73
+ chosen_field: chosen
74
+ eval_jsonl: null
75
+ eval_split_ratio: 0.1
76
+ format_type: chatml
77
+ max_length: 2048
78
+ num_proc: 4
79
+ prompt_field: prompt
80
+ rejected_field: rejected
81
+ score_field: f1_score
82
+ shuffle: true
83
+ system_prompt: |
84
+ You are a Hyperswitch Rust code analyzer. Identify functions/structs that need modification for a given task.
85
+
86
+ ## Output Format
87
+
88
+ ##OUTPUT
89
+ Explain the data flow and why each component must change:
90
+ - Flow: [Input → Processing → Output with arrows]
91
+ - For each component: "The [ComponentName] ([path]) must [action] because [reason]—without this, [consequence]"
92
+ - Explain coupling between components
93
+
94
+ ##SELECT
95
+ modify::crates/path/to/file.rs::impl::ComponentName
96
+ add::crates/another/file.rs::function::AnotherComponent
97
+ <EOS>
98
+
99
+ ## Rules
100
+
101
+ 1. Use full paths: `remove::crates/folder/file.rs::Type::Name`
102
+ 2. Use `::` for nested items: `status::StructName::Type::Name`
103
+ 3. Always explain "must change because" and "without this"
104
+ 3. Types of components: function, struct, enum, impl, trait
105
+ 4. If there is extra information (e.g., enum variants), include that too.
106
+ 5. Start with ##OUTPUT, end with ##SELECT, terminate with <EOS>
107
+ train_jsonl: dpo_pairs_generated.jsonl
108
+ dpo:
109
+ value:
110
+ beta: 0.1
111
+ label_smoothing: 0
112
+ loss_type: sigmoid
113
+ reference_free: false
114
+ use_reference_model: true
115
+ model:
116
+ value:
117
+ attn_implementation: null
118
+ base_local_dir: base_model
119
+ bnb_4bit_compute_dtype: bfloat16
120
+ bnb_4bit_quant_type: nf4
121
+ bnb_4bit_use_double_quant: false
122
+ device_map: auto
123
+ repo_id: ../../Models/Devstral-Small-2-24B-HS-CPT-SFT
124
+ revision: null
125
+ tokenizer_use_fast: true
126
+ torch_dtype: bfloat16
127
+ trust_remote_code: true
128
+ use_4bit: false
129
+ peft:
130
+ value:
131
+ bias: none
132
+ enabled: true
133
+ lora_alpha: 32
134
+ lora_dropout: 0.05
135
+ r: 16
136
+ target_modules: auto
137
+ run_dir:
138
+ value: runs/dpo_run_24b_v1
139
+ train:
140
+ value:
141
+ early_stopping:
142
+ enabled: true
143
+ metric: eval_loss
144
+ min_delta: 0.001
145
+ mode: min
146
+ patience: 5
147
+ eval_steps: 25
148
+ evaluation_strategy: steps
149
+ gradient_accumulation_steps: 8
150
+ gradient_checkpointing: true
151
+ learning_rate: "5e-5"
152
+ load_best_model_at_end: true
153
+ logging_steps: 2
154
+ lr_scheduler_type: cosine
155
+ max_grad_norm: 1
156
+ num_train_epochs: 3
157
+ optim: adamw_torch
158
+ per_device_eval_batch_size: 1
159
+ per_device_train_batch_size: 1
160
+ resume_from_checkpoint: auto
161
+ save_steps: 100
162
+ save_strategy: steps
163
+ save_total_limit: 10
164
+ warmup_ratio: 0.1
165
+ weight_decay: 0
dpo_run_24b_v1/wandb/run-20251226_152629-ymohys1q/files/output.log ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Wandb initialized: project='dpo-training', name='auto-generated'
2
+ `torch_dtype` is deprecated! Use `dtype` instead!
3
+ Traceback (most recent call last):
4
+ File "/workspace/trainer-kit/DPO/run_dpo.py", line 555, in load_base_model_and_tokenizer
5
+ model = AutoModelForCausalLM.from_pretrained(
6
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/models/auto/auto_factory.py", line 376, in from_pretrained
7
+ raise ValueError(
8
+ ValueError: Unrecognized configuration class <class 'transformers.models.mistral3.configuration_mistral3.Mistral3Config'> for this kind of AutoModel: AutoModelForCausalLM.
9
+ Model type should be one of AfmoeConfig, ApertusConfig, ArceeConfig, AriaTextConfig, BambaConfig, BartConfig, BertConfig, BertGenerationConfig, BigBirdConfig, BigBirdPegasusConfig, BioGptConfig, BitNetConfig, BlenderbotConfig, BlenderbotSmallConfig, BloomConfig, BltConfig, CamembertConfig, LlamaConfig, CodeGenConfig, CohereConfig, Cohere2Config, CpmAntConfig, CTRLConfig, CwmConfig, Data2VecTextConfig, DbrxConfig, DeepseekV2Config, DeepseekV3Config, DiffLlamaConfig, DogeConfig, Dots1Config, ElectraConfig, Emu3Config, ErnieConfig, Ernie4_5Config, Ernie4_5_MoeConfig, Exaone4Config, FalconConfig, FalconH1Config, FalconMambaConfig, FlexOlmoConfig, FuyuConfig, GemmaConfig, Gemma2Config, Gemma3Config, Gemma3TextConfig, Gemma3nConfig, Gemma3nTextConfig, GitConfig, GlmConfig, Glm4Config, Glm4MoeConfig, GotOcr2Config, GPT2Config, GPT2Config, GPTBigCodeConfig, GPTNeoConfig, GPTNeoXConfig, GPTNeoXJapaneseConfig, GptOssConfig, GPTJConfig, GraniteConfig, GraniteMoeConfig, GraniteMoeHybridConfig, GraniteMoeSharedConfig, HeliumConfig, HunYuanDenseV1Config, HunYuanMoEV1Config, Jais2Config, JambaConfig, JetMoeConfig, Lfm2Config, Lfm2MoeConfig, LlamaConfig, Llama4Config, Llama4TextConfig, LongcatFlashConfig, MambaConfig, Mamba2Config, MarianConfig, MBartConfig, MegatronBertConfig, MiniMaxConfig, MinistralConfig, Ministral3Config, MistralConfig, MixtralConfig, MllamaConfig, ModernBertDecoderConfig, MoshiConfig, MptConfig, MusicgenConfig, MusicgenMelodyConfig, MvpConfig, NanoChatConfig, NemotronConfig, OlmoConfig, Olmo2Config, Olmo3Config, OlmoeConfig, OpenAIGPTConfig, OPTConfig, PegasusConfig, PersimmonConfig, PhiConfig, Phi3Config, Phi4MultimodalConfig, PhimoeConfig, PLBartConfig, ProphetNetConfig, Qwen2Config, Qwen2MoeConfig, Qwen3Config, Qwen3MoeConfig, Qwen3NextConfig, RecurrentGemmaConfig, ReformerConfig, RemBertConfig, RobertaConfig, RobertaPreLayerNormConfig, RoCBertConfig, RoFormerConfig, RwkvConfig, SeedOssConfig, SmolLM3Config, StableLmConfig, Starcoder2Config, TrOCRConfig, VaultGemmaConfig, WhisperConfig, XGLMConfig, XLMConfig, XLMRobertaConfig, XLMRobertaXLConfig, XLNetConfig, xLSTMConfig, XmodConfig, ZambaConfig, Zamba2Config.
10
+
11
+ During handling of the above exception, another exception occurred:
12
+
13
+ Traceback (most recent call last):
14
+ File "/workspace/trainer-kit/DPO/run_dpo.py", line 953, in <module>
15
+ main()
16
+ File "/workspace/trainer-kit/DPO/run_dpo.py", line 744, in main
17
+ model, tokenizer = load_base_model_and_tokenizer(cfg, base_dir)
18
+ File "/workspace/trainer-kit/DPO/run_dpo.py", line 568, in load_base_model_and_tokenizer
19
+ model = AutoModelForCausalLM.from_pretrained(
20
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/models/auto/auto_factory.py", line 376, in from_pretrained
21
+ raise ValueError(
22
+ ValueError: Unrecognized configuration class <class 'transformers.models.mistral3.configuration_mistral3.Mistral3Config'> for this kind of AutoModel: AutoModelForCausalLM.
23
+ Model type should be one of AfmoeConfig, ApertusConfig, ArceeConfig, AriaTextConfig, BambaConfig, BartConfig, BertConfig, BertGenerationConfig, BigBirdConfig, BigBirdPegasusConfig, BioGptConfig, BitNetConfig, BlenderbotConfig, BlenderbotSmallConfig, BloomConfig, BltConfig, CamembertConfig, LlamaConfig, CodeGenConfig, CohereConfig, Cohere2Config, CpmAntConfig, CTRLConfig, CwmConfig, Data2VecTextConfig, DbrxConfig, DeepseekV2Config, DeepseekV3Config, DiffLlamaConfig, DogeConfig, Dots1Config, ElectraConfig, Emu3Config, ErnieConfig, Ernie4_5Config, Ernie4_5_MoeConfig, Exaone4Config, FalconConfig, FalconH1Config, FalconMambaConfig, FlexOlmoConfig, FuyuConfig, GemmaConfig, Gemma2Config, Gemma3Config, Gemma3TextConfig, Gemma3nConfig, Gemma3nTextConfig, GitConfig, GlmConfig, Glm4Config, Glm4MoeConfig, GotOcr2Config, GPT2Config, GPT2Config, GPTBigCodeConfig, GPTNeoConfig, GPTNeoXConfig, GPTNeoXJapaneseConfig, GptOssConfig, GPTJConfig, GraniteConfig, GraniteMoeConfig, GraniteMoeHybridConfig, GraniteMoeSharedConfig, HeliumConfig, HunYuanDenseV1Config, HunYuanMoEV1Config, Jais2Config, JambaConfig, JetMoeConfig, Lfm2Config, Lfm2MoeConfig, LlamaConfig, Llama4Config, Llama4TextConfig, LongcatFlashConfig, MambaConfig, Mamba2Config, MarianConfig, MBartConfig, MegatronBertConfig, MiniMaxConfig, MinistralConfig, Ministral3Config, MistralConfig, MixtralConfig, MllamaConfig, ModernBertDecoderConfig, MoshiConfig, MptConfig, MusicgenConfig, MusicgenMelodyConfig, MvpConfig, NanoChatConfig, NemotronConfig, OlmoConfig, Olmo2Config, Olmo3Config, OlmoeConfig, OpenAIGPTConfig, OPTConfig, PegasusConfig, PersimmonConfig, PhiConfig, Phi3Config, Phi4MultimodalConfig, PhimoeConfig, PLBartConfig, ProphetNetConfig, Qwen2Config, Qwen2MoeConfig, Qwen3Config, Qwen3MoeConfig, Qwen3NextConfig, RecurrentGemmaConfig, ReformerConfig, RemBertConfig, RobertaConfig, RobertaPreLayerNormConfig, RoCBertConfig, RoFormerConfig, RwkvConfig, SeedOssConfig, SmolLM3Config, StableLmConfig, Starcoder2Config, TrOCRConfig, VaultGemmaConfig, WhisperConfig, XGLMConfig, XLMConfig, XLMRobertaConfig, XLMRobertaXLConfig, XLNetConfig, xLSTMConfig, XmodConfig, ZambaConfig, Zamba2Config.
24
+ Traceback (most recent call last):
25
+ File "/workspace/trainer-kit/DPO/run_dpo.py", line 555, in load_base_model_and_tokenizer
26
+ model = AutoModelForCausalLM.from_pretrained(
27
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/models/auto/auto_factory.py", line 376, in from_pretrained
28
+ raise ValueError(
29
+ ValueError: Unrecognized configuration class <class 'transformers.models.mistral3.configuration_mistral3.Mistral3Config'> for this kind of AutoModel: AutoModelForCausalLM.
30
+ Model type should be one of AfmoeConfig, ApertusConfig, ArceeConfig, AriaTextConfig, BambaConfig, BartConfig, BertConfig, BertGenerationConfig, BigBirdConfig, BigBirdPegasusConfig, BioGptConfig, BitNetConfig, BlenderbotConfig, BlenderbotSmallConfig, BloomConfig, BltConfig, CamembertConfig, LlamaConfig, CodeGenConfig, CohereConfig, Cohere2Config, CpmAntConfig, CTRLConfig, CwmConfig, Data2VecTextConfig, DbrxConfig, DeepseekV2Config, DeepseekV3Config, DiffLlamaConfig, DogeConfig, Dots1Config, ElectraConfig, Emu3Config, ErnieConfig, Ernie4_5Config, Ernie4_5_MoeConfig, Exaone4Config, FalconConfig, FalconH1Config, FalconMambaConfig, FlexOlmoConfig, FuyuConfig, GemmaConfig, Gemma2Config, Gemma3Config, Gemma3TextConfig, Gemma3nConfig, Gemma3nTextConfig, GitConfig, GlmConfig, Glm4Config, Glm4MoeConfig, GotOcr2Config, GPT2Config, GPT2Config, GPTBigCodeConfig, GPTNeoConfig, GPTNeoXConfig, GPTNeoXJapaneseConfig, GptOssConfig, GPTJConfig, GraniteConfig, GraniteMoeConfig, GraniteMoeHybridConfig, GraniteMoeSharedConfig, HeliumConfig, HunYuanDenseV1Config, HunYuanMoEV1Config, Jais2Config, JambaConfig, JetMoeConfig, Lfm2Config, Lfm2MoeConfig, LlamaConfig, Llama4Config, Llama4TextConfig, LongcatFlashConfig, MambaConfig, Mamba2Config, MarianConfig, MBartConfig, MegatronBertConfig, MiniMaxConfig, MinistralConfig, Ministral3Config, MistralConfig, MixtralConfig, MllamaConfig, ModernBertDecoderConfig, MoshiConfig, MptConfig, MusicgenConfig, MusicgenMelodyConfig, MvpConfig, NanoChatConfig, NemotronConfig, OlmoConfig, Olmo2Config, Olmo3Config, OlmoeConfig, OpenAIGPTConfig, OPTConfig, PegasusConfig, PersimmonConfig, PhiConfig, Phi3Config, Phi4MultimodalConfig, PhimoeConfig, PLBartConfig, ProphetNetConfig, Qwen2Config, Qwen2MoeConfig, Qwen3Config, Qwen3MoeConfig, Qwen3NextConfig, RecurrentGemmaConfig, ReformerConfig, RemBertConfig, RobertaConfig, RobertaPreLayerNormConfig, RoCBertConfig, RoFormerConfig, RwkvConfig, SeedOssConfig, SmolLM3Config, StableLmConfig, Starcoder2Config, TrOCRConfig, VaultGemmaConfig, WhisperConfig, XGLMConfig, XLMConfig, XLMRobertaConfig, XLMRobertaXLConfig, XLNetConfig, xLSTMConfig, XmodConfig, ZambaConfig, Zamba2Config.
31
+
32
+ During handling of the above exception, another exception occurred:
33
+
34
+ Traceback (most recent call last):
35
+ File "/workspace/trainer-kit/DPO/run_dpo.py", line 953, in <module>
36
+ main()
37
+ File "/workspace/trainer-kit/DPO/run_dpo.py", line 744, in main
38
+ model, tokenizer = load_base_model_and_tokenizer(cfg, base_dir)
39
+ File "/workspace/trainer-kit/DPO/run_dpo.py", line 568, in load_base_model_and_tokenizer
40
+ model = AutoModelForCausalLM.from_pretrained(
41
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/models/auto/auto_factory.py", line 376, in from_pretrained
42
+ raise ValueError(
43
+ ValueError: Unrecognized configuration class <class 'transformers.models.mistral3.configuration_mistral3.Mistral3Config'> for this kind of AutoModel: AutoModelForCausalLM.
44
+ Model type should be one of AfmoeConfig, ApertusConfig, ArceeConfig, AriaTextConfig, BambaConfig, BartConfig, BertConfig, BertGenerationConfig, BigBirdConfig, BigBirdPegasusConfig, BioGptConfig, BitNetConfig, BlenderbotConfig, BlenderbotSmallConfig, BloomConfig, BltConfig, CamembertConfig, LlamaConfig, CodeGenConfig, CohereConfig, Cohere2Config, CpmAntConfig, CTRLConfig, CwmConfig, Data2VecTextConfig, DbrxConfig, DeepseekV2Config, DeepseekV3Config, DiffLlamaConfig, DogeConfig, Dots1Config, ElectraConfig, Emu3Config, ErnieConfig, Ernie4_5Config, Ernie4_5_MoeConfig, Exaone4Config, FalconConfig, FalconH1Config, FalconMambaConfig, FlexOlmoConfig, FuyuConfig, GemmaConfig, Gemma2Config, Gemma3Config, Gemma3TextConfig, Gemma3nConfig, Gemma3nTextConfig, GitConfig, GlmConfig, Glm4Config, Glm4MoeConfig, GotOcr2Config, GPT2Config, GPT2Config, GPTBigCodeConfig, GPTNeoConfig, GPTNeoXConfig, GPTNeoXJapaneseConfig, GptOssConfig, GPTJConfig, GraniteConfig, GraniteMoeConfig, GraniteMoeHybridConfig, GraniteMoeSharedConfig, HeliumConfig, HunYuanDenseV1Config, HunYuanMoEV1Config, Jais2Config, JambaConfig, JetMoeConfig, Lfm2Config, Lfm2MoeConfig, LlamaConfig, Llama4Config, Llama4TextConfig, LongcatFlashConfig, MambaConfig, Mamba2Config, MarianConfig, MBartConfig, MegatronBertConfig, MiniMaxConfig, MinistralConfig, Ministral3Config, MistralConfig, MixtralConfig, MllamaConfig, ModernBertDecoderConfig, MoshiConfig, MptConfig, MusicgenConfig, MusicgenMelodyConfig, MvpConfig, NanoChatConfig, NemotronConfig, OlmoConfig, Olmo2Config, Olmo3Config, OlmoeConfig, OpenAIGPTConfig, OPTConfig, PegasusConfig, PersimmonConfig, PhiConfig, Phi3Config, Phi4MultimodalConfig, PhimoeConfig, PLBartConfig, ProphetNetConfig, Qwen2Config, Qwen2MoeConfig, Qwen3Config, Qwen3MoeConfig, Qwen3NextConfig, RecurrentGemmaConfig, ReformerConfig, RemBertConfig, RobertaConfig, RobertaPreLayerNormConfig, RoCBertConfig, RoFormerConfig, RwkvConfig, SeedOssConfig, SmolLM3Config, StableLmConfig, Starcoder2Config, TrOCRConfig, VaultGemmaConfig, WhisperConfig, XGLMConfig, XLMConfig, XLMRobertaConfig, XLMRobertaXLConfig, XLNetConfig, xLSTMConfig, XmodConfig, ZambaConfig, Zamba2Config.
dpo_run_24b_v1/wandb/run-20251226_152629-ymohys1q/files/requirements.txt ADDED
@@ -0,0 +1,104 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ exceptiongroup==1.3.1
2
+ wheel==0.45.1
3
+ python-dateutil==2.9.0.post0
4
+ nvidia-ml-py==13.580.82
5
+ huggingface_hub==1.2.3
6
+ idna==3.11
7
+ click==8.3.1
8
+ numpy==2.2.6
9
+ httpx==0.28.1
10
+ tokenizers==0.22.1
11
+ sympy==1.13.1
12
+ yarl==1.22.0
13
+ async-timeout==5.0.1
14
+ datasets==4.4.2
15
+ platformdirs==4.5.1
16
+ nvidia-cuda-cupti-cu12==12.1.105
17
+ nvidia-nvtx-cu12==12.1.105
18
+ smmap==5.0.2
19
+ accelerate==1.12.0
20
+ requests==2.32.5
21
+ aiohttp==3.13.2
22
+ bitsandbytes==0.49.0
23
+ nvidia-cublas-cu12==12.1.3.1
24
+ mpmath==1.3.0
25
+ torchaudio==2.5.1+cu121
26
+ nvidia-cuda-runtime-cu12==12.1.105
27
+ typing-inspection==0.4.2
28
+ GitPython==3.1.45
29
+ xxhash==3.6.0
30
+ nvidia-cusolver-cu12==11.4.5.107
31
+ pydantic_core==2.41.5
32
+ six==1.17.0
33
+ torchvision==0.20.1+cu121
34
+ typing_extensions==4.15.0
35
+ triton==3.1.0
36
+ charset-normalizer==3.4.4
37
+ nvitop==1.6.1
38
+ wandb==0.23.1
39
+ regex==2025.11.3
40
+ pip==25.3
41
+ nvidia-cusparse-cu12==12.1.0.106
42
+ pytz==2025.2
43
+ Jinja2==3.1.6
44
+ psutil==7.2.0
45
+ pillow==12.0.0
46
+ packaging==25.0
47
+ safetensors==0.7.0
48
+ sentry-sdk==2.48.0
49
+ gitdb==4.0.12
50
+ httpcore==1.0.9
51
+ setuptools==80.9.0
52
+ nvidia-cufft-cu12==11.0.2.54
53
+ anyio==4.12.0
54
+ transformers==5.0.0.dev0
55
+ pydantic==2.12.5
56
+ fsspec==2025.10.0
57
+ filelock==3.20.0
58
+ PyYAML==6.0.3
59
+ hf-xet==1.2.0
60
+ nvidia-cudnn-cu12==9.1.0.70
61
+ tqdm==4.67.1
62
+ MarkupSafe==2.1.5
63
+ attrs==25.4.0
64
+ nvidia-cuda-nvrtc-cu12==12.1.105
65
+ peft==0.18.0
66
+ aiohappyeyeballs==2.6.1
67
+ networkx==3.4.2
68
+ nvidia-nvjitlink-cu12==12.9.86
69
+ certifi==2025.11.12
70
+ pyarrow==22.0.0
71
+ dill==0.4.0
72
+ protobuf==6.33.2
73
+ aiosignal==1.4.0
74
+ frozenlist==1.8.0
75
+ urllib3==2.6.2
76
+ propcache==0.4.1
77
+ tzdata==2025.3
78
+ pandas==2.3.3
79
+ annotated-types==0.7.0
80
+ shellingham==1.5.4
81
+ nvidia-nccl-cu12==2.21.5
82
+ multidict==6.7.0
83
+ nvidia-curand-cu12==10.3.2.106
84
+ trl==0.26.2
85
+ torch==2.5.1+cu121
86
+ h11==0.16.0
87
+ multiprocess==0.70.18
88
+ typer-slim==0.21.0
89
+ wheel==0.45.1
90
+ tomli==2.0.1
91
+ autocommand==2.2.2
92
+ jaraco.context==5.3.0
93
+ zipp==3.19.2
94
+ packaging==24.2
95
+ inflect==7.3.1
96
+ typing_extensions==4.12.2
97
+ platformdirs==4.2.2
98
+ jaraco.functools==4.0.1
99
+ jaraco.collections==5.1.0
100
+ jaraco.text==3.12.1
101
+ backports.tarfile==1.2.0
102
+ more-itertools==10.3.0
103
+ importlib_metadata==8.0.0
104
+ typeguard==4.3.0
dpo_run_24b_v1/wandb/run-20251226_152629-ymohys1q/files/wandb-metadata.json ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "os": "Linux-6.12.46+-x86_64-with-glibc2.35",
3
+ "python": "CPython 3.10.12",
4
+ "startedAt": "2025-12-26T15:26:29.019842Z",
5
+ "args": [
6
+ "--config",
7
+ "config_dpo.yaml"
8
+ ],
9
+ "program": "/workspace/trainer-kit/DPO/run_dpo.py",
10
+ "codePath": "run_dpo.py",
11
+ "codePathLocal": "run_dpo.py",
12
+ "email": "shaiksirajuddin9949@gmail.com",
13
+ "root": "runs/dpo_run_24b_v1",
14
+ "host": "a100-2gpu-shell-session-757d587799-mfdvv",
15
+ "executable": "/workspace/llm_finetuning_env/bin/python",
16
+ "cpu_count": 12,
17
+ "cpu_count_logical": 24,
18
+ "gpu": "NVIDIA A100-SXM4-80GB",
19
+ "gpu_count": 2,
20
+ "disk": {
21
+ "/": {
22
+ "total": "791251738624",
23
+ "used": "315579105280"
24
+ }
25
+ },
26
+ "memory": {
27
+ "total": "359047892992"
28
+ },
29
+ "gpu_nvidia": [
30
+ {
31
+ "name": "NVIDIA A100-SXM4-80GB",
32
+ "memoryTotal": "85899345920",
33
+ "cudaCores": 6912,
34
+ "architecture": "Ampere",
35
+ "uuid": "GPU-989794b0-ec3b-13bf-db9f-3fbe341497ba"
36
+ },
37
+ {
38
+ "name": "NVIDIA A100-SXM4-80GB",
39
+ "memoryTotal": "85899345920",
40
+ "cudaCores": 6912,
41
+ "architecture": "Ampere",
42
+ "uuid": "GPU-3790aa64-60ef-9eac-b0b1-b278ee8c0d40"
43
+ }
44
+ ],
45
+ "cudaVersion": "13.0",
46
+ "writerId": "05zaxpswkd8t9j7nmlszf02dmdnkyst5"
47
+ }
dpo_run_24b_v1/wandb/run-20251226_152629-ymohys1q/files/wandb-summary.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"_wandb":{"runtime":2},"_runtime":2}
dpo_run_24b_v1/wandb/run-20251226_152629-ymohys1q/logs/debug-core.log ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"time":"2025-12-26T15:26:29.104421101Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpap_k1_i4/port-135896.txt","pid":135896,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false}
2
+ {"time":"2025-12-26T15:26:29.105138524Z","level":"INFO","msg":"server: will exit if parent process dies","ppid":135896}
3
+ {"time":"2025-12-26T15:26:29.105108641Z","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-135896-135976-1788857957/socket","Net":"unix"}}
4
+ {"time":"2025-12-26T15:26:29.287192608Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"}
5
+ {"time":"2025-12-26T15:26:29.293378709Z","level":"INFO","msg":"handleInformInit: received","streamId":"ymohys1q","id":"1(@)"}
6
+ {"time":"2025-12-26T15:26:29.445681764Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"ymohys1q","id":"1(@)"}
7
+ {"time":"2025-12-26T15:26:32.2673862Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"}
8
+ {"time":"2025-12-26T15:26:32.267537637Z","level":"INFO","msg":"server is shutting down"}
9
+ {"time":"2025-12-26T15:26:32.26754914Z","level":"INFO","msg":"connection: closing","id":"1(@)"}
10
+ {"time":"2025-12-26T15:26:32.267646825Z","level":"INFO","msg":"connection: closed successfully","id":"1(@)"}
11
+ {"time":"2025-12-26T15:26:32.267756746Z","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-135896-135976-1788857957/socket","Net":"unix"}}
12
+ {"time":"2025-12-26T15:26:32.914746116Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"}
13
+ {"time":"2025-12-26T15:26:32.914789004Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"}
14
+ {"time":"2025-12-26T15:26:32.914805725Z","level":"INFO","msg":"server is closed"}
dpo_run_24b_v1/wandb/run-20251226_152629-ymohys1q/logs/debug-internal.log ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"time":"2025-12-26T15:26:29.293501638Z","level":"INFO","msg":"stream: starting","core version":"0.23.1"}
2
+ {"time":"2025-12-26T15:26:29.445455498Z","level":"INFO","msg":"stream: created new stream","id":"ymohys1q"}
3
+ {"time":"2025-12-26T15:26:29.445544858Z","level":"INFO","msg":"handler: started","stream_id":"ymohys1q"}
4
+ {"time":"2025-12-26T15:26:29.445672321Z","level":"INFO","msg":"stream: started","id":"ymohys1q"}
5
+ {"time":"2025-12-26T15:26:29.445699121Z","level":"INFO","msg":"writer: started","stream_id":"ymohys1q"}
6
+ {"time":"2025-12-26T15:26:29.445775229Z","level":"INFO","msg":"sender: started","stream_id":"ymohys1q"}
7
+ {"time":"2025-12-26T15:26:32.267559653Z","level":"INFO","msg":"stream: closing","id":"ymohys1q"}
8
+ {"time":"2025-12-26T15:26:32.786868259Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
9
+ {"time":"2025-12-26T15:26:32.913988671Z","level":"INFO","msg":"handler: closed","stream_id":"ymohys1q"}
10
+ {"time":"2025-12-26T15:26:32.914088362Z","level":"INFO","msg":"sender: closed","stream_id":"ymohys1q"}
11
+ {"time":"2025-12-26T15:26:32.914111382Z","level":"INFO","msg":"stream: closed","id":"ymohys1q"}
dpo_run_24b_v1/wandb/run-20251226_152629-ymohys1q/logs/debug.log ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2025-12-26 15:26:29,021 INFO MainThread:135896 [wandb_setup.py:_flush():80] Current SDK version is 0.23.1
2
+ 2025-12-26 15:26:29,021 INFO MainThread:135896 [wandb_setup.py:_flush():80] Configure stats pid to 135896
3
+ 2025-12-26 15:26:29,021 INFO MainThread:135896 [wandb_setup.py:_flush():80] Loading settings from /root/.config/wandb/settings
4
+ 2025-12-26 15:26:29,021 INFO MainThread:135896 [wandb_setup.py:_flush():80] Loading settings from /workspace/trainer-kit/DPO/wandb/settings
5
+ 2025-12-26 15:26:29,021 INFO MainThread:135896 [wandb_setup.py:_flush():80] Loading settings from environment variables
6
+ 2025-12-26 15:26:29,021 INFO MainThread:135896 [wandb_init.py:setup_run_log_directory():714] Logging user logs to runs/dpo_run_24b_v1/wandb/run-20251226_152629-ymohys1q/logs/debug.log
7
+ 2025-12-26 15:26:29,021 INFO MainThread:135896 [wandb_init.py:setup_run_log_directory():715] Logging internal logs to runs/dpo_run_24b_v1/wandb/run-20251226_152629-ymohys1q/logs/debug-internal.log
8
+ 2025-12-26 15:26:29,021 INFO MainThread:135896 [wandb_init.py:init():841] calling init triggers
9
+ 2025-12-26 15:26:29,021 INFO MainThread:135896 [wandb_init.py:init():846] wandb.init called with sweep_config: {}
10
+ config: {'model': {'repo_id': '../../Models/Devstral-Small-2-24B-HS-CPT-SFT', 'revision': None, 'base_local_dir': 'base_model', 'trust_remote_code': True, 'tokenizer_use_fast': True, 'device_map': 'auto', 'torch_dtype': 'bfloat16', 'use_4bit': False, 'bnb_4bit_quant_type': 'nf4', 'bnb_4bit_use_double_quant': False, 'bnb_4bit_compute_dtype': 'bfloat16', 'attn_implementation': None}, 'data': {'train_jsonl': 'dpo_pairs_generated.jsonl', 'eval_jsonl': None, 'eval_split_ratio': 0.1, 'prompt_field': 'prompt', 'chosen_field': 'chosen', 'rejected_field': 'rejected', 'score_field': 'f1_score', 'format_type': 'chatml', 'system_prompt': 'You are a Hyperswitch Rust code analyzer. Identify functions/structs that need modification for a given task.\n\n## Output Format\n\n##OUTPUT\nExplain the data flow and why each component must change:\n- Flow: [Input → Processing → Output with arrows]\n- For each component: "The [ComponentName] ([path]) must [action] because [reason]—without this, [consequence]"\n- Explain coupling between components\n\n##SELECT\nmodify::crates/path/to/file.rs::impl::ComponentName\nadd::crates/another/file.rs::function::AnotherComponent\n<EOS>\n\n## Rules\n\n1. Use full paths: `remove::crates/folder/file.rs::Type::Name`\n2. Use `::` for nested items: `status::StructName::Type::Name`\n3. Always explain "must change because" and "without this"\n3. Types of components: function, struct, enum, impl, trait\n4. If there is extra information (e.g., enum variants), include that too.\n5. Start with ##OUTPUT, end with ##SELECT, terminate with <EOS>\n', 'max_length': 2048, 'shuffle': True, 'num_proc': 4}, 'peft': {'enabled': True, 'r': 16, 'lora_alpha': 32, 'lora_dropout': 0.05, 'bias': 'none', 'target_modules': 'auto'}, 'dpo': {'beta': 0.1, 'label_smoothing': 0.0, 'loss_type': 'sigmoid', 'use_reference_model': True, 'reference_free': False}, 'train': {'num_train_epochs': 3, 'per_device_train_batch_size': 1, 'per_device_eval_batch_size': 1, 'gradient_accumulation_steps': 8, 'learning_rate': '5e-5', 'weight_decay': 0.0, 'warmup_ratio': 0.1, 'lr_scheduler_type': 'cosine', 'optim': 'adamw_torch', 'max_grad_norm': 1.0, 'gradient_checkpointing': True, 'logging_steps': 2, 'save_strategy': 'steps', 'save_steps': 100, 'save_total_limit': 10, 'evaluation_strategy': 'steps', 'eval_steps': 25, 'load_best_model_at_end': True, 'early_stopping': {'enabled': True, 'patience': 5, 'min_delta': 0.001, 'metric': 'eval_loss', 'mode': 'min'}, 'resume_from_checkpoint': 'auto'}, 'run_dir': 'runs/dpo_run_24b_v1', '_wandb': {}}
11
+ 2025-12-26 15:26:29,021 INFO MainThread:135896 [wandb_init.py:init():889] starting backend
12
+ 2025-12-26 15:26:29,287 INFO MainThread:135896 [wandb_init.py:init():892] sending inform_init request
13
+ 2025-12-26 15:26:29,291 INFO MainThread:135896 [wandb_init.py:init():900] backend started and connected
14
+ 2025-12-26 15:26:29,293 INFO MainThread:135896 [wandb_init.py:init():970] updated telemetry
15
+ 2025-12-26 15:26:29,294 INFO MainThread:135896 [wandb_init.py:init():994] communicating run to backend with 90.0 second timeout
16
+ 2025-12-26 15:26:29,634 INFO MainThread:135896 [wandb_init.py:init():1041] starting run threads in backend
17
+ 2025-12-26 15:26:29,742 INFO MainThread:135896 [wandb_run.py:_console_start():2521] atexit reg
18
+ 2025-12-26 15:26:29,743 INFO MainThread:135896 [wandb_run.py:_redirect():2369] redirect: wrap_raw
19
+ 2025-12-26 15:26:29,743 INFO MainThread:135896 [wandb_run.py:_redirect():2438] Wrapping output streams.
20
+ 2025-12-26 15:26:29,743 INFO MainThread:135896 [wandb_run.py:_redirect():2461] Redirects installed.
21
+ 2025-12-26 15:26:29,748 INFO MainThread:135896 [wandb_init.py:init():1081] run started, returning control to user process
22
+ 2025-12-26 15:26:32,267 INFO wandb-AsyncioManager-main:135896 [service_client.py:_forward_responses():80] Reached EOF.
23
+ 2025-12-26 15:26:32,267 INFO wandb-AsyncioManager-main:135896 [mailbox.py:close():137] Closing mailbox, abandoning 1 handles.
dpo_run_24b_v1/wandb/run-20251226_152629-ymohys1q/run-ymohys1q.wandb ADDED
Binary file (16.4 kB). View file
 
dpo_run_24b_v1/wandb/run-20251226_152902-spwc3b4a/files/config.yaml ADDED
@@ -0,0 +1,165 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _wandb:
2
+ value:
3
+ cli_version: 0.23.1
4
+ e:
5
+ 3md0aqv809sr0p87xotdtgert1x2be48:
6
+ args:
7
+ - --config
8
+ - config_dpo.yaml
9
+ codePath: run_dpo.py
10
+ codePathLocal: run_dpo.py
11
+ cpu_count: 12
12
+ cpu_count_logical: 24
13
+ cudaVersion: "13.0"
14
+ disk:
15
+ /:
16
+ total: "791251738624"
17
+ used: "316388069376"
18
+ email: shaiksirajuddin9949@gmail.com
19
+ executable: /workspace/llm_finetuning_env/bin/python
20
+ gpu: NVIDIA A100-SXM4-80GB
21
+ gpu_count: 2
22
+ gpu_nvidia:
23
+ - architecture: Ampere
24
+ cudaCores: 6912
25
+ memoryTotal: "85899345920"
26
+ name: NVIDIA A100-SXM4-80GB
27
+ uuid: GPU-989794b0-ec3b-13bf-db9f-3fbe341497ba
28
+ - architecture: Ampere
29
+ cudaCores: 6912
30
+ memoryTotal: "85899345920"
31
+ name: NVIDIA A100-SXM4-80GB
32
+ uuid: GPU-3790aa64-60ef-9eac-b0b1-b278ee8c0d40
33
+ host: a100-2gpu-shell-session-757d587799-mfdvv
34
+ memory:
35
+ total: "359047892992"
36
+ os: Linux-6.12.46+-x86_64-with-glibc2.35
37
+ program: /workspace/trainer-kit/DPO/run_dpo.py
38
+ python: CPython 3.10.12
39
+ root: runs/dpo_run_24b_v1
40
+ startedAt: "2025-12-26T15:29:02.292056Z"
41
+ writerId: 3md0aqv809sr0p87xotdtgert1x2be48
42
+ m: []
43
+ python_version: 3.10.12
44
+ t:
45
+ "1":
46
+ - 1
47
+ - 11
48
+ - 41
49
+ - 49
50
+ - 51
51
+ - 71
52
+ - 84
53
+ - 98
54
+ "2":
55
+ - 1
56
+ - 11
57
+ - 41
58
+ - 49
59
+ - 51
60
+ - 71
61
+ - 84
62
+ - 98
63
+ "3":
64
+ - 15
65
+ - 16
66
+ "4": 3.10.12
67
+ "5": 0.23.1
68
+ "6": 5.0.0.dev0
69
+ "12": 0.23.1
70
+ "13": linux-x86_64
71
+ data:
72
+ value:
73
+ chosen_field: chosen
74
+ eval_jsonl: null
75
+ eval_split_ratio: 0.1
76
+ format_type: chatml
77
+ max_length: 2048
78
+ num_proc: 4
79
+ prompt_field: prompt
80
+ rejected_field: rejected
81
+ score_field: f1_score
82
+ shuffle: true
83
+ system_prompt: |
84
+ You are a Hyperswitch Rust code analyzer. Identify functions/structs that need modification for a given task.
85
+
86
+ ## Output Format
87
+
88
+ ##OUTPUT
89
+ Explain the data flow and why each component must change:
90
+ - Flow: [Input → Processing → Output with arrows]
91
+ - For each component: "The [ComponentName] ([path]) must [action] because [reason]—without this, [consequence]"
92
+ - Explain coupling between components
93
+
94
+ ##SELECT
95
+ modify::crates/path/to/file.rs::impl::ComponentName
96
+ add::crates/another/file.rs::function::AnotherComponent
97
+ <EOS>
98
+
99
+ ## Rules
100
+
101
+ 1. Use full paths: `remove::crates/folder/file.rs::Type::Name`
102
+ 2. Use `::` for nested items: `status::StructName::Type::Name`
103
+ 3. Always explain "must change because" and "without this"
104
+ 3. Types of components: function, struct, enum, impl, trait
105
+ 4. If there is extra information (e.g., enum variants), include that too.
106
+ 5. Start with ##OUTPUT, end with ##SELECT, terminate with <EOS>
107
+ train_jsonl: dpo_pairs_generated.jsonl
108
+ dpo:
109
+ value:
110
+ beta: 0.1
111
+ label_smoothing: 0
112
+ loss_type: sigmoid
113
+ reference_free: false
114
+ use_reference_model: true
115
+ model:
116
+ value:
117
+ attn_implementation: null
118
+ base_local_dir: base_model
119
+ bnb_4bit_compute_dtype: bfloat16
120
+ bnb_4bit_quant_type: nf4
121
+ bnb_4bit_use_double_quant: false
122
+ device_map: auto
123
+ repo_id: ../../Models/Devstral-Small-2-24B-HS-CPT-SFT
124
+ revision: null
125
+ tokenizer_use_fast: true
126
+ torch_dtype: bfloat16
127
+ trust_remote_code: true
128
+ use_4bit: false
129
+ peft:
130
+ value:
131
+ bias: none
132
+ enabled: true
133
+ lora_alpha: 32
134
+ lora_dropout: 0.05
135
+ r: 16
136
+ target_modules: auto
137
+ run_dir:
138
+ value: runs/dpo_run_24b_v1
139
+ train:
140
+ value:
141
+ early_stopping:
142
+ enabled: true
143
+ metric: eval_loss
144
+ min_delta: 0.001
145
+ mode: min
146
+ patience: 5
147
+ eval_steps: 25
148
+ evaluation_strategy: steps
149
+ gradient_accumulation_steps: 8
150
+ gradient_checkpointing: true
151
+ learning_rate: "5e-5"
152
+ load_best_model_at_end: true
153
+ logging_steps: 2
154
+ lr_scheduler_type: cosine
155
+ max_grad_norm: 1
156
+ num_train_epochs: 3
157
+ optim: adamw_torch
158
+ per_device_eval_batch_size: 1
159
+ per_device_train_batch_size: 1
160
+ resume_from_checkpoint: auto
161
+ save_steps: 100
162
+ save_strategy: steps
163
+ save_total_limit: 10
164
+ warmup_ratio: 0.1
165
+ weight_decay: 0
dpo_run_24b_v1/wandb/run-20251226_152902-spwc3b4a/files/output.log ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Wandb initialized: project='dpo-training', name='auto-generated'
2
+ 2025-12-26 15:29:05,363 - INFO - Loading model config from ../../Models/Devstral-Small-2-24B-HS-CPT-SFT with trust_remote_code=True
3
+ Traceback (most recent call last):
4
+ File "/workspace/trainer-kit/DPO/run_dpo.py", line 560, in load_base_model_and_tokenizer
5
+ model = AutoModelForCausalLM.from_pretrained(
6
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/models/auto/auto_factory.py", line 376, in from_pretrained
7
+ raise ValueError(
8
+ ValueError: Unrecognized configuration class <class 'transformers.models.mistral3.configuration_mistral3.Mistral3Config'> for this kind of AutoModel: AutoModelForCausalLM.
9
+ Model type should be one of AfmoeConfig, ApertusConfig, ArceeConfig, AriaTextConfig, BambaConfig, BartConfig, BertConfig, BertGenerationConfig, BigBirdConfig, BigBirdPegasusConfig, BioGptConfig, BitNetConfig, BlenderbotConfig, BlenderbotSmallConfig, BloomConfig, BltConfig, CamembertConfig, LlamaConfig, CodeGenConfig, CohereConfig, Cohere2Config, CpmAntConfig, CTRLConfig, CwmConfig, Data2VecTextConfig, DbrxConfig, DeepseekV2Config, DeepseekV3Config, DiffLlamaConfig, DogeConfig, Dots1Config, ElectraConfig, Emu3Config, ErnieConfig, Ernie4_5Config, Ernie4_5_MoeConfig, Exaone4Config, FalconConfig, FalconH1Config, FalconMambaConfig, FlexOlmoConfig, FuyuConfig, GemmaConfig, Gemma2Config, Gemma3Config, Gemma3TextConfig, Gemma3nConfig, Gemma3nTextConfig, GitConfig, GlmConfig, Glm4Config, Glm4MoeConfig, GotOcr2Config, GPT2Config, GPT2Config, GPTBigCodeConfig, GPTNeoConfig, GPTNeoXConfig, GPTNeoXJapaneseConfig, GptOssConfig, GPTJConfig, GraniteConfig, GraniteMoeConfig, GraniteMoeHybridConfig, GraniteMoeSharedConfig, HeliumConfig, HunYuanDenseV1Config, HunYuanMoEV1Config, Jais2Config, JambaConfig, JetMoeConfig, Lfm2Config, Lfm2MoeConfig, LlamaConfig, Llama4Config, Llama4TextConfig, LongcatFlashConfig, MambaConfig, Mamba2Config, MarianConfig, MBartConfig, MegatronBertConfig, MiniMaxConfig, MinistralConfig, Ministral3Config, MistralConfig, MixtralConfig, MllamaConfig, ModernBertDecoderConfig, MoshiConfig, MptConfig, MusicgenConfig, MusicgenMelodyConfig, MvpConfig, NanoChatConfig, NemotronConfig, OlmoConfig, Olmo2Config, Olmo3Config, OlmoeConfig, OpenAIGPTConfig, OPTConfig, PegasusConfig, PersimmonConfig, PhiConfig, Phi3Config, Phi4MultimodalConfig, PhimoeConfig, PLBartConfig, ProphetNetConfig, Qwen2Config, Qwen2MoeConfig, Qwen3Config, Qwen3MoeConfig, Qwen3NextConfig, RecurrentGemmaConfig, ReformerConfig, RemBertConfig, RobertaConfig, RobertaPreLayerNormConfig, RoCBertConfig, RoFormerConfig, RwkvConfig, SeedOssConfig, SmolLM3Config, StableLmConfig, Starcoder2Config, TrOCRConfig, VaultGemmaConfig, WhisperConfig, XGLMConfig, XLMConfig, XLMRobertaConfig, XLMRobertaXLConfig, XLNetConfig, xLSTMConfig, XmodConfig, ZambaConfig, Zamba2Config.
10
+
11
+ During handling of the above exception, another exception occurred:
12
+
13
+ Traceback (most recent call last):
14
+ File "/workspace/trainer-kit/DPO/run_dpo.py", line 960, in <module>
15
+ main()
16
+ File "/workspace/trainer-kit/DPO/run_dpo.py", line 751, in main
17
+ model, tokenizer = load_base_model_and_tokenizer(cfg, base_dir)
18
+ File "/workspace/trainer-kit/DPO/run_dpo.py", line 574, in load_base_model_and_tokenizer
19
+ model = AutoModelForCausalLM.from_pretrained(
20
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/models/auto/auto_factory.py", line 376, in from_pretrained
21
+ raise ValueError(
22
+ ValueError: Unrecognized configuration class <class 'transformers.models.mistral3.configuration_mistral3.Mistral3Config'> for this kind of AutoModel: AutoModelForCausalLM.
23
+ Model type should be one of AfmoeConfig, ApertusConfig, ArceeConfig, AriaTextConfig, BambaConfig, BartConfig, BertConfig, BertGenerationConfig, BigBirdConfig, BigBirdPegasusConfig, BioGptConfig, BitNetConfig, BlenderbotConfig, BlenderbotSmallConfig, BloomConfig, BltConfig, CamembertConfig, LlamaConfig, CodeGenConfig, CohereConfig, Cohere2Config, CpmAntConfig, CTRLConfig, CwmConfig, Data2VecTextConfig, DbrxConfig, DeepseekV2Config, DeepseekV3Config, DiffLlamaConfig, DogeConfig, Dots1Config, ElectraConfig, Emu3Config, ErnieConfig, Ernie4_5Config, Ernie4_5_MoeConfig, Exaone4Config, FalconConfig, FalconH1Config, FalconMambaConfig, FlexOlmoConfig, FuyuConfig, GemmaConfig, Gemma2Config, Gemma3Config, Gemma3TextConfig, Gemma3nConfig, Gemma3nTextConfig, GitConfig, GlmConfig, Glm4Config, Glm4MoeConfig, GotOcr2Config, GPT2Config, GPT2Config, GPTBigCodeConfig, GPTNeoConfig, GPTNeoXConfig, GPTNeoXJapaneseConfig, GptOssConfig, GPTJConfig, GraniteConfig, GraniteMoeConfig, GraniteMoeHybridConfig, GraniteMoeSharedConfig, HeliumConfig, HunYuanDenseV1Config, HunYuanMoEV1Config, Jais2Config, JambaConfig, JetMoeConfig, Lfm2Config, Lfm2MoeConfig, LlamaConfig, Llama4Config, Llama4TextConfig, LongcatFlashConfig, MambaConfig, Mamba2Config, MarianConfig, MBartConfig, MegatronBertConfig, MiniMaxConfig, MinistralConfig, Ministral3Config, MistralConfig, MixtralConfig, MllamaConfig, ModernBertDecoderConfig, MoshiConfig, MptConfig, MusicgenConfig, MusicgenMelodyConfig, MvpConfig, NanoChatConfig, NemotronConfig, OlmoConfig, Olmo2Config, Olmo3Config, OlmoeConfig, OpenAIGPTConfig, OPTConfig, PegasusConfig, PersimmonConfig, PhiConfig, Phi3Config, Phi4MultimodalConfig, PhimoeConfig, PLBartConfig, ProphetNetConfig, Qwen2Config, Qwen2MoeConfig, Qwen3Config, Qwen3MoeConfig, Qwen3NextConfig, RecurrentGemmaConfig, ReformerConfig, RemBertConfig, RobertaConfig, RobertaPreLayerNormConfig, RoCBertConfig, RoFormerConfig, RwkvConfig, SeedOssConfig, SmolLM3Config, StableLmConfig, Starcoder2Config, TrOCRConfig, VaultGemmaConfig, WhisperConfig, XGLMConfig, XLMConfig, XLMRobertaConfig, XLMRobertaXLConfig, XLNetConfig, xLSTMConfig, XmodConfig, ZambaConfig, Zamba2Config.
24
+ Traceback (most recent call last):
25
+ File "/workspace/trainer-kit/DPO/run_dpo.py", line 560, in load_base_model_and_tokenizer
26
+ model = AutoModelForCausalLM.from_pretrained(
27
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/models/auto/auto_factory.py", line 376, in from_pretrained
28
+ raise ValueError(
29
+ ValueError: Unrecognized configuration class <class 'transformers.models.mistral3.configuration_mistral3.Mistral3Config'> for this kind of AutoModel: AutoModelForCausalLM.
30
+ Model type should be one of AfmoeConfig, ApertusConfig, ArceeConfig, AriaTextConfig, BambaConfig, BartConfig, BertConfig, BertGenerationConfig, BigBirdConfig, BigBirdPegasusConfig, BioGptConfig, BitNetConfig, BlenderbotConfig, BlenderbotSmallConfig, BloomConfig, BltConfig, CamembertConfig, LlamaConfig, CodeGenConfig, CohereConfig, Cohere2Config, CpmAntConfig, CTRLConfig, CwmConfig, Data2VecTextConfig, DbrxConfig, DeepseekV2Config, DeepseekV3Config, DiffLlamaConfig, DogeConfig, Dots1Config, ElectraConfig, Emu3Config, ErnieConfig, Ernie4_5Config, Ernie4_5_MoeConfig, Exaone4Config, FalconConfig, FalconH1Config, FalconMambaConfig, FlexOlmoConfig, FuyuConfig, GemmaConfig, Gemma2Config, Gemma3Config, Gemma3TextConfig, Gemma3nConfig, Gemma3nTextConfig, GitConfig, GlmConfig, Glm4Config, Glm4MoeConfig, GotOcr2Config, GPT2Config, GPT2Config, GPTBigCodeConfig, GPTNeoConfig, GPTNeoXConfig, GPTNeoXJapaneseConfig, GptOssConfig, GPTJConfig, GraniteConfig, GraniteMoeConfig, GraniteMoeHybridConfig, GraniteMoeSharedConfig, HeliumConfig, HunYuanDenseV1Config, HunYuanMoEV1Config, Jais2Config, JambaConfig, JetMoeConfig, Lfm2Config, Lfm2MoeConfig, LlamaConfig, Llama4Config, Llama4TextConfig, LongcatFlashConfig, MambaConfig, Mamba2Config, MarianConfig, MBartConfig, MegatronBertConfig, MiniMaxConfig, MinistralConfig, Ministral3Config, MistralConfig, MixtralConfig, MllamaConfig, ModernBertDecoderConfig, MoshiConfig, MptConfig, MusicgenConfig, MusicgenMelodyConfig, MvpConfig, NanoChatConfig, NemotronConfig, OlmoConfig, Olmo2Config, Olmo3Config, OlmoeConfig, OpenAIGPTConfig, OPTConfig, PegasusConfig, PersimmonConfig, PhiConfig, Phi3Config, Phi4MultimodalConfig, PhimoeConfig, PLBartConfig, ProphetNetConfig, Qwen2Config, Qwen2MoeConfig, Qwen3Config, Qwen3MoeConfig, Qwen3NextConfig, RecurrentGemmaConfig, ReformerConfig, RemBertConfig, RobertaConfig, RobertaPreLayerNormConfig, RoCBertConfig, RoFormerConfig, RwkvConfig, SeedOssConfig, SmolLM3Config, StableLmConfig, Starcoder2Config, TrOCRConfig, VaultGemmaConfig, WhisperConfig, XGLMConfig, XLMConfig, XLMRobertaConfig, XLMRobertaXLConfig, XLNetConfig, xLSTMConfig, XmodConfig, ZambaConfig, Zamba2Config.
31
+
32
+ During handling of the above exception, another exception occurred:
33
+
34
+ Traceback (most recent call last):
35
+ File "/workspace/trainer-kit/DPO/run_dpo.py", line 960, in <module>
36
+ main()
37
+ File "/workspace/trainer-kit/DPO/run_dpo.py", line 751, in main
38
+ model, tokenizer = load_base_model_and_tokenizer(cfg, base_dir)
39
+ File "/workspace/trainer-kit/DPO/run_dpo.py", line 574, in load_base_model_and_tokenizer
40
+ model = AutoModelForCausalLM.from_pretrained(
41
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/models/auto/auto_factory.py", line 376, in from_pretrained
42
+ raise ValueError(
43
+ ValueError: Unrecognized configuration class <class 'transformers.models.mistral3.configuration_mistral3.Mistral3Config'> for this kind of AutoModel: AutoModelForCausalLM.
44
+ Model type should be one of AfmoeConfig, ApertusConfig, ArceeConfig, AriaTextConfig, BambaConfig, BartConfig, BertConfig, BertGenerationConfig, BigBirdConfig, BigBirdPegasusConfig, BioGptConfig, BitNetConfig, BlenderbotConfig, BlenderbotSmallConfig, BloomConfig, BltConfig, CamembertConfig, LlamaConfig, CodeGenConfig, CohereConfig, Cohere2Config, CpmAntConfig, CTRLConfig, CwmConfig, Data2VecTextConfig, DbrxConfig, DeepseekV2Config, DeepseekV3Config, DiffLlamaConfig, DogeConfig, Dots1Config, ElectraConfig, Emu3Config, ErnieConfig, Ernie4_5Config, Ernie4_5_MoeConfig, Exaone4Config, FalconConfig, FalconH1Config, FalconMambaConfig, FlexOlmoConfig, FuyuConfig, GemmaConfig, Gemma2Config, Gemma3Config, Gemma3TextConfig, Gemma3nConfig, Gemma3nTextConfig, GitConfig, GlmConfig, Glm4Config, Glm4MoeConfig, GotOcr2Config, GPT2Config, GPT2Config, GPTBigCodeConfig, GPTNeoConfig, GPTNeoXConfig, GPTNeoXJapaneseConfig, GptOssConfig, GPTJConfig, GraniteConfig, GraniteMoeConfig, GraniteMoeHybridConfig, GraniteMoeSharedConfig, HeliumConfig, HunYuanDenseV1Config, HunYuanMoEV1Config, Jais2Config, JambaConfig, JetMoeConfig, Lfm2Config, Lfm2MoeConfig, LlamaConfig, Llama4Config, Llama4TextConfig, LongcatFlashConfig, MambaConfig, Mamba2Config, MarianConfig, MBartConfig, MegatronBertConfig, MiniMaxConfig, MinistralConfig, Ministral3Config, MistralConfig, MixtralConfig, MllamaConfig, ModernBertDecoderConfig, MoshiConfig, MptConfig, MusicgenConfig, MusicgenMelodyConfig, MvpConfig, NanoChatConfig, NemotronConfig, OlmoConfig, Olmo2Config, Olmo3Config, OlmoeConfig, OpenAIGPTConfig, OPTConfig, PegasusConfig, PersimmonConfig, PhiConfig, Phi3Config, Phi4MultimodalConfig, PhimoeConfig, PLBartConfig, ProphetNetConfig, Qwen2Config, Qwen2MoeConfig, Qwen3Config, Qwen3MoeConfig, Qwen3NextConfig, RecurrentGemmaConfig, ReformerConfig, RemBertConfig, RobertaConfig, RobertaPreLayerNormConfig, RoCBertConfig, RoFormerConfig, RwkvConfig, SeedOssConfig, SmolLM3Config, StableLmConfig, Starcoder2Config, TrOCRConfig, VaultGemmaConfig, WhisperConfig, XGLMConfig, XLMConfig, XLMRobertaConfig, XLMRobertaXLConfig, XLNetConfig, xLSTMConfig, XmodConfig, ZambaConfig, Zamba2Config.
dpo_run_24b_v1/wandb/run-20251226_152902-spwc3b4a/files/requirements.txt ADDED
@@ -0,0 +1,104 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ exceptiongroup==1.3.1
2
+ wheel==0.45.1
3
+ python-dateutil==2.9.0.post0
4
+ nvidia-ml-py==13.580.82
5
+ huggingface_hub==1.2.3
6
+ idna==3.11
7
+ click==8.3.1
8
+ numpy==2.2.6
9
+ httpx==0.28.1
10
+ tokenizers==0.22.1
11
+ sympy==1.13.1
12
+ yarl==1.22.0
13
+ async-timeout==5.0.1
14
+ datasets==4.4.2
15
+ platformdirs==4.5.1
16
+ nvidia-cuda-cupti-cu12==12.1.105
17
+ nvidia-nvtx-cu12==12.1.105
18
+ smmap==5.0.2
19
+ accelerate==1.12.0
20
+ requests==2.32.5
21
+ aiohttp==3.13.2
22
+ bitsandbytes==0.49.0
23
+ nvidia-cublas-cu12==12.1.3.1
24
+ mpmath==1.3.0
25
+ torchaudio==2.5.1+cu121
26
+ nvidia-cuda-runtime-cu12==12.1.105
27
+ typing-inspection==0.4.2
28
+ GitPython==3.1.45
29
+ xxhash==3.6.0
30
+ nvidia-cusolver-cu12==11.4.5.107
31
+ pydantic_core==2.41.5
32
+ six==1.17.0
33
+ torchvision==0.20.1+cu121
34
+ typing_extensions==4.15.0
35
+ triton==3.1.0
36
+ charset-normalizer==3.4.4
37
+ nvitop==1.6.1
38
+ wandb==0.23.1
39
+ regex==2025.11.3
40
+ pip==25.3
41
+ nvidia-cusparse-cu12==12.1.0.106
42
+ pytz==2025.2
43
+ Jinja2==3.1.6
44
+ psutil==7.2.0
45
+ pillow==12.0.0
46
+ packaging==25.0
47
+ safetensors==0.7.0
48
+ sentry-sdk==2.48.0
49
+ gitdb==4.0.12
50
+ httpcore==1.0.9
51
+ setuptools==80.9.0
52
+ nvidia-cufft-cu12==11.0.2.54
53
+ anyio==4.12.0
54
+ transformers==5.0.0.dev0
55
+ pydantic==2.12.5
56
+ fsspec==2025.10.0
57
+ filelock==3.20.0
58
+ PyYAML==6.0.3
59
+ hf-xet==1.2.0
60
+ nvidia-cudnn-cu12==9.1.0.70
61
+ tqdm==4.67.1
62
+ MarkupSafe==2.1.5
63
+ attrs==25.4.0
64
+ nvidia-cuda-nvrtc-cu12==12.1.105
65
+ peft==0.18.0
66
+ aiohappyeyeballs==2.6.1
67
+ networkx==3.4.2
68
+ nvidia-nvjitlink-cu12==12.9.86
69
+ certifi==2025.11.12
70
+ pyarrow==22.0.0
71
+ dill==0.4.0
72
+ protobuf==6.33.2
73
+ aiosignal==1.4.0
74
+ frozenlist==1.8.0
75
+ urllib3==2.6.2
76
+ propcache==0.4.1
77
+ tzdata==2025.3
78
+ pandas==2.3.3
79
+ annotated-types==0.7.0
80
+ shellingham==1.5.4
81
+ nvidia-nccl-cu12==2.21.5
82
+ multidict==6.7.0
83
+ nvidia-curand-cu12==10.3.2.106
84
+ trl==0.26.2
85
+ torch==2.5.1+cu121
86
+ h11==0.16.0
87
+ multiprocess==0.70.18
88
+ typer-slim==0.21.0
89
+ wheel==0.45.1
90
+ tomli==2.0.1
91
+ autocommand==2.2.2
92
+ jaraco.context==5.3.0
93
+ zipp==3.19.2
94
+ packaging==24.2
95
+ inflect==7.3.1
96
+ typing_extensions==4.12.2
97
+ platformdirs==4.2.2
98
+ jaraco.functools==4.0.1
99
+ jaraco.collections==5.1.0
100
+ jaraco.text==3.12.1
101
+ backports.tarfile==1.2.0
102
+ more-itertools==10.3.0
103
+ importlib_metadata==8.0.0
104
+ typeguard==4.3.0
dpo_run_24b_v1/wandb/run-20251226_152902-spwc3b4a/files/wandb-metadata.json ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "os": "Linux-6.12.46+-x86_64-with-glibc2.35",
3
+ "python": "CPython 3.10.12",
4
+ "startedAt": "2025-12-26T15:29:02.292056Z",
5
+ "args": [
6
+ "--config",
7
+ "config_dpo.yaml"
8
+ ],
9
+ "program": "/workspace/trainer-kit/DPO/run_dpo.py",
10
+ "codePath": "run_dpo.py",
11
+ "codePathLocal": "run_dpo.py",
12
+ "email": "shaiksirajuddin9949@gmail.com",
13
+ "root": "runs/dpo_run_24b_v1",
14
+ "host": "a100-2gpu-shell-session-757d587799-mfdvv",
15
+ "executable": "/workspace/llm_finetuning_env/bin/python",
16
+ "cpu_count": 12,
17
+ "cpu_count_logical": 24,
18
+ "gpu": "NVIDIA A100-SXM4-80GB",
19
+ "gpu_count": 2,
20
+ "disk": {
21
+ "/": {
22
+ "total": "791251738624",
23
+ "used": "316388069376"
24
+ }
25
+ },
26
+ "memory": {
27
+ "total": "359047892992"
28
+ },
29
+ "gpu_nvidia": [
30
+ {
31
+ "name": "NVIDIA A100-SXM4-80GB",
32
+ "memoryTotal": "85899345920",
33
+ "cudaCores": 6912,
34
+ "architecture": "Ampere",
35
+ "uuid": "GPU-989794b0-ec3b-13bf-db9f-3fbe341497ba"
36
+ },
37
+ {
38
+ "name": "NVIDIA A100-SXM4-80GB",
39
+ "memoryTotal": "85899345920",
40
+ "cudaCores": 6912,
41
+ "architecture": "Ampere",
42
+ "uuid": "GPU-3790aa64-60ef-9eac-b0b1-b278ee8c0d40"
43
+ }
44
+ ],
45
+ "cudaVersion": "13.0",
46
+ "writerId": "3md0aqv809sr0p87xotdtgert1x2be48"
47
+ }
dpo_run_24b_v1/wandb/run-20251226_152902-spwc3b4a/files/wandb-summary.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"_wandb":{"runtime":2},"_runtime":2}
dpo_run_24b_v1/wandb/run-20251226_152902-spwc3b4a/logs/debug-core.log ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"time":"2025-12-26T15:29:02.378098877Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmph5oh7hv8/port-136944.txt","pid":136944,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false}
2
+ {"time":"2025-12-26T15:29:02.378909563Z","level":"INFO","msg":"server: will exit if parent process dies","ppid":136944}
3
+ {"time":"2025-12-26T15:29:02.378811163Z","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-136944-137024-1203613879/socket","Net":"unix"}}
4
+ {"time":"2025-12-26T15:29:02.55980819Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"}
5
+ {"time":"2025-12-26T15:29:02.56654094Z","level":"INFO","msg":"handleInformInit: received","streamId":"spwc3b4a","id":"1(@)"}
6
+ {"time":"2025-12-26T15:29:02.726927644Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"spwc3b4a","id":"1(@)"}
7
+ {"time":"2025-12-26T15:29:05.56156698Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"}
8
+ {"time":"2025-12-26T15:29:05.56167497Z","level":"INFO","msg":"connection: closing","id":"1(@)"}
9
+ {"time":"2025-12-26T15:29:05.561719256Z","level":"INFO","msg":"server is shutting down"}
10
+ {"time":"2025-12-26T15:29:05.561768332Z","level":"INFO","msg":"connection: closed successfully","id":"1(@)"}
11
+ {"time":"2025-12-26T15:29:05.561970735Z","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-136944-137024-1203613879/socket","Net":"unix"}}
12
+ {"time":"2025-12-26T15:29:05.964901945Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"}
13
+ {"time":"2025-12-26T15:29:05.964947772Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"}
14
+ {"time":"2025-12-26T15:29:05.964970982Z","level":"INFO","msg":"server is closed"}
dpo_run_24b_v1/wandb/run-20251226_152902-spwc3b4a/logs/debug-internal.log ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"time":"2025-12-26T15:29:02.566705931Z","level":"INFO","msg":"stream: starting","core version":"0.23.1"}
2
+ {"time":"2025-12-26T15:29:02.726675423Z","level":"INFO","msg":"stream: created new stream","id":"spwc3b4a"}
3
+ {"time":"2025-12-26T15:29:02.726786973Z","level":"INFO","msg":"handler: started","stream_id":"spwc3b4a"}
4
+ {"time":"2025-12-26T15:29:02.726918023Z","level":"INFO","msg":"stream: started","id":"spwc3b4a"}
5
+ {"time":"2025-12-26T15:29:02.726947115Z","level":"INFO","msg":"writer: started","stream_id":"spwc3b4a"}
6
+ {"time":"2025-12-26T15:29:02.726982198Z","level":"INFO","msg":"sender: started","stream_id":"spwc3b4a"}
7
+ {"time":"2025-12-26T15:29:05.56168481Z","level":"INFO","msg":"stream: closing","id":"spwc3b4a"}
8
+ {"time":"2025-12-26T15:29:05.886288482Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
9
+ {"time":"2025-12-26T15:29:05.963872485Z","level":"INFO","msg":"handler: closed","stream_id":"spwc3b4a"}
10
+ {"time":"2025-12-26T15:29:05.96397042Z","level":"INFO","msg":"sender: closed","stream_id":"spwc3b4a"}
11
+ {"time":"2025-12-26T15:29:05.963978351Z","level":"INFO","msg":"stream: closed","id":"spwc3b4a"}
dpo_run_24b_v1/wandb/run-20251226_152902-spwc3b4a/logs/debug.log ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2025-12-26 15:29:02,293 INFO MainThread:136944 [wandb_setup.py:_flush():80] Current SDK version is 0.23.1
2
+ 2025-12-26 15:29:02,293 INFO MainThread:136944 [wandb_setup.py:_flush():80] Configure stats pid to 136944
3
+ 2025-12-26 15:29:02,293 INFO MainThread:136944 [wandb_setup.py:_flush():80] Loading settings from /root/.config/wandb/settings
4
+ 2025-12-26 15:29:02,293 INFO MainThread:136944 [wandb_setup.py:_flush():80] Loading settings from /workspace/trainer-kit/DPO/wandb/settings
5
+ 2025-12-26 15:29:02,293 INFO MainThread:136944 [wandb_setup.py:_flush():80] Loading settings from environment variables
6
+ 2025-12-26 15:29:02,293 INFO MainThread:136944 [wandb_init.py:setup_run_log_directory():714] Logging user logs to runs/dpo_run_24b_v1/wandb/run-20251226_152902-spwc3b4a/logs/debug.log
7
+ 2025-12-26 15:29:02,294 INFO MainThread:136944 [wandb_init.py:setup_run_log_directory():715] Logging internal logs to runs/dpo_run_24b_v1/wandb/run-20251226_152902-spwc3b4a/logs/debug-internal.log
8
+ 2025-12-26 15:29:02,294 INFO MainThread:136944 [wandb_init.py:init():841] calling init triggers
9
+ 2025-12-26 15:29:02,294 INFO MainThread:136944 [wandb_init.py:init():846] wandb.init called with sweep_config: {}
10
+ config: {'model': {'repo_id': '../../Models/Devstral-Small-2-24B-HS-CPT-SFT', 'revision': None, 'base_local_dir': 'base_model', 'trust_remote_code': True, 'tokenizer_use_fast': True, 'device_map': 'auto', 'torch_dtype': 'bfloat16', 'use_4bit': False, 'bnb_4bit_quant_type': 'nf4', 'bnb_4bit_use_double_quant': False, 'bnb_4bit_compute_dtype': 'bfloat16', 'attn_implementation': None}, 'data': {'train_jsonl': 'dpo_pairs_generated.jsonl', 'eval_jsonl': None, 'eval_split_ratio': 0.1, 'prompt_field': 'prompt', 'chosen_field': 'chosen', 'rejected_field': 'rejected', 'score_field': 'f1_score', 'format_type': 'chatml', 'system_prompt': 'You are a Hyperswitch Rust code analyzer. Identify functions/structs that need modification for a given task.\n\n## Output Format\n\n##OUTPUT\nExplain the data flow and why each component must change:\n- Flow: [Input → Processing → Output with arrows]\n- For each component: "The [ComponentName] ([path]) must [action] because [reason]—without this, [consequence]"\n- Explain coupling between components\n\n##SELECT\nmodify::crates/path/to/file.rs::impl::ComponentName\nadd::crates/another/file.rs::function::AnotherComponent\n<EOS>\n\n## Rules\n\n1. Use full paths: `remove::crates/folder/file.rs::Type::Name`\n2. Use `::` for nested items: `status::StructName::Type::Name`\n3. Always explain "must change because" and "without this"\n3. Types of components: function, struct, enum, impl, trait\n4. If there is extra information (e.g., enum variants), include that too.\n5. Start with ##OUTPUT, end with ##SELECT, terminate with <EOS>\n', 'max_length': 2048, 'shuffle': True, 'num_proc': 4}, 'peft': {'enabled': True, 'r': 16, 'lora_alpha': 32, 'lora_dropout': 0.05, 'bias': 'none', 'target_modules': 'auto'}, 'dpo': {'beta': 0.1, 'label_smoothing': 0.0, 'loss_type': 'sigmoid', 'use_reference_model': True, 'reference_free': False}, 'train': {'num_train_epochs': 3, 'per_device_train_batch_size': 1, 'per_device_eval_batch_size': 1, 'gradient_accumulation_steps': 8, 'learning_rate': '5e-5', 'weight_decay': 0.0, 'warmup_ratio': 0.1, 'lr_scheduler_type': 'cosine', 'optim': 'adamw_torch', 'max_grad_norm': 1.0, 'gradient_checkpointing': True, 'logging_steps': 2, 'save_strategy': 'steps', 'save_steps': 100, 'save_total_limit': 10, 'evaluation_strategy': 'steps', 'eval_steps': 25, 'load_best_model_at_end': True, 'early_stopping': {'enabled': True, 'patience': 5, 'min_delta': 0.001, 'metric': 'eval_loss', 'mode': 'min'}, 'resume_from_checkpoint': 'auto'}, 'run_dir': 'runs/dpo_run_24b_v1', '_wandb': {}}
11
+ 2025-12-26 15:29:02,294 INFO MainThread:136944 [wandb_init.py:init():889] starting backend
12
+ 2025-12-26 15:29:02,560 INFO MainThread:136944 [wandb_init.py:init():892] sending inform_init request
13
+ 2025-12-26 15:29:02,564 INFO MainThread:136944 [wandb_init.py:init():900] backend started and connected
14
+ 2025-12-26 15:29:02,566 INFO MainThread:136944 [wandb_init.py:init():970] updated telemetry
15
+ 2025-12-26 15:29:02,567 INFO MainThread:136944 [wandb_init.py:init():994] communicating run to backend with 90.0 second timeout
16
+ 2025-12-26 15:29:02,898 INFO MainThread:136944 [wandb_init.py:init():1041] starting run threads in backend
17
+ 2025-12-26 15:29:03,007 INFO MainThread:136944 [wandb_run.py:_console_start():2521] atexit reg
18
+ 2025-12-26 15:29:03,007 INFO MainThread:136944 [wandb_run.py:_redirect():2369] redirect: wrap_raw
19
+ 2025-12-26 15:29:03,007 INFO MainThread:136944 [wandb_run.py:_redirect():2438] Wrapping output streams.
20
+ 2025-12-26 15:29:03,007 INFO MainThread:136944 [wandb_run.py:_redirect():2461] Redirects installed.
21
+ 2025-12-26 15:29:03,012 INFO MainThread:136944 [wandb_init.py:init():1081] run started, returning control to user process
22
+ 2025-12-26 15:29:05,561 INFO wandb-AsyncioManager-main:136944 [service_client.py:_forward_responses():80] Reached EOF.
23
+ 2025-12-26 15:29:05,561 INFO wandb-AsyncioManager-main:136944 [mailbox.py:close():137] Closing mailbox, abandoning 1 handles.
dpo_run_24b_v1/wandb/run-20251226_152902-spwc3b4a/run-spwc3b4a.wandb ADDED
Binary file (16.5 kB). View file
 
dpo_run_24b_v1/wandb/run-20251226_153052-uo02exvi/files/config.yaml ADDED
@@ -0,0 +1,165 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _wandb:
2
+ value:
3
+ cli_version: 0.23.1
4
+ e:
5
+ b8zcal9au539h6npajcoix9vzmhwciop:
6
+ args:
7
+ - --config
8
+ - config_dpo.yaml
9
+ codePath: run_dpo.py
10
+ codePathLocal: run_dpo.py
11
+ cpu_count: 12
12
+ cpu_count_logical: 24
13
+ cudaVersion: "13.0"
14
+ disk:
15
+ /:
16
+ total: "791251738624"
17
+ used: "317141377024"
18
+ email: shaiksirajuddin9949@gmail.com
19
+ executable: /workspace/llm_finetuning_env/bin/python
20
+ gpu: NVIDIA A100-SXM4-80GB
21
+ gpu_count: 2
22
+ gpu_nvidia:
23
+ - architecture: Ampere
24
+ cudaCores: 6912
25
+ memoryTotal: "85899345920"
26
+ name: NVIDIA A100-SXM4-80GB
27
+ uuid: GPU-989794b0-ec3b-13bf-db9f-3fbe341497ba
28
+ - architecture: Ampere
29
+ cudaCores: 6912
30
+ memoryTotal: "85899345920"
31
+ name: NVIDIA A100-SXM4-80GB
32
+ uuid: GPU-3790aa64-60ef-9eac-b0b1-b278ee8c0d40
33
+ host: a100-2gpu-shell-session-757d587799-mfdvv
34
+ memory:
35
+ total: "359047892992"
36
+ os: Linux-6.12.46+-x86_64-with-glibc2.35
37
+ program: /workspace/trainer-kit/DPO/run_dpo.py
38
+ python: CPython 3.10.12
39
+ root: runs/dpo_run_24b_v1
40
+ startedAt: "2025-12-26T15:30:52.545366Z"
41
+ writerId: b8zcal9au539h6npajcoix9vzmhwciop
42
+ m: []
43
+ python_version: 3.10.12
44
+ t:
45
+ "1":
46
+ - 1
47
+ - 11
48
+ - 41
49
+ - 49
50
+ - 51
51
+ - 71
52
+ - 84
53
+ - 98
54
+ "2":
55
+ - 1
56
+ - 11
57
+ - 41
58
+ - 49
59
+ - 51
60
+ - 71
61
+ - 84
62
+ - 98
63
+ "3":
64
+ - 15
65
+ - 16
66
+ "4": 3.10.12
67
+ "5": 0.23.1
68
+ "6": 5.0.0.dev0
69
+ "12": 0.23.1
70
+ "13": linux-x86_64
71
+ data:
72
+ value:
73
+ chosen_field: chosen
74
+ eval_jsonl: null
75
+ eval_split_ratio: 0.1
76
+ format_type: chatml
77
+ max_length: 2048
78
+ num_proc: 4
79
+ prompt_field: prompt
80
+ rejected_field: rejected
81
+ score_field: f1_score
82
+ shuffle: true
83
+ system_prompt: |
84
+ You are a Hyperswitch Rust code analyzer. Identify functions/structs that need modification for a given task.
85
+
86
+ ## Output Format
87
+
88
+ ##OUTPUT
89
+ Explain the data flow and why each component must change:
90
+ - Flow: [Input → Processing → Output with arrows]
91
+ - For each component: "The [ComponentName] ([path]) must [action] because [reason]—without this, [consequence]"
92
+ - Explain coupling between components
93
+
94
+ ##SELECT
95
+ modify::crates/path/to/file.rs::impl::ComponentName
96
+ add::crates/another/file.rs::function::AnotherComponent
97
+ <EOS>
98
+
99
+ ## Rules
100
+
101
+ 1. Use full paths: `remove::crates/folder/file.rs::Type::Name`
102
+ 2. Use `::` for nested items: `status::StructName::Type::Name`
103
+ 3. Always explain "must change because" and "without this"
104
+ 3. Types of components: function, struct, enum, impl, trait
105
+ 4. If there is extra information (e.g., enum variants), include that too.
106
+ 5. Start with ##OUTPUT, end with ##SELECT, terminate with <EOS>
107
+ train_jsonl: dpo_pairs_generated.jsonl
108
+ dpo:
109
+ value:
110
+ beta: 0.1
111
+ label_smoothing: 0
112
+ loss_type: sigmoid
113
+ reference_free: false
114
+ use_reference_model: true
115
+ model:
116
+ value:
117
+ attn_implementation: null
118
+ base_local_dir: base_model
119
+ bnb_4bit_compute_dtype: bfloat16
120
+ bnb_4bit_quant_type: nf4
121
+ bnb_4bit_use_double_quant: false
122
+ device_map: auto
123
+ repo_id: ../../Models/Devstral-Small-2-24B-HS-CPT-SFT
124
+ revision: null
125
+ tokenizer_use_fast: true
126
+ torch_dtype: bfloat16
127
+ trust_remote_code: true
128
+ use_4bit: false
129
+ peft:
130
+ value:
131
+ bias: none
132
+ enabled: true
133
+ lora_alpha: 32
134
+ lora_dropout: 0.05
135
+ r: 16
136
+ target_modules: auto
137
+ run_dir:
138
+ value: runs/dpo_run_24b_v1
139
+ train:
140
+ value:
141
+ early_stopping:
142
+ enabled: true
143
+ metric: eval_loss
144
+ min_delta: 0.001
145
+ mode: min
146
+ patience: 5
147
+ eval_steps: 25
148
+ evaluation_strategy: steps
149
+ gradient_accumulation_steps: 8
150
+ gradient_checkpointing: true
151
+ learning_rate: "5e-5"
152
+ load_best_model_at_end: true
153
+ logging_steps: 2
154
+ lr_scheduler_type: cosine
155
+ max_grad_norm: 1
156
+ num_train_epochs: 3
157
+ optim: adamw_torch
158
+ per_device_eval_batch_size: 1
159
+ per_device_train_batch_size: 1
160
+ resume_from_checkpoint: auto
161
+ save_steps: 100
162
+ save_strategy: steps
163
+ save_total_limit: 10
164
+ warmup_ratio: 0.1
165
+ weight_decay: 0
dpo_run_24b_v1/wandb/run-20251226_153052-uo02exvi/files/output.log ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Wandb initialized: project='dpo-training', name='auto-generated'
2
+ 2025-12-26 15:30:55,534 - INFO - Loading model config from ../../Models/Devstral-Small-2-24B-HS-CPT-SFT with trust_remote_code=True
3
+ Traceback (most recent call last):
4
+ File "/workspace/trainer-kit/DPO/run_dpo.py", line 560, in load_base_model_and_tokenizer
5
+ model = AutoModelForCausalLM.from_pretrained(
6
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/models/auto/auto_factory.py", line 376, in from_pretrained
7
+ raise ValueError(
8
+ ValueError: Unrecognized configuration class <class 'transformers.models.mistral3.configuration_mistral3.Mistral3Config'> for this kind of AutoModel: AutoModelForCausalLM.
9
+ Model type should be one of AfmoeConfig, ApertusConfig, ArceeConfig, AriaTextConfig, BambaConfig, BartConfig, BertConfig, BertGenerationConfig, BigBirdConfig, BigBirdPegasusConfig, BioGptConfig, BitNetConfig, BlenderbotConfig, BlenderbotSmallConfig, BloomConfig, BltConfig, CamembertConfig, LlamaConfig, CodeGenConfig, CohereConfig, Cohere2Config, CpmAntConfig, CTRLConfig, CwmConfig, Data2VecTextConfig, DbrxConfig, DeepseekV2Config, DeepseekV3Config, DiffLlamaConfig, DogeConfig, Dots1Config, ElectraConfig, Emu3Config, ErnieConfig, Ernie4_5Config, Ernie4_5_MoeConfig, Exaone4Config, FalconConfig, FalconH1Config, FalconMambaConfig, FlexOlmoConfig, FuyuConfig, GemmaConfig, Gemma2Config, Gemma3Config, Gemma3TextConfig, Gemma3nConfig, Gemma3nTextConfig, GitConfig, GlmConfig, Glm4Config, Glm4MoeConfig, GotOcr2Config, GPT2Config, GPT2Config, GPTBigCodeConfig, GPTNeoConfig, GPTNeoXConfig, GPTNeoXJapaneseConfig, GptOssConfig, GPTJConfig, GraniteConfig, GraniteMoeConfig, GraniteMoeHybridConfig, GraniteMoeSharedConfig, HeliumConfig, HunYuanDenseV1Config, HunYuanMoEV1Config, Jais2Config, JambaConfig, JetMoeConfig, Lfm2Config, Lfm2MoeConfig, LlamaConfig, Llama4Config, Llama4TextConfig, LongcatFlashConfig, MambaConfig, Mamba2Config, MarianConfig, MBartConfig, MegatronBertConfig, MiniMaxConfig, MinistralConfig, Ministral3Config, MistralConfig, MixtralConfig, MllamaConfig, ModernBertDecoderConfig, MoshiConfig, MptConfig, MusicgenConfig, MusicgenMelodyConfig, MvpConfig, NanoChatConfig, NemotronConfig, OlmoConfig, Olmo2Config, Olmo3Config, OlmoeConfig, OpenAIGPTConfig, OPTConfig, PegasusConfig, PersimmonConfig, PhiConfig, Phi3Config, Phi4MultimodalConfig, PhimoeConfig, PLBartConfig, ProphetNetConfig, Qwen2Config, Qwen2MoeConfig, Qwen3Config, Qwen3MoeConfig, Qwen3NextConfig, RecurrentGemmaConfig, ReformerConfig, RemBertConfig, RobertaConfig, RobertaPreLayerNormConfig, RoCBertConfig, RoFormerConfig, RwkvConfig, SeedOssConfig, SmolLM3Config, StableLmConfig, Starcoder2Config, TrOCRConfig, VaultGemmaConfig, WhisperConfig, XGLMConfig, XLMConfig, XLMRobertaConfig, XLMRobertaXLConfig, XLNetConfig, xLSTMConfig, XmodConfig, ZambaConfig, Zamba2Config.
10
+
11
+ During handling of the above exception, another exception occurred:
12
+
13
+ Traceback (most recent call last):
14
+ File "/workspace/trainer-kit/DPO/run_dpo.py", line 960, in <module>
15
+ main()
16
+ File "/workspace/trainer-kit/DPO/run_dpo.py", line 751, in main
17
+ model, tokenizer = load_base_model_and_tokenizer(cfg, base_dir)
18
+ File "/workspace/trainer-kit/DPO/run_dpo.py", line 574, in load_base_model_and_tokenizer
19
+ model = AutoModelForCausalLM.from_pretrained(
20
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/models/auto/auto_factory.py", line 376, in from_pretrained
21
+ raise ValueError(
22
+ ValueError: Unrecognized configuration class <class 'transformers.models.mistral3.configuration_mistral3.Mistral3Config'> for this kind of AutoModel: AutoModelForCausalLM.
23
+ Model type should be one of AfmoeConfig, ApertusConfig, ArceeConfig, AriaTextConfig, BambaConfig, BartConfig, BertConfig, BertGenerationConfig, BigBirdConfig, BigBirdPegasusConfig, BioGptConfig, BitNetConfig, BlenderbotConfig, BlenderbotSmallConfig, BloomConfig, BltConfig, CamembertConfig, LlamaConfig, CodeGenConfig, CohereConfig, Cohere2Config, CpmAntConfig, CTRLConfig, CwmConfig, Data2VecTextConfig, DbrxConfig, DeepseekV2Config, DeepseekV3Config, DiffLlamaConfig, DogeConfig, Dots1Config, ElectraConfig, Emu3Config, ErnieConfig, Ernie4_5Config, Ernie4_5_MoeConfig, Exaone4Config, FalconConfig, FalconH1Config, FalconMambaConfig, FlexOlmoConfig, FuyuConfig, GemmaConfig, Gemma2Config, Gemma3Config, Gemma3TextConfig, Gemma3nConfig, Gemma3nTextConfig, GitConfig, GlmConfig, Glm4Config, Glm4MoeConfig, GotOcr2Config, GPT2Config, GPT2Config, GPTBigCodeConfig, GPTNeoConfig, GPTNeoXConfig, GPTNeoXJapaneseConfig, GptOssConfig, GPTJConfig, GraniteConfig, GraniteMoeConfig, GraniteMoeHybridConfig, GraniteMoeSharedConfig, HeliumConfig, HunYuanDenseV1Config, HunYuanMoEV1Config, Jais2Config, JambaConfig, JetMoeConfig, Lfm2Config, Lfm2MoeConfig, LlamaConfig, Llama4Config, Llama4TextConfig, LongcatFlashConfig, MambaConfig, Mamba2Config, MarianConfig, MBartConfig, MegatronBertConfig, MiniMaxConfig, MinistralConfig, Ministral3Config, MistralConfig, MixtralConfig, MllamaConfig, ModernBertDecoderConfig, MoshiConfig, MptConfig, MusicgenConfig, MusicgenMelodyConfig, MvpConfig, NanoChatConfig, NemotronConfig, OlmoConfig, Olmo2Config, Olmo3Config, OlmoeConfig, OpenAIGPTConfig, OPTConfig, PegasusConfig, PersimmonConfig, PhiConfig, Phi3Config, Phi4MultimodalConfig, PhimoeConfig, PLBartConfig, ProphetNetConfig, Qwen2Config, Qwen2MoeConfig, Qwen3Config, Qwen3MoeConfig, Qwen3NextConfig, RecurrentGemmaConfig, ReformerConfig, RemBertConfig, RobertaConfig, RobertaPreLayerNormConfig, RoCBertConfig, RoFormerConfig, RwkvConfig, SeedOssConfig, SmolLM3Config, StableLmConfig, Starcoder2Config, TrOCRConfig, VaultGemmaConfig, WhisperConfig, XGLMConfig, XLMConfig, XLMRobertaConfig, XLMRobertaXLConfig, XLNetConfig, xLSTMConfig, XmodConfig, ZambaConfig, Zamba2Config.
24
+ Traceback (most recent call last):
25
+ File "/workspace/trainer-kit/DPO/run_dpo.py", line 560, in load_base_model_and_tokenizer
26
+ model = AutoModelForCausalLM.from_pretrained(
27
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/models/auto/auto_factory.py", line 376, in from_pretrained
28
+ raise ValueError(
29
+ ValueError: Unrecognized configuration class <class 'transformers.models.mistral3.configuration_mistral3.Mistral3Config'> for this kind of AutoModel: AutoModelForCausalLM.
30
+ Model type should be one of AfmoeConfig, ApertusConfig, ArceeConfig, AriaTextConfig, BambaConfig, BartConfig, BertConfig, BertGenerationConfig, BigBirdConfig, BigBirdPegasusConfig, BioGptConfig, BitNetConfig, BlenderbotConfig, BlenderbotSmallConfig, BloomConfig, BltConfig, CamembertConfig, LlamaConfig, CodeGenConfig, CohereConfig, Cohere2Config, CpmAntConfig, CTRLConfig, CwmConfig, Data2VecTextConfig, DbrxConfig, DeepseekV2Config, DeepseekV3Config, DiffLlamaConfig, DogeConfig, Dots1Config, ElectraConfig, Emu3Config, ErnieConfig, Ernie4_5Config, Ernie4_5_MoeConfig, Exaone4Config, FalconConfig, FalconH1Config, FalconMambaConfig, FlexOlmoConfig, FuyuConfig, GemmaConfig, Gemma2Config, Gemma3Config, Gemma3TextConfig, Gemma3nConfig, Gemma3nTextConfig, GitConfig, GlmConfig, Glm4Config, Glm4MoeConfig, GotOcr2Config, GPT2Config, GPT2Config, GPTBigCodeConfig, GPTNeoConfig, GPTNeoXConfig, GPTNeoXJapaneseConfig, GptOssConfig, GPTJConfig, GraniteConfig, GraniteMoeConfig, GraniteMoeHybridConfig, GraniteMoeSharedConfig, HeliumConfig, HunYuanDenseV1Config, HunYuanMoEV1Config, Jais2Config, JambaConfig, JetMoeConfig, Lfm2Config, Lfm2MoeConfig, LlamaConfig, Llama4Config, Llama4TextConfig, LongcatFlashConfig, MambaConfig, Mamba2Config, MarianConfig, MBartConfig, MegatronBertConfig, MiniMaxConfig, MinistralConfig, Ministral3Config, MistralConfig, MixtralConfig, MllamaConfig, ModernBertDecoderConfig, MoshiConfig, MptConfig, MusicgenConfig, MusicgenMelodyConfig, MvpConfig, NanoChatConfig, NemotronConfig, OlmoConfig, Olmo2Config, Olmo3Config, OlmoeConfig, OpenAIGPTConfig, OPTConfig, PegasusConfig, PersimmonConfig, PhiConfig, Phi3Config, Phi4MultimodalConfig, PhimoeConfig, PLBartConfig, ProphetNetConfig, Qwen2Config, Qwen2MoeConfig, Qwen3Config, Qwen3MoeConfig, Qwen3NextConfig, RecurrentGemmaConfig, ReformerConfig, RemBertConfig, RobertaConfig, RobertaPreLayerNormConfig, RoCBertConfig, RoFormerConfig, RwkvConfig, SeedOssConfig, SmolLM3Config, StableLmConfig, Starcoder2Config, TrOCRConfig, VaultGemmaConfig, WhisperConfig, XGLMConfig, XLMConfig, XLMRobertaConfig, XLMRobertaXLConfig, XLNetConfig, xLSTMConfig, XmodConfig, ZambaConfig, Zamba2Config.
31
+
32
+ During handling of the above exception, another exception occurred:
33
+
34
+ Traceback (most recent call last):
35
+ File "/workspace/trainer-kit/DPO/run_dpo.py", line 960, in <module>
36
+ main()
37
+ File "/workspace/trainer-kit/DPO/run_dpo.py", line 751, in main
38
+ model, tokenizer = load_base_model_and_tokenizer(cfg, base_dir)
39
+ File "/workspace/trainer-kit/DPO/run_dpo.py", line 574, in load_base_model_and_tokenizer
40
+ model = AutoModelForCausalLM.from_pretrained(
41
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/models/auto/auto_factory.py", line 376, in from_pretrained
42
+ raise ValueError(
43
+ ValueError: Unrecognized configuration class <class 'transformers.models.mistral3.configuration_mistral3.Mistral3Config'> for this kind of AutoModel: AutoModelForCausalLM.
44
+ Model type should be one of AfmoeConfig, ApertusConfig, ArceeConfig, AriaTextConfig, BambaConfig, BartConfig, BertConfig, BertGenerationConfig, BigBirdConfig, BigBirdPegasusConfig, BioGptConfig, BitNetConfig, BlenderbotConfig, BlenderbotSmallConfig, BloomConfig, BltConfig, CamembertConfig, LlamaConfig, CodeGenConfig, CohereConfig, Cohere2Config, CpmAntConfig, CTRLConfig, CwmConfig, Data2VecTextConfig, DbrxConfig, DeepseekV2Config, DeepseekV3Config, DiffLlamaConfig, DogeConfig, Dots1Config, ElectraConfig, Emu3Config, ErnieConfig, Ernie4_5Config, Ernie4_5_MoeConfig, Exaone4Config, FalconConfig, FalconH1Config, FalconMambaConfig, FlexOlmoConfig, FuyuConfig, GemmaConfig, Gemma2Config, Gemma3Config, Gemma3TextConfig, Gemma3nConfig, Gemma3nTextConfig, GitConfig, GlmConfig, Glm4Config, Glm4MoeConfig, GotOcr2Config, GPT2Config, GPT2Config, GPTBigCodeConfig, GPTNeoConfig, GPTNeoXConfig, GPTNeoXJapaneseConfig, GptOssConfig, GPTJConfig, GraniteConfig, GraniteMoeConfig, GraniteMoeHybridConfig, GraniteMoeSharedConfig, HeliumConfig, HunYuanDenseV1Config, HunYuanMoEV1Config, Jais2Config, JambaConfig, JetMoeConfig, Lfm2Config, Lfm2MoeConfig, LlamaConfig, Llama4Config, Llama4TextConfig, LongcatFlashConfig, MambaConfig, Mamba2Config, MarianConfig, MBartConfig, MegatronBertConfig, MiniMaxConfig, MinistralConfig, Ministral3Config, MistralConfig, MixtralConfig, MllamaConfig, ModernBertDecoderConfig, MoshiConfig, MptConfig, MusicgenConfig, MusicgenMelodyConfig, MvpConfig, NanoChatConfig, NemotronConfig, OlmoConfig, Olmo2Config, Olmo3Config, OlmoeConfig, OpenAIGPTConfig, OPTConfig, PegasusConfig, PersimmonConfig, PhiConfig, Phi3Config, Phi4MultimodalConfig, PhimoeConfig, PLBartConfig, ProphetNetConfig, Qwen2Config, Qwen2MoeConfig, Qwen3Config, Qwen3MoeConfig, Qwen3NextConfig, RecurrentGemmaConfig, ReformerConfig, RemBertConfig, RobertaConfig, RobertaPreLayerNormConfig, RoCBertConfig, RoFormerConfig, RwkvConfig, SeedOssConfig, SmolLM3Config, StableLmConfig, Starcoder2Config, TrOCRConfig, VaultGemmaConfig, WhisperConfig, XGLMConfig, XLMConfig, XLMRobertaConfig, XLMRobertaXLConfig, XLNetConfig, xLSTMConfig, XmodConfig, ZambaConfig, Zamba2Config.
dpo_run_24b_v1/wandb/run-20251226_153052-uo02exvi/files/requirements.txt ADDED
@@ -0,0 +1,104 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ exceptiongroup==1.3.1
2
+ wheel==0.45.1
3
+ python-dateutil==2.9.0.post0
4
+ nvidia-ml-py==13.580.82
5
+ huggingface_hub==1.2.3
6
+ idna==3.11
7
+ click==8.3.1
8
+ numpy==2.2.6
9
+ httpx==0.28.1
10
+ tokenizers==0.22.1
11
+ sympy==1.13.1
12
+ yarl==1.22.0
13
+ async-timeout==5.0.1
14
+ datasets==4.4.2
15
+ platformdirs==4.5.1
16
+ nvidia-cuda-cupti-cu12==12.1.105
17
+ nvidia-nvtx-cu12==12.1.105
18
+ smmap==5.0.2
19
+ accelerate==1.12.0
20
+ requests==2.32.5
21
+ aiohttp==3.13.2
22
+ bitsandbytes==0.49.0
23
+ nvidia-cublas-cu12==12.1.3.1
24
+ mpmath==1.3.0
25
+ torchaudio==2.5.1+cu121
26
+ nvidia-cuda-runtime-cu12==12.1.105
27
+ typing-inspection==0.4.2
28
+ GitPython==3.1.45
29
+ xxhash==3.6.0
30
+ nvidia-cusolver-cu12==11.4.5.107
31
+ pydantic_core==2.41.5
32
+ six==1.17.0
33
+ torchvision==0.20.1+cu121
34
+ typing_extensions==4.15.0
35
+ triton==3.1.0
36
+ charset-normalizer==3.4.4
37
+ nvitop==1.6.1
38
+ wandb==0.23.1
39
+ regex==2025.11.3
40
+ pip==25.3
41
+ nvidia-cusparse-cu12==12.1.0.106
42
+ pytz==2025.2
43
+ Jinja2==3.1.6
44
+ psutil==7.2.0
45
+ pillow==12.0.0
46
+ packaging==25.0
47
+ safetensors==0.7.0
48
+ sentry-sdk==2.48.0
49
+ gitdb==4.0.12
50
+ httpcore==1.0.9
51
+ setuptools==80.9.0
52
+ nvidia-cufft-cu12==11.0.2.54
53
+ anyio==4.12.0
54
+ transformers==5.0.0.dev0
55
+ pydantic==2.12.5
56
+ fsspec==2025.10.0
57
+ filelock==3.20.0
58
+ PyYAML==6.0.3
59
+ hf-xet==1.2.0
60
+ nvidia-cudnn-cu12==9.1.0.70
61
+ tqdm==4.67.1
62
+ MarkupSafe==2.1.5
63
+ attrs==25.4.0
64
+ nvidia-cuda-nvrtc-cu12==12.1.105
65
+ peft==0.18.0
66
+ aiohappyeyeballs==2.6.1
67
+ networkx==3.4.2
68
+ nvidia-nvjitlink-cu12==12.9.86
69
+ certifi==2025.11.12
70
+ pyarrow==22.0.0
71
+ dill==0.4.0
72
+ protobuf==6.33.2
73
+ aiosignal==1.4.0
74
+ frozenlist==1.8.0
75
+ urllib3==2.6.2
76
+ propcache==0.4.1
77
+ tzdata==2025.3
78
+ pandas==2.3.3
79
+ annotated-types==0.7.0
80
+ shellingham==1.5.4
81
+ nvidia-nccl-cu12==2.21.5
82
+ multidict==6.7.0
83
+ nvidia-curand-cu12==10.3.2.106
84
+ trl==0.26.2
85
+ torch==2.5.1+cu121
86
+ h11==0.16.0
87
+ multiprocess==0.70.18
88
+ typer-slim==0.21.0
89
+ wheel==0.45.1
90
+ tomli==2.0.1
91
+ autocommand==2.2.2
92
+ jaraco.context==5.3.0
93
+ zipp==3.19.2
94
+ packaging==24.2
95
+ inflect==7.3.1
96
+ typing_extensions==4.12.2
97
+ platformdirs==4.2.2
98
+ jaraco.functools==4.0.1
99
+ jaraco.collections==5.1.0
100
+ jaraco.text==3.12.1
101
+ backports.tarfile==1.2.0
102
+ more-itertools==10.3.0
103
+ importlib_metadata==8.0.0
104
+ typeguard==4.3.0
dpo_run_24b_v1/wandb/run-20251226_153052-uo02exvi/files/wandb-metadata.json ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "os": "Linux-6.12.46+-x86_64-with-glibc2.35",
3
+ "python": "CPython 3.10.12",
4
+ "startedAt": "2025-12-26T15:30:52.545366Z",
5
+ "args": [
6
+ "--config",
7
+ "config_dpo.yaml"
8
+ ],
9
+ "program": "/workspace/trainer-kit/DPO/run_dpo.py",
10
+ "codePath": "run_dpo.py",
11
+ "codePathLocal": "run_dpo.py",
12
+ "email": "shaiksirajuddin9949@gmail.com",
13
+ "root": "runs/dpo_run_24b_v1",
14
+ "host": "a100-2gpu-shell-session-757d587799-mfdvv",
15
+ "executable": "/workspace/llm_finetuning_env/bin/python",
16
+ "cpu_count": 12,
17
+ "cpu_count_logical": 24,
18
+ "gpu": "NVIDIA A100-SXM4-80GB",
19
+ "gpu_count": 2,
20
+ "disk": {
21
+ "/": {
22
+ "total": "791251738624",
23
+ "used": "317141377024"
24
+ }
25
+ },
26
+ "memory": {
27
+ "total": "359047892992"
28
+ },
29
+ "gpu_nvidia": [
30
+ {
31
+ "name": "NVIDIA A100-SXM4-80GB",
32
+ "memoryTotal": "85899345920",
33
+ "cudaCores": 6912,
34
+ "architecture": "Ampere",
35
+ "uuid": "GPU-989794b0-ec3b-13bf-db9f-3fbe341497ba"
36
+ },
37
+ {
38
+ "name": "NVIDIA A100-SXM4-80GB",
39
+ "memoryTotal": "85899345920",
40
+ "cudaCores": 6912,
41
+ "architecture": "Ampere",
42
+ "uuid": "GPU-3790aa64-60ef-9eac-b0b1-b278ee8c0d40"
43
+ }
44
+ ],
45
+ "cudaVersion": "13.0",
46
+ "writerId": "b8zcal9au539h6npajcoix9vzmhwciop"
47
+ }
dpo_run_24b_v1/wandb/run-20251226_153052-uo02exvi/files/wandb-summary.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"_wandb":{"runtime":2},"_runtime":2}
dpo_run_24b_v1/wandb/run-20251226_153052-uo02exvi/logs/debug-core.log ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"time":"2025-12-26T15:30:52.629789303Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpvr96ff7k/port-138039.txt","pid":138039,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false}
2
+ {"time":"2025-12-26T15:30:52.630401044Z","level":"INFO","msg":"server: will exit if parent process dies","ppid":138039}
3
+ {"time":"2025-12-26T15:30:52.630403718Z","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-138039-138113-2700074418/socket","Net":"unix"}}
4
+ {"time":"2025-12-26T15:30:52.815186583Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"}
5
+ {"time":"2025-12-26T15:30:52.821468691Z","level":"INFO","msg":"handleInformInit: received","streamId":"uo02exvi","id":"1(@)"}
6
+ {"time":"2025-12-26T15:30:52.972985125Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"uo02exvi","id":"1(@)"}
7
+ {"time":"2025-12-26T15:30:55.736694152Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"}
8
+ {"time":"2025-12-26T15:30:55.736777163Z","level":"INFO","msg":"connection: closing","id":"1(@)"}
9
+ {"time":"2025-12-26T15:30:55.736810022Z","level":"INFO","msg":"server is shutting down"}
10
+ {"time":"2025-12-26T15:30:55.736839825Z","level":"INFO","msg":"connection: closed successfully","id":"1(@)"}
11
+ {"time":"2025-12-26T15:30:55.736962241Z","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-138039-138113-2700074418/socket","Net":"unix"}}
12
+ {"time":"2025-12-26T15:30:56.144618955Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"}
13
+ {"time":"2025-12-26T15:30:56.144657804Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"}
14
+ {"time":"2025-12-26T15:30:56.144687588Z","level":"INFO","msg":"server is closed"}
dpo_run_24b_v1/wandb/run-20251226_153052-uo02exvi/logs/debug-internal.log ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"time":"2025-12-26T15:30:52.821600318Z","level":"INFO","msg":"stream: starting","core version":"0.23.1"}
2
+ {"time":"2025-12-26T15:30:52.972768524Z","level":"INFO","msg":"stream: created new stream","id":"uo02exvi"}
3
+ {"time":"2025-12-26T15:30:52.972861145Z","level":"INFO","msg":"handler: started","stream_id":"uo02exvi"}
4
+ {"time":"2025-12-26T15:30:52.972976097Z","level":"INFO","msg":"stream: started","id":"uo02exvi"}
5
+ {"time":"2025-12-26T15:30:52.973005528Z","level":"INFO","msg":"writer: started","stream_id":"uo02exvi"}
6
+ {"time":"2025-12-26T15:30:52.973011764Z","level":"INFO","msg":"sender: started","stream_id":"uo02exvi"}
7
+ {"time":"2025-12-26T15:30:55.736754973Z","level":"INFO","msg":"stream: closing","id":"uo02exvi"}
8
+ {"time":"2025-12-26T15:30:56.052989718Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
9
+ {"time":"2025-12-26T15:30:56.143693278Z","level":"INFO","msg":"handler: closed","stream_id":"uo02exvi"}
10
+ {"time":"2025-12-26T15:30:56.143802165Z","level":"INFO","msg":"sender: closed","stream_id":"uo02exvi"}
11
+ {"time":"2025-12-26T15:30:56.143823041Z","level":"INFO","msg":"stream: closed","id":"uo02exvi"}
dpo_run_24b_v1/wandb/run-20251226_153052-uo02exvi/logs/debug.log ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2025-12-26 15:30:52,546 INFO MainThread:138039 [wandb_setup.py:_flush():80] Current SDK version is 0.23.1
2
+ 2025-12-26 15:30:52,546 INFO MainThread:138039 [wandb_setup.py:_flush():80] Configure stats pid to 138039
3
+ 2025-12-26 15:30:52,547 INFO MainThread:138039 [wandb_setup.py:_flush():80] Loading settings from /root/.config/wandb/settings
4
+ 2025-12-26 15:30:52,547 INFO MainThread:138039 [wandb_setup.py:_flush():80] Loading settings from /workspace/trainer-kit/DPO/wandb/settings
5
+ 2025-12-26 15:30:52,547 INFO MainThread:138039 [wandb_setup.py:_flush():80] Loading settings from environment variables
6
+ 2025-12-26 15:30:52,547 INFO MainThread:138039 [wandb_init.py:setup_run_log_directory():714] Logging user logs to runs/dpo_run_24b_v1/wandb/run-20251226_153052-uo02exvi/logs/debug.log
7
+ 2025-12-26 15:30:52,547 INFO MainThread:138039 [wandb_init.py:setup_run_log_directory():715] Logging internal logs to runs/dpo_run_24b_v1/wandb/run-20251226_153052-uo02exvi/logs/debug-internal.log
8
+ 2025-12-26 15:30:52,547 INFO MainThread:138039 [wandb_init.py:init():841] calling init triggers
9
+ 2025-12-26 15:30:52,547 INFO MainThread:138039 [wandb_init.py:init():846] wandb.init called with sweep_config: {}
10
+ config: {'model': {'repo_id': '../../Models/Devstral-Small-2-24B-HS-CPT-SFT', 'revision': None, 'base_local_dir': 'base_model', 'trust_remote_code': True, 'tokenizer_use_fast': True, 'device_map': 'auto', 'torch_dtype': 'bfloat16', 'use_4bit': False, 'bnb_4bit_quant_type': 'nf4', 'bnb_4bit_use_double_quant': False, 'bnb_4bit_compute_dtype': 'bfloat16', 'attn_implementation': None}, 'data': {'train_jsonl': 'dpo_pairs_generated.jsonl', 'eval_jsonl': None, 'eval_split_ratio': 0.1, 'prompt_field': 'prompt', 'chosen_field': 'chosen', 'rejected_field': 'rejected', 'score_field': 'f1_score', 'format_type': 'chatml', 'system_prompt': 'You are a Hyperswitch Rust code analyzer. Identify functions/structs that need modification for a given task.\n\n## Output Format\n\n##OUTPUT\nExplain the data flow and why each component must change:\n- Flow: [Input → Processing → Output with arrows]\n- For each component: "The [ComponentName] ([path]) must [action] because [reason]—without this, [consequence]"\n- Explain coupling between components\n\n##SELECT\nmodify::crates/path/to/file.rs::impl::ComponentName\nadd::crates/another/file.rs::function::AnotherComponent\n<EOS>\n\n## Rules\n\n1. Use full paths: `remove::crates/folder/file.rs::Type::Name`\n2. Use `::` for nested items: `status::StructName::Type::Name`\n3. Always explain "must change because" and "without this"\n3. Types of components: function, struct, enum, impl, trait\n4. If there is extra information (e.g., enum variants), include that too.\n5. Start with ##OUTPUT, end with ##SELECT, terminate with <EOS>\n', 'max_length': 2048, 'shuffle': True, 'num_proc': 4}, 'peft': {'enabled': True, 'r': 16, 'lora_alpha': 32, 'lora_dropout': 0.05, 'bias': 'none', 'target_modules': 'auto'}, 'dpo': {'beta': 0.1, 'label_smoothing': 0.0, 'loss_type': 'sigmoid', 'use_reference_model': True, 'reference_free': False}, 'train': {'num_train_epochs': 3, 'per_device_train_batch_size': 1, 'per_device_eval_batch_size': 1, 'gradient_accumulation_steps': 8, 'learning_rate': '5e-5', 'weight_decay': 0.0, 'warmup_ratio': 0.1, 'lr_scheduler_type': 'cosine', 'optim': 'adamw_torch', 'max_grad_norm': 1.0, 'gradient_checkpointing': True, 'logging_steps': 2, 'save_strategy': 'steps', 'save_steps': 100, 'save_total_limit': 10, 'evaluation_strategy': 'steps', 'eval_steps': 25, 'load_best_model_at_end': True, 'early_stopping': {'enabled': True, 'patience': 5, 'min_delta': 0.001, 'metric': 'eval_loss', 'mode': 'min'}, 'resume_from_checkpoint': 'auto'}, 'run_dir': 'runs/dpo_run_24b_v1', '_wandb': {}}
11
+ 2025-12-26 15:30:52,547 INFO MainThread:138039 [wandb_init.py:init():889] starting backend
12
+ 2025-12-26 15:30:52,815 INFO MainThread:138039 [wandb_init.py:init():892] sending inform_init request
13
+ 2025-12-26 15:30:52,819 INFO MainThread:138039 [wandb_init.py:init():900] backend started and connected
14
+ 2025-12-26 15:30:52,821 INFO MainThread:138039 [wandb_init.py:init():970] updated telemetry
15
+ 2025-12-26 15:30:52,822 INFO MainThread:138039 [wandb_init.py:init():994] communicating run to backend with 90.0 second timeout
16
+ 2025-12-26 15:30:53,141 INFO MainThread:138039 [wandb_init.py:init():1041] starting run threads in backend
17
+ 2025-12-26 15:30:53,249 INFO MainThread:138039 [wandb_run.py:_console_start():2521] atexit reg
18
+ 2025-12-26 15:30:53,249 INFO MainThread:138039 [wandb_run.py:_redirect():2369] redirect: wrap_raw
19
+ 2025-12-26 15:30:53,250 INFO MainThread:138039 [wandb_run.py:_redirect():2438] Wrapping output streams.
20
+ 2025-12-26 15:30:53,250 INFO MainThread:138039 [wandb_run.py:_redirect():2461] Redirects installed.
21
+ 2025-12-26 15:30:53,255 INFO MainThread:138039 [wandb_init.py:init():1081] run started, returning control to user process
22
+ 2025-12-26 15:30:55,736 INFO wandb-AsyncioManager-main:138039 [service_client.py:_forward_responses():80] Reached EOF.
23
+ 2025-12-26 15:30:55,736 INFO wandb-AsyncioManager-main:138039 [mailbox.py:close():137] Closing mailbox, abandoning 1 handles.
dpo_run_24b_v1/wandb/run-20251226_153052-uo02exvi/run-uo02exvi.wandb ADDED
Binary file (16.5 kB). View file
 
dpo_run_24b_v1/wandb/run-20251226_153152-wxs32uu8/files/config.yaml ADDED
@@ -0,0 +1,165 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _wandb:
2
+ value:
3
+ cli_version: 0.23.1
4
+ e:
5
+ 3kcue4vill6aqei124wdyde6gjjhpcn4:
6
+ args:
7
+ - --config
8
+ - config_dpo.yaml
9
+ codePath: run_dpo.py
10
+ codePathLocal: run_dpo.py
11
+ cpu_count: 12
12
+ cpu_count_logical: 24
13
+ cudaVersion: "13.0"
14
+ disk:
15
+ /:
16
+ total: "791251738624"
17
+ used: "317655887872"
18
+ email: shaiksirajuddin9949@gmail.com
19
+ executable: /workspace/llm_finetuning_env/bin/python
20
+ gpu: NVIDIA A100-SXM4-80GB
21
+ gpu_count: 2
22
+ gpu_nvidia:
23
+ - architecture: Ampere
24
+ cudaCores: 6912
25
+ memoryTotal: "85899345920"
26
+ name: NVIDIA A100-SXM4-80GB
27
+ uuid: GPU-989794b0-ec3b-13bf-db9f-3fbe341497ba
28
+ - architecture: Ampere
29
+ cudaCores: 6912
30
+ memoryTotal: "85899345920"
31
+ name: NVIDIA A100-SXM4-80GB
32
+ uuid: GPU-3790aa64-60ef-9eac-b0b1-b278ee8c0d40
33
+ host: a100-2gpu-shell-session-757d587799-mfdvv
34
+ memory:
35
+ total: "359047892992"
36
+ os: Linux-6.12.46+-x86_64-with-glibc2.35
37
+ program: /workspace/trainer-kit/DPO/run_dpo.py
38
+ python: CPython 3.10.12
39
+ root: runs/dpo_run_24b_v1
40
+ startedAt: "2025-12-26T15:31:52.735494Z"
41
+ writerId: 3kcue4vill6aqei124wdyde6gjjhpcn4
42
+ m: []
43
+ python_version: 3.10.12
44
+ t:
45
+ "1":
46
+ - 1
47
+ - 11
48
+ - 41
49
+ - 49
50
+ - 51
51
+ - 71
52
+ - 84
53
+ - 98
54
+ "2":
55
+ - 1
56
+ - 11
57
+ - 41
58
+ - 49
59
+ - 51
60
+ - 71
61
+ - 84
62
+ - 98
63
+ "3":
64
+ - 15
65
+ - 16
66
+ "4": 3.10.12
67
+ "5": 0.23.1
68
+ "6": 5.0.0.dev0
69
+ "12": 0.23.1
70
+ "13": linux-x86_64
71
+ data:
72
+ value:
73
+ chosen_field: chosen
74
+ eval_jsonl: null
75
+ eval_split_ratio: 0.1
76
+ format_type: chatml
77
+ max_length: 2048
78
+ num_proc: 4
79
+ prompt_field: prompt
80
+ rejected_field: rejected
81
+ score_field: f1_score
82
+ shuffle: true
83
+ system_prompt: |
84
+ You are a Hyperswitch Rust code analyzer. Identify functions/structs that need modification for a given task.
85
+
86
+ ## Output Format
87
+
88
+ ##OUTPUT
89
+ Explain the data flow and why each component must change:
90
+ - Flow: [Input → Processing → Output with arrows]
91
+ - For each component: "The [ComponentName] ([path]) must [action] because [reason]—without this, [consequence]"
92
+ - Explain coupling between components
93
+
94
+ ##SELECT
95
+ modify::crates/path/to/file.rs::impl::ComponentName
96
+ add::crates/another/file.rs::function::AnotherComponent
97
+ <EOS>
98
+
99
+ ## Rules
100
+
101
+ 1. Use full paths: `remove::crates/folder/file.rs::Type::Name`
102
+ 2. Use `::` for nested items: `status::StructName::Type::Name`
103
+ 3. Always explain "must change because" and "without this"
104
+ 3. Types of components: function, struct, enum, impl, trait
105
+ 4. If there is extra information (e.g., enum variants), include that too.
106
+ 5. Start with ##OUTPUT, end with ##SELECT, terminate with <EOS>
107
+ train_jsonl: dpo_pairs_generated.jsonl
108
+ dpo:
109
+ value:
110
+ beta: 0.1
111
+ label_smoothing: 0
112
+ loss_type: sigmoid
113
+ reference_free: false
114
+ use_reference_model: true
115
+ model:
116
+ value:
117
+ attn_implementation: null
118
+ base_local_dir: base_model
119
+ bnb_4bit_compute_dtype: bfloat16
120
+ bnb_4bit_quant_type: nf4
121
+ bnb_4bit_use_double_quant: false
122
+ device_map: auto
123
+ repo_id: ../../Models/Devstral-Small-2-24B-HS-CPT-SFT
124
+ revision: null
125
+ tokenizer_use_fast: true
126
+ torch_dtype: bfloat16
127
+ trust_remote_code: true
128
+ use_4bit: false
129
+ peft:
130
+ value:
131
+ bias: none
132
+ enabled: true
133
+ lora_alpha: 32
134
+ lora_dropout: 0.05
135
+ r: 16
136
+ target_modules: auto
137
+ run_dir:
138
+ value: runs/dpo_run_24b_v1
139
+ train:
140
+ value:
141
+ early_stopping:
142
+ enabled: true
143
+ metric: eval_loss
144
+ min_delta: 0.001
145
+ mode: min
146
+ patience: 5
147
+ eval_steps: 25
148
+ evaluation_strategy: steps
149
+ gradient_accumulation_steps: 8
150
+ gradient_checkpointing: true
151
+ learning_rate: "5e-5"
152
+ load_best_model_at_end: true
153
+ logging_steps: 2
154
+ lr_scheduler_type: cosine
155
+ max_grad_norm: 1
156
+ num_train_epochs: 3
157
+ optim: adamw_torch
158
+ per_device_eval_batch_size: 1
159
+ per_device_train_batch_size: 1
160
+ resume_from_checkpoint: auto
161
+ save_steps: 100
162
+ save_strategy: steps
163
+ save_total_limit: 10
164
+ warmup_ratio: 0.1
165
+ weight_decay: 0
dpo_run_24b_v1/wandb/run-20251226_153152-wxs32uu8/files/output.log ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Wandb initialized: project='dpo-training', name='auto-generated'
2
+ 2025-12-26 15:31:55,903 - INFO - Loading model from ../../Models/Devstral-Small-2-24B-HS-CPT-SFT with trust_remote_code=True
3
+ `torch_dtype` is deprecated! Use `dtype` instead!
4
+ Traceback (most recent call last):
5
+ File "/workspace/trainer-kit/DPO/run_dpo.py", line 559, in load_base_model_and_tokenizer
6
+ model = AutoModelForCausalLM.from_pretrained(
7
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/models/auto/auto_factory.py", line 376, in from_pretrained
8
+ raise ValueError(
9
+ ValueError: Unrecognized configuration class <class 'transformers.models.mistral3.configuration_mistral3.Mistral3Config'> for this kind of AutoModel: AutoModelForCausalLM.
10
+ Model type should be one of AfmoeConfig, ApertusConfig, ArceeConfig, AriaTextConfig, BambaConfig, BartConfig, BertConfig, BertGenerationConfig, BigBirdConfig, BigBirdPegasusConfig, BioGptConfig, BitNetConfig, BlenderbotConfig, BlenderbotSmallConfig, BloomConfig, BltConfig, CamembertConfig, LlamaConfig, CodeGenConfig, CohereConfig, Cohere2Config, CpmAntConfig, CTRLConfig, CwmConfig, Data2VecTextConfig, DbrxConfig, DeepseekV2Config, DeepseekV3Config, DiffLlamaConfig, DogeConfig, Dots1Config, ElectraConfig, Emu3Config, ErnieConfig, Ernie4_5Config, Ernie4_5_MoeConfig, Exaone4Config, FalconConfig, FalconH1Config, FalconMambaConfig, FlexOlmoConfig, FuyuConfig, GemmaConfig, Gemma2Config, Gemma3Config, Gemma3TextConfig, Gemma3nConfig, Gemma3nTextConfig, GitConfig, GlmConfig, Glm4Config, Glm4MoeConfig, GotOcr2Config, GPT2Config, GPT2Config, GPTBigCodeConfig, GPTNeoConfig, GPTNeoXConfig, GPTNeoXJapaneseConfig, GptOssConfig, GPTJConfig, GraniteConfig, GraniteMoeConfig, GraniteMoeHybridConfig, GraniteMoeSharedConfig, HeliumConfig, HunYuanDenseV1Config, HunYuanMoEV1Config, Jais2Config, JambaConfig, JetMoeConfig, Lfm2Config, Lfm2MoeConfig, LlamaConfig, Llama4Config, Llama4TextConfig, LongcatFlashConfig, MambaConfig, Mamba2Config, MarianConfig, MBartConfig, MegatronBertConfig, MiniMaxConfig, MinistralConfig, Ministral3Config, MistralConfig, MixtralConfig, MllamaConfig, ModernBertDecoderConfig, MoshiConfig, MptConfig, MusicgenConfig, MusicgenMelodyConfig, MvpConfig, NanoChatConfig, NemotronConfig, OlmoConfig, Olmo2Config, Olmo3Config, OlmoeConfig, OpenAIGPTConfig, OPTConfig, PegasusConfig, PersimmonConfig, PhiConfig, Phi3Config, Phi4MultimodalConfig, PhimoeConfig, PLBartConfig, ProphetNetConfig, Qwen2Config, Qwen2MoeConfig, Qwen3Config, Qwen3MoeConfig, Qwen3NextConfig, RecurrentGemmaConfig, ReformerConfig, RemBertConfig, RobertaConfig, RobertaPreLayerNormConfig, RoCBertConfig, RoFormerConfig, RwkvConfig, SeedOssConfig, SmolLM3Config, StableLmConfig, Starcoder2Config, TrOCRConfig, VaultGemmaConfig, WhisperConfig, XGLMConfig, XLMConfig, XLMRobertaConfig, XLMRobertaXLConfig, XLNetConfig, xLSTMConfig, XmodConfig, ZambaConfig, Zamba2Config.
11
+
12
+ During handling of the above exception, another exception occurred:
13
+
14
+ Traceback (most recent call last):
15
+ File "/workspace/trainer-kit/DPO/run_dpo.py", line 957, in <module>
16
+ main()
17
+ File "/workspace/trainer-kit/DPO/run_dpo.py", line 748, in main
18
+ model, tokenizer = load_base_model_and_tokenizer(cfg, base_dir)
19
+ File "/workspace/trainer-kit/DPO/run_dpo.py", line 572, in load_base_model_and_tokenizer
20
+ model = AutoModelForCausalLM.from_pretrained(
21
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/models/auto/auto_factory.py", line 376, in from_pretrained
22
+ raise ValueError(
23
+ ValueError: Unrecognized configuration class <class 'transformers.models.mistral3.configuration_mistral3.Mistral3Config'> for this kind of AutoModel: AutoModelForCausalLM.
24
+ Model type should be one of AfmoeConfig, ApertusConfig, ArceeConfig, AriaTextConfig, BambaConfig, BartConfig, BertConfig, BertGenerationConfig, BigBirdConfig, BigBirdPegasusConfig, BioGptConfig, BitNetConfig, BlenderbotConfig, BlenderbotSmallConfig, BloomConfig, BltConfig, CamembertConfig, LlamaConfig, CodeGenConfig, CohereConfig, Cohere2Config, CpmAntConfig, CTRLConfig, CwmConfig, Data2VecTextConfig, DbrxConfig, DeepseekV2Config, DeepseekV3Config, DiffLlamaConfig, DogeConfig, Dots1Config, ElectraConfig, Emu3Config, ErnieConfig, Ernie4_5Config, Ernie4_5_MoeConfig, Exaone4Config, FalconConfig, FalconH1Config, FalconMambaConfig, FlexOlmoConfig, FuyuConfig, GemmaConfig, Gemma2Config, Gemma3Config, Gemma3TextConfig, Gemma3nConfig, Gemma3nTextConfig, GitConfig, GlmConfig, Glm4Config, Glm4MoeConfig, GotOcr2Config, GPT2Config, GPT2Config, GPTBigCodeConfig, GPTNeoConfig, GPTNeoXConfig, GPTNeoXJapaneseConfig, GptOssConfig, GPTJConfig, GraniteConfig, GraniteMoeConfig, GraniteMoeHybridConfig, GraniteMoeSharedConfig, HeliumConfig, HunYuanDenseV1Config, HunYuanMoEV1Config, Jais2Config, JambaConfig, JetMoeConfig, Lfm2Config, Lfm2MoeConfig, LlamaConfig, Llama4Config, Llama4TextConfig, LongcatFlashConfig, MambaConfig, Mamba2Config, MarianConfig, MBartConfig, MegatronBertConfig, MiniMaxConfig, MinistralConfig, Ministral3Config, MistralConfig, MixtralConfig, MllamaConfig, ModernBertDecoderConfig, MoshiConfig, MptConfig, MusicgenConfig, MusicgenMelodyConfig, MvpConfig, NanoChatConfig, NemotronConfig, OlmoConfig, Olmo2Config, Olmo3Config, OlmoeConfig, OpenAIGPTConfig, OPTConfig, PegasusConfig, PersimmonConfig, PhiConfig, Phi3Config, Phi4MultimodalConfig, PhimoeConfig, PLBartConfig, ProphetNetConfig, Qwen2Config, Qwen2MoeConfig, Qwen3Config, Qwen3MoeConfig, Qwen3NextConfig, RecurrentGemmaConfig, ReformerConfig, RemBertConfig, RobertaConfig, RobertaPreLayerNormConfig, RoCBertConfig, RoFormerConfig, RwkvConfig, SeedOssConfig, SmolLM3Config, StableLmConfig, Starcoder2Config, TrOCRConfig, VaultGemmaConfig, WhisperConfig, XGLMConfig, XLMConfig, XLMRobertaConfig, XLMRobertaXLConfig, XLNetConfig, xLSTMConfig, XmodConfig, ZambaConfig, Zamba2Config.
25
+ Traceback (most recent call last):
26
+ File "/workspace/trainer-kit/DPO/run_dpo.py", line 559, in load_base_model_and_tokenizer
27
+ model = AutoModelForCausalLM.from_pretrained(
28
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/models/auto/auto_factory.py", line 376, in from_pretrained
29
+ raise ValueError(
30
+ ValueError: Unrecognized configuration class <class 'transformers.models.mistral3.configuration_mistral3.Mistral3Config'> for this kind of AutoModel: AutoModelForCausalLM.
31
+ Model type should be one of AfmoeConfig, ApertusConfig, ArceeConfig, AriaTextConfig, BambaConfig, BartConfig, BertConfig, BertGenerationConfig, BigBirdConfig, BigBirdPegasusConfig, BioGptConfig, BitNetConfig, BlenderbotConfig, BlenderbotSmallConfig, BloomConfig, BltConfig, CamembertConfig, LlamaConfig, CodeGenConfig, CohereConfig, Cohere2Config, CpmAntConfig, CTRLConfig, CwmConfig, Data2VecTextConfig, DbrxConfig, DeepseekV2Config, DeepseekV3Config, DiffLlamaConfig, DogeConfig, Dots1Config, ElectraConfig, Emu3Config, ErnieConfig, Ernie4_5Config, Ernie4_5_MoeConfig, Exaone4Config, FalconConfig, FalconH1Config, FalconMambaConfig, FlexOlmoConfig, FuyuConfig, GemmaConfig, Gemma2Config, Gemma3Config, Gemma3TextConfig, Gemma3nConfig, Gemma3nTextConfig, GitConfig, GlmConfig, Glm4Config, Glm4MoeConfig, GotOcr2Config, GPT2Config, GPT2Config, GPTBigCodeConfig, GPTNeoConfig, GPTNeoXConfig, GPTNeoXJapaneseConfig, GptOssConfig, GPTJConfig, GraniteConfig, GraniteMoeConfig, GraniteMoeHybridConfig, GraniteMoeSharedConfig, HeliumConfig, HunYuanDenseV1Config, HunYuanMoEV1Config, Jais2Config, JambaConfig, JetMoeConfig, Lfm2Config, Lfm2MoeConfig, LlamaConfig, Llama4Config, Llama4TextConfig, LongcatFlashConfig, MambaConfig, Mamba2Config, MarianConfig, MBartConfig, MegatronBertConfig, MiniMaxConfig, MinistralConfig, Ministral3Config, MistralConfig, MixtralConfig, MllamaConfig, ModernBertDecoderConfig, MoshiConfig, MptConfig, MusicgenConfig, MusicgenMelodyConfig, MvpConfig, NanoChatConfig, NemotronConfig, OlmoConfig, Olmo2Config, Olmo3Config, OlmoeConfig, OpenAIGPTConfig, OPTConfig, PegasusConfig, PersimmonConfig, PhiConfig, Phi3Config, Phi4MultimodalConfig, PhimoeConfig, PLBartConfig, ProphetNetConfig, Qwen2Config, Qwen2MoeConfig, Qwen3Config, Qwen3MoeConfig, Qwen3NextConfig, RecurrentGemmaConfig, ReformerConfig, RemBertConfig, RobertaConfig, RobertaPreLayerNormConfig, RoCBertConfig, RoFormerConfig, RwkvConfig, SeedOssConfig, SmolLM3Config, StableLmConfig, Starcoder2Config, TrOCRConfig, VaultGemmaConfig, WhisperConfig, XGLMConfig, XLMConfig, XLMRobertaConfig, XLMRobertaXLConfig, XLNetConfig, xLSTMConfig, XmodConfig, ZambaConfig, Zamba2Config.
32
+
33
+ During handling of the above exception, another exception occurred:
34
+
35
+ Traceback (most recent call last):
36
+ File "/workspace/trainer-kit/DPO/run_dpo.py", line 957, in <module>
37
+ main()
38
+ File "/workspace/trainer-kit/DPO/run_dpo.py", line 748, in main
39
+ model, tokenizer = load_base_model_and_tokenizer(cfg, base_dir)
40
+ File "/workspace/trainer-kit/DPO/run_dpo.py", line 572, in load_base_model_and_tokenizer
41
+ model = AutoModelForCausalLM.from_pretrained(
42
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/models/auto/auto_factory.py", line 376, in from_pretrained
43
+ raise ValueError(
44
+ ValueError: Unrecognized configuration class <class 'transformers.models.mistral3.configuration_mistral3.Mistral3Config'> for this kind of AutoModel: AutoModelForCausalLM.
45
+ Model type should be one of AfmoeConfig, ApertusConfig, ArceeConfig, AriaTextConfig, BambaConfig, BartConfig, BertConfig, BertGenerationConfig, BigBirdConfig, BigBirdPegasusConfig, BioGptConfig, BitNetConfig, BlenderbotConfig, BlenderbotSmallConfig, BloomConfig, BltConfig, CamembertConfig, LlamaConfig, CodeGenConfig, CohereConfig, Cohere2Config, CpmAntConfig, CTRLConfig, CwmConfig, Data2VecTextConfig, DbrxConfig, DeepseekV2Config, DeepseekV3Config, DiffLlamaConfig, DogeConfig, Dots1Config, ElectraConfig, Emu3Config, ErnieConfig, Ernie4_5Config, Ernie4_5_MoeConfig, Exaone4Config, FalconConfig, FalconH1Config, FalconMambaConfig, FlexOlmoConfig, FuyuConfig, GemmaConfig, Gemma2Config, Gemma3Config, Gemma3TextConfig, Gemma3nConfig, Gemma3nTextConfig, GitConfig, GlmConfig, Glm4Config, Glm4MoeConfig, GotOcr2Config, GPT2Config, GPT2Config, GPTBigCodeConfig, GPTNeoConfig, GPTNeoXConfig, GPTNeoXJapaneseConfig, GptOssConfig, GPTJConfig, GraniteConfig, GraniteMoeConfig, GraniteMoeHybridConfig, GraniteMoeSharedConfig, HeliumConfig, HunYuanDenseV1Config, HunYuanMoEV1Config, Jais2Config, JambaConfig, JetMoeConfig, Lfm2Config, Lfm2MoeConfig, LlamaConfig, Llama4Config, Llama4TextConfig, LongcatFlashConfig, MambaConfig, Mamba2Config, MarianConfig, MBartConfig, MegatronBertConfig, MiniMaxConfig, MinistralConfig, Ministral3Config, MistralConfig, MixtralConfig, MllamaConfig, ModernBertDecoderConfig, MoshiConfig, MptConfig, MusicgenConfig, MusicgenMelodyConfig, MvpConfig, NanoChatConfig, NemotronConfig, OlmoConfig, Olmo2Config, Olmo3Config, OlmoeConfig, OpenAIGPTConfig, OPTConfig, PegasusConfig, PersimmonConfig, PhiConfig, Phi3Config, Phi4MultimodalConfig, PhimoeConfig, PLBartConfig, ProphetNetConfig, Qwen2Config, Qwen2MoeConfig, Qwen3Config, Qwen3MoeConfig, Qwen3NextConfig, RecurrentGemmaConfig, ReformerConfig, RemBertConfig, RobertaConfig, RobertaPreLayerNormConfig, RoCBertConfig, RoFormerConfig, RwkvConfig, SeedOssConfig, SmolLM3Config, StableLmConfig, Starcoder2Config, TrOCRConfig, VaultGemmaConfig, WhisperConfig, XGLMConfig, XLMConfig, XLMRobertaConfig, XLMRobertaXLConfig, XLNetConfig, xLSTMConfig, XmodConfig, ZambaConfig, Zamba2Config.
dpo_run_24b_v1/wandb/run-20251226_153152-wxs32uu8/files/requirements.txt ADDED
@@ -0,0 +1,104 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ exceptiongroup==1.3.1
2
+ wheel==0.45.1
3
+ python-dateutil==2.9.0.post0
4
+ nvidia-ml-py==13.580.82
5
+ huggingface_hub==1.2.3
6
+ idna==3.11
7
+ click==8.3.1
8
+ numpy==2.2.6
9
+ httpx==0.28.1
10
+ tokenizers==0.22.1
11
+ sympy==1.13.1
12
+ yarl==1.22.0
13
+ async-timeout==5.0.1
14
+ datasets==4.4.2
15
+ platformdirs==4.5.1
16
+ nvidia-cuda-cupti-cu12==12.1.105
17
+ nvidia-nvtx-cu12==12.1.105
18
+ smmap==5.0.2
19
+ accelerate==1.12.0
20
+ requests==2.32.5
21
+ aiohttp==3.13.2
22
+ bitsandbytes==0.49.0
23
+ nvidia-cublas-cu12==12.1.3.1
24
+ mpmath==1.3.0
25
+ torchaudio==2.5.1+cu121
26
+ nvidia-cuda-runtime-cu12==12.1.105
27
+ typing-inspection==0.4.2
28
+ GitPython==3.1.45
29
+ xxhash==3.6.0
30
+ nvidia-cusolver-cu12==11.4.5.107
31
+ pydantic_core==2.41.5
32
+ six==1.17.0
33
+ torchvision==0.20.1+cu121
34
+ typing_extensions==4.15.0
35
+ triton==3.1.0
36
+ charset-normalizer==3.4.4
37
+ nvitop==1.6.1
38
+ wandb==0.23.1
39
+ regex==2025.11.3
40
+ pip==25.3
41
+ nvidia-cusparse-cu12==12.1.0.106
42
+ pytz==2025.2
43
+ Jinja2==3.1.6
44
+ psutil==7.2.0
45
+ pillow==12.0.0
46
+ packaging==25.0
47
+ safetensors==0.7.0
48
+ sentry-sdk==2.48.0
49
+ gitdb==4.0.12
50
+ httpcore==1.0.9
51
+ setuptools==80.9.0
52
+ nvidia-cufft-cu12==11.0.2.54
53
+ anyio==4.12.0
54
+ transformers==5.0.0.dev0
55
+ pydantic==2.12.5
56
+ fsspec==2025.10.0
57
+ filelock==3.20.0
58
+ PyYAML==6.0.3
59
+ hf-xet==1.2.0
60
+ nvidia-cudnn-cu12==9.1.0.70
61
+ tqdm==4.67.1
62
+ MarkupSafe==2.1.5
63
+ attrs==25.4.0
64
+ nvidia-cuda-nvrtc-cu12==12.1.105
65
+ peft==0.18.0
66
+ aiohappyeyeballs==2.6.1
67
+ networkx==3.4.2
68
+ nvidia-nvjitlink-cu12==12.9.86
69
+ certifi==2025.11.12
70
+ pyarrow==22.0.0
71
+ dill==0.4.0
72
+ protobuf==6.33.2
73
+ aiosignal==1.4.0
74
+ frozenlist==1.8.0
75
+ urllib3==2.6.2
76
+ propcache==0.4.1
77
+ tzdata==2025.3
78
+ pandas==2.3.3
79
+ annotated-types==0.7.0
80
+ shellingham==1.5.4
81
+ nvidia-nccl-cu12==2.21.5
82
+ multidict==6.7.0
83
+ nvidia-curand-cu12==10.3.2.106
84
+ trl==0.26.2
85
+ torch==2.5.1+cu121
86
+ h11==0.16.0
87
+ multiprocess==0.70.18
88
+ typer-slim==0.21.0
89
+ wheel==0.45.1
90
+ tomli==2.0.1
91
+ autocommand==2.2.2
92
+ jaraco.context==5.3.0
93
+ zipp==3.19.2
94
+ packaging==24.2
95
+ inflect==7.3.1
96
+ typing_extensions==4.12.2
97
+ platformdirs==4.2.2
98
+ jaraco.functools==4.0.1
99
+ jaraco.collections==5.1.0
100
+ jaraco.text==3.12.1
101
+ backports.tarfile==1.2.0
102
+ more-itertools==10.3.0
103
+ importlib_metadata==8.0.0
104
+ typeguard==4.3.0
dpo_run_24b_v1/wandb/run-20251226_153152-wxs32uu8/files/wandb-metadata.json ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "os": "Linux-6.12.46+-x86_64-with-glibc2.35",
3
+ "python": "CPython 3.10.12",
4
+ "startedAt": "2025-12-26T15:31:52.735494Z",
5
+ "args": [
6
+ "--config",
7
+ "config_dpo.yaml"
8
+ ],
9
+ "program": "/workspace/trainer-kit/DPO/run_dpo.py",
10
+ "codePath": "run_dpo.py",
11
+ "codePathLocal": "run_dpo.py",
12
+ "email": "shaiksirajuddin9949@gmail.com",
13
+ "root": "runs/dpo_run_24b_v1",
14
+ "host": "a100-2gpu-shell-session-757d587799-mfdvv",
15
+ "executable": "/workspace/llm_finetuning_env/bin/python",
16
+ "cpu_count": 12,
17
+ "cpu_count_logical": 24,
18
+ "gpu": "NVIDIA A100-SXM4-80GB",
19
+ "gpu_count": 2,
20
+ "disk": {
21
+ "/": {
22
+ "total": "791251738624",
23
+ "used": "317655887872"
24
+ }
25
+ },
26
+ "memory": {
27
+ "total": "359047892992"
28
+ },
29
+ "gpu_nvidia": [
30
+ {
31
+ "name": "NVIDIA A100-SXM4-80GB",
32
+ "memoryTotal": "85899345920",
33
+ "cudaCores": 6912,
34
+ "architecture": "Ampere",
35
+ "uuid": "GPU-989794b0-ec3b-13bf-db9f-3fbe341497ba"
36
+ },
37
+ {
38
+ "name": "NVIDIA A100-SXM4-80GB",
39
+ "memoryTotal": "85899345920",
40
+ "cudaCores": 6912,
41
+ "architecture": "Ampere",
42
+ "uuid": "GPU-3790aa64-60ef-9eac-b0b1-b278ee8c0d40"
43
+ }
44
+ ],
45
+ "cudaVersion": "13.0",
46
+ "writerId": "3kcue4vill6aqei124wdyde6gjjhpcn4"
47
+ }
dpo_run_24b_v1/wandb/run-20251226_153152-wxs32uu8/files/wandb-summary.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"_wandb":{"runtime":2},"_runtime":2}
dpo_run_24b_v1/wandb/run-20251226_153152-wxs32uu8/logs/debug-core.log ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"time":"2025-12-26T15:31:52.82802196Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpqkio4efp/port-138465.txt","pid":138465,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false}
2
+ {"time":"2025-12-26T15:31:52.828849471Z","level":"INFO","msg":"server: will exit if parent process dies","ppid":138465}
3
+ {"time":"2025-12-26T15:31:52.828808724Z","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-138465-138539-4238173929/socket","Net":"unix"}}
4
+ {"time":"2025-12-26T15:31:53.011057052Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"}
5
+ {"time":"2025-12-26T15:31:53.017383764Z","level":"INFO","msg":"handleInformInit: received","streamId":"wxs32uu8","id":"1(@)"}
6
+ {"time":"2025-12-26T15:31:53.176090883Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"wxs32uu8","id":"1(@)"}
7
+ {"time":"2025-12-26T15:31:56.103528174Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"}
8
+ {"time":"2025-12-26T15:31:56.103608964Z","level":"INFO","msg":"connection: closing","id":"1(@)"}
9
+ {"time":"2025-12-26T15:31:56.103654139Z","level":"INFO","msg":"server is shutting down"}
10
+ {"time":"2025-12-26T15:31:56.103666347Z","level":"INFO","msg":"connection: closed successfully","id":"1(@)"}
11
+ {"time":"2025-12-26T15:31:56.103767972Z","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-138465-138539-4238173929/socket","Net":"unix"}}
12
+ {"time":"2025-12-26T15:31:56.539431271Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"}
13
+ {"time":"2025-12-26T15:31:56.539481282Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"}
14
+ {"time":"2025-12-26T15:31:56.539506784Z","level":"INFO","msg":"server is closed"}
dpo_run_24b_v1/wandb/run-20251226_153152-wxs32uu8/logs/debug-internal.log ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"time":"2025-12-26T15:31:53.017531144Z","level":"INFO","msg":"stream: starting","core version":"0.23.1"}
2
+ {"time":"2025-12-26T15:31:53.175841849Z","level":"INFO","msg":"stream: created new stream","id":"wxs32uu8"}
3
+ {"time":"2025-12-26T15:31:53.175922557Z","level":"INFO","msg":"handler: started","stream_id":"wxs32uu8"}
4
+ {"time":"2025-12-26T15:31:53.176076095Z","level":"INFO","msg":"stream: started","id":"wxs32uu8"}
5
+ {"time":"2025-12-26T15:31:53.176100332Z","level":"INFO","msg":"writer: started","stream_id":"wxs32uu8"}
6
+ {"time":"2025-12-26T15:31:53.176110819Z","level":"INFO","msg":"sender: started","stream_id":"wxs32uu8"}
7
+ {"time":"2025-12-26T15:31:56.103613554Z","level":"INFO","msg":"stream: closing","id":"wxs32uu8"}
8
+ {"time":"2025-12-26T15:31:56.401489323Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
9
+ {"time":"2025-12-26T15:31:56.538246836Z","level":"INFO","msg":"handler: closed","stream_id":"wxs32uu8"}
10
+ {"time":"2025-12-26T15:31:56.538337251Z","level":"INFO","msg":"sender: closed","stream_id":"wxs32uu8"}
11
+ {"time":"2025-12-26T15:31:56.538352318Z","level":"INFO","msg":"stream: closed","id":"wxs32uu8"}
dpo_run_24b_v1/wandb/run-20251226_153152-wxs32uu8/logs/debug.log ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2025-12-26 15:31:52,737 INFO MainThread:138465 [wandb_setup.py:_flush():80] Current SDK version is 0.23.1
2
+ 2025-12-26 15:31:52,737 INFO MainThread:138465 [wandb_setup.py:_flush():80] Configure stats pid to 138465
3
+ 2025-12-26 15:31:52,737 INFO MainThread:138465 [wandb_setup.py:_flush():80] Loading settings from /root/.config/wandb/settings
4
+ 2025-12-26 15:31:52,737 INFO MainThread:138465 [wandb_setup.py:_flush():80] Loading settings from /workspace/trainer-kit/DPO/wandb/settings
5
+ 2025-12-26 15:31:52,737 INFO MainThread:138465 [wandb_setup.py:_flush():80] Loading settings from environment variables
6
+ 2025-12-26 15:31:52,737 INFO MainThread:138465 [wandb_init.py:setup_run_log_directory():714] Logging user logs to runs/dpo_run_24b_v1/wandb/run-20251226_153152-wxs32uu8/logs/debug.log
7
+ 2025-12-26 15:31:52,737 INFO MainThread:138465 [wandb_init.py:setup_run_log_directory():715] Logging internal logs to runs/dpo_run_24b_v1/wandb/run-20251226_153152-wxs32uu8/logs/debug-internal.log
8
+ 2025-12-26 15:31:52,737 INFO MainThread:138465 [wandb_init.py:init():841] calling init triggers
9
+ 2025-12-26 15:31:52,737 INFO MainThread:138465 [wandb_init.py:init():846] wandb.init called with sweep_config: {}
10
+ config: {'model': {'repo_id': '../../Models/Devstral-Small-2-24B-HS-CPT-SFT', 'revision': None, 'base_local_dir': 'base_model', 'trust_remote_code': True, 'tokenizer_use_fast': True, 'device_map': 'auto', 'torch_dtype': 'bfloat16', 'use_4bit': False, 'bnb_4bit_quant_type': 'nf4', 'bnb_4bit_use_double_quant': False, 'bnb_4bit_compute_dtype': 'bfloat16', 'attn_implementation': None}, 'data': {'train_jsonl': 'dpo_pairs_generated.jsonl', 'eval_jsonl': None, 'eval_split_ratio': 0.1, 'prompt_field': 'prompt', 'chosen_field': 'chosen', 'rejected_field': 'rejected', 'score_field': 'f1_score', 'format_type': 'chatml', 'system_prompt': 'You are a Hyperswitch Rust code analyzer. Identify functions/structs that need modification for a given task.\n\n## Output Format\n\n##OUTPUT\nExplain the data flow and why each component must change:\n- Flow: [Input → Processing → Output with arrows]\n- For each component: "The [ComponentName] ([path]) must [action] because [reason]—without this, [consequence]"\n- Explain coupling between components\n\n##SELECT\nmodify::crates/path/to/file.rs::impl::ComponentName\nadd::crates/another/file.rs::function::AnotherComponent\n<EOS>\n\n## Rules\n\n1. Use full paths: `remove::crates/folder/file.rs::Type::Name`\n2. Use `::` for nested items: `status::StructName::Type::Name`\n3. Always explain "must change because" and "without this"\n3. Types of components: function, struct, enum, impl, trait\n4. If there is extra information (e.g., enum variants), include that too.\n5. Start with ##OUTPUT, end with ##SELECT, terminate with <EOS>\n', 'max_length': 2048, 'shuffle': True, 'num_proc': 4}, 'peft': {'enabled': True, 'r': 16, 'lora_alpha': 32, 'lora_dropout': 0.05, 'bias': 'none', 'target_modules': 'auto'}, 'dpo': {'beta': 0.1, 'label_smoothing': 0.0, 'loss_type': 'sigmoid', 'use_reference_model': True, 'reference_free': False}, 'train': {'num_train_epochs': 3, 'per_device_train_batch_size': 1, 'per_device_eval_batch_size': 1, 'gradient_accumulation_steps': 8, 'learning_rate': '5e-5', 'weight_decay': 0.0, 'warmup_ratio': 0.1, 'lr_scheduler_type': 'cosine', 'optim': 'adamw_torch', 'max_grad_norm': 1.0, 'gradient_checkpointing': True, 'logging_steps': 2, 'save_strategy': 'steps', 'save_steps': 100, 'save_total_limit': 10, 'evaluation_strategy': 'steps', 'eval_steps': 25, 'load_best_model_at_end': True, 'early_stopping': {'enabled': True, 'patience': 5, 'min_delta': 0.001, 'metric': 'eval_loss', 'mode': 'min'}, 'resume_from_checkpoint': 'auto'}, 'run_dir': 'runs/dpo_run_24b_v1', '_wandb': {}}
11
+ 2025-12-26 15:31:52,737 INFO MainThread:138465 [wandb_init.py:init():889] starting backend
12
+ 2025-12-26 15:31:53,010 INFO MainThread:138465 [wandb_init.py:init():892] sending inform_init request
13
+ 2025-12-26 15:31:53,015 INFO MainThread:138465 [wandb_init.py:init():900] backend started and connected
14
+ 2025-12-26 15:31:53,017 INFO MainThread:138465 [wandb_init.py:init():970] updated telemetry
15
+ 2025-12-26 15:31:53,018 INFO MainThread:138465 [wandb_init.py:init():994] communicating run to backend with 90.0 second timeout
16
+ 2025-12-26 15:31:53,479 INFO MainThread:138465 [wandb_init.py:init():1041] starting run threads in backend
17
+ 2025-12-26 15:31:53,592 INFO MainThread:138465 [wandb_run.py:_console_start():2521] atexit reg
18
+ 2025-12-26 15:31:53,592 INFO MainThread:138465 [wandb_run.py:_redirect():2369] redirect: wrap_raw
19
+ 2025-12-26 15:31:53,592 INFO MainThread:138465 [wandb_run.py:_redirect():2438] Wrapping output streams.
20
+ 2025-12-26 15:31:53,592 INFO MainThread:138465 [wandb_run.py:_redirect():2461] Redirects installed.
21
+ 2025-12-26 15:31:53,597 INFO MainThread:138465 [wandb_init.py:init():1081] run started, returning control to user process
22
+ 2025-12-26 15:31:56,103 INFO wandb-AsyncioManager-main:138465 [service_client.py:_forward_responses():80] Reached EOF.
23
+ 2025-12-26 15:31:56,103 INFO wandb-AsyncioManager-main:138465 [mailbox.py:close():137] Closing mailbox, abandoning 1 handles.
dpo_run_24b_v1/wandb/run-20251226_153152-wxs32uu8/run-wxs32uu8.wandb ADDED
Binary file (16.6 kB). View file
 
dpo_run_24b_v1/wandb/run-20251226_153336-fb8js9es/files/config.yaml ADDED
@@ -0,0 +1,165 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _wandb:
2
+ value:
3
+ cli_version: 0.23.1
4
+ e:
5
+ yg288fsgj0ia8gpi3lsxxd4iicxacpwh:
6
+ args:
7
+ - --config
8
+ - config_dpo.yaml
9
+ codePath: run_dpo.py
10
+ codePathLocal: run_dpo.py
11
+ cpu_count: 12
12
+ cpu_count_logical: 24
13
+ cudaVersion: "13.0"
14
+ disk:
15
+ /:
16
+ total: "791251738624"
17
+ used: "318370951168"
18
+ email: shaiksirajuddin9949@gmail.com
19
+ executable: /workspace/llm_finetuning_env/bin/python
20
+ gpu: NVIDIA A100-SXM4-80GB
21
+ gpu_count: 2
22
+ gpu_nvidia:
23
+ - architecture: Ampere
24
+ cudaCores: 6912
25
+ memoryTotal: "85899345920"
26
+ name: NVIDIA A100-SXM4-80GB
27
+ uuid: GPU-989794b0-ec3b-13bf-db9f-3fbe341497ba
28
+ - architecture: Ampere
29
+ cudaCores: 6912
30
+ memoryTotal: "85899345920"
31
+ name: NVIDIA A100-SXM4-80GB
32
+ uuid: GPU-3790aa64-60ef-9eac-b0b1-b278ee8c0d40
33
+ host: a100-2gpu-shell-session-757d587799-mfdvv
34
+ memory:
35
+ total: "359047892992"
36
+ os: Linux-6.12.46+-x86_64-with-glibc2.35
37
+ program: /workspace/trainer-kit/DPO/run_dpo.py
38
+ python: CPython 3.10.12
39
+ root: runs/dpo_run_24b_v1
40
+ startedAt: "2025-12-26T15:33:36.434359Z"
41
+ writerId: yg288fsgj0ia8gpi3lsxxd4iicxacpwh
42
+ m: []
43
+ python_version: 3.10.12
44
+ t:
45
+ "1":
46
+ - 1
47
+ - 11
48
+ - 41
49
+ - 49
50
+ - 51
51
+ - 71
52
+ - 84
53
+ - 98
54
+ "2":
55
+ - 1
56
+ - 11
57
+ - 41
58
+ - 49
59
+ - 51
60
+ - 71
61
+ - 84
62
+ - 98
63
+ "3":
64
+ - 15
65
+ - 16
66
+ "4": 3.10.12
67
+ "5": 0.23.1
68
+ "6": 5.0.0.dev0
69
+ "12": 0.23.1
70
+ "13": linux-x86_64
71
+ data:
72
+ value:
73
+ chosen_field: chosen
74
+ eval_jsonl: null
75
+ eval_split_ratio: 0.1
76
+ format_type: chatml
77
+ max_length: 2048
78
+ num_proc: 4
79
+ prompt_field: prompt
80
+ rejected_field: rejected
81
+ score_field: f1_score
82
+ shuffle: true
83
+ system_prompt: |
84
+ You are a Hyperswitch Rust code analyzer. Identify functions/structs that need modification for a given task.
85
+
86
+ ## Output Format
87
+
88
+ ##OUTPUT
89
+ Explain the data flow and why each component must change:
90
+ - Flow: [Input → Processing → Output with arrows]
91
+ - For each component: "The [ComponentName] ([path]) must [action] because [reason]—without this, [consequence]"
92
+ - Explain coupling between components
93
+
94
+ ##SELECT
95
+ modify::crates/path/to/file.rs::impl::ComponentName
96
+ add::crates/another/file.rs::function::AnotherComponent
97
+ <EOS>
98
+
99
+ ## Rules
100
+
101
+ 1. Use full paths: `remove::crates/folder/file.rs::Type::Name`
102
+ 2. Use `::` for nested items: `status::StructName::Type::Name`
103
+ 3. Always explain "must change because" and "without this"
104
+ 3. Types of components: function, struct, enum, impl, trait
105
+ 4. If there is extra information (e.g., enum variants), include that too.
106
+ 5. Start with ##OUTPUT, end with ##SELECT, terminate with <EOS>
107
+ train_jsonl: dpo_pairs_generated.jsonl
108
+ dpo:
109
+ value:
110
+ beta: 0.1
111
+ label_smoothing: 0
112
+ loss_type: sigmoid
113
+ reference_free: false
114
+ use_reference_model: true
115
+ model:
116
+ value:
117
+ attn_implementation: null
118
+ base_local_dir: base_model
119
+ bnb_4bit_compute_dtype: bfloat16
120
+ bnb_4bit_quant_type: nf4
121
+ bnb_4bit_use_double_quant: false
122
+ device_map: auto
123
+ repo_id: ../../Models/Devstral-Small-2-24B-HS-CPT-SFT
124
+ revision: null
125
+ tokenizer_use_fast: true
126
+ torch_dtype: bfloat16
127
+ trust_remote_code: true
128
+ use_4bit: false
129
+ peft:
130
+ value:
131
+ bias: none
132
+ enabled: true
133
+ lora_alpha: 32
134
+ lora_dropout: 0.05
135
+ r: 16
136
+ target_modules: auto
137
+ run_dir:
138
+ value: runs/dpo_run_24b_v1
139
+ train:
140
+ value:
141
+ early_stopping:
142
+ enabled: true
143
+ metric: eval_loss
144
+ min_delta: 0.001
145
+ mode: min
146
+ patience: 5
147
+ eval_steps: 25
148
+ evaluation_strategy: steps
149
+ gradient_accumulation_steps: 8
150
+ gradient_checkpointing: true
151
+ learning_rate: "5e-5"
152
+ load_best_model_at_end: true
153
+ logging_steps: 2
154
+ lr_scheduler_type: cosine
155
+ max_grad_norm: 1
156
+ num_train_epochs: 3
157
+ optim: adamw_torch
158
+ per_device_eval_batch_size: 1
159
+ per_device_train_batch_size: 1
160
+ resume_from_checkpoint: auto
161
+ save_steps: 100
162
+ save_strategy: steps
163
+ save_total_limit: 10
164
+ warmup_ratio: 0.1
165
+ weight_decay: 0
dpo_run_24b_v1/wandb/run-20251226_153336-fb8js9es/files/output.log ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Wandb initialized: project='dpo-training', name='auto-generated'
2
+ 2025-12-26 15:33:39,399 - INFO - Detected Mistral3 model architecture, loading with specific class
3
+ Loading weights: 100%|█| 585/585 [00:13<00:00, 41.90it/s, Materializing param=model.vision_tower.transfo
4
+ 2025-12-26 15:33:57,664 - INFO - Ensuring all parameters are materialized...
5
+ Loading reference model (frozen copy)...
6
+ 2025-12-26 15:34:02,022 - INFO - Detected Mistral3 model architecture, loading with specific class
7
+ Loading weights: 100%|█| 585/585 [00:13<00:00, 42.74it/s, Materializing param=model.vision_tower.transfo
8
+ 2025-12-26 15:34:17,738 - INFO - Ensuring all parameters are materialized...
9
+ Reference model loaded and frozen
10
+ 2025-12-26 15:34:18,661 - INFO - HTTP Request: HEAD https://s3.amazonaws.com/datasets.huggingface.co/datasets/datasets/json/json.py "HTTP/1.1 200 OK"
11
+ Generating train split: 7612 examples [00:00, 76349.43 examples/s]
12
+ 2025-12-26 15:34:18,785 - INFO - Formatting train DPO data...
13
+ Formatting train DPO data (num_proc=4): 100%|██████████████| 6850/6850 [00:02<00:00, 2809.98 examples/s]
14
+ Filter: 100%|█████████████████████████████████████████████| 6850/6850 [00:00<00:00, 58478.08 examples/s]
15
+ 2025-12-26 15:34:21,607 - INFO - Train dataset after filtering: 6850 examples
16
+ 2025-12-26 15:34:21,608 - INFO - train dataset validation passed: 6850 examples
17
+ 2025-12-26 15:34:21,608 - INFO - Formatting eval DPO data...
18
+ Formatting eval DPO data (num_proc=4): 100%|██████████████████| 762/762 [00:02<00:00, 332.47 examples/s]
19
+ Filter: 100%|███████████████████████████████████████████████| 762/762 [00:00<00:00, 36813.75 examples/s]
20
+ 2025-12-26 15:34:24,198 - INFO - Eval dataset after filtering: 762 examples
21
+ 2025-12-26 15:34:24,199 - INFO - eval dataset validation passed: 762 examples
22
+ warmup_ratio is deprecated and will be removed in v5.2. Use `warmup_steps` instead.
23
+ Early stopping enabled: patience=5, min_delta=0.001
24
+ 2025-12-26 15:34:24,239 - INFO - DPO Training with beta=0.1, loss_type=sigmoid
25
+ warmup_ratio is deprecated and will be removed in v5.2. Use `warmup_steps` instead.
26
+ Extracting prompt in train dataset: 100%|██████████████████| 6850/6850 [00:01<00:00, 5396.61 examples/s]
27
+ Applying chat template to train dataset: 100%|█████████████| 6850/6850 [00:00<00:00, 8653.30 examples/s]
28
+ Tokenizing train dataset: 0%| | 0/6850 [00:00<?, ? examples/s]
29
+ Traceback (most recent call last):
30
+ File "/workspace/trainer-kit/DPO/run_dpo.py", line 1011, in <module>
31
+ main()
32
+ File "/workspace/trainer-kit/DPO/run_dpo.py", line 967, in main
33
+ trainer = DPOTrainer(
34
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/trl/trainer/dpo_trainer.py", line 480, in __init__
35
+ train_dataset = self._prepare_dataset(train_dataset, processing_class, args, "train")
36
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/trl/trainer/dpo_trainer.py", line 654, in _prepare_dataset
37
+ dataset = dataset.map(
38
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/datasets/arrow_dataset.py", line 562, in wrapper
39
+ out: Union["Dataset", "DatasetDict"] = func(self, *args, **kwargs)
40
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/datasets/arrow_dataset.py", line 3341, in map
41
+ for rank, done, content in Dataset._map_single(**unprocessed_kwargs):
42
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/datasets/arrow_dataset.py", line 3673, in _map_single
43
+ for i, example in iter_outputs(shard_iterable):
44
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/datasets/arrow_dataset.py", line 3647, in iter_outputs
45
+ yield i, apply_function(example, i, offset=offset)
46
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/datasets/arrow_dataset.py", line 3570, in apply_function
47
+ processed_inputs = function(*fn_args, *additional_args, **fn_kwargs)
48
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/trl/trainer/dpo_trainer.py", line 749, in process_row
49
+ processor, tokenizer = processing_class, processing_class.tokenizer # the processing class is a processor
50
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/tokenization_utils_base.py", line 1331, in __getattr__
51
+ raise AttributeError(f"{self.__class__.__name__} has no attribute {key}")
52
+ AttributeError: TokenizersBackend has no attribute tokenizer. Did you mean: '_tokenizer'?
53
+ Traceback (most recent call last):
54
+ File "/workspace/trainer-kit/DPO/run_dpo.py", line 1011, in <module>
55
+ main()
56
+ File "/workspace/trainer-kit/DPO/run_dpo.py", line 967, in main
57
+ trainer = DPOTrainer(
58
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/trl/trainer/dpo_trainer.py", line 480, in __init__
59
+ train_dataset = self._prepare_dataset(train_dataset, processing_class, args, "train")
60
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/trl/trainer/dpo_trainer.py", line 654, in _prepare_dataset
61
+ dataset = dataset.map(
62
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/datasets/arrow_dataset.py", line 562, in wrapper
63
+ out: Union["Dataset", "DatasetDict"] = func(self, *args, **kwargs)
64
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/datasets/arrow_dataset.py", line 3341, in map
65
+ for rank, done, content in Dataset._map_single(**unprocessed_kwargs):
66
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/datasets/arrow_dataset.py", line 3673, in _map_single
67
+ for i, example in iter_outputs(shard_iterable):
68
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/datasets/arrow_dataset.py", line 3647, in iter_outputs
69
+ yield i, apply_function(example, i, offset=offset)
70
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/datasets/arrow_dataset.py", line 3570, in apply_function
71
+ processed_inputs = function(*fn_args, *additional_args, **fn_kwargs)
72
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/trl/trainer/dpo_trainer.py", line 749, in process_row
73
+ processor, tokenizer = processing_class, processing_class.tokenizer # the processing class is a processor
74
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/tokenization_utils_base.py", line 1331, in __getattr__
75
+ raise AttributeError(f"{self.__class__.__name__} has no attribute {key}")
76
+ AttributeError: TokenizersBackend has no attribute tokenizer. Did you mean: '_tokenizer'?
dpo_run_24b_v1/wandb/run-20251226_153336-fb8js9es/files/requirements.txt ADDED
@@ -0,0 +1,104 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ exceptiongroup==1.3.1
2
+ wheel==0.45.1
3
+ python-dateutil==2.9.0.post0
4
+ nvidia-ml-py==13.580.82
5
+ huggingface_hub==1.2.3
6
+ idna==3.11
7
+ click==8.3.1
8
+ numpy==2.2.6
9
+ httpx==0.28.1
10
+ tokenizers==0.22.1
11
+ sympy==1.13.1
12
+ yarl==1.22.0
13
+ async-timeout==5.0.1
14
+ datasets==4.4.2
15
+ platformdirs==4.5.1
16
+ nvidia-cuda-cupti-cu12==12.1.105
17
+ nvidia-nvtx-cu12==12.1.105
18
+ smmap==5.0.2
19
+ accelerate==1.12.0
20
+ requests==2.32.5
21
+ aiohttp==3.13.2
22
+ bitsandbytes==0.49.0
23
+ nvidia-cublas-cu12==12.1.3.1
24
+ mpmath==1.3.0
25
+ torchaudio==2.5.1+cu121
26
+ nvidia-cuda-runtime-cu12==12.1.105
27
+ typing-inspection==0.4.2
28
+ GitPython==3.1.45
29
+ xxhash==3.6.0
30
+ nvidia-cusolver-cu12==11.4.5.107
31
+ pydantic_core==2.41.5
32
+ six==1.17.0
33
+ torchvision==0.20.1+cu121
34
+ typing_extensions==4.15.0
35
+ triton==3.1.0
36
+ charset-normalizer==3.4.4
37
+ nvitop==1.6.1
38
+ wandb==0.23.1
39
+ regex==2025.11.3
40
+ pip==25.3
41
+ nvidia-cusparse-cu12==12.1.0.106
42
+ pytz==2025.2
43
+ Jinja2==3.1.6
44
+ psutil==7.2.0
45
+ pillow==12.0.0
46
+ packaging==25.0
47
+ safetensors==0.7.0
48
+ sentry-sdk==2.48.0
49
+ gitdb==4.0.12
50
+ httpcore==1.0.9
51
+ setuptools==80.9.0
52
+ nvidia-cufft-cu12==11.0.2.54
53
+ anyio==4.12.0
54
+ transformers==5.0.0.dev0
55
+ pydantic==2.12.5
56
+ fsspec==2025.10.0
57
+ filelock==3.20.0
58
+ PyYAML==6.0.3
59
+ hf-xet==1.2.0
60
+ nvidia-cudnn-cu12==9.1.0.70
61
+ tqdm==4.67.1
62
+ MarkupSafe==2.1.5
63
+ attrs==25.4.0
64
+ nvidia-cuda-nvrtc-cu12==12.1.105
65
+ peft==0.18.0
66
+ aiohappyeyeballs==2.6.1
67
+ networkx==3.4.2
68
+ nvidia-nvjitlink-cu12==12.9.86
69
+ certifi==2025.11.12
70
+ pyarrow==22.0.0
71
+ dill==0.4.0
72
+ protobuf==6.33.2
73
+ aiosignal==1.4.0
74
+ frozenlist==1.8.0
75
+ urllib3==2.6.2
76
+ propcache==0.4.1
77
+ tzdata==2025.3
78
+ pandas==2.3.3
79
+ annotated-types==0.7.0
80
+ shellingham==1.5.4
81
+ nvidia-nccl-cu12==2.21.5
82
+ multidict==6.7.0
83
+ nvidia-curand-cu12==10.3.2.106
84
+ trl==0.26.2
85
+ torch==2.5.1+cu121
86
+ h11==0.16.0
87
+ multiprocess==0.70.18
88
+ typer-slim==0.21.0
89
+ wheel==0.45.1
90
+ tomli==2.0.1
91
+ autocommand==2.2.2
92
+ jaraco.context==5.3.0
93
+ zipp==3.19.2
94
+ packaging==24.2
95
+ inflect==7.3.1
96
+ typing_extensions==4.12.2
97
+ platformdirs==4.2.2
98
+ jaraco.functools==4.0.1
99
+ jaraco.collections==5.1.0
100
+ jaraco.text==3.12.1
101
+ backports.tarfile==1.2.0
102
+ more-itertools==10.3.0
103
+ importlib_metadata==8.0.0
104
+ typeguard==4.3.0
dpo_run_24b_v1/wandb/run-20251226_153336-fb8js9es/files/wandb-metadata.json ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "os": "Linux-6.12.46+-x86_64-with-glibc2.35",
3
+ "python": "CPython 3.10.12",
4
+ "startedAt": "2025-12-26T15:33:36.434359Z",
5
+ "args": [
6
+ "--config",
7
+ "config_dpo.yaml"
8
+ ],
9
+ "program": "/workspace/trainer-kit/DPO/run_dpo.py",
10
+ "codePath": "run_dpo.py",
11
+ "codePathLocal": "run_dpo.py",
12
+ "email": "shaiksirajuddin9949@gmail.com",
13
+ "root": "runs/dpo_run_24b_v1",
14
+ "host": "a100-2gpu-shell-session-757d587799-mfdvv",
15
+ "executable": "/workspace/llm_finetuning_env/bin/python",
16
+ "cpu_count": 12,
17
+ "cpu_count_logical": 24,
18
+ "gpu": "NVIDIA A100-SXM4-80GB",
19
+ "gpu_count": 2,
20
+ "disk": {
21
+ "/": {
22
+ "total": "791251738624",
23
+ "used": "318370951168"
24
+ }
25
+ },
26
+ "memory": {
27
+ "total": "359047892992"
28
+ },
29
+ "gpu_nvidia": [
30
+ {
31
+ "name": "NVIDIA A100-SXM4-80GB",
32
+ "memoryTotal": "85899345920",
33
+ "cudaCores": 6912,
34
+ "architecture": "Ampere",
35
+ "uuid": "GPU-989794b0-ec3b-13bf-db9f-3fbe341497ba"
36
+ },
37
+ {
38
+ "name": "NVIDIA A100-SXM4-80GB",
39
+ "memoryTotal": "85899345920",
40
+ "cudaCores": 6912,
41
+ "architecture": "Ampere",
42
+ "uuid": "GPU-3790aa64-60ef-9eac-b0b1-b278ee8c0d40"
43
+ }
44
+ ],
45
+ "cudaVersion": "13.0",
46
+ "writerId": "yg288fsgj0ia8gpi3lsxxd4iicxacpwh"
47
+ }
dpo_run_24b_v1/wandb/run-20251226_153336-fb8js9es/files/wandb-summary.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"_runtime":49,"_wandb":{"runtime":49}}
dpo_run_24b_v1/wandb/run-20251226_153336-fb8js9es/logs/debug-core.log ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"time":"2025-12-26T15:33:36.517152278Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmptn5phprh/port-138967.txt","pid":138967,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false}
2
+ {"time":"2025-12-26T15:33:36.517842446Z","level":"INFO","msg":"server: will exit if parent process dies","ppid":138967}
3
+ {"time":"2025-12-26T15:33:36.517839815Z","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-138967-139051-1880805868/socket","Net":"unix"}}
4
+ {"time":"2025-12-26T15:33:36.700377241Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"}
5
+ {"time":"2025-12-26T15:33:36.707152235Z","level":"INFO","msg":"handleInformInit: received","streamId":"fb8js9es","id":"1(@)"}
6
+ {"time":"2025-12-26T15:33:36.860726066Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"fb8js9es","id":"1(@)"}
7
+ {"time":"2025-12-26T15:34:26.830462396Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"}
8
+ {"time":"2025-12-26T15:34:26.830539704Z","level":"INFO","msg":"server is shutting down"}
9
+ {"time":"2025-12-26T15:34:26.830534975Z","level":"INFO","msg":"connection: closing","id":"1(@)"}
10
+ {"time":"2025-12-26T15:34:26.830623895Z","level":"INFO","msg":"connection: closed successfully","id":"1(@)"}
11
+ {"time":"2025-12-26T15:34:26.830653617Z","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-138967-139051-1880805868/socket","Net":"unix"}}
12
+ {"time":"2025-12-26T15:34:27.129538377Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"}
13
+ {"time":"2025-12-26T15:34:27.129564503Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"}
14
+ {"time":"2025-12-26T15:34:27.129572669Z","level":"INFO","msg":"server is closed"}
dpo_run_24b_v1/wandb/run-20251226_153336-fb8js9es/logs/debug-internal.log ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"time":"2025-12-26T15:33:36.707301381Z","level":"INFO","msg":"stream: starting","core version":"0.23.1"}
2
+ {"time":"2025-12-26T15:33:36.860475041Z","level":"INFO","msg":"stream: created new stream","id":"fb8js9es"}
3
+ {"time":"2025-12-26T15:33:36.860568007Z","level":"INFO","msg":"handler: started","stream_id":"fb8js9es"}
4
+ {"time":"2025-12-26T15:33:36.860715948Z","level":"INFO","msg":"stream: started","id":"fb8js9es"}
5
+ {"time":"2025-12-26T15:33:36.860739836Z","level":"INFO","msg":"writer: started","stream_id":"fb8js9es"}
6
+ {"time":"2025-12-26T15:33:36.860764109Z","level":"INFO","msg":"sender: started","stream_id":"fb8js9es"}
7
+ {"time":"2025-12-26T15:34:26.830537011Z","level":"INFO","msg":"stream: closing","id":"fb8js9es"}
8
+ {"time":"2025-12-26T15:34:27.027238553Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
9
+ {"time":"2025-12-26T15:34:27.128722564Z","level":"INFO","msg":"handler: closed","stream_id":"fb8js9es"}
10
+ {"time":"2025-12-26T15:34:27.12882072Z","level":"INFO","msg":"sender: closed","stream_id":"fb8js9es"}
11
+ {"time":"2025-12-26T15:34:27.128830906Z","level":"INFO","msg":"stream: closed","id":"fb8js9es"}
dpo_run_24b_v1/wandb/run-20251226_153336-fb8js9es/logs/debug.log ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2025-12-26 15:33:36,435 INFO MainThread:138967 [wandb_setup.py:_flush():80] Current SDK version is 0.23.1
2
+ 2025-12-26 15:33:36,436 INFO MainThread:138967 [wandb_setup.py:_flush():80] Configure stats pid to 138967
3
+ 2025-12-26 15:33:36,436 INFO MainThread:138967 [wandb_setup.py:_flush():80] Loading settings from /root/.config/wandb/settings
4
+ 2025-12-26 15:33:36,436 INFO MainThread:138967 [wandb_setup.py:_flush():80] Loading settings from /workspace/trainer-kit/DPO/wandb/settings
5
+ 2025-12-26 15:33:36,436 INFO MainThread:138967 [wandb_setup.py:_flush():80] Loading settings from environment variables
6
+ 2025-12-26 15:33:36,436 INFO MainThread:138967 [wandb_init.py:setup_run_log_directory():714] Logging user logs to runs/dpo_run_24b_v1/wandb/run-20251226_153336-fb8js9es/logs/debug.log
7
+ 2025-12-26 15:33:36,436 INFO MainThread:138967 [wandb_init.py:setup_run_log_directory():715] Logging internal logs to runs/dpo_run_24b_v1/wandb/run-20251226_153336-fb8js9es/logs/debug-internal.log
8
+ 2025-12-26 15:33:36,436 INFO MainThread:138967 [wandb_init.py:init():841] calling init triggers
9
+ 2025-12-26 15:33:36,436 INFO MainThread:138967 [wandb_init.py:init():846] wandb.init called with sweep_config: {}
10
+ config: {'model': {'repo_id': '../../Models/Devstral-Small-2-24B-HS-CPT-SFT', 'revision': None, 'base_local_dir': 'base_model', 'trust_remote_code': True, 'tokenizer_use_fast': True, 'device_map': 'auto', 'torch_dtype': 'bfloat16', 'use_4bit': False, 'bnb_4bit_quant_type': 'nf4', 'bnb_4bit_use_double_quant': False, 'bnb_4bit_compute_dtype': 'bfloat16', 'attn_implementation': None}, 'data': {'train_jsonl': 'dpo_pairs_generated.jsonl', 'eval_jsonl': None, 'eval_split_ratio': 0.1, 'prompt_field': 'prompt', 'chosen_field': 'chosen', 'rejected_field': 'rejected', 'score_field': 'f1_score', 'format_type': 'chatml', 'system_prompt': 'You are a Hyperswitch Rust code analyzer. Identify functions/structs that need modification for a given task.\n\n## Output Format\n\n##OUTPUT\nExplain the data flow and why each component must change:\n- Flow: [Input → Processing → Output with arrows]\n- For each component: "The [ComponentName] ([path]) must [action] because [reason]—without this, [consequence]"\n- Explain coupling between components\n\n##SELECT\nmodify::crates/path/to/file.rs::impl::ComponentName\nadd::crates/another/file.rs::function::AnotherComponent\n<EOS>\n\n## Rules\n\n1. Use full paths: `remove::crates/folder/file.rs::Type::Name`\n2. Use `::` for nested items: `status::StructName::Type::Name`\n3. Always explain "must change because" and "without this"\n3. Types of components: function, struct, enum, impl, trait\n4. If there is extra information (e.g., enum variants), include that too.\n5. Start with ##OUTPUT, end with ##SELECT, terminate with <EOS>\n', 'max_length': 2048, 'shuffle': True, 'num_proc': 4}, 'peft': {'enabled': True, 'r': 16, 'lora_alpha': 32, 'lora_dropout': 0.05, 'bias': 'none', 'target_modules': 'auto'}, 'dpo': {'beta': 0.1, 'label_smoothing': 0.0, 'loss_type': 'sigmoid', 'use_reference_model': True, 'reference_free': False}, 'train': {'num_train_epochs': 3, 'per_device_train_batch_size': 1, 'per_device_eval_batch_size': 1, 'gradient_accumulation_steps': 8, 'learning_rate': '5e-5', 'weight_decay': 0.0, 'warmup_ratio': 0.1, 'lr_scheduler_type': 'cosine', 'optim': 'adamw_torch', 'max_grad_norm': 1.0, 'gradient_checkpointing': True, 'logging_steps': 2, 'save_strategy': 'steps', 'save_steps': 100, 'save_total_limit': 10, 'evaluation_strategy': 'steps', 'eval_steps': 25, 'load_best_model_at_end': True, 'early_stopping': {'enabled': True, 'patience': 5, 'min_delta': 0.001, 'metric': 'eval_loss', 'mode': 'min'}, 'resume_from_checkpoint': 'auto'}, 'run_dir': 'runs/dpo_run_24b_v1', '_wandb': {}}
11
+ 2025-12-26 15:33:36,436 INFO MainThread:138967 [wandb_init.py:init():889] starting backend
12
+ 2025-12-26 15:33:36,700 INFO MainThread:138967 [wandb_init.py:init():892] sending inform_init request
13
+ 2025-12-26 15:33:36,705 INFO MainThread:138967 [wandb_init.py:init():900] backend started and connected
14
+ 2025-12-26 15:33:36,708 INFO MainThread:138967 [wandb_init.py:init():970] updated telemetry
15
+ 2025-12-26 15:33:36,709 INFO MainThread:138967 [wandb_init.py:init():994] communicating run to backend with 90.0 second timeout
16
+ 2025-12-26 15:33:36,984 INFO MainThread:138967 [wandb_init.py:init():1041] starting run threads in backend
17
+ 2025-12-26 15:33:37,095 INFO MainThread:138967 [wandb_run.py:_console_start():2521] atexit reg
18
+ 2025-12-26 15:33:37,095 INFO MainThread:138967 [wandb_run.py:_redirect():2369] redirect: wrap_raw
19
+ 2025-12-26 15:33:37,095 INFO MainThread:138967 [wandb_run.py:_redirect():2438] Wrapping output streams.
20
+ 2025-12-26 15:33:37,095 INFO MainThread:138967 [wandb_run.py:_redirect():2461] Redirects installed.
21
+ 2025-12-26 15:33:37,101 INFO MainThread:138967 [wandb_init.py:init():1081] run started, returning control to user process
22
+ 2025-12-26 15:34:26,830 INFO wandb-AsyncioManager-main:138967 [service_client.py:_forward_responses():80] Reached EOF.
23
+ 2025-12-26 15:34:26,830 INFO wandb-AsyncioManager-main:138967 [mailbox.py:close():137] Closing mailbox, abandoning 1 handles.
dpo_run_24b_v1/wandb/run-20251226_153336-fb8js9es/run-fb8js9es.wandb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:47b1b5cb59559e68f367989a74a0cbd677bc17c04c539a4bc2448b00f2fcb402
3
+ size 410520
dpo_run_24b_v1/wandb/run-20251226_153517-g5bybskm/files/config.yaml ADDED
@@ -0,0 +1,165 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _wandb:
2
+ value:
3
+ cli_version: 0.23.1
4
+ e:
5
+ 47sn84x90gu1yvzd0dzhhujsd7q0za53:
6
+ args:
7
+ - --config
8
+ - config_dpo.yaml
9
+ codePath: run_dpo.py
10
+ codePathLocal: run_dpo.py
11
+ cpu_count: 12
12
+ cpu_count_logical: 24
13
+ cudaVersion: "13.0"
14
+ disk:
15
+ /:
16
+ total: "791251738624"
17
+ used: "319211442176"
18
+ email: shaiksirajuddin9949@gmail.com
19
+ executable: /workspace/llm_finetuning_env/bin/python
20
+ gpu: NVIDIA A100-SXM4-80GB
21
+ gpu_count: 2
22
+ gpu_nvidia:
23
+ - architecture: Ampere
24
+ cudaCores: 6912
25
+ memoryTotal: "85899345920"
26
+ name: NVIDIA A100-SXM4-80GB
27
+ uuid: GPU-989794b0-ec3b-13bf-db9f-3fbe341497ba
28
+ - architecture: Ampere
29
+ cudaCores: 6912
30
+ memoryTotal: "85899345920"
31
+ name: NVIDIA A100-SXM4-80GB
32
+ uuid: GPU-3790aa64-60ef-9eac-b0b1-b278ee8c0d40
33
+ host: a100-2gpu-shell-session-757d587799-mfdvv
34
+ memory:
35
+ total: "359047892992"
36
+ os: Linux-6.12.46+-x86_64-with-glibc2.35
37
+ program: /workspace/trainer-kit/DPO/run_dpo.py
38
+ python: CPython 3.10.12
39
+ root: runs/dpo_run_24b_v1
40
+ startedAt: "2025-12-26T15:35:17.840098Z"
41
+ writerId: 47sn84x90gu1yvzd0dzhhujsd7q0za53
42
+ m: []
43
+ python_version: 3.10.12
44
+ t:
45
+ "1":
46
+ - 1
47
+ - 11
48
+ - 41
49
+ - 49
50
+ - 51
51
+ - 71
52
+ - 84
53
+ - 98
54
+ "2":
55
+ - 1
56
+ - 11
57
+ - 41
58
+ - 49
59
+ - 51
60
+ - 71
61
+ - 84
62
+ - 98
63
+ "3":
64
+ - 15
65
+ - 16
66
+ "4": 3.10.12
67
+ "5": 0.23.1
68
+ "6": 5.0.0.dev0
69
+ "12": 0.23.1
70
+ "13": linux-x86_64
71
+ data:
72
+ value:
73
+ chosen_field: chosen
74
+ eval_jsonl: null
75
+ eval_split_ratio: 0.1
76
+ format_type: chatml
77
+ max_length: 2048
78
+ num_proc: 4
79
+ prompt_field: prompt
80
+ rejected_field: rejected
81
+ score_field: f1_score
82
+ shuffle: true
83
+ system_prompt: |
84
+ You are a Hyperswitch Rust code analyzer. Identify functions/structs that need modification for a given task.
85
+
86
+ ## Output Format
87
+
88
+ ##OUTPUT
89
+ Explain the data flow and why each component must change:
90
+ - Flow: [Input → Processing → Output with arrows]
91
+ - For each component: "The [ComponentName] ([path]) must [action] because [reason]—without this, [consequence]"
92
+ - Explain coupling between components
93
+
94
+ ##SELECT
95
+ modify::crates/path/to/file.rs::impl::ComponentName
96
+ add::crates/another/file.rs::function::AnotherComponent
97
+ <EOS>
98
+
99
+ ## Rules
100
+
101
+ 1. Use full paths: `remove::crates/folder/file.rs::Type::Name`
102
+ 2. Use `::` for nested items: `status::StructName::Type::Name`
103
+ 3. Always explain "must change because" and "without this"
104
+ 3. Types of components: function, struct, enum, impl, trait
105
+ 4. If there is extra information (e.g., enum variants), include that too.
106
+ 5. Start with ##OUTPUT, end with ##SELECT, terminate with <EOS>
107
+ train_jsonl: dpo_pairs_generated.jsonl
108
+ dpo:
109
+ value:
110
+ beta: 0.1
111
+ label_smoothing: 0
112
+ loss_type: sigmoid
113
+ reference_free: false
114
+ use_reference_model: true
115
+ model:
116
+ value:
117
+ attn_implementation: null
118
+ base_local_dir: base_model
119
+ bnb_4bit_compute_dtype: bfloat16
120
+ bnb_4bit_quant_type: nf4
121
+ bnb_4bit_use_double_quant: false
122
+ device_map: auto
123
+ repo_id: ../../Models/Devstral-Small-2-24B-HS-CPT-SFT
124
+ revision: null
125
+ tokenizer_use_fast: true
126
+ torch_dtype: bfloat16
127
+ trust_remote_code: true
128
+ use_4bit: false
129
+ peft:
130
+ value:
131
+ bias: none
132
+ enabled: true
133
+ lora_alpha: 32
134
+ lora_dropout: 0.05
135
+ r: 16
136
+ target_modules: auto
137
+ run_dir:
138
+ value: runs/dpo_run_24b_v1
139
+ train:
140
+ value:
141
+ early_stopping:
142
+ enabled: true
143
+ metric: eval_loss
144
+ min_delta: 0.001
145
+ mode: min
146
+ patience: 5
147
+ eval_steps: 25
148
+ evaluation_strategy: steps
149
+ gradient_accumulation_steps: 8
150
+ gradient_checkpointing: true
151
+ learning_rate: "5e-5"
152
+ load_best_model_at_end: true
153
+ logging_steps: 2
154
+ lr_scheduler_type: cosine
155
+ max_grad_norm: 1
156
+ num_train_epochs: 3
157
+ optim: adamw_torch
158
+ per_device_eval_batch_size: 1
159
+ per_device_train_batch_size: 1
160
+ resume_from_checkpoint: auto
161
+ save_steps: 100
162
+ save_strategy: steps
163
+ save_total_limit: 10
164
+ warmup_ratio: 0.1
165
+ weight_decay: 0