saranshagarwal2020 committed on
Commit
bfa44bb
·
verified ·
1 Parent(s): f3a5828

Upload folder using huggingface_hub

Browse files
chat_template.jinja ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {{- bos_token -}}
2
+ {%- set keep_past_thinking = keep_past_thinking | default(false) -%}
3
+ {%- set ns = namespace(system_prompt="") -%}
4
+ {%- if messages[0]["role"] == "system" -%}
5
+ {%- set ns.system_prompt = messages[0]["content"] -%}
6
+ {%- set messages = messages[1:] -%}
7
+ {%- endif -%}
8
+ {%- if tools -%}
9
+ {%- set ns.system_prompt = ns.system_prompt + ("\n" if ns.system_prompt else "") + "List of tools: [" -%}
10
+ {%- for tool in tools -%}
11
+ {%- if tool is not string -%}
12
+ {%- set tool = tool | tojson -%}
13
+ {%- endif -%}
14
+ {%- set ns.system_prompt = ns.system_prompt + tool -%}
15
+ {%- if not loop.last -%}
16
+ {%- set ns.system_prompt = ns.system_prompt + ", " -%}
17
+ {%- endif -%}
18
+ {%- endfor -%}
19
+ {%- set ns.system_prompt = ns.system_prompt + "]" -%}
20
+ {%- endif -%}
21
+ {%- if ns.system_prompt -%}
22
+ {{- "<|im_start|>system\n" + ns.system_prompt + "<|im_end|>\n" -}}
23
+ {%- endif -%}
24
+ {%- set ns.last_assistant_index = -1 -%}
25
+ {%- for message in messages -%}
26
+ {%- if message["role"] == "assistant" -%}
27
+ {%- set ns.last_assistant_index = loop.index0 -%}
28
+ {%- endif -%}
29
+ {%- endfor -%}
30
+ {%- for message in messages -%}
31
+ {{- "<|im_start|>" + message["role"] + "\n" -}}
32
+ {%- set content = message["content"] -%}
33
+ {%- if content is not string -%}
34
+ {%- set content = content | tojson -%}
35
+ {%- endif -%}
36
+ {%- if message["role"] == "assistant" and not keep_past_thinking and loop.index0 != ns.last_assistant_index -%}
37
+ {%- if "</think>" in content -%}
38
+ {%- set content = content.split("</think>")[-1] | trim -%}
39
+ {%- endif -%}
40
+ {%- endif -%}
41
+ {{- content + "<|im_end|>\n" -}}
42
+ {%- endfor -%}
43
+ {%- if add_generation_prompt -%}
44
+ {{- "<|im_start|>assistant\n" -}}
45
+ {%- endif -%}
config.json ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "Lfm2ForCausalLM"
4
+ ],
5
+ "block_auto_adjust_ff_dim": true,
6
+ "block_dim": 2048,
7
+ "block_ff_dim": 12288,
8
+ "block_ffn_dim_multiplier": 1.0,
9
+ "block_mlp_init_scale": 1.0,
10
+ "block_multiple_of": 256,
11
+ "block_norm_eps": 1e-05,
12
+ "block_out_init_scale": 1.0,
13
+ "block_use_swiglu": true,
14
+ "block_use_xavier_init": true,
15
+ "bos_token_id": 1,
16
+ "conv_L_cache": 3,
17
+ "conv_bias": false,
18
+ "conv_dim": 2048,
19
+ "conv_use_xavier_init": true,
20
+ "dtype": "bfloat16",
21
+ "eos_token_id": 7,
22
+ "hidden_size": 2048,
23
+ "initializer_range": 0.02,
24
+ "intermediate_size": 12288,
25
+ "layer_types": [
26
+ "conv",
27
+ "conv",
28
+ "full_attention",
29
+ "conv",
30
+ "conv",
31
+ "full_attention",
32
+ "conv",
33
+ "conv",
34
+ "full_attention",
35
+ "conv",
36
+ "full_attention",
37
+ "conv",
38
+ "full_attention",
39
+ "conv",
40
+ "full_attention",
41
+ "conv"
42
+ ],
43
+ "max_position_embeddings": 128000,
44
+ "model_type": "lfm2",
45
+ "norm_eps": 1e-05,
46
+ "num_attention_heads": 32,
47
+ "num_heads": 32,
48
+ "num_hidden_layers": 16,
49
+ "num_key_value_heads": 8,
50
+ "pad_token_id": 0,
51
+ "rope_parameters": {
52
+ "rope_theta": 1000000.0,
53
+ "rope_type": "default"
54
+ },
55
+ "tie_embedding": true,
56
+ "tie_word_embeddings": true,
57
+ "transformers_version": "5.2.0",
58
+ "use_cache": false,
59
+ "use_pos_enc": true,
60
+ "vocab_size": 65536
61
+ }
generation_config.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 1,
4
+ "eos_token_id": [
5
+ 7
6
+ ],
7
+ "pad_token_id": 0,
8
+ "transformers_version": "5.2.0"
9
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:850061de923b002e5085928549185e88bca0c00b117b8989bf1f993e6788ae67
3
+ size 2340697936
run_meta.json CHANGED
@@ -1,25 +1,25 @@
1
  {
2
- "run_name": "dpo_fft_LFM2.5-1.2B-Instruct_argilla__distilabel-math-preference-dpo_20260222_210527",
3
  "model": "LiquidAI/LFM2.5-1.2B-Instruct",
4
- "dataset": "argilla/distilabel-math-preference-dpo",
5
- "timestamp": "20260222_210527",
6
  "args": {
7
- "dataset": "argilla/distilabel-math-preference-dpo",
8
  "dataset_split": "train",
9
- "instruction_col": null,
10
- "chosen_col": null,
11
- "rejected_col": null,
12
  "max_samples": null,
13
  "seed": 42,
14
  "model_name": "LiquidAI/LFM2.5-1.2B-Instruct",
15
  "ref_4bit": false,
16
- "num_epochs": 3,
17
  "batch_size": 4,
18
  "grad_accum": 4,
19
  "learning_rate": 2e-06,
20
  "beta": 0.2,
21
  "max_length": 1024,
22
- "max_prompt_length": 512,
23
  "warmup_ratio": 0.1,
24
  "optim": "paged_adamw_8bit",
25
  "logging_steps": 10,
@@ -29,11 +29,11 @@
29
  "run_name": null
30
  },
31
  "train_metrics": {
32
- "train_runtime": 1607.6176,
33
- "train_samples_per_second": 4.286,
34
- "train_steps_per_second": 0.269,
35
  "total_flos": 0.0,
36
- "train_loss": 0.5204887290795644,
37
- "epoch": 3.0
38
  }
39
  }
 
1
  {
2
+ "run_name": "dpo_fft_LFM2.5-1.2B-Instruct_xinlai__Math-Step-DPO-10K_20260223_022854",
3
  "model": "LiquidAI/LFM2.5-1.2B-Instruct",
4
+ "dataset": "xinlai/Math-Step-DPO-10K",
5
+ "timestamp": "20260223_022854",
6
  "args": {
7
+ "dataset": "xinlai/Math-Step-DPO-10K",
8
  "dataset_split": "train",
9
+ "instruction_col": "initial_reason_steps",
10
+ "chosen_col": "chosen",
11
+ "rejected_col": "rejected",
12
  "max_samples": null,
13
  "seed": 42,
14
  "model_name": "LiquidAI/LFM2.5-1.2B-Instruct",
15
  "ref_4bit": false,
16
+ "num_epochs": 1,
17
  "batch_size": 4,
18
  "grad_accum": 4,
19
  "learning_rate": 2e-06,
20
  "beta": 0.2,
21
  "max_length": 1024,
22
+ "max_prompt_length": 768,
23
  "warmup_ratio": 0.1,
24
  "optim": "paged_adamw_8bit",
25
  "logging_steps": 10,
 
29
  "run_name": null
30
  },
31
  "train_metrics": {
32
+ "train_runtime": 2398.316,
33
+ "train_samples_per_second": 4.276,
34
+ "train_steps_per_second": 0.267,
35
  "total_flos": 0.0,
36
+ "train_loss": 0.5289894797128746,
37
+ "epoch": 1.0
38
  }
39
  }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "backend": "tokenizers",
3
+ "bos_token": "<|startoftext|>",
4
+ "clean_up_tokenization_spaces": false,
5
+ "eos_token": "<|im_end|>",
6
+ "is_local": false,
7
+ "legacy": false,
8
+ "model_input_names": [
9
+ "input_ids",
10
+ "attention_mask"
11
+ ],
12
+ "model_max_length": 1000000000000000019884624838656,
13
+ "pad_token": "<|pad|>",
14
+ "padding_side": "right",
15
+ "sp_model_kwargs": {},
16
+ "spaces_between_special_tokens": false,
17
+ "tokenizer_class": "TokenizersBackend",
18
+ "use_default_system_prompt": false,
19
+ "use_fast": true
20
+ }
train10k.txt ADDED
@@ -0,0 +1,101 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ========================================
2
+ DPO Full Fine-Tuning
3
+ ========================================
4
+ Model : LiquidAI/LFM2.5-1.2B-Instruct
5
+ Dataset : xinlai/Math-Step-DPO-10K
6
+ Epochs : 1
7
+ Batch size : 4 (grad_accum=4, eff=16)
8
+ Learning rate : 2e-6
9
+ DPO beta : 0.2
10
+ Reference : NF4 4-bit (pass --no_ref_4bit for bfloat16)
11
+ Output dir : models
12
+ ========================================
13
+
14
+ [dpo_train] Run : dpo_fft_LFM2.5-1.2B-Instruct_xinlai__Math-Step-DPO-10K_20260223_022854
15
+ [dpo_train] Output : models/dpo_fft_LFM2.5-1.2B-Instruct_xinlai__Math-Step-DPO-10K_20260223_022854
16
+ [dpo_train] Loading dataset: xinlai/Math-Step-DPO-10K split=train
17
+ [dpo_train] Full size : 10,795 rows | columns: ['dataset', 'prompt', 'initial_reason_steps', 'chosen', 'rejected', 'full_chosen', 'full_rejected', 'answer']
18
+ [dpo_train] Columns : instruction='initial_reason_steps' chosen='chosen' rejected='rejected'
19
+ [dpo_train] After cleaning: 10,795 rows
20
+ [dpo_train] Train: 10,255 Eval: 540
21
+ [dpo_train] Loading policy model (bfloat16, trainable) …
22
+ [dpo_train] Loading reference model (bfloat16, frozen) …
23
+ [dpo_train] Policy params : 1170M (all trainable)
24
+
25
+ [dpo_train] Starting DPO full fine-tuning (epochs=1 eff_batch=16) …
26
+
27
+ {'loss': '0.6923', 'grad_norm': '54.5', 'learning_rate': '2.769e-07', 'rewards/chosen': '0.009294', 'rewards/rejected': '0.004497', 'rewards/accuracies': '0.4187', 'rewards/margins': '0.004797', 'logps/chosen': '-129.8', 'logps/rejected': '-139.6', 'logits/chosen': '-0.9724', 'logits/rejected': '-0.9586', 'epoch': '0.0156'}
28
+ {'loss': '0.6889', 'grad_norm': '42.75', 'learning_rate': '5.846e-07', 'rewards/chosen': '-0.0111', 'rewards/rejected': '-0.02361', 'rewards/accuracies': '0.5375', 'rewards/margins': '0.01251', 'logps/chosen': '-132.7', 'logps/rejected': '-141.9', 'logits/chosen': '-0.9568', 'logits/rejected': '-0.9363', 'epoch': '0.0312'}
29
+ {'loss': '0.672', 'grad_norm': '52.25', 'learning_rate': '8.923e-07', 'rewards/chosen': '-0.1026', 'rewards/rejected': '-0.1517', 'rewards/accuracies': '0.6313', 'rewards/margins': '0.04918', 'logps/chosen': '-139.9', 'logps/rejected': '-141.5', 'logits/chosen': '-0.95', 'logits/rejected': '-0.9532', 'epoch': '0.0468'}
30
+ {'loss': '0.6529', 'grad_norm': '49.5', 'learning_rate': '1.2e-06', 'rewards/chosen': '-0.2349', 'rewards/rejected': '-0.3385', 'rewards/accuracies': '0.6375', 'rewards/margins': '0.1035', 'logps/chosen': '-139.2', 'logps/rejected': '-149.2', 'logits/chosen': '-1', 'logits/rejected': '-1.021', 'epoch': '0.0624'}
31
+ {'loss': '0.6221', 'grad_norm': '42.75', 'learning_rate': '1.508e-06', 'rewards/chosen': '-0.4437', 'rewards/rejected': '-0.6489', 'rewards/accuracies': '0.6687', 'rewards/margins': '0.2052', 'logps/chosen': '-129.3', 'logps/rejected': '-140.3', 'logits/chosen': '-0.999', 'logits/rejected': '-1.017', 'epoch': '0.078'}
32
+ {'loss': '0.6159', 'grad_norm': '39.5', 'learning_rate': '1.815e-06', 'rewards/chosen': '-0.5384', 'rewards/rejected': '-0.8259', 'rewards/accuracies': '0.6062', 'rewards/margins': '0.2875', 'logps/chosen': '-141.1', 'logps/rejected': '-143.8', 'logits/chosen': '-1.001', 'logits/rejected': '-0.978', 'epoch': '0.0936'}
33
+ {'loss': '0.5934', 'grad_norm': '47', 'learning_rate': '2e-06', 'rewards/chosen': '-0.2281', 'rewards/rejected': '-0.5834', 'rewards/accuracies': '0.6562', 'rewards/margins': '0.3553', 'logps/chosen': '-135.7', 'logps/rejected': '-145.3', 'logits/chosen': '-0.9597', 'logits/rejected': '-0.9586', 'epoch': '0.1092'}
34
+ {'loss': '0.5266', 'grad_norm': '40.75', 'learning_rate': '1.997e-06', 'rewards/chosen': '-0.2324', 'rewards/rejected': '-0.9226', 'rewards/accuracies': '0.6375', 'rewards/margins': '0.6902', 'logps/chosen': '-135', 'logps/rejected': '-149.8', 'logits/chosen': '-0.9169', 'logits/rejected': '-0.958', 'epoch': '0.1248'}
35
+ {'loss': '0.5594', 'grad_norm': '39.75', 'learning_rate': '1.991e-06', 'rewards/chosen': '-0.4102', 'rewards/rejected': '-1.067', 'rewards/accuracies': '0.6875', 'rewards/margins': '0.6563', 'logps/chosen': '-140.4', 'logps/rejected': '-161.5', 'logits/chosen': '-0.9946', 'logits/rejected': '-1.007', 'epoch': '0.1404'}
36
+ {'loss': '0.5212', 'grad_norm': '42', 'learning_rate': '1.983e-06', 'rewards/chosen': '-0.4026', 'rewards/rejected': '-1.258', 'rewards/accuracies': '0.7125', 'rewards/margins': '0.8554', 'logps/chosen': '-140.5', 'logps/rejected': '-151.5', 'logits/chosen': '-0.9036', 'logits/rejected': '-0.9337', 'epoch': '0.156'}
37
+ {'eval_loss': '0.5339', 'eval_runtime': '40.28', 'eval_samples_per_second': '13.41', 'eval_steps_per_second': '3.351', 'eval_rewards/chosen': '-0.4095', 'eval_rewards/rejected': '-1.24', 'eval_rewards/accuracies': '0.6593', 'eval_rewards/margins': '0.8309', 'eval_logps/chosen': '-141', 'eval_logps/rejected': '-154.7', 'eval_logits/chosen': '-0.9475', 'eval_logits/rejected': '-0.9603', 'epoch': '0.156'}
38
+ {'loss': '0.58', 'grad_norm': '55.5', 'learning_rate': '1.971e-06', 'rewards/chosen': '-0.3429', 'rewards/rejected': '-0.9838', 'rewards/accuracies': '0.6438', 'rewards/margins': '0.6409', 'logps/chosen': '-148.6', 'logps/rejected': '-154.7', 'logits/chosen': '-0.9508', 'logits/rejected': '-0.9895', 'epoch': '0.1716'}
39
+ {'loss': '0.5932', 'grad_norm': '50.5', 'learning_rate': '1.957e-06', 'rewards/chosen': '-0.4143', 'rewards/rejected': '-1.028', 'rewards/accuracies': '0.6187', 'rewards/margins': '0.6136', 'logps/chosen': '-135.8', 'logps/rejected': '-151.8', 'logits/chosen': '-0.9729', 'logits/rejected': '-0.9898', 'epoch': '0.1872'}
40
+ {'loss': '0.5053', 'grad_norm': '51', 'learning_rate': '1.94e-06', 'rewards/chosen': '-0.538', 'rewards/rejected': '-1.577', 'rewards/accuracies': '0.675', 'rewards/margins': '1.039', 'logps/chosen': '-137.6', 'logps/rejected': '-160.1', 'logits/chosen': '-0.9419', 'logits/rejected': '-0.9662', 'epoch': '0.2028'}
41
+ {'loss': '0.4892', 'grad_norm': '33', 'learning_rate': '1.92e-06', 'rewards/chosen': '-0.5704', 'rewards/rejected': '-1.761', 'rewards/accuracies': '0.6875', 'rewards/margins': '1.191', 'logps/chosen': '-139.2', 'logps/rejected': '-153.3', 'logits/chosen': '-0.9681', 'logits/rejected': '-1.009', 'epoch': '0.2184'}
42
+ {'loss': '0.5108', 'grad_norm': '36.5', 'learning_rate': '1.897e-06', 'rewards/chosen': '-0.7801', 'rewards/rejected': '-1.915', 'rewards/accuracies': '0.6938', 'rewards/margins': '1.134', 'logps/chosen': '-146.1', 'logps/rejected': '-160.9', 'logits/chosen': '-0.9884', 'logits/rejected': '-1.018', 'epoch': '0.234'}
43
+ {'loss': '0.5325', 'grad_norm': '40.75', 'learning_rate': '1.871e-06', 'rewards/chosen': '-0.6749', 'rewards/rejected': '-1.703', 'rewards/accuracies': '0.6625', 'rewards/margins': '1.028', 'logps/chosen': '-142.7', 'logps/rejected': '-156.6', 'logits/chosen': '-0.9584', 'logits/rejected': '-0.9916', 'epoch': '0.2496'}
44
+ {'loss': '0.4619', 'grad_norm': '40.25', 'learning_rate': '1.843e-06', 'rewards/chosen': '-0.3065', 'rewards/rejected': '-1.582', 'rewards/accuracies': '0.7312', 'rewards/margins': '1.276', 'logps/chosen': '-138.2', 'logps/rejected': '-153.2', 'logits/chosen': '-0.9471', 'logits/rejected': '-0.9452', 'epoch': '0.2652'}
45
+ {'loss': '0.542', 'grad_norm': '51', 'learning_rate': '1.813e-06', 'rewards/chosen': '-0.2091', 'rewards/rejected': '-1.171', 'rewards/accuracies': '0.6375', 'rewards/margins': '0.9614', 'logps/chosen': '-143', 'logps/rejected': '-152.9', 'logits/chosen': '-0.9258', 'logits/rejected': '-0.9312', 'epoch': '0.2808'}
46
+ {'loss': '0.5062', 'grad_norm': '44.5', 'learning_rate': '1.78e-06', 'rewards/chosen': '-0.1293', 'rewards/rejected': '-1.125', 'rewards/accuracies': '0.725', 'rewards/margins': '0.9959', 'logps/chosen': '-136', 'logps/rejected': '-146.1', 'logits/chosen': '-0.9742', 'logits/rejected': '-0.9909', 'epoch': '0.2964'}
47
+ {'loss': '0.5246', 'grad_norm': '40.25', 'learning_rate': '1.745e-06', 'rewards/chosen': '-0.1886', 'rewards/rejected': '-1.127', 'rewards/accuracies': '0.675', 'rewards/margins': '0.9379', 'logps/chosen': '-142', 'logps/rejected': '-156.9', 'logits/chosen': '-0.9913', 'logits/rejected': '-0.9865', 'epoch': '0.312'}
48
+ {'eval_loss': '0.5121', 'eval_runtime': '40.17', 'eval_samples_per_second': '13.44', 'eval_steps_per_second': '3.36', 'eval_rewards/chosen': '-0.2147', 'eval_rewards/rejected': '-1.269', 'eval_rewards/accuracies': '0.687', 'eval_rewards/margins': '1.055', 'eval_logps/chosen': '-140', 'eval_logps/rejected': '-154.8', 'eval_logits/chosen': '-0.955', 'eval_logits/rejected': '-0.9694', 'epoch': '0.312'}
49
+ {'loss': '0.5359', 'grad_norm': '41', 'learning_rate': '1.707e-06', 'rewards/chosen': '-0.3423', 'rewards/rejected': '-1.273', 'rewards/accuracies': '0.6562', 'rewards/margins': '0.9307', 'logps/chosen': '-140.6', 'logps/rejected': '-148.6', 'logits/chosen': '-1.003', 'logits/rejected': '-1.016', 'epoch': '0.3276'}
50
+ {'loss': '0.5775', 'grad_norm': '49', 'learning_rate': '1.668e-06', 'rewards/chosen': '-0.2953', 'rewards/rejected': '-1.016', 'rewards/accuracies': '0.6125', 'rewards/margins': '0.721', 'logps/chosen': '-135.6', 'logps/rejected': '-144.4', 'logits/chosen': '-0.9878', 'logits/rejected': '-0.9839', 'epoch': '0.3432'}
51
+ {'loss': '0.5026', 'grad_norm': '43.5', 'learning_rate': '1.626e-06', 'rewards/chosen': '-0.1806', 'rewards/rejected': '-1.201', 'rewards/accuracies': '0.7063', 'rewards/margins': '1.02', 'logps/chosen': '-143.2', 'logps/rejected': '-153.5', 'logits/chosen': '-0.9418', 'logits/rejected': '-0.9669', 'epoch': '0.3588'}
52
+ {'loss': '0.5222', 'grad_norm': '44.25', 'learning_rate': '1.582e-06', 'rewards/chosen': '0.05273', 'rewards/rejected': '-0.954', 'rewards/accuracies': '0.6687', 'rewards/margins': '1.007', 'logps/chosen': '-136.3', 'logps/rejected': '-148.8', 'logits/chosen': '-0.9431', 'logits/rejected': '-0.9376', 'epoch': '0.3744'}
53
+ {'loss': '0.4862', 'grad_norm': '41.5', 'learning_rate': '1.537e-06', 'rewards/chosen': '0.05254', 'rewards/rejected': '-1.19', 'rewards/accuracies': '0.6687', 'rewards/margins': '1.243', 'logps/chosen': '-135.3', 'logps/rejected': '-151', 'logits/chosen': '-0.9207', 'logits/rejected': '-0.9492', 'epoch': '0.39'}
54
+ {'loss': '0.5452', 'grad_norm': '66', 'learning_rate': '1.491e-06', 'rewards/chosen': '-0.02821', 'rewards/rejected': '-0.986', 'rewards/accuracies': '0.625', 'rewards/margins': '0.9578', 'logps/chosen': '-136.3', 'logps/rejected': '-151.1', 'logits/chosen': '-0.9681', 'logits/rejected': '-1.002', 'epoch': '0.4056'}
55
+ {'loss': '0.5194', 'grad_norm': '36.75', 'learning_rate': '1.442e-06', 'rewards/chosen': '-0.1964', 'rewards/rejected': '-1.143', 'rewards/accuracies': '0.7063', 'rewards/margins': '0.9462', 'logps/chosen': '-144', 'logps/rejected': '-149.6', 'logits/chosen': '-0.9456', 'logits/rejected': '-0.9882', 'epoch': '0.4212'}
56
+ {'loss': '0.4679', 'grad_norm': '36.5', 'learning_rate': '1.393e-06', 'rewards/chosen': '-0.03599', 'rewards/rejected': '-1.404', 'rewards/accuracies': '0.7437', 'rewards/margins': '1.368', 'logps/chosen': '-143.2', 'logps/rejected': '-149.7', 'logits/chosen': '-0.9715', 'logits/rejected': '-1', 'epoch': '0.4368'}
57
+ {'loss': '0.5053', 'grad_norm': '50.75', 'learning_rate': '1.342e-06', 'rewards/chosen': '0.1214', 'rewards/rejected': '-0.8408', 'rewards/accuracies': '0.7', 'rewards/margins': '0.9622', 'logps/chosen': '-137.8', 'logps/rejected': '-144.2', 'logits/chosen': '-1.024', 'logits/rejected': '-0.9874', 'epoch': '0.4524'}
58
+ {'loss': '0.4918', 'grad_norm': '41.5', 'learning_rate': '1.29e-06', 'rewards/chosen': '0.08779', 'rewards/rejected': '-0.99', 'rewards/accuracies': '0.7437', 'rewards/margins': '1.078', 'logps/chosen': '-136.1', 'logps/rejected': '-149.7', 'logits/chosen': '-0.9538', 'logits/rejected': '-0.991', 'epoch': '0.468'}
59
+ {'eval_loss': '0.5046', 'eval_runtime': '40.13', 'eval_samples_per_second': '13.46', 'eval_steps_per_second': '3.364', 'eval_rewards/chosen': '0.1132', 'eval_rewards/rejected': '-1.029', 'eval_rewards/accuracies': '0.6741', 'eval_rewards/margins': '1.142', 'eval_logps/chosen': '-138.4', 'eval_logps/rejected': '-153.6', 'eval_logits/chosen': '-0.9587', 'eval_logits/rejected': '-0.9758', 'epoch': '0.468'}
60
+ {'loss': '0.4804', 'grad_norm': '41.25', 'learning_rate': '1.238e-06', 'rewards/chosen': '0.05371', 'rewards/rejected': '-1.108', 'rewards/accuracies': '0.7875', 'rewards/margins': '1.162', 'logps/chosen': '-132.4', 'logps/rejected': '-152.8', 'logits/chosen': '-0.9403', 'logits/rejected': '-0.9417', 'epoch': '0.4836'}
61
+ {'loss': '0.5502', 'grad_norm': '54.5', 'learning_rate': '1.184e-06', 'rewards/chosen': '0.02032', 'rewards/rejected': '-0.762', 'rewards/accuracies': '0.7125', 'rewards/margins': '0.7824', 'logps/chosen': '-139.8', 'logps/rejected': '-155.8', 'logits/chosen': '-1.034', 'logits/rejected': '-0.9944', 'epoch': '0.4992'}
62
+ {'loss': '0.5258', 'grad_norm': '36.25', 'learning_rate': '1.131e-06', 'rewards/chosen': '0.01367', 'rewards/rejected': '-1.062', 'rewards/accuracies': '0.6562', 'rewards/margins': '1.076', 'logps/chosen': '-139.7', 'logps/rejected': '-149', 'logits/chosen': '-1.02', 'logits/rejected': '-1.025', 'epoch': '0.5148'}
63
+ {'loss': '0.513', 'grad_norm': '73.5', 'learning_rate': '1.076e-06', 'rewards/chosen': '-0.08328', 'rewards/rejected': '-1.046', 'rewards/accuracies': '0.7312', 'rewards/margins': '0.9629', 'logps/chosen': '-145', 'logps/rejected': '-151.1', 'logits/chosen': '-0.9922', 'logits/rejected': '-1.009', 'epoch': '0.5304'}
64
+ {'loss': '0.5549', 'grad_norm': '47.5', 'learning_rate': '1.022e-06', 'rewards/chosen': '-0.1006', 'rewards/rejected': '-0.9635', 'rewards/accuracies': '0.7', 'rewards/margins': '0.8629', 'logps/chosen': '-138.2', 'logps/rejected': '-153.4', 'logits/chosen': '-0.9654', 'logits/rejected': '-0.9779', 'epoch': '0.546'}
65
+ {'loss': '0.5152', 'grad_norm': '37.5', 'learning_rate': '9.673e-07', 'rewards/chosen': '-0.1695', 'rewards/rejected': '-1.247', 'rewards/accuracies': '0.6438', 'rewards/margins': '1.077', 'logps/chosen': '-132', 'logps/rejected': '-147.7', 'logits/chosen': '-0.9814', 'logits/rejected': '-0.9655', 'epoch': '0.5616'}
66
+ {'loss': '0.5521', 'grad_norm': '35.25', 'learning_rate': '9.128e-07', 'rewards/chosen': '-0.1743', 'rewards/rejected': '-0.9556', 'rewards/accuracies': '0.7', 'rewards/margins': '0.7813', 'logps/chosen': '-133.1', 'logps/rejected': '-141.6', 'logits/chosen': '-0.9953', 'logits/rejected': '-1.015', 'epoch': '0.5772'}
67
+ {'loss': '0.5144', 'grad_norm': '48.5', 'learning_rate': '8.587e-07', 'rewards/chosen': '-0.191', 'rewards/rejected': '-1.271', 'rewards/accuracies': '0.6625', 'rewards/margins': '1.08', 'logps/chosen': '-138.5', 'logps/rejected': '-154.2', 'logits/chosen': '-1.013', 'logits/rejected': '-1.014', 'epoch': '0.5928'}
68
+ {'loss': '0.5276', 'grad_norm': '45.5', 'learning_rate': '8.049e-07', 'rewards/chosen': '-0.1877', 'rewards/rejected': '-1.247', 'rewards/accuracies': '0.6875', 'rewards/margins': '1.059', 'logps/chosen': '-139.2', 'logps/rejected': '-144.6', 'logits/chosen': '-0.9666', 'logits/rejected': '-0.9784', 'epoch': '0.6084'}
69
+ {'loss': '0.5085', 'grad_norm': '38.75', 'learning_rate': '7.517e-07', 'rewards/chosen': '-0.2559', 'rewards/rejected': '-1.253', 'rewards/accuracies': '0.725', 'rewards/margins': '0.9974', 'logps/chosen': '-139.7', 'logps/rejected': '-151.4', 'logits/chosen': '-0.9496', 'logits/rejected': '-0.9449', 'epoch': '0.624'}
70
+ {'eval_loss': '0.5048', 'eval_runtime': '40.12', 'eval_samples_per_second': '13.46', 'eval_steps_per_second': '3.365', 'eval_rewards/chosen': '-0.2371', 'eval_rewards/rejected': '-1.434', 'eval_rewards/accuracies': '0.687', 'eval_rewards/margins': '1.197', 'eval_logps/chosen': '-140.1', 'eval_logps/rejected': '-155.6', 'eval_logits/chosen': '-0.9802', 'eval_logits/rejected': '-0.9983', 'epoch': '0.624'}
71
+ {'loss': '0.5101', 'grad_norm': '39.75', 'learning_rate': '6.993e-07', 'rewards/chosen': '-0.2794', 'rewards/rejected': '-1.527', 'rewards/accuracies': '0.7312', 'rewards/margins': '1.247', 'logps/chosen': '-136.8', 'logps/rejected': '-152', 'logits/chosen': '-0.9347', 'logits/rejected': '-0.9738', 'epoch': '0.6396'}
72
+ {'loss': '0.5132', 'grad_norm': '47', 'learning_rate': '6.477e-07', 'rewards/chosen': '-0.2584', 'rewards/rejected': '-1.329', 'rewards/accuracies': '0.7125', 'rewards/margins': '1.071', 'logps/chosen': '-132.8', 'logps/rejected': '-148.7', 'logits/chosen': '-0.951', 'logits/rejected': '-0.9631', 'epoch': '0.6552'}
73
+ {'loss': '0.5442', 'grad_norm': '57', 'learning_rate': '5.973e-07', 'rewards/chosen': '-0.354', 'rewards/rejected': '-1.106', 'rewards/accuracies': '0.7188', 'rewards/margins': '0.7517', 'logps/chosen': '-138.7', 'logps/rejected': '-154.1', 'logits/chosen': '-1.014', 'logits/rejected': '-1.03', 'epoch': '0.6708'}
74
+ {'loss': '0.4362', 'grad_norm': '33.25', 'learning_rate': '5.48e-07', 'rewards/chosen': '-0.1957', 'rewards/rejected': '-1.659', 'rewards/accuracies': '0.75', 'rewards/margins': '1.464', 'logps/chosen': '-133.2', 'logps/rejected': '-154.8', 'logits/chosen': '-0.999', 'logits/rejected': '-0.9674', 'epoch': '0.6864'}
75
+ {'loss': '0.4756', 'grad_norm': '39', 'learning_rate': '5e-07', 'rewards/chosen': '-0.24', 'rewards/rejected': '-1.591', 'rewards/accuracies': '0.7875', 'rewards/margins': '1.351', 'logps/chosen': '-146.1', 'logps/rejected': '-159.7', 'logits/chosen': '-0.9821', 'logits/rejected': '-0.9917', 'epoch': '0.702'}
76
+ {'loss': '0.5326', 'grad_norm': '50.25', 'learning_rate': '4.535e-07', 'rewards/chosen': '-0.2618', 'rewards/rejected': '-1.15', 'rewards/accuracies': '0.7', 'rewards/margins': '0.8884', 'logps/chosen': '-138.5', 'logps/rejected': '-143.4', 'logits/chosen': '-0.9793', 'logits/rejected': '-1.013', 'epoch': '0.7176'}
77
+ {'loss': '0.5539', 'grad_norm': '47.5', 'learning_rate': '4.087e-07', 'rewards/chosen': '-0.2694', 'rewards/rejected': '-1.051', 'rewards/accuracies': '0.675', 'rewards/margins': '0.7815', 'logps/chosen': '-133.2', 'logps/rejected': '-145.2', 'logits/chosen': '-0.9868', 'logits/rejected': '-1.012', 'epoch': '0.7332'}
78
+ {'loss': '0.4758', 'grad_norm': '34.25', 'learning_rate': '3.656e-07', 'rewards/chosen': '-0.2661', 'rewards/rejected': '-1.672', 'rewards/accuracies': '0.7437', 'rewards/margins': '1.406', 'logps/chosen': '-138.3', 'logps/rejected': '-152.5', 'logits/chosen': '-0.9374', 'logits/rejected': '-0.9612', 'epoch': '0.7488'}
79
+ {'loss': '0.5411', 'grad_norm': '48.75', 'learning_rate': '3.244e-07', 'rewards/chosen': '-0.1937', 'rewards/rejected': '-1.203', 'rewards/accuracies': '0.675', 'rewards/margins': '1.009', 'logps/chosen': '-139.5', 'logps/rejected': '-150.4', 'logits/chosen': '-1.003', 'logits/rejected': '-1.018', 'epoch': '0.7644'}
80
+ {'loss': '0.4845', 'grad_norm': '40.75', 'learning_rate': '2.852e-07', 'rewards/chosen': '-0.2131', 'rewards/rejected': '-1.448', 'rewards/accuracies': '0.7312', 'rewards/margins': '1.235', 'logps/chosen': '-137.3', 'logps/rejected': '-154.6', 'logits/chosen': '-0.9683', 'logits/rejected': '-0.9468', 'epoch': '0.78'}
81
+ {'eval_loss': '0.4993', 'eval_runtime': '40.18', 'eval_samples_per_second': '13.44', 'eval_steps_per_second': '3.36', 'eval_rewards/chosen': '-0.2173', 'eval_rewards/rejected': '-1.439', 'eval_rewards/accuracies': '0.7', 'eval_rewards/margins': '1.222', 'eval_logps/chosen': '-140', 'eval_logps/rejected': '-155.7', 'eval_logits/chosen': '-0.9807', 'eval_logits/rejected': '-0.9991', 'epoch': '0.78'}
82
+ {'loss': '0.4628', 'grad_norm': '46', 'learning_rate': '2.482e-07', 'rewards/chosen': '-0.1764', 'rewards/rejected': '-1.582', 'rewards/accuracies': '0.7375', 'rewards/margins': '1.406', 'logps/chosen': '-135.9', 'logps/rejected': '-150.9', 'logits/chosen': '-0.9796', 'logits/rejected': '-1.017', 'epoch': '0.7956'}
83
+ {'loss': '0.4962', 'grad_norm': '52.25', 'learning_rate': '2.133e-07', 'rewards/chosen': '-0.2718', 'rewards/rejected': '-1.503', 'rewards/accuracies': '0.7375', 'rewards/margins': '1.231', 'logps/chosen': '-139.9', 'logps/rejected': '-151.5', 'logits/chosen': '-0.9783', 'logits/rejected': '-0.9835', 'epoch': '0.8112'}
84
+ {'loss': '0.5023', 'grad_norm': '36.5', 'learning_rate': '1.808e-07', 'rewards/chosen': '-0.1669', 'rewards/rejected': '-1.383', 'rewards/accuracies': '0.6938', 'rewards/margins': '1.216', 'logps/chosen': '-143', 'logps/rejected': '-156.3', 'logits/chosen': '-0.9845', 'logits/rejected': '-1.014', 'epoch': '0.8268'}
85
+ {'loss': '0.489', 'grad_norm': '49.5', 'learning_rate': '1.508e-07', 'rewards/chosen': '-0.1592', 'rewards/rejected': '-1.255', 'rewards/accuracies': '0.8', 'rewards/margins': '1.095', 'logps/chosen': '-138.2', 'logps/rejected': '-147.7', 'logits/chosen': '-1.021', 'logits/rejected': '-1.025', 'epoch': '0.8424'}
86
+ {'loss': '0.5108', 'grad_norm': '40.5', 'learning_rate': '1.233e-07', 'rewards/chosen': '-0.1103', 'rewards/rejected': '-1.092', 'rewards/accuracies': '0.7', 'rewards/margins': '0.982', 'logps/chosen': '-135.1', 'logps/rejected': '-144.9', 'logits/chosen': '-0.9653', 'logits/rejected': '-0.9778', 'epoch': '0.858'}
87
+ {'loss': '0.4898', 'grad_norm': '40.25', 'learning_rate': '9.836e-08', 'rewards/chosen': '-0.1521', 'rewards/rejected': '-1.389', 'rewards/accuracies': '0.725', 'rewards/margins': '1.237', 'logps/chosen': '-132.6', 'logps/rejected': '-144.8', 'logits/chosen': '-0.9742', 'logits/rejected': '-1.003', 'epoch': '0.8736'}
88
+ {'loss': '0.5178', 'grad_norm': '44.25', 'learning_rate': '7.612e-08', 'rewards/chosen': '-0.2263', 'rewards/rejected': '-1.307', 'rewards/accuracies': '0.6687', 'rewards/margins': '1.081', 'logps/chosen': '-137.1', 'logps/rejected': '-151.2', 'logits/chosen': '-1.009', 'logits/rejected': '-1.01', 'epoch': '0.8892'}
89
+ {'loss': '0.5001', 'grad_norm': '29.38', 'learning_rate': '5.663e-08', 'rewards/chosen': '-0.196', 'rewards/rejected': '-1.202', 'rewards/accuracies': '0.7188', 'rewards/margins': '1.006', 'logps/chosen': '-137.1', 'logps/rejected': '-147.7', 'logits/chosen': '-0.9683', 'logits/rejected': '-1.018', 'epoch': '0.9048'}
90
+ {'loss': '0.4811', 'grad_norm': '44.5', 'learning_rate': '3.995e-08', 'rewards/chosen': '-0.2124', 'rewards/rejected': '-1.397', 'rewards/accuracies': '0.7437', 'rewards/margins': '1.184', 'logps/chosen': '-136.5', 'logps/rejected': '-152.3', 'logits/chosen': '-1.023', 'logits/rejected': '-1.046', 'epoch': '0.9204'}
91
+ {'loss': '0.5045', 'grad_norm': '27.75', 'learning_rate': '2.612e-08', 'rewards/chosen': '-0.2044', 'rewards/rejected': '-1.235', 'rewards/accuracies': '0.7437', 'rewards/margins': '1.03', 'logps/chosen': '-134.6', 'logps/rejected': '-151.1', 'logits/chosen': '-0.9686', 'logits/rejected': '-0.9986', 'epoch': '0.936'}
92
+ {'eval_loss': '0.5009', 'eval_runtime': '40.15', 'eval_samples_per_second': '13.45', 'eval_steps_per_second': '3.362', 'eval_rewards/chosen': '-0.2003', 'eval_rewards/rejected': '-1.422', 'eval_rewards/accuracies': '0.7148', 'eval_rewards/margins': '1.221', 'eval_logps/chosen': '-140', 'eval_logps/rejected': '-155.6', 'eval_logits/chosen': '-0.9798', 'eval_logits/rejected': '-0.9978', 'epoch': '0.936'}
93
+ {'loss': '0.5156', 'grad_norm': '43', 'learning_rate': '1.519e-08', 'rewards/chosen': '-0.2099', 'rewards/rejected': '-1.355', 'rewards/accuracies': '0.6938', 'rewards/margins': '1.145', 'logps/chosen': '-135.9', 'logps/rejected': '-156.8', 'logits/chosen': '-0.9927', 'logits/rejected': '-1.001', 'epoch': '0.9516'}
94
+ {'loss': '0.5259', 'grad_norm': '29.12', 'learning_rate': '7.19e-09', 'rewards/chosen': '-0.2067', 'rewards/rejected': '-1.218', 'rewards/accuracies': '0.6625', 'rewards/margins': '1.011', 'logps/chosen': '-143.6', 'logps/rejected': '-154.5', 'logits/chosen': '-1.017', 'logits/rejected': '-1.012', 'epoch': '0.9672'}
95
+ {'loss': '0.4977', 'grad_norm': '41', 'learning_rate': '2.141e-09', 'rewards/chosen': '-0.2147', 'rewards/rejected': '-1.301', 'rewards/accuracies': '0.7563', 'rewards/margins': '1.087', 'logps/chosen': '-142.9', 'logps/rejected': '-158.3', 'logits/chosen': '-0.9882', 'logits/rejected': '-0.9938', 'epoch': '0.9828'}
96
+ {'loss': '0.5042', 'grad_norm': '50.5', 'learning_rate': '5.949e-11', 'rewards/chosen': '-0.1956', 'rewards/rejected': '-1.333', 'rewards/accuracies': '0.7437', 'rewards/margins': '1.138', 'logps/chosen': '-136.6', 'logps/rejected': '-152.5', 'logits/chosen': '-0.9518', 'logits/rejected': '-0.9941', 'epoch': '0.9984'}
97
+ {'train_runtime': '2398', 'train_samples_per_second': '4.276', 'train_steps_per_second': '0.267', 'train_loss': '0.529', 'epoch': '1'}
98
+ [dpo_train] Final model saved → models/dpo_fft_LFM2.5-1.2B-Instruct_xinlai__Math-Step-DPO-10K_20260223_022854/final_model
99
+ [dpo_train] Run metadata → models/dpo_fft_LFM2.5-1.2B-Instruct_xinlai__Math-Step-DPO-10K_20260223_022854/run_meta.json
100
+
101
+ [dpo_train] Done.
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c937a92854585ec95fb58489fcc514bfc02871c7b725449e33dbfcf1c2ffc49d
3
+ size 6225